diff mbox series

[v4,3/5] iommu/arm-smmu: add ACTLR data and support for SM8550

Message ID 20231215101827.30549-4-quic_bibekkum@quicinc.com (mailing list archive)
State New, archived
Headers show
Series iommu/arm-smmu: introduction of ACTLR implementation for Qualcomm SoCs | expand

Commit Message

Bibek Kumar Patro Dec. 15, 2023, 10:18 a.m. UTC
Add ACTLR data table for SM8550 along with support for
same including SM8550 specific implementation operations.

Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 ++++++++++++++++++++++
 1 file changed, 89 insertions(+)

--
2.17.1

Comments

Dmitry Baryshkov Dec. 15, 2023, 10:44 a.m. UTC | #1
On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
> Add ACTLR data table for SM8550 along with support for
> same including SM8550 specific implementation operations.
>
> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 ++++++++++++++++++++++
>  1 file changed, 89 insertions(+)
>
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index cb49291f5233..d2006f610243 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -20,6 +20,85 @@ struct actlr_config {
>         u32 actlr;
>  };
>
> +/*
> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
> + * buffer). The remaining bits are implementation defined and vary across
> + * SoCs.
> + */
> +
> +#define PREFETCH_DEFAULT       0
> +#define PREFETCH_SHALLOW       BIT(8)
> +#define PREFETCH_MODERATE      BIT(9)
> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))

I thin the following might be more correct:

#include <linux/bitfield.h>

#define PREFETCH_MASK GENMASK(9, 8)
#define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
#define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
#define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
#define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)

> +#define PREFETCH_SWITCH_GFX    (BIT(5) | BIT(3))
> +#define CPRE                   BIT(1)
> +#define CMTLB                  BIT(0)
> +
> +static const struct actlr_config sm8550_apps_actlr_cfg[] = {
> +       { 0x18a0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x18e0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x0800, 0x0020, PREFETCH_DEFAULT | CMTLB },
> +       { 0x1800, 0x00c0, PREFETCH_DEFAULT | CMTLB },
> +       { 0x1820, 0x0000, PREFETCH_DEFAULT | CMTLB },
> +       { 0x1860, 0x0000, PREFETCH_DEFAULT | CMTLB },
> +       { 0x0c01, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c02, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c03, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c04, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c05, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c06, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c07, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c08, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c09, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c0c, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c0d, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c0e, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x0c0f, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1961, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1962, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1963, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1964, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1965, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1966, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1967, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1968, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1969, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x196c, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x196d, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x196e, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x196f, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19c1, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19c2, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19c3, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19c4, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19c5, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19c6, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19c7, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19c8, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19c9, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19cc, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19cd, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19ce, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x19cf, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
> +       { 0x1c00, 0x0002, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x1c01, 0x0000, PREFETCH_DEFAULT | CMTLB },
> +       { 0x1920, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x1923, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x1924, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x1940, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x1941, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x1943, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x1944, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +       {},
> +};
> +
> +static const struct actlr_config sm8550_gfx_actlr_cfg[] = {
> +       { 0x0000, 0x03ff, PREFETCH_SWITCH_GFX | PREFETCH_DEEP | CPRE | CMTLB },
> +       {},
> +};
> +
>  static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>  {
>         return container_of(smmu, struct qcom_smmu, smmu);
> @@ -549,6 +628,15 @@ static const struct qcom_smmu_match_data sdm845_smmu_500_data = {
>         /* Also no debug configuration. */
>  };
>
> +
> +static const struct qcom_smmu_match_data sm8550_smmu_500_impl0_data = {
> +       .impl = &qcom_smmu_500_impl,
> +       .adreno_impl = &qcom_adreno_smmu_500_impl,
> +       .cfg = &qcom_smmu_impl0_cfg,
> +       .actlrcfg = sm8550_apps_actlr_cfg,
> +       .actlrcfg_gfx = sm8550_gfx_actlr_cfg,
> +};
> +
>  static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
>         .impl = &qcom_smmu_500_impl,
>         .adreno_impl = &qcom_adreno_smmu_500_impl,
> @@ -583,6 +671,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
>         { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
>         { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data },
>         { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data },
> +       { .compatible = "qcom,sm8550-smmu-500", .data = &sm8550_smmu_500_impl0_data },
>         { .compatible = "qcom,smmu-500", .data = &qcom_smmu_500_impl0_data },
>         { }
>  };
> --
> 2.17.1
>
Bibek Kumar Patro Dec. 15, 2023, 12:20 p.m. UTC | #2
On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>> Add ACTLR data table for SM8550 along with support for
>> same including SM8550 specific implementation operations.
>>
>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 ++++++++++++++++++++++
>>   1 file changed, 89 insertions(+)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> index cb49291f5233..d2006f610243 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> @@ -20,6 +20,85 @@ struct actlr_config {
>>          u32 actlr;
>>   };
>>
>> +/*
>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
>> + * buffer). The remaining bits are implementation defined and vary across
>> + * SoCs.
>> + */
>> +
>> +#define PREFETCH_DEFAULT       0
>> +#define PREFETCH_SHALLOW       BIT(8)
>> +#define PREFETCH_MODERATE      BIT(9)
>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
> 
> I thin the following might be more correct:
> 
> #include <linux/bitfield.h>
> 
> #define PREFETCH_MASK GENMASK(9, 8)
> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
> 

Ack, thanks for this suggestion. Let me try this out using
GENMASK. Once tested, will take care of this in next version.

Thanks,
Bibek

>> +#define PREFETCH_SWITCH_GFX    (BIT(5) | BIT(3))
>> +#define CPRE                   BIT(1)
>> +#define CMTLB                  BIT(0)
>> +
>> +static const struct actlr_config sm8550_apps_actlr_cfg[] = {
>> +       { 0x18a0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x18e0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x0800, 0x0020, PREFETCH_DEFAULT | CMTLB },
>> +       { 0x1800, 0x00c0, PREFETCH_DEFAULT | CMTLB },
>> +       { 0x1820, 0x0000, PREFETCH_DEFAULT | CMTLB },
>> +       { 0x1860, 0x0000, PREFETCH_DEFAULT | CMTLB },
>> +       { 0x0c01, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c02, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c03, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c04, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c05, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c06, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c07, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c08, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c09, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c0c, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c0d, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c0e, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x0c0f, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1961, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1962, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1963, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1964, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1965, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1966, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1967, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1968, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1969, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x196c, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x196d, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x196e, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x196f, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19c1, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19c2, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19c3, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19c4, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19c5, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19c6, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19c7, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19c8, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19c9, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19cc, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19cd, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19ce, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x19cf, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>> +       { 0x1c00, 0x0002, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x1c01, 0x0000, PREFETCH_DEFAULT | CMTLB },
>> +       { 0x1920, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x1923, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x1924, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x1940, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x1941, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x1943, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x1944, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +       {},
>> +};
>> +
>> +static const struct actlr_config sm8550_gfx_actlr_cfg[] = {
>> +       { 0x0000, 0x03ff, PREFETCH_SWITCH_GFX | PREFETCH_DEEP | CPRE | CMTLB },
>> +       {},
>> +};
>> +
>>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>   {
>>          return container_of(smmu, struct qcom_smmu, smmu);
>> @@ -549,6 +628,15 @@ static const struct qcom_smmu_match_data sdm845_smmu_500_data = {
>>          /* Also no debug configuration. */
>>   };
>>
>> +
>> +static const struct qcom_smmu_match_data sm8550_smmu_500_impl0_data = {
>> +       .impl = &qcom_smmu_500_impl,
>> +       .adreno_impl = &qcom_adreno_smmu_500_impl,
>> +       .cfg = &qcom_smmu_impl0_cfg,
>> +       .actlrcfg = sm8550_apps_actlr_cfg,
>> +       .actlrcfg_gfx = sm8550_gfx_actlr_cfg,
>> +};
>> +
>>   static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
>>          .impl = &qcom_smmu_500_impl,
>>          .adreno_impl = &qcom_adreno_smmu_500_impl,
>> @@ -583,6 +671,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
>>          { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
>>          { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data },
>>          { .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data },
>> +       { .compatible = "qcom,sm8550-smmu-500", .data = &sm8550_smmu_500_impl0_data },
>>          { .compatible = "qcom,smmu-500", .data = &qcom_smmu_500_impl0_data },
>>          { }
>>   };
>> --
>> 2.17.1
>>
> 
>
Robin Murphy Dec. 15, 2023, 12:54 p.m. UTC | #3
On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
> 
> 
> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>> <quic_bibekkum@quicinc.com> wrote:
>>>
>>> Add ACTLR data table for SM8550 along with support for
>>> same including SM8550 specific implementation operations.
>>>
>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>> ---
>>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 ++++++++++++++++++++++
>>>   1 file changed, 89 insertions(+)
>>>
>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>> index cb49291f5233..d2006f610243 100644
>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>          u32 actlr;
>>>   };
>>>
>>> +/*
>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the 
>>> prefetch
>>> + * buffer). The remaining bits are implementation defined and vary 
>>> across
>>> + * SoCs.
>>> + */
>>> +
>>> +#define PREFETCH_DEFAULT       0
>>> +#define PREFETCH_SHALLOW       BIT(8)
>>> +#define PREFETCH_MODERATE      BIT(9)
>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>
>> I thin the following might be more correct:
>>
>> #include <linux/bitfield.h>
>>
>> #define PREFETCH_MASK GENMASK(9, 8)
>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>
> 
> Ack, thanks for this suggestion. Let me try this out using
> GENMASK. Once tested, will take care of this in next version.

FWIW the more typical usage would be to just define the named macros for 
the raw field values, then put the FIELD_PREP() at the point of use. 
However in this case that's liable to get pretty verbose, so although 
I'm usually a fan of bitfield.h, the most readable option here might 
actually be to stick with simpler definitions of "(0 << 8)", "(1 << 8)", 
etc. However it's not really a big deal either way, and I defer to 
whatever Dmitry and Konrad prefer, since they're the ones looking after 
arm-smmu-qcom the most :)

Thanks,
Robin.

> 
> Thanks,
> Bibek
> 
>>> +#define PREFETCH_SWITCH_GFX    (BIT(5) | BIT(3))
>>> +#define CPRE                   BIT(1)
>>> +#define CMTLB                  BIT(0)
>>> +
>>> +static const struct actlr_config sm8550_apps_actlr_cfg[] = {
>>> +       { 0x18a0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x18e0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x0800, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>> +       { 0x1800, 0x00c0, PREFETCH_DEFAULT | CMTLB },
>>> +       { 0x1820, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>> +       { 0x1860, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>> +       { 0x0c01, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c02, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c03, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c04, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c05, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c06, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c07, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c08, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c09, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c0c, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c0d, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c0e, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x0c0f, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1961, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1962, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1963, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1964, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1965, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1966, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1967, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1968, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1969, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x196c, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x196d, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x196e, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x196f, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19c1, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19c2, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19c3, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19c4, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19c5, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19c6, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19c7, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19c8, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19c9, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19cc, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19cd, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19ce, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x19cf, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>> +       { 0x1c00, 0x0002, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x1c01, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>> +       { 0x1920, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x1923, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x1924, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x1940, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x1941, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x1943, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x1944, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>> +       {},
>>> +};
>>> +
>>> +static const struct actlr_config sm8550_gfx_actlr_cfg[] = {
>>> +       { 0x0000, 0x03ff, PREFETCH_SWITCH_GFX | PREFETCH_DEEP | CPRE 
>>> | CMTLB },
>>> +       {},
>>> +};
>>> +
>>>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>   {
>>>          return container_of(smmu, struct qcom_smmu, smmu);
>>> @@ -549,6 +628,15 @@ static const struct qcom_smmu_match_data 
>>> sdm845_smmu_500_data = {
>>>          /* Also no debug configuration. */
>>>   };
>>>
>>> +
>>> +static const struct qcom_smmu_match_data sm8550_smmu_500_impl0_data = {
>>> +       .impl = &qcom_smmu_500_impl,
>>> +       .adreno_impl = &qcom_adreno_smmu_500_impl,
>>> +       .cfg = &qcom_smmu_impl0_cfg,
>>> +       .actlrcfg = sm8550_apps_actlr_cfg,
>>> +       .actlrcfg_gfx = sm8550_gfx_actlr_cfg,
>>> +};
>>> +
>>>   static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
>>>          .impl = &qcom_smmu_500_impl,
>>>          .adreno_impl = &qcom_adreno_smmu_500_impl,
>>> @@ -583,6 +671,7 @@ static const struct of_device_id __maybe_unused 
>>> qcom_smmu_impl_of_match[] = {
>>>          { .compatible = "qcom,sm8250-smmu-500", .data = 
>>> &qcom_smmu_500_impl0_data },
>>>          { .compatible = "qcom,sm8350-smmu-500", .data = 
>>> &qcom_smmu_500_impl0_data },
>>>          { .compatible = "qcom,sm8450-smmu-500", .data = 
>>> &qcom_smmu_500_impl0_data },
>>> +       { .compatible = "qcom,sm8550-smmu-500", .data = 
>>> &sm8550_smmu_500_impl0_data },
>>>          { .compatible = "qcom,smmu-500", .data = 
>>> &qcom_smmu_500_impl0_data },
>>>          { }
>>>   };
>>> -- 
>>> 2.17.1
>>>
>>
>>
Konrad Dybcio Dec. 15, 2023, 11:35 p.m. UTC | #4
On 15.12.2023 11:18, Bibek Kumar Patro wrote:
> Add ACTLR data table for SM8550 along with support for
> same including SM8550 specific implementation operations.
> 
> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> ---
[...]

> +static const struct qcom_smmu_match_data sm8550_smmu_500_impl0_data = {
> +	.impl = &qcom_smmu_500_impl,
> +	.adreno_impl = &qcom_adreno_smmu_500_impl,
> +	.cfg = &qcom_smmu_impl0_cfg,
> +	.actlrcfg = sm8550_apps_actlr_cfg,
> +	.actlrcfg_gfx = sm8550_gfx_actlr_cfg,
There are platforms that feature more than just APPS and Adreno SMMUs,
this implementation seems to assume there's only these two :/

I suppose the only way to solve this would be to introduce new compatibles
for each one of them.. Krzysztof, do you think that's reasonable? E.g.
MSM8996 has at least 5 instances, 8998 has at least 4 etc.

Konrad
Konrad Dybcio Dec. 16, 2023, 12:03 a.m. UTC | #5
On 15.12.2023 13:54, Robin Murphy wrote:
> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>
>>
>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>> Add ACTLR data table for SM8550 along with support for
>>>> same including SM8550 specific implementation operations.
>>>>
>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>> ---
>>>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 ++++++++++++++++++++++
>>>>   1 file changed, 89 insertions(+)
>>>>
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> index cb49291f5233..d2006f610243 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>          u32 actlr;
>>>>   };
>>>>
>>>> +/*
>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
>>>> + * buffer). The remaining bits are implementation defined and vary across
>>>> + * SoCs.
>>>> + */
>>>> +
>>>> +#define PREFETCH_DEFAULT       0
>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>
>>> I thin the following might be more correct:
>>>
>>> #include <linux/bitfield.h>
>>>
>>> #define PREFETCH_MASK GENMASK(9, 8)
>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>
>>
>> Ack, thanks for this suggestion. Let me try this out using
>> GENMASK. Once tested, will take care of this in next version.
> 
> FWIW the more typical usage would be to just define the named macros for the raw field values, then put the FIELD_PREP() at the point of use. However in this case that's liable to get pretty verbose, so although I'm usually a fan of bitfield.h, the most readable option here might actually be to stick with simpler definitions of "(0 << 8)", "(1 << 8)", etc. However it's not really a big deal either way, and I defer to whatever Dmitry and Konrad prefer, since they're the ones looking after arm-smmu-qcom the most :)
My 5 cents would be to just use the "common" style of doing this, so:

#define ACTRL_PREFETCH	GENMASK(9, 8)
 #define PREFETCH_DEFAULT 0
 #define PREFETCH_SHALLOW 1
 #define PREFETCH_MODERATE 2
 #define PREFETCH_DEEP 3

and then use 

| FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)

it can get verbose, but.. arguably that's good, since you really want
to make sure the right bits are set here

Konrad
Dmitry Baryshkov Dec. 16, 2023, 4:15 p.m. UTC | #6
On 16/12/2023 02:03, Konrad Dybcio wrote:
> On 15.12.2023 13:54, Robin Murphy wrote:
>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>>
>>>
>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>
>>>>> Add ACTLR data table for SM8550 along with support for
>>>>> same including SM8550 specific implementation operations.
>>>>>
>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>> ---
>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 ++++++++++++++++++++++
>>>>>    1 file changed, 89 insertions(+)
>>>>>
>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> index cb49291f5233..d2006f610243 100644
>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>>           u32 actlr;
>>>>>    };
>>>>>
>>>>> +/*
>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
>>>>> + * buffer). The remaining bits are implementation defined and vary across
>>>>> + * SoCs.
>>>>> + */
>>>>> +
>>>>> +#define PREFETCH_DEFAULT       0
>>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>>
>>>> I thin the following might be more correct:
>>>>
>>>> #include <linux/bitfield.h>
>>>>
>>>> #define PREFETCH_MASK GENMASK(9, 8)
>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>>
>>>
>>> Ack, thanks for this suggestion. Let me try this out using
>>> GENMASK. Once tested, will take care of this in next version.
>>
>> FWIW the more typical usage would be to just define the named macros for the raw field values, then put the FIELD_PREP() at the point of use. However in this case that's liable to get pretty verbose, so although I'm usually a fan of bitfield.h, the most readable option here might actually be to stick with simpler definitions of "(0 << 8)", "(1 << 8)", etc. However it's not really a big deal either way, and I defer to whatever Dmitry and Konrad prefer, since they're the ones looking after arm-smmu-qcom the most :)
> My 5 cents would be to just use the "common" style of doing this, so:
> 
> #define ACTRL_PREFETCH	GENMASK(9, 8)
>   #define PREFETCH_DEFAULT 0
>   #define PREFETCH_SHALLOW 1
>   #define PREFETCH_MODERATE 2
>   #define PREFETCH_DEEP 3
> 
> and then use
> 
> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
> 
> it can get verbose, but.. arguably that's good, since you really want
> to make sure the right bits are set here

Sounds good to me.
Dmitry Baryshkov Dec. 16, 2023, 4:16 p.m. UTC | #7
On 16/12/2023 01:35, Konrad Dybcio wrote:
> On 15.12.2023 11:18, Bibek Kumar Patro wrote:
>> Add ACTLR data table for SM8550 along with support for
>> same including SM8550 specific implementation operations.
>>
>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>> ---
> [...]
> 
>> +static const struct qcom_smmu_match_data sm8550_smmu_500_impl0_data = {
>> +	.impl = &qcom_smmu_500_impl,
>> +	.adreno_impl = &qcom_adreno_smmu_500_impl,
>> +	.cfg = &qcom_smmu_impl0_cfg,
>> +	.actlrcfg = sm8550_apps_actlr_cfg,
>> +	.actlrcfg_gfx = sm8550_gfx_actlr_cfg,
> There are platforms that feature more than just APPS and Adreno SMMUs,
> this implementation seems to assume there's only these two :/
> 
> I suppose the only way to solve this would be to introduce new compatibles
> for each one of them.. Krzysztof, do you think that's reasonable? E.g.
> MSM8996 has at least 5 instances, 8998 has at least 4 etc.

Ugh. I don't think compatibles will make sense here. I think we have to 
resolve to the hated solution of putting identifying the instance via 
the IO address.
Bibek Kumar Patro Dec. 18, 2023, 5:36 a.m. UTC | #8
On 12/15/2023 6:24 PM, Robin Murphy wrote:
> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>
>>
>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>> Add ACTLR data table for SM8550 along with support for
>>>> same including SM8550 specific implementation operations.
>>>>
>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>> ---
>>>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 
>>>> ++++++++++++++++++++++
>>>>   1 file changed, 89 insertions(+)
>>>>
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> index cb49291f5233..d2006f610243 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>          u32 actlr;
>>>>   };
>>>>
>>>> +/*
>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the 
>>>> prefetch
>>>> + * buffer). The remaining bits are implementation defined and vary 
>>>> across
>>>> + * SoCs.
>>>> + */
>>>> +
>>>> +#define PREFETCH_DEFAULT       0
>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>
>>> I thin the following might be more correct:
>>>
>>> #include <linux/bitfield.h>
>>>
>>> #define PREFETCH_MASK GENMASK(9, 8)
>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>
>>
>> Ack, thanks for this suggestion. Let me try this out using
>> GENMASK. Once tested, will take care of this in next version.
> 
> FWIW the more typical usage would be to just define the named macros for 
> the raw field values, then put the FIELD_PREP() at the point of use. 
> However in this case that's liable to get pretty verbose, so although 
> I'm usually a fan of bitfield.h, the most readable option here might 
> actually be to stick with simpler definitions of "(0 << 8)", "(1 << 8)", 
> etc. However it's not really a big deal either way, and I defer to 
> whatever Dmitry and Konrad prefer, since they're the ones looking after 
> arm-smmu-qcom the most :)
> 

Agree, surely simple macros would be easy to understand the bits we are
setting/resetting, but to get good verbosity bitfield would surely be
helpful as you rightly pointed. I can see some improved suggestions form
Konrad as well in the latest reply, the way it'd be better in arm-smmu-
qcom. Will try to incorporate these inputs in next version.

Thanks,
Bibek

> Thanks,
> Robin.
> 
>>
>> Thanks,
>> Bibek
>>
>>>> +#define PREFETCH_SWITCH_GFX    (BIT(5) | BIT(3))
>>>> +#define CPRE                   BIT(1)
>>>> +#define CMTLB                  BIT(0)
>>>> +
>>>> +static const struct actlr_config sm8550_apps_actlr_cfg[] = {
>>>> +       { 0x18a0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x18e0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x0800, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>> +       { 0x1800, 0x00c0, PREFETCH_DEFAULT | CMTLB },
>>>> +       { 0x1820, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>> +       { 0x1860, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>> +       { 0x0c01, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c02, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c03, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c04, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c05, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c06, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c07, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c08, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c09, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c0c, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c0d, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c0e, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x0c0f, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1961, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1962, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1963, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1964, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1965, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1966, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1967, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1968, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1969, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x196c, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x196d, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x196e, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x196f, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19c1, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19c2, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19c3, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19c4, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19c5, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19c6, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19c7, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19c8, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19c9, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19cc, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19cd, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19ce, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x19cf, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +       { 0x1c00, 0x0002, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x1c01, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>> +       { 0x1920, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x1923, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x1924, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x1940, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x1941, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x1943, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x1944, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +       {},
>>>> +};
>>>> +
>>>> +static const struct actlr_config sm8550_gfx_actlr_cfg[] = {
>>>> +       { 0x0000, 0x03ff, PREFETCH_SWITCH_GFX | PREFETCH_DEEP | CPRE 
>>>> | CMTLB },
>>>> +       {},
>>>> +};
>>>> +
>>>>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>   {
>>>>          return container_of(smmu, struct qcom_smmu, smmu);
>>>> @@ -549,6 +628,15 @@ static const struct qcom_smmu_match_data 
>>>> sdm845_smmu_500_data = {
>>>>          /* Also no debug configuration. */
>>>>   };
>>>>
>>>> +
>>>> +static const struct qcom_smmu_match_data sm8550_smmu_500_impl0_data 
>>>> = {
>>>> +       .impl = &qcom_smmu_500_impl,
>>>> +       .adreno_impl = &qcom_adreno_smmu_500_impl,
>>>> +       .cfg = &qcom_smmu_impl0_cfg,
>>>> +       .actlrcfg = sm8550_apps_actlr_cfg,
>>>> +       .actlrcfg_gfx = sm8550_gfx_actlr_cfg,
>>>> +};
>>>> +
>>>>   static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
>>>>          .impl = &qcom_smmu_500_impl,
>>>>          .adreno_impl = &qcom_adreno_smmu_500_impl,
>>>> @@ -583,6 +671,7 @@ static const struct of_device_id __maybe_unused 
>>>> qcom_smmu_impl_of_match[] = {
>>>>          { .compatible = "qcom,sm8250-smmu-500", .data = 
>>>> &qcom_smmu_500_impl0_data },
>>>>          { .compatible = "qcom,sm8350-smmu-500", .data = 
>>>> &qcom_smmu_500_impl0_data },
>>>>          { .compatible = "qcom,sm8450-smmu-500", .data = 
>>>> &qcom_smmu_500_impl0_data },
>>>> +       { .compatible = "qcom,sm8550-smmu-500", .data = 
>>>> &sm8550_smmu_500_impl0_data },
>>>>          { .compatible = "qcom,smmu-500", .data = 
>>>> &qcom_smmu_500_impl0_data },
>>>>          { }
>>>>   };
>>>> -- 
>>>> 2.17.1
>>>>
>>>
>>>
Bibek Kumar Patro Dec. 18, 2023, 6:13 a.m. UTC | #9
On 12/16/2023 9:45 PM, Dmitry Baryshkov wrote:
> On 16/12/2023 02:03, Konrad Dybcio wrote:
>> On 15.12.2023 13:54, Robin Murphy wrote:
>>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>>>
>>>>
>>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>
>>>>>> Add ACTLR data table for SM8550 along with support for
>>>>>> same including SM8550 specific implementation operations.
>>>>>>
>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>> ---
>>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 
>>>>>> ++++++++++++++++++++++
>>>>>>    1 file changed, 89 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> index cb49291f5233..d2006f610243 100644
>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>>>           u32 actlr;
>>>>>>    };
>>>>>>
>>>>>> +/*
>>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching 
>>>>>> in the
>>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the 
>>>>>> prefetch
>>>>>> + * buffer). The remaining bits are implementation defined and 
>>>>>> vary across
>>>>>> + * SoCs.
>>>>>> + */
>>>>>> +
>>>>>> +#define PREFETCH_DEFAULT       0
>>>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>>>
>>>>> I thin the following might be more correct:
>>>>>
>>>>> #include <linux/bitfield.h>
>>>>>
>>>>> #define PREFETCH_MASK GENMASK(9, 8)
>>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>>>
>>>>
>>>> Ack, thanks for this suggestion. Let me try this out using
>>>> GENMASK. Once tested, will take care of this in next version.
>>>
>>> FWIW the more typical usage would be to just define the named macros 
>>> for the raw field values, then put the FIELD_PREP() at the point of 
>>> use. However in this case that's liable to get pretty verbose, so 
>>> although I'm usually a fan of bitfield.h, the most readable option 
>>> here might actually be to stick with simpler definitions of "(0 << 
>>> 8)", "(1 << 8)", etc. However it's not really a big deal either way, 
>>> and I defer to whatever Dmitry and Konrad prefer, since they're the 
>>> ones looking after arm-smmu-qcom the most :)
>> My 5 cents would be to just use the "common" style of doing this, so:
>>
>> #define ACTRL_PREFETCH    GENMASK(9, 8)
>>   #define PREFETCH_DEFAULT 0
>>   #define PREFETCH_SHALLOW 1
>>   #define PREFETCH_MODERATE 2
>>   #define PREFETCH_DEEP 3
>>
>> and then use
>>
>> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
>>
>> it can get verbose, but.. arguably that's good, since you really want
>> to make sure the right bits are set here
> 
> Sounds good to me.
> 

Acked.
Bibek Kumar Patro Dec. 18, 2023, 6:17 a.m. UTC | #10
On 12/16/2023 5:33 AM, Konrad Dybcio wrote:
> On 15.12.2023 13:54, Robin Murphy wrote:
>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>>
>>>
>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>
>>>>> Add ACTLR data table for SM8550 along with support for
>>>>> same including SM8550 specific implementation operations.
>>>>>
>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>> ---
>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 ++++++++++++++++++++++
>>>>>    1 file changed, 89 insertions(+)
>>>>>
>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> index cb49291f5233..d2006f610243 100644
>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>>           u32 actlr;
>>>>>    };
>>>>>
>>>>> +/*
>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
>>>>> + * buffer). The remaining bits are implementation defined and vary across
>>>>> + * SoCs.
>>>>> + */
>>>>> +
>>>>> +#define PREFETCH_DEFAULT       0
>>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>>
>>>> I thin the following might be more correct:
>>>>
>>>> #include <linux/bitfield.h>
>>>>
>>>> #define PREFETCH_MASK GENMASK(9, 8)
>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>>
>>>
>>> Ack, thanks for this suggestion. Let me try this out using
>>> GENMASK. Once tested, will take care of this in next version.
>>
>> FWIW the more typical usage would be to just define the named macros for the raw field values, then put the FIELD_PREP() at the point of use. However in this case that's liable to get pretty verbose, so although I'm usually a fan of bitfield.h, the most readable option here might actually be to stick with simpler definitions of "(0 << 8)", "(1 << 8)", etc. However it's not really a big deal either way, and I defer to whatever Dmitry and Konrad prefer, since they're the ones looking after arm-smmu-qcom the most :)
> My 5 cents would be to just use the "common" style of doing this, so:
> 
> #define ACTRL_PREFETCH	GENMASK(9, 8)
>   #define PREFETCH_DEFAULT 0
>   #define PREFETCH_SHALLOW 1
>   #define PREFETCH_MODERATE 2
>   #define PREFETCH_DEEP 3
> 
> and then use
> 
> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
> 
> it can get verbose, but.. arguably that's good, since you really want
> to make sure the right bits are set here
> 

Thanks for the suggestion with these mods.
Let me try out the suggested way and once tested will post this in next
version.

Thanks,
Bibek

> Konrad
Bibek Kumar Patro Dec. 18, 2023, 11:23 a.m. UTC | #11
On 12/16/2023 9:45 PM, Dmitry Baryshkov wrote:
> On 16/12/2023 02:03, Konrad Dybcio wrote:
>> On 15.12.2023 13:54, Robin Murphy wrote:
>>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>>>
>>>>
>>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>
>>>>>> Add ACTLR data table for SM8550 along with support for
>>>>>> same including SM8550 specific implementation operations.
>>>>>>
>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>> ---
>>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 
>>>>>> ++++++++++++++++++++++
>>>>>>    1 file changed, 89 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> index cb49291f5233..d2006f610243 100644
>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>>>           u32 actlr;
>>>>>>    };
>>>>>>
>>>>>> +/*
>>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching 
>>>>>> in the
>>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the 
>>>>>> prefetch
>>>>>> + * buffer). The remaining bits are implementation defined and 
>>>>>> vary across
>>>>>> + * SoCs.
>>>>>> + */
>>>>>> +
>>>>>> +#define PREFETCH_DEFAULT       0
>>>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>>>
>>>>> I thin the following might be more correct:
>>>>>
>>>>> #include <linux/bitfield.h>
>>>>>
>>>>> #define PREFETCH_MASK GENMASK(9, 8)
>>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>>>
>>>>
>>>> Ack, thanks for this suggestion. Let me try this out using
>>>> GENMASK. Once tested, will take care of this in next version.
>>>
>>> FWIW the more typical usage would be to just define the named macros 
>>> for the raw field values, then put the FIELD_PREP() at the point of 
>>> use. However in this case that's liable to get pretty verbose, so 
>>> although I'm usually a fan of bitfield.h, the most readable option 
>>> here might actually be to stick with simpler definitions of "(0 << 
>>> 8)", "(1 << 8)", etc. However it's not really a big deal either way, 
>>> and I defer to whatever Dmitry and Konrad prefer, since they're the 
>>> ones looking after arm-smmu-qcom the most :)
>> My 5 cents would be to just use the "common" style of doing this, so:
>>
>> #define ACTRL_PREFETCH    GENMASK(9, 8)
>>   #define PREFETCH_DEFAULT 0
>>   #define PREFETCH_SHALLOW 1
>>   #define PREFETCH_MODERATE 2
>>   #define PREFETCH_DEEP 3
>>
>> and then use
>>
>> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
>>
>> it can get verbose, but.. arguably that's good, since you really want
>> to make sure the right bits are set here
> 
> Sounds good to me.
> 

Konrad, Dimitry, just checked FIELD_PREP() implementation

#define FIELD_FIT(_mask, _val)
({                                                              \
                  __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: ");  \
                  ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
})

since it is defined as a block, it won't be possible to use FIELD_PREP
in macro or as a structure value, and can only be used inside a 
block/function. Orelse would show compilation errors as following

kernel/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c:94:20: note: in 
expansion of macro 'PREFETCH_SHALLOW'
   { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
                     ^
kernel/include/linux/bitfield.h:113:2: error: braced-group within 
expression allowed only inside a function
   ({        \
   ^

So as per my understanding I think, we might need to go ahead with the
generic implementation only. Let me know if I missed something.

Thanks,
Bibek
Bibek Kumar Patro Dec. 18, 2023, 11:40 a.m. UTC | #12
On 12/16/2023 5:05 AM, Konrad Dybcio wrote:
> On 15.12.2023 11:18, Bibek Kumar Patro wrote:
>> Add ACTLR data table for SM8550 along with support for
>> same including SM8550 specific implementation operations.
>>
>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>> ---
> [...]
> 
>> +static const struct qcom_smmu_match_data sm8550_smmu_500_impl0_data = {
>> +	.impl = &qcom_smmu_500_impl,
>> +	.adreno_impl = &qcom_adreno_smmu_500_impl,
>> +	.cfg = &qcom_smmu_impl0_cfg,
>> +	.actlrcfg = sm8550_apps_actlr_cfg,
>> +	.actlrcfg_gfx = sm8550_gfx_actlr_cfg,
> There are platforms that feature more than just APPS and Adreno SMMUs,
> this implementation seems to assume there's only these two :/
> 

Yes, some platforms can feature additional SMMUs as well including APPS 
and Adreno. In that case there would be a corresponding compatible 
string and an additional field in qcom_smmu_match_data might be needed.

Thanks,
Bibek

> I suppose the only way to solve this would be to introduce new compatibles
> for each one of them.. Krzysztof, do you think that's reasonable? E.g.
> MSM8996 has at least 5 instances, 8998 has at least 4 etc.
> 
> Konrad
Dmitry Baryshkov Dec. 18, 2023, 2:21 p.m. UTC | #13
On 18/12/2023 13:23, Bibek Kumar Patro wrote:
> 
> 
> On 12/16/2023 9:45 PM, Dmitry Baryshkov wrote:
>> On 16/12/2023 02:03, Konrad Dybcio wrote:
>>> On 15.12.2023 13:54, Robin Murphy wrote:
>>>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>>>>
>>>>>
>>>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>
>>>>>>> Add ACTLR data table for SM8550 along with support for
>>>>>>> same including SM8550 specific implementation operations.
>>>>>>>
>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>> ---
>>>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 
>>>>>>> ++++++++++++++++++++++
>>>>>>>    1 file changed, 89 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
>>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>> index cb49291f5233..d2006f610243 100644
>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>>>>           u32 actlr;
>>>>>>>    };
>>>>>>>
>>>>>>> +/*
>>>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching 
>>>>>>> in the
>>>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the 
>>>>>>> prefetch
>>>>>>> + * buffer). The remaining bits are implementation defined and 
>>>>>>> vary across
>>>>>>> + * SoCs.
>>>>>>> + */
>>>>>>> +
>>>>>>> +#define PREFETCH_DEFAULT       0
>>>>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>>>>
>>>>>> I thin the following might be more correct:
>>>>>>
>>>>>> #include <linux/bitfield.h>
>>>>>>
>>>>>> #define PREFETCH_MASK GENMASK(9, 8)
>>>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>>>>
>>>>>
>>>>> Ack, thanks for this suggestion. Let me try this out using
>>>>> GENMASK. Once tested, will take care of this in next version.
>>>>
>>>> FWIW the more typical usage would be to just define the named macros 
>>>> for the raw field values, then put the FIELD_PREP() at the point of 
>>>> use. However in this case that's liable to get pretty verbose, so 
>>>> although I'm usually a fan of bitfield.h, the most readable option 
>>>> here might actually be to stick with simpler definitions of "(0 << 
>>>> 8)", "(1 << 8)", etc. However it's not really a big deal either way, 
>>>> and I defer to whatever Dmitry and Konrad prefer, since they're the 
>>>> ones looking after arm-smmu-qcom the most :)
>>> My 5 cents would be to just use the "common" style of doing this, so:
>>>
>>> #define ACTRL_PREFETCH    GENMASK(9, 8)
>>>   #define PREFETCH_DEFAULT 0
>>>   #define PREFETCH_SHALLOW 1
>>>   #define PREFETCH_MODERATE 2
>>>   #define PREFETCH_DEEP 3
>>>
>>> and then use
>>>
>>> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
>>>
>>> it can get verbose, but.. arguably that's good, since you really want
>>> to make sure the right bits are set here
>>
>> Sounds good to me.
>>
> 
> Konrad, Dimitry, just checked FIELD_PREP() implementation
> 
> #define FIELD_FIT(_mask, _val)
> ({                                                              \
>                   __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: ");  \
>                   ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
> })
> 
> since it is defined as a block, it won't be possible to use FIELD_PREP
> in macro or as a structure value, and can only be used inside a 
> block/function. Orelse would show compilation errors as following
> 
> kernel/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c:94:20: note: in 
> expansion of macro 'PREFETCH_SHALLOW'
>    { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>                      ^
> kernel/include/linux/bitfield.h:113:2: error: braced-group within 
> expression allowed only inside a function
>    ({        \
>    ^
> 
> So as per my understanding I think, we might need to go ahead with the
> generic implementation only. Let me know if I missed something.

Then anyway (foo << bar) is better compared to BIT(n) | BIT(m).
Bibek Kumar Patro Dec. 19, 2023, 8:24 a.m. UTC | #14
On 12/18/2023 7:51 PM, Dmitry Baryshkov wrote:
> On 18/12/2023 13:23, Bibek Kumar Patro wrote:
>>
>>
>> On 12/16/2023 9:45 PM, Dmitry Baryshkov wrote:
>>> On 16/12/2023 02:03, Konrad Dybcio wrote:
>>>> On 15.12.2023 13:54, Robin Murphy wrote:
>>>>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>>>>>
>>>>>>
>>>>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>>>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>
>>>>>>>> Add ACTLR data table for SM8550 along with support for
>>>>>>>> same including SM8550 specific implementation operations.
>>>>>>>>
>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>>> ---
>>>>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89 
>>>>>>>> ++++++++++++++++++++++
>>>>>>>>    1 file changed, 89 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
>>>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>> index cb49291f5233..d2006f610243 100644
>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>>>>>           u32 actlr;
>>>>>>>>    };
>>>>>>>>
>>>>>>>> +/*
>>>>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching 
>>>>>>>> in the
>>>>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the 
>>>>>>>> prefetch
>>>>>>>> + * buffer). The remaining bits are implementation defined and 
>>>>>>>> vary across
>>>>>>>> + * SoCs.
>>>>>>>> + */
>>>>>>>> +
>>>>>>>> +#define PREFETCH_DEFAULT       0
>>>>>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>>>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>>>>>
>>>>>>> I thin the following might be more correct:
>>>>>>>
>>>>>>> #include <linux/bitfield.h>
>>>>>>>
>>>>>>> #define PREFETCH_MASK GENMASK(9, 8)
>>>>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>>>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>>>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>>>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>>>>>
>>>>>>
>>>>>> Ack, thanks for this suggestion. Let me try this out using
>>>>>> GENMASK. Once tested, will take care of this in next version.
>>>>>
>>>>> FWIW the more typical usage would be to just define the named 
>>>>> macros for the raw field values, then put the FIELD_PREP() at the 
>>>>> point of use. However in this case that's liable to get pretty 
>>>>> verbose, so although I'm usually a fan of bitfield.h, the most 
>>>>> readable option here might actually be to stick with simpler 
>>>>> definitions of "(0 << 8)", "(1 << 8)", etc. However it's not really 
>>>>> a big deal either way, and I defer to whatever Dmitry and Konrad 
>>>>> prefer, since they're the ones looking after arm-smmu-qcom the most :)
>>>> My 5 cents would be to just use the "common" style of doing this, so:
>>>>
>>>> #define ACTRL_PREFETCH    GENMASK(9, 8)
>>>>   #define PREFETCH_DEFAULT 0
>>>>   #define PREFETCH_SHALLOW 1
>>>>   #define PREFETCH_MODERATE 2
>>>>   #define PREFETCH_DEEP 3
>>>>
>>>> and then use
>>>>
>>>> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
>>>>
>>>> it can get verbose, but.. arguably that's good, since you really want
>>>> to make sure the right bits are set here
>>>
>>> Sounds good to me.
>>>
>>
>> Konrad, Dimitry, just checked FIELD_PREP() implementation
>>
>> #define FIELD_FIT(_mask, _val)
>> ({                                                              \
>>                   __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: ");  \
>>                   ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
>> })
>>
>> since it is defined as a block, it won't be possible to use FIELD_PREP
>> in macro or as a structure value, and can only be used inside a 
>> block/function. Orelse would show compilation errors as following
>>
>> kernel/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c:94:20: note: in 
>> expansion of macro 'PREFETCH_SHALLOW'
>>    { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>                      ^
>> kernel/include/linux/bitfield.h:113:2: error: braced-group within 
>> expression allowed only inside a function
>>    ({        \
>>    ^
>>
>> So as per my understanding I think, we might need to go ahead with the
>> generic implementation only. Let me know if I missed something.
> 
> Then anyway (foo << bar) is better compared to BIT(n) | BIT(m).
> 

Sure Dmitry, (foo << bar) would be simpler as well as Robin mentioned
earlier in his reply.
I can implement the defines as:

#define PREFETCH_DEFAULT       0
#define PREFETCH_SHALLOW       (1 << 8)
#define PREFETCH_MODERATE      (1 << 9)
#define PREFETCH_DEEP          (3 << 8)

This should be okay I think ?

Thanks,
Bibek
Dmitry Baryshkov Dec. 19, 2023, 10:21 a.m. UTC | #15
On Tue, 19 Dec 2023 at 10:25, Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 12/18/2023 7:51 PM, Dmitry Baryshkov wrote:
> > On 18/12/2023 13:23, Bibek Kumar Patro wrote:
> >>
> >>
> >> On 12/16/2023 9:45 PM, Dmitry Baryshkov wrote:
> >>> On 16/12/2023 02:03, Konrad Dybcio wrote:
> >>>> On 15.12.2023 13:54, Robin Murphy wrote:
> >>>>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
> >>>>>>
> >>>>>>
> >>>>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
> >>>>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
> >>>>>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>>>>
> >>>>>>>> Add ACTLR data table for SM8550 along with support for
> >>>>>>>> same including SM8550 specific implementation operations.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>>>>>> ---
> >>>>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89
> >>>>>>>> ++++++++++++++++++++++
> >>>>>>>>    1 file changed, 89 insertions(+)
> >>>>>>>>
> >>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>> index cb49291f5233..d2006f610243 100644
> >>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
> >>>>>>>>           u32 actlr;
> >>>>>>>>    };
> >>>>>>>>
> >>>>>>>> +/*
> >>>>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching
> >>>>>>>> in the
> >>>>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the
> >>>>>>>> prefetch
> >>>>>>>> + * buffer). The remaining bits are implementation defined and
> >>>>>>>> vary across
> >>>>>>>> + * SoCs.
> >>>>>>>> + */
> >>>>>>>> +
> >>>>>>>> +#define PREFETCH_DEFAULT       0
> >>>>>>>> +#define PREFETCH_SHALLOW       BIT(8)
> >>>>>>>> +#define PREFETCH_MODERATE      BIT(9)
> >>>>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
> >>>>>>>
> >>>>>>> I thin the following might be more correct:
> >>>>>>>
> >>>>>>> #include <linux/bitfield.h>
> >>>>>>>
> >>>>>>> #define PREFETCH_MASK GENMASK(9, 8)
> >>>>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
> >>>>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
> >>>>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
> >>>>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
> >>>>>>>
> >>>>>>
> >>>>>> Ack, thanks for this suggestion. Let me try this out using
> >>>>>> GENMASK. Once tested, will take care of this in next version.
> >>>>>
> >>>>> FWIW the more typical usage would be to just define the named
> >>>>> macros for the raw field values, then put the FIELD_PREP() at the
> >>>>> point of use. However in this case that's liable to get pretty
> >>>>> verbose, so although I'm usually a fan of bitfield.h, the most
> >>>>> readable option here might actually be to stick with simpler
> >>>>> definitions of "(0 << 8)", "(1 << 8)", etc. However it's not really
> >>>>> a big deal either way, and I defer to whatever Dmitry and Konrad
> >>>>> prefer, since they're the ones looking after arm-smmu-qcom the most :)
> >>>> My 5 cents would be to just use the "common" style of doing this, so:
> >>>>
> >>>> #define ACTRL_PREFETCH    GENMASK(9, 8)
> >>>>   #define PREFETCH_DEFAULT 0
> >>>>   #define PREFETCH_SHALLOW 1
> >>>>   #define PREFETCH_MODERATE 2
> >>>>   #define PREFETCH_DEEP 3
> >>>>
> >>>> and then use
> >>>>
> >>>> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
> >>>>
> >>>> it can get verbose, but.. arguably that's good, since you really want
> >>>> to make sure the right bits are set here
> >>>
> >>> Sounds good to me.
> >>>
> >>
> >> Konrad, Dimitry, just checked FIELD_PREP() implementation
> >>
> >> #define FIELD_FIT(_mask, _val)
> >> ({                                                              \
> >>                   __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: ");  \
> >>                   ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
> >> })
> >>
> >> since it is defined as a block, it won't be possible to use FIELD_PREP
> >> in macro or as a structure value, and can only be used inside a
> >> block/function. Orelse would show compilation errors as following
> >>
> >> kernel/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c:94:20: note: in
> >> expansion of macro 'PREFETCH_SHALLOW'
> >>    { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>                      ^
> >> kernel/include/linux/bitfield.h:113:2: error: braced-group within
> >> expression allowed only inside a function
> >>    ({        \
> >>    ^
> >>
> >> So as per my understanding I think, we might need to go ahead with the
> >> generic implementation only. Let me know if I missed something.
> >
> > Then anyway (foo << bar) is better compared to BIT(n) | BIT(m).
> >
>
> Sure Dmitry, (foo << bar) would be simpler as well as Robin mentioned
> earlier in his reply.
> I can implement the defines as:
>
> #define PREFETCH_DEFAULT       0
> #define PREFETCH_SHALLOW       (1 << 8)
> #define PREFETCH_MODERATE      (1 << 9)

2 << 8. Isn't that hard.

> #define PREFETCH_DEEP          (3 << 8)
>
> This should be okay I think ?
>
> Thanks,
> Bibek
>
Bibek Kumar Patro Dec. 19, 2023, 10:36 a.m. UTC | #16
On 12/19/2023 3:51 PM, Dmitry Baryshkov wrote:
> On Tue, 19 Dec 2023 at 10:25, Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 12/18/2023 7:51 PM, Dmitry Baryshkov wrote:
>>> On 18/12/2023 13:23, Bibek Kumar Patro wrote:
>>>>
>>>>
>>>> On 12/16/2023 9:45 PM, Dmitry Baryshkov wrote:
>>>>> On 16/12/2023 02:03, Konrad Dybcio wrote:
>>>>>> On 15.12.2023 13:54, Robin Murphy wrote:
>>>>>>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>>>>>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>>
>>>>>>>>>> Add ACTLR data table for SM8550 along with support for
>>>>>>>>>> same including SM8550 specific implementation operations.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>>>>> ---
>>>>>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89
>>>>>>>>>> ++++++++++++++++++++++
>>>>>>>>>>     1 file changed, 89 insertions(+)
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>> index cb49291f5233..d2006f610243 100644
>>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>>>>>>>            u32 actlr;
>>>>>>>>>>     };
>>>>>>>>>>
>>>>>>>>>> +/*
>>>>>>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching
>>>>>>>>>> in the
>>>>>>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the
>>>>>>>>>> prefetch
>>>>>>>>>> + * buffer). The remaining bits are implementation defined and
>>>>>>>>>> vary across
>>>>>>>>>> + * SoCs.
>>>>>>>>>> + */
>>>>>>>>>> +
>>>>>>>>>> +#define PREFETCH_DEFAULT       0
>>>>>>>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>>>>>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>>>>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>>>>>>>
>>>>>>>>> I thin the following might be more correct:
>>>>>>>>>
>>>>>>>>> #include <linux/bitfield.h>
>>>>>>>>>
>>>>>>>>> #define PREFETCH_MASK GENMASK(9, 8)
>>>>>>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>>>>>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>>>>>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>>>>>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>>>>>>>
>>>>>>>>
>>>>>>>> Ack, thanks for this suggestion. Let me try this out using
>>>>>>>> GENMASK. Once tested, will take care of this in next version.
>>>>>>>
>>>>>>> FWIW the more typical usage would be to just define the named
>>>>>>> macros for the raw field values, then put the FIELD_PREP() at the
>>>>>>> point of use. However in this case that's liable to get pretty
>>>>>>> verbose, so although I'm usually a fan of bitfield.h, the most
>>>>>>> readable option here might actually be to stick with simpler
>>>>>>> definitions of "(0 << 8)", "(1 << 8)", etc. However it's not really
>>>>>>> a big deal either way, and I defer to whatever Dmitry and Konrad
>>>>>>> prefer, since they're the ones looking after arm-smmu-qcom the most :)
>>>>>> My 5 cents would be to just use the "common" style of doing this, so:
>>>>>>
>>>>>> #define ACTRL_PREFETCH    GENMASK(9, 8)
>>>>>>    #define PREFETCH_DEFAULT 0
>>>>>>    #define PREFETCH_SHALLOW 1
>>>>>>    #define PREFETCH_MODERATE 2
>>>>>>    #define PREFETCH_DEEP 3
>>>>>>
>>>>>> and then use
>>>>>>
>>>>>> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
>>>>>>
>>>>>> it can get verbose, but.. arguably that's good, since you really want
>>>>>> to make sure the right bits are set here
>>>>>
>>>>> Sounds good to me.
>>>>>
>>>>
>>>> Konrad, Dimitry, just checked FIELD_PREP() implementation
>>>>
>>>> #define FIELD_FIT(_mask, _val)
>>>> ({                                                              \
>>>>                    __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: ");  \
>>>>                    ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
>>>> })
>>>>
>>>> since it is defined as a block, it won't be possible to use FIELD_PREP
>>>> in macro or as a structure value, and can only be used inside a
>>>> block/function. Orelse would show compilation errors as following
>>>>
>>>> kernel/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c:94:20: note: in
>>>> expansion of macro 'PREFETCH_SHALLOW'
>>>>     { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>                       ^
>>>> kernel/include/linux/bitfield.h:113:2: error: braced-group within
>>>> expression allowed only inside a function
>>>>     ({        \
>>>>     ^
>>>>
>>>> So as per my understanding I think, we might need to go ahead with the
>>>> generic implementation only. Let me know if I missed something.
>>>
>>> Then anyway (foo << bar) is better compared to BIT(n) | BIT(m).
>>>
>>
>> Sure Dmitry, (foo << bar) would be simpler as well as Robin mentioned
>> earlier in his reply.
>> I can implement the defines as:
>>
>> #define PREFETCH_DEFAULT       0
>> #define PREFETCH_SHALLOW       (1 << 8)
>> #define PREFETCH_MODERATE      (1 << 9)
> 
> 2 << 8. Isn't that hard.
> 

Ah, right. This is nice! .
Will use 2 << 8 instead. Thanks for the suggestion.

Thanks,
Bibek

>> #define PREFETCH_DEEP          (3 << 8)
>>
>> This should be okay I think ?
>>
>> Thanks,
>> Bibek
>>
> 
>
Dmitry Baryshkov Dec. 19, 2023, 10:44 a.m. UTC | #17
On Tue, 19 Dec 2023 at 12:37, Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 12/19/2023 3:51 PM, Dmitry Baryshkov wrote:
> > On Tue, 19 Dec 2023 at 10:25, Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 12/18/2023 7:51 PM, Dmitry Baryshkov wrote:
> >>> On 18/12/2023 13:23, Bibek Kumar Patro wrote:
> >>>>
> >>>>
> >>>> On 12/16/2023 9:45 PM, Dmitry Baryshkov wrote:
> >>>>> On 16/12/2023 02:03, Konrad Dybcio wrote:
> >>>>>> On 15.12.2023 13:54, Robin Murphy wrote:
> >>>>>>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
> >>>>>>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
> >>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>>>>>>
> >>>>>>>>>> Add ACTLR data table for SM8550 along with support for
> >>>>>>>>>> same including SM8550 specific implementation operations.
> >>>>>>>>>>
> >>>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>>>>>>>> ---
> >>>>>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89
> >>>>>>>>>> ++++++++++++++++++++++
> >>>>>>>>>>     1 file changed, 89 insertions(+)
> >>>>>>>>>>
> >>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>>> index cb49291f5233..d2006f610243 100644
> >>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
> >>>>>>>>>>            u32 actlr;
> >>>>>>>>>>     };
> >>>>>>>>>>
> >>>>>>>>>> +/*
> >>>>>>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching
> >>>>>>>>>> in the
> >>>>>>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the
> >>>>>>>>>> prefetch
> >>>>>>>>>> + * buffer). The remaining bits are implementation defined and
> >>>>>>>>>> vary across
> >>>>>>>>>> + * SoCs.
> >>>>>>>>>> + */
> >>>>>>>>>> +
> >>>>>>>>>> +#define PREFETCH_DEFAULT       0
> >>>>>>>>>> +#define PREFETCH_SHALLOW       BIT(8)
> >>>>>>>>>> +#define PREFETCH_MODERATE      BIT(9)
> >>>>>>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
> >>>>>>>>>
> >>>>>>>>> I thin the following might be more correct:
> >>>>>>>>>
> >>>>>>>>> #include <linux/bitfield.h>
> >>>>>>>>>
> >>>>>>>>> #define PREFETCH_MASK GENMASK(9, 8)
> >>>>>>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
> >>>>>>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
> >>>>>>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
> >>>>>>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
> >>>>>>>>>
> >>>>>>>>
> >>>>>>>> Ack, thanks for this suggestion. Let me try this out using
> >>>>>>>> GENMASK. Once tested, will take care of this in next version.
> >>>>>>>
> >>>>>>> FWIW the more typical usage would be to just define the named
> >>>>>>> macros for the raw field values, then put the FIELD_PREP() at the
> >>>>>>> point of use. However in this case that's liable to get pretty
> >>>>>>> verbose, so although I'm usually a fan of bitfield.h, the most
> >>>>>>> readable option here might actually be to stick with simpler
> >>>>>>> definitions of "(0 << 8)", "(1 << 8)", etc. However it's not really
> >>>>>>> a big deal either way, and I defer to whatever Dmitry and Konrad
> >>>>>>> prefer, since they're the ones looking after arm-smmu-qcom the most :)
> >>>>>> My 5 cents would be to just use the "common" style of doing this, so:
> >>>>>>
> >>>>>> #define ACTRL_PREFETCH    GENMASK(9, 8)
> >>>>>>    #define PREFETCH_DEFAULT 0
> >>>>>>    #define PREFETCH_SHALLOW 1
> >>>>>>    #define PREFETCH_MODERATE 2
> >>>>>>    #define PREFETCH_DEEP 3
> >>>>>>
> >>>>>> and then use
> >>>>>>
> >>>>>> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
> >>>>>>
> >>>>>> it can get verbose, but.. arguably that's good, since you really want
> >>>>>> to make sure the right bits are set here
> >>>>>
> >>>>> Sounds good to me.
> >>>>>
> >>>>
> >>>> Konrad, Dimitry, just checked FIELD_PREP() implementation
> >>>>
> >>>> #define FIELD_FIT(_mask, _val)
> >>>> ({                                                              \
> >>>>                    __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: ");  \
> >>>>                    ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
> >>>> })
> >>>>
> >>>> since it is defined as a block, it won't be possible to use FIELD_PREP
> >>>> in macro or as a structure value, and can only be used inside a
> >>>> block/function. Orelse would show compilation errors as following
> >>>>
> >>>> kernel/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c:94:20: note: in
> >>>> expansion of macro 'PREFETCH_SHALLOW'
> >>>>     { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>>>                       ^
> >>>> kernel/include/linux/bitfield.h:113:2: error: braced-group within
> >>>> expression allowed only inside a function
> >>>>     ({        \
> >>>>     ^
> >>>>
> >>>> So as per my understanding I think, we might need to go ahead with the
> >>>> generic implementation only. Let me know if I missed something.
> >>>
> >>> Then anyway (foo << bar) is better compared to BIT(n) | BIT(m).
> >>>
> >>
> >> Sure Dmitry, (foo << bar) would be simpler as well as Robin mentioned
> >> earlier in his reply.
> >> I can implement the defines as:
> >>
> >> #define PREFETCH_DEFAULT       0
> >> #define PREFETCH_SHALLOW       (1 << 8)
> >> #define PREFETCH_MODERATE      (1 << 9)
> >
> > 2 << 8. Isn't that hard.
> >
>
> Ah, right. This is nice! .
> Will use 2 << 8 instead. Thanks for the suggestion.

It might still be useful to define the PREFETCH_SHIFT equal to 8.

>
> Thanks,
> Bibek
>
> >> #define PREFETCH_DEEP          (3 << 8)
> >>
> >> This should be okay I think ?
> >>
> >> Thanks,
> >> Bibek
> >>
> >
> >
Bibek Kumar Patro Dec. 19, 2023, 11:39 a.m. UTC | #18
On 12/19/2023 4:14 PM, Dmitry Baryshkov wrote:
> On Tue, 19 Dec 2023 at 12:37, Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 12/19/2023 3:51 PM, Dmitry Baryshkov wrote:
>>> On Tue, 19 Dec 2023 at 10:25, Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>>
>>>>
>>>> On 12/18/2023 7:51 PM, Dmitry Baryshkov wrote:
>>>>> On 18/12/2023 13:23, Bibek Kumar Patro wrote:
>>>>>>
>>>>>>
>>>>>> On 12/16/2023 9:45 PM, Dmitry Baryshkov wrote:
>>>>>>> On 16/12/2023 02:03, Konrad Dybcio wrote:
>>>>>>>> On 15.12.2023 13:54, Robin Murphy wrote:
>>>>>>>>> On 2023-12-15 12:20 pm, Bibek Kumar Patro wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 12/15/2023 4:14 PM, Dmitry Baryshkov wrote:
>>>>>>>>>>> On Fri, 15 Dec 2023 at 12:19, Bibek Kumar Patro
>>>>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> Add ACTLR data table for SM8550 along with support for
>>>>>>>>>>>> same including SM8550 specific implementation operations.
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>      drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 89
>>>>>>>>>>>> ++++++++++++++++++++++
>>>>>>>>>>>>      1 file changed, 89 insertions(+)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>>> index cb49291f5233..d2006f610243 100644
>>>>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>>>>> @@ -20,6 +20,85 @@ struct actlr_config {
>>>>>>>>>>>>             u32 actlr;
>>>>>>>>>>>>      };
>>>>>>>>>>>>
>>>>>>>>>>>> +/*
>>>>>>>>>>>> + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching
>>>>>>>>>>>> in the
>>>>>>>>>>>> + * macro TLB) and BIT(1) as CPRE (Enable context caching in the
>>>>>>>>>>>> prefetch
>>>>>>>>>>>> + * buffer). The remaining bits are implementation defined and
>>>>>>>>>>>> vary across
>>>>>>>>>>>> + * SoCs.
>>>>>>>>>>>> + */
>>>>>>>>>>>> +
>>>>>>>>>>>> +#define PREFETCH_DEFAULT       0
>>>>>>>>>>>> +#define PREFETCH_SHALLOW       BIT(8)
>>>>>>>>>>>> +#define PREFETCH_MODERATE      BIT(9)
>>>>>>>>>>>> +#define PREFETCH_DEEP          (BIT(9) | BIT(8))
>>>>>>>>>>>
>>>>>>>>>>> I thin the following might be more correct:
>>>>>>>>>>>
>>>>>>>>>>> #include <linux/bitfield.h>
>>>>>>>>>>>
>>>>>>>>>>> #define PREFETCH_MASK GENMASK(9, 8)
>>>>>>>>>>> #define PREFETCH_DEFAULT FIELD_PREP(PREFETCH_MASK, 0)
>>>>>>>>>>> #define PREFETCH_SHALLOW FIELD_PREP(PREFETCH_MASK, 1)
>>>>>>>>>>> #define PREFETCH_MODERATE FIELD_PREP(PREFETCH_MASK, 2)
>>>>>>>>>>> #define PREFETCH_DEEP FIELD_PREP(PREFETCH_MASK, 3)
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Ack, thanks for this suggestion. Let me try this out using
>>>>>>>>>> GENMASK. Once tested, will take care of this in next version.
>>>>>>>>>
>>>>>>>>> FWIW the more typical usage would be to just define the named
>>>>>>>>> macros for the raw field values, then put the FIELD_PREP() at the
>>>>>>>>> point of use. However in this case that's liable to get pretty
>>>>>>>>> verbose, so although I'm usually a fan of bitfield.h, the most
>>>>>>>>> readable option here might actually be to stick with simpler
>>>>>>>>> definitions of "(0 << 8)", "(1 << 8)", etc. However it's not really
>>>>>>>>> a big deal either way, and I defer to whatever Dmitry and Konrad
>>>>>>>>> prefer, since they're the ones looking after arm-smmu-qcom the most :)
>>>>>>>> My 5 cents would be to just use the "common" style of doing this, so:
>>>>>>>>
>>>>>>>> #define ACTRL_PREFETCH    GENMASK(9, 8)
>>>>>>>>     #define PREFETCH_DEFAULT 0
>>>>>>>>     #define PREFETCH_SHALLOW 1
>>>>>>>>     #define PREFETCH_MODERATE 2
>>>>>>>>     #define PREFETCH_DEEP 3
>>>>>>>>
>>>>>>>> and then use
>>>>>>>>
>>>>>>>> | FIELD_PREP(ACTRL_PREFETCH, PREFETCH_x)
>>>>>>>>
>>>>>>>> it can get verbose, but.. arguably that's good, since you really want
>>>>>>>> to make sure the right bits are set here
>>>>>>>
>>>>>>> Sounds good to me.
>>>>>>>
>>>>>>
>>>>>> Konrad, Dimitry, just checked FIELD_PREP() implementation
>>>>>>
>>>>>> #define FIELD_FIT(_mask, _val)
>>>>>> ({                                                              \
>>>>>>                     __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: ");  \
>>>>>>                     ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
>>>>>> })
>>>>>>
>>>>>> since it is defined as a block, it won't be possible to use FIELD_PREP
>>>>>> in macro or as a structure value, and can only be used inside a
>>>>>> block/function. Orelse would show compilation errors as following
>>>>>>
>>>>>> kernel/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c:94:20: note: in
>>>>>> expansion of macro 'PREFETCH_SHALLOW'
>>>>>>      { 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>>                        ^
>>>>>> kernel/include/linux/bitfield.h:113:2: error: braced-group within
>>>>>> expression allowed only inside a function
>>>>>>      ({        \
>>>>>>      ^
>>>>>>
>>>>>> So as per my understanding I think, we might need to go ahead with the
>>>>>> generic implementation only. Let me know if I missed something.
>>>>>
>>>>> Then anyway (foo << bar) is better compared to BIT(n) | BIT(m).
>>>>>
>>>>
>>>> Sure Dmitry, (foo << bar) would be simpler as well as Robin mentioned
>>>> earlier in his reply.
>>>> I can implement the defines as:
>>>>
>>>> #define PREFETCH_DEFAULT       0
>>>> #define PREFETCH_SHALLOW       (1 << 8)
>>>> #define PREFETCH_MODERATE      (1 << 9)
>>>
>>> 2 << 8. Isn't that hard.
>>>
>>
>> Ah, right. This is nice! .
>> Will use 2 << 8 instead. Thanks for the suggestion.
> 
> It might still be useful to define the PREFETCH_SHIFT equal to 8.
> 

Sure, looks okay to me as well to define PREFETCH_SHIFT to 8
as it's constant.

Thanks,
Bibek

>>
>> Thanks,
>> Bibek
>>
>>>> #define PREFETCH_DEEP          (3 << 8)
>>>>
>>>> This should be okay I think ?
>>>>
>>>> Thanks,
>>>> Bibek
>>>>
>>>
>>>
> 
> 
>
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index cb49291f5233..d2006f610243 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -20,6 +20,85 @@  struct actlr_config {
 	u32 actlr;
 };

+/*
+ * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
+ * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
+ * buffer). The remaining bits are implementation defined and vary across
+ * SoCs.
+ */
+
+#define PREFETCH_DEFAULT	0
+#define PREFETCH_SHALLOW	BIT(8)
+#define PREFETCH_MODERATE	BIT(9)
+#define PREFETCH_DEEP		(BIT(9) | BIT(8))
+#define PREFETCH_SWITCH_GFX	(BIT(5) | BIT(3))
+#define CPRE			BIT(1)
+#define CMTLB			BIT(0)
+
+static const struct actlr_config sm8550_apps_actlr_cfg[] = {
+	{ 0x18a0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x18e0, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x0800, 0x0020, PREFETCH_DEFAULT | CMTLB },
+	{ 0x1800, 0x00c0, PREFETCH_DEFAULT | CMTLB },
+	{ 0x1820, 0x0000, PREFETCH_DEFAULT | CMTLB },
+	{ 0x1860, 0x0000, PREFETCH_DEFAULT | CMTLB },
+	{ 0x0c01, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c02, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c03, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c04, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c05, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c06, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c07, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c08, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c09, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c0c, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c0d, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c0e, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x0c0f, 0x0020, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1961, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1962, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1963, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1964, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1965, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1966, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1967, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1968, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1969, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x196c, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x196d, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x196e, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x196f, 0x0000, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19c1, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19c2, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19c3, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19c4, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19c5, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19c6, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19c7, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19c8, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19c9, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19cc, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19cd, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19ce, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x19cf, 0x0010, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1c00, 0x0002, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x1c01, 0x0000, PREFETCH_DEFAULT | CMTLB },
+	{ 0x1920, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x1923, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x1924, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x1940, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x1941, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x1943, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x1944, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x1947, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{},
+};
+
+static const struct actlr_config sm8550_gfx_actlr_cfg[] = {
+	{ 0x0000, 0x03ff, PREFETCH_SWITCH_GFX | PREFETCH_DEEP | CPRE | CMTLB },
+	{},
+};
+
 static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
 {
 	return container_of(smmu, struct qcom_smmu, smmu);
@@ -549,6 +628,15 @@  static const struct qcom_smmu_match_data sdm845_smmu_500_data = {
 	/* Also no debug configuration. */
 };

+
+static const struct qcom_smmu_match_data sm8550_smmu_500_impl0_data = {
+	.impl = &qcom_smmu_500_impl,
+	.adreno_impl = &qcom_adreno_smmu_500_impl,
+	.cfg = &qcom_smmu_impl0_cfg,
+	.actlrcfg = sm8550_apps_actlr_cfg,
+	.actlrcfg_gfx = sm8550_gfx_actlr_cfg,
+};
+
 static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
 	.impl = &qcom_smmu_500_impl,
 	.adreno_impl = &qcom_adreno_smmu_500_impl,
@@ -583,6 +671,7 @@  static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sm8550-smmu-500", .data = &sm8550_smmu_500_impl0_data },
 	{ .compatible = "qcom,smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ }
 };