diff mbox series

[v16,3/5] iommu/arm-smmu: add support for PRR bit setup

Message ID 20241008125410.3422512-4-quic_bibekkum@quicinc.com (mailing list archive)
State Superseded
Headers show
Series iommu/arm-smmu: introduction of ACTLR implementation for Qualcomm SoCs | expand

Commit Message

Bibek Kumar Patro Oct. 8, 2024, 12:54 p.m. UTC
Add an adreno-smmu-priv interface for drm/msm to call
into arm-smmu-qcom and initiate the PRR bit setup or reset
sequence as per request.

This will be used by the GPU to set up the PRR bit and related
configuration registers through the adreno-smmu private
interface instead of directly poking the SMMU hardware.

Suggested-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
 drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
 include/linux/adreno-smmu-priv.h           | 10 +++++-
 3 files changed, 48 insertions(+), 1 deletion(-)

--
2.34.1

Comments

Rob Clark Oct. 28, 2024, 9:13 p.m. UTC | #1
On Tue, Oct 8, 2024 at 5:54 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
> Add an adreno-smmu-priv interface for drm/msm to call
> into arm-smmu-qcom and initiate the PRR bit setup or reset
> sequence as per request.
>
> This will be used by GPU to setup the PRR bit and related
> configuration registers through adreno-smmu private
> interface instead of directly poking the smmu hardware.
>
> Suggested-by: Rob Clark <robdclark@gmail.com>

Reviewed-by: Rob Clark <robdclark@gmail.com>

> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>  include/linux/adreno-smmu-priv.h           | 10 +++++-
>  3 files changed, 48 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index 6e0a2a43e45a..38ac9cab763b 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -25,6 +25,7 @@
>
>  #define CPRE                   (1 << 1)
>  #define CMTLB                  (1 << 0)
> +#define GFX_ACTLR_PRR          (1 << 5)
>
>  static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>  {
> @@ -109,6 +110,40 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>         arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>  }
>
> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> +{
> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> +       const struct device_node *np = smmu->dev->of_node;
> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> +       u32 reg = 0;
> +
> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
> +               reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> +               reg &= ~GFX_ACTLR_PRR;
> +               if (set)
> +                       reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> +               arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> +       }
> +}
> +
> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> +{
> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> +       const struct device_node *np = smmu->dev->of_node;
> +
> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
> +               writel_relaxed(lower_32_bits(page_addr),
> +                                       smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> +
> +               writel_relaxed(upper_32_bits(page_addr),
> +                                       smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> +       }
> +}
> +
>  #define QCOM_ADRENO_SMMU_GPU_SID 0
>
>  static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> @@ -249,6 +284,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>         priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>         priv->set_stall = qcom_adreno_smmu_set_stall;
>         priv->resume_translation = qcom_adreno_smmu_resume_translation;
> +       priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> +       priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>
>         return 0;
>  }
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index e2aeb511ae90..2dbf3243b5ad 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>  #define ARM_SMMU_SCTLR_M               BIT(0)
>
>  #define ARM_SMMU_CB_ACTLR              0x4
> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>
>  #define ARM_SMMU_CB_RESUME             0x8
>  #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> index c637e0997f6d..03466eb16933 100644
> --- a/include/linux/adreno-smmu-priv.h
> +++ b/include/linux/adreno-smmu-priv.h
> @@ -49,7 +49,13 @@ struct adreno_smmu_fault_info {
>   *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
>   *                 the GPU driver must call resume_translation()
>   * @resume_translation: Resume translation after a fault
> - *
> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> + *                ACTLR register bits, currently used to configure
> + *                Partially-Resident-Region (PRR) bit for feature's
> + *                setup and reset sequence as requested.
> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> + *                physical address of PRR page passed from
> + *                GPU driver.
>   *
>   * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>   * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> @@ -67,6 +73,8 @@ struct adreno_smmu_priv {
>      void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>      void (*set_stall)(const void *cookie, bool enabled);
>      void (*resume_translation)(const void *cookie, bool terminate);
> +    void (*set_prr_bit)(const void *cookie, bool set);
> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>  };
>
>  #endif /* __ADRENO_SMMU_PRIV_H */
> --
> 2.34.1
>
Robin Murphy Oct. 29, 2024, 1:29 p.m. UTC | #2
On 2024-10-08 1:54 pm, Bibek Kumar Patro wrote:
> Add an adreno-smmu-priv interface for drm/msm to call
> into arm-smmu-qcom and initiate the PRR bit setup or reset
> sequence as per request.
> 
> This will be used by GPU to setup the PRR bit and related
> configuration registers through adreno-smmu private
> interface instead of directly poking the smmu hardware.
> 
> Suggested-by: Rob Clark <robdclark@gmail.com>
> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> ---
>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>   include/linux/adreno-smmu-priv.h           | 10 +++++-
>   3 files changed, 48 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index 6e0a2a43e45a..38ac9cab763b 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -25,6 +25,7 @@
> 
>   #define CPRE			(1 << 1)
>   #define CMTLB			(1 << 0)
> +#define GFX_ACTLR_PRR		(1 << 5)
> 
>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>   {
> @@ -109,6 +110,40 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>   	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>   }
> 
> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> +{
> +	struct arm_smmu_domain *smmu_domain = (void *)cookie;
> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
> +	const struct device_node *np = smmu->dev->of_node;
> +	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> +	u32 reg = 0;
> +
> +	if (of_device_is_compatible(np, "qcom,smmu-500") &&
> +			of_device_is_compatible(np, "qcom,adreno-smmu")) {

These conditions aren't going to change between calls - wouldn't it make 
more sense to conditionally assign the callbacks in the first place? Not 
the biggest deal if this is a one-off context-setup type thing, just 
that it looks a little funky.

Thanks,
Robin.

> +		reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> +		reg &= ~GFX_ACTLR_PRR;
> +		if (set)
> +			reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> +		arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> +	}
> +}
> +
> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> +{
> +	struct arm_smmu_domain *smmu_domain = (void *)cookie;
> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
> +	const struct device_node *np = smmu->dev->of_node;
> +
> +	if (of_device_is_compatible(np, "qcom,smmu-500") &&
> +			of_device_is_compatible(np, "qcom,adreno-smmu")) {
> +		writel_relaxed(lower_32_bits(page_addr),
> +					smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> +
> +		writel_relaxed(upper_32_bits(page_addr),
> +					smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> +	}
> +}
> +
>   #define QCOM_ADRENO_SMMU_GPU_SID 0
> 
>   static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> @@ -249,6 +284,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>   	priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>   	priv->set_stall = qcom_adreno_smmu_set_stall;
>   	priv->resume_translation = qcom_adreno_smmu_resume_translation;
> +	priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> +	priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> 
>   	return 0;
>   }
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index e2aeb511ae90..2dbf3243b5ad 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>   #define ARM_SMMU_SCTLR_M		BIT(0)
> 
>   #define ARM_SMMU_CB_ACTLR		0x4
> +#define ARM_SMMU_GFX_PRR_CFG_LADDR	0x6008
> +#define ARM_SMMU_GFX_PRR_CFG_UADDR	0x600C
> 
>   #define ARM_SMMU_CB_RESUME		0x8
>   #define ARM_SMMU_RESUME_TERMINATE	BIT(0)
> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> index c637e0997f6d..03466eb16933 100644
> --- a/include/linux/adreno-smmu-priv.h
> +++ b/include/linux/adreno-smmu-priv.h
> @@ -49,7 +49,13 @@ struct adreno_smmu_fault_info {
>    *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
>    *                 the GPU driver must call resume_translation()
>    * @resume_translation: Resume translation after a fault
> - *
> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> + *		   ACTLR register bits, currently used to configure
> + *		   Partially-Resident-Region (PRR) bit for feature's
> + *		   setup and reset sequence as requested.
> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> + *		   physical address of PRR page passed from
> + *		   GPU driver.
>    *
>    * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>    * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> @@ -67,6 +73,8 @@ struct adreno_smmu_priv {
>       void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>       void (*set_stall)(const void *cookie, bool enabled);
>       void (*resume_translation)(const void *cookie, bool terminate);
> +    void (*set_prr_bit)(const void *cookie, bool set);
> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>   };
> 
>   #endif /* __ADRENO_SMMU_PRIV_H */
> --
> 2.34.1
>
Bibek Kumar Patro Oct. 30, 2024, 1:14 p.m. UTC | #3
On 10/29/2024 6:59 PM, Robin Murphy wrote:
> On 2024-10-08 1:54 pm, Bibek Kumar Patro wrote:
>> Add an adreno-smmu-priv interface for drm/msm to call
>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>> sequence as per request.
>>
>> This will be used by GPU to setup the PRR bit and related
>> configuration registers through adreno-smmu private
>> interface instead of directly poking the smmu hardware.
>>
>> Suggested-by: Rob Clark <robdclark@gmail.com>
>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>   include/linux/adreno-smmu-priv.h           | 10 +++++-
>>   3 files changed, 48 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/ 
>> iommu/arm/arm-smmu/arm-smmu-qcom.c
>> index 6e0a2a43e45a..38ac9cab763b 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> @@ -25,6 +25,7 @@
>>
>>   #define CPRE            (1 << 1)
>>   #define CMTLB            (1 << 0)
>> +#define GFX_ACTLR_PRR        (1 << 5)
>>
>>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>   {
>> @@ -109,6 +110,40 @@ static void 
>> qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>   }
>>
>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>> +{
>> +    struct arm_smmu_domain *smmu_domain = (void *)cookie;
>> +    struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +    const struct device_node *np = smmu->dev->of_node;
>> +    struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>> +    u32 reg = 0;
>> +
>> +    if (of_device_is_compatible(np, "qcom,smmu-500") &&
>> +            of_device_is_compatible(np, "qcom,adreno-smmu")) {
> 
> These conditions aren't going to change between calls - wouldn't it make 
> more sense to conditionally assign the callbacks in the first place? Not 
> the biggest deal if this is a one-off context-setup type thing, just 
> that it looks a little funky.
> 

Let me know if you still want to pursue this.
From the current PRR implementation in the graphics
vendor layer, this seems to be just a setup kind of thing.
Also, if we do this conditional check before assigning the callbacks,
and the vendor layer caller doesn't have any such check,
wouldn't it be an issue on unsupported platforms (!qcom,smmu-500 or
!qcom,adreno-smmu),
as the callbacks won't be assigned?
So, as per my understanding, I think it would be safe to keep the
condition check here?

Thanks & regards,
Bibek


> Thanks,
> Robin.
> 
>> +        reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>> +        reg &= ~GFX_ACTLR_PRR;
>> +        if (set)
>> +            reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>> +        arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>> +    }
>> +}
>> +
>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, 
>> phys_addr_t page_addr)
>> +{
>> +    struct arm_smmu_domain *smmu_domain = (void *)cookie;
>> +    struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +    const struct device_node *np = smmu->dev->of_node;
>> +
>> +    if (of_device_is_compatible(np, "qcom,smmu-500") &&
>> +            of_device_is_compatible(np, "qcom,adreno-smmu")) {
>> +        writel_relaxed(lower_32_bits(page_addr),
>> +                    smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>> +
>> +        writel_relaxed(upper_32_bits(page_addr),
>> +                    smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>> +    }
>> +}
>> +
>>   #define QCOM_ADRENO_SMMU_GPU_SID 0
>>
>>   static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>> @@ -249,6 +284,8 @@ static int qcom_adreno_smmu_init_context(struct 
>> arm_smmu_domain *smmu_domain,
>>       priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>       priv->set_stall = qcom_adreno_smmu_set_stall;
>>       priv->resume_translation = qcom_adreno_smmu_resume_translation;
>> +    priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
>> +    priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>>
>>       return 0;
>>   }
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/ 
>> arm/arm-smmu/arm-smmu.h
>> index e2aeb511ae90..2dbf3243b5ad 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>   #define ARM_SMMU_SCTLR_M        BIT(0)
>>
>>   #define ARM_SMMU_CB_ACTLR        0x4
>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR    0x6008
>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR    0x600C
>>
>>   #define ARM_SMMU_CB_RESUME        0x8
>>   #define ARM_SMMU_RESUME_TERMINATE    BIT(0)
>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno- 
>> smmu-priv.h
>> index c637e0997f6d..03466eb16933 100644
>> --- a/include/linux/adreno-smmu-priv.h
>> +++ b/include/linux/adreno-smmu-priv.h
>> @@ -49,7 +49,13 @@ struct adreno_smmu_fault_info {
>>    *                 before set_ttbr0_cfg().  If stalling on fault is 
>> enabled,
>>    *                 the GPU driver must call resume_translation()
>>    * @resume_translation: Resume translation after a fault
>> - *
>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
>> + *           ACTLR register bits, currently used to configure
>> + *           Partially-Resident-Region (PRR) bit for feature's
>> + *           setup and reset sequence as requested.
>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
>> + *           physical address of PRR page passed from
>> + *           GPU driver.
>>    *
>>    * The GPU driver (drm/msm) and adreno-smmu work together for 
>> controlling
>>    * the GPU's SMMU instance.  This is by necessity, as the GPU is 
>> directly
>> @@ -67,6 +73,8 @@ struct adreno_smmu_priv {
>>       void (*get_fault_info)(const void *cookie, struct 
>> adreno_smmu_fault_info *info);
>>       void (*set_stall)(const void *cookie, bool enabled);
>>       void (*resume_translation)(const void *cookie, bool terminate);
>> +    void (*set_prr_bit)(const void *cookie, bool set);
>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>>   };
>>
>>   #endif /* __ADRENO_SMMU_PRIV_H */
>> -- 
>> 2.34.1
>>
Robin Murphy Oct. 30, 2024, 3:23 p.m. UTC | #4
On 30/10/2024 1:14 pm, Bibek Kumar Patro wrote:
> 
> 
> On 10/29/2024 6:59 PM, Robin Murphy wrote:
>> On 2024-10-08 1:54 pm, Bibek Kumar Patro wrote:
>>> Add an adreno-smmu-priv interface for drm/msm to call
>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>> sequence as per request.
>>>
>>> This will be used by GPU to setup the PRR bit and related
>>> configuration registers through adreno-smmu private
>>> interface instead of directly poking the smmu hardware.
>>>
>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>> ---
>>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>   include/linux/adreno-smmu-priv.h           | 10 +++++-
>>>   3 files changed, 48 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/ 
>>> iommu/arm/arm-smmu/arm-smmu-qcom.c
>>> index 6e0a2a43e45a..38ac9cab763b 100644
>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>> @@ -25,6 +25,7 @@
>>>
>>>   #define CPRE            (1 << 1)
>>>   #define CMTLB            (1 << 0)
>>> +#define GFX_ACTLR_PRR        (1 << 5)
>>>
>>>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>   {
>>> @@ -109,6 +110,40 @@ static void 
>>> qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>   }
>>>
>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>>> +{
>>> +    struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>> +    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>> +    const struct device_node *np = smmu->dev->of_node;
>>> +    struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>> +    u32 reg = 0;
>>> +
>>> +    if (of_device_is_compatible(np, "qcom,smmu-500") &&
>>> +            of_device_is_compatible(np, "qcom,adreno-smmu")) {
>>
>> These conditions aren't going to change between calls - wouldn't it 
>> make more sense to conditionally assign the callbacks in the first 
>> place? Not the biggest deal if this is a one-off context-setup type 
>> thing, just that it looks a little funky.
>>
> 
> Let me know if you want to pursue this still.
>  From the current PRR implementation in the graphics
> vendor layer, this seems to be just setup kind-of thing.
> Also if we keep this conditional check before assigning callbacks,
> and vendor layer caller won't be having any such check,
> wouldn't it be an issue in unsupported platforms (!qcom,smmu-500 or 
> !qcom,adreno-smmu)
> as the callbacks won't be assigned?
> So as per my understanding I think it would be safe to keep the 
> condition check here?

Like I say, it makes more sense to me personally if SMMUs which don't 
have a PRR don't offer a callback for setting the PRR which they don't 
have, and for it to be the caller's responsibility not to call a NULL 
callback where they wouldn't need to call one anyway. But the 
adreno_priv interface is kind of Rob's thing, so I'll leave it to his 
preference.

Thanks,
Robin.
Rob Clark Oct. 30, 2024, 4:58 p.m. UTC | #5
On Wed, Oct 30, 2024 at 8:23 AM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 30/10/2024 1:14 pm, Bibek Kumar Patro wrote:
> >
> >
> > On 10/29/2024 6:59 PM, Robin Murphy wrote:
> >> On 2024-10-08 1:54 pm, Bibek Kumar Patro wrote:
> >>> Add an adreno-smmu-priv interface for drm/msm to call
> >>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >>> sequence as per request.
> >>>
> >>> This will be used by GPU to setup the PRR bit and related
> >>> configuration registers through adreno-smmu private
> >>> interface instead of directly poking the smmu hardware.
> >>>
> >>> Suggested-by: Rob Clark <robdclark@gmail.com>
> >>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>> ---
> >>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
> >>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>>   include/linux/adreno-smmu-priv.h           | 10 +++++-
> >>>   3 files changed, 48 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/
> >>> iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>> index 6e0a2a43e45a..38ac9cab763b 100644
> >>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>> @@ -25,6 +25,7 @@
> >>>
> >>>   #define CPRE            (1 << 1)
> >>>   #define CMTLB            (1 << 0)
> >>> +#define GFX_ACTLR_PRR        (1 << 5)
> >>>
> >>>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >>>   {
> >>> @@ -109,6 +110,40 @@ static void
> >>> qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>>       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>>   }
> >>>
> >>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> >>> +{
> >>> +    struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>> +    struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>> +    const struct device_node *np = smmu->dev->of_node;
> >>> +    struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >>> +    u32 reg = 0;
> >>> +
> >>> +    if (of_device_is_compatible(np, "qcom,smmu-500") &&
> >>> +            of_device_is_compatible(np, "qcom,adreno-smmu")) {
> >>
> >> These conditions aren't going to change between calls - wouldn't it
> >> make more sense to conditionally assign the callbacks in the first
> >> place? Not the biggest deal if this is a one-off context-setup type
> >> thing, just that it looks a little funky.
> >>
> >
> > Let me know if you want to pursue this still.
> >  From the current PRR implementation in the graphics
> > vendor layer, this seems to be just setup kind-of thing.
> > Also if we keep this conditional check before assigning callbacks,
> > and vendor layer caller won't be having any such check,
> > wouldn't it be an issue in unsupported platforms (!qcom,smmu-500 or
> > !qcom,adreno-smmu)
> > as the callbacks won't be assigned?
> > So as per my understanding I think it would be safe to keep the
> > condition check here?
>
> Like I say, it makes more sense to me personally if SMMUs which don't
> have a PRR don't offer a callback for setting the PRR which they don't
> have, and for it to be the caller's responsibility not to call a NULL
> callback where they wouldn't need to call one anyway. But the
> adreno_priv interface is kind of Rob's thing, so I'll leave it to his
> preference.

We can go the route of NULL cb if it is not supported (but should make
note of that in the adreno-smmu-priv.h header comment)

BR,
-R

> Thanks,
> Robin.
Bibek Kumar Patro Oct. 30, 2024, 8:27 p.m. UTC | #6
On 10/30/2024 10:28 PM, Rob Clark wrote:
> On Wed, Oct 30, 2024 at 8:23 AM Robin Murphy <robin.murphy@arm.com> wrote:
>>
>> On 30/10/2024 1:14 pm, Bibek Kumar Patro wrote:
>>>
>>>
>>> On 10/29/2024 6:59 PM, Robin Murphy wrote:
>>>> On 2024-10-08 1:54 pm, Bibek Kumar Patro wrote:
>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>> sequence as per request.
>>>>>
>>>>> This will be used by GPU to setup the PRR bit and related
>>>>> configuration registers through adreno-smmu private
>>>>> interface instead of directly poking the smmu hardware.
>>>>>
>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>> ---
>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>    include/linux/adreno-smmu-priv.h           | 10 +++++-
>>>>>    3 files changed, 48 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/
>>>>> iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> index 6e0a2a43e45a..38ac9cab763b 100644
>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> @@ -25,6 +25,7 @@
>>>>>
>>>>>    #define CPRE            (1 << 1)
>>>>>    #define CMTLB            (1 << 0)
>>>>> +#define GFX_ACTLR_PRR        (1 << 5)
>>>>>
>>>>>    static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>>    {
>>>>> @@ -109,6 +110,40 @@ static void
>>>>> qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>        arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>    }
>>>>>
>>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>>>>> +{
>>>>> +    struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>> +    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>> +    const struct device_node *np = smmu->dev->of_node;
>>>>> +    struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>> +    u32 reg = 0;
>>>>> +
>>>>> +    if (of_device_is_compatible(np, "qcom,smmu-500") &&
>>>>> +            of_device_is_compatible(np, "qcom,adreno-smmu")) {
>>>>
>>>> These conditions aren't going to change between calls - wouldn't it
>>>> make more sense to conditionally assign the callbacks in the first
>>>> place? Not the biggest deal if this is a one-off context-setup type
>>>> thing, just that it looks a little funky.
>>>>
>>>
>>> Let me know if you want to pursue this still.
>>>   From the current PRR implementation in the graphics
>>> vendor layer, this seems to be just setup kind-of thing.
>>> Also if we keep this conditional check before assigning callbacks,
>>> and vendor layer caller won't be having any such check,
>>> wouldn't it be an issue in unsupported platforms (!qcom,smmu-500 or
>>> !qcom,adreno-smmu)
>>> as the callbacks won't be assigned?
>>> So as per my understanding I think it would be safe to keep the
>>> condition check here?
>>
>> Like I say, it makes more sense to me personally if SMMUs which don't
>> have a PRR don't offer a callback for setting the PRR which they don't
>> have, and for it to be the caller's responsibility not to call a NULL
>> callback where they wouldn't need to call one anyway. But the
>> adreno_priv interface is kind of Rob's thing, so I'll leave it to his
>> preference.
> 
> We can go the route of NULL cb if it is not supported (but should make
> note of that in the adreno-smmu-priv.h header comment)
> 

Actually, I liked Robin's suggestion to do the compatible check before
assignment, in the sense that there won't be repeated compatible checks
for each call. My only concern was how to handle the non-PRR-supported
targets in case vendors called it.
Thanks for clarifying that; we can use NULL callbacks for non-PRR-supported
targets with a note in the adreno-smmu-priv.h header, so that
the caller can take care while implementing this.
I'll incorporate these changes in the next patch along with the CPRE workaround.

Thanks & regards,
Bibek


> BR,
> -R
> 
>> Thanks,
>> Robin.
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 6e0a2a43e45a..38ac9cab763b 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -25,6 +25,7 @@ 

 #define CPRE			(1 << 1)
 #define CMTLB			(1 << 0)
+#define GFX_ACTLR_PRR		(1 << 5)

 static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
 {
@@ -109,6 +110,40 @@  static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
 	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
 }

+static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
+{
+	struct arm_smmu_domain *smmu_domain = (void *)cookie;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	const struct device_node *np = smmu->dev->of_node;
+	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	u32 reg = 0;
+
+	if (of_device_is_compatible(np, "qcom,smmu-500") &&
+			of_device_is_compatible(np, "qcom,adreno-smmu")) {
+		reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
+		reg &= ~GFX_ACTLR_PRR;
+		if (set)
+			reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
+		arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
+	}
+}
+
+static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
+{
+	struct arm_smmu_domain *smmu_domain = (void *)cookie;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	const struct device_node *np = smmu->dev->of_node;
+
+	if (of_device_is_compatible(np, "qcom,smmu-500") &&
+			of_device_is_compatible(np, "qcom,adreno-smmu")) {
+		writel_relaxed(lower_32_bits(page_addr),
+					smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
+
+		writel_relaxed(upper_32_bits(page_addr),
+					smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
+	}
+}
+
 #define QCOM_ADRENO_SMMU_GPU_SID 0

 static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
@@ -249,6 +284,8 @@  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
 	priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
 	priv->set_stall = qcom_adreno_smmu_set_stall;
 	priv->resume_translation = qcom_adreno_smmu_resume_translation;
+	priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
+	priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;

 	return 0;
 }
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index e2aeb511ae90..2dbf3243b5ad 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -154,6 +154,8 @@  enum arm_smmu_cbar_type {
 #define ARM_SMMU_SCTLR_M		BIT(0)

 #define ARM_SMMU_CB_ACTLR		0x4
+#define ARM_SMMU_GFX_PRR_CFG_LADDR	0x6008
+#define ARM_SMMU_GFX_PRR_CFG_UADDR	0x600C

 #define ARM_SMMU_CB_RESUME		0x8
 #define ARM_SMMU_RESUME_TERMINATE	BIT(0)
diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
index c637e0997f6d..03466eb16933 100644
--- a/include/linux/adreno-smmu-priv.h
+++ b/include/linux/adreno-smmu-priv.h
@@ -49,7 +49,13 @@  struct adreno_smmu_fault_info {
  *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
  *                 the GPU driver must call resume_translation()
  * @resume_translation: Resume translation after a fault
- *
+ * @set_prr_bit:   Extendible interface to be used by GPU to modify the
+ *		   ACTLR register bits, currently used to configure
+ *		   Partially-Resident-Region (PRR) bit for feature's
+ *		   setup and reset sequence as requested.
+ * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
+ *		   physical address of PRR page passed from
+ *		   GPU driver.
  *
  * The GPU driver (drm/msm) and adreno-smmu work together for controlling
  * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
@@ -67,6 +73,8 @@  struct adreno_smmu_priv {
     void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
     void (*set_stall)(const void *cookie, bool enabled);
     void (*resume_translation)(const void *cookie, bool terminate);
+    void (*set_prr_bit)(const void *cookie, bool set);
+    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
 };

 #endif /* __ADRENO_SMMU_PRIV_H */