[v2,7/8] drm/panfrost: Rework page table flushing and runtime PM interaction
diff mbox series

Message ID 20190823021216.5862-8-robh@kernel.org
State New
Headers show
Series
  • [v2,1/8] drm/panfrost: Fix possible suspend in panfrost_remove
Related show

Commit Message

Rob Herring Aug. 23, 2019, 2:12 a.m. UTC
There is no point in resuming the h/w just to do flush operations and
doing so takes several locks which cause lockdep issues with the shrinker.
Rework the flush operations to only happen when the h/w is already awake.
This avoids taking any locks associated with resuming.

Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Cc: Steven Price <steven.price@arm.com>
Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
---
v2: new patch

 drivers/gpu/drm/panfrost/panfrost_mmu.c | 41 ++++++++++++-------------
 1 file changed, 20 insertions(+), 21 deletions(-)

Comments

Robin Murphy Aug. 23, 2019, 11:11 a.m. UTC | #1
On 23/08/2019 03:12, Rob Herring wrote:
> There is no point in resuming the h/w just to do flush operations and
> doing so takes several locks which cause lockdep issues with the shrinker.
> Rework the flush operations to only happen when the h/w is already awake.
> This avoids taking any locks associated with resuming.
> 
> Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
> Cc: Steven Price <steven.price@arm.com>
> Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
> Cc: David Airlie <airlied@linux.ie>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Rob Herring <robh@kernel.org>
> ---
> v2: new patch
> 
>   drivers/gpu/drm/panfrost/panfrost_mmu.c | 41 ++++++++++++-------------
>   1 file changed, 20 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> index 842bdd7cf6be..ccf671a9c3fb 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> @@ -220,6 +220,23 @@ static size_t get_pgsize(u64 addr, size_t size)
>   	return SZ_2M;
>   }
>   
> +void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
> +			      struct panfrost_mmu *mmu,
> +			      u64 iova, size_t size)
> +{
> +	if (mmu->as < 0)
> +		return;
> +
> +	/* Flush the PTs only if we're already awake */
> +	if (!pm_runtime_get_if_in_use(pfdev->dev))
> +		return;

Is the MMU guaranteed to be reset on resume such that the TLBs will 
always come up clean? Otherwise there are potentially corners where 
stale entries that we skip here might hang around if the hardware lies 
about powering things down.

Robin.

> +
> +	mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
> +
> +	pm_runtime_mark_last_busy(pfdev->dev);
> +	pm_runtime_put_autosuspend(pfdev->dev);
> +}
> +
>   static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
>   		      u64 iova, int prot, struct sg_table *sgt)
>   {
> @@ -246,11 +263,10 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
>   		}
>   	}
>   
> -	mmu_hw_do_operation(pfdev, mmu, start_iova, iova - start_iova,
> -			    AS_COMMAND_FLUSH_PT);
> -
>   	mutex_unlock(&mmu->lock);
>   
> +	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
> +
>   	return 0;
>   }
>   
> @@ -259,7 +275,6 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
>   	struct drm_gem_object *obj = &bo->base.base;
>   	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
>   	struct sg_table *sgt;
> -	int ret;
>   	int prot = IOMMU_READ | IOMMU_WRITE;
>   
>   	if (WARN_ON(bo->is_mapped))
> @@ -272,14 +287,7 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
>   	if (WARN_ON(IS_ERR(sgt)))
>   		return PTR_ERR(sgt);
>   
> -	ret = pm_runtime_get_sync(pfdev->dev);
> -	if (ret < 0)
> -		return ret;
> -
>   	mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, prot, sgt);
> -
> -	pm_runtime_mark_last_busy(pfdev->dev);
> -	pm_runtime_put_autosuspend(pfdev->dev);
>   	bo->is_mapped = true;
>   
>   	return 0;
> @@ -293,17 +301,12 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
>   	u64 iova = bo->node.start << PAGE_SHIFT;
>   	size_t len = bo->node.size << PAGE_SHIFT;
>   	size_t unmapped_len = 0;
> -	int ret;
>   
>   	if (WARN_ON(!bo->is_mapped))
>   		return;
>   
>   	dev_dbg(pfdev->dev, "unmap: as=%d, iova=%llx, len=%zx", bo->mmu->as, iova, len);
>   
> -	ret = pm_runtime_get_sync(pfdev->dev);
> -	if (ret < 0)
> -		return;
> -
>   	mutex_lock(&bo->mmu->lock);
>   
>   	while (unmapped_len < len) {
> @@ -318,13 +321,9 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
>   		unmapped_len += pgsize;
>   	}
>   
> -	mmu_hw_do_operation(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT,
> -			    bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT);
> -
>   	mutex_unlock(&bo->mmu->lock);
>   
> -	pm_runtime_mark_last_busy(pfdev->dev);
> -	pm_runtime_put_autosuspend(pfdev->dev);
> +	panfrost_mmu_flush_range(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, len);
>   	bo->is_mapped = false;
>   }
>   
>
Steven Price Aug. 23, 2019, 3:05 p.m. UTC | #2
On 23/08/2019 12:11, Robin Murphy wrote:
> On 23/08/2019 03:12, Rob Herring wrote:
>> There is no point in resuming the h/w just to do flush operations and
>> doing so takes several locks which cause lockdep issues with the 
>> shrinker.
>> Rework the flush operations to only happen when the h/w is already awake.
>> This avoids taking any locks associated with resuming.
>>
>> Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
>> Cc: Steven Price <steven.price@arm.com>
>> Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
>> Cc: David Airlie <airlied@linux.ie>
>> Cc: Daniel Vetter <daniel@ffwll.ch>
>> Signed-off-by: Rob Herring <robh@kernel.org>
>> ---
>> v2: new patch
>>
>>   drivers/gpu/drm/panfrost/panfrost_mmu.c | 41 ++++++++++++-------------
>>   1 file changed, 20 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c 
>> b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> index 842bdd7cf6be..ccf671a9c3fb 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> @@ -220,6 +220,23 @@ static size_t get_pgsize(u64 addr, size_t size)
>>       return SZ_2M;
>>   }
>> +void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
>> +                  struct panfrost_mmu *mmu,
>> +                  u64 iova, size_t size)
>> +{
>> +    if (mmu->as < 0)
>> +        return;
>> +
>> +    /* Flush the PTs only if we're already awake */
>> +    if (!pm_runtime_get_if_in_use(pfdev->dev))
>> +        return;
> 
> Is the MMU guaranteed to be reset on resume such that the TLBs will 
> always come up clean? Otherwise there are potentially corners where 
> stale entries that we skip here might hang around if the hardware lies 
> about powering things down.

Assuming runtime PM has actually committed to the power off, then on 
power on panfrost_device_resume() will be called which performs a reset 
of the GPU which will clear the L2/TLBs so there shouldn't be any 
problem there.

As far as I can see from the code that is how pm_runtime_get_if_in_use() 
works - it will return true unless the suspend() callback has been called.

Steve
Steven Price Aug. 23, 2019, 3:09 p.m. UTC | #3
On 23/08/2019 03:12, Rob Herring wrote:
> There is no point in resuming the h/w just to do flush operations and
> doing so takes several locks which cause lockdep issues with the shrinker.
> Rework the flush operations to only happen when the h/w is already awake.
> This avoids taking any locks associated with resuming.
> 
> Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
> Cc: Steven Price <steven.price@arm.com>
> Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
> Cc: David Airlie <airlied@linux.ie>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Rob Herring <robh@kernel.org>

Reviewed-by: Steven Price <steven.price@arm.com>

But one comment below...

> ---
> v2: new patch
> 
>   drivers/gpu/drm/panfrost/panfrost_mmu.c | 41 ++++++++++++-------------
>   1 file changed, 20 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> index 842bdd7cf6be..ccf671a9c3fb 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> @@ -220,6 +220,23 @@ static size_t get_pgsize(u64 addr, size_t size)
>   	return SZ_2M;
>   }
>   
> +void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
> +			      struct panfrost_mmu *mmu,
> +			      u64 iova, size_t size)
> +{
> +	if (mmu->as < 0)
> +		return;
> +
> +	/* Flush the PTs only if we're already awake */
> +	if (!pm_runtime_get_if_in_use(pfdev->dev))
> +		return;
> +
> +	mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
> +
> +	pm_runtime_mark_last_busy(pfdev->dev);

This isn't really a change, but: I'm not sure why we want to signal we 
were busy just because we had to do some cache maintenance? We might 
actually be faster leaving the GPU off so there's no need to do extra 
flushes on the GPU?

Steve

> +	pm_runtime_put_autosuspend(pfdev->dev);
> +}
> +
>   static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
>   		      u64 iova, int prot, struct sg_table *sgt)
>   {
> @@ -246,11 +263,10 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
>   		}
>   	}
>   
> -	mmu_hw_do_operation(pfdev, mmu, start_iova, iova - start_iova,
> -			    AS_COMMAND_FLUSH_PT);
> -
>   	mutex_unlock(&mmu->lock);
>   
> +	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
> +
>   	return 0;
>   }
>   
> @@ -259,7 +275,6 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
>   	struct drm_gem_object *obj = &bo->base.base;
>   	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
>   	struct sg_table *sgt;
> -	int ret;
>   	int prot = IOMMU_READ | IOMMU_WRITE;
>   
>   	if (WARN_ON(bo->is_mapped))
> @@ -272,14 +287,7 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
>   	if (WARN_ON(IS_ERR(sgt)))
>   		return PTR_ERR(sgt);
>   
> -	ret = pm_runtime_get_sync(pfdev->dev);
> -	if (ret < 0)
> -		return ret;
> -
>   	mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, prot, sgt);
> -
> -	pm_runtime_mark_last_busy(pfdev->dev);
> -	pm_runtime_put_autosuspend(pfdev->dev);
>   	bo->is_mapped = true;
>   
>   	return 0;
> @@ -293,17 +301,12 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
>   	u64 iova = bo->node.start << PAGE_SHIFT;
>   	size_t len = bo->node.size << PAGE_SHIFT;
>   	size_t unmapped_len = 0;
> -	int ret;
>   
>   	if (WARN_ON(!bo->is_mapped))
>   		return;
>   
>   	dev_dbg(pfdev->dev, "unmap: as=%d, iova=%llx, len=%zx", bo->mmu->as, iova, len);
>   
> -	ret = pm_runtime_get_sync(pfdev->dev);
> -	if (ret < 0)
> -		return;
> -
>   	mutex_lock(&bo->mmu->lock);
>   
>   	while (unmapped_len < len) {
> @@ -318,13 +321,9 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
>   		unmapped_len += pgsize;
>   	}
>   
> -	mmu_hw_do_operation(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT,
> -			    bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT);
> -
>   	mutex_unlock(&bo->mmu->lock);
>   
> -	pm_runtime_mark_last_busy(pfdev->dev);
> -	pm_runtime_put_autosuspend(pfdev->dev);
> +	panfrost_mmu_flush_range(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, len);
>   	bo->is_mapped = false;
>   }
>   
>
Robin Murphy Aug. 23, 2019, 3:44 p.m. UTC | #4
On 23/08/2019 16:05, Steven Price wrote:
> On 23/08/2019 12:11, Robin Murphy wrote:
>> On 23/08/2019 03:12, Rob Herring wrote:
>>> There is no point in resuming the h/w just to do flush operations and
>>> doing so takes several locks which cause lockdep issues with the 
>>> shrinker.
>>> Rework the flush operations to only happen when the h/w is already 
>>> awake.
>>> This avoids taking any locks associated with resuming.
>>>
>>> Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
>>> Cc: Steven Price <steven.price@arm.com>
>>> Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
>>> Cc: David Airlie <airlied@linux.ie>
>>> Cc: Daniel Vetter <daniel@ffwll.ch>
>>> Signed-off-by: Rob Herring <robh@kernel.org>
>>> ---
>>> v2: new patch
>>>
>>>   drivers/gpu/drm/panfrost/panfrost_mmu.c | 41 ++++++++++++-------------
>>>   1 file changed, 20 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c 
>>> b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>> index 842bdd7cf6be..ccf671a9c3fb 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>> @@ -220,6 +220,23 @@ static size_t get_pgsize(u64 addr, size_t size)
>>>       return SZ_2M;
>>>   }
>>> +void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
>>> +                  struct panfrost_mmu *mmu,
>>> +                  u64 iova, size_t size)
>>> +{
>>> +    if (mmu->as < 0)
>>> +        return;
>>> +
>>> +    /* Flush the PTs only if we're already awake */
>>> +    if (!pm_runtime_get_if_in_use(pfdev->dev))
>>> +        return;
>>
>> Is the MMU guaranteed to be reset on resume such that the TLBs will 
>> always come up clean? Otherwise there are potentially corners where 
>> stale entries that we skip here might hang around if the hardware lies 
>> about powering things down.
> 
> Assuming runtime PM has actually committed to the power off, then on 
> power on panfrost_device_resume() will be called which performs a reset 
> of the GPU which will clear the L2/TLBs so there shouldn't be any 
> problem there.

OK, if panfrost_gpu_soft_reset() is sufficient to guarantee clean TLBs 
then this looks equivalent to what we did for arm-smmu, so I've no 
complaints in that regard.

However on second look I've now noticed the panfrost_mmu_flush_range() 
calls being moved outside of mmu->lock protection. Forgive me if there's 
basic DRM knowledge I'm missing here, but is there any possibility for 
multiple threads to create/import/free objects simultaneously on the 
same FD such that two mmu_hw_do_operation() calls could race and 
interfere with each other in terms of the 
AS_LOCKADDR/AS_COMMAND/AS_STATUS dance?

Robin.
Rob Herring Aug. 23, 2019, 3:49 p.m. UTC | #5
On Fri, Aug 23, 2019 at 10:09 AM Steven Price <steven.price@arm.com> wrote:
>
> On 23/08/2019 03:12, Rob Herring wrote:
> > There is no point in resuming the h/w just to do flush operations and
> > doing so takes several locks which cause lockdep issues with the shrinker.
> > Rework the flush operations to only happen when the h/w is already awake.
> > This avoids taking any locks associated with resuming.
> >
> > Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
> > Cc: Steven Price <steven.price@arm.com>
> > Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
> > Cc: David Airlie <airlied@linux.ie>
> > Cc: Daniel Vetter <daniel@ffwll.ch>
> > Signed-off-by: Rob Herring <robh@kernel.org>
>
> Reviewed-by: Steven Price <steven.price@arm.com>
>
> But one comment below...
>
> > ---
> > v2: new patch
> >
> >   drivers/gpu/drm/panfrost/panfrost_mmu.c | 41 ++++++++++++-------------
> >   1 file changed, 20 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > index 842bdd7cf6be..ccf671a9c3fb 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > @@ -220,6 +220,23 @@ static size_t get_pgsize(u64 addr, size_t size)
> >       return SZ_2M;
> >   }
> >
> > +void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
> > +                           struct panfrost_mmu *mmu,
> > +                           u64 iova, size_t size)
> > +{
> > +     if (mmu->as < 0)
> > +             return;
> > +
> > +     /* Flush the PTs only if we're already awake */
> > +     if (!pm_runtime_get_if_in_use(pfdev->dev))
> > +             return;
> > +
> > +     mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
> > +
> > +     pm_runtime_mark_last_busy(pfdev->dev);
>
> This isn't really a change, but: I'm not sure why we want to signal we
> were busy just because we had to do some cache maintenance? We might
> actually be faster leaving the GPU off so there's no need to do extra
> flushes on the GPU?

I don't know, good question. Powering up and down has its cost too. We
only get here if we were already active. A flush on a map probably is
going to be followed by a job. An unmap may be the end of activity or
not.

If we don't call pm_runtime_mark_last_busy, then maybe we also want to
do a pm_runtime_put_suspend (i.e. suspend immediately) instead. That
may only matter if we're the last one which could only happen during
this get/put. I'm not sure what happens if a suspend is requested
followed by an autosuspend.

Rob
Rob Herring Aug. 23, 2019, 3:57 p.m. UTC | #6
On Fri, Aug 23, 2019 at 10:44 AM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 23/08/2019 16:05, Steven Price wrote:
> > On 23/08/2019 12:11, Robin Murphy wrote:
> >> On 23/08/2019 03:12, Rob Herring wrote:
> >>> There is no point in resuming the h/w just to do flush operations and
> >>> doing so takes several locks which cause lockdep issues with the
> >>> shrinker.
> >>> Rework the flush operations to only happen when the h/w is already
> >>> awake.
> >>> This avoids taking any locks associated with resuming.
> >>>
> >>> Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
> >>> Cc: Steven Price <steven.price@arm.com>
> >>> Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
> >>> Cc: David Airlie <airlied@linux.ie>
> >>> Cc: Daniel Vetter <daniel@ffwll.ch>
> >>> Signed-off-by: Rob Herring <robh@kernel.org>
> >>> ---
> >>> v2: new patch
> >>>
> >>>   drivers/gpu/drm/panfrost/panfrost_mmu.c | 41 ++++++++++++-------------
> >>>   1 file changed, 20 insertions(+), 21 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> >>> b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> >>> index 842bdd7cf6be..ccf671a9c3fb 100644
> >>> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> >>> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> >>> @@ -220,6 +220,23 @@ static size_t get_pgsize(u64 addr, size_t size)
> >>>       return SZ_2M;
> >>>   }
> >>> +void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
> >>> +                  struct panfrost_mmu *mmu,
> >>> +                  u64 iova, size_t size)
> >>> +{
> >>> +    if (mmu->as < 0)
> >>> +        return;
> >>> +
> >>> +    /* Flush the PTs only if we're already awake */
> >>> +    if (!pm_runtime_get_if_in_use(pfdev->dev))
> >>> +        return;
> >>
> >> Is the MMU guaranteed to be reset on resume such that the TLBs will
> >> always come up clean? Otherwise there are potentially corners where
> >> stale entries that we skip here might hang around if the hardware lies
> >> about powering things down.
> >
> > Assuming runtime PM has actually committed to the power off, then on
> > power on panfrost_device_resume() will be called which performs a reset
> > of the GPU which will clear the L2/TLBs so there shouldn't be any
> > problem there.
>
> OK, if panfrost_gpu_soft_reset() is sufficient to guarantee clean TLBs
> then this looks equivalent to what we did for arm-smmu, so I've no
> complaints in that regard.
>
> However on second look I've now noticed the panfrost_mmu_flush_range()
> calls being moved outside of mmu->lock protection. Forgive me if there's
> basic DRM knowledge I'm missing here, but is there any possibility for
> multiple threads to create/import/free objects simultaneously on the
> same FD such that two mmu_hw_do_operation() calls could race and
> interfere with each other in terms of the
> AS_LOCKADDR/AS_COMMAND/AS_STATUS dance?

Yes, we could have multiple threads. Not really any good reason it's
moved out of the mmu->lock other than just to avoid any nesting
(though that seemed fine in testing). The newly added as_lock will
serialize mmu_hw_do_operation(). So the mmu->lock is just serializing
page table writes.

Rob
Robin Murphy Aug. 23, 2019, 4:16 p.m. UTC | #7
On 23/08/2019 16:57, Rob Herring wrote:
> On Fri, Aug 23, 2019 at 10:44 AM Robin Murphy <robin.murphy@arm.com> wrote:
>>
>> On 23/08/2019 16:05, Steven Price wrote:
>>> On 23/08/2019 12:11, Robin Murphy wrote:
>>>> On 23/08/2019 03:12, Rob Herring wrote:
>>>>> There is no point in resuming the h/w just to do flush operations and
>>>>> doing so takes several locks which cause lockdep issues with the
>>>>> shrinker.
>>>>> Rework the flush operations to only happen when the h/w is already
>>>>> awake.
>>>>> This avoids taking any locks associated with resuming.
>>>>>
>>>>> Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
>>>>> Cc: Steven Price <steven.price@arm.com>
>>>>> Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
>>>>> Cc: David Airlie <airlied@linux.ie>
>>>>> Cc: Daniel Vetter <daniel@ffwll.ch>
>>>>> Signed-off-by: Rob Herring <robh@kernel.org>
>>>>> ---
>>>>> v2: new patch
>>>>>
>>>>>    drivers/gpu/drm/panfrost/panfrost_mmu.c | 41 ++++++++++++-------------
>>>>>    1 file changed, 20 insertions(+), 21 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>>>> b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>>>> index 842bdd7cf6be..ccf671a9c3fb 100644
>>>>> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>>>> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>>>> @@ -220,6 +220,23 @@ static size_t get_pgsize(u64 addr, size_t size)
>>>>>        return SZ_2M;
>>>>>    }
>>>>> +void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
>>>>> +                  struct panfrost_mmu *mmu,
>>>>> +                  u64 iova, size_t size)
>>>>> +{
>>>>> +    if (mmu->as < 0)
>>>>> +        return;
>>>>> +
>>>>> +    /* Flush the PTs only if we're already awake */
>>>>> +    if (!pm_runtime_get_if_in_use(pfdev->dev))
>>>>> +        return;
>>>>
>>>> Is the MMU guaranteed to be reset on resume such that the TLBs will
>>>> always come up clean? Otherwise there are potentially corners where
>>>> stale entries that we skip here might hang around if the hardware lies
>>>> about powering things down.
>>>
>>> Assuming runtime PM has actually committed to the power off, then on
>>> power on panfrost_device_resume() will be called which performs a reset
>>> of the GPU which will clear the L2/TLBs so there shouldn't be any
>>> problem there.
>>
>> OK, if panfrost_gpu_soft_reset() is sufficient to guarantee clean TLBs
>> then this looks equivalent to what we did for arm-smmu, so I've no
>> complaints in that regard.
>>
>> However on second look I've now noticed the panfrost_mmu_flush_range()
>> calls being moved outside of mmu->lock protection. Forgive me if there's
>> basic DRM knowledge I'm missing here, but is there any possibility for
>> multiple threads to create/import/free objects simultaneously on the
>> same FD such that two mmu_hw_do_operation() calls could race and
>> interfere with each other in terms of the
>> AS_LOCKADDR/AS_COMMAND/AS_STATUS dance?
> 
> Yes, we could have multiple threads. Not really any good reason it's
> moved out of the mmu->lock other than just to avoid any nesting
> (though that seemed fine in testing). The newly added as_lock will
> serialize mmu_hw_do_operation(). So the mmu->lock is just serializing
> page table writes.

Urgh, sorry, once again I'd stopped looking at -next and was 
cross-referencing my current rc3-based working tree :(

In that case, you may even be able to remove mmu->lock entirely, since 
io-pgtable-arm doesn't need external locking itself. And for this patch,

Reviewed-by: Robin Murphy <robin.murphy@arm.com>

Cheers,
Robin.
Rob Herring Aug. 23, 2019, 4:45 p.m. UTC | #8
On Fri, Aug 23, 2019 at 11:16 AM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 23/08/2019 16:57, Rob Herring wrote:
> > On Fri, Aug 23, 2019 at 10:44 AM Robin Murphy <robin.murphy@arm.com> wrote:
> >>
> >> On 23/08/2019 16:05, Steven Price wrote:
> >>> On 23/08/2019 12:11, Robin Murphy wrote:
> >>>> On 23/08/2019 03:12, Rob Herring wrote:
> >>>>> There is no point in resuming the h/w just to do flush operations and
> >>>>> doing so takes several locks which cause lockdep issues with the
> >>>>> shrinker.
> >>>>> Rework the flush operations to only happen when the h/w is already
> >>>>> awake.
> >>>>> This avoids taking any locks associated with resuming.
> >>>>>
> >>>>> Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
> >>>>> Cc: Steven Price <steven.price@arm.com>
> >>>>> Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
> >>>>> Cc: David Airlie <airlied@linux.ie>
> >>>>> Cc: Daniel Vetter <daniel@ffwll.ch>
> >>>>> Signed-off-by: Rob Herring <robh@kernel.org>
> >>>>> ---
> >>>>> v2: new patch
> >>>>>
> >>>>>    drivers/gpu/drm/panfrost/panfrost_mmu.c | 41 ++++++++++++-------------
> >>>>>    1 file changed, 20 insertions(+), 21 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> >>>>> b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> >>>>> index 842bdd7cf6be..ccf671a9c3fb 100644
> >>>>> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> >>>>> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> >>>>> @@ -220,6 +220,23 @@ static size_t get_pgsize(u64 addr, size_t size)
> >>>>>        return SZ_2M;
> >>>>>    }
> >>>>> +void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
> >>>>> +                  struct panfrost_mmu *mmu,
> >>>>> +                  u64 iova, size_t size)
> >>>>> +{
> >>>>> +    if (mmu->as < 0)
> >>>>> +        return;
> >>>>> +
> >>>>> +    /* Flush the PTs only if we're already awake */
> >>>>> +    if (!pm_runtime_get_if_in_use(pfdev->dev))
> >>>>> +        return;
> >>>>
> >>>> Is the MMU guaranteed to be reset on resume such that the TLBs will
> >>>> always come up clean? Otherwise there are potentially corners where
> >>>> stale entries that we skip here might hang around if the hardware lies
> >>>> about powering things down.
> >>>
> >>> Assuming runtime PM has actually committed to the power off, then on
> >>> power on panfrost_device_resume() will be called which performs a reset
> >>> of the GPU which will clear the L2/TLBs so there shouldn't be any
> >>> problem there.
> >>
> >> OK, if panfrost_gpu_soft_reset() is sufficient to guarantee clean TLBs
> >> then this looks equivalent to what we did for arm-smmu, so I've no
> >> complaints in that regard.
> >>
> >> However on second look I've now noticed the panfrost_mmu_flush_range()
> >> calls being moved outside of mmu->lock protection. Forgive me if there's
> >> basic DRM knowledge I'm missing here, but is there any possibility for
> >> multiple threads to create/import/free objects simultaneously on the
> >> same FD such that two mmu_hw_do_operation() calls could race and
> >> interfere with each other in terms of the
> >> AS_LOCKADDR/AS_COMMAND/AS_STATUS dance?
> >
> > Yes, we could have multiple threads. Not really any good reason it's
> > moved out of the mmu->lock other than just to avoid any nesting
> > (though that seemed fine in testing). The newly added as_lock will
> > serialize mmu_hw_do_operation(). So the mmu->lock is just serializing
> > page table writes.
>
> Urgh, sorry, once again I'd stopped looking at -next and was
> cross-referencing my current rc3-based working tree :(
>
> In that case, you may even be able to remove mmu->lock entirely, since
> io-pgtable-arm doesn't need external locking itself. And for this patch,

I was wondering about that, but hadn't gotten around to investigating.

>
> Reviewed-by: Robin Murphy <robin.murphy@arm.com>
>
> Cheers,
> Robin.

Patch
diff mbox series

diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 842bdd7cf6be..ccf671a9c3fb 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -220,6 +220,23 @@  static size_t get_pgsize(u64 addr, size_t size)
 	return SZ_2M;
 }
 
+void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
+			      struct panfrost_mmu *mmu,
+			      u64 iova, size_t size)
+{
+	if (mmu->as < 0)
+		return;
+
+	/* Flush the PTs only if we're already awake */
+	if (!pm_runtime_get_if_in_use(pfdev->dev))
+		return;
+
+	mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
+
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
+}
+
 static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
 		      u64 iova, int prot, struct sg_table *sgt)
 {
@@ -246,11 +263,10 @@  static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
 		}
 	}
 
-	mmu_hw_do_operation(pfdev, mmu, start_iova, iova - start_iova,
-			    AS_COMMAND_FLUSH_PT);
-
 	mutex_unlock(&mmu->lock);
 
+	panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova);
+
 	return 0;
 }
 
@@ -259,7 +275,6 @@  int panfrost_mmu_map(struct panfrost_gem_object *bo)
 	struct drm_gem_object *obj = &bo->base.base;
 	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
 	struct sg_table *sgt;
-	int ret;
 	int prot = IOMMU_READ | IOMMU_WRITE;
 
 	if (WARN_ON(bo->is_mapped))
@@ -272,14 +287,7 @@  int panfrost_mmu_map(struct panfrost_gem_object *bo)
 	if (WARN_ON(IS_ERR(sgt)))
 		return PTR_ERR(sgt);
 
-	ret = pm_runtime_get_sync(pfdev->dev);
-	if (ret < 0)
-		return ret;
-
 	mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, prot, sgt);
-
-	pm_runtime_mark_last_busy(pfdev->dev);
-	pm_runtime_put_autosuspend(pfdev->dev);
 	bo->is_mapped = true;
 
 	return 0;
@@ -293,17 +301,12 @@  void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
 	u64 iova = bo->node.start << PAGE_SHIFT;
 	size_t len = bo->node.size << PAGE_SHIFT;
 	size_t unmapped_len = 0;
-	int ret;
 
 	if (WARN_ON(!bo->is_mapped))
 		return;
 
 	dev_dbg(pfdev->dev, "unmap: as=%d, iova=%llx, len=%zx", bo->mmu->as, iova, len);
 
-	ret = pm_runtime_get_sync(pfdev->dev);
-	if (ret < 0)
-		return;
-
 	mutex_lock(&bo->mmu->lock);
 
 	while (unmapped_len < len) {
@@ -318,13 +321,9 @@  void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
 		unmapped_len += pgsize;
 	}
 
-	mmu_hw_do_operation(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT,
-			    bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT);
-
 	mutex_unlock(&bo->mmu->lock);
 
-	pm_runtime_mark_last_busy(pfdev->dev);
-	pm_runtime_put_autosuspend(pfdev->dev);
+	panfrost_mmu_flush_range(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, len);
 	bo->is_mapped = false;
 }