
drm/radeon: add an exclusive lock for GPU reset

Message ID 1341243563-2559-1-git-send-email-j.glisse@gmail.com (mailing list archive)
State New, archived

Commit Message

Jerome Glisse July 2, 2012, 3:39 p.m. UTC
From: Jerome Glisse <jglisse@redhat.com>

GPU reset needs to be exclusive, with only one happening at a time. For this,
add a rw semaphore so that any path that triggers GPU activity
has to take the semaphore as a reader, thus allowing concurrency.

The GPU reset path takes the semaphore as a writer, ensuring that
no concurrent reset takes place.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h        |    1 +
 drivers/gpu/drm/radeon/radeon_cs.c     |    5 +++++
 drivers/gpu/drm/radeon/radeon_device.c |    2 ++
 drivers/gpu/drm/radeon/radeon_gem.c    |    7 +++++++
 4 files changed, 15 insertions(+)
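
For context, here is a minimal standalone sketch of the locking pattern the patch applies; the function names are placeholders standing in for the real ioctl and reset paths, and note that a rw_semaphore also has to be initialized with init_rwsem() before first use.

/* Minimal sketch of the pattern (not the actual driver code): paths that
 * trigger GPU activity take the semaphore as readers and may run
 * concurrently with each other, while the reset path takes it as a writer
 * and therefore runs exclusively. */
#include <linux/rwsem.h>

static struct rw_semaphore example_exclusive_lock;

static void example_init(void)
{
	init_rwsem(&example_exclusive_lock);
}

static int example_gpu_activity(void)		/* e.g. an ioctl path */
{
	down_read(&example_exclusive_lock);	/* many readers at once is fine */
	/* ... submit work to the GPU ... */
	up_read(&example_exclusive_lock);
	return 0;
}

static int example_gpu_reset(void)
{
	down_write(&example_exclusive_lock);	/* waits for readers, blocks new ones */
	/* ... suspend, reset and resume the ASIC ... */
	up_write(&example_exclusive_lock);
	return 0;
}

In the patch itself the semaphore is a member of struct radeon_device (rdev->exclusive_lock), with the reader side taken in the CS and GEM ioctls and the writer side in radeon_gpu_reset().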

Comments

Jerome Glisse July 2, 2012, 4:20 p.m. UTC | #1
On Mon, Jul 2, 2012 at 11:39 AM,  <j.glisse@gmail.com> wrote:
> From: Jerome Glisse <jglisse@redhat.com>
>
> GPU reset need to be exclusive, one happening at a time. For this
> add a rw semaphore so that any path that trigger GPU activities
> have to take the semaphore as a reader thus allowing concurency.
>
> The GPU reset path take the semaphore as a writer ensuring that
> no concurrent reset take place.
>
> Signed-off-by: Jerome Glisse <jglisse@redhat.com>

Wrong patch, sorry, resending.

> ---
>  drivers/gpu/drm/radeon/radeon.h        |    1 +
>  drivers/gpu/drm/radeon/radeon_cs.c     |    5 +++++
>  drivers/gpu/drm/radeon/radeon_device.c |    2 ++
>  drivers/gpu/drm/radeon/radeon_gem.c    |    7 +++++++
>  4 files changed, 15 insertions(+)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 77b4519b..29d6986 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -1446,6 +1446,7 @@ struct radeon_device {
>        struct device                   *dev;
>        struct drm_device               *ddev;
>        struct pci_dev                  *pdev;
> +       struct rw_semaphore             exclusive_lock;
>        /* ASIC */
>        union radeon_asic_config        config;
>        enum radeon_family              family;
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index f1b7527..7ee6491 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -499,7 +499,9 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>        struct radeon_cs_parser parser;
>        int r;
>
> +       down_read(&rdev->exclusive_lock);
>        if (!rdev->accel_working) {
> +               up_read(&rdev->exclusive_lock);
>                return -EBUSY;
>        }
>        /* initialize parser */
> @@ -512,6 +514,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>        if (r) {
>                DRM_ERROR("Failed to initialize parser !\n");
>                radeon_cs_parser_fini(&parser, r);
> +               up_read(&rdev->exclusive_lock);
>                r = radeon_cs_handle_lockup(rdev, r);
>                return r;
>        }
> @@ -520,6 +523,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>                if (r != -ERESTARTSYS)
>                        DRM_ERROR("Failed to parse relocation %d!\n", r);
>                radeon_cs_parser_fini(&parser, r);
> +               up_read(&rdev->exclusive_lock);
>                r = radeon_cs_handle_lockup(rdev, r);
>                return r;
>        }
> @@ -533,6 +537,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>        }
>  out:
>        radeon_cs_parser_fini(&parser, r);
> +       up_read(&rdev->exclusive_lock);
>        r = radeon_cs_handle_lockup(rdev, r);
>        return r;
>  }
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index f654ba8..c8fdb40 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -988,6 +988,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>        int r;
>        int resched;
>
> +       down_write(&rdev->exclusive_lock);
>        radeon_save_bios_scratch_regs(rdev);
>        /* block TTM */
>        resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
> @@ -1007,6 +1008,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>                dev_info(rdev->dev, "GPU reset failed\n");
>        }
>
> +       up_write(&rdev->exclusive_lock);
>        return r;
>  }
>
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index d9b0809..f99db63 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -215,12 +215,14 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
>        uint32_t handle;
>        int r;
>
> +       down_read(&rdev->exclusive_lock);
>        /* create a gem object to contain this object in */
>        args->size = roundup(args->size, PAGE_SIZE);
>        r = radeon_gem_object_create(rdev, args->size, args->alignment,
>                                        args->initial_domain, false,
>                                        false, &gobj);
>        if (r) {
> +               up_read(&rdev->exclusive_lock);
>                r = radeon_gem_handle_lockup(rdev, r);
>                return r;
>        }
> @@ -228,10 +230,12 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
>        /* drop reference from allocate - handle holds it now */
>        drm_gem_object_unreference_unlocked(gobj);
>        if (r) {
> +               up_read(&rdev->exclusive_lock);
>                r = radeon_gem_handle_lockup(rdev, r);
>                return r;
>        }
>        args->handle = handle;
> +       up_read(&rdev->exclusive_lock);
>        return 0;
>  }
>
> @@ -240,6 +244,7 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>  {
>        /* transition the BO to a domain -
>         * just validate the BO into a certain domain */
> +       struct radeon_device *rdev = dev->dev_private;
>        struct drm_radeon_gem_set_domain *args = data;
>        struct drm_gem_object *gobj;
>        struct radeon_bo *robj;
> @@ -255,9 +260,11 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>        }
>        robj = gem_to_radeon_bo(gobj);
>
> +       down_read(&rdev->exclusive_lock);
>        r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain);
>
>        drm_gem_object_unreference_unlocked(gobj);
> +       up_read(&rdev->exclusive_lock);
>        r = radeon_gem_handle_lockup(robj->rdev, r);
>        return r;
>  }
> --
> 1.7.10.2
>
Christian König July 2, 2012, 4:26 p.m. UTC | #2
On 02.07.2012 17:39, j.glisse@gmail.com wrote:
> From: Jerome Glisse <jglisse@redhat.com>
>
> GPU reset need to be exclusive, one happening at a time. For this
> add a rw semaphore so that any path that trigger GPU activities
> have to take the semaphore as a reader thus allowing concurency.
>
> The GPU reset path take the semaphore as a writer ensuring that
> no concurrent reset take place.
>
> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
NAK, that isn't as bad as the cs mutex was, but it is still too complicated.
It's just too far up in the call stack, e.g. it tries to catch ioctl
operations instead of catching the underlying hardware operation which
is caused by the ioctl/ttm/etc.

Why not just take the ring lock as I suggested?

Christian.
> ---
>   drivers/gpu/drm/radeon/radeon.h        |    1 +
>   drivers/gpu/drm/radeon/radeon_cs.c     |    5 +++++
>   drivers/gpu/drm/radeon/radeon_device.c |    2 ++
>   drivers/gpu/drm/radeon/radeon_gem.c    |    7 +++++++
>   4 files changed, 15 insertions(+)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 77b4519b..29d6986 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -1446,6 +1446,7 @@ struct radeon_device {
>   	struct device			*dev;
>   	struct drm_device		*ddev;
>   	struct pci_dev			*pdev;
> +	struct rw_semaphore		exclusive_lock;
>   	/* ASIC */
>   	union radeon_asic_config	config;
>   	enum radeon_family		family;
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index f1b7527..7ee6491 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -499,7 +499,9 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	struct radeon_cs_parser parser;
>   	int r;
>   
> +	down_read(&rdev->exclusive_lock);
>   	if (!rdev->accel_working) {
> +		up_read(&rdev->exclusive_lock);
>   		return -EBUSY;
>   	}
>   	/* initialize parser */
> @@ -512,6 +514,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	if (r) {
>   		DRM_ERROR("Failed to initialize parser !\n");
>   		radeon_cs_parser_fini(&parser, r);
> +		up_read(&rdev->exclusive_lock);
>   		r = radeon_cs_handle_lockup(rdev, r);
>   		return r;
>   	}
> @@ -520,6 +523,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   		if (r != -ERESTARTSYS)
>   			DRM_ERROR("Failed to parse relocation %d!\n", r);
>   		radeon_cs_parser_fini(&parser, r);
> +		up_read(&rdev->exclusive_lock);
>   		r = radeon_cs_handle_lockup(rdev, r);
>   		return r;
>   	}
> @@ -533,6 +537,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	}
>   out:
>   	radeon_cs_parser_fini(&parser, r);
> +	up_read(&rdev->exclusive_lock);
>   	r = radeon_cs_handle_lockup(rdev, r);
>   	return r;
>   }
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index f654ba8..c8fdb40 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -988,6 +988,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>   	int r;
>   	int resched;
>   
> +	down_write(&rdev->exclusive_lock);
>   	radeon_save_bios_scratch_regs(rdev);
>   	/* block TTM */
>   	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
> @@ -1007,6 +1008,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>   		dev_info(rdev->dev, "GPU reset failed\n");
>   	}
>   
> +	up_write(&rdev->exclusive_lock);
>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index d9b0809..f99db63 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -215,12 +215,14 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
>   	uint32_t handle;
>   	int r;
>   
> +	down_read(&rdev->exclusive_lock);
>   	/* create a gem object to contain this object in */
>   	args->size = roundup(args->size, PAGE_SIZE);
>   	r = radeon_gem_object_create(rdev, args->size, args->alignment,
>   					args->initial_domain, false,
>   					false, &gobj);
>   	if (r) {
> +		up_read(&rdev->exclusive_lock);
>   		r = radeon_gem_handle_lockup(rdev, r);
>   		return r;
>   	}
> @@ -228,10 +230,12 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
>   	/* drop reference from allocate - handle holds it now */
>   	drm_gem_object_unreference_unlocked(gobj);
>   	if (r) {
> +		up_read(&rdev->exclusive_lock);
>   		r = radeon_gem_handle_lockup(rdev, r);
>   		return r;
>   	}
>   	args->handle = handle;
> +	up_read(&rdev->exclusive_lock);
>   	return 0;
>   }
>   
> @@ -240,6 +244,7 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   {
>   	/* transition the BO to a domain -
>   	 * just validate the BO into a certain domain */
> +	struct radeon_device *rdev = dev->dev_private;
>   	struct drm_radeon_gem_set_domain *args = data;
>   	struct drm_gem_object *gobj;
>   	struct radeon_bo *robj;
> @@ -255,9 +260,11 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   	}
>   	robj = gem_to_radeon_bo(gobj);
>   
> +	down_read(&rdev->exclusive_lock);
>   	r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain);
>   
>   	drm_gem_object_unreference_unlocked(gobj);
> +	up_read(&rdev->exclusive_lock);
>   	r = radeon_gem_handle_lockup(robj->rdev, r);
>   	return r;
>   }
Jerome Glisse July 2, 2012, 4:41 p.m. UTC | #3
On Mon, Jul 2, 2012 at 12:26 PM, Christian König
<deathsimple@vodafone.de> wrote:
> On 02.07.2012 17:39, j.glisse@gmail.com wrote:
>>
>> From: Jerome Glisse <jglisse@redhat.com>
>>
>> GPU reset need to be exclusive, one happening at a time. For this
>> add a rw semaphore so that any path that trigger GPU activities
>> have to take the semaphore as a reader thus allowing concurency.
>>
>> The GPU reset path take the semaphore as a writer ensuring that
>> no concurrent reset take place.
>>
>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>
> NAK, that isn't as bad as the cs mutex was but still to complicated. It's
> just too far up in the call stack, e.g. it tries to catch ioctl operations,
> instead of catching the underlying hardware operation which is caused by the
> ioctl/ttm/etc...
>
> Why not just take the ring look as I suggested?
>
>

No, we can't use the ring lock: we need to protect the suspend/resume path,
and we need an exclusive lock for that, so we need a reset mutex at the
very least. But instead of having a reset mutex I prefer using a rw
lock so that we can stop ioctls until a reset goes through and let
pending ioctls take proper action. Think about multiple contexts trying
to reset the GPU ...

Really, this is the best option; the rw locking won't induce any lock
contention except in the GPU reset case, which is bad news anyway.

Jerome
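
A minimal sketch of the serialization argued for above: with the writer side held for the whole reset, a second thread that also tries to reset simply blocks until the first reset finishes. The needs_reset flag used below to skip a redundant second reset is hypothetical and not part of this patch.

/* Sketch only; needs_reset is a hypothetical field, not in this patch. */
static int example_gpu_reset(struct radeon_device *rdev)
{
	int r = 0;

	down_write(&rdev->exclusive_lock);	/* serializes concurrent reset attempts */
	if (!rdev->needs_reset) {
		/* another thread already recovered the GPU while we waited */
		up_write(&rdev->exclusive_lock);
		return 0;
	}
	/* ... save scratch regs, suspend, asic reset, resume ... */
	rdev->needs_reset = false;
	up_write(&rdev->exclusive_lock);
	return r;
}
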
Christian König July 2, 2012, 5:05 p.m. UTC | #4
On 02.07.2012 18:41, Jerome Glisse wrote:
> On Mon, Jul 2, 2012 at 12:26 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>> On 02.07.2012 17:39, j.glisse@gmail.com wrote:
>>> From: Jerome Glisse <jglisse@redhat.com>
>>>
>>> GPU reset need to be exclusive, one happening at a time. For this
>>> add a rw semaphore so that any path that trigger GPU activities
>>> have to take the semaphore as a reader thus allowing concurency.
>>>
>>> The GPU reset path take the semaphore as a writer ensuring that
>>> no concurrent reset take place.
>>>
>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>> NAK, that isn't as bad as the cs mutex was but still to complicated. It's
>> just too far up in the call stack, e.g. it tries to catch ioctl operations,
>> instead of catching the underlying hardware operation which is caused by the
>> ioctl/ttm/etc...
>>
>> Why not just take the ring look as I suggested?
>>
>>
> No we can't use ring lock, we need to protect suspend/resume path and
> we need an exclusive lock for that so we need a reset mutex at the
> very least. But instead of having a reset mutex i prefer using a rw
> lock so that we can stop ioctl until a reset goes through an let
> pending ioctl take proper action. Think about multiple context trying
> to reset GPU ...
>
> Really this is the best option, the rw locking wont induce any lock
> contention execept in gpu reset case which is anyway bad news.
Why? That makes no sense to me. Well, I don't want to prevent lock
contention, but to understand why we should add locking at the ioctl level.
That violates locking rule number one, "lock data instead of code" (or in
our case "lock hardware access instead of code path"), and it really is
the reason why we ended up with the cs_mutex protecting practically
everything.

Multiple contexts trying to reset the GPU should be pretty much fine; currently
it would just reset the GPU twice, but in the future asic_reset should
be much more fine-grained and only reset the parts of the GPU which
really need a reset.

Cheers,
Christian.
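
For contrast, a rough sketch of the alternative Christian describes, where the lock sits around the actual hardware submission rather than at the ioctl entry points; the lock and helper names here are purely illustrative, not the existing radeon ring API.

/* Illustrative only: one choke point where commands actually reach the
 * ring, so every caller (ioctl, ttm, pageflip, ...) is covered by the same
 * lock without touching the ioctl handlers themselves. */
static int example_ring_write(struct radeon_device *rdev,
			      struct radeon_ib *ib);	/* hypothetical low-level submit */

static int example_ring_submit(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int r;

	mutex_lock(&rdev->example_ring_lock);	/* illustrative lock, not an existing field */
	r = example_ring_write(rdev, ib);
	mutex_unlock(&rdev->example_ring_lock);
	return r;
}
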
Jerome Glisse July 2, 2012, 5:27 p.m. UTC | #5
On Mon, Jul 2, 2012 at 1:05 PM, Christian König <deathsimple@vodafone.de> wrote:
> On 02.07.2012 18:41, Jerome Glisse wrote:
>>
>> On Mon, Jul 2, 2012 at 12:26 PM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>>
>>> On 02.07.2012 17:39, j.glisse@gmail.com wrote:
>>>>
>>>> From: Jerome Glisse <jglisse@redhat.com>
>>>>
>>>> GPU reset need to be exclusive, one happening at a time. For this
>>>> add a rw semaphore so that any path that trigger GPU activities
>>>> have to take the semaphore as a reader thus allowing concurency.
>>>>
>>>> The GPU reset path take the semaphore as a writer ensuring that
>>>> no concurrent reset take place.
>>>>
>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>
>>> NAK, that isn't as bad as the cs mutex was but still to complicated. It's
>>> just too far up in the call stack, e.g. it tries to catch ioctl
>>> operations,
>>> instead of catching the underlying hardware operation which is caused by
>>> the
>>> ioctl/ttm/etc...
>>>
>>> Why not just take the ring look as I suggested?
>>>
>>>
>> No we can't use ring lock, we need to protect suspend/resume path and
>> we need an exclusive lock for that so we need a reset mutex at the
>> very least. But instead of having a reset mutex i prefer using a rw
>> lock so that we can stop ioctl until a reset goes through an let
>> pending ioctl take proper action. Think about multiple context trying
>> to reset GPU ...
>>
>> Really this is the best option, the rw locking wont induce any lock
>> contention execept in gpu reset case which is anyway bad news.
>
> Why? That makes no sense to me. Well I don't want to prevent lock
> contention, but understand why we should add locking at the ioctl level.
> That violates locking rule number one "lock data instead of code" (or in our
> case "lock hardware access instead of code path") and it really is the
> reason why we ended up with the cs_mutex protecting practically everything.
>
> Multiple context trying to reset the GPU should be pretty fine, current it
> would just reset the GPU twice, but in the future asic_reset should be much
> more fine grained and only reset the parts of the GPU which really needs an
> reset.
>
> Cheers,
> Christian.

No, multiple resets are not fine; try it yourself and you will see all
kinds of oopses (I strongly advise you to sync your hard drive before
stress-testing that). Yes, we need to protect the code path, because the
suspend/resume code path is a special one: it touches a lot of data in
the device structure. GPU reset is a rare event, or should be a rare event.

I stress that we need at the very least a mutex to protect GPU reset, and I
will stand on that position because there is no other way around it.
Using a rw lock has the bonus of allowing proper handling of GPU reset
failure, and that is what the patch I sent to Linus' fixes tree is about. So,
to make drm-next merge properly while preserving proper behavior on
GPU reset failure, the rw semaphore is the best option.

Cheers,
Jerome
Christian König July 3, 2012, 9:26 a.m. UTC | #6
On 02.07.2012 19:27, Jerome Glisse wrote:
> On Mon, Jul 2, 2012 at 1:05 PM, Christian König <deathsimple@vodafone.de> wrote:
>> On 02.07.2012 18:41, Jerome Glisse wrote:
>>> On Mon, Jul 2, 2012 at 12:26 PM, Christian König
>>> <deathsimple@vodafone.de> wrote:
>>>> On 02.07.2012 17:39, j.glisse@gmail.com wrote:
>>>>> From: Jerome Glisse <jglisse@redhat.com>
>>>>>
>>>>> GPU reset need to be exclusive, one happening at a time. For this
>>>>> add a rw semaphore so that any path that trigger GPU activities
>>>>> have to take the semaphore as a reader thus allowing concurency.
>>>>>
>>>>> The GPU reset path take the semaphore as a writer ensuring that
>>>>> no concurrent reset take place.
>>>>>
>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>> NAK, that isn't as bad as the cs mutex was but still to complicated. It's
>>>> just too far up in the call stack, e.g. it tries to catch ioctl
>>>> operations,
>>>> instead of catching the underlying hardware operation which is caused by
>>>> the
>>>> ioctl/ttm/etc...
>>>>
>>>> Why not just take the ring look as I suggested?
>>>>
>>>>
>>> No we can't use ring lock, we need to protect suspend/resume path and
>>> we need an exclusive lock for that so we need a reset mutex at the
>>> very least. But instead of having a reset mutex i prefer using a rw
>>> lock so that we can stop ioctl until a reset goes through an let
>>> pending ioctl take proper action. Think about multiple context trying
>>> to reset GPU ...
>>>
>>> Really this is the best option, the rw locking wont induce any lock
>>> contention execept in gpu reset case which is anyway bad news.
>> Why? That makes no sense to me. Well I don't want to prevent lock
>> contention, but understand why we should add locking at the ioctl level.
>> That violates locking rule number one "lock data instead of code" (or in our
>> case "lock hardware access instead of code path") and it really is the
>> reason why we ended up with the cs_mutex protecting practically everything.
>>
>> Multiple context trying to reset the GPU should be pretty fine, current it
>> would just reset the GPU twice, but in the future asic_reset should be much
>> more fine grained and only reset the parts of the GPU which really needs an
>> reset.
>>
>> Cheers,
>> Christian.
> No multiple reset is not fine, try it your self and you will see all
> kind of oops (strongly advise you to sync you hd before stress testing
> that). Yes we need to protect code path because suspend/resume code
> path is special one it touch many data in the device structure. GPU
> reset is a rare event or should be a rare event.
Yeah, but I thought of fixing those oopses as the second step. I see the
fact that suspend/resume is unpinning all the TTM memory and then
pinning it again as a bug that needs to be fixed. Or, as an alternative,
we could split the suspend/resume calls into suspend/disable and
resume/enable calls, where we only call disable/enable in the gpu_reset
path rather than a complete suspend/resume (not really sure about that).

Also, a GPU reset isn't such a rare event; currently it just occurs when
userspace is doing something bad, for example submitting an invalid
shader or stuff like that. But with VM and IO protection coming into the
driver, we are going to need a GPU reset when there is a protection
fault, and with that it is really desirable to reset the hardware
in a way where we can say: this IB was faulty, skip over it and resume
with whatever is after it on the ring.

And to do that we need to keep the auxiliary data, like the sub-allocator
memory, the blitting shader BO, and especially the VM page tables, at the
same place in hardware memory.
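
A very rough sketch of the suspend/disable and resume/enable split described above; the disable/enable helpers are hypothetical and only meant to make the shape of the idea concrete.

static void example_asic_disable(struct radeon_device *rdev);	/* hypothetical: stop rings and irqs only */
static void example_asic_enable(struct radeon_device *rdev);	/* hypothetical: restart rings, re-emit state */

/* Hypothetical split: disable/enable only quiesce and restart the ASIC,
 * without unpinning and re-pinning BOs the way a full suspend/resume does,
 * so the suballocator memory, the blit shader BO and the VM page tables
 * stay where they are in VRAM. */
static int example_gpu_reset_light(struct radeon_device *rdev)
{
	int r;

	example_asic_disable(rdev);
	r = radeon_asic_reset(rdev);	/* existing per-ASIC reset hook */
	example_asic_enable(rdev);
	return r;
}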

> I stress it we need at very least a mutex to protect gpu reset and i
> will stand on that position because there is no other way around.
> Using rw lock have a bonus of allowing proper handling of gpu reset
> failure and that what the patch i sent to linus fix tree is about, so
> to make drm next merge properly while preserving proper behavior in
> gpu reset failure the rw semaphore is the best option.
Oh well, I'm not arguing that we don't need a lock here. I'm just
questioning putting it at the ioctl level (i.e. the driver entry from
userspace); that wasn't such a good idea with the cs_mutex and doesn't
seem like a good idea now. Instead we should place the locking between
the ioctl/ttm and the actual hardware submission, and that brings it
pretty close (if not identical) to the ring mutex.

Cheers,
Christian.
Jerome Glisse July 3, 2012, 2:09 p.m. UTC | #7
On Tue, Jul 3, 2012 at 5:26 AM, Christian König <deathsimple@vodafone.de> wrote:
> On 02.07.2012 19:27, Jerome Glisse wrote:
>>
>> On Mon, Jul 2, 2012 at 1:05 PM, Christian König <deathsimple@vodafone.de>
>> wrote:
>>>
>>> On 02.07.2012 18:41, Jerome Glisse wrote:
>>>>
>>>> On Mon, Jul 2, 2012 at 12:26 PM, Christian König
>>>> <deathsimple@vodafone.de> wrote:
>>>>>
>>>>> On 02.07.2012 17:39, j.glisse@gmail.com wrote:
>>>>>>
>>>>>> From: Jerome Glisse <jglisse@redhat.com>
>>>>>>
>>>>>> GPU reset need to be exclusive, one happening at a time. For this
>>>>>> add a rw semaphore so that any path that trigger GPU activities
>>>>>> have to take the semaphore as a reader thus allowing concurency.
>>>>>>
>>>>>> The GPU reset path take the semaphore as a writer ensuring that
>>>>>> no concurrent reset take place.
>>>>>>
>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>>
>>>>> NAK, that isn't as bad as the cs mutex was but still to complicated.
>>>>> It's
>>>>> just too far up in the call stack, e.g. it tries to catch ioctl
>>>>> operations,
>>>>> instead of catching the underlying hardware operation which is caused
>>>>> by
>>>>> the
>>>>> ioctl/ttm/etc...
>>>>>
>>>>> Why not just take the ring look as I suggested?
>>>>>
>>>>>
>>>> No we can't use ring lock, we need to protect suspend/resume path and
>>>> we need an exclusive lock for that so we need a reset mutex at the
>>>> very least. But instead of having a reset mutex i prefer using a rw
>>>> lock so that we can stop ioctl until a reset goes through an let
>>>> pending ioctl take proper action. Think about multiple context trying
>>>> to reset GPU ...
>>>>
>>>> Really this is the best option, the rw locking wont induce any lock
>>>> contention execept in gpu reset case which is anyway bad news.
>>>
>>> Why? That makes no sense to me. Well I don't want to prevent lock
>>> contention, but understand why we should add locking at the ioctl level.
>>> That violates locking rule number one "lock data instead of code" (or in
>>> our
>>> case "lock hardware access instead of code path") and it really is the
>>> reason why we ended up with the cs_mutex protecting practically
>>> everything.
>>>
>>> Multiple context trying to reset the GPU should be pretty fine, current
>>> it
>>> would just reset the GPU twice, but in the future asic_reset should be
>>> much
>>> more fine grained and only reset the parts of the GPU which really needs
>>> an
>>> reset.
>>>
>>> Cheers,
>>> Christian.
>>
>> No multiple reset is not fine, try it your self and you will see all
>> kind of oops (strongly advise you to sync you hd before stress testing
>> that). Yes we need to protect code path because suspend/resume code
>> path is special one it touch many data in the device structure. GPU
>> reset is a rare event or should be a rare event.
>
> Yeah, but I thought that fixing those oops as the second step. I see the
> fact that suspend/resume is unpinning all the ttm memory and then pinning it
> again as a bug that needs to be fixed. Or as an alternative we could split
> the suspend/resume calls into suspend/disable and resume/enable calls, where
> we only call disable/enable in the gpu_reset path rather than a complete
> suspend/resume (not really sure about that).

Fixing oopses is not the second step, it is the first step. I am not saying
that the suspend/resume as it happens right now is a good thing; I am
saying it is what it is, and we have to deal with it until we do
something better. There is no excuse not to fix an oops with a simple
16-line patch.

> Also a GPU reset isn't such a rare event, currently it just occurs when
> userspace is doing something bad, for example submitting an invalid shader
> or stuff like that. But with VM and IO protection coming into the driver we
> are going to need a GPU reset when there is an protection fault, and with
> that it is really desirable to just reset the hardware in a way where we can
> say: This IB was faulty skip over it and resume with whatever is after it on
> the ring.

There is a mechanism to handle that properly from the irq handler that AMD
needs to release; the pagefault thing could be transparent and should
only need a few lines in the irq handler (I think I did a patch for that
and sent it to AMD for review, but I am wondering if I wasn't lacking
some doc).

> And todo that we need to keep the auxiliary data like sub allocator memory,
> blitting shader bo, and especially vm page tables at the same place in
> hardware memory.

I agree that we need a lightweight reset, but we need to keep the heavy
reset as it is right now; if you want to do a lightweight reset, do it
as a new function. I always intended to have two reset paths (hint: the
"GPU soft reset" name vs. what is called hard reset but was never released).
I even made patches for that a long time ago but never got them cleared
through AMD review.

>
>> I stress it we need at very least a mutex to protect gpu reset and i
>> will stand on that position because there is no other way around.
>> Using rw lock have a bonus of allowing proper handling of gpu reset
>> failure and that what the patch i sent to linus fix tree is about, so
>> to make drm next merge properly while preserving proper behavior in
>> gpu reset failure the rw semaphore is the best option.
>
> Oh well, I'm not arguing that we don't need a look here. I'm just
> questioning to put it at the ioctl level (e.g. the driver entry from
> userspace), that wasn't such a good idea with the cs_mutex and doesn't seems
> like a good idea now. Instead we should place the looking between the
> ioctl/ttm and the actual hardware submission and that brings it pretty close
> (if not identically) to the ring mutex.
>
> Cheers,
> Christian.

No, locking at the ioctl level makes sense. Please show me figures that
it hurts performance; I did a quick sysprof and I couldn't see the locks
impacting anything. No matter how much you hate this, this is the best
solution: it avoids each ioctl doing useless things in case of a GPU
lockup, and it touches a lot less code than moving a lock down the call
path would. So again, this is the best solution for the heavy reset,
and I am not saying that a soft reset would need to take this lock or
that we can't improve the way it's done. All I am saying is that the ring
lock is the wrong solution for the heavy reset; it should be OK for a
lightweight reset.

Cheers,
Jerome
Christian König July 3, 2012, 2:52 p.m. UTC | #8
On 03.07.2012 16:09, Jerome Glisse wrote:
> On Tue, Jul 3, 2012 at 5:26 AM, Christian König <deathsimple@vodafone.de> wrote:
>> [SNIP]
>> Yeah, but I thought that fixing those oops as the second step. I see the
>> fact that suspend/resume is unpinning all the ttm memory and then pinning it
>> again as a bug that needs to be fixed. Or as an alternative we could split
>> the suspend/resume calls into suspend/disable and resume/enable calls, where
>> we only call disable/enable in the gpu_reset path rather than a complete
>> suspend/resume (not really sure about that).
> Fixing oops are not second step, they are first step. I am not saying
> that the suspend/resume as it happens right now is a good thing, i am
> saying it's what it's and we have to deal with it until we do
> something better. There is no excuse to not fix oops with a simple
> patch 16 lines patch.
Completely agree.

>> Also a GPU reset isn't such a rare event, currently it just occurs when
>> userspace is doing something bad, for example submitting an invalid shader
>> or stuff like that. But with VM and IO protection coming into the driver we
>> are going to need a GPU reset when there is an protection fault, and with
>> that it is really desirable to just reset the hardware in a way where we can
>> say: This IB was faulty skip over it and resume with whatever is after it on
>> the ring.
> There is mecanism to handle that properly from irq handler that AMD
> need to release, the pagefault thing could be transparent and should
> only need few lines in the irq handler (i think i did a patch for that
> and sent it to AMD for review but i am wondering if i wasn't lacking
> some doc).
I also searched the AMD internal docs for a good description of how the
lightweight recovery is supposed to work, but haven't found anything
clear so far. My expectation was that there should be something like an
"abort current IB" command you can issue by writing a register, but
that doesn't seem to be the case.

>> And todo that we need to keep the auxiliary data like sub allocator memory,
>> blitting shader bo, and especially vm page tables at the same place in
>> hardware memory.
> I agree that we need a lightweight reset but we need to keep the heavy
> reset as it is right now, if you want to do a light weight reset do it
> as a new function. I always intended to have two reset path, hint gpu
> soft reset name vs what is call hard reset but not released, i even
> made patch for that long time ago but never got them cleared from AMD
> review.
My idea was to pass in some extra information, so asic_reset more
clearly knows what to do. An explicit distinction between a soft and a
hard reset also seems like a possible solution, but it sounds like a bit
of code duplication.
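
To make the "extra information" idea concrete, one possibility (hypothetical, not existing radeon code) would be an explicit level passed down to the reset path:

/* Hypothetical way to tell the reset path how much to tear down. */
enum example_reset_level {
	EXAMPLE_RESET_SOFT,	/* skip the faulty IB, keep VRAM and page tables */
	EXAMPLE_RESET_HARD,	/* full suspend/resume style recovery */
};

static int example_soft_recover(struct radeon_device *rdev);	/* hypothetical */
static int example_hard_recover(struct radeon_device *rdev);	/* hypothetical */

static int example_gpu_reset(struct radeon_device *rdev,
			     enum example_reset_level level)
{
	if (level == EXAMPLE_RESET_SOFT)
		return example_soft_recover(rdev);
	return example_hard_recover(rdev);
}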

>>> I stress it we need at very least a mutex to protect gpu reset and i
>>> will stand on that position because there is no other way around.
>>> Using rw lock have a bonus of allowing proper handling of gpu reset
>>> failure and that what the patch i sent to linus fix tree is about, so
>>> to make drm next merge properly while preserving proper behavior in
>>> gpu reset failure the rw semaphore is the best option.
>> Oh well, I'm not arguing that we don't need a look here. I'm just
>> questioning to put it at the ioctl level (e.g. the driver entry from
>> userspace), that wasn't such a good idea with the cs_mutex and doesn't seems
>> like a good idea now. Instead we should place the looking between the
>> ioctl/ttm and the actual hardware submission and that brings it pretty close
>> (if not identically) to the ring mutex.
>>
>> Cheers,
>> Christian.
> No, locking at the ioctl level make sense please show me figure that
> it hurt performance, i did a quick sysprof and i couldn't see them
> impacting anything. No matter how much you hate this, this is the best
> solution, it avoids each ioctl to do useless things in case of gpu
> lockup and it touch a lot less code than moving a lock down the call
> path would. So again this is the best solution for the heavy reset,
> and i am not saying that a soft reset would need to take this lock or
> that we can't improve the way it's done. All i am saying is that ring
> lock is the wrong solution for heavy reset, it should be ok for light
> weight reset.
>
I don't have any performance concerns; it just doesn't seem to be the
right place to me. So are you really sure that the
ttm_bo_delayed_workqueue, pageflips, or callbacks to radeon_bo_move
can't hit us here? IIRC that was always the big concern with the
cs_mutex not being held in all cases.

Anyway, if you think it will work and fix the crash problem at hand, then
I'm OK with committing it.

Christian.
Jerome Glisse July 3, 2012, 3:15 p.m. UTC | #9
On Tue, Jul 3, 2012 at 10:52 AM, Christian König
<deathsimple@vodafone.de> wrote:
> On 03.07.2012 16:09, Jerome Glisse wrote:
>>
>> On Tue, Jul 3, 2012 at 5:26 AM, Christian König <deathsimple@vodafone.de>
>> wrote:
>>>
>>> [SNIP]
>>>
>>> Yeah, but I thought that fixing those oops as the second step. I see the
>>> fact that suspend/resume is unpinning all the ttm memory and then pinning
>>> it
>>> again as a bug that needs to be fixed. Or as an alternative we could
>>> split
>>> the suspend/resume calls into suspend/disable and resume/enable calls,
>>> where
>>> we only call disable/enable in the gpu_reset path rather than a complete
>>> suspend/resume (not really sure about that).
>>
>> Fixing oops are not second step, they are first step. I am not saying
>> that the suspend/resume as it happens right now is a good thing, i am
>> saying it's what it's and we have to deal with it until we do
>> something better. There is no excuse to not fix oops with a simple
>> patch 16 lines patch.
>
> Completely agree.
>
>
>>> Also a GPU reset isn't such a rare event, currently it just occurs when
>>> userspace is doing something bad, for example submitting an invalid
>>> shader
>>> or stuff like that. But with VM and IO protection coming into the driver
>>> we
>>> are going to need a GPU reset when there is an protection fault, and with
>>> that it is really desirable to just reset the hardware in a way where we
>>> can
>>> say: This IB was faulty skip over it and resume with whatever is after it
>>> on
>>> the ring.
>>
>> There is mecanism to handle that properly from irq handler that AMD
>> need to release, the pagefault thing could be transparent and should
>> only need few lines in the irq handler (i think i did a patch for that
>> and sent it to AMD for review but i am wondering if i wasn't lacking
>> some doc).
>
> I also searched the AMD internal docs for a good description of how the
> lightweight recovery is supposed to work, but haven't found anything clear
> so far. My expectation is that there should be something like a "abort
> current IB" command you can issue by writing an register, but that doesn't
> seems to be the case.

Docs are scarce on that; my understanding is that you can't skip the IB
that triggered the fault. The handling was designed with page faults
in mind, i.e. you page in the missing page and you resume the IB. That
being said, I think one can abuse that by moving the rptr of the ring
and resuming.

>>> And todo that we need to keep the auxiliary data like sub allocator
>>> memory,
>>> blitting shader bo, and especially vm page tables at the same place in
>>> hardware memory.
>>
>> I agree that we need a lightweight reset but we need to keep the heavy
>> reset as it is right now, if you want to do a light weight reset do it
>> as a new function. I always intended to have two reset path, hint gpu
>> soft reset name vs what is call hard reset but not released, i even
>> made patch for that long time ago but never got them cleared from AMD
>> review.
>
> My idea was to pass in some extra informations, so asic_reset more clearly
> knows what todo. An explicit distinction between a soft and a hard reset
> also seems like a possible solution, but sounds like a bit of code
> duplication.

I think people should stop with the mantra of "don't duplicate code". I do
agree that it's better not to duplicate code, but in some cases there is
no way around it, and it's worse to try sharing the code. I think the
worst example in the radeon kernel is the modesetting section, where we try
to share the same code for all the different variations of DCE, but in the
end it's a mess of if/else/switch/case and whatnot. Code duplication is
sometimes the cleanest and best choice; might we fix a bug in one place and
not the other? Yes, it might happen, but it's life ;)

>
>>>> I stress it we need at very least a mutex to protect gpu reset and i
>>>> will stand on that position because there is no other way around.
>>>> Using rw lock have a bonus of allowing proper handling of gpu reset
>>>> failure and that what the patch i sent to linus fix tree is about, so
>>>> to make drm next merge properly while preserving proper behavior in
>>>> gpu reset failure the rw semaphore is the best option.
>>>
>>> Oh well, I'm not arguing that we don't need a look here. I'm just
>>> questioning to put it at the ioctl level (e.g. the driver entry from
>>> userspace), that wasn't such a good idea with the cs_mutex and doesn't
>>> seems
>>> like a good idea now. Instead we should place the looking between the
>>> ioctl/ttm and the actual hardware submission and that brings it pretty
>>> close
>>> (if not identically) to the ring mutex.
>>>
>>> Cheers,
>>> Christian.
>>
>> No, locking at the ioctl level make sense please show me figure that
>> it hurt performance, i did a quick sysprof and i couldn't see them
>> impacting anything. No matter how much you hate this, this is the best
>> solution, it avoids each ioctl to do useless things in case of gpu
>> lockup and it touch a lot less code than moving a lock down the call
>> path would. So again this is the best solution for the heavy reset,
>> and i am not saying that a soft reset would need to take this lock or
>> that we can't improve the way it's done. All i am saying is that ring
>> lock is the wrong solution for heavy reset, it should be ok for light
>> weight reset.
>>
> I'm not into any performance concerns, it just doesn't seems to be the right
> place to me. So are you really sure that the ttm_bo_delayed_workqueue,
>  pageflips or callbacks to radeon_bo_move can't hit us here? IIRC that
> always was the big concern with the cs_mutex not being held in all cases.
>
> Anyway, if you think it will work and fix the crash problem at hand then I'm
> ok with commit it.

On GPU lockup the assumption has always been that the irq handler won't
be called, thus no pageflip. For TTM, the reset function suspends the TTM
delayed workqueue. Suspending BO creation/deletion and domain changes,
as my patch does, avoids the other ways any TTM work could start. I don't
think I have missed anything, and I am quite confident I did not, ... but
you never know.

Cheers,
Jerome

Patch

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 77b4519b..29d6986 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1446,6 +1446,7 @@  struct radeon_device {
 	struct device			*dev;
 	struct drm_device		*ddev;
 	struct pci_dev			*pdev;
+	struct rw_semaphore		exclusive_lock;
 	/* ASIC */
 	union radeon_asic_config	config;
 	enum radeon_family		family;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index f1b7527..7ee6491 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -499,7 +499,9 @@  int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	struct radeon_cs_parser parser;
 	int r;
 
+	down_read(&rdev->exclusive_lock);
 	if (!rdev->accel_working) {
+		up_read(&rdev->exclusive_lock);
 		return -EBUSY;
 	}
 	/* initialize parser */
@@ -512,6 +514,7 @@  int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r) {
 		DRM_ERROR("Failed to initialize parser !\n");
 		radeon_cs_parser_fini(&parser, r);
+		up_read(&rdev->exclusive_lock);
 		r = radeon_cs_handle_lockup(rdev, r);
 		return r;
 	}
@@ -520,6 +523,7 @@  int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("Failed to parse relocation %d!\n", r);
 		radeon_cs_parser_fini(&parser, r);
+		up_read(&rdev->exclusive_lock);
 		r = radeon_cs_handle_lockup(rdev, r);
 		return r;
 	}
@@ -533,6 +537,7 @@  int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	}
 out:
 	radeon_cs_parser_fini(&parser, r);
+	up_read(&rdev->exclusive_lock);
 	r = radeon_cs_handle_lockup(rdev, r);
 	return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index f654ba8..c8fdb40 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -988,6 +988,7 @@  int radeon_gpu_reset(struct radeon_device *rdev)
 	int r;
 	int resched;
 
+	down_write(&rdev->exclusive_lock);
 	radeon_save_bios_scratch_regs(rdev);
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
@@ -1007,6 +1008,7 @@  int radeon_gpu_reset(struct radeon_device *rdev)
 		dev_info(rdev->dev, "GPU reset failed\n");
 	}
 
+	up_write(&rdev->exclusive_lock);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index d9b0809..f99db63 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -215,12 +215,14 @@  int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
 	uint32_t handle;
 	int r;
 
+	down_read(&rdev->exclusive_lock);
 	/* create a gem object to contain this object in */
 	args->size = roundup(args->size, PAGE_SIZE);
 	r = radeon_gem_object_create(rdev, args->size, args->alignment,
 					args->initial_domain, false,
 					false, &gobj);
 	if (r) {
+		up_read(&rdev->exclusive_lock);
 		r = radeon_gem_handle_lockup(rdev, r);
 		return r;
 	}
@@ -228,10 +230,12 @@  int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_unreference_unlocked(gobj);
 	if (r) {
+		up_read(&rdev->exclusive_lock);
 		r = radeon_gem_handle_lockup(rdev, r);
 		return r;
 	}
 	args->handle = handle;
+	up_read(&rdev->exclusive_lock);
 	return 0;
 }
 
@@ -240,6 +244,7 @@  int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 {
 	/* transition the BO to a domain -
 	 * just validate the BO into a certain domain */
+	struct radeon_device *rdev = dev->dev_private;
 	struct drm_radeon_gem_set_domain *args = data;
 	struct drm_gem_object *gobj;
 	struct radeon_bo *robj;
@@ -255,9 +260,11 @@  int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	}
 	robj = gem_to_radeon_bo(gobj);
 
+	down_read(&rdev->exclusive_lock);
 	r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain);
 
 	drm_gem_object_unreference_unlocked(gobj);
+	up_read(&rdev->exclusive_lock);
 	r = radeon_gem_handle_lockup(robj->rdev, r);
 	return r;
 }