diff mbox series

drm/amd/display: Clear dm_state for fast updates

Message ID M0lxN5AUlPvzBKULfIBe5BZRwfQGXeMQCdWItYcQ-9P79y32WzExYK2Y0DwyNVtyGelqbvV07_lFk1oeT4cApbT-P_oH0bnxQbMmFsJv_xg=@protonmail.com (mailing list archive)
State New, archived
Headers show
Series drm/amd/display: Clear dm_state for fast updates | expand

Commit Message

Mazin Rezk July 27, 2020, 5:40 a.m. UTC
This patch fixes a race condition that causes a use-after-free during
amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
are requested and the second one finishes before the first. Essentially,
this bug occurs when the following sequence of events happens:

1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
deferred to the workqueue.

2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
deferred to the workqueue.

3. Commit #2 starts before commit #1, dm_state #1 is used in the
commit_tail and commit #2 completes, freeing dm_state #1.

4. Commit #1 starts after commit #2 completes, uses the freed dm_state
#1 and dereferences a freelist pointer while setting the context.

Since this bug has only been spotted with fast commits, this patch fixes
the bug by clearing the dm_state instead of using the old dc_state for
fast updates. In addition, since dm_state is only used for its dc_state
and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
removing the dm_state should not have any consequences in fast updates.

This use-after-free bug has existed for a while now, but only caused a
noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
freelist pointer to middle of object") moving the freelist pointer from
dm_state->base (which was unused) to dm_state->context (which is
dereferenced).

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast updates")
Reported-by: Duncan <1i5t5.duncan@cox.net>
Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++++++++++++++-----
 1 file changed, 27 insertions(+), 9 deletions(-)

--
2.27.0

Comments

Mazin Rezk July 27, 2020, 5:53 a.m. UTC | #1
On Monday, July 27, 2020 1:40 AM, Mazin Rezk <mnrzk@protonmail.com> wrote:
> This patch fixes a race condition that causes a use-after-free during
> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
> are requested and the second one finishes before the first. Essentially,
> this bug occurs when the following sequence of events happens:
>
> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> deferred to the workqueue.
>
> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> deferred to the workqueue.
>
> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> commit_tail and commit #2 completes, freeing dm_state #1.
>
> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> 1 and dereferences a freelist pointer while setting the context.
>
> Since this bug has only been spotted with fast commits, this patch fixes
> the bug by clearing the dm_state instead of using the old dc_state for
> fast updates. In addition, since dm_state is only used for its dc_state
> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
> removing the dm_state should not have any consequences in fast updates.
>
> This use-after-free bug has existed for a while now, but only caused a
> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
> freelist pointer to middle of object") moving the freelist pointer from
> dm_state->base (which was unused) to dm_state->context (which is
> dereferenced).
>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast updates")
> Reported-by: Duncan <1i5t5.duncan@cox.net>
> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++++++++++++++-----
>  1 file changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 86ffa0c2880f..710edc70e37e 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>  		 * the same resource. If we have a new DC context as part of
>  		 * the DM atomic state from validation we need to free it and
>  		 * retain the existing one instead.
> +		 *
> +		 * Furthermore, since the DM atomic state only contains the DC
> +		 * context and can safely be annulled, we can free the state
> +		 * and clear the associated private object now to free
> +		 * some memory and avoid a possible use-after-free later.
>  		 */
> -		struct dm_atomic_state *new_dm_state, *old_dm_state;
>
> -		new_dm_state = dm_atomic_get_new_state(state);
> -		old_dm_state = dm_atomic_get_old_state(state);
> +		for (i = 0; i < state->num_private_objs; i++) {
> +			struct drm_private_obj *obj = state->private_objs[i].ptr;
>
> -		if (new_dm_state && old_dm_state) {
> -			if (new_dm_state->context)
> -				dc_release_state(new_dm_state->context);
> +			if (obj->funcs == adev->dm.atomic_obj.funcs) {
> +				int j = state->num_private_objs-1;
>
> -			new_dm_state->context = old_dm_state->context;
> +				dm_atomic_destroy_state(obj,
> +						state->private_objs[i].state);
> +
> +				/* If i is not at the end of the array then the
> +				 * last element needs to be moved to where i was
> +				 * before the array can safely be truncated.
> +				 */
> +				if (i != j)
> +					state->private_objs[i] =
> +						state->private_objs[j];
>
> -			if (old_dm_state->context)
> -				dc_retain_state(old_dm_state->context);
> +				state->private_objs[j].ptr = NULL;
> +				state->private_objs[j].state = NULL;
> +				state->private_objs[j].old_state = NULL;
> +				state->private_objs[j].new_state = NULL;
> +
> +				state->num_private_objs = j;
> +				break;
> +			}
>  		}
>  	}
>
> --
> 2.27.0
>

I have tested this on 5.8.0-rc6 w/ an RX 480 for 8 hours and I have not had
the crash described in the Bugzilla thread. I will also be running this
patch on my kernel for the next couple of days to further confirm that this
is working. In addition, I will ask the users in the Bugzilla thread to
test this patch and confirm that it works.

My previous attempt to patch this bug ("amdgpu_dm: fix nonblocking atomic
commit use-after-free") [1] was unsuccessful and this patch is meant to
replace it. That said, this isn't necessarily a v2 of that patch, since
this patch works very differently from the other patch.

[1] https://lkml.org/lkml/2020/7/23/1123

Thanks,
Mazin Rezk
Kazlauskas, Nicholas July 27, 2020, 1:26 p.m. UTC | #2
On 2020-07-27 1:40 a.m., Mazin Rezk wrote:
> This patch fixes a race condition that causes a use-after-free during
> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
> are requested and the second one finishes before the first. Essentially,
> this bug occurs when the following sequence of events happens:
> 
> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> deferred to the workqueue.
> 
> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> deferred to the workqueue.
> 
> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> commit_tail and commit #2 completes, freeing dm_state #1.
> 
> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> 1 and dereferences a freelist pointer while setting the context.
> 
> Since this bug has only been spotted with fast commits, this patch fixes
> the bug by clearing the dm_state instead of using the old dc_state for
> fast updates. In addition, since dm_state is only used for its dc_state
> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
> removing the dm_state should not have any consequences in fast updates.
> 
> This use-after-free bug has existed for a while now, but only caused a
> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
> freelist pointer to middle of object") moving the freelist pointer from
> dm_state->base (which was unused) to dm_state->context (which is
> dereferenced).
> 
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast updates")
> Reported-by: Duncan <1i5t5.duncan@cox.net>
> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> ---
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++++++++++++++-----
>   1 file changed, 27 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 86ffa0c2880f..710edc70e37e 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>   		 * the same resource. If we have a new DC context as part of
>   		 * the DM atomic state from validation we need to free it and
>   		 * retain the existing one instead.
> +		 *
> +		 * Furthermore, since the DM atomic state only contains the DC
> +		 * context and can safely be annulled, we can free the state
> +		 * and clear the associated private object now to free
> +		 * some memory and avoid a possible use-after-free later.
>   		 */
> -		struct dm_atomic_state *new_dm_state, *old_dm_state;
> 
> -		new_dm_state = dm_atomic_get_new_state(state);
> -		old_dm_state = dm_atomic_get_old_state(state);
> +		for (i = 0; i < state->num_private_objs; i++) {
> +			struct drm_private_obj *obj = state->private_objs[i].ptr;
> 
> -		if (new_dm_state && old_dm_state) {
> -			if (new_dm_state->context)
> -				dc_release_state(new_dm_state->context);
> +			if (obj->funcs == adev->dm.atomic_obj.funcs) {
> +				int j = state->num_private_objs-1;
> 
> -			new_dm_state->context = old_dm_state->context;
> +				dm_atomic_destroy_state(obj,
> +						state->private_objs[i].state);
> +
> +				/* If i is not at the end of the array then the
> +				 * last element needs to be moved to where i was
> +				 * before the array can safely be truncated.
> +				 */
> +				if (i != j)
> +					state->private_objs[i] =
> +						state->private_objs[j];
> 
> -			if (old_dm_state->context)
> -				dc_retain_state(old_dm_state->context);
> +				state->private_objs[j].ptr = NULL;
> +				state->private_objs[j].state = NULL;
> +				state->private_objs[j].old_state = NULL;
> +				state->private_objs[j].new_state = NULL;
> +
> +				state->num_private_objs = j;
> +				break;
> +			}

In the bug report itself I mentioned that I don't really like hacking 
around the DRM core for resolving this patch but to go into more 
specifics, it's really two issues of code maintenance:

1. It's iterating over internal structures and layout of private objects 
in the state and modifying the state. The core doesn't really guarantee 
how these things are going to be laid out and it may change in the future.

2. It's freeing an allocation we don't own from DM. DRM doesn't track 
this state elsewhere for purposes of freeing, but nothing is really 
stopping the core from doing this later down the line.

The implementation itself is correct from a technical perspective, but 
I'd rather it reside in DRM as a helper for code maintenance purposes.

Regards,
Nicholas Kazlauskas

>   		}
>   	}
> 
> --
> 2.27.0
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
Christian König July 27, 2020, 1:39 p.m. UTC | #3
Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> This patch fixes a race condition that causes a use-after-free during
> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
> are requested and the second one finishes before the first. Essentially,
> this bug occurs when the following sequence of events happens:
>
> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> deferred to the workqueue.
>
> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> deferred to the workqueue.
>
> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> commit_tail and commit #2 completes, freeing dm_state #1.
>
> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> 1 and dereferences a freelist pointer while setting the context.

Well I only have a one mile high view on this, but why don't you let the 
work items execute in order?

That would be better anyway cause this way we don't trigger a cache line 
ping pong between CPUs.

Christian.

>
> Since this bug has only been spotted with fast commits, this patch fixes
> the bug by clearing the dm_state instead of using the old dc_state for
> fast updates. In addition, since dm_state is only used for its dc_state
> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
> removing the dm_state should not have any consequences in fast updates.
>
> This use-after-free bug has existed for a while now, but only caused a
> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
> freelist pointer to middle of object") moving the freelist pointer from
> dm_state->base (which was unused) to dm_state->context (which is
> dereferenced).
>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast updates")
> Reported-by: Duncan <1i5t5.duncan@cox.net>
> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> ---
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++++++++++++++-----
>   1 file changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 86ffa0c2880f..710edc70e37e 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>   		 * the same resource. If we have a new DC context as part of
>   		 * the DM atomic state from validation we need to free it and
>   		 * retain the existing one instead.
> +		 *
> +		 * Furthermore, since the DM atomic state only contains the DC
> +		 * context and can safely be annulled, we can free the state
> +		 * and clear the associated private object now to free
> +		 * some memory and avoid a possible use-after-free later.
>   		 */
> -		struct dm_atomic_state *new_dm_state, *old_dm_state;
>
> -		new_dm_state = dm_atomic_get_new_state(state);
> -		old_dm_state = dm_atomic_get_old_state(state);
> +		for (i = 0; i < state->num_private_objs; i++) {
> +			struct drm_private_obj *obj = state->private_objs[i].ptr;
>
> -		if (new_dm_state && old_dm_state) {
> -			if (new_dm_state->context)
> -				dc_release_state(new_dm_state->context);
> +			if (obj->funcs == adev->dm.atomic_obj.funcs) {
> +				int j = state->num_private_objs-1;
>
> -			new_dm_state->context = old_dm_state->context;
> +				dm_atomic_destroy_state(obj,
> +						state->private_objs[i].state);
> +
> +				/* If i is not at the end of the array then the
> +				 * last element needs to be moved to where i was
> +				 * before the array can safely be truncated.
> +				 */
> +				if (i != j)
> +					state->private_objs[i] =
> +						state->private_objs[j];
>
> -			if (old_dm_state->context)
> -				dc_retain_state(old_dm_state->context);
> +				state->private_objs[j].ptr = NULL;
> +				state->private_objs[j].state = NULL;
> +				state->private_objs[j].old_state = NULL;
> +				state->private_objs[j].new_state = NULL;
> +
> +				state->num_private_objs = j;
> +				break;
> +			}
>   		}
>   	}
>
> --
> 2.27.0
>
Kazlauskas, Nicholas July 27, 2020, 2:05 p.m. UTC | #4
On 2020-07-27 9:39 a.m., Christian König wrote:
> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
>> This patch fixes a race condition that causes a use-after-free during
>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
>> are requested and the second one finishes before the first. Essentially,
>> this bug occurs when the following sequence of events happens:
>>
>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
>> deferred to the workqueue.
>>
>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
>> deferred to the workqueue.
>>
>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
>> commit_tail and commit #2 completes, freeing dm_state #1.
>>
>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
>> 1 and dereferences a freelist pointer while setting the context.
> 
> Well I only have a one mile high view on this, but why don't you let the 
> work items execute in order?
> 
> That would be better anyway cause this way we don't trigger a cache line 
> ping pong between CPUs.
> 
> Christian.

We use the DRM helpers for managing drm_atomic_commit_state and those 
helpers internally push non-blocking commit work into the system unbound 
work queue.

While we could duplicate a copy of that code with nothing but the 
workqueue changed that isn't something I'd really like to maintain going 
forward.

Regards,
Nicholas Kazlauskas

> 
>>
>> Since this bug has only been spotted with fast commits, this patch fixes
>> the bug by clearing the dm_state instead of using the old dc_state for
>> fast updates. In addition, since dm_state is only used for its dc_state
>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is 
>> found,
>> removing the dm_state should not have any consequences in fast updates.
>>
>> This use-after-free bug has existed for a while now, but only caused a
>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
>> freelist pointer to middle of object") moving the freelist pointer from
>> dm_state->base (which was unused) to dm_state->context (which is
>> dereferenced).
>>
>> Bugzilla: 
>> https://bugzilla.kernel.org/show_bug.cgi?id=207383 
>>
>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for 
>> fast updates")
>> Reported-by: Duncan <1i5t5.duncan@cox.net>
>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
>> ---
>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++++++++++++++-----
>>   1 file changed, 27 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index 86ffa0c2880f..710edc70e37e 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct 
>> drm_device *dev,
>>            * the same resource. If we have a new DC context as part of
>>            * the DM atomic state from validation we need to free it and
>>            * retain the existing one instead.
>> +         *
>> +         * Furthermore, since the DM atomic state only contains the DC
>> +         * context and can safely be annulled, we can free the state
>> +         * and clear the associated private object now to free
>> +         * some memory and avoid a possible use-after-free later.
>>            */
>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
>>
>> -        new_dm_state = dm_atomic_get_new_state(state);
>> -        old_dm_state = dm_atomic_get_old_state(state);
>> +        for (i = 0; i < state->num_private_objs; i++) {
>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
>>
>> -        if (new_dm_state && old_dm_state) {
>> -            if (new_dm_state->context)
>> -                dc_release_state(new_dm_state->context);
>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
>> +                int j = state->num_private_objs-1;
>>
>> -            new_dm_state->context = old_dm_state->context;
>> +                dm_atomic_destroy_state(obj,
>> +                        state->private_objs[i].state);
>> +
>> +                /* If i is not at the end of the array then the
>> +                 * last element needs to be moved to where i was
>> +                 * before the array can safely be truncated.
>> +                 */
>> +                if (i != j)
>> +                    state->private_objs[i] =
>> +                        state->private_objs[j];
>>
>> -            if (old_dm_state->context)
>> -                dc_retain_state(old_dm_state->context);
>> +                state->private_objs[j].ptr = NULL;
>> +                state->private_objs[j].state = NULL;
>> +                state->private_objs[j].old_state = NULL;
>> +                state->private_objs[j].new_state = NULL;
>> +
>> +                state->num_private_objs = j;
>> +                break;
>> +            }
>>           }
>>       }
>>
>> -- 
>> 2.27.0
>>
>
Duncan July 27, 2020, 3:37 p.m. UTC | #5
On Mon, 27 Jul 2020 10:05:01 -0400
"Kazlauskas, Nicholas" <nicholas.kazlauskas@amd.com> wrote:

> On 2020-07-27 9:39 a.m., Christian König wrote:
> > Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> >> This patch fixes a race condition that causes a use-after-free
> >> during amdgpu_dm_atomic_commit_tail. This can occur when 2
> >> non-blocking commits are requested and the second one finishes
> >> before the first. Essentially, this bug occurs when the following
> >> sequence of events happens:
> >>
> >> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> >> deferred to the workqueue.
> >>
> >> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> >> deferred to the workqueue.
> >>
> >> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> >> commit_tail and commit #2 completes, freeing dm_state #1.
> >>
> >> 4. Commit #1 starts after commit #2 completes, uses the freed
> >> dm_state 1 and dereferences a freelist pointer while setting the
> >> context.
> > 
> > Well I only have a one mile high view on this, but why don't you
> > let the work items execute in order?
> > 
> > That would be better anyway cause this way we don't trigger a cache
> > line ping pong between CPUs.
> > 
> > Christian.
> 
> We use the DRM helpers for managing drm_atomic_commit_state and those 
> helpers internally push non-blocking commit work into the system
> unbound work queue.
> 
> While we could duplicate a copy of that code with nothing but the 
> workqueue changed that isn't something I'd really like to maintain
> going forward.
> 
> Regards,
> Nicholas Kazlauskas

Additionally, I don't see mentioned on-thread (it's on the bug and now
in the details below) that we're talking multi-monitor, not
single-monitor. Presumably that goes some way toward answering the "why
not force order?" question, considering the outputs may be running at
different refresh frequencies, etc...

All the reports on the bug seem to be multi-monitor (after seeing
multi-monitor/output in several reports I asked if anyone was seeing it
with only one monitor and no answers), and as you commented on the bug
for your proposed patch but seems missing from this one here (different
author/proposal) ...

Commits #1 and #2 don't touch any of the same core DRM objects (CRTCs,
Planes, Connectors) so Commit #2 does not stall for Commit #1. DRM
Private Objects have always been avoided in stall checks, so we have no
safety from DRM core in this regard.

> >>
> >> Since this bug has only been spotted with fast commits, this patch
> >> fixes the bug by clearing the dm_state instead of using the old
> >> dc_state for fast updates. In addition, since dm_state is only
> >> used for its dc_state and amdgpu_dm_atomic_commit_tail will retain
> >> the dc_state if none is found,
> >> removing the dm_state should not have any consequences in fast
> >> updates.
> >>
> >> This use-after-free bug has existed for a while now, but only
> >> caused a noticeable issue starting from 5.7-rc1 due to 3202fa62f
> >> ("slub: relocate freelist pointer to middle of object") moving the
> >> freelist pointer from dm_state->base (which was unused) to
> >> dm_state->context (which is dereferenced).
> >>
> >> Bugzilla: 
> >> https://bugzilla.kernel.org/show_bug.cgi?id=207383 
> >>
> >> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> >> for fast updates")
> >> Reported-by: Duncan <1i5t5.duncan@cox.net>
> >> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> >> ---
> >>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> >> ++++++++++++++----- 1 file changed, 27 insertions(+), 9
> >> deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> >> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >> index 86ffa0c2880f..710edc70e37e 100644
> >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct 
> >> drm_device *dev,
> >>            * the same resource. If we have a new DC context as
> >> part of
> >>            * the DM atomic state from validation we need to free
> >> it and
> >>            * retain the existing one instead.
> >> +         *
> >> +         * Furthermore, since the DM atomic state only contains
> >> the DC
> >> +         * context and can safely be annulled, we can free the
> >> state
> >> +         * and clear the associated private object now to free
> >> +         * some memory and avoid a possible use-after-free later.
> >>            */
> >> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
> >>
> >> -        new_dm_state = dm_atomic_get_new_state(state);
> >> -        old_dm_state = dm_atomic_get_old_state(state);
> >> +        for (i = 0; i < state->num_private_objs; i++) {
> >> +            struct drm_private_obj *obj =
> >> state->private_objs[i].ptr;
> >>
> >> -        if (new_dm_state && old_dm_state) {
> >> -            if (new_dm_state->context)
> >> -                dc_release_state(new_dm_state->context);
> >> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
> >> +                int j = state->num_private_objs-1;
> >>
> >> -            new_dm_state->context = old_dm_state->context;
> >> +                dm_atomic_destroy_state(obj,
> >> +                        state->private_objs[i].state);
> >> +
> >> +                /* If i is not at the end of the array then the
> >> +                 * last element needs to be moved to where i was
> >> +                 * before the array can safely be truncated.
> >> +                 */
> >> +                if (i != j)
> >> +                    state->private_objs[i] =
> >> +                        state->private_objs[j];
> >>
> >> -            if (old_dm_state->context)
> >> -                dc_retain_state(old_dm_state->context);
> >> +                state->private_objs[j].ptr = NULL;
> >> +                state->private_objs[j].state = NULL;
> >> +                state->private_objs[j].old_state = NULL;
> >> +                state->private_objs[j].new_state = NULL;
> >> +
> >> +                state->num_private_objs = j;
> >> +                break;
> >> +            }
> >>           }
> >>       }
> >>
> >> -- 
> >> 2.27.0
> >>
> > 
>
Christian König July 27, 2020, 7:28 p.m. UTC | #6
Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> On 2020-07-27 9:39 a.m., Christian König wrote:
>> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
>>> This patch fixes a race condition that causes a use-after-free during
>>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking 
>>> commits
>>> are requested and the second one finishes before the first. 
>>> Essentially,
>>> this bug occurs when the following sequence of events happens:
>>>
>>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
>>> deferred to the workqueue.
>>>
>>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
>>> deferred to the workqueue.
>>>
>>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
>>> commit_tail and commit #2 completes, freeing dm_state #1.
>>>
>>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
>>> 1 and dereferences a freelist pointer while setting the context.
>>
>> Well I only have a one mile high view on this, but why don't you let 
>> the work items execute in order?
>>
>> That would be better anyway cause this way we don't trigger a cache 
>> line ping pong between CPUs.
>>
>> Christian.
>
> We use the DRM helpers for managing drm_atomic_commit_state and those 
> helpers internally push non-blocking commit work into the system 
> unbound work queue.

Mhm, well if you send those helper atomic commits in the order A,B and 
they execute it in the order B,A I would call that a bug :)

> While we could duplicate a copy of that code with nothing but the 
> workqueue changed that isn't something I'd really like to maintain 
> going forward.

I'm not talking about duplicating the code, I'm talking about fixing the 
helpers. I don't know that code well, but from the outside it sounds 
like a bug there.

And executing work items in the order they are submitted is trivial.

Had anybody pinged Daniel or other people familiar with the helper code 
about it?

Regards,
Christian.

>
> Regards,
> Nicholas Kazlauskas
>
>>
>>>
>>> Since this bug has only been spotted with fast commits, this patch 
>>> fixes
>>> the bug by clearing the dm_state instead of using the old dc_state for
>>> fast updates. In addition, since dm_state is only used for its dc_state
>>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is 
>>> found,
>>> removing the dm_state should not have any consequences in fast updates.
>>>
>>> This use-after-free bug has existed for a while now, but only caused a
>>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: 
>>> relocate
>>> freelist pointer to middle of object") moving the freelist pointer from
>>> dm_state->base (which was unused) to dm_state->context (which is
>>> dereferenced).
>>>
>>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
>>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state 
>>> for fast updates")
>>> Reported-by: Duncan <1i5t5.duncan@cox.net>
>>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
>>> ---
>>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 
>>> ++++++++++++++-----
>>>   1 file changed, 27 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> index 86ffa0c2880f..710edc70e37e 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct 
>>> drm_device *dev,
>>>            * the same resource. If we have a new DC context as part of
>>>            * the DM atomic state from validation we need to free it and
>>>            * retain the existing one instead.
>>> +         *
>>> +         * Furthermore, since the DM atomic state only contains the DC
>>> +         * context and can safely be annulled, we can free the state
>>> +         * and clear the associated private object now to free
>>> +         * some memory and avoid a possible use-after-free later.
>>>            */
>>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
>>>
>>> -        new_dm_state = dm_atomic_get_new_state(state);
>>> -        old_dm_state = dm_atomic_get_old_state(state);
>>> +        for (i = 0; i < state->num_private_objs; i++) {
>>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
>>>
>>> -        if (new_dm_state && old_dm_state) {
>>> -            if (new_dm_state->context)
>>> -                dc_release_state(new_dm_state->context);
>>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
>>> +                int j = state->num_private_objs-1;
>>>
>>> -            new_dm_state->context = old_dm_state->context;
>>> +                dm_atomic_destroy_state(obj,
>>> +                        state->private_objs[i].state);
>>> +
>>> +                /* If i is not at the end of the array then the
>>> +                 * last element needs to be moved to where i was
>>> +                 * before the array can safely be truncated.
>>> +                 */
>>> +                if (i != j)
>>> +                    state->private_objs[i] =
>>> +                        state->private_objs[j];
>>>
>>> -            if (old_dm_state->context)
>>> -                dc_retain_state(old_dm_state->context);
>>> +                state->private_objs[j].ptr = NULL;
>>> +                state->private_objs[j].state = NULL;
>>> +                state->private_objs[j].old_state = NULL;
>>> +                state->private_objs[j].new_state = NULL;
>>> +
>>> +                state->num_private_objs = j;
>>> +                break;
>>> +            }
>>>           }
>>>       }
>>>
>>> -- 
>>> 2.27.0
>>>
>>
>
Mazin Rezk July 27, 2020, 8:27 p.m. UTC | #7
On Monday, July 27, 2020 9:26 AM, Kazlauskas, Nicholas <nicholas.kazlauskas@amd.com> wrote:

> On 2020-07-27 1:40 a.m., Mazin Rezk wrote:
> > This patch fixes a race condition that causes a use-after-free during
> > amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking commits
> > are requested and the second one finishes before the first. Essentially,
> > this bug occurs when the following sequence of events happens:
> >
> > 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > deferred to the workqueue.
> >
> > 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > deferred to the workqueue.
> >
> > 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > commit_tail and commit #2 completes, freeing dm_state #1.
> >
> > 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > 1 and dereferences a freelist pointer while setting the context.
> >
> > Since this bug has only been spotted with fast commits, this patch fixes
> > the bug by clearing the dm_state instead of using the old dc_state for
> > fast updates. In addition, since dm_state is only used for its dc_state
> > and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is found,
> > removing the dm_state should not have any consequences in fast updates.
> >
> > This use-after-free bug has existed for a while now, but only caused a
> > noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub: relocate
> > freelist pointer to middle of object") moving the freelist pointer from
> > dm_state->base (which was unused) to dm_state->context (which is
> > dereferenced).
> >
> > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> > Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state for fast updates")
> > Reported-by: Duncan <1i5t5.duncan@cox.net>
> > Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> > ---
> >   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 ++++++++++++++-----
> >   1 file changed, 27 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index 86ffa0c2880f..710edc70e37e 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
> >   		 * the same resource. If we have a new DC context as part of
> >   		 * the DM atomic state from validation we need to free it and
> >   		 * retain the existing one instead.
> > +		 *
> > +		 * Furthermore, since the DM atomic state only contains the DC
> > +		 * context and can safely be annulled, we can free the state
> > +		 * and clear the associated private object now to free
> > +		 * some memory and avoid a possible use-after-free later.
> >   		 */
> > -		struct dm_atomic_state *new_dm_state, *old_dm_state;
> >
> > -		new_dm_state = dm_atomic_get_new_state(state);
> > -		old_dm_state = dm_atomic_get_old_state(state);
> > +		for (i = 0; i < state->num_private_objs; i++) {
> > +			struct drm_private_obj *obj = state->private_objs[i].ptr;
> >
> > -		if (new_dm_state && old_dm_state) {
> > -			if (new_dm_state->context)
> > -				dc_release_state(new_dm_state->context);
> > +			if (obj->funcs == adev->dm.atomic_obj.funcs) {
> > +				int j = state->num_private_objs-1;
> >
> > -			new_dm_state->context = old_dm_state->context;
> > +				dm_atomic_destroy_state(obj,
> > +						state->private_objs[i].state);
> > +
> > +				/* If i is not at the end of the array then the
> > +				 * last element needs to be moved to where i was
> > +				 * before the array can safely be truncated.
> > +				 */
> > +				if (i != j)
> > +					state->private_objs[i] =
> > +						state->private_objs[j];
> >
> > -			if (old_dm_state->context)
> > -				dc_retain_state(old_dm_state->context);
> > +				state->private_objs[j].ptr = NULL;
> > +				state->private_objs[j].state = NULL;
> > +				state->private_objs[j].old_state = NULL;
> > +				state->private_objs[j].new_state = NULL;
> > +
> > +				state->num_private_objs = j;
> > +				break;
> > +			}
>
> In the bug report itself I mentioned that I don't really like hacking
> around the DRM core for resolving this patch but to go into more
> specifics, it's really two issues of code maintenance:
>
> 1. It's iterating over internal structures and layout of private objects
> in the state and modifying the state. The core doesn't really guarantee
> how these things are going to be laid out and it may change in the future.
>
> 2. It's freeing an allocation we don't own from DM. DRM doesn't track
> this state elsewhere for purposes of freeing, but nothing is really
> stopping the core from doing this later down the line.
>
> The implementation itself is correct from a technical perspective, but
> I'd rather it reside in DRM as a helper for code maintenance purposes.

So would something like this in drm_atomic_helper.c work?

/**
 * drm_atomic_helper_delete_private_obj - drop a private object from an
 * atomic state
 * @state: the atomic state to modify
 * @i: index into @state->private_objs of the entry to remove
 *
 * Destroys the private object state at slot @i via the object's
 * &drm_private_state_funcs.atomic_destroy_state hook and removes the slot
 * from @state->private_objs by swapping in the last array element and
 * shrinking num_private_objs by one.
 *
 * NOTE(review): assumes @i is a valid index (0 <= i < num_private_objs) —
 * callers must guarantee this; no bounds check is performed here.
 * NOTE(review): the relative order of the remaining private_objs entries is
 * not preserved (swap-with-last removal) — confirm no caller depends on
 * iteration order.
 */
void drm_atomic_helper_delete_private_obj(struct drm_atomic_state *state,
					int i)
{
	struct drm_private_obj *obj = state->private_objs[i].ptr;
	/* Index of the last live entry; becomes the new count below. */
	int end = state->num_private_objs-1;

	/* Free the duplicated private state before unlinking the slot. */
	obj->funcs->atomic_destroy_state(obj, state->private_objs[i].state);

	/* If i is not at the end of the array then the last element
	 * needs to be moved to where i was before the array can safely
	 * be truncated.
	 */
	if (i != end)
		state->private_objs[i] = state->private_objs[end];

	/* Clear the vacated tail slot so no stale pointers survive in the
	 * array past the truncated length.
	 */
	state->private_objs[end].ptr = NULL;
	state->private_objs[end].state = NULL;
	state->private_objs[end].old_state = NULL;
	state->private_objs[end].new_state = NULL;

	state->num_private_objs = end;
}

I was considering doing something like that, but I wanted to avoid
modifying DRM core to fix a bug in amdgpu. I guess this makes more sense
though since it does seem rather unorthodox to make changes to
drm_atomic_state outside of DRM core. Perhaps there will be a use for this
function outside of this patch in the future.

Thanks,
Mazin Rezk

>
> Regards,
> Nicholas Kazlauskas
>
> >   		}
> >   	}
> >
> > --
> > 2.27.0
> >
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> >
Daniel Vetter July 27, 2020, 8:29 p.m. UTC | #8
On Mon, Jul 27, 2020 at 9:28 PM Christian König
<christian.koenig@amd.com> wrote:
>
> Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > On 2020-07-27 9:39 a.m., Christian König wrote:
> >> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> >>> This patch fixes a race condition that causes a use-after-free during
> >>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> >>> commits
> >>> are requested and the second one finishes before the first.
> >>> Essentially,
> >>> this bug occurs when the following sequence of events happens:
> >>>
> >>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> >>> deferred to the workqueue.
> >>>
> >>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> >>> deferred to the workqueue.
> >>>
> >>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> >>> commit_tail and commit #2 completes, freeing dm_state #1.
> >>>
> >>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> >>> 1 and dereferences a freelist pointer while setting the context.
> >>
> >> Well I only have a one mile high view on this, but why don't you let
> >> the work items execute in order?
> >>
> >> That would be better anyway cause this way we don't trigger a cache
> >> line ping pong between CPUs.
> >>
> >> Christian.
> >
> > We use the DRM helpers for managing drm_atomic_commit_state and those
> > helpers internally push non-blocking commit work into the system
> > unbound work queue.
>
> Mhm, well if you send those helper atomic commits in the order A,B and
> they execute it in the order B,A I would call that a bug :)

The way it works is it pushes all commits into unbound work queue, but
then forces serialization as needed. We do _not_ want e.g. updates on
different CRTC to be serialized, that would result in lots of judder.
And hw is funny enough that there's all kinds of dependencies.

The way you force synchronization is by adding other CRTC state
objects. So if DC is busted and can only handle a single update per
work item, then I guess you always need all CRTC states and everything
will be run in order. But that also totally kills modern multi-screen
compositors. Xorg isn't modern, just in case that's not clear :-)

Looking at the code it seems like you indeed have only a single dm
state, so yeah global sync is what you'll need as immediate fix, and
then maybe fix up DM to not be quite so silly ... or at least only do
the dm state stuff when really needed.

We could also sprinkle the drm_crtc_commit structure around a bit
(it's the glue that provides the synchronization across commits), but
since your dm state is global just grabbing all crtc states
unconditionally as part of that is probably best.

> > While we could duplicate a copy of that code with nothing but the
> > workqueue changed that isn't something I'd really like to maintain
> > going forward.
>
> I'm not talking about duplicating the code, I'm talking about fixing the
> helpers. I don't know that code well, but from the outside it sounds
> like a bug there.
>
> And executing work items in the order they are submitted is trivial.
>
> Had anybody pinged Daniel or other people familiar with the helper code
> about it?

Yeah something is wrong here, and the fix looks horrible :-)

Aside, I've also seen some recent discussion flare up about
drm_atomic_state_get/put used to paper over some other use-after-free,
but this time related to interrupt handlers. Maybe a few rules about
that:
- dont
- especially not when it's interrupt handlers, because you can't call
drm_atomic_state_put from interrupt handlers.

Instead have a spin_lock_irq to protect the shared data with your
interrupt handler, and _copy_ the data over. This is e.g. what
drm_crtc_arm_vblank_event does.

Cheers, Daniel

>
> Regards,
> Christian.
>
> >
> > Regards,
> > Nicholas Kazlauskas
> >
> >>
> >>>
> >>> Since this bug has only been spotted with fast commits, this patch
> >>> fixes
> >>> the bug by clearing the dm_state instead of using the old dc_state for
> >>> fast updates. In addition, since dm_state is only used for its dc_state
> >>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
> >>> found,
> >>> removing the dm_state should not have any consequences in fast updates.
> >>>
> >>> This use-after-free bug has existed for a while now, but only caused a
> >>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
> >>> relocate
> >>> freelist pointer to middle of object") moving the freelist pointer from
> >>> dm_state->base (which was unused) to dm_state->context (which is
> >>> dereferenced).
> >>>
> >>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> >>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> >>> for fast updates")
> >>> Reported-by: Duncan <1i5t5.duncan@cox.net>
> >>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> >>> ---
> >>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> >>> ++++++++++++++-----
> >>>   1 file changed, 27 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> index 86ffa0c2880f..710edc70e37e 100644
> >>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct
> >>> drm_device *dev,
> >>>            * the same resource. If we have a new DC context as part of
> >>>            * the DM atomic state from validation we need to free it and
> >>>            * retain the existing one instead.
> >>> +         *
> >>> +         * Furthermore, since the DM atomic state only contains the DC
> >>> +         * context and can safely be annulled, we can free the state
> >>> +         * and clear the associated private object now to free
> >>> +         * some memory and avoid a possible use-after-free later.
> >>>            */
> >>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
> >>>
> >>> -        new_dm_state = dm_atomic_get_new_state(state);
> >>> -        old_dm_state = dm_atomic_get_old_state(state);
> >>> +        for (i = 0; i < state->num_private_objs; i++) {
> >>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
> >>>
> >>> -        if (new_dm_state && old_dm_state) {
> >>> -            if (new_dm_state->context)
> >>> -                dc_release_state(new_dm_state->context);
> >>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
> >>> +                int j = state->num_private_objs-1;
> >>>
> >>> -            new_dm_state->context = old_dm_state->context;
> >>> +                dm_atomic_destroy_state(obj,
> >>> +                        state->private_objs[i].state);
> >>> +
> >>> +                /* If i is not at the end of the array then the
> >>> +                 * last element needs to be moved to where i was
> >>> +                 * before the array can safely be truncated.
> >>> +                 */
> >>> +                if (i != j)
> >>> +                    state->private_objs[i] =
> >>> +                        state->private_objs[j];
> >>>
> >>> -            if (old_dm_state->context)
> >>> -                dc_retain_state(old_dm_state->context);
> >>> +                state->private_objs[j].ptr = NULL;
> >>> +                state->private_objs[j].state = NULL;
> >>> +                state->private_objs[j].old_state = NULL;
> >>> +                state->private_objs[j].new_state = NULL;
> >>> +
> >>> +                state->num_private_objs = j;
> >>> +                break;
> >>> +            }
> >>>           }
> >>>       }
> >>>
> >>> --
> >>> 2.27.0
> >>>
> >>
> >
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Daniel Vetter July 27, 2020, 9:09 p.m. UTC | #9
On Mon, Jul 27, 2020 at 10:29 PM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Mon, Jul 27, 2020 at 9:28 PM Christian König
> <christian.koenig@amd.com> wrote:
> >
> > Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > > On 2020-07-27 9:39 a.m., Christian König wrote:
> > >> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> > >>> This patch fixes a race condition that causes a use-after-free during
> > >>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> > >>> commits
> > >>> are requested and the second one finishes before the first.
> > >>> Essentially,
> > >>> this bug occurs when the following sequence of events happens:
> > >>>
> > >>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > >>> deferred to the workqueue.
> > >>>
> > >>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > >>> deferred to the workqueue.
> > >>>
> > >>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > >>> commit_tail and commit #2 completes, freeing dm_state #1.
> > >>>
> > >>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > >>> 1 and dereferences a freelist pointer while setting the context.
> > >>
> > >> Well I only have a one mile high view on this, but why don't you let
> > >> the work items execute in order?
> > >>
> > >> That would be better anyway cause this way we don't trigger a cache
> > >> line ping pong between CPUs.
> > >>
> > >> Christian.
> > >
> > > We use the DRM helpers for managing drm_atomic_commit_state and those
> > > helpers internally push non-blocking commit work into the system
> > > unbound work queue.
> >
> > Mhm, well if you send those helper atomic commits in the order A,B and
> > they execute it in the order B,A I would call that a bug :)
>
> The way it works is it pushes all commits into unbound work queue, but
> then forces serialization as needed. We do _not_ want e.g. updates on
> different CRTC to be serialized, that would result in lots of judder.
> And hw is funny enough that there's all kinds of dependencies.
>
> The way you force synchronization is by adding other CRTC state
> objects. So if DC is busted and can only handle a single update per
> work item, then I guess you always need all CRTC states and everything
> will be run in order. But that also totally kills modern multi-screen
> compositors. Xorg isn't modern, just in case that's not clear :-)
>
> Lucking at the code it seems like you indeed have only a single dm
> state, so yeah global sync is what you'll need as immediate fix, and
> then maybe fix up DM to not be quite so silly ... or at least only do
> the dm state stuff when really needed.

Just looked a bit more at this struct dc_state, and that looks a lot
like an atomic side-wagon. I don't think that works as a private
state, this should probably be embedded into a subclass of
drm_atomic_state.

And probably a lot of these pointers moved to other places I think, or
I'm not entirely clear on what exactly this stuff is needed for ...

dc_state is also refcounted, which is definitely rather funny for a
state structure.

Feels like this entire thing (how the overall dc state machinery is
glued into atomic) isn't quite thought thru just yet :-/
-Daniel

> We could also sprinkle the drm_crtc_commit structure around a bit
> (it's the glue that provides the synchronization across commits), but
> since your dm state is global just grabbing all crtc states
> unconditionally as part of that is probably best.
>
> > > While we could duplicate a copy of that code with nothing but the
> > > workqueue changed that isn't something I'd really like to maintain
> > > going forward.
> >
> > I'm not talking about duplicating the code, I'm talking about fixing the
> > helpers. I don't know that code well, but from the outside it sounds
> > like a bug there.
> >
> > And executing work items in the order they are submitted is trivial.
> >
> > Had anybody pinged Daniel or other people familiar with the helper code
> > about it?
>
> Yeah something is wrong here, and the fix looks horrible :-)
>
> Aside, I've also seen some recent discussion flare up about
> drm_atomic_state_get/put used to paper over some other use-after-free,
> but this time related to interrupt handlers. Maybe a few rules about
> that:
> - dont
> - especially not when it's interrupt handlers, because you can't call
> drm_atomic_state_put from interrupt handlers.
>
> Instead have an spin_lock_irq to protect the shared date with your
> interrupt handler, and _copy_ the date over. This is e.g. what
> drm_crtc_arm_vblank_event does.
>
> Cheers, Daniel
>
> >
> > Regards,
> > Christian.
> >
> > >
> > > Regards,
> > > Nicholas Kazlauskas
> > >
> > >>
> > >>>
> > >>> Since this bug has only been spotted with fast commits, this patch
> > >>> fixes
> > >>> the bug by clearing the dm_state instead of using the old dc_state for
> > >>> fast updates. In addition, since dm_state is only used for its dc_state
> > >>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
> > >>> found,
> > >>> removing the dm_state should not have any consequences in fast updates.
> > >>>
> > >>> This use-after-free bug has existed for a while now, but only caused a
> > >>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
> > >>> relocate
> > >>> freelist pointer to middle of object") moving the freelist pointer from
> > >>> dm_state->base (which was unused) to dm_state->context (which is
> > >>> dereferenced).
> > >>>
> > >>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> > >>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> > >>> for fast updates")
> > >>> Reported-by: Duncan <1i5t5.duncan@cox.net>
> > >>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> > >>> ---
> > >>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> > >>> ++++++++++++++-----
> > >>>   1 file changed, 27 insertions(+), 9 deletions(-)
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > >>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > >>> index 86ffa0c2880f..710edc70e37e 100644
> > >>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > >>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > >>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct
> > >>> drm_device *dev,
> > >>>            * the same resource. If we have a new DC context as part of
> > >>>            * the DM atomic state from validation we need to free it and
> > >>>            * retain the existing one instead.
> > >>> +         *
> > >>> +         * Furthermore, since the DM atomic state only contains the DC
> > >>> +         * context and can safely be annulled, we can free the state
> > >>> +         * and clear the associated private object now to free
> > >>> +         * some memory and avoid a possible use-after-free later.
> > >>>            */
> > >>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
> > >>>
> > >>> -        new_dm_state = dm_atomic_get_new_state(state);
> > >>> -        old_dm_state = dm_atomic_get_old_state(state);
> > >>> +        for (i = 0; i < state->num_private_objs; i++) {
> > >>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
> > >>>
> > >>> -        if (new_dm_state && old_dm_state) {
> > >>> -            if (new_dm_state->context)
> > >>> -                dc_release_state(new_dm_state->context);
> > >>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
> > >>> +                int j = state->num_private_objs-1;
> > >>>
> > >>> -            new_dm_state->context = old_dm_state->context;
> > >>> +                dm_atomic_destroy_state(obj,
> > >>> +                        state->private_objs[i].state);
> > >>> +
> > >>> +                /* If i is not at the end of the array then the
> > >>> +                 * last element needs to be moved to where i was
> > >>> +                 * before the array can safely be truncated.
> > >>> +                 */
> > >>> +                if (i != j)
> > >>> +                    state->private_objs[i] =
> > >>> +                        state->private_objs[j];
> > >>>
> > >>> -            if (old_dm_state->context)
> > >>> -                dc_retain_state(old_dm_state->context);
> > >>> +                state->private_objs[j].ptr = NULL;
> > >>> +                state->private_objs[j].state = NULL;
> > >>> +                state->private_objs[j].old_state = NULL;
> > >>> +                state->private_objs[j].new_state = NULL;
> > >>> +
> > >>> +                state->num_private_objs = j;
> > >>> +                break;
> > >>> +            }
> > >>>           }
> > >>>       }
> > >>>
> > >>> --
> > >>> 2.27.0
> > >>>
> > >>
> > >
> >
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Mazin Rezk July 27, 2020, 9:11 p.m. UTC | #10
On Monday, July 27, 2020 4:29 PM, Daniel Vetter <daniel@ffwll.ch> wrote:

> On Mon, Jul 27, 2020 at 9:28 PM Christian König
> <christian.koenig@amd.com> wrote:
> >
> > Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > > On 2020-07-27 9:39 a.m., Christian König wrote:
> > >> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> > >>> This patch fixes a race condition that causes a use-after-free during
> > >>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> > >>> commits
> > >>> are requested and the second one finishes before the first.
> > >>> Essentially,
> > >>> this bug occurs when the following sequence of events happens:
> > >>>
> > >>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > >>> deferred to the workqueue.
> > >>>
> > >>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > >>> deferred to the workqueue.
> > >>>
> > >>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > >>> commit_tail and commit #2 completes, freeing dm_state #1.
> > >>>
> > >>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > >>> 1 and dereferences a freelist pointer while setting the context.
> > >>
> > >> Well I only have a one mile high view on this, but why don't you let
> > >> the work items execute in order?
> > >>
> > >> That would be better anyway cause this way we don't trigger a cache
> > >> line ping pong between CPUs.
> > >>
> > >> Christian.
> > >
> > > We use the DRM helpers for managing drm_atomic_commit_state and those
> > > helpers internally push non-blocking commit work into the system
> > > unbound work queue.
> >
> > Mhm, well if you send those helper atomic commits in the order A,B and
> > they execute it in the order B,A I would call that a bug :)
>
> The way it works is it pushes all commits into unbound work queue, but
> then forces serialization as needed. We do _not_ want e.g. updates on
> different CRTC to be serialized, that would result in lots of judder.
> And hw is funny enough that there's all kinds of dependencies.
>
> The way you force synchronization is by adding other CRTC state
> objects. So if DC is busted and can only handle a single update per
> work item, then I guess you always need all CRTC states and everything
> will be run in order. But that also totally kills modern multi-screen
> compositors. Xorg isn't modern, just in case that's not clear :-)
>
> Lucking at the code it seems like you indeed have only a single dm
> state, so yeah global sync is what you'll need as immediate fix, and
> then maybe fix up DM to not be quite so silly ... or at least only do
> the dm state stuff when really needed.
>
> We could also sprinkle the drm_crtc_commit structure around a bit
> (it's the glue that provides the synchronization across commits), but
> since your dm state is global just grabbing all crtc states
> unconditionally as part of that is probably best.
>
> > > While we could duplicate a copy of that code with nothing but the
> > > workqueue changed that isn't something I'd really like to maintain
> > > going forward.
> >
> > I'm not talking about duplicating the code, I'm talking about fixing the
> > helpers. I don't know that code well, but from the outside it sounds
> > like a bug there.
> >
> > And executing work items in the order they are submitted is trivial.
> >
> > Had anybody pinged Daniel or other people familiar with the helper code
> > about it?
>
> Yeah something is wrong here, and the fix looks horrible :-)
>
> Aside, I've also seen some recent discussion flare up about
> drm_atomic_state_get/put used to paper over some other use-after-free,
> but this time related to interrupt handlers. Maybe a few rules about
> that:
> - dont
> - especially not when it's interrupt handlers, because you can't call
> drm_atomic_state_put from interrupt handlers.
>
> Instead have an spin_lock_irq to protect the shared date with your
> interrupt handler, and _copy_ the date over. This is e.g. what
> drm_crtc_arm_vblank_event does.

Nicholas wrote a patch that attempted to resolve the issue by adding every
CRTC into the commit to use the stall checks. [1] While this forces
synchronisation on commits, it's kind of a hacky method that may take a
toll on performance.

Is it possible to have a DRM helper that forces synchronisation on some
commits without having to add every CRTC into the commit?

Also, is synchronisation really necessary for fast updates in amdgpu?
I'll admit, the idea of eliminating the use-after-free bug by eliminating
the use entirely doesn't seem ideal; but is forcing synchronisation on
these updates that much better?

[1] https://bugzilla.kernel.org/show_bug.cgi?id=207383#c96

Thanks,
Mazin Rezk

>
> Cheers, Daniel
>
> >
> > Regards,
> > Christian.
> >
> > >
> > > Regards,
> > > Nicholas Kazlauskas
> > >
> > >>
> > >>>
> > >>> Since this bug has only been spotted with fast commits, this patch
> > >>> fixes
> > >>> the bug by clearing the dm_state instead of using the old dc_state for
> > >>> fast updates. In addition, since dm_state is only used for its dc_state
> > >>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
> > >>> found,
> > >>> removing the dm_state should not have any consequences in fast updates.
> > >>>
> > >>> This use-after-free bug has existed for a while now, but only caused a
> > >>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
> > >>> relocate
> > >>> freelist pointer to middle of object") moving the freelist pointer from
> > >>> dm_state->base (which was unused) to dm_state->context (which is
> > >>> dereferenced).
> > >>>
> > >>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> > >>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> > >>> for fast updates")
> > >>> Reported-by: Duncan <1i5t5.duncan@cox.net>
> > >>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> > >>> ---
> > >>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> > >>> ++++++++++++++-----
> > >>>   1 file changed, 27 insertions(+), 9 deletions(-)
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > >>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > >>> index 86ffa0c2880f..710edc70e37e 100644
> > >>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > >>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > >>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct
> > >>> drm_device *dev,
> > >>>            * the same resource. If we have a new DC context as part of
> > >>>            * the DM atomic state from validation we need to free it and
> > >>>            * retain the existing one instead.
> > >>> +         *
> > >>> +         * Furthermore, since the DM atomic state only contains the DC
> > >>> +         * context and can safely be annulled, we can free the state
> > >>> +         * and clear the associated private object now to free
> > >>> +         * some memory and avoid a possible use-after-free later.
> > >>>            */
> > >>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
> > >>>
> > >>> -        new_dm_state = dm_atomic_get_new_state(state);
> > >>> -        old_dm_state = dm_atomic_get_old_state(state);
> > >>> +        for (i = 0; i < state->num_private_objs; i++) {
> > >>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
> > >>>
> > >>> -        if (new_dm_state && old_dm_state) {
> > >>> -            if (new_dm_state->context)
> > >>> -                dc_release_state(new_dm_state->context);
> > >>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
> > >>> +                int j = state->num_private_objs-1;
> > >>>
> > >>> -            new_dm_state->context = old_dm_state->context;
> > >>> +                dm_atomic_destroy_state(obj,
> > >>> +                        state->private_objs[i].state);
> > >>> +
> > >>> +                /* If i is not at the end of the array then the
> > >>> +                 * last element needs to be moved to where i was
> > >>> +                 * before the array can safely be truncated.
> > >>> +                 */
> > >>> +                if (i != j)
> > >>> +                    state->private_objs[i] =
> > >>> +                        state->private_objs[j];
> > >>>
> > >>> -            if (old_dm_state->context)
> > >>> -                dc_retain_state(old_dm_state->context);
> > >>> +                state->private_objs[j].ptr = NULL;
> > >>> +                state->private_objs[j].state = NULL;
> > >>> +                state->private_objs[j].old_state = NULL;
> > >>> +                state->private_objs[j].new_state = NULL;
> > >>> +
> > >>> +                state->num_private_objs = j;
> > >>> +                break;
> > >>> +            }
> > >>>           }
> > >>>       }
> > >>>
> > >>> --
> > >>> 2.27.0
> > >>>
> > >>
> > >
> >
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Daniel Vetter July 27, 2020, 9:32 p.m. UTC | #11
On Mon, Jul 27, 2020 at 11:11 PM Mazin Rezk <mnrzk@protonmail.com> wrote:
>
> On Monday, July 27, 2020 4:29 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
>
> > On Mon, Jul 27, 2020 at 9:28 PM Christian König
> > <christian.koenig@amd.com> wrote:
> > >
> > > Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > > > On 2020-07-27 9:39 a.m., Christian König wrote:
> > > >> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> > > >>> This patch fixes a race condition that causes a use-after-free during
> > > >>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> > > >>> commits
> > > >>> are requested and the second one finishes before the first.
> > > >>> Essentially,
> > > >>> this bug occurs when the following sequence of events happens:
> > > >>>
> > > >>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > > >>> deferred to the workqueue.
> > > >>>
> > > >>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > > >>> deferred to the workqueue.
> > > >>>
> > > >>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > > >>> commit_tail and commit #2 completes, freeing dm_state #1.
> > > >>>
> > > >>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > > >>> 1 and dereferences a freelist pointer while setting the context.
> > > >>
> > > >> Well I only have a one mile high view on this, but why don't you let
> > > >> the work items execute in order?
> > > >>
> > > >> That would be better anyway cause this way we don't trigger a cache
> > > >> line ping pong between CPUs.
> > > >>
> > > >> Christian.
> > > >
> > > > We use the DRM helpers for managing drm_atomic_commit_state and those
> > > > helpers internally push non-blocking commit work into the system
> > > > unbound work queue.
> > >
> > > Mhm, well if you send those helper atomic commits in the order A,B and
> > > they execute it in the order B,A I would call that a bug :)
> >
> > The way it works is it pushes all commits into unbound work queue, but
> > then forces serialization as needed. We do _not_ want e.g. updates on
> > different CRTC to be serialized, that would result in lots of judder.
> > And hw is funny enough that there's all kinds of dependencies.
> >
> > The way you force synchronization is by adding other CRTC state
> > objects. So if DC is busted and can only handle a single update per
> > work item, then I guess you always need all CRTC states and everything
> > will be run in order. But that also totally kills modern multi-screen
> > compositors. Xorg isn't modern, just in case that's not clear :-)
> >
> > Looking at the code it seems like you indeed have only a single dm
> > state, so yeah global sync is what you'll need as immediate fix, and
> > then maybe fix up DM to not be quite so silly ... or at least only do
> > the dm state stuff when really needed.
> >
> > We could also sprinkle the drm_crtc_commit structure around a bit
> > (it's the glue that provides the synchronization across commits), but
> > since your dm state is global just grabbing all crtc states
> > unconditionally as part of that is probably best.
> >
> > > > While we could duplicate a copy of that code with nothing but the
> > > > workqueue changed that isn't something I'd really like to maintain
> > > > going forward.
> > >
> > > I'm not talking about duplicating the code, I'm talking about fixing the
> > > helpers. I don't know that code well, but from the outside it sounds
> > > like a bug there.
> > >
> > > And executing work items in the order they are submitted is trivial.
> > >
> > > Had anybody pinged Daniel or other people familiar with the helper code
> > > about it?
> >
> > Yeah something is wrong here, and the fix looks horrible :-)
> >
> > Aside, I've also seen some recent discussion flare up about
> > drm_atomic_state_get/put used to paper over some other use-after-free,
> > but this time related to interrupt handlers. Maybe a few rules about
> > that:
> > - dont
> > - especially not when it's interrupt handlers, because you can't call
> > drm_atomic_state_put from interrupt handlers.
> >
> > Instead have a spin_lock_irq to protect the shared data with your
> > interrupt handler, and _copy_ the data over. This is e.g. what
> > drm_crtc_arm_vblank_event does.
>
> Nicholas wrote a patch that attempted to resolve the issue by adding every
> CRTC into the commit to use the stall checks. [1] While this forces
> synchronisation on commits, it's kind of a hacky method that may take a
> toll on performance.
>
> Is it possible to have a DRM helper that forces synchronisation on some
> commits without having to add every CRTC into the commit?
>
> Also, is synchronisation really necessary for fast updates in amdgpu?
> I'll admit, the idea of eliminating the use-after-free bug by eliminating
> the use entirely doesn't seem ideal; but is forcing synchronisation on
> these updates that much better?

Well clearing the dc_state pointer here and then allocating another
one in atomic_commit_tail also looks fishy. The proper fix is probably
a lot more involved, but yeah interim fix is to grab all crtc states
iff you also grabbed the dm_atomic_state structure. Real fix is to
only do this when necessary, which pretty much means the dc_state
needs to be somehow split up, or there needs to be some guarantees
about when it's necessary and when not. Otherwise parallel commits on
different CRTC are not possible with the current dc backend code.

See also my dma-fence annotation fixup patch, there dc_state also gets
in the way:

https://lore.kernel.org/dri-devel/20200707201229.472834-21-daniel.vetter@ffwll.ch/

Nicholas, btw I'm still waiting for some dc feedback on that entire
series, and what/if there's plans to fix these issues properly.

Maybe even going back to the subclassed drm_atomic_state might be
better than what we currently have.
-Daniel
>
> [1] https://bugzilla.kernel.org/show_bug.cgi?id=207383#c96
>
> Thanks,
> Mazin Rezk
>
> >
> > Cheers, Daniel
> >
> > >
> > > Regards,
> > > Christian.
> > >
> > > >
> > > > Regards,
> > > > Nicholas Kazlauskas
> > > >
> > > >>
> > > >>>
> > > >>> Since this bug has only been spotted with fast commits, this patch
> > > >>> fixes
> > > >>> the bug by clearing the dm_state instead of using the old dc_state for
> > > >>> fast updates. In addition, since dm_state is only used for its dc_state
> > > >>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
> > > >>> found,
> > > >>> removing the dm_state should not have any consequences in fast updates.
> > > >>>
> > > >>> This use-after-free bug has existed for a while now, but only caused a
> > > >>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
> > > >>> relocate
> > > >>> freelist pointer to middle of object") moving the freelist pointer from
> > > >>> dm_state->base (which was unused) to dm_state->context (which is
> > > >>> dereferenced).
> > > >>>
> > > >>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> > > >>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> > > >>> for fast updates")
> > > >>> Reported-by: Duncan <1i5t5.duncan@cox.net>
> > > >>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> > > >>> ---
> > > >>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> > > >>> ++++++++++++++-----
> > > >>>   1 file changed, 27 insertions(+), 9 deletions(-)
> > > >>>
> > > >>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > >>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > >>> index 86ffa0c2880f..710edc70e37e 100644
> > > >>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > >>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > >>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct
> > > >>> drm_device *dev,
> > > >>>            * the same resource. If we have a new DC context as part of
> > > >>>            * the DM atomic state from validation we need to free it and
> > > >>>            * retain the existing one instead.
> > > >>> +         *
> > > >>> +         * Furthermore, since the DM atomic state only contains the DC
> > > >>> +         * context and can safely be annulled, we can free the state
> > > >>> +         * and clear the associated private object now to free
> > > >>> +         * some memory and avoid a possible use-after-free later.
> > > >>>            */
> > > >>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
> > > >>>
> > > >>> -        new_dm_state = dm_atomic_get_new_state(state);
> > > >>> -        old_dm_state = dm_atomic_get_old_state(state);
> > > >>> +        for (i = 0; i < state->num_private_objs; i++) {
> > > >>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
> > > >>>
> > > >>> -        if (new_dm_state && old_dm_state) {
> > > >>> -            if (new_dm_state->context)
> > > >>> -                dc_release_state(new_dm_state->context);
> > > >>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
> > > >>> +                int j = state->num_private_objs-1;
> > > >>>
> > > >>> -            new_dm_state->context = old_dm_state->context;
> > > >>> +                dm_atomic_destroy_state(obj,
> > > >>> +                        state->private_objs[i].state);
> > > >>> +
> > > >>> +                /* If i is not at the end of the array then the
> > > >>> +                 * last element needs to be moved to where i was
> > > >>> +                 * before the array can safely be truncated.
> > > >>> +                 */
> > > >>> +                if (i != j)
> > > >>> +                    state->private_objs[i] =
> > > >>> +                        state->private_objs[j];
> > > >>>
> > > >>> -            if (old_dm_state->context)
> > > >>> -                dc_retain_state(old_dm_state->context);
> > > >>> +                state->private_objs[j].ptr = NULL;
> > > >>> +                state->private_objs[j].state = NULL;
> > > >>> +                state->private_objs[j].old_state = NULL;
> > > >>> +                state->private_objs[j].new_state = NULL;
> > > >>> +
> > > >>> +                state->num_private_objs = j;
> > > >>> +                break;
> > > >>> +            }
> > > >>>           }
> > > >>>       }
> > > >>>
> > > >>> --
> > > >>> 2.27.0
> > > >>>
> > > >>
> > > >
> > >
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> >
> >
> >
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
Mazin Rezk July 27, 2020, 11:42 p.m. UTC | #12
On Monday, July 27, 2020 5:32 PM, Daniel Vetter <daniel@ffwll.ch> wrote:

> On Mon, Jul 27, 2020 at 11:11 PM Mazin Rezk <mnrzk@protonmail.com> wrote:
> >
> > On Monday, July 27, 2020 4:29 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > > On Mon, Jul 27, 2020 at 9:28 PM Christian König
> > > <christian.koenig@amd.com> wrote:
> > > >
> > > > Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > > > > On 2020-07-27 9:39 a.m., Christian König wrote:
> > > > >> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> > > > >>> This patch fixes a race condition that causes a use-after-free during
> > > > >>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> > > > >>> commits
> > > > >>> are requested and the second one finishes before the first.
> > > > >>> Essentially,
> > > > >>> this bug occurs when the following sequence of events happens:
> > > > >>>
> > > > >>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > > > >>> deferred to the workqueue.
> > > > >>>
> > > > >>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > > > >>> deferred to the workqueue.
> > > > >>>
> > > > >>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > > > >>> commit_tail and commit #2 completes, freeing dm_state #1.
> > > > >>>
> > > > >>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > > > >>> 1 and dereferences a freelist pointer while setting the context.
> > > > >>
> > > > >> Well I only have a one mile high view on this, but why don't you let
> > > > >> the work items execute in order?
> > > > >>
> > > > >> That would be better anyway cause this way we don't trigger a cache
> > > > >> line ping pong between CPUs.
> > > > >>
> > > > >> Christian.
> > > > >
> > > > > We use the DRM helpers for managing drm_atomic_commit_state and those
> > > > > helpers internally push non-blocking commit work into the system
> > > > > unbound work queue.
> > > >
> > > > Mhm, well if you send those helper atomic commits in the order A,B and
> > > > they execute it in the order B,A I would call that a bug :)
> > >
> > > The way it works is it pushes all commits into unbound work queue, but
> > > then forces serialization as needed. We do _not_ want e.g. updates on
> > > different CRTC to be serialized, that would result in lots of judder.
> > > And hw is funny enough that there's all kinds of dependencies.
> > >
> > > The way you force synchronization is by adding other CRTC state
> > > objects. So if DC is busted and can only handle a single update per
> > > work item, then I guess you always need all CRTC states and everything
> > > will be run in order. But that also totally kills modern multi-screen
> > > compositors. Xorg isn't modern, just in case that's not clear :-)
> > >
> > > Lucking at the code it seems like you indeed have only a single dm
> > > state, so yeah global sync is what you'll need as immediate fix, and
> > > then maybe fix up DM to not be quite so silly ... or at least only do
> > > the dm state stuff when really needed.
> > >
> > > We could also sprinkle the drm_crtc_commit structure around a bit
> > > (it's the glue that provides the synchronization across commits), but
> > > since your dm state is global just grabbing all crtc states
> > > unconditionally as part of that is probably best.
> > >
> > > > > While we could duplicate a copy of that code with nothing but the
> > > > > workqueue changed that isn't something I'd really like to maintain
> > > > > going forward.
> > > >
> > > > I'm not talking about duplicating the code, I'm talking about fixing the
> > > > helpers. I don't know that code well, but from the outside it sounds
> > > > like a bug there.
> > > >
> > > > And executing work items in the order they are submitted is trivial.
> > > >
> > > > Had anybody pinged Daniel or other people familiar with the helper code
> > > > about it?
> > >
> > > Yeah something is wrong here, and the fix looks horrible :-)
> > >
> > > Aside, I've also seen some recent discussion flare up about
> > > drm_atomic_state_get/put used to paper over some other use-after-free,
> > > but this time related to interrupt handlers. Maybe a few rules about
> > > that:
> > > - dont
> > > - especially not when it's interrupt handlers, because you can't call
> > > drm_atomic_state_put from interrupt handlers.
> > >
> > > Instead have an spin_lock_irq to protect the shared date with your
> > > interrupt handler, and _copy_ the date over. This is e.g. what
> > > drm_crtc_arm_vblank_event does.
> >
> > Nicholas wrote a patch that attempted to resolve the issue by adding every
> > CRTC into the commit to use use the stall checks. [1] While this forces
> > synchronisation on commits, it's kind of a hacky method that may take a
> > toll on performance.
> >
> > Is it possible to have a DRM helper that forces synchronisation on some
> > commits without having to add every CRTC into the commit?
> >
> > Also, is synchronisation really necessary for fast updates in amdgpu?
> > I'll admit, the idea of eliminating the use-after-free bug by eliminating
> > the use entirely doesn't seem ideal; but is forcing synchronisation on
> > these updates that much better?
>
> Well clearing the dc_state pointer here and then allocating another
> one in atomic_commit_tail also looks fishy. The proper fix is probably
> a lot more involved, but yeah interim fix is to grab all crtc states
> iff you also grabbed the dm_atomic_state structure. Real fix is to
> only do this when necessary, which pretty much means the dc_state
> needs to be somehow split up, or there needs to be some guarantees
> about when it's necessary and when not. Otherwise parallel commits on
> different CRTC are not possible with the current dc backend code.

In that case, I'll test out Nicholas' patch right now and see if it works.
Unlike Duncan, something about my setup seems to trigger the bug rather
quickly, so I should be able to tell if it works in the next couple of
hours with a reasonable degree of accuracy.

Thanks,
Mazin Rezk

>
> See also my dma-fence annotation fixup patch, there dc_state also gets
> in the way:
>
> https://lore.kernel.org/dri-devel/20200707201229.472834-21-daniel.vetter@ffwll.ch/
>
> Nicholas, btw I'm still waiting for some dc feedback on that entire
> series, and what/if there's plans to fix these issues properly.
>
> Maybe even going back to the subclassed drm_atomic_state might be
> better than what we currently have.
> -Daniel
> >
> > [1] https://bugzilla.kernel.org/show_bug.cgi?id=207383#c96
> >
> > Thanks,
> > Mazin Rezk
> >
> > >
> > > Cheers, Daniel
> > >
> > > >
> > > > Regards,
> > > > Christian.
> > > >
> > > > >
> > > > > Regards,
> > > > > Nicholas Kazlauskas
> > > > >
> > > > >>
> > > > >>>
> > > > >>> Since this bug has only been spotted with fast commits, this patch
> > > > >>> fixes
> > > > >>> the bug by clearing the dm_state instead of using the old dc_state for
> > > > >>> fast updates. In addition, since dm_state is only used for its dc_state
> > > > >>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
> > > > >>> found,
> > > > >>> removing the dm_state should not have any consequences in fast updates.
> > > > >>>
> > > > >>> This use-after-free bug has existed for a while now, but only caused a
> > > > >>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
> > > > >>> relocate
> > > > >>> freelist pointer to middle of object") moving the freelist pointer from
> > > > >>> dm_state->base (which was unused) to dm_state->context (which is
> > > > >>> dereferenced).
> > > > >>>
> > > > >>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> > > > >>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> > > > >>> for fast updates")
> > > > >>> Reported-by: Duncan <1i5t5.duncan@cox.net>
> > > > >>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> > > > >>> ---
> > > > >>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> > > > >>> ++++++++++++++-----
> > > > >>>   1 file changed, 27 insertions(+), 9 deletions(-)
> > > > >>>
> > > > >>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > >>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > >>> index 86ffa0c2880f..710edc70e37e 100644
> > > > >>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > >>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > >>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct
> > > > >>> drm_device *dev,
> > > > >>>            * the same resource. If we have a new DC context as part of
> > > > >>>            * the DM atomic state from validation we need to free it and
> > > > >>>            * retain the existing one instead.
> > > > >>> +         *
> > > > >>> +         * Furthermore, since the DM atomic state only contains the DC
> > > > >>> +         * context and can safely be annulled, we can free the state
> > > > >>> +         * and clear the associated private object now to free
> > > > >>> +         * some memory and avoid a possible use-after-free later.
> > > > >>>            */
> > > > >>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
> > > > >>>
> > > > >>> -        new_dm_state = dm_atomic_get_new_state(state);
> > > > >>> -        old_dm_state = dm_atomic_get_old_state(state);
> > > > >>> +        for (i = 0; i < state->num_private_objs; i++) {
> > > > >>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
> > > > >>>
> > > > >>> -        if (new_dm_state && old_dm_state) {
> > > > >>> -            if (new_dm_state->context)
> > > > >>> -                dc_release_state(new_dm_state->context);
> > > > >>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
> > > > >>> +                int j = state->num_private_objs-1;
> > > > >>>
> > > > >>> -            new_dm_state->context = old_dm_state->context;
> > > > >>> +                dm_atomic_destroy_state(obj,
> > > > >>> +                        state->private_objs[i].state);
> > > > >>> +
> > > > >>> +                /* If i is not at the end of the array then the
> > > > >>> +                 * last element needs to be moved to where i was
> > > > >>> +                 * before the array can safely be truncated.
> > > > >>> +                 */
> > > > >>> +                if (i != j)
> > > > >>> +                    state->private_objs[i] =
> > > > >>> +                        state->private_objs[j];
> > > > >>>
> > > > >>> -            if (old_dm_state->context)
> > > > >>> -                dc_retain_state(old_dm_state->context);
> > > > >>> +                state->private_objs[j].ptr = NULL;
> > > > >>> +                state->private_objs[j].state = NULL;
> > > > >>> +                state->private_objs[j].old_state = NULL;
> > > > >>> +                state->private_objs[j].new_state = NULL;
> > > > >>> +
> > > > >>> +                state->num_private_objs = j;
> > > > >>> +                break;
> > > > >>> +            }
> > > > >>>           }
> > > > >>>       }
> > > > >>>
> > > > >>> --
> > > > >>> 2.27.0
> > > > >>>
> > > > >>
> > > > >
> > > >
> > > > _______________________________________________
> > > > dri-devel mailing list
> > > > dri-devel@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> > >
> > >
> > >
> > > --
> > > Daniel Vetter
> > > Software Engineer, Intel Corporation
> > > http://blog.ffwll.ch
>
>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Kazlauskas, Nicholas July 28, 2020, 2:49 a.m. UTC | #13
On 2020-07-27 5:32 p.m., Daniel Vetter wrote:
> On Mon, Jul 27, 2020 at 11:11 PM Mazin Rezk <mnrzk@protonmail.com> wrote:
>>
>> On Monday, July 27, 2020 4:29 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
>>
>>> On Mon, Jul 27, 2020 at 9:28 PM Christian König
>>> <christian.koenig@amd.com> wrote:
>>>>
>>>> Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
>>>>> On 2020-07-27 9:39 a.m., Christian König wrote:
>>>>>> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
>>>>>>> This patch fixes a race condition that causes a use-after-free during
>>>>>>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
>>>>>>> commits
>>>>>>> are requested and the second one finishes before the first.
>>>>>>> Essentially,
>>>>>>> this bug occurs when the following sequence of events happens:
>>>>>>>
>>>>>>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
>>>>>>> deferred to the workqueue.
>>>>>>>
>>>>>>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
>>>>>>> deferred to the workqueue.
>>>>>>>
>>>>>>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
>>>>>>> commit_tail and commit #2 completes, freeing dm_state #1.
>>>>>>>
>>>>>>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
>>>>>>> 1 and dereferences a freelist pointer while setting the context.
>>>>>>
>>>>>> Well I only have a one mile high view on this, but why don't you let
>>>>>> the work items execute in order?
>>>>>>
>>>>>> That would be better anyway cause this way we don't trigger a cache
>>>>>> line ping pong between CPUs.
>>>>>>
>>>>>> Christian.
>>>>>
>>>>> We use the DRM helpers for managing drm_atomic_commit_state and those
>>>>> helpers internally push non-blocking commit work into the system
>>>>> unbound work queue.
>>>>
>>>> Mhm, well if you send those helper atomic commits in the order A,B and
>>>> they execute it in the order B,A I would call that a bug :)
>>>
>>> The way it works is it pushes all commits into unbound work queue, but
>>> then forces serialization as needed. We do _not_ want e.g. updates on
>>> different CRTC to be serialized, that would result in lots of judder.
>>> And hw is funny enough that there's all kinds of dependencies.
>>>
>>> The way you force synchronization is by adding other CRTC state
>>> objects. So if DC is busted and can only handle a single update per
>>> work item, then I guess you always need all CRTC states and everything
>>> will be run in order. But that also totally kills modern multi-screen
>>> compositors. Xorg isn't modern, just in case that's not clear :-)
>>>
>>> Lucking at the code it seems like you indeed have only a single dm
>>> state, so yeah global sync is what you'll need as immediate fix, and
>>> then maybe fix up DM to not be quite so silly ... or at least only do
>>> the dm state stuff when really needed.
>>>
>>> We could also sprinkle the drm_crtc_commit structure around a bit
>>> (it's the glue that provides the synchronization across commits), but
>>> since your dm state is global just grabbing all crtc states
>>> unconditionally as part of that is probably best.
>>>
>>>>> While we could duplicate a copy of that code with nothing but the
>>>>> workqueue changed that isn't something I'd really like to maintain
>>>>> going forward.
>>>>
>>>> I'm not talking about duplicating the code, I'm talking about fixing the
>>>> helpers. I don't know that code well, but from the outside it sounds
>>>> like a bug there.
>>>>
>>>> And executing work items in the order they are submitted is trivial.
>>>>
>>>> Had anybody pinged Daniel or other people familiar with the helper code
>>>> about it?
>>>
>>> Yeah something is wrong here, and the fix looks horrible :-)
>>>
>>> Aside, I've also seen some recent discussion flare up about
>>> drm_atomic_state_get/put used to paper over some other use-after-free,
>>> but this time related to interrupt handlers. Maybe a few rules about
>>> that:
>>> - dont
>>> - especially not when it's interrupt handlers, because you can't call
>>> drm_atomic_state_put from interrupt handlers.
>>>
>>> Instead have an spin_lock_irq to protect the shared date with your
>>> interrupt handler, and _copy_ the date over. This is e.g. what
>>> drm_crtc_arm_vblank_event does.
>>
>> Nicholas wrote a patch that attempted to resolve the issue by adding every
>> CRTC into the commit to use use the stall checks. [1] While this forces
>> synchronisation on commits, it's kind of a hacky method that may take a
>> toll on performance.
>>
>> Is it possible to have a DRM helper that forces synchronisation on some
>> commits without having to add every CRTC into the commit?
>>
>> Also, is synchronisation really necessary for fast updates in amdgpu?
>> I'll admit, the idea of eliminating the use-after-free bug by eliminating
>> the use entirely doesn't seem ideal; but is forcing synchronisation on
>> these updates that much better?
> 
> Well clearing the dc_state pointer here and then allocating another
> one in atomic_commit_tail also looks fishy. The proper fix is probably
> a lot more involved, but yeah interim fix is to grab all crtc states
> iff you also grabbed the dm_atomic_state structure. Real fix is to
> only do this when necessary, which pretty much means the dc_state
> needs to be somehow split up, or there needs to be some guarantees
> about when it's necessary and when not. Otherwise parallel commits on
> different CRTC are not possible with the current dc backend code.

Thanks for spending some time to help take a look at this as well.

The DRM documentation (at least at the time) seemed to imply that 
subclassing DRM private state was for legacy usage only and DRM wanted 
to move away from it. DRM private objects seemed to fit as the nicer 
atomic method for handling this since they have old/new, but as you can 
guess from this issue it's a mess (from our end).

The first step to fixing this is going back to subclassing DRM state.

It's actually the right tool for the job for allocating temporary state 
outside of the core DRM objects, and we need this to form the DC state 
object and necessary stream update bundles which might be too big to fit 
on the stack for commit check/commit tail. We'll be a lot closer to 
fixing the lockdep issues this way once we get around to getting rid of 
allocations in the DC commit paths.

The second step is to fix validation itself. The old state requirement 
for checking stream/plane updates was actually an entirely pointless 
endeavor since dc global validation doesn't ever look at updates, just 
the final state for a set of streams/planes - it's stateless.

We wanted to rely on DC to internally notify DM when DRM changes would 
do this, but DM actually requires this logic as well to know when to 
use a fast path vs a full programming path to recreate planes/streams 
based on the new DRM state to pass into validation.

State updates will change to be formed from a delta of the old/new plane 
state, but the new one can be discarded if not needed.

Any update that requires clock/bandwidth changes will have to add all 
the CRTCs and stall for any fast updates to complete first. This is 
because they could potentially reshuffle around the pipes as well, but 
these updates happen infrequently enough that the stall isn't too bad here.

DC state unfortunately still does need to exist due to requirements on 
hardware resources/pipe splitting that don't fit nicely into the DRM 
infrastructure. It's really hard to shift everything over into the DRM 
infrastructure as well because of the DKMS problem we chatted about 
briefly last year at XDC.

I'd really like to tackle a third step as well at some point, and that's 
cleaning up our IRQ management. Our interrupt handlers unfortunately 
access DRM state directly (since it's so easy to do) and thus introduce 
a requirement that it doesn't get changed while these are still enabled. 
This requires us to introduce our own stall checks in atomic_check and 
perform the interrupt disable in commit before the state swap.

The fix for this one isn't too bad, but it's tedious - copying all the 
state we need to the interrupt handlers before enabling them and letting 
them continue to work off of that state. This way we can remove the 
stall from atomic_check and actually disable/enable interrupts from 
commit_tail like we should be doing.

> 
> See also my dma-fence annotation fixup patch, there dc_state also gets
> in the way:
> 
> https://lore.kernel.org/dri-devel/20200707201229.472834-21-daniel.vetter@ffwll.ch/
> 
> Nicholas, btw I'm still waiting for some dc feedback on that entire
> series, and what/if there's plans to fix these issues properly.
> 
> Maybe even going back to the subclassed drm_atomic_state might be
> better than what we currently have.
> -Daniel

I've taken a look at that series but forgot to ACK. While this isn't the 
same thread for it, you can have my:

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

...on the DC/DM bits. Everything you've identified there is correct and 
it's something I'd really like to get around to taking a look at by the 
end of the year, hopefully.

State allocations will be solved by the DM state allocation rework and 
the tiling flags thing needs to be solved by storing those in 
atomic_check instead on the plane.

Regards,
Nicholas Kazlauskas

>>
>> [1] https://bugzilla.kernel.org/show_bug.cgi?id=207383#c96
>>
>> Thanks,
>> Mazin Rezk
>>
>>>
>>> Cheers, Daniel
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>> Regards,
>>>>> Nicholas Kazlauskas
>>>>>
>>>>>>
>>>>>>>
>>>>>>> Since this bug has only been spotted with fast commits, this patch
>>>>>>> fixes
>>>>>>> the bug by clearing the dm_state instead of using the old dc_state for
>>>>>>> fast updates. In addition, since dm_state is only used for its dc_state
>>>>>>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
>>>>>>> found,
>>>>>>> removing the dm_state should not have any consequences in fast updates.
>>>>>>>
>>>>>>> This use-after-free bug has existed for a while now, but only caused a
>>>>>>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
>>>>>>> relocate
>>>>>>> freelist pointer to middle of object") moving the freelist pointer from
>>>>>>> dm_state->base (which was unused) to dm_state->context (which is
>>>>>>> dereferenced).
>>>>>>>
>>>>>>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
>>>>>>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
>>>>>>> for fast updates")
>>>>>>> Reported-by: Duncan <1i5t5.duncan@cox.net>
>>>>>>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
>>>>>>> ---
>>>>>>>    .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
>>>>>>> ++++++++++++++-----
>>>>>>>    1 file changed, 27 insertions(+), 9 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>>>> index 86ffa0c2880f..710edc70e37e 100644
>>>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>>>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct
>>>>>>> drm_device *dev,
>>>>>>>             * the same resource. If we have a new DC context as part of
>>>>>>>             * the DM atomic state from validation we need to free it and
>>>>>>>             * retain the existing one instead.
>>>>>>> +         *
>>>>>>> +         * Furthermore, since the DM atomic state only contains the DC
>>>>>>> +         * context and can safely be annulled, we can free the state
>>>>>>> +         * and clear the associated private object now to free
>>>>>>> +         * some memory and avoid a possible use-after-free later.
>>>>>>>             */
>>>>>>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
>>>>>>>
>>>>>>> -        new_dm_state = dm_atomic_get_new_state(state);
>>>>>>> -        old_dm_state = dm_atomic_get_old_state(state);
>>>>>>> +        for (i = 0; i < state->num_private_objs; i++) {
>>>>>>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
>>>>>>>
>>>>>>> -        if (new_dm_state && old_dm_state) {
>>>>>>> -            if (new_dm_state->context)
>>>>>>> -                dc_release_state(new_dm_state->context);
>>>>>>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
>>>>>>> +                int j = state->num_private_objs-1;
>>>>>>>
>>>>>>> -            new_dm_state->context = old_dm_state->context;
>>>>>>> +                dm_atomic_destroy_state(obj,
>>>>>>> +                        state->private_objs[i].state);
>>>>>>> +
>>>>>>> +                /* If i is not at the end of the array then the
>>>>>>> +                 * last element needs to be moved to where i was
>>>>>>> +                 * before the array can safely be truncated.
>>>>>>> +                 */
>>>>>>> +                if (i != j)
>>>>>>> +                    state->private_objs[i] =
>>>>>>> +                        state->private_objs[j];
>>>>>>>
>>>>>>> -            if (old_dm_state->context)
>>>>>>> -                dc_retain_state(old_dm_state->context);
>>>>>>> +                state->private_objs[j].ptr = NULL;
>>>>>>> +                state->private_objs[j].state = NULL;
>>>>>>> +                state->private_objs[j].old_state = NULL;
>>>>>>> +                state->private_objs[j].new_state = NULL;
>>>>>>> +
>>>>>>> +                state->num_private_objs = j;
>>>>>>> +                break;
>>>>>>> +            }
>>>>>>>            }
>>>>>>>        }
>>>>>>>
>>>>>>> --
>>>>>>> 2.27.0
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>
>>>
>>>
>>> --
>>> Daniel Vetter
>>> Software Engineer, Intel Corporation
>>> http://blog.ffwll.ch/
> 
> 
>
Mazin Rezk July 28, 2020, 4:42 a.m. UTC | #14
On Monday, July 27, 2020 7:42 PM, Mazin Rezk <mnrzk@protonmail.com> wrote:

> On Monday, July 27, 2020 5:32 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
>
> > On Mon, Jul 27, 2020 at 11:11 PM Mazin Rezk <mnrzk@protonmail.com> wrote:
> > >
> > > On Monday, July 27, 2020 4:29 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
> > >
> > > > On Mon, Jul 27, 2020 at 9:28 PM Christian König
> > > > <christian.koenig@amd.com> wrote:
> > > > >
> > > > > Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > > > > > On 2020-07-27 9:39 a.m., Christian König wrote:
> > > > > >> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> > > > > >>> This patch fixes a race condition that causes a use-after-free during
> > > > > >>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> > > > > >>> commits
> > > > > >>> are requested and the second one finishes before the first.
> > > > > >>> Essentially,
> > > > > >>> this bug occurs when the following sequence of events happens:
> > > > > >>>
> > > > > >>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > > > > >>> deferred to the workqueue.
> > > > > >>>
> > > > > >>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > > > > >>> deferred to the workqueue.
> > > > > >>>
> > > > > >>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > > > > >>> commit_tail and commit #2 completes, freeing dm_state #1.
> > > > > >>>
> > > > > >>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > > > > >>> 1 and dereferences a freelist pointer while setting the context.
> > > > > >>
> > > > > >> Well I only have a one mile high view on this, but why don't you let
> > > > > >> the work items execute in order?
> > > > > >>
> > > > > >> That would be better anyway cause this way we don't trigger a cache
> > > > > >> line ping pong between CPUs.
> > > > > >>
> > > > > >> Christian.
> > > > > >
> > > > > > We use the DRM helpers for managing drm_atomic_commit_state and those
> > > > > > helpers internally push non-blocking commit work into the system
> > > > > > unbound work queue.
> > > > >
> > > > > Mhm, well if you send those helper atomic commits in the order A,B and
> > > > > they execute it in the order B,A I would call that a bug :)
> > > >
> > > > The way it works is it pushes all commits into unbound work queue, but
> > > > then forces serialization as needed. We do _not_ want e.g. updates on
> > > > different CRTC to be serialized, that would result in lots of judder.
> > > > And hw is funny enough that there's all kinds of dependencies.
> > > >
> > > > The way you force synchronization is by adding other CRTC state
> > > > objects. So if DC is busted and can only handle a single update per
> > > > work item, then I guess you always need all CRTC states and everything
> > > > will be run in order. But that also totally kills modern multi-screen
> > > > compositors. Xorg isn't modern, just in case that's not clear :-)
> > > >
> > > > Looking at the code it seems like you indeed have only a single dm
> > > > state, so yeah global sync is what you'll need as immediate fix, and
> > > > then maybe fix up DM to not be quite so silly ... or at least only do
> > > > the dm state stuff when really needed.
> > > >
> > > > We could also sprinkle the drm_crtc_commit structure around a bit
> > > > (it's the glue that provides the synchronization across commits), but
> > > > since your dm state is global just grabbing all crtc states
> > > > unconditionally as part of that is probably best.
> > > >
> > > > > > While we could duplicate a copy of that code with nothing but the
> > > > > > workqueue changed that isn't something I'd really like to maintain
> > > > > > going forward.
> > > > >
> > > > > I'm not talking about duplicating the code, I'm talking about fixing the
> > > > > helpers. I don't know that code well, but from the outside it sounds
> > > > > like a bug there.
> > > > >
> > > > > And executing work items in the order they are submitted is trivial.
> > > > >
> > > > > Had anybody pinged Daniel or other people familiar with the helper code
> > > > > about it?
> > > >
> > > > Yeah something is wrong here, and the fix looks horrible :-)
> > > >
> > > > Aside, I've also seen some recent discussion flare up about
> > > > drm_atomic_state_get/put used to paper over some other use-after-free,
> > > > but this time related to interrupt handlers. Maybe a few rules about
> > > > that:
> > > > - dont
> > > > - especially not when it's interrupt handlers, because you can't call
> > > > drm_atomic_state_put from interrupt handlers.
> > > >
> > > > Instead have a spin_lock_irq to protect the shared data with your
> > > > interrupt handler, and _copy_ the data over. This is e.g. what
> > > > drm_crtc_arm_vblank_event does.
> > >
> > > Nicholas wrote a patch that attempted to resolve the issue by adding every
> > > CRTC into the commit to use use the stall checks. [1] While this forces
> > > synchronisation on commits, it's kind of a hacky method that may take a
> > > toll on performance.
> > >
> > > Is it possible to have a DRM helper that forces synchronisation on some
> > > commits without having to add every CRTC into the commit?
> > >
> > > Also, is synchronisation really necessary for fast updates in amdgpu?
> > > I'll admit, the idea of eliminating the use-after-free bug by eliminating
> > > the use entirely doesn't seem ideal; but is forcing synchronisation on
> > > these updates that much better?
> >
> > Well clearing the dc_state pointer here and then allocating another
> > one in atomic_commit_tail also looks fishy. The proper fix is probably
> > a lot more involved, but yeah interim fix is to grab all crtc states
> > iff you also grabbed the dm_atomic_state structure. Real fix is to
> > only do this when necessary, which pretty much means the dc_state
> > needs to be somehow split up, or there needs to be some guarantees
> > about when it's necessary and when not. Otherwise parallel commits on
> > different CRTC are not possible with the current dc backend code.
>
> In that case, I'll test out Nicholas' patch right now and see if it works.
> Unlike Duncan, something about my setup seems to trigger the bug rather
> quickly, so I should be able to tell if it works in the next couple of
> hours with a reasonable degree of accuracy.
>
> Thanks,
> Mazin Rezk

So I finally got around to trying out the CRTC state patch a while ago, and
it seems to have caused several issues on my machine.

I won't go too far into detail as I've already described them in the Bugzilla
thread [2] but the issues range from freezing displays, blank display,
flickering displays, and missing cursors. These bugs occur on my RX 480 w/
KDE Plasma and XFCE using kwin_x11 and picom/compton respectively.

That said, I still don't know if this bug has been solved; I've been
running my system for 45 minutes (which usually triggers a crash) and I
haven't gotten a crash yet which is a good sign but nowhere near
conclusive.

Regardless, the patch needs to be fixed before it can be applied as an
interim fix for this bug. In its current state, Nicholas' patch creates
more bugs for me (and presumably other users) than it solves. Duncan's
system, however, seems to have no issues with this patch so far. (I really
don't know what's so special about my system that causes those bugs and
makes bug 207383 occur so often)

While I haven't gone too far into debugging my system, I've checked the
dmesg log and nothing remotely interesting appears there. Looking through
the code in the patch, it doesn't seem like it should cause any issues like
this; at most, it seemed like it was going to cause some lag.

There is one thing that I believe may be the culprit though:

if (IS_ERR(new_crtc_state)) {
	ret = PTR_ERR(new_crtc_state);
	goto fail;
}

Perhaps this is causing some fast updates to unnecessarily fail? I don't
know how exactly we would fix this though, removing the error check doesn't
seem ideal to me.

Also, I have a question about the whole drm_atomic_state debacle. Why was
drm_atomic_state changed from subclassing to private objects in the first
place?

[2] https://bugzilla.kernel.org/show_bug.cgi?id=207383#c103

Thanks,
Mazin Rezk

>
> >
> > See also my dma-fence annotation fixup patch, there dc_state also gets
> > in the way:
> >
> > https://lore.kernel.org/dri-devel/20200707201229.472834-21-daniel.vetter@ffwll.ch/
> >
> > Nicholas, btw I'm still waiting for some dc feedback on that entire
> > series, and what/if there's plans to fix these issues properly.
> >
> > Maybe even going back to the subclassed drm_atomic_state might be
> > better than what we currently have.
> > -Daniel
> > >
> > > [1] https://bugzilla.kernel.org/show_bug.cgi?id=207383#c96
> > >
> > > Thanks,
> > > Mazin Rezk
> > >
> > > >
> > > > Cheers, Daniel
> > > >
> > > > >
> > > > > Regards,
> > > > > Christian.
> > > > >
> > > > > >
> > > > > > Regards,
> > > > > > Nicholas Kazlauskas
> > > > > >
> > > > > >>
> > > > > >>>
> > > > > >>> Since this bug has only been spotted with fast commits, this patch
> > > > > >>> fixes
> > > > > >>> the bug by clearing the dm_state instead of using the old dc_state for
> > > > > >>> fast updates. In addition, since dm_state is only used for its dc_state
> > > > > >>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
> > > > > >>> found,
> > > > > >>> removing the dm_state should not have any consequences in fast updates.
> > > > > >>>
> > > > > >>> This use-after-free bug has existed for a while now, but only caused a
> > > > > >>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
> > > > > >>> relocate
> > > > > >>> freelist pointer to middle of object") moving the freelist pointer from
> > > > > >>> dm_state->base (which was unused) to dm_state->context (which is
> > > > > >>> dereferenced).
> > > > > >>>
> > > > > >>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> > > > > >>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> > > > > >>> for fast updates")
> > > > > >>> Reported-by: Duncan <1i5t5.duncan@cox.net>
> > > > > >>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> > > > > >>> ---
> > > > > >>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> > > > > >>> ++++++++++++++-----
> > > > > >>>   1 file changed, 27 insertions(+), 9 deletions(-)
> > > > > >>>
> > > > > >>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > >>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > >>> index 86ffa0c2880f..710edc70e37e 100644
> > > > > >>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > >>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > >>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct
> > > > > >>> drm_device *dev,
> > > > > >>>            * the same resource. If we have a new DC context as part of
> > > > > >>>            * the DM atomic state from validation we need to free it and
> > > > > >>>            * retain the existing one instead.
> > > > > >>> +         *
> > > > > >>> +         * Furthermore, since the DM atomic state only contains the DC
> > > > > >>> +         * context and can safely be annulled, we can free the state
> > > > > >>> +         * and clear the associated private object now to free
> > > > > >>> +         * some memory and avoid a possible use-after-free later.
> > > > > >>>            */
> > > > > >>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
> > > > > >>>
> > > > > >>> -        new_dm_state = dm_atomic_get_new_state(state);
> > > > > >>> -        old_dm_state = dm_atomic_get_old_state(state);
> > > > > >>> +        for (i = 0; i < state->num_private_objs; i++) {
> > > > > >>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
> > > > > >>>
> > > > > >>> -        if (new_dm_state && old_dm_state) {
> > > > > >>> -            if (new_dm_state->context)
> > > > > >>> -                dc_release_state(new_dm_state->context);
> > > > > >>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
> > > > > >>> +                int j = state->num_private_objs-1;
> > > > > >>>
> > > > > >>> -            new_dm_state->context = old_dm_state->context;
> > > > > >>> +                dm_atomic_destroy_state(obj,
> > > > > >>> +                        state->private_objs[i].state);
> > > > > >>> +
> > > > > >>> +                /* If i is not at the end of the array then the
> > > > > >>> +                 * last element needs to be moved to where i was
> > > > > >>> +                 * before the array can safely be truncated.
> > > > > >>> +                 */
> > > > > >>> +                if (i != j)
> > > > > >>> +                    state->private_objs[i] =
> > > > > >>> +                        state->private_objs[j];
> > > > > >>>
> > > > > >>> -            if (old_dm_state->context)
> > > > > >>> -                dc_retain_state(old_dm_state->context);
> > > > > >>> +                state->private_objs[j].ptr = NULL;
> > > > > >>> +                state->private_objs[j].state = NULL;
> > > > > >>> +                state->private_objs[j].old_state = NULL;
> > > > > >>> +                state->private_objs[j].new_state = NULL;
> > > > > >>> +
> > > > > >>> +                state->num_private_objs = j;
> > > > > >>> +                break;
> > > > > >>> +            }
> > > > > >>>           }
> > > > > >>>       }
> > > > > >>>
> > > > > >>> --
> > > > > >>> 2.27.0
> > > > > >>>
> > > > > >>
> > > > > >
> > > > >
> > > > > _______________________________________________
> > > > > dri-devel mailing list
> > > > > dri-devel@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> > > >
> > > >
> > > >
> > > > --
> > > > Daniel Vetter
> > > > Software Engineer, Intel Corporation
> > > > http://blog.ffwll.ch
> >
> >
> >
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
Daniel Vetter July 28, 2020, 9:08 a.m. UTC | #15
On Mon, Jul 27, 2020 at 10:49:48PM -0400, Kazlauskas, Nicholas wrote:
> On 2020-07-27 5:32 p.m., Daniel Vetter wrote:
> > On Mon, Jul 27, 2020 at 11:11 PM Mazin Rezk <mnrzk@protonmail.com> wrote:
> > > 
> > > On Monday, July 27, 2020 4:29 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
> > > 
> > > > On Mon, Jul 27, 2020 at 9:28 PM Christian König
> > > > <christian.koenig@amd.com> wrote:
> > > > > 
> > > > > Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
> > > > > > On 2020-07-27 9:39 a.m., Christian König wrote:
> > > > > > > Am 27.07.20 um 07:40 schrieb Mazin Rezk:
> > > > > > > > This patch fixes a race condition that causes a use-after-free during
> > > > > > > > amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
> > > > > > > > commits
> > > > > > > > are requested and the second one finishes before the first.
> > > > > > > > Essentially,
> > > > > > > > this bug occurs when the following sequence of events happens:
> > > > > > > > 
> > > > > > > > 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
> > > > > > > > deferred to the workqueue.
> > > > > > > > 
> > > > > > > > 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
> > > > > > > > deferred to the workqueue.
> > > > > > > > 
> > > > > > > > 3. Commit #2 starts before commit #1, dm_state #1 is used in the
> > > > > > > > commit_tail and commit #2 completes, freeing dm_state #1.
> > > > > > > > 
> > > > > > > > 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
> > > > > > > > 1 and dereferences a freelist pointer while setting the context.
> > > > > > > 
> > > > > > > Well I only have a one mile high view on this, but why don't you let
> > > > > > > the work items execute in order?
> > > > > > > 
> > > > > > > That would be better anyway cause this way we don't trigger a cache
> > > > > > > line ping pong between CPUs.
> > > > > > > 
> > > > > > > Christian.
> > > > > > 
> > > > > > We use the DRM helpers for managing drm_atomic_commit_state and those
> > > > > > helpers internally push non-blocking commit work into the system
> > > > > > unbound work queue.
> > > > > 
> > > > > Mhm, well if you send those helper atomic commits in the order A,B and
> > > > > they execute it in the order B,A I would call that a bug :)
> > > > 
> > > > The way it works is it pushes all commits into unbound work queue, but
> > > > then forces serialization as needed. We do _not_ want e.g. updates on
> > > > different CRTC to be serialized, that would result in lots of judder.
> > > > And hw is funny enough that there's all kinds of dependencies.
> > > > 
> > > > The way you force synchronization is by adding other CRTC state
> > > > objects. So if DC is busted and can only handle a single update per
> > > > work item, then I guess you always need all CRTC states and everything
> > > > will be run in order. But that also totally kills modern multi-screen
> > > > compositors. Xorg isn't modern, just in case that's not clear :-)
> > > > 
> > > > Looking at the code it seems like you indeed have only a single dm
> > > > state, so yeah global sync is what you'll need as immediate fix, and
> > > > then maybe fix up DM to not be quite so silly ... or at least only do
> > > > the dm state stuff when really needed.
> > > > 
> > > > We could also sprinkle the drm_crtc_commit structure around a bit
> > > > (it's the glue that provides the synchronization across commits), but
> > > > since your dm state is global just grabbing all crtc states
> > > > unconditionally as part of that is probably best.
> > > > 
> > > > > > While we could duplicate a copy of that code with nothing but the
> > > > > > workqueue changed that isn't something I'd really like to maintain
> > > > > > going forward.
> > > > > 
> > > > > I'm not talking about duplicating the code, I'm talking about fixing the
> > > > > helpers. I don't know that code well, but from the outside it sounds
> > > > > like a bug there.
> > > > > 
> > > > > And executing work items in the order they are submitted is trivial.
> > > > > 
> > > > > Had anybody pinged Daniel or other people familiar with the helper code
> > > > > about it?
> > > > 
> > > > Yeah something is wrong here, and the fix looks horrible :-)
> > > > 
> > > > Aside, I've also seen some recent discussion flare up about
> > > > drm_atomic_state_get/put used to paper over some other use-after-free,
> > > > but this time related to interrupt handlers. Maybe a few rules about
> > > > that:
> > > > - dont
> > > > - especially not when it's interrupt handlers, because you can't call
> > > > drm_atomic_state_put from interrupt handlers.
> > > > 
> > > > Instead have a spin_lock_irq to protect the shared data with your
> > > > interrupt handler, and _copy_ the data over. This is e.g. what
> > > > drm_crtc_arm_vblank_event does.
> > > 
> > > Nicholas wrote a patch that attempted to resolve the issue by adding every
> > > CRTC into the commit to use use the stall checks. [1] While this forces
> > > synchronisation on commits, it's kind of a hacky method that may take a
> > > toll on performance.
> > > 
> > > Is it possible to have a DRM helper that forces synchronisation on some
> > > commits without having to add every CRTC into the commit?
> > > 
> > > Also, is synchronisation really necessary for fast updates in amdgpu?
> > > I'll admit, the idea of eliminating the use-after-free bug by eliminating
> > > the use entirely doesn't seem ideal; but is forcing synchronisation on
> > > these updates that much better?
> > 
> > Well clearing the dc_state pointer here and then allocating another
> > one in atomic_commit_tail also looks fishy. The proper fix is probably
> > a lot more involved, but yeah interim fix is to grab all crtc states
> > iff you also grabbed the dm_atomic_state structure. Real fix is to
> > only do this when necessary, which pretty much means the dc_state
> > needs to be somehow split up, or there needs to be some guarantees
> > about when it's necessary and when not. Otherwise parallel commits on
> > different CRTC are not possible with the current dc backend code.
> 
> Thanks for spending some time to help take a look at this as well.
> 
> The DRM documentation (at least at the time) seemed to imply that
> subclassing DRM private state was for legacy usage only and DRM wanted to
> move away from it. DRM private objects seemed to fit as the nicer atomic
> method for handling this since they have old/new, but as you can guess from
> this issue it's a mess (from our end).

Yeah, if it's actual state you put in there. But this dc_state structure
is more like a container of state pointers, like drm_atomic_state. It's
not 100% clear-cut, e.g. the bw stuff is more like a real state object I
guess, or maybe it's just temporary storage for computation results?

> The first step to fixing this is going back to subclassing DRM state.
> 
> It's actually the right tool for the job for allocating temporary state
> outside of the core DRM objects, and we need this to form the DC state
> object and necessary stream update bundles which might be too big to fit on
> the stack for commit check/commit tail. We'll be a lot closer to fixing the
> lockdep issues this way once we get around to getting rid of allocations in
> the DC commit paths.

So on the stream bundles ... where do these pointers point to? Are the
streams themselves in the crtc/plane states?

> The second step is to fix validation itself. The old state requirement for
> checking stream/plane updates was actually an entirely pointless endeavor
> since dc global validation doesn't ever look at updates, just the final
> state for a set of streams/planes - it's stateless.

Uh, is your hw that good, i.e. there's no impossible state transitions? Or
do you simply force a modeset in such cases? In which case, are the
modeset flags correctly updated so that userspace can control this with
ALLOW_MODESET?

> We wanted to rely on DC to internally notify DM when DRM changes would do
> this, but DM actually requires this logic as well to know when to use a fast
> path vs a full programming path to recreate planes/streams based on the new
> DRM state to pass into validation.
> 
> State updates will change to be formed from a delta of the old/new plane
> state, but the new one can be discarded if not needed.
> 
> Any update that requires clock/bandwidth changes will have to add all the
> CRTCs and stall for any fast updates to complete first. This is because they
> could potentially reshuffle around the pipes as well, but these updates
> happen infrequently enough that the stall isn't too bad here.
> 
> DC state unfortunately still does need to exist due to requirements on
> hardware resources/pipe splitting that don't fit nicely into the DRM
> infrastructure. It's really hard to shift everything over into the DRM
> infrastructure as well because of the DKMS problem we chatted about briefly
> last year at XDC.

Uh this is awkward, holding up upstream because of downstream kernels ...

Can't you fix this with a lot more glue in the downstream kernels, stuff
like copying the entire atomic helpers into your driver when there's new
stuff?

For the multi-pipe stuff, how other drivers handle this is by internally
remapping userspace visible crtc/plane objects to underlying hw objects.
And then if you have remapping going on you add the userspace state
structs for all underlying remapped crtc/plane in use by the current
update. That should be enough to force just enough synchronization.

Then in your hw commit functions you simple iterate over all the states in
your update, as if you'd iterate over streams directly. So example:

struct amdgpu_crtc_state {
	struct drm_crtc_state base;
	struct dm_stream_mapping_state mapping;
	/* this crtc might not be using this stream! */
	struct dm_stream_state stream;
};

I think msm works like this, but there's also other drivers.

When you have to change the mapping then you grab the global mapping
private state, but in any other cases you use the read-only copy in the
crtc state. And the global state is only for book-keeping, so no sync
needed (aside from the sync provided by adding the crtc states for each
stream you touch).

Ofc might have gotten the stream lingo wrong, but this is roughly how to
do this in atomic.

> I'd really like to tackle a third step as well at some point, and that's
> cleaning up our IRQ management. Our interrupt handlers unfortunately access
> DRM state directly (since it's so easy to do) and thus introduce a
> requirement that it doesn't get changed while these are still enabled. This
> requires us to introduce our own stall checks in atomic_check and perform
> the interrupt disable in commit before the state swap.

Yeah don't do that :-) It might work if you guarantee that the atomic
commit_tail is waiting for all your irq handlers to complete, plus use rcu
dereferencing on these pointers.

Copying the relevant bits over should be much nicer imo, also passing
explicit pointers at least. I.e. if you don't want to copy, have a
dedicated pointer protected by irqsave spin_lock that you share with the
interrupt handler. Stopping interrupt handlers before swap_state isn't
really how this should work.

> The fix for this one isn't too bad, but it's tedious - copying all the state
> we need to the interrupt handlers before enabling them and letting them
> continue to work off of that state. This way we can remove the stall from
> atomic_check and actually disable/enable interrupts from commit_tail like we
> should be doing.

Yup. I think you can have some pointers to specific state, _iff_ you make
sure the commit_tail waits for all the interrupt handlers to complete
somewhere (depending upon how this all works). But copying is probably
simpler to understand - interrupt races are tricky.
> 
> > 
> > See also my dma-fence annotation fixup patch, there dc_state also gets
> > in the way:
> > 
> > https://lore.kernel.org/dri-devel/20200707201229.472834-21-daniel.vetter@ffwll.ch/
> > 
> > Nicholas, btw I'm still waiting for some dc feedback on that entire
> > series, and what/if there's plans to fix these issues properly.
> > 
> > Maybe even going back to the subclassed drm_atomic_state might be
> > better than what we currently have.
> > -Daniel
> 
> I've taken a look at that series but forgot to ACK. While this isn't the
> same thread for it, you can have my:
> 
> Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
> 
> ...on the DC/DM bits. Everything you've identified there is correct and it's
> something I'd really like to get around to taking a look at by the end of
> the year, hopefully.
> 
> State allocations will be solved by the DM state allocation rework and the
> tiling flags thing needs to be solved by storing those in atomic_check
> instead on the plane.
> 
> Regards,
> Nicholas Kazlauskas
> 
> > > 
> > > [1] https://bugzilla.kernel.org/show_bug.cgi?id=207383#c96
> > > 
> > > Thanks,
> > > Mazin Rezk
> > > 
> > > > 
> > > > Cheers, Daniel
> > > > 
> > > > > 
> > > > > Regards,
> > > > > Christian.
> > > > > 
> > > > > > 
> > > > > > Regards,
> > > > > > Nicholas Kazlauskas
> > > > > > 
> > > > > > > 
> > > > > > > > 
> > > > > > > > Since this bug has only been spotted with fast commits, this patch
> > > > > > > > fixes
> > > > > > > > the bug by clearing the dm_state instead of using the old dc_state for
> > > > > > > > fast updates. In addition, since dm_state is only used for its dc_state
> > > > > > > > and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
> > > > > > > > found,
> > > > > > > > removing the dm_state should not have any consequences in fast updates.
> > > > > > > > 
> > > > > > > > This use-after-free bug has existed for a while now, but only caused a
> > > > > > > > noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
> > > > > > > > relocate
> > > > > > > > freelist pointer to middle of object") moving the freelist pointer from
> > > > > > > > dm_state->base (which was unused) to dm_state->context (which is
> > > > > > > > dereferenced).
> > > > > > > > 
> > > > > > > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
> > > > > > > > Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
> > > > > > > > for fast updates")
> > > > > > > > Reported-by: Duncan <1i5t5.duncan@cox.net>
> > > > > > > > Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>
> > > > > > > > ---
> > > > > > > >    .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
> > > > > > > > ++++++++++++++-----
> > > > > > > >    1 file changed, 27 insertions(+), 9 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > > > > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > > > > index 86ffa0c2880f..710edc70e37e 100644
> > > > > > > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > > > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > > > > @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct
> > > > > > > > drm_device *dev,
> > > > > > > >             * the same resource. If we have a new DC context as part of
> > > > > > > >             * the DM atomic state from validation we need to free it and
> > > > > > > >             * retain the existing one instead.
> > > > > > > > +         *
> > > > > > > > +         * Furthermore, since the DM atomic state only contains the DC
> > > > > > > > +         * context and can safely be annulled, we can free the state
> > > > > > > > +         * and clear the associated private object now to free
> > > > > > > > +         * some memory and avoid a possible use-after-free later.
> > > > > > > >             */
> > > > > > > > -        struct dm_atomic_state *new_dm_state, *old_dm_state;
> > > > > > > > 
> > > > > > > > -        new_dm_state = dm_atomic_get_new_state(state);
> > > > > > > > -        old_dm_state = dm_atomic_get_old_state(state);
> > > > > > > > +        for (i = 0; i < state->num_private_objs; i++) {
> > > > > > > > +            struct drm_private_obj *obj = state->private_objs[i].ptr;
> > > > > > > > 
> > > > > > > > -        if (new_dm_state && old_dm_state) {
> > > > > > > > -            if (new_dm_state->context)
> > > > > > > > -                dc_release_state(new_dm_state->context);
> > > > > > > > +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
> > > > > > > > +                int j = state->num_private_objs-1;
> > > > > > > > 
> > > > > > > > -            new_dm_state->context = old_dm_state->context;
> > > > > > > > +                dm_atomic_destroy_state(obj,
> > > > > > > > +                        state->private_objs[i].state);
> > > > > > > > +
> > > > > > > > +                /* If i is not at the end of the array then the
> > > > > > > > +                 * last element needs to be moved to where i was
> > > > > > > > +                 * before the array can safely be truncated.
> > > > > > > > +                 */
> > > > > > > > +                if (i != j)
> > > > > > > > +                    state->private_objs[i] =
> > > > > > > > +                        state->private_objs[j];
> > > > > > > > 
> > > > > > > > -            if (old_dm_state->context)
> > > > > > > > -                dc_retain_state(old_dm_state->context);
> > > > > > > > +                state->private_objs[j].ptr = NULL;
> > > > > > > > +                state->private_objs[j].state = NULL;
> > > > > > > > +                state->private_objs[j].old_state = NULL;
> > > > > > > > +                state->private_objs[j].new_state = NULL;
> > > > > > > > +
> > > > > > > > +                state->num_private_objs = j;
> > > > > > > > +                break;
> > > > > > > > +            }
> > > > > > > >            }
> > > > > > > >        }
> > > > > > > > 
> > > > > > > > --
> > > > > > > > 2.27.0
> > > > > > > > 
> > > > > > > 
> > > > > > 
> > > > > 
> > > > > _______________________________________________
> > > > > dri-devel mailing list
> > > > > dri-devel@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> > > > 
> > > > 
> > > > 
> > > > --
> > > > Daniel Vetter
> > > > Software Engineer, Intel Corporation
> > > > http://blog.ffwll.ch/
> > 
> > 
> > 
>
Kazlauskas, Nicholas July 29, 2020, 4:49 p.m. UTC | #16
On 2020-07-28 5:08 a.m., daniel@ffwll.ch wrote:
> On Mon, Jul 27, 2020 at 10:49:48PM -0400, Kazlauskas, Nicholas wrote:
>> On 2020-07-27 5:32 p.m., Daniel Vetter wrote:
>>> On Mon, Jul 27, 2020 at 11:11 PM Mazin Rezk <mnrzk@protonmail.com> wrote:
>>>>
>>>> On Monday, July 27, 2020 4:29 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
>>>>
>>>>> On Mon, Jul 27, 2020 at 9:28 PM Christian König
>>>>> <christian.koenig@amd.com> wrote:
>>>>>>
>>>>>> Am 27.07.20 um 16:05 schrieb Kazlauskas, Nicholas:
>>>>>>> On 2020-07-27 9:39 a.m., Christian König wrote:
>>>>>>>> Am 27.07.20 um 07:40 schrieb Mazin Rezk:
>>>>>>>>> This patch fixes a race condition that causes a use-after-free during
>>>>>>>>> amdgpu_dm_atomic_commit_tail. This can occur when 2 non-blocking
>>>>>>>>> commits
>>>>>>>>> are requested and the second one finishes before the first.
>>>>>>>>> Essentially,
>>>>>>>>> this bug occurs when the following sequence of events happens:
>>>>>>>>>
>>>>>>>>> 1. Non-blocking commit #1 is requested w/ a new dm_state #1 and is
>>>>>>>>> deferred to the workqueue.
>>>>>>>>>
>>>>>>>>> 2. Non-blocking commit #2 is requested w/ a new dm_state #2 and is
>>>>>>>>> deferred to the workqueue.
>>>>>>>>>
>>>>>>>>> 3. Commit #2 starts before commit #1, dm_state #1 is used in the
>>>>>>>>> commit_tail and commit #2 completes, freeing dm_state #1.
>>>>>>>>>
>>>>>>>>> 4. Commit #1 starts after commit #2 completes, uses the freed dm_state
>>>>>>>>> 1 and dereferences a freelist pointer while setting the context.
>>>>>>>>
>>>>>>>> Well I only have a one mile high view on this, but why don't you let
>>>>>>>> the work items execute in order?
>>>>>>>>
>>>>>>>> That would be better anyway cause this way we don't trigger a cache
>>>>>>>> line ping pong between CPUs.
>>>>>>>>
>>>>>>>> Christian.
>>>>>>>
>>>>>>> We use the DRM helpers for managing drm_atomic_commit_state and those
>>>>>>> helpers internally push non-blocking commit work into the system
>>>>>>> unbound work queue.
>>>>>>
>>>>>> Mhm, well if you send those helper atomic commits in the order A,B and
>>>>>> they execute it in the order B,A I would call that a bug :)
>>>>>
>>>>> The way it works is it pushes all commits into unbound work queue, but
>>>>> then forces serialization as needed. We do _not_ want e.g. updates on
>>>>> different CRTC to be serialized, that would result in lots of judder.
>>>>> And hw is funny enough that there's all kinds of dependencies.
>>>>>
>>>>> The way you force synchronization is by adding other CRTC state
>>>>> objects. So if DC is busted and can only handle a single update per
>>>>> work item, then I guess you always need all CRTC states and everything
>>>>> will be run in order. But that also totally kills modern multi-screen
>>>>> compositors. Xorg isn't modern, just in case that's not clear :-)
>>>>>
>>>>> Looking at the code it seems like you indeed have only a single dm
>>>>> state, so yeah global sync is what you'll need as immediate fix, and
>>>>> then maybe fix up DM to not be quite so silly ... or at least only do
>>>>> the dm state stuff when really needed.
>>>>>
>>>>> We could also sprinkle the drm_crtc_commit structure around a bit
>>>>> (it's the glue that provides the synchronization across commits), but
>>>>> since your dm state is global just grabbing all crtc states
>>>>> unconditionally as part of that is probably best.
>>>>>
>>>>>>> While we could duplicate a copy of that code with nothing but the
>>>>>>> workqueue changed that isn't something I'd really like to maintain
>>>>>>> going forward.
>>>>>>
>>>>>> I'm not talking about duplicating the code, I'm talking about fixing the
>>>>>> helpers. I don't know that code well, but from the outside it sounds
>>>>>> like a bug there.
>>>>>>
>>>>>> And executing work items in the order they are submitted is trivial.
>>>>>>
>>>>>> Had anybody pinged Daniel or other people familiar with the helper code
>>>>>> about it?
>>>>>
>>>>> Yeah something is wrong here, and the fix looks horrible :-)
>>>>>
>>>>> Aside, I've also seen some recent discussion flare up about
>>>>> drm_atomic_state_get/put used to paper over some other use-after-free,
>>>>> but this time related to interrupt handlers. Maybe a few rules about
>>>>> that:
>>>>> - dont
>>>>> - especially not when it's interrupt handlers, because you can't call
>>>>> drm_atomic_state_put from interrupt handlers.
>>>>>
>>>>> Instead have a spin_lock_irq to protect the shared data with your
>>>>> interrupt handler, and _copy_ the data over. This is e.g. what
>>>>> drm_crtc_arm_vblank_event does.
>>>>
>>>> Nicholas wrote a patch that attempted to resolve the issue by adding every
>>>> CRTC into the commit to use the stall checks. [1] While this forces
>>>> synchronisation on commits, it's kind of a hacky method that may take a
>>>> toll on performance.
>>>>
>>>> Is it possible to have a DRM helper that forces synchronisation on some
>>>> commits without having to add every CRTC into the commit?
>>>>
>>>> Also, is synchronisation really necessary for fast updates in amdgpu?
>>>> I'll admit, the idea of eliminating the use-after-free bug by eliminating
>>>> the use entirely doesn't seem ideal; but is forcing synchronisation on
>>>> these updates that much better?
>>>
>>> Well clearing the dc_state pointer here and then allocating another
>>> one in atomic_commit_tail also looks fishy. The proper fix is probably
>>> a lot more involved, but yeah interim fix is to grab all crtc states
>>> iff you also grabbed the dm_atomic_state structure. Real fix is to
>>> only do this when necessary, which pretty much means the dc_state
>>> needs to be somehow split up, or there needs to be some guarantees
>>> about when it's necessary and when not. Otherwise parallel commits on
>>> different CRTC are not possible with the current dc backend code.
>>
>> Thanks for spending some time to help take a look at this as well.
>>
>> The DRM documentation (at least at the time) seemed to imply that
>> subclassing DRM private state was for legacy usage only and DRM wanted to
>> move away from it. DRM private objects seemed to fit as the nicer atomic
>> method for handling this since they have old/new, but as you can guess from
>> this issue it's a mess (from our end).
> 
> Yeah, if it's actual state you put in there. But this dc_state structure
> is more like a container of state pointers, like drm_atomic_state. It's
> not 100% clear-cut, e.g. the bw stuff is more like a real state object I
> guess, or maybe it's just temporary storage for computation results?
> 
>> The first step to fixing this is going back to subclassing DRM state.
>>
>> It's actually the right tool for the job for allocating temporary state
>> outside of the core DRM objects, and we need this to form the DC state
>> object and necessary stream update bundles which might be too big to fit on
>> the stack for commit check/commit tail. We'll be a lot closer to fixing the
>> lockdep issues this way once we get around to getting rid of allocations in
>> the DC commit paths.
> 
> So on the stream bundles ... where do these pointers point to? Are the
> streams themselves in the crtc/plane states?
> 
>> The second step is to fix validation itself. The old state requirement for
>> checking stream/plane updates was actually an entirely pointless endeavor
>> since dc global validation doesn't ever look at updates, just the final
>> state for a set of streams/planes - it's stateless.
> 
> Uh, is your hw that good, i.e. there's no impossible state transitions? Or
> do you simply force a modeset in such cases? In which case, are the
> modeset flags correctly updated so that userspace can control this with
> ALLOW_MODESET?
> 
>> We wanted to rely on DC to internally notify DM when DRM changes would do
>> this, but DM actually requires this logic as well to know when to use a fast
>> path vs a full programming path to recreate planes/streams based on the new
>> DRM state to pass into validation.
>>
>> State updates will change to be formed from a delta of the old/new plane
>> state, but the new one can be discarded if not needed.
>>
>> Any update that requires clock/bandwidth changes will have to add all the
>> CRTCs and stall for any fast updates to complete first. This is because they
>> could potentially reshuffle around the pipes as well, but these updates
>> happen infrequently enough that the stall isn't too bad here.
>>
>> DC state unfortunately still does need to exist due to requirements on
>> hardware resources/pipe splitting that don't fit nicely into the DRM
>> infrastructure. It's really hard to shift everything over into the DRM
>> infrastructure as well because of the DKMS problem we chatted about briefly
>> last year at XDC.
> 
> Uh this is awkward, holding up upstream because of downstream kernels ...
> 
> Can't you fix this with a lot more glue in the downstream kernels, stuff
> like copying the entire atomic helpers into your driver when there's new
> stuff?
> 
> For the multi-pipe stuff, how other drivers handle this is by internally
> remapping userspace visible crtc/plane objects to underlying hw objects.
> And then if you have remapping going on you add the userspace state
> structs for all underlying remapped crtc/plane in use by the current
> update. That should be enough to force just enough synchronization.
> 
> Then in your hw commit functions you simple iterate over all the states in
> your update, as if you'd iterate over streams directly. So example:
> 
> struct amdgpu_crtc_state {
> 	struct drm_crtc_state base;
> 	struct dm_stream_mapping_state mapping;
> 	/* this crtc might not be using this stream! */
> 	struct dm_stream_state stream;
> };
> 
> I think msm works like this, but there's also other drivers.
> 
> When you have to change the mapping then you grab the global mapping
> private state, but in any other cases you use the read-only copy in the
> crtc state. And the global state is only for book-keeping, so no sync
> needed (aside from the sync provided by adding the crtc states for each
> stream you touch).
> 
> Ofc might have gotten the stream lingo wrong, but this is roughly how to
> do this in atomic.
> 
>> I'd really like to tackle a third step as well at some point, and that's
>> cleaning up our IRQ management. Our interrupt handlers unfortunately access
>> DRM state directly (since it's so easy to do) and thus introduce a
>> requirement that it doesn't get changed while these are still enabled. This
>> requires us to introduce our own stall checks in atomic_check and perform
>> the interrupt disable in commit before the state swap.
> 
> Yeah don't do that :-) It might work if you guarantee that the atomic
> commit_tail is waiting for all your irq handlers to complete, plus use rcu
> dereferencing on these pointers.
> 
> Copying the relevant bits over should be much nicer imo, also passing
> explicit pointers at least. I.e. if you don't want to copy, have a
> dedicated pointer protected by irqsave spin_lock that you share with the
> interrupt handler. Stopping interrupt handlers before swap_state isn't
> really how this should work.
> 
>> The fix for this one isn't too bad, but it's tedious - copying all the state
>> we need to the interrupt handlers before enabling them and letting them
>> continue to work off of that state. This way we can remove the stall from
>> atomic_check and actually disable/enable interrupts from commit_tail like we
>> should be doing.
> 
> Yup. I think you can have some pointers to specific state, _iff_ you make
> sure the commit_tail waits for all the interrupt handlers to complete
> somewhere (depending upon how this all works). But copying is probably
> simpler to understand - interrupt races are tricky.
>>
>>>
>>> See also my dma-fence annotation fixup patch, there dc_state also gets
>>> in the way:
>>>
>>> https://lore.kernel.org/dri-devel/20200707201229.472834-21-daniel.vetter@ffwll.ch/
>>>
>>> Nicholas, btw I'm still waiting for some dc feedback on that entire
>>> series, and what/if there's plans to fix these issues properly.
>>>
>>> Maybe even going back to the subclassed drm_atomic_state might be
>>> better than what we currently have.
>>> -Daniel
>>
>> I've taken a look at that series but forgot to ACK. While this isn't the
>> same thread for it, you can have my:
>>
>> Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>>
>> ...on the DC/DM bits. Everything you've identified there is correct and it's
>> something I'd really like to get around to taking a look at by the end of
>> the year, hopefully.
>>
>> State allocations will be solved by the DM state allocation rework and the
>> tiling flags thing needs to be solved by storing those in atomic_check
>> instead on the plane.
>>
>> Regards,
>> Nicholas Kazlauskas
>>
>>>>
>>>> [1] https://bugzilla.kernel.org/show_bug.cgi?id=207383#c96
>>>>
>>>> Thanks,
>>>> Mazin Rezk
>>>>
>>>>>
>>>>> Cheers, Daniel
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>> Nicholas Kazlauskas
>>>>>>>
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Since this bug has only been spotted with fast commits, this patch
>>>>>>>>> fixes
>>>>>>>>> the bug by clearing the dm_state instead of using the old dc_state for
>>>>>>>>> fast updates. In addition, since dm_state is only used for its dc_state
>>>>>>>>> and amdgpu_dm_atomic_commit_tail will retain the dc_state if none is
>>>>>>>>> found,
>>>>>>>>> removing the dm_state should not have any consequences in fast updates.
>>>>>>>>>
>>>>>>>>> This use-after-free bug has existed for a while now, but only caused a
>>>>>>>>> noticeable issue starting from 5.7-rc1 due to 3202fa62f ("slub:
>>>>>>>>> relocate
>>>>>>>>> freelist pointer to middle of object") moving the freelist pointer from
>>>>>>>>> dm_state->base (which was unused) to dm_state->context (which is
>>>>>>>>> dereferenced).
>>>>>>>>>
>>>>>>>>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207383
>>>>>>>>> Fixes: bd200d190f45 ("drm/amd/display: Don't replace the dc_state
>>>>>>>>> for fast updates")
>>>>>>>>> Reported-by: Duncan <1i5t5.duncan@cox.net>
>>>>>>>>> Signed-off-by: Mazin Rezk <mnrzk@protonmail.com>

I think this patch is OK to merge for now based on the discussion in 
this thread and the other one. It's better than hitting a page fault at 
the very least and it's not a very intrusive change.

I'd say I'm about halfway through development on a proper fix for this 
pending no IGT failures at least.

So for now,

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

>>>>>>>>> ---
>>>>>>>>>     .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36
>>>>>>>>> ++++++++++++++-----
>>>>>>>>>     1 file changed, 27 insertions(+), 9 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>>>>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>>>>>> index 86ffa0c2880f..710edc70e37e 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>>>>>> @@ -8717,20 +8717,38 @@ static int amdgpu_dm_atomic_check(struct
>>>>>>>>> drm_device *dev,
>>>>>>>>>              * the same resource. If we have a new DC context as part of
>>>>>>>>>              * the DM atomic state from validation we need to free it and
>>>>>>>>>              * retain the existing one instead.
>>>>>>>>> +         *
>>>>>>>>> +         * Furthermore, since the DM atomic state only contains the DC
>>>>>>>>> +         * context and can safely be annulled, we can free the state
>>>>>>>>> +         * and clear the associated private object now to free
>>>>>>>>> +         * some memory and avoid a possible use-after-free later.
>>>>>>>>>              */
>>>>>>>>> -        struct dm_atomic_state *new_dm_state, *old_dm_state;
>>>>>>>>>
>>>>>>>>> -        new_dm_state = dm_atomic_get_new_state(state);
>>>>>>>>> -        old_dm_state = dm_atomic_get_old_state(state);
>>>>>>>>> +        for (i = 0; i < state->num_private_objs; i++) {
>>>>>>>>> +            struct drm_private_obj *obj = state->private_objs[i].ptr;
>>>>>>>>>
>>>>>>>>> -        if (new_dm_state && old_dm_state) {
>>>>>>>>> -            if (new_dm_state->context)
>>>>>>>>> -                dc_release_state(new_dm_state->context);
>>>>>>>>> +            if (obj->funcs == adev->dm.atomic_obj.funcs) {
>>>>>>>>> +                int j = state->num_private_objs-1;
>>>>>>>>>
>>>>>>>>> -            new_dm_state->context = old_dm_state->context;
>>>>>>>>> +                dm_atomic_destroy_state(obj,
>>>>>>>>> +                        state->private_objs[i].state);
>>>>>>>>> +
>>>>>>>>> +                /* If i is not at the end of the array then the
>>>>>>>>> +                 * last element needs to be moved to where i was
>>>>>>>>> +                 * before the array can safely be truncated.
>>>>>>>>> +                 */
>>>>>>>>> +                if (i != j)
>>>>>>>>> +                    state->private_objs[i] =
>>>>>>>>> +                        state->private_objs[j];
>>>>>>>>>
>>>>>>>>> -            if (old_dm_state->context)
>>>>>>>>> -                dc_retain_state(old_dm_state->context);
>>>>>>>>> +                state->private_objs[j].ptr = NULL;
>>>>>>>>> +                state->private_objs[j].state = NULL;
>>>>>>>>> +                state->private_objs[j].old_state = NULL;
>>>>>>>>> +                state->private_objs[j].new_state = NULL;
>>>>>>>>> +
>>>>>>>>> +                state->num_private_objs = j;
>>>>>>>>> +                break;
>>>>>>>>> +            }
>>>>>>>>>             }
>>>>>>>>>         }
>>>>>>>>>
>>>>>>>>> --
>>>>>>>>> 2.27.0
>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>> _______________________________________________
>>>>>> dri-devel mailing list
>>>>>> dri-devel@lists.freedesktop.org
>>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>>>>
>>>>>
>>>>>
>>>>> --
>>>>> Daniel Vetter
>>>>> Software Engineer, Intel Corporation
>>>>> http://blog.ffwll.ch/
>>>
>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 86ffa0c2880f..710edc70e37e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8717,20 +8717,38 @@  static int amdgpu_dm_atomic_check(struct drm_device *dev,
 		 * the same resource. If we have a new DC context as part of
 		 * the DM atomic state from validation we need to free it and
 		 * retain the existing one instead.
+		 *
+		 * Furthermore, since the DM atomic state only contains the DC
+		 * context and can safely be annulled, we can free the state
+		 * and clear the associated private object now to free
+		 * some memory and avoid a possible use-after-free later.
 		 */
-		struct dm_atomic_state *new_dm_state, *old_dm_state;

-		new_dm_state = dm_atomic_get_new_state(state);
-		old_dm_state = dm_atomic_get_old_state(state);
+		for (i = 0; i < state->num_private_objs; i++) {
+			struct drm_private_obj *obj = state->private_objs[i].ptr;

-		if (new_dm_state && old_dm_state) {
-			if (new_dm_state->context)
-				dc_release_state(new_dm_state->context);
+			if (obj->funcs == adev->dm.atomic_obj.funcs) {
+				int j = state->num_private_objs-1;

-			new_dm_state->context = old_dm_state->context;
+				dm_atomic_destroy_state(obj,
+						state->private_objs[i].state);
+
+				/* If i is not at the end of the array then the
+				 * last element needs to be moved to where i was
+				 * before the array can safely be truncated.
+				 */
+				if (i != j)
+					state->private_objs[i] =
+						state->private_objs[j];

-			if (old_dm_state->context)
-				dc_retain_state(old_dm_state->context);
+				state->private_objs[j].ptr = NULL;
+				state->private_objs[j].state = NULL;
+				state->private_objs[j].old_state = NULL;
+				state->private_objs[j].new_state = NULL;
+
+				state->num_private_objs = j;
+				break;
+			}
 		}
 	}