
[v1] drm/scheduler: Don't kill jobs in interrupt context

Message ID 20220411221536.283312-1-dmitry.osipenko@collabora.com
State New, archived

Commit Message

Dmitry Osipenko April 11, 2022, 10:15 p.m. UTC
Interrupt context can't sleep. Drivers like Panfrost and MSM take a
mutex when a job is released, and thus that code can sleep. This results
in a "BUG: scheduling while atomic" if the locks are contended while a
job is freed. There is no good reason to release the scheduler's jobs in
IRQ context, hence use normal context to fix the trouble.

Cc: stable@vger.kernel.org
Fixes: 542cff7893a3 ("drm/sched: Avoid lockdep spalt on killing a processes")
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 6 +++---
 include/drm/gpu_scheduler.h              | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)
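
For context, a minimal sketch of the pattern this patch fixes: a driver
->free_job() callback that takes a mutex (and thus may sleep) while being
reached from the irq_work (hardirq) path. All driver-side names here are
illustrative, not taken from Panfrost or MSM:

/* Illustrative ->free_job() implementation; it may sleep. */
static void example_free_job(struct drm_sched_job *sched_job)
{
	struct example_job *job = to_example_job(sched_job);

	/*
	 * mutex_lock() can sleep; if this runs from irq_work (IRQ)
	 * context and the lock is contended, the kernel reports
	 * "BUG: scheduling while atomic".
	 */
	mutex_lock(&job->dev->sched_lock);
	drm_sched_job_cleanup(sched_job);
	mutex_unlock(&job->dev->sched_lock);

	kfree(job);
}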

Comments

Andrey Grodzovsky April 12, 2022, 4:51 p.m. UTC | #1
On 2022-04-11 18:15, Dmitry Osipenko wrote:
> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
> mutex when job is released, and thus, that code can sleep. This results
> into "BUG: scheduling while atomic" if locks are contented while job is
> freed. There is no good reason for releasing scheduler's jobs in IRQ
> context, hence use normal context to fix the trouble.


I am not sure this is the best idea: it leaves the job's SW fence
signalling to be executed in system_wq context, which is prone to delays
from the various work items executing around the system. It seems better
to me to leave the fence signaling within the IRQ context and offload
only the job freeing, or maybe handle rescheduling to thread context
within the drivers' implementation of the .free_job cb. Not really sure
which is better.
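
For the second option, a rough sketch of what such a driver-side deferral
could look like: the .free_job cb itself only queues a work item, and the
sleeping cleanup runs from the workqueue. All names here are hypothetical:

static void example_free_job_work(struct work_struct *work)
{
	struct example_job *job = container_of(work, typeof(*job),
					       free_work);

	/* Process context: taking mutexes and freeing may sleep. */
	mutex_lock(&job->dev->sched_lock);
	drm_sched_job_cleanup(&job->base);
	mutex_unlock(&job->dev->sched_lock);
	kfree(job);
}

static void example_free_job(struct drm_sched_job *sched_job)
{
	struct example_job *job = to_example_job(sched_job);

	/* Possibly IRQ context here: defer the sleeping part. */
	INIT_WORK(&job->free_work, example_free_job_work);
	schedule_work(&job->free_work);
}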

Andrey


>
> Cc: stable@vger.kernel.org
> Fixes: 542cff7893a3 ("drm/sched: Avoid lockdep spalt on killing a processes")
> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> ---
>   drivers/gpu/drm/scheduler/sched_entity.c | 6 +++---
>   include/drm/gpu_scheduler.h              | 4 ++--
>   2 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 191c56064f19..6b25b2f4f5a3 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -190,7 +190,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>   }
>   EXPORT_SYMBOL(drm_sched_entity_flush);
>   
> -static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk)
> +static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
>   {
>   	struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
>   
> @@ -207,8 +207,8 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
>   	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
>   						 finish_cb);
>   
> -	init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work);
> -	irq_work_queue(&job->work);
> +	INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
> +	schedule_work(&job->work);
>   }
>   
>   static struct dma_fence *
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 0fca8f38bee4..addb135eeea6 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -28,7 +28,7 @@
>   #include <linux/dma-fence.h>
>   #include <linux/completion.h>
>   #include <linux/xarray.h>
> -#include <linux/irq_work.h>
> +#include <linux/workqueue.h>
>   
>   #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
>   
> @@ -295,7 +295,7 @@ struct drm_sched_job {
>   	 */
>   	union {
>   		struct dma_fence_cb		finish_cb;
> -		struct irq_work 		work;
> +		struct work_struct 		work;
>   	};
>   
>   	uint64_t			id;
Dmitry Osipenko April 12, 2022, 6:20 p.m. UTC | #2
On 4/12/22 19:51, Andrey Grodzovsky wrote:
> 
> On 2022-04-11 18:15, Dmitry Osipenko wrote:
>> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
>> mutex when job is released, and thus, that code can sleep. This results
>> into "BUG: scheduling while atomic" if locks are contented while job is
>> freed. There is no good reason for releasing scheduler's jobs in IRQ
>> context, hence use normal context to fix the trouble.
> 
> 
> I am not sure this is the beast Idea to leave job's sw fence signalling
> to be
> executed in system_wq context which is prone to delays of executing
> various work items from around the system. Seems better to me to leave the
> fence signaling within the IRQ context and offload only the job freeing or,
> maybe handle rescheduling to thread context within drivers implemention
> of .free_job cb. Not really sure which is the better.

We're talking here about killing jobs when a driver destroys a context,
which doesn't feel like it needs to be a fast path. I could move the
signalling into drm_sched_entity_kill_jobs_cb() and use an unbound wq,
but do we really need this for a slow path?
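
For reference, the unbound-wq variant would only change the queueing call
relative to this patch (a sketch):

	INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
	/*
	 * system_unbound_wq items are not serialized behind the
	 * per-CPU system_wq, so they are less prone to delays from
	 * unrelated work on the same CPU.
	 */
	queue_work(system_unbound_wq, &job->work);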
Andrey Grodzovsky April 12, 2022, 7:40 p.m. UTC | #3
On 2022-04-12 14:20, Dmitry Osipenko wrote:
> On 4/12/22 19:51, Andrey Grodzovsky wrote:
>> On 2022-04-11 18:15, Dmitry Osipenko wrote:
>>> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
>>> mutex when job is released, and thus, that code can sleep. This results
>>> into "BUG: scheduling while atomic" if locks are contented while job is
>>> freed. There is no good reason for releasing scheduler's jobs in IRQ
>>> context, hence use normal context to fix the trouble.
>>
>> I am not sure this is the beast Idea to leave job's sw fence signalling
>> to be
>> executed in system_wq context which is prone to delays of executing
>> various work items from around the system. Seems better to me to leave the
>> fence signaling within the IRQ context and offload only the job freeing or,
>> maybe handle rescheduling to thread context within drivers implemention
>> of .free_job cb. Not really sure which is the better.
> We're talking here about killing jobs when driver destroys context,
> which doesn't feel like it needs to be a fast path. I could move the
> signalling into drm_sched_entity_kill_jobs_cb() and use unbound wq, but
> do we really need this for a slow path?


You can't move the signaling back to drm_sched_entity_kill_jobs_cb(),
since this will bring back the lockdep splat that 'drm/sched: Avoid
lockdep spalt on killing a processes' was fixing.

I see your point and I guess we can go this way too. Another way would
be to add a work_item to the panfrost and msm jobs and reschedule to
thread context from within their .free_job callbacks, but that is
probably too cumbersome to be justified here.

Andrey


Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Dmitry Osipenko April 12, 2022, 7:55 p.m. UTC | #4
On 4/12/22 22:40, Andrey Grodzovsky wrote:
> 
> On 2022-04-12 14:20, Dmitry Osipenko wrote:
>> On 4/12/22 19:51, Andrey Grodzovsky wrote:
>>> On 2022-04-11 18:15, Dmitry Osipenko wrote:
>>>> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
>>>> mutex when job is released, and thus, that code can sleep. This results
>>>> into "BUG: scheduling while atomic" if locks are contented while job is
>>>> freed. There is no good reason for releasing scheduler's jobs in IRQ
>>>> context, hence use normal context to fix the trouble.
>>>
>>> I am not sure this is the beast Idea to leave job's sw fence signalling
>>> to be
>>> executed in system_wq context which is prone to delays of executing
>>> various work items from around the system. Seems better to me to
>>> leave the
>>> fence signaling within the IRQ context and offload only the job
>>> freeing or,
>>> maybe handle rescheduling to thread context within drivers implemention
>>> of .free_job cb. Not really sure which is the better.
>> We're talking here about killing jobs when driver destroys context,
>> which doesn't feel like it needs to be a fast path. I could move the
>> signalling into drm_sched_entity_kill_jobs_cb() and use unbound wq, but
>> do we really need this for a slow path?
> 
> 
> You can't move the signaling back to drm_sched_entity_kill_jobs_cb
> since this will bring back the lockdep splat that 'drm/sched: Avoid
> lockdep spalt on killing a processes'
> was fixing.

Indeed

> I see your point and i guess we can go this way too. Another way would
> be to add to
> panfrost and msm job a  work_item and reschedule to thread context from
> within their
> .free_job callbacks but that probably to cumbersome to be justified here.

Yes, there is no clear justification for doing that.

> Andrey
> 
> 
> Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Thank you!
Erico Nunes April 12, 2022, 10:59 p.m. UTC | #5
On Tue, Apr 12, 2022 at 9:41 PM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
>
> On 2022-04-12 14:20, Dmitry Osipenko wrote:
> > On 4/12/22 19:51, Andrey Grodzovsky wrote:
> >> On 2022-04-11 18:15, Dmitry Osipenko wrote:
> >>> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
> >>> mutex when job is released, and thus, that code can sleep. This results
> >>> into "BUG: scheduling while atomic" if locks are contented while job is
> >>> freed. There is no good reason for releasing scheduler's jobs in IRQ
> >>> context, hence use normal context to fix the trouble.
> >>
> >> I am not sure this is the beast Idea to leave job's sw fence signalling
> >> to be
> >> executed in system_wq context which is prone to delays of executing
> >> various work items from around the system. Seems better to me to leave the
> >> fence signaling within the IRQ context and offload only the job freeing or,
> >> maybe handle rescheduling to thread context within drivers implemention
> >> of .free_job cb. Not really sure which is the better.
> > We're talking here about killing jobs when driver destroys context,
> > which doesn't feel like it needs to be a fast path. I could move the
> > signalling into drm_sched_entity_kill_jobs_cb() and use unbound wq, but
> > do we really need this for a slow path?
>
>
> You can't move the signaling back to drm_sched_entity_kill_jobs_cb
> since this will bring back the lockdep splat that 'drm/sched: Avoid
> lockdep spalt on killing a processes'
> was fixing.
>
> I see your point and i guess we can go this way too. Another way would
> be to add to
> panfrost and msm job a  work_item and reschedule to thread context from
> within their
> .free_job callbacks but that probably to cumbersome to be justified here.

FWIW, since individual drivers were mentioned: commit 'drm/sched: Avoid
lockdep spalt on killing a processes' also introduced problems for lima.
There were some occurrences in our CI:
https://gitlab.freedesktop.org/mesa/mesa/-/jobs/20980982/raw
Later I found it also reproducible in normal usage when just closing
applications, so it may be affecting users too.

I tested this patch and it looks like it fixes things for lima.

Thanks

Erico
Dmitry Osipenko April 13, 2022, 6:05 a.m. UTC | #6
On 4/13/22 01:59, Erico Nunes wrote:
> On Tue, Apr 12, 2022 at 9:41 PM Andrey Grodzovsky
> <andrey.grodzovsky@amd.com> wrote:
>>
>>
>> On 2022-04-12 14:20, Dmitry Osipenko wrote:
>>> On 4/12/22 19:51, Andrey Grodzovsky wrote:
>>>> On 2022-04-11 18:15, Dmitry Osipenko wrote:
>>>>> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
>>>>> mutex when job is released, and thus, that code can sleep. This results
>>>>> into "BUG: scheduling while atomic" if locks are contented while job is
>>>>> freed. There is no good reason for releasing scheduler's jobs in IRQ
>>>>> context, hence use normal context to fix the trouble.
>>>>
>>>> I am not sure this is the beast Idea to leave job's sw fence signalling
>>>> to be
>>>> executed in system_wq context which is prone to delays of executing
>>>> various work items from around the system. Seems better to me to leave the
>>>> fence signaling within the IRQ context and offload only the job freeing or,
>>>> maybe handle rescheduling to thread context within drivers implemention
>>>> of .free_job cb. Not really sure which is the better.
>>> We're talking here about killing jobs when driver destroys context,
>>> which doesn't feel like it needs to be a fast path. I could move the
>>> signalling into drm_sched_entity_kill_jobs_cb() and use unbound wq, but
>>> do we really need this for a slow path?
>>
>>
>> You can't move the signaling back to drm_sched_entity_kill_jobs_cb
>> since this will bring back the lockdep splat that 'drm/sched: Avoid
>> lockdep spalt on killing a processes'
>> was fixing.
>>
>> I see your point and i guess we can go this way too. Another way would
>> be to add to
>> panfrost and msm job a  work_item and reschedule to thread context from
>> within their
>> .free_job callbacks but that probably to cumbersome to be justified here.
> 
> FWIW since this mentioned individual drivers, commit 'drm/sched: Avoid
> lockdep spalt on killing a processes' also introduced problems for
> lima.
> There were some occurrences in our CI
> https://gitlab.freedesktop.org/mesa/mesa/-/jobs/20980982/raw .
> Later I found it also reproducible on normal usage when just closing
> applications, so it may be affecting users too.
> 
> I tested this patch and looks like it fixes things for lima.

This patch indeed should fix that lima bug. Feel free to give your
Tested-by :)
Erico Nunes April 13, 2022, 9:45 a.m. UTC | #7
On Wed, Apr 13, 2022 at 8:05 AM Dmitry Osipenko
<dmitry.osipenko@collabora.com> wrote:
>
> On 4/13/22 01:59, Erico Nunes wrote:
> > On Tue, Apr 12, 2022 at 9:41 PM Andrey Grodzovsky
> > <andrey.grodzovsky@amd.com> wrote:
> >>
> >>
> >> On 2022-04-12 14:20, Dmitry Osipenko wrote:
> >>> On 4/12/22 19:51, Andrey Grodzovsky wrote:
> >>>> On 2022-04-11 18:15, Dmitry Osipenko wrote:
> >>>>> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
> >>>>> mutex when job is released, and thus, that code can sleep. This results
> >>>>> into "BUG: scheduling while atomic" if locks are contented while job is
> >>>>> freed. There is no good reason for releasing scheduler's jobs in IRQ
> >>>>> context, hence use normal context to fix the trouble.
> >>>>
> >>>> I am not sure this is the beast Idea to leave job's sw fence signalling
> >>>> to be
> >>>> executed in system_wq context which is prone to delays of executing
> >>>> various work items from around the system. Seems better to me to leave the
> >>>> fence signaling within the IRQ context and offload only the job freeing or,
> >>>> maybe handle rescheduling to thread context within drivers implemention
> >>>> of .free_job cb. Not really sure which is the better.
> >>> We're talking here about killing jobs when driver destroys context,
> >>> which doesn't feel like it needs to be a fast path. I could move the
> >>> signalling into drm_sched_entity_kill_jobs_cb() and use unbound wq, but
> >>> do we really need this for a slow path?
> >>
> >>
> >> You can't move the signaling back to drm_sched_entity_kill_jobs_cb
> >> since this will bring back the lockdep splat that 'drm/sched: Avoid
> >> lockdep spalt on killing a processes'
> >> was fixing.
> >>
> >> I see your point and i guess we can go this way too. Another way would
> >> be to add to
> >> panfrost and msm job a  work_item and reschedule to thread context from
> >> within their
> >> .free_job callbacks but that probably to cumbersome to be justified here.
> >
> > FWIW since this mentioned individual drivers, commit 'drm/sched: Avoid
> > lockdep spalt on killing a processes' also introduced problems for
> > lima.
> > There were some occurrences in our CI
> > https://gitlab.freedesktop.org/mesa/mesa/-/jobs/20980982/raw .
> > Later I found it also reproducible on normal usage when just closing
> > applications, so it may be affecting users too.
> >
> > I tested this patch and looks like it fixes things for lima.
>
> This patch indeed should fix that lima bug. Feel free to give yours
> tested-by :)

Sure:
Tested-by: Erico Nunes <nunes.erico@gmail.com>

Thanks

Erico
Steven Price April 13, 2022, 10:04 a.m. UTC | #8
On 11/04/2022 23:15, Dmitry Osipenko wrote:
> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
> mutex when job is released, and thus, that code can sleep. This results
> into "BUG: scheduling while atomic" if locks are contented while job is
> freed. There is no good reason for releasing scheduler's jobs in IRQ
> context, hence use normal context to fix the trouble.
> 
> Cc: stable@vger.kernel.org
> Fixes: 542cff7893a3 ("drm/sched: Avoid lockdep spalt on killing a processes")
> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>

Reviewed-by: Steven Price <steven.price@arm.com>

> ---
>  drivers/gpu/drm/scheduler/sched_entity.c | 6 +++---
>  include/drm/gpu_scheduler.h              | 4 ++--
>  2 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 191c56064f19..6b25b2f4f5a3 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -190,7 +190,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>  }
>  EXPORT_SYMBOL(drm_sched_entity_flush);
>  
> -static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk)
> +static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
>  {
>  	struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
>  
> @@ -207,8 +207,8 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
>  	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
>  						 finish_cb);
>  
> -	init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work);
> -	irq_work_queue(&job->work);
> +	INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
> +	schedule_work(&job->work);
>  }
>  
>  static struct dma_fence *
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 0fca8f38bee4..addb135eeea6 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -28,7 +28,7 @@
>  #include <linux/dma-fence.h>
>  #include <linux/completion.h>
>  #include <linux/xarray.h>
> -#include <linux/irq_work.h>
> +#include <linux/workqueue.h>
>  
>  #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
>  
> @@ -295,7 +295,7 @@ struct drm_sched_job {
>  	 */
>  	union {
>  		struct dma_fence_cb		finish_cb;
> -		struct irq_work 		work;
> +		struct work_struct 		work;
>  	};
>  
>  	uint64_t			id;
Erico Nunes May 17, 2022, 7:40 a.m. UTC | #9
On Wed, Apr 13, 2022 at 12:05 PM Steven Price <steven.price@arm.com> wrote:
>
> On 11/04/2022 23:15, Dmitry Osipenko wrote:
> > Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
> > mutex when job is released, and thus, that code can sleep. This results
> > into "BUG: scheduling while atomic" if locks are contented while job is
> > freed. There is no good reason for releasing scheduler's jobs in IRQ
> > context, hence use normal context to fix the trouble.
> >
> > Cc: stable@vger.kernel.org
> > Fixes: 542cff7893a3 ("drm/sched: Avoid lockdep spalt on killing a processes")
> > Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>
> Reviewed-by: Steven Price <steven.price@arm.com>

Is there something blocking this patch?
Mesa CI is still hitting the issue and I have been waiting for it to
be applied/backported to update CI with it.
Thanks

Erico
Dmitry Osipenko May 17, 2022, 9:03 a.m. UTC | #10
On 5/17/22 10:40, Erico Nunes wrote:
> On Wed, Apr 13, 2022 at 12:05 PM Steven Price <steven.price@arm.com> wrote:
>>
>> On 11/04/2022 23:15, Dmitry Osipenko wrote:
>>> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
>>> mutex when job is released, and thus, that code can sleep. This results
>>> into "BUG: scheduling while atomic" if locks are contented while job is
>>> freed. There is no good reason for releasing scheduler's jobs in IRQ
>>> context, hence use normal context to fix the trouble.
>>>
>>> Cc: stable@vger.kernel.org
>>> Fixes: 542cff7893a3 ("drm/sched: Avoid lockdep spalt on killing a processes")
>>> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>>
>> Reviewed-by: Steven Price <steven.price@arm.com>
> 
> Is there something blocking this patch?
> Mesa CI is still hitting the issue and I have been waiting for it to
> be applied/backported to update CI with it.
> Thanks

If this patch isn't picked up anytime soon, then I'll include it in my
"memory shrinker" patchset together with the rest of the fixes, so it
won't get lost.
Andrey Grodzovsky May 17, 2022, 2:03 p.m. UTC | #11
Let me push it into drm-misc-next.

Andrey

On 2022-05-17 05:03, Dmitry Osipenko wrote:

> On 5/17/22 10:40, Erico Nunes wrote:
>> On Wed, Apr 13, 2022 at 12:05 PM Steven Price <steven.price@arm.com> wrote:
>>> On 11/04/2022 23:15, Dmitry Osipenko wrote:
>>>> Interrupt context can't sleep. Drivers like Panfrost and MSM are taking
>>>> mutex when job is released, and thus, that code can sleep. This results
>>>> into "BUG: scheduling while atomic" if locks are contented while job is
>>>> freed. There is no good reason for releasing scheduler's jobs in IRQ
>>>> context, hence use normal context to fix the trouble.
>>>>
>>>> Cc: stable@vger.kernel.org
>>>> Fixes: 542cff7893a3 ("drm/sched: Avoid lockdep spalt on killing a processes")
>>>> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>>> Reviewed-by: Steven Price <steven.price@arm.com>
>> Is there something blocking this patch?
>> Mesa CI is still hitting the issue and I have been waiting for it to
>> be applied/backported to update CI with it.
>> Thanks
> If this patch won't be picked up anytime soon, then I'll include it into
> my "memory shrinker" patchset together with the rest of the fixes, so it
> won't get lost.
>
Andrey Grodzovsky May 17, 2022, 2:13 p.m. UTC | #12
Done.

Andrey

On 2022-05-17 10:03, Andrey Grodzovsky wrote:
> Let me push it into drm-misc-next.
>
> Andrey
>
> On 2022-05-17 05:03, Dmitry Osipenko wrote:
>
>> On 5/17/22 10:40, Erico Nunes wrote:
>>> On Wed, Apr 13, 2022 at 12:05 PM Steven Price <steven.price@arm.com> 
>>> wrote:
>>>> On 11/04/2022 23:15, Dmitry Osipenko wrote:
>>>>> Interrupt context can't sleep. Drivers like Panfrost and MSM are 
>>>>> taking
>>>>> mutex when job is released, and thus, that code can sleep. This 
>>>>> results
>>>>> into "BUG: scheduling while atomic" if locks are contented while 
>>>>> job is
>>>>> freed. There is no good reason for releasing scheduler's jobs in IRQ
>>>>> context, hence use normal context to fix the trouble.
>>>>>
>>>>> Cc: stable@vger.kernel.org
>>>>> Fixes: 542cff7893a3 ("drm/sched: Avoid lockdep spalt on killing a 
>>>>> processes")
>>>>> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>>>> Reviewed-by: Steven Price <steven.price@arm.com>
>>> Is there something blocking this patch?
>>> Mesa CI is still hitting the issue and I have been waiting for it to
>>> be applied/backported to update CI with it.
>>> Thanks
>> If this patch won't be picked up anytime soon, then I'll include it into
>> my "memory shrinker" patchset together with the rest of the fixes, so it
>> won't get lost.
>>
Dmitry Osipenko May 17, 2022, 2:48 p.m. UTC | #13
On 5/17/22 17:13, Andrey Grodzovsky wrote:
> Done.
> 
> Andrey

Awesome, thank you!
Dmitry Osipenko July 6, 2022, 7:07 a.m. UTC | #14
Hello Andrey,

On 5/17/22 17:48, Dmitry Osipenko wrote:
> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>> Done.
>>
>> Andrey
> 
> Awesome, thank you!
> 

Given that this drm-scheduler issue needs to be fixed in 5.19-rc and
earlier, shouldn't it be in drm-fixes and not in drm-next?
Andrey Grodzovsky July 6, 2022, 1:49 p.m. UTC | #15
On 2022-07-06 03:07, Dmitry Osipenko wrote:

> Hello Andrey,
>
> On 5/17/22 17:48, Dmitry Osipenko wrote:
>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>>> Done.
>>>
>>> Andrey
>> Awesome, thank you!
>>
> Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
> earlier, shouldn't it be in the drm-fixes and not in drm-next?


I pushed it into drm-misc from where it got into drm-next. I don't have 
permission for drm-fixes.

Andrey


>
Dmitry Osipenko July 6, 2022, 2:57 p.m. UTC | #16
On 7/6/22 16:49, Andrey Grodzovsky wrote:
> On 2022-07-06 03:07, Dmitry Osipenko wrote:
> 
>> Hello Andrey,
>>
>> On 5/17/22 17:48, Dmitry Osipenko wrote:
>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>>>> Done.
>>>>
>>>> Andrey
>>> Awesome, thank you!
>>>
>> Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
> 
> 
> I pushed it into drm-misc from where it got into drm-next. I don't have
> permission for drm-fixes.

Thank you
Alex Deucher July 6, 2022, 3:46 p.m. UTC | #17
On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
> On 2022-07-06 03:07, Dmitry Osipenko wrote:
>
> > Hello Andrey,
> >
> > On 5/17/22 17:48, Dmitry Osipenko wrote:
> >> On 5/17/22 17:13, Andrey Grodzovsky wrote:
> >>> Done.
> >>>
> >>> Andrey
> >> Awesome, thank you!
> >>
> > Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
> > earlier, shouldn't it be in the drm-fixes and not in drm-next?
>
>
> I pushed it into drm-misc from where it got into drm-next. I don't have
> permission for drm-fixes.

The -fixes branch of drm-misc.

Alex


>
> Andrey
>
>
> >
Dmitry Osipenko July 12, 2022, 8:56 a.m. UTC | #18
On 7/6/22 18:46, Alex Deucher wrote:
> On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
> <andrey.grodzovsky@amd.com> wrote:
>>
>> On 2022-07-06 03:07, Dmitry Osipenko wrote:
>>
>>> Hello Andrey,
>>>
>>> On 5/17/22 17:48, Dmitry Osipenko wrote:
>>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>>>>> Done.
>>>>>
>>>>> Andrey
>>>> Awesome, thank you!
>>>>
>>> Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
>>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
>>
>>
>> I pushed it into drm-misc from where it got into drm-next. I don't have
>> permission for drm-fixes.
> 
> The -fixes branch of drm-misc.

Now I see the scheduler bugfix in neither the -fixes branch nor the
-next, and today Dave sent out the 5.19-rc7 pull request without the
scheduler fix. Could anyone please check what is going on with the DRM
patches? Thanks!

https://github.com/freedesktop/drm-misc/commits/drm-misc-fixes
https://cgit.freedesktop.org/drm/drm-misc/log/?h=drm-misc-fixes
Dmitry Osipenko July 14, 2022, 9:57 a.m. UTC | #19
On 7/12/22 11:56, Dmitry Osipenko wrote:
> On 7/6/22 18:46, Alex Deucher wrote:
>> On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
>> <andrey.grodzovsky@amd.com> wrote:
>>>
>>> On 2022-07-06 03:07, Dmitry Osipenko wrote:
>>>
>>>> Hello Andrey,
>>>>
>>>> On 5/17/22 17:48, Dmitry Osipenko wrote:
>>>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>>>>>> Done.
>>>>>>
>>>>>> Andrey
>>>>> Awesome, thank you!
>>>>>
>>>> Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
>>>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
>>>
>>>
>>> I pushed it into drm-misc from where it got into drm-next. I don't have
>>> permission for drm-fixes.
>>
>> The -fixes branch of drm-misc.
> 
> Now I don't see the scheduler bugfix neither in the -fixes branch nor in
> the -next and today Dave sent out 5.19-rc7 pull request without the
> scheduler fix. Could anyone please check what is going on with the DRM
> patches? Thanks!
> 
> https://github.com/freedesktop/drm-misc/commits/drm-misc-fixes
> https://cgit.freedesktop.org/drm/drm-misc/log/?h=drm-misc-fixes

The patch is in drm-misc-next-fixes, so it wasn't moved to
drm-misc-fixes.

Andrey, don't you have access to drm-misc-fixes? Or did you mean
drm-fixes = drm-misc-fixes?
Andrey Grodzovsky July 14, 2022, 2:14 p.m. UTC | #20
On 2022-07-14 05:57, Dmitry Osipenko wrote:
> On 7/12/22 11:56, Dmitry Osipenko wrote:
>> On 7/6/22 18:46, Alex Deucher wrote:
>>> On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
>>> <andrey.grodzovsky@amd.com> wrote:
>>>> On 2022-07-06 03:07, Dmitry Osipenko wrote:
>>>>
>>>>> Hello Andrey,
>>>>>
>>>>> On 5/17/22 17:48, Dmitry Osipenko wrote:
>>>>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>>>>>>> Done.
>>>>>>>
>>>>>>> Andrey
>>>>>> Awesome, thank you!
>>>>>>
>>>>> Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
>>>>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
>>>>
>>>> I pushed it into drm-misc from where it got into drm-next. I don't have
>>>> permission for drm-fixes.
>>> The -fixes branch of drm-misc.
>> Now I don't see the scheduler bugfix neither in the -fixes branch nor in
>> the -next and today Dave sent out 5.19-rc7 pull request without the
>> scheduler fix. Could anyone please check what is going on with the DRM
>> patches? Thanks!
>>
>> https://github.com/freedesktop/drm-misc/commits/drm-misc-fixes
>> https://cgit.freedesktop.org/drm/drm-misc/log/?h=drm-misc-fixes
> The patch is in the drm-misc-next-fixes, so it wasn't moved to the
> drm-misc-fixes.
>
> Andrey, don't you have access to drm-misc-fixes? Or you meant
> drm-fixes=drm-misc-fixes?


I only have access to drm-misc-next, to which I pushed this patch.

Andrey


>
Dmitry Osipenko July 14, 2022, 2:23 p.m. UTC | #21
On 7/14/22 17:14, Andrey Grodzovsky wrote:
> 
> On 2022-07-14 05:57, Dmitry Osipenko wrote:
>> On 7/12/22 11:56, Dmitry Osipenko wrote:
>>> On 7/6/22 18:46, Alex Deucher wrote:
>>>> On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
>>>> <andrey.grodzovsky@amd.com> wrote:
>>>>> On 2022-07-06 03:07, Dmitry Osipenko wrote:
>>>>>
>>>>>> Hello Andrey,
>>>>>>
>>>>>> On 5/17/22 17:48, Dmitry Osipenko wrote:
>>>>>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>>>>>>>> Done.
>>>>>>>>
>>>>>>>> Andrey
>>>>>>> Awesome, thank you!
>>>>>>>
>>>>>> Given that this drm-scheduler issue needs to be fixed in the
>>>>>> 5.19-RC and
>>>>>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
>>>>>
>>>>> I pushed it into drm-misc from where it got into drm-next. I don't
>>>>> have
>>>>> permission for drm-fixes.
>>>> The -fixes branch of drm-misc.
>>> Now I don't see the scheduler bugfix neither in the -fixes branch nor in
>>> the -next and today Dave sent out 5.19-rc7 pull request without the
>>> scheduler fix. Could anyone please check what is going on with the DRM
>>> patches? Thanks!
>>>
>>> https://github.com/freedesktop/drm-misc/commits/drm-misc-fixes
>>> https://cgit.freedesktop.org/drm/drm-misc/log/?h=drm-misc-fixes
>> The patch is in the drm-misc-next-fixes, so it wasn't moved to the
>> drm-misc-fixes.
>>
>> Andrey, don't you have access to drm-misc-fixes? Or you meant
>> drm-fixes=drm-misc-fixes?
> 
> 
> I have only accesses to drm-misc-next to which I pushed this patch.

Thank you for the clarification. IIUC, drm-misc-next-fixes should become
drm-misc-fixes, but perhaps it was too late in the 5.19-rc6 cycle for
this patch.
Alex Deucher July 14, 2022, 4:22 p.m. UTC | #22
On Thu, Jul 14, 2022 at 10:14 AM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
>
> On 2022-07-14 05:57, Dmitry Osipenko wrote:
> > On 7/12/22 11:56, Dmitry Osipenko wrote:
> >> On 7/6/22 18:46, Alex Deucher wrote:
> >>> On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
> >>> <andrey.grodzovsky@amd.com> wrote:
> >>>> On 2022-07-06 03:07, Dmitry Osipenko wrote:
> >>>>
> >>>>> Hello Andrey,
> >>>>>
> >>>>> On 5/17/22 17:48, Dmitry Osipenko wrote:
> >>>>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
> >>>>>>> Done.
> >>>>>>>
> >>>>>>> Andrey
> >>>>>> Awesome, thank you!
> >>>>>>
> >>>>> Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
> >>>>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
> >>>>
> >>>> I pushed it into drm-misc from where it got into drm-next. I don't have
> >>>> permission for drm-fixes.
> >>> The -fixes branch of drm-misc.
> >> Now I don't see the scheduler bugfix neither in the -fixes branch nor in
> >> the -next and today Dave sent out 5.19-rc7 pull request without the
> >> scheduler fix. Could anyone please check what is going on with the DRM
> >> patches? Thanks!
> >>
> >> https://github.com/freedesktop/drm-misc/commits/drm-misc-fixes
> >> https://cgit.freedesktop.org/drm/drm-misc/log/?h=drm-misc-fixes
> > The patch is in the drm-misc-next-fixes, so it wasn't moved to the
> > drm-misc-fixes.
> >
> > Andrey, don't you have access to drm-misc-fixes? Or you meant
> > drm-fixes=drm-misc-fixes?
>
>
> I have only accesses to drm-misc-next to which I pushed this patch.

Anyone with drm-misc rights can commit to any of the branches in the
drm-misc tree.  You just need to check out the appropriate branch, then
push the changes.  E.g.,
dim push-branch drm-misc-next
vs
dim push-branch drm-misc-next-fixes
etc.

Alex


>
> Andrey
>
>
> >
Andrey Grodzovsky July 14, 2022, 5:58 p.m. UTC | #23
On 2022-07-14 12:22, Alex Deucher wrote:

> On Thu, Jul 14, 2022 at 10:14 AM Andrey Grodzovsky
> <andrey.grodzovsky@amd.com> wrote:
>>
>> On 2022-07-14 05:57, Dmitry Osipenko wrote:
>>> On 7/12/22 11:56, Dmitry Osipenko wrote:
>>>> On 7/6/22 18:46, Alex Deucher wrote:
>>>>> On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
>>>>> <andrey.grodzovsky@amd.com> wrote:
>>>>>> On 2022-07-06 03:07, Dmitry Osipenko wrote:
>>>>>>
>>>>>>> Hello Andrey,
>>>>>>>
>>>>>>> On 5/17/22 17:48, Dmitry Osipenko wrote:
>>>>>>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>>>>>>>>> Done.
>>>>>>>>>
>>>>>>>>> Andrey
>>>>>>>> Awesome, thank you!
>>>>>>>>
>>>>>>> Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
>>>>>>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
>>>>>> I pushed it into drm-misc from where it got into drm-next. I don't have
>>>>>> permission for drm-fixes.
>>>>> The -fixes branch of drm-misc.
>>>> Now I don't see the scheduler bugfix neither in the -fixes branch nor in
>>>> the -next and today Dave sent out 5.19-rc7 pull request without the
>>>> scheduler fix. Could anyone please check what is going on with the DRM
>>>> patches? Thanks!
>>>>
> >>>> https://github.com/freedesktop/drm-misc/commits/drm-misc-fixes
> >>>> https://cgit.freedesktop.org/drm/drm-misc/log/?h=drm-misc-fixes
>>> The patch is in the drm-misc-next-fixes, so it wasn't moved to the
>>> drm-misc-fixes.
>>>
>>> Andrey, don't you have access to drm-misc-fixes? Or you meant
>>> drm-fixes=drm-misc-fixes?
>>
>> I have only accesses to drm-misc-next to which I pushed this patch.
> anyone with drm-misc rights can commit to any of the branches in the
> drm-misc tree.  You just need to check out and push the appropriate
> branch.  then push the changes.  E.g.,
> dim push-branch drm-misc-next
> vs
> dim push-branch drm-misc-next-fixes
> etc.
>
> Alex


I see, but what is the reason then that Dave sent out the 5.19-rc7 pull
request without the scheduler fix, if the patch was merged into
drm-misc-next long ago? All the changes from there are usually picked up
for pull requests, no?

Andrey


>
>
>> Andrey
>>
>>
Alex Deucher July 14, 2022, 9:16 p.m. UTC | #24
On Thu, Jul 14, 2022 at 1:58 PM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
> On 2022-07-14 12:22, Alex Deucher wrote:
>
> > On Thu, Jul 14, 2022 at 10:14 AM Andrey Grodzovsky
> > <andrey.grodzovsky@amd.com> wrote:
> >>
> >> On 2022-07-14 05:57, Dmitry Osipenko wrote:
> >>> On 7/12/22 11:56, Dmitry Osipenko wrote:
> >>>> On 7/6/22 18:46, Alex Deucher wrote:
> >>>>> On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
> >>>>> <andrey.grodzovsky@amd.com> wrote:
> >>>>>> On 2022-07-06 03:07, Dmitry Osipenko wrote:
> >>>>>>
> >>>>>>> Hello Andrey,
> >>>>>>>
> >>>>>>> On 5/17/22 17:48, Dmitry Osipenko wrote:
> >>>>>>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
> >>>>>>>>> Done.
> >>>>>>>>>
> >>>>>>>>> Andrey
> >>>>>>>> Awesome, thank you!
> >>>>>>>>
> >>>>>>> Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
> >>>>>>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
> >>>>>> I pushed it into drm-misc from where it got into drm-next. I don't have
> >>>>>> permission for drm-fixes.
> >>>>> The -fixes branch of drm-misc.
> >>>> Now I don't see the scheduler bugfix neither in the -fixes branch nor in
> >>>> the -next and today Dave sent out 5.19-rc7 pull request without the
> >>>> scheduler fix. Could anyone please check what is going on with the DRM
> >>>> patches? Thanks!
> >>>>
> >>>> https://github.com/freedesktop/drm-misc/commits/drm-misc-fixes
> >>>> https://cgit.freedesktop.org/drm/drm-misc/log/?h=drm-misc-fixes
> >>> The patch is in the drm-misc-next-fixes, so it wasn't moved to the
> >>> drm-misc-fixes.
> >>>
> >>> Andrey, don't you have access to drm-misc-fixes? Or you meant
> >>> drm-fixes=drm-misc-fixes?
> >>
> >> I have only accesses to drm-misc-next to which I pushed this patch.
> > anyone with drm-misc rights can commit to any of the branches in the
> > drm-misc tree.  You just need to check out and push the appropriate
> > branch.  then push the changes.  E.g.,
> > dim push-branch drm-misc-next
> > vs
> > dim push-branch drm-misc-next-fixes
> > etc.
> >
> > Alex
>
>
> I see, but what  is the reason then that Dave sent out 5.19-rc7 pull
> request without the
> scheduler fix if the patch was merged into drm-misc-next long ago ? All
> the changes from
> there are usually picked up for pull requests, no ?

drm-misc-next is for new stuff for the next kernel (e.g., 5.20).
drm-misc-fixes is for fixes for the current kernel cycle (e.g., 5.19).
See:
https://drm.pages.freedesktop.org/maintainer-tools/drm-misc.html

Alex

>
> Andrey
>
>
> >
> >
> >> Andrey
> >>
> >>
Andrey Grodzovsky July 15, 2022, 3:18 p.m. UTC | #25
On 2022-07-14 17:16, Alex Deucher wrote:
> On Thu, Jul 14, 2022 at 1:58 PM Andrey Grodzovsky
> <andrey.grodzovsky@amd.com> wrote:
>> On 2022-07-14 12:22, Alex Deucher wrote:
>>
>>> On Thu, Jul 14, 2022 at 10:14 AM Andrey Grodzovsky
>>> <andrey.grodzovsky@amd.com> wrote:
>>>> On 2022-07-14 05:57, Dmitry Osipenko wrote:
>>>>> On 7/12/22 11:56, Dmitry Osipenko wrote:
>>>>>> On 7/6/22 18:46, Alex Deucher wrote:
>>>>>>> On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
>>>>>>> <andrey.grodzovsky@amd.com> wrote:
>>>>>>>> On 2022-07-06 03:07, Dmitry Osipenko wrote:
>>>>>>>>
>>>>>>>>> Hello Andrey,
>>>>>>>>>
>>>>>>>>> On 5/17/22 17:48, Dmitry Osipenko wrote:
>>>>>>>>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>>>>>>>>>>> Done.
>>>>>>>>>>>
>>>>>>>>>>> Andrey
>>>>>>>>>> Awesome, thank you!
>>>>>>>>>>
>>>>>>>>> Given that this drm-scheduler issue needs to be fixed in the 5.19-RC and
>>>>>>>>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
>>>>>>>> I pushed it into drm-misc from where it got into drm-next. I don't have
>>>>>>>> permission for drm-fixes.
>>>>>>> The -fixes branch of drm-misc.
>>>>>> Now I don't see the scheduler bugfix neither in the -fixes branch nor in
>>>>>> the -next and today Dave sent out 5.19-rc7 pull request without the
>>>>>> scheduler fix. Could anyone please check what is going on with the DRM
>>>>>> patches? Thanks!
>>>>>>
>>>>>> https://github.com/freedesktop/drm-misc/commits/drm-misc-fixes
>>>>>> https://cgit.freedesktop.org/drm/drm-misc/log/?h=drm-misc-fixes
>>>>> The patch is in the drm-misc-next-fixes, so it wasn't moved to the
>>>>> drm-misc-fixes.
>>>>>
>>>>> Andrey, don't you have access to drm-misc-fixes? Or you meant
>>>>> drm-fixes=drm-misc-fixes?
>>>> I have only accesses to drm-misc-next to which I pushed this patch.
>>> anyone with drm-misc rights can commit to any of the branches in the
>>> drm-misc tree.  You just need to check out and push the appropriate
>>> branch.  then push the changes.  E.g.,
>>> dim push-branch drm-misc-next
>>> vs
>>> dim push-branch drm-misc-next-fixes
>>> etc.
>>>
>>> Alex
>>
>> I see, but what  is the reason then that Dave sent out 5.19-rc7 pull
>> request without the
>> scheduler fix if the patch was merged into drm-misc-next long ago ? All
>> the changes from
>> there are usually picked up for pull requests, no ?
> drm-misc-next is for new stuff for the next kernel (e.g., 5.20).
> drm-misc-fixes is for fixes for the current kernel cycle (e.g., 5.19).
> See:
> https://drm.pages.freedesktop.org/maintainer-tools/drm-misc.html
>
> Alex


Got it. Dmitry, I pushed to drm-misc-fixes this time, so I hope it will
be picked up for the next rc release.

Andrey


>
>> Andrey
>>
>>
>>>
>>>> Andrey
>>>>
>>>>
Dmitry Osipenko July 15, 2022, 3:24 p.m. UTC | #26
On 7/15/22 18:18, Andrey Grodzovsky wrote:
> 
> On 2022-07-14 17:16, Alex Deucher wrote:
>> On Thu, Jul 14, 2022 at 1:58 PM Andrey Grodzovsky
>> <andrey.grodzovsky@amd.com> wrote:
>>> On 2022-07-14 12:22, Alex Deucher wrote:
>>>
>>>> On Thu, Jul 14, 2022 at 10:14 AM Andrey Grodzovsky
>>>> <andrey.grodzovsky@amd.com> wrote:
>>>>> On 2022-07-14 05:57, Dmitry Osipenko wrote:
>>>>>> On 7/12/22 11:56, Dmitry Osipenko wrote:
>>>>>>> On 7/6/22 18:46, Alex Deucher wrote:
>>>>>>>> On Wed, Jul 6, 2022 at 9:49 AM Andrey Grodzovsky
>>>>>>>> <andrey.grodzovsky@amd.com> wrote:
>>>>>>>>> On 2022-07-06 03:07, Dmitry Osipenko wrote:
>>>>>>>>>
>>>>>>>>>> Hello Andrey,
>>>>>>>>>>
>>>>>>>>>> On 5/17/22 17:48, Dmitry Osipenko wrote:
>>>>>>>>>>> On 5/17/22 17:13, Andrey Grodzovsky wrote:
>>>>>>>>>>>> Done.
>>>>>>>>>>>>
>>>>>>>>>>>> Andrey
>>>>>>>>>>> Awesome, thank you!
>>>>>>>>>>>
>>>>>>>>>> Given that this drm-scheduler issue needs to be fixed in the
>>>>>>>>>> 5.19-RC and
>>>>>>>>>> earlier, shouldn't it be in the drm-fixes and not in drm-next?
>>>>>>>>> I pushed it into drm-misc from where it got into drm-next. I
>>>>>>>>> don't have
>>>>>>>>> permission for drm-fixes.
>>>>>>>> The -fixes branch of drm-misc.
>>>>>>> Now I don't see the scheduler bugfix neither in the -fixes branch
>>>>>>> nor in
>>>>>>> the -next and today Dave sent out 5.19-rc7 pull request without the
>>>>>>> scheduler fix. Could anyone please check what is going on with
>>>>>>> the DRM
>>>>>>> patches? Thanks!
>>>>>>>
>>>>>>> https://github.com/freedesktop/drm-misc/commits/drm-misc-fixes
>>>>>>> https://cgit.freedesktop.org/drm/drm-misc/log/?h=drm-misc-fixes
>>>>>> The patch is in the drm-misc-next-fixes, so it wasn't moved to the
>>>>>> drm-misc-fixes.
>>>>>>
>>>>>> Andrey, don't you have access to drm-misc-fixes? Or you meant
>>>>>> drm-fixes=drm-misc-fixes?
>>>>> I have only accesses to drm-misc-next to which I pushed this patch.
>>>> anyone with drm-misc rights can commit to any of the branches in the
>>>> drm-misc tree.  You just need to check out and push the appropriate
>>>> branch.  then push the changes.  E.g.,
>>>> dim push-branch drm-misc-next
>>>> vs
>>>> dim push-branch drm-misc-next-fixes
>>>> etc.
>>>>
>>>> Alex
>>>
>>> I see, but what  is the reason then that Dave sent out 5.19-rc7 pull
>>> request without the
>>> scheduler fix if the patch was merged into drm-misc-next long ago ? All
>>> the changes from
>>> there are usually picked up for pull requests, no ?
>> drm-misc-next is for new stuff for the next kernel (e.g., 5.20).
>> drm-misc-fixes is for fixes for the current kernel cycle (e.g., 5.19).
>> See:
>> https://drm.pages.freedesktop.org/maintainer-tools/drm-misc.html
>>
>> Alex
> 
> 
> Got it, Dmitry, I pushed this time to drm-misc-fixes so i hope this time
> it will be picked up for next rc release.

Great, thank you!

Patch

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 191c56064f19..6b25b2f4f5a3 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -190,7 +190,7 @@  long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 }
 EXPORT_SYMBOL(drm_sched_entity_flush);
 
-static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk)
+static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
 {
 	struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
 
@@ -207,8 +207,8 @@  static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
 						 finish_cb);
 
-	init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work);
-	irq_work_queue(&job->work);
+	INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
+	schedule_work(&job->work);
 }
 
 static struct dma_fence *
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 0fca8f38bee4..addb135eeea6 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -28,7 +28,7 @@ 
 #include <linux/dma-fence.h>
 #include <linux/completion.h>
 #include <linux/xarray.h>
-#include <linux/irq_work.h>
+#include <linux/workqueue.h>
 
 #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
 
@@ -295,7 +295,7 @@  struct drm_sched_job {
 	 */
 	union {
 		struct dma_fence_cb		finish_cb;
-		struct irq_work 		work;
+		struct work_struct 		work;
 	};
 
 	uint64_t			id;
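
Taken together, the kill-jobs path after this patch reads roughly as
below. The work handler body is reconstructed from the hunk context and
the pre-existing irq_work handler, so details may differ from the tree:

static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
{
	struct drm_sched_job *job = container_of(wrk, typeof(*job), work);

	/* Process context now, so a sleeping ->free_job() is fine. */
	drm_sched_fence_finished(job->s_fence);
	WARN_ON(job->s_fence->parent);
	job->sched->ops->free_job(job);
}

static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
					  struct dma_fence_cb *cb)
{
	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
						 finish_cb);

	/* May run from IRQ context: only queue the work here. */
	INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
	schedule_work(&job->work);
}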