[1/2] drm/scheduler: only kill entity if last user is killed v2

Message ID 20180730110352.2657-1-christian.koenig@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/scheduler: only kill entity if last user is killed v2

Commit Message

Christian König July 30, 2018, 11:03 a.m. UTC
Note which task is using the entity and only kill it if the last user of
the entity is killed. This should prevent problems when entities are leaked to
child processes.

v2: add missing kernel doc

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-
 include/drm/gpu_scheduler.h               | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)
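
[Editorial note] The mechanism is small: drm_sched_entity_push_job() records the group leader of the last task that queued a job, and drm_sched_entity_flush() atomically claims that slot with cmpxchg(), so only the recorded last user (or a flusher that finds the slot already cleared by another exiting task) may disable the entity. Below is a minimal userspace sketch of the pattern, modelling the kernel's cmpxchg() with C11 atomics and task_struct pointers with integer IDs; all names here are hypothetical, not the kernel API.

/* Minimal analogue of the last-user tracking in this patch.
 * Hypothetical sketch: C11 atomics stand in for the kernel's cmpxchg(),
 * integer IDs stand in for current->group_leader. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct entity {
	_Atomic long last_user;	/* analogue of entity->last_user */
	bool alive;		/* analogue of entity->rq != NULL */
};

/* push path, cf. drm_sched_entity_push_job() */
static void push_job(struct entity *e, long me)
{
	atomic_store(&e->last_user, me);	/* WRITE_ONCE(entity->last_user, ...) */
}

/* flush path, cf. drm_sched_entity_flush(); 'killed' stands for the
 * PF_EXITING && exit_code == SIGKILL check */
static void flush(struct entity *e, long me, bool killed)
{
	long expected = me;

	/* cmpxchg(&entity->last_user, current->group_leader, NULL):
	 * on failure, 'expected' is updated to the observed old value */
	bool was_me = atomic_compare_exchange_strong(&e->last_user,
						     &expected, 0);

	/* tear down only if we were the last user, or if another
	 * exiting task already claimed the slot (observed NULL) */
	if ((was_me || expected == 0) && killed)
		e->alive = false;	/* drm_sched_entity_set_rq(entity, NULL) */
}

int main(void)
{
	struct entity e = { .alive = true };

	push_job(&e, 1);	/* parent (ID 1) queues a job */
	flush(&e, 2, true);	/* killed child (ID 2) flushes */
	printf("after child exit:  alive=%d\n", e.alive);	/* 1: survives */
	flush(&e, 1, true);	/* killed parent flushes */
	printf("after parent exit: alive=%d\n", e.alive);	/* 0: torn down */
	return 0;
}

The child's flush fails the compare-exchange and leaves the entity alive; only the parent, as the recorded last user, actually disables it.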

Comments

Nayan Deshmukh July 30, 2018, 1:34 p.m. UTC | #1
Hi Christian,

The code looks good to me. But I was just wondering what will happen when
the last user is killed and some other user tries to push to the entity.

Regards,
Nayan Deshmukh

On Mon, Jul 30, 2018 at 4:33 PM Christian König <ckoenig.leichtzumerken@gmail.com> wrote:

> Note which task is using the entity and only kill it if the last user of
> the entity is killed. This should prevent problems when entities are
> leaked to
> child processes.
>
> v2: add missing kernel doc
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-
>  include/drm/gpu_scheduler.h               | 2 ++
>  2 files changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 3f2fc5e8242a..f563e4fbb4b6 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -275,6 +275,7 @@ static void drm_sched_entity_kill_jobs_cb(struct
> dma_fence *f,
>  long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
>  {
>         struct drm_gpu_scheduler *sched;
> +       struct task_struct *last_user;
>         long ret = timeout;
>
>         sched = entity->rq->sched;
> @@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct drm_sched_entity
> *entity, long timeout)
>
>
>         /* For killed process disable any more IBs enqueue right now */
> -       if ((current->flags & PF_EXITING) && (current->exit_code ==
> SIGKILL))
> +       last_user = cmpxchg(&entity->last_user, current->group_leader,
> NULL);
> +       if ((!last_user || last_user == current->group_leader) &&
> +           (current->flags & PF_EXITING) && (current->exit_code ==
> SIGKILL))
>                 drm_sched_entity_set_rq(entity, NULL);
>
>         return ret;
> @@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct drm_sched_job
> *sched_job,
>
>         trace_drm_sched_job(sched_job, entity);
>
> +       WRITE_ONCE(entity->last_user, current->group_leader);
>         first = spsc_queue_push(&entity->job_queue,
> &sched_job->queue_node);
>
>         /* first job wakes up scheduler */
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 091b9afcd184..21c648b0b2a1 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -66,6 +66,7 @@ enum drm_sched_priority {
>   * @guilty: points to ctx's guilty.
>   * @fini_status: contains the exit status in case the process was
> signalled.
>   * @last_scheduled: points to the finished fence of the last scheduled
> job.
> + * @last_user: last group leader pushing a job into the entity.
>   *
>   * Entities will emit jobs in order to their corresponding hardware
>   * ring, and the scheduler will alternate between entities based on
> @@ -85,6 +86,7 @@ struct drm_sched_entity {
>         struct dma_fence_cb             cb;
>         atomic_t                        *guilty;
>         struct dma_fence                *last_scheduled;
> +       struct task_struct              *last_user;
>  };
>
>  /**
> --
> 2.14.1
>
>
<div dir="ltr"><div><div><div>Hi Christian,<br><br></div>The code looks good to me. But I was just wondering what will happen when the last user is killed and some other user tries to push to the entity. <br><br></div>Regards,<br></div>Nayan Deshmukh <br></div><br><div class="gmail_quote"><div dir="ltr">On Mon, Jul 30, 2018 at 4:33 PM Christian König &lt;<a href="mailto:ckoenig.leichtzumerken@gmail.com" target="_blank">ckoenig.leichtzumerken@gmail.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Note which task is using the entity and only kill it if the last user of<br>
the entity is killed. This should prevent problems when entities are leaked to<br>
child processes.<br>
<br>
v2: add missing kernel doc<br>
<br>
Signed-off-by: Christian König &lt;<a href="mailto:christian.koenig@amd.com" target="_blank">christian.koenig@amd.com</a>&gt;<br>
---<br>
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-<br>
 include/drm/gpu_scheduler.h               | 2 ++<br>
 2 files changed, 7 insertions(+), 1 deletion(-)<br>
<br>
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
index 3f2fc5e8242a..f563e4fbb4b6 100644<br>
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
@@ -275,6 +275,7 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,<br>
 long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)<br>
 {<br>
        struct drm_gpu_scheduler *sched;<br>
+       struct task_struct *last_user;<br>
        long ret = timeout;<br>
<br>
        sched = entity-&gt;rq-&gt;sched;<br>
@@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)<br>
<br>
<br>
        /* For killed process disable any more IBs enqueue right now */<br>
-       if ((current-&gt;flags &amp; PF_EXITING) &amp;&amp; (current-&gt;exit_code == SIGKILL))<br>
+       last_user = cmpxchg(&amp;entity-&gt;last_user, current-&gt;group_leader, NULL);<br>
+       if ((!last_user || last_user == current-&gt;group_leader) &amp;&amp;<br>
+           (current-&gt;flags &amp; PF_EXITING) &amp;&amp; (current-&gt;exit_code == SIGKILL))<br>
                drm_sched_entity_set_rq(entity, NULL);<br>
<br>
        return ret;<br>
@@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,<br>
<br>
        trace_drm_sched_job(sched_job, entity);<br>
<br>
+       WRITE_ONCE(entity-&gt;last_user, current-&gt;group_leader);<br>
        first = spsc_queue_push(&amp;entity-&gt;job_queue, &amp;sched_job-&gt;queue_node);<br>
<br>
        /* first job wakes up scheduler */<br>
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h<br>
index 091b9afcd184..21c648b0b2a1 100644<br>
--- a/include/drm/gpu_scheduler.h<br>
+++ b/include/drm/gpu_scheduler.h<br>
@@ -66,6 +66,7 @@ enum drm_sched_priority {<br>
  * @guilty: points to ctx&#39;s guilty.<br>
  * @fini_status: contains the exit status in case the process was signalled.<br>
  * @last_scheduled: points to the finished fence of the last scheduled job.<br>
+ * @last_user: last group leader pushing a job into the entity.<br>
  *<br>
  * Entities will emit jobs in order to their corresponding hardware<br>
  * ring, and the scheduler will alternate between entities based on<br>
@@ -85,6 +86,7 @@ struct drm_sched_entity {<br>
        struct dma_fence_cb             cb;<br>
        atomic_t                        *guilty;<br>
        struct dma_fence                *last_scheduled;<br>
+       struct task_struct              *last_user;<br>
 };<br>
<br>
 /**<br>
-- <br>
2.14.1<br>
<br>
</blockquote></div>
Andrey Grodzovsky July 30, 2018, 8:42 p.m. UTC | #2
I believe that in this case the

if (!entity->rq) {
    DRM_ERROR...
    return;
}

clause will take place.

P.S. I remember we planned to actually propagate the error back to the
caller, so I guess we should take care of this sooner or later.

The change is Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Andrey


On 07/30/2018 09:34 AM, Nayan Deshmukh wrote:
> Hi Christian,
>
> The code looks good to me. But I was just wondering what will happen 
> when the last user is killed and some other user tries to push to the 
> entity.
>
> Regards,
> Nayan Deshmukh
>
> On Mon, Jul 30, 2018 at 4:33 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>
>     Note which task is using the entity and only kill it if the last
>     user of
>     the entity is killed. This should prevent problems when entities
>     are leaked to
>     child processes.
>
>     v2: add missing kernel doc
>
>     Signed-off-by: Christian König <christian.koenig@amd.com>
>     ---
>      drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-
>      include/drm/gpu_scheduler.h               | 2 ++
>      2 files changed, 7 insertions(+), 1 deletion(-)
>
>     diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     index 3f2fc5e8242a..f563e4fbb4b6 100644
>     --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>     @@ -275,6 +275,7 @@ static void
>     drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
>      long drm_sched_entity_flush(struct drm_sched_entity *entity, long
>     timeout)
>      {
>             struct drm_gpu_scheduler *sched;
>     +       struct task_struct *last_user;
>             long ret = timeout;
>
>             sched = entity->rq->sched;
>     @@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct
>     drm_sched_entity *entity, long timeout)
>
>
>             /* For killed process disable any more IBs enqueue right
>     now */
>     -       if ((current->flags & PF_EXITING) && (current->exit_code
>     == SIGKILL))
>     +       last_user = cmpxchg(&entity->last_user,
>     current->group_leader, NULL);
>     +       if ((!last_user || last_user == current->group_leader) &&
>     +           (current->flags & PF_EXITING) && (current->exit_code
>     == SIGKILL))
>                     drm_sched_entity_set_rq(entity, NULL);
>
>             return ret;
>     @@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct
>     drm_sched_job *sched_job,
>
>             trace_drm_sched_job(sched_job, entity);
>
>     +       WRITE_ONCE(entity->last_user, current->group_leader);
>             first = spsc_queue_push(&entity->job_queue,
>     &sched_job->queue_node);
>
>             /* first job wakes up scheduler */
>     diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>     index 091b9afcd184..21c648b0b2a1 100644
>     --- a/include/drm/gpu_scheduler.h
>     +++ b/include/drm/gpu_scheduler.h
>     @@ -66,6 +66,7 @@ enum drm_sched_priority {
>       * @guilty: points to ctx's guilty.
>       * @fini_status: contains the exit status in case the process was
>     signalled.
>       * @last_scheduled: points to the finished fence of the last
>     scheduled job.
>     + * @last_user: last group leader pushing a job into the entity.
>       *
>       * Entities will emit jobs in order to their corresponding hardware
>       * ring, and the scheduler will alternate between entities based on
>     @@ -85,6 +86,7 @@ struct drm_sched_entity {
>             struct dma_fence_cb             cb;
>             atomic_t                        *guilty;
>             struct dma_fence                *last_scheduled;
>     +       struct task_struct              *last_user;
>      };
>
>      /**
>     -- 
>     2.14.1
>
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
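
[Editorial note] The case Nayan raises is caught by the clause Andrey quotes, at the top of drm_sched_entity_push_job(). A simplified sketch of that path follows, assuming the guard present in the scheduler at the time; the error string is elided in Andrey's reply and is hypothetical here.

void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
			       struct drm_sched_entity *entity)
{
	/* the flush path cleared entity->rq via
	 * drm_sched_entity_set_rq(entity, NULL) */
	if (!entity->rq) {
		DRM_ERROR("pushing to a killed entity\n");	/* hypothetical text */
		return;		/* job dropped, error only logged */
	}

	/* ... normal path: record last_user and queue the job ... */
}

As the P.S. notes, the caller currently only gets a log line, not an error code.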
Nayan Deshmukh July 31, 2018, 9:11 a.m. UTC | #3
That makes sense. The change is Acked-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>

On Tue, Jul 31, 2018 at 2:12 AM Andrey Grodzovsky <Andrey.Grodzovsky@amd.com> wrote:

> I believe that in this case
>
> if (!entity->rq) {
>
>     DRM_ERROR...
>
>     return;
>
> }
>
> clause will take place.
>
> P.S I remember we planned to actually propagate the error back to the
> caller so i guess we should take care of this sooner or later.
>
> The change is Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>
> Andrey
>
> On 07/30/2018 09:34 AM, Nayan Deshmukh wrote:
>
> Hi Christian,
>
> The code looks good to me. But I was just wondering what will happen when
> the last user is killed and some other user tries to push to the entity.
>
> Regards,
> Nayan Deshmukh
>
> On Mon, Jul 30, 2018 at 4:33 PM Christian König <
> ckoenig.leichtzumerken@gmail.com> wrote:
>
>> Note which task is using the entity and only kill it if the last user of
>> the entity is killed. This should prevent problems when entities are
>> leaked to
>> child processes.
>>
>> v2: add missing kernel doc
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>  drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-
>>  include/drm/gpu_scheduler.h               | 2 ++
>>  2 files changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> index 3f2fc5e8242a..f563e4fbb4b6 100644
>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
>> @@ -275,6 +275,7 @@ static void drm_sched_entity_kill_jobs_cb(struct
>> dma_fence *f,
>>  long drm_sched_entity_flush(struct drm_sched_entity *entity, long
>> timeout)
>>  {
>>         struct drm_gpu_scheduler *sched;
>> +       struct task_struct *last_user;
>>         long ret = timeout;
>>
>>         sched = entity->rq->sched;
>> @@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct drm_sched_entity
>> *entity, long timeout)
>>
>>
>>         /* For killed process disable any more IBs enqueue right now */
>> -       if ((current->flags & PF_EXITING) && (current->exit_code ==
>> SIGKILL))
>> +       last_user = cmpxchg(&entity->last_user, current->group_leader,
>> NULL);
>> +       if ((!last_user || last_user == current->group_leader) &&
>> +           (current->flags & PF_EXITING) && (current->exit_code ==
>> SIGKILL))
>>                 drm_sched_entity_set_rq(entity, NULL);
>>
>>         return ret;
>> @@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct drm_sched_job
>> *sched_job,
>>
>>         trace_drm_sched_job(sched_job, entity);
>>
>> +       WRITE_ONCE(entity->last_user, current->group_leader);
>>         first = spsc_queue_push(&entity->job_queue,
>> &sched_job->queue_node);
>>
>>         /* first job wakes up scheduler */
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 091b9afcd184..21c648b0b2a1 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -66,6 +66,7 @@ enum drm_sched_priority {
>>   * @guilty: points to ctx's guilty.
>>   * @fini_status: contains the exit status in case the process was
>> signalled.
>>   * @last_scheduled: points to the finished fence of the last scheduled
>> job.
>> + * @last_user: last group leader pushing a job into the entity.
>>   *
>>   * Entities will emit jobs in order to their corresponding hardware
>>   * ring, and the scheduler will alternate between entities based on
>> @@ -85,6 +86,7 @@ struct drm_sched_entity {
>>         struct dma_fence_cb             cb;
>>         atomic_t                        *guilty;
>>         struct dma_fence                *last_scheduled;
>> +       struct task_struct              *last_user;
>>  };
>>
>>  /**
>> --
>> 2.14.1
>>
>>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
>
<div dir="ltr">That makes sense. The change is Acked-by: Nayan Deshmukh &lt;<a href="mailto:nayan26deshmukh@gmail.com">nayan26deshmukh@gmail.com</a>&gt; <br></div><br><div class="gmail_quote"><div dir="ltr">On Tue, Jul 31, 2018 at 2:12 AM Andrey Grodzovsky &lt;<a href="mailto:Andrey.Grodzovsky@amd.com">Andrey.Grodzovsky@amd.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
  
    
  
  <div text="#000000" bgcolor="#FFFFFF">
    <p>I believe that in this case <br>
    </p>
    <p>if (!entity-&gt;rq) {</p>
    <p>    DRM_ERROR...</p>
    <p>    return;<br>
    </p>
    <p>}</p>
    <p>clause will take place.</p>
    <p>P.S I remember we planned to actually propagate the error back to
      the caller so i guess we should take care of this sooner or later.</p>
    <p>The change is Reviewed-by: Andrey Grodzovsky
      <a class="m_-9216610408873313555moz-txt-link-rfc2396E" href="mailto:andrey.grodzovsky@amd.com" target="_blank">&lt;andrey.grodzovsky@amd.com&gt;</a></p>
    <p>Andrey<br>
    </p>
    <br>
    <div class="m_-9216610408873313555moz-cite-prefix">On 07/30/2018 09:34 AM, Nayan Deshmukh
      wrote:<br>
    </div>
    <blockquote type="cite">
      
      <div dir="ltr">
        <div>
          <div>
            <div>Hi Christian,<br>
              <br>
            </div>
            The code looks good to me. But I was just wondering what
            will happen when the last user is killed and some other user
            tries to push to the entity. <br>
            <br>
          </div>
          Regards,<br>
        </div>
        Nayan Deshmukh <br>
      </div>
      <br>
      <div class="gmail_quote">
        <div dir="ltr">On Mon, Jul 30, 2018 at 4:33 PM Christian König
          &lt;<a href="mailto:ckoenig.leichtzumerken@gmail.com" target="_blank">ckoenig.leichtzumerken@gmail.com</a>&gt;
          wrote:<br>
        </div>
        <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Note which
          task is using the entity and only kill it if the last user of<br>
          the entity is killed. This should prevent problems when
          entities are leaked to<br>
          child processes.<br>
          <br>
          v2: add missing kernel doc<br>
          <br>
          Signed-off-by: Christian König &lt;<a href="mailto:christian.koenig@amd.com" target="_blank">christian.koenig@amd.com</a>&gt;<br>
          ---<br>
           drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-<br>
           include/drm/gpu_scheduler.h               | 2 ++<br>
           2 files changed, 7 insertions(+), 1 deletion(-)<br>
          <br>
          diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c
          b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
          index 3f2fc5e8242a..f563e4fbb4b6 100644<br>
          --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
          +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c<br>
          @@ -275,6 +275,7 @@ static void
          drm_sched_entity_kill_jobs_cb(struct dma_fence *f,<br>
           long drm_sched_entity_flush(struct drm_sched_entity *entity,
          long timeout)<br>
           {<br>
                  struct drm_gpu_scheduler *sched;<br>
          +       struct task_struct *last_user;<br>
                  long ret = timeout;<br>
          <br>
                  sched = entity-&gt;rq-&gt;sched;<br>
          @@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct
          drm_sched_entity *entity, long timeout)<br>
          <br>
          <br>
                  /* For killed process disable any more IBs enqueue
          right now */<br>
          -       if ((current-&gt;flags &amp; PF_EXITING) &amp;&amp;
          (current-&gt;exit_code == SIGKILL))<br>
          +       last_user = cmpxchg(&amp;entity-&gt;last_user,
          current-&gt;group_leader, NULL);<br>
          +       if ((!last_user || last_user ==
          current-&gt;group_leader) &amp;&amp;<br>
          +           (current-&gt;flags &amp; PF_EXITING) &amp;&amp;
          (current-&gt;exit_code == SIGKILL))<br>
                          drm_sched_entity_set_rq(entity, NULL);<br>
          <br>
                  return ret;<br>
          @@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct
          drm_sched_job *sched_job,<br>
          <br>
                  trace_drm_sched_job(sched_job, entity);<br>
          <br>
          +       WRITE_ONCE(entity-&gt;last_user,
          current-&gt;group_leader);<br>
                  first = spsc_queue_push(&amp;entity-&gt;job_queue,
          &amp;sched_job-&gt;queue_node);<br>
          <br>
                  /* first job wakes up scheduler */<br>
          diff --git a/include/drm/gpu_scheduler.h
          b/include/drm/gpu_scheduler.h<br>
          index 091b9afcd184..21c648b0b2a1 100644<br>
          --- a/include/drm/gpu_scheduler.h<br>
          +++ b/include/drm/gpu_scheduler.h<br>
          @@ -66,6 +66,7 @@ enum drm_sched_priority {<br>
            * @guilty: points to ctx&#39;s guilty.<br>
            * @fini_status: contains the exit status in case the process
          was signalled.<br>
            * @last_scheduled: points to the finished fence of the last
          scheduled job.<br>
          + * @last_user: last group leader pushing a job into the
          entity.<br>
            *<br>
            * Entities will emit jobs in order to their corresponding
          hardware<br>
            * ring, and the scheduler will alternate between entities
          based on<br>
          @@ -85,6 +86,7 @@ struct drm_sched_entity {<br>
                  struct dma_fence_cb             cb;<br>
                  atomic_t                        *guilty;<br>
                  struct dma_fence                *last_scheduled;<br>
          +       struct task_struct              *last_user;<br>
           };<br>
          <br>
           /**<br>
          -- <br>
          2.14.1<br>
          <br>
        </blockquote>
      </div>
      <br>
      <fieldset class="m_-9216610408873313555mimeAttachmentHeader"></fieldset>
      <br>
      <pre>_______________________________________________
dri-devel mailing list
<a class="m_-9216610408873313555moz-txt-link-abbreviated" href="mailto:dri-devel@lists.freedesktop.org" target="_blank">dri-devel@lists.freedesktop.org</a>
<a class="m_-9216610408873313555moz-txt-link-freetext" href="https://lists.freedesktop.org/mailman/listinfo/dri-devel" target="_blank">https://lists.freedesktop.org/mailman/listinfo/dri-devel</a>
</pre>
    </blockquote>
    <br>
  </div>

</blockquote></div>

Patch

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 3f2fc5e8242a..f563e4fbb4b6 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -275,6 +275,7 @@  static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 {
 	struct drm_gpu_scheduler *sched;
+	struct task_struct *last_user;
 	long ret = timeout;
 
 	sched = entity->rq->sched;
@@ -295,7 +296,9 @@  long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 
 
 	/* For killed process disable any more IBs enqueue right now */
-	if ((current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
+	last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
+	if ((!last_user || last_user == current->group_leader) &&
+	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
 		drm_sched_entity_set_rq(entity, NULL);
 
 	return ret;
@@ -541,6 +544,7 @@  void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 
 	trace_drm_sched_job(sched_job, entity);
 
+	WRITE_ONCE(entity->last_user, current->group_leader);
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 
 	/* first job wakes up scheduler */
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 091b9afcd184..21c648b0b2a1 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -66,6 +66,7 @@  enum drm_sched_priority {
  * @guilty: points to ctx's guilty.
  * @fini_status: contains the exit status in case the process was signalled.
  * @last_scheduled: points to the finished fence of the last scheduled job.
+ * @last_user: last group leader pushing a job into the entity.
  *
  * Entities will emit jobs in order to their corresponding hardware
  * ring, and the scheduler will alternate between entities based on
@@ -85,6 +86,7 @@  struct drm_sched_entity {
 	struct dma_fence_cb		cb;
 	atomic_t			*guilty;
 	struct dma_fence                *last_scheduled;
+	struct task_struct		*last_user;
 };
 
 /**
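
[Editorial note] Traced through the fork-and-exit case the commit message targets, the two hunks interact as follows (task names hypothetical):

  1. Parent P pushes a job: WRITE_ONCE(entity->last_user, P).
  2. P forks; child C inherits the file descriptor and thus the entity,
     but never pushes a job, so last_user stays P.
  3. C is SIGKILLed and flushes: cmpxchg(&entity->last_user, C, NULL)
     fails and returns P. P is neither NULL nor C's group leader, so the
     entity stays on its run queue and P's jobs keep flowing.
  4. P is SIGKILLed and flushes: cmpxchg(&entity->last_user, P, NULL)
     succeeds, so drm_sched_entity_set_rq(entity, NULL) disables any
     further submission.

Without the patch, step 3 alone would have disabled the entity, which is exactly the leaked-to-child-process problem the commit message describes.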