diff mbox

[08/13] drm/msm/gpu: Rearrange the code that collects the task during a hang

Message ID 20180712185930.2492-9-jcrouse@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Jordan Crouse July 12, 2018, 6:59 p.m. UTC
Do a bit of cleanup to prepare for upcoming changes to pass the
hanging task comm and cmdline to the crash dump function.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 drivers/gpu/drm/msm/msm_gpu.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

Comments

Chris Wilson July 12, 2018, 7:48 p.m. UTC | #1
Quoting Jordan Crouse (2018-07-12 19:59:25)
> Do a bit of cleanup to prepare for upcoming changes to pass the
> hanging task comm and cmdline to the crash dump function.
> 
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> ---
>  drivers/gpu/drm/msm/msm_gpu.c | 18 ++++++++++--------
>  1 file changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index 1c09acfb4028..2ca354047250 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -314,6 +314,7 @@ static void recover_worker(struct work_struct *work)
>         struct msm_drm_private *priv = dev->dev_private;
>         struct msm_gem_submit *submit;
>         struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
> +       char *comm = NULL, *cmd = NULL;
>         int i;
>  
>         mutex_lock(&dev->struct_mutex);
> @@ -327,7 +328,7 @@ static void recover_worker(struct work_struct *work)
>                 rcu_read_lock();
>                 task = pid_task(submit->pid, PIDTYPE_PID);
>                 if (task) {
> -                       char *cmd;
> +                       comm = kstrdup(task->comm, GFP_KERNEL);

Under rcu_read_lock(), GFP_KERNEL is not allowed, you need GFP_NOWAIT or
some such (or grab a reference to the pid and drop rcu then GFP_KERNEL).
-Chris
Rob Clark Aug. 4, 2018, 5:17 p.m. UTC | #2
On Thu, Jul 12, 2018 at 3:48 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Jordan Crouse (2018-07-12 19:59:25)
> > Do a bit of cleanup to prepare for upcoming changes to pass the
> > hanging task comm and cmdline to the crash dump function.
> >
> > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> > ---
> >  drivers/gpu/drm/msm/msm_gpu.c | 18 ++++++++++--------
> >  1 file changed, 10 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> > index 1c09acfb4028..2ca354047250 100644
> > --- a/drivers/gpu/drm/msm/msm_gpu.c
> > +++ b/drivers/gpu/drm/msm/msm_gpu.c
> > @@ -314,6 +314,7 @@ static void recover_worker(struct work_struct *work)
> >         struct msm_drm_private *priv = dev->dev_private;
> >         struct msm_gem_submit *submit;
> >         struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
> > +       char *comm = NULL, *cmd = NULL;
> >         int i;
> >
> >         mutex_lock(&dev->struct_mutex);
> > @@ -327,7 +328,7 @@ static void recover_worker(struct work_struct *work)
> >                 rcu_read_lock();
> >                 task = pid_task(submit->pid, PIDTYPE_PID);
> >                 if (task) {
> > -                       char *cmd;
> > +                       comm = kstrdup(task->comm, GFP_KERNEL);
>
> Under rcu_read_lock(), GFP_KERNEL is not allowed, you need GFP_NOWAIT or
> some such (or grab a reference to the pid and drop rcu then GFP_KERNEL).

I started looking at a similar issue w/ our use of
kstrdup_quotable_cmdline() under rcu_read_lock().. I *guess* I hadn't
noticed it before due to different RCU kconfig?

I can use GFP_ATOMIC, and I can fix kstrdup_quotable_cmdline() to
actually use gfp flags passed in for kmalloc() (and similar bug in
kstrdup_quotable_file()).. but get_cmdline() still grabs mmap_sem
which complains under rcu_read_lock()..

is there any way to ensure the tast_struct sticks around long enough
to get it's cmdline without holding rcu_read_lock()?  I couldn't find
any refcnt'ing on task_struct itself, which makes this seem a bit
unsolveable :-/

BR,
-R
Sharat Masetty Oct. 12, 2018, 9:13 a.m. UTC | #3
On 8/4/2018 10:47 PM, Rob Clark wrote:
> On Thu, Jul 12, 2018 at 3:48 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>>
>> Quoting Jordan Crouse (2018-07-12 19:59:25)
>>> Do a bit of cleanup to prepare for upcoming changes to pass the
>>> hanging task comm and cmdline to the crash dump function.
>>>
>>> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
>>> ---
>>>   drivers/gpu/drm/msm/msm_gpu.c | 18 ++++++++++--------
>>>   1 file changed, 10 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
>>> index 1c09acfb4028..2ca354047250 100644
>>> --- a/drivers/gpu/drm/msm/msm_gpu.c
>>> +++ b/drivers/gpu/drm/msm/msm_gpu.c
>>> @@ -314,6 +314,7 @@ static void recover_worker(struct work_struct *work)
>>>          struct msm_drm_private *priv = dev->dev_private;
>>>          struct msm_gem_submit *submit;
>>>          struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
>>> +       char *comm = NULL, *cmd = NULL;
>>>          int i;
>>>
>>>          mutex_lock(&dev->struct_mutex);
>>> @@ -327,7 +328,7 @@ static void recover_worker(struct work_struct *work)
>>>                  rcu_read_lock();
>>>                  task = pid_task(submit->pid, PIDTYPE_PID);
>>>                  if (task) {
>>> -                       char *cmd;
>>> +                       comm = kstrdup(task->comm, GFP_KERNEL);
>>
>> Under rcu_read_lock(), GFP_KERNEL is not allowed, you need GFP_NOWAIT or
>> some such (or grab a reference to the pid and drop rcu then GFP_KERNEL).
> 
> I started looking at a similar issue w/ our use of
> kstrdup_quotable_cmdline() under rcu_read_lock().. I *guess* I hadn't
> noticed it before due to different RCU kconfig?
> 
> I can use GFP_ATOMIC, and I can fix kstrdup_quotable_cmdline() to
> actually use gfp flags passed in for kmalloc() (and similar bug in
> kstrdup_quotable_file()).. but get_cmdline() still grabs mmap_sem
> which complains under rcu_read_lock()..
> 
> is there any way to ensure the tast_struct sticks around long enough
> to get it's cmdline without holding rcu_read_lock()?  I couldn't find
> any refcnt'ing on task_struct itself, which makes this seem a bit
> unsolveable :-/

I have been seeing similar issues on my downstream setup and was looking 
into fixing this actively. Here is a way to have the task stay afloat 
and revert to GFP_KERNEL
https://patchwork.freedesktop.org/patch/256397/ Please review...
I tried this out and it does work.

> 
> BR,
> -R
> --
> To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 1c09acfb4028..2ca354047250 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -314,6 +314,7 @@  static void recover_worker(struct work_struct *work)
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_gem_submit *submit;
 	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
+	char *comm = NULL, *cmd = NULL;
 	int i;
 
 	mutex_lock(&dev->struct_mutex);
@@ -327,7 +328,7 @@  static void recover_worker(struct work_struct *work)
 		rcu_read_lock();
 		task = pid_task(submit->pid, PIDTYPE_PID);
 		if (task) {
-			char *cmd;
+			comm = kstrdup(task->comm, GFP_KERNEL);
 
 			/*
 			 * So slightly annoying, in other paths like
@@ -342,20 +343,21 @@  static void recover_worker(struct work_struct *work)
 			mutex_unlock(&dev->struct_mutex);
 			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
 			mutex_lock(&dev->struct_mutex);
+		}
+		rcu_read_unlock();
 
+		if (comm && cmd) {
 			dev_err(dev->dev, "%s: offending task: %s (%s)\n",
-				gpu->name, task->comm, cmd);
+				gpu->name, comm, cmd);
 
 			msm_rd_dump_submit(priv->hangrd, submit,
-				"offending task: %s (%s)", task->comm, cmd);
-
-			kfree(cmd);
-		} else {
+				"offending task: %s (%s)", comm, cmd);
+		} else
 			msm_rd_dump_submit(priv->hangrd, submit, NULL);
-		}
-		rcu_read_unlock();
 	}
 
+	kfree(cmd);
+	kfree(comm);
 
 	/*
 	 * Update all the rings with the latest and greatest fence.. this