
[v2] drm/i915: Don't dump umpteen thousand requests

Message ID 20180424081600.27544-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson April 24, 2018, 8:16 a.m. UTC
If we have more than a few, possibly several thousand, requests in the
queue, don't show the central portion; just show the first few and the
last being executed and/or queued. The first few should be enough to
help identify a problem in execution, and most often comparing the
first/last in the queue is enough to identify problems in the
scheduling.

We may need some fine tuning to set MAX_REQUESTS_TO_SHOW for common
debug scenarios, but for the moment, if we can avoid spending more
than a few seconds dumping the GPU state, that will avoid a nasty
livelock (where hangcheck spends so long dumping the state that it
fires again and starts to dump the state again in parallel, ad
infinitum).

v2: Remember to print last, not the stale rq iterator, after the loop.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 43 +++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 5 deletions(-)
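
For illustration, the truncation boils down to roughly the following standalone sketch (plain C over an array of seqnos; the "E seqno" line merely stands in for whatever print_request() emits, not the driver's actual format):

#include <stdio.h>

#define MAX_REQUESTS_TO_SHOW 8

/* Print the first MAX-1 entries, then a skip marker and the final entry. */
static void dump_requests(const unsigned int *seqno, int count)
{
	int i;

	for (i = 0; i < count && i < MAX_REQUESTS_TO_SHOW - 1; i++)
		printf("\t\tE seqno %u\n", seqno[i]);

	if (count >= MAX_REQUESTS_TO_SHOW) {
		if (count > MAX_REQUESTS_TO_SHOW)
			printf("\t\t...skipping %d executing requests...\n",
			       count - MAX_REQUESTS_TO_SHOW);
		printf("\t\tE seqno %u\n", seqno[count - 1]);
	}
}

With 2000 requests in the queue this emits eight request lines plus a single "...skipping 1992 executing requests..." marker, instead of 2000 lines.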

Comments

Tvrtko Ursulin April 24, 2018, 9:27 a.m. UTC | #1
On 24/04/2018 09:16, Chris Wilson wrote:
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 66cddd059666..2398ea71e747 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1307,11 +1307,13 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   		       struct drm_printer *m,
>   		       const char *header, ...)
>   {
> +	const int MAX_REQUESTS_TO_SHOW = 8;
>   	struct intel_breadcrumbs * const b = &engine->breadcrumbs;
>   	const struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct i915_gpu_error * const error = &engine->i915->gpu_error;
> -	struct i915_request *rq;
> +	struct i915_request *rq, *last;
>   	struct rb_node *rb;
> +	int count;
>   
>   	if (header) {
>   		va_list ap;
> @@ -1378,16 +1380,47 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	}
>   
>   	spin_lock_irq(&engine->timeline->lock);
> -	list_for_each_entry(rq, &engine->timeline->requests, link)
> -		print_request(m, rq, "\t\tE ");
> +
> +	last = NULL;
> +	count = 0;
> +	list_for_each_entry(rq, &engine->timeline->requests, link) {
> +		if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> +			print_request(m, rq, "\t\tE ");
> +		else
> +			last = rq;

else {
	last = list_last_entry(...) ?
	break;
}

> +	}
> +	if (last) {
> +		if (count > MAX_REQUESTS_TO_SHOW) {
> +			drm_printf(m,
> +				   "\t\t...skipping %d executing requests...\n",
> +				   count - MAX_REQUESTS_TO_SHOW);
> +		}
> +		print_request(m, last, "\t\tE ");
> +	}

Or even stuff this printf in the first loop above, under the else 
branch. Maybe shorter would be like this, modulo off-by-ones if I made some:

list_for_each_entry(rq, &engine->timeline->requests, link) {
	struct i915_request *pr = rq;

	if (++count > MAX_REQUESTS_TO_SHOW) {
		pr = list_last_entry(...);
		drm_printf(m,
			   "\t\t...skipping %d executing requests...\n",
			   count - MAX_REQUESTS_TO_SHOW);
	}
	
	print_request(m, pr, "\t\tE ");
	
	if (count > MAX_REQUESTS_TO_SHOW)
		break;
}
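
Filled in against the list used in the patch, that might look roughly like the following (the list_last_entry() arguments here are an assumption from the patch context, with count reset to 0 beforehand as in v2):

list_for_each_entry(rq, &engine->timeline->requests, link) {
	struct i915_request *pr = rq;

	if (++count > MAX_REQUESTS_TO_SHOW) {
		/* Skip straight to the final executing request. */
		pr = list_last_entry(&engine->timeline->requests,
				     struct i915_request, link);
		drm_printf(m,
			   "\t\t...skipping %d executing requests...\n",
			   count - MAX_REQUESTS_TO_SHOW);
	}

	print_request(m, pr, "\t\tE ");

	if (count > MAX_REQUESTS_TO_SHOW)
		break;
}

Note that breaking on the first excess request means count - MAX_REQUESTS_TO_SHOW is always 1 here, so the true number of skipped requests is not reported.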

> +
> +	last = NULL;
> +	count = 0;
>   	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
>   	for (rb = execlists->first; rb; rb = rb_next(rb)) {
>   		struct i915_priolist *p =
>   			rb_entry(rb, typeof(*p), node);
>   
> -		list_for_each_entry(rq, &p->requests, sched.link)
> -			print_request(m, rq, "\t\tQ ");
> +		list_for_each_entry(rq, &p->requests, sched.link) {
> +			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> +				print_request(m, rq, "\t\tQ ");
> +			else
> +				last = rq;
> +		}
>   	}
> +	if (last) {
> +		if (count > MAX_REQUESTS_TO_SHOW) {
> +			drm_printf(m,
> +				   "\t\t...skipping %d queued requests...\n",
> +				   count - MAX_REQUESTS_TO_SHOW);
> +		}
> +		print_request(m, last, "\t\tQ ");
> +	}

Then I am thinking how to avoid the duplication and extract the smart 
printer. Macro would be easy at least, if a bit ugly.

Another idea comes to mind, but probably for the future, to merge same 
prio, context and strictly consecutive seqnos to a single line of output 
like:

  [prefix]seqno-seqno [%llx:seqno-seqno] (%u consecutive requests)

Give or take, but it will be more involved to do that.
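
A rough standalone sketch of that grouping rule, with a made-up struct standing in for the real request (not i915 code, purely to illustrate the coalescing):

#include <stdio.h>

struct req {
	int prio;
	unsigned long long ctx;
	unsigned int seqno;
};

/* Collapse runs of same-prio, same-context, strictly consecutive seqnos. */
static void dump_coalesced(const struct req *r, int count)
{
	int i = 0;

	while (i < count) {
		int j = i;

		while (j + 1 < count &&
		       r[j + 1].prio == r[i].prio &&
		       r[j + 1].ctx == r[i].ctx &&
		       r[j + 1].seqno == r[j].seqno + 1)
			j++;

		printf("\t\tE %u-%u [%llx:%u-%u] (%d consecutive requests)\n",
		       r[i].seqno, r[j].seqno,
		       r[i].ctx, r[i].seqno, r[j].seqno,
		       j - i + 1);
		i = j + 1;
	}
}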

> +
>   	spin_unlock_irq(&engine->timeline->lock);
>   
>   	spin_lock_irq(&b->rb_lock);
> 

Looks OK in general, just please see if you like my idea to contain the 
logic within a single loop and maybe even move it to a macro.

Regards,

Tvrtko
Chris Wilson April 24, 2018, 9:33 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-04-24 10:27:23)
> 
> On 24/04/2018 09:16, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> > index 66cddd059666..2398ea71e747 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> > @@ -1307,11 +1307,13 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> >                      struct drm_printer *m,
> >                      const char *header, ...)
> >   {
> > +     const int MAX_REQUESTS_TO_SHOW = 8;
> >       struct intel_breadcrumbs * const b = &engine->breadcrumbs;
> >       const struct intel_engine_execlists * const execlists = &engine->execlists;
> >       struct i915_gpu_error * const error = &engine->i915->gpu_error;
> > -     struct i915_request *rq;
> > +     struct i915_request *rq, *last;
> >       struct rb_node *rb;
> > +     int count;
> >   
> >       if (header) {
> >               va_list ap;
> > @@ -1378,16 +1380,47 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> >       }
> >   
> >       spin_lock_irq(&engine->timeline->lock);
> > -     list_for_each_entry(rq, &engine->timeline->requests, link)
> > -             print_request(m, rq, "        E ");
> > +
> > +     last = NULL;
> > +     count = 0;
> > +     list_for_each_entry(rq, &engine->timeline->requests, link) {
> > +             if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> > +                     print_request(m, rq, "        E ");
> > +             else
> > +                     last = rq;
> 
> else {
>         last = list_last_entry(...) ?
>         break;
> }
> 
> > +     }
> > +     if (last) {
> > +             if (count > MAX_REQUESTS_TO_SHOW) {
> > +                     drm_printf(m,
> > +                                "        ...skipping %d executing requests...\n",
> > +                                count - MAX_REQUESTS_TO_SHOW);
> > +             }
> > +             print_request(m, last, "        E ");
> > +     }
> 
> Or even stuff this printf in the first loop above, under the else 
> branch. Maybe shorter would be like this, modulo off-by-ones if I made some:
> 
> list_for_each_entry(rq, &engine->timeline->requests, link) {
>         struct i915_request *pr = rq;
> 
>         if (++count > MAX_REQUESTS_TO_SHOW) {
>                 pr = list_last_entry(...);
>                 drm_printf(m,
>                            "        ...skipping %d executing requests...\n",
>                            count - MAX_REQUESTS_TO_SHOW);
>         }
>         
>         print_request(m, pr, "        E ");
>         
>         if (count > MAX_REQUESTS_TO_SHOW)
>                 break;
> }
> 
> > +
> > +     last = NULL;
> > +     count = 0;
> >       drm_printf(m, "        Queue priority: %d\n", execlists->queue_priority);
> >       for (rb = execlists->first; rb; rb = rb_next(rb)) {
> >               struct i915_priolist *p =
> >                       rb_entry(rb, typeof(*p), node);
> >   
> > -             list_for_each_entry(rq, &p->requests, sched.link)
> > -                     print_request(m, rq, "        Q ");
> > +             list_for_each_entry(rq, &p->requests, sched.link) {
> > +                     if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> > +                             print_request(m, rq, "        Q ");
> > +                     else
> > +                             last = rq;
> > +             }
> >       }
> > +     if (last) {
> > +             if (count > MAX_REQUESTS_TO_SHOW) {
> > +                     drm_printf(m,
> > +                                "        ...skipping %d queued requests...\n",
> > +                                count - MAX_REQUESTS_TO_SHOW);
> > +             }
> > +             print_request(m, last, "        Q ");
> > +     }
> 
> Then I am thinking how to avoid the duplication and extract the smart 
> printer. Macro would be easy at least, if a bit ugly.

Yeah, and for the moment, I'd like to keep the duplication obvious and
keep the two passes very similar.
 
> Another idea comes to mind, but probably for the future, to merge same 
> prio, context and strictly consecutive seqnos to a single line of output 
> like:
> 
>   [prefix]seqno-seqno [%llx:seqno-seqno] (%u consecutive requests)
> 
> Give or take, but it will be more involved to do that.

Deciding when rq are equivalent will get messy, and is going to drift.

> 
> > +
> >       spin_unlock_irq(&engine->timeline->lock);
> >   
> >       spin_lock_irq(&b->rb_lock);
> > 
> 
> Looks OK in general, just please see if you like my idea to contain the 
> logic within a single loop and maybe even move it to a macro.

I don't, because it missed one important thing, the count of skipped
requests. :-p
-Chris
Tvrtko Ursulin April 24, 2018, 9:40 a.m. UTC | #3
On 24/04/2018 10:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-04-24 10:27:23)
>>
>> Then I am thinking how to avoid the duplication and extract the smart
>> printer. Macro would be easy at least, if a bit ugly.
> 
> Yeah, and for the moment, I'd like to keep the duplication obvious and
> keep the two passes very similar.
>   
>> Another idea comes to mind, but probably for the future, to merge same
>> prio, context and strictly consecutive seqnos to a single line of output
>> like:
>>
>>    [prefix]seqno-seqno [%llx:seqno-seqno] (%u consecutive requests)
>>
>> Give or take, but it will be more involved to do that.
> 
> Deciding when rq are equivalent will get messy, and is going to drift.
> 
>>
>>> +
>>>        spin_unlock_irq(&engine->timeline->lock);
>>>    
>>>        spin_lock_irq(&b->rb_lock);
>>>
>>
>> Looks OK in general, just please see if you like my idea to contain the
>> logic within a single loop and maybe even move it to a macro.
> 
> I don't, because it missed one important thing, the count of skipped
> requests. :-p

Doh.. Well, I don't like it, but have no better/easier ideas at the moment.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson April 24, 2018, 1:02 p.m. UTC | #4
Quoting Tvrtko Ursulin (2018-04-24 10:40:57)
> 
> On 24/04/2018 10:33, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-04-24 10:27:23)
> >>
> >> Looks OK in general, just please see if you like my idea to contain the
> >> logic within a single loop and maybe even move it to a macro.
> > 
> > I don't, because it missed one important thing, the count of skipped
> > requests. :-p
> 
> Doh.. Well, I don't like it, but have no better/easier ideas at the moment.
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

I'll take it as an improvement that can be refined again and again and
again...

Thanks,
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 66cddd059666..2398ea71e747 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1307,11 +1307,13 @@  void intel_engine_dump(struct intel_engine_cs *engine,
 		       struct drm_printer *m,
 		       const char *header, ...)
 {
+	const int MAX_REQUESTS_TO_SHOW = 8;
 	struct intel_breadcrumbs * const b = &engine->breadcrumbs;
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
-	struct i915_request *rq;
+	struct i915_request *rq, *last;
 	struct rb_node *rb;
+	int count;
 
 	if (header) {
 		va_list ap;
@@ -1378,16 +1380,47 @@  void intel_engine_dump(struct intel_engine_cs *engine,
 	}
 
 	spin_lock_irq(&engine->timeline->lock);
-	list_for_each_entry(rq, &engine->timeline->requests, link)
-		print_request(m, rq, "\t\tE ");
+
+	last = NULL;
+	count = 0;
+	list_for_each_entry(rq, &engine->timeline->requests, link) {
+		if (count++ < MAX_REQUESTS_TO_SHOW - 1)
+			print_request(m, rq, "\t\tE ");
+		else
+			last = rq;
+	}
+	if (last) {
+		if (count > MAX_REQUESTS_TO_SHOW) {
+			drm_printf(m,
+				   "\t\t...skipping %d executing requests...\n",
+				   count - MAX_REQUESTS_TO_SHOW);
+		}
+		print_request(m, last, "\t\tE ");
+	}
+
+	last = NULL;
+	count = 0;
 	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
 	for (rb = execlists->first; rb; rb = rb_next(rb)) {
 		struct i915_priolist *p =
 			rb_entry(rb, typeof(*p), node);
 
-		list_for_each_entry(rq, &p->requests, sched.link)
-			print_request(m, rq, "\t\tQ ");
+		list_for_each_entry(rq, &p->requests, sched.link) {
+			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
+				print_request(m, rq, "\t\tQ ");
+			else
+				last = rq;
+		}
 	}
+	if (last) {
+		if (count > MAX_REQUESTS_TO_SHOW) {
+			drm_printf(m,
+				   "\t\t...skipping %d queued requests...\n",
+				   count - MAX_REQUESTS_TO_SHOW);
+		}
+		print_request(m, last, "\t\tQ ");
+	}
+
 	spin_unlock_irq(&engine->timeline->lock);
 
 	spin_lock_irq(&b->rb_lock);