diff mbox

drm/i915: Use memset64() to align the ring with MI_NOOP

Message ID 20180425123718.16366-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson April 25, 2018, 12:37 p.m. UTC
When filling the ring to align the emit pointer to the next cacheline,
use memset64() rather than open-coding it. As we know that we always
have an even number of dwords, we can replace the dword loop with the
qword equivalent.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

Comments

Tvrtko Ursulin April 25, 2018, 2 p.m. UTC | #1
On 25/04/2018 13:37, Chris Wilson wrote:
> When filling the ring to align the emit pointer to the next cacheline,
> use memset64() rather than open-coding it. As we know that we always
> have an even number of dwords, we can replace the dword loop with the
> qword equivalent.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++++++++------
>   1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c68ac605b8a9..07a9a2b4beb7 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1717,22 +1717,24 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
>   /* Align the ring tail to a cacheline boundary */
>   int intel_ring_cacheline_align(struct i915_request *rq)
>   {
> -	int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
> -	u32 *cs;
> +	int num_dwords;
> +	void *cs;
>   
> +	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
>   	if (num_dwords == 0)
>   		return 0;
>   
> -	num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords;
> +	num_dwords = CACHELINE_DWORDS - num_dwords;
> +	GEM_BUG_ON(num_dwords & 1);
> +
>   	cs = intel_ring_begin(rq, num_dwords);
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> -	while (num_dwords--)
> -		*cs++ = MI_NOOP;
> -

BUILD_BUG_ON(MI_NOOP != 0) for paranoid future proofing, since MI_NOOP 
is now not mentioned in this function, but is meant.

> +	memset64(cs, 0, num_dwords/2);

Spaces around '/' to appease checkpatch and our style.

>   	intel_ring_advance(rq, cs);
>   
> +	GEM_BUG_ON(rq->ring->emit & CACHELINE_BYTES);
>   	return 0;
>   }
>   
> 

With whitespace fixed, and preferably BUILD_BUG_ON:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson April 25, 2018, 2:09 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-04-25 15:00:43)
> 
> On 25/04/2018 13:37, Chris Wilson wrote:
> > When filling the ring to align the emit pointer to the next cacheline,
> > use memset64() rather than open-coding it. As we know that we always
> > have an even number of dwords, we can replace the dword loop with the
> > qword equivalent.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++++++++------
> >   1 file changed, 8 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index c68ac605b8a9..07a9a2b4beb7 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -1717,22 +1717,24 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
> >   /* Align the ring tail to a cacheline boundary */
> >   int intel_ring_cacheline_align(struct i915_request *rq)
> >   {
> > -     int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
> > -     u32 *cs;
> > +     int num_dwords;
> > +     void *cs;
> >   
> > +     num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
> >       if (num_dwords == 0)
> >               return 0;
> >   
> > -     num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords;
> > +     num_dwords = CACHELINE_DWORDS - num_dwords;
> > +     GEM_BUG_ON(num_dwords & 1);
> > +
> >       cs = intel_ring_begin(rq, num_dwords);
> >       if (IS_ERR(cs))
> >               return PTR_ERR(cs);
> >   
> > -     while (num_dwords--)
> > -             *cs++ = MI_NOOP;
> > -
> 
> BUILD_BUG_ON(MI_NOOP != 0) for paranoid future proofing, since MI_NOOP 
> is now not mentioned in this function, but is meant.

((u64)MI_NOOP << 32 | MI_NOOP) ?
-Chris
Tvrtko Ursulin April 25, 2018, 2:14 p.m. UTC | #3
On 25/04/2018 15:09, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-04-25 15:00:43)
>>
>> On 25/04/2018 13:37, Chris Wilson wrote:
>>> When filling the ring to align the emit pointer to the next cacheline,
>>> use memset64() rather than open-coding it. As we know that we always
>>> have an even number of dwords, we can replace the dword loop with the
>>> qword equivalent.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++++++++------
>>>    1 file changed, 8 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> index c68ac605b8a9..07a9a2b4beb7 100644
>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> @@ -1717,22 +1717,24 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
>>>    /* Align the ring tail to a cacheline boundary */
>>>    int intel_ring_cacheline_align(struct i915_request *rq)
>>>    {
>>> -     int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
>>> -     u32 *cs;
>>> +     int num_dwords;
>>> +     void *cs;
>>>    
>>> +     num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
>>>        if (num_dwords == 0)
>>>                return 0;
>>>    
>>> -     num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords;
>>> +     num_dwords = CACHELINE_DWORDS - num_dwords;
>>> +     GEM_BUG_ON(num_dwords & 1);
>>> +
>>>        cs = intel_ring_begin(rq, num_dwords);
>>>        if (IS_ERR(cs))
>>>                return PTR_ERR(cs);
>>>    
>>> -     while (num_dwords--)
>>> -             *cs++ = MI_NOOP;
>>> -
>>
>> BUILD_BUG_ON(MI_NOOP != 0) for paranoid future proofing, since MI_NOOP
>> is now not mentioned in this function, but is meant.
> 
> ((u64)MI_NOOP << 32 | MI_NOOP) ?

That also works, can keep the r-b.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c68ac605b8a9..07a9a2b4beb7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1717,22 +1717,24 @@  u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 /* Align the ring tail to a cacheline boundary */
 int intel_ring_cacheline_align(struct i915_request *rq)
 {
-	int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
-	u32 *cs;
+	int num_dwords;
+	void *cs;
 
+	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
 	if (num_dwords == 0)
 		return 0;
 
-	num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords;
+	num_dwords = CACHELINE_DWORDS - num_dwords;
+	GEM_BUG_ON(num_dwords & 1);
+
 	cs = intel_ring_begin(rq, num_dwords);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	while (num_dwords--)
-		*cs++ = MI_NOOP;
-
+	memset64(cs, 0, num_dwords/2);
 	intel_ring_advance(rq, cs);
 
+	GEM_BUG_ON(rq->ring->emit & CACHELINE_BYTES);
 	return 0;
 }