diff mbox

drm/i915/gtt: Trust the uncached store to flush wcb

Message ID 20180508120313.8308-1-mika.kuoppala@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala May 8, 2018, 12:03 p.m. UTC
Not all architectures guarantee that an uncached read will
flush the write combining buffer. So marking it explicitly
is recommended [1].

However, we know the architecture we are operating on
and can avoid the wmb() as the UC store will flush the WCB [2].

Omit the wmb() before invalidate as redundant.

v2: squash combining and removal (Chris)

References: http://yarchive.net/comp/linux/write_combining.html [1]
References: http://download.intel.com/design/PentiumII/applnots/24442201.pdf [2]
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

Comments

Chris Wilson May 8, 2018, 12:06 p.m. UTC | #1
Quoting Mika Kuoppala (2018-05-08 13:03:13)
> Not all architectures guarantee that uncached read will
> flush the write combining buffer. So marking it explicitly
> is recommended [1].
> 
> However we know the architecture we are operating on
> and can avoid wmb as the UC store will flush the wcb [2].
> 
> Omit the wmb() before invalidate as redundant.
> 
> v2: squash combining and removal (Chris)
> 
> References: http://yarchive.net/comp/linux/write_combining.html [1]
> References: http://download.intel.com/design/PentiumII/applnots/24442201.pdf [2]
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++----
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c879bfd9294f..2126358761a5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -110,7 +110,8 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma);
>  
>  static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
>  {
> -       /* Note that as an uncached mmio write, this should flush the
> +       /*
> +        * Note that as an uncached mmio write, this will flush the
>          * WCB of the writes into the GGTT before it triggers the invalidate.
>          */
>         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> @@ -2418,8 +2419,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>         for_each_sgt_dma(addr, sgt_iter, vma->pages)
>                 gen8_set_pte(gtt_entries++, pte_encode | addr);
>  
> -       wmb();
> -
>         /* This next bit makes the above posting read even more important. We

Yeah, we should rewrite this comment as well; just skip the first
obsolete sentence.
-Chris
Mika Kuoppala May 11, 2018, 1:23 p.m. UTC | #2
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2018-05-08 13:03:13)
>> Not all architectures guarantee that uncached read will
>> flush the write combining buffer. So marking it explicitly
>> is recommended [1].
>> 
>> However we know the architecture we are operating on
>> and can avoid wmb as the UC store will flush the wcb [2].
>> 
>> Omit the wmb() before invalidate as redundant.
>> 
>> v2: squash combining and removal (Chris)
>> 
>> References: http://yarchive.net/comp/linux/write_combining.html [1]
>> References: http://download.intel.com/design/PentiumII/applnots/24442201.pdf [2]
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>  drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++----
>>  1 file changed, 2 insertions(+), 4 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index c879bfd9294f..2126358761a5 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -110,7 +110,8 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma);
>>  
>>  static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
>>  {
>> -       /* Note that as an uncached mmio write, this should flush the
>> +       /*
>> +        * Note that as an uncached mmio write, this will flush the
>>          * WCB of the writes into the GGTT before it triggers the invalidate.
>>          */
>>         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
>> @@ -2418,8 +2419,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>>         for_each_sgt_dma(addr, sgt_iter, vma->pages)
>>                 gen8_set_pte(gtt_entries++, pte_encode | addr);
>>  
>> -       wmb();
>> -
>>         /* This next bit makes the above posting read even more important. We
>
> Yeah, we should rewrite this comment as well; just skip the first
> obsolete sentence.

v3 pushed. Thanks for review.

-Mika
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c879bfd9294f..2126358761a5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -110,7 +110,8 @@  i915_get_ggtt_vma_pages(struct i915_vma *vma);
 
 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
 {
-	/* Note that as an uncached mmio write, this should flush the
+	/*
+	 * Note that as an uncached mmio write, this will flush the
 	 * WCB of the writes into the GGTT before it triggers the invalidate.
 	 */
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
@@ -2418,8 +2419,6 @@  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	for_each_sgt_dma(addr, sgt_iter, vma->pages)
 		gen8_set_pte(gtt_entries++, pte_encode | addr);
 
-	wmb();
-
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
 	 * have finished.
@@ -2460,7 +2459,6 @@  static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	dma_addr_t addr;
 	for_each_sgt_dma(addr, iter, vma->pages)
 		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
-	wmb();
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates