diff mbox series

drm/i915/gtt: Use optimised memset32/64 for clearing PTE

Message ID 20190304230646.23714-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series drm/i915/gtt: Use optimised memset32/64 for clearing PTE | expand

Commit Message

Chris Wilson March 4, 2019, 11:06 p.m. UTC
Replace the open-coded memset loops with the memset32/64 routines, which
reduce to a single instruction or two. Code size change as reported by
scripts/bloat-o-meter:

add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-83 (-83)
Function                                     old     new   delta
gen6_ppgtt_clear_range                       371     344     -27
gen8_ppgtt_clear_pd                          575     519     -56

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

Comments

Matthew Auld March 4, 2019, 11:48 p.m. UTC | #1
On Mon, 4 Mar 2019 at 23:07, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Replace the open-coded memset loops with the memset32/64 routines that
> reduce to a single instruction or two:
>
> add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-83 (-83)
> Function                                     old     new   delta
> gen6_ppgtt_clear_range                       371     344     -27
> gen8_ppgtt_clear_pd                          575     519     -56
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>

Chris Wilson March 5, 2019, 9:05 a.m. UTC | #2
Quoting Matthew Auld (2019-03-04 23:48:09)
> On Mon, 4 Mar 2019 at 23:07, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Replace the open-coded memset loops with the memset32/64 routines that
> > reduce to a single instruction or two:
> >
> > add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-83 (-83)
> > Function                                     old     new   delta
> > gen6_ppgtt_clear_range                       371     344     -27
> > gen8_ppgtt_clear_pd                          575     519     -56
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>

Thanks, pushed to dinq (drm-intel-next-queued).
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7e79691664e5..f97cc7b437f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -809,8 +809,6 @@  static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
 				u64 start, u64 length)
 {
 	unsigned int num_entries = gen8_pte_count(start, length);
-	unsigned int pte = gen8_pte_index(start);
-	unsigned int pte_end = pte + num_entries;
 	gen8_pte_t *vaddr;
 
 	GEM_BUG_ON(num_entries > pt->used_ptes);
@@ -820,8 +818,7 @@  static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
 		return true;
 
 	vaddr = kmap_atomic_px(pt);
-	while (pte < pte_end)
-		vaddr[pte++] = vm->scratch_pte;
+	memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
 	kunmap_atomic(vaddr);
 
 	return false;
@@ -1672,8 +1669,7 @@  static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 
 	while (num_entries) {
 		struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++];
-		const unsigned int end = min(pte + num_entries, GEN6_PTES);
-		const unsigned int count = end - pte;
+		const unsigned int count = min(num_entries, GEN6_PTES - pte);
 		gen6_pte_t *vaddr;
 
 		GEM_BUG_ON(pt == vm->scratch_pt);
@@ -1693,9 +1689,7 @@  static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 		 */
 
 		vaddr = kmap_atomic_px(pt);
-		do {
-			vaddr[pte++] = scratch_pte;
-		} while (pte < end);
+		memset32(vaddr + pte, scratch_pte, count);
 		kunmap_atomic(vaddr);
 
 		pte = 0;