diff mbox

[v2] drm/i915: prevent out of range pt in the PDE macros (take 3)

Message ID 56605FD2.5090909@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon Dec. 3, 2015, 3:29 p.m. UTC
On 06/10/15 12:53, Chris Wilson wrote:
> On Tue, Oct 06, 2015 at 12:21:05PM +0100, Dave Gordon wrote:
>> ... although I still think my version is (slightly) easier to read.
>> Or it could be improved even more by moving the increment of 'iter'
>> to the end, making it one line shorter and perhaps helping the
>> compiler a little :)
>>
>> #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
>>          for (iter = gen8_pml4e_index(start);                           \
>>               iter < GEN8_PML4ES_PER_PML4 &&                            \
>>                  (pdp = (pml4)->pdps[iter], length > 0);                \
>>               temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,  \
>>               temp = min(temp, length),                                 \
>>               start += temp, length -= temp, ++iter)
>
> Shorter, yes, but we may as well take advantage of not using [iter] if
> length == 0, so meh.
>
>> Or, noting that 'temp' is never used in the generated loop bodies,
>> we could eliminate the parameter and make it local to the loop
>> header :)
>>
>> #define gen8_for_each_pml4e(pdp, pml4, start, length, iter)            \
>>          for (iter = gen8_pml4e_index(start);                           \
>>               iter < GEN8_PML4ES_PER_PML4 &&                            \
>>                  (pdp = (pml4)->pdps[iter], length > 0);                \
>>               ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);   \
>>                      temp = min(temp - start, length);                  \
>>                      start += temp, length -= temp; }),                 \
>>                  ++iter)
>
> Removing extraneous parameters from macros is differently a usability win.
> Care to spin a real patch so we can see how it looks in practice?
> -Chris

OK, here's the patch that Chris asked for. On compilation, this version 
comes out about 160 bytes smaller than the original, and the disassembly 
has 100 fewer lines. This probably doesn't improve performance 
significantly, though, as many of the uses of these macros are inside 
debug-only data dumping functions.

.Dave.
diff mbox

Patch

From aa121eb42768e19097d93d3a430ab998fc0f62d2 Mon Sep 17 00:00:00 2001
From: Dave Gordon <david.s.gordon@intel.com>
Date: Thu, 3 Dec 2015 11:19:58 +0000
Subject: [PATCH] drm/i915: eliminate 'temp' in gen8_for_each_{pdd,pdpe,pml4e}
 macros
Organization: Intel Corporation (UK) Ltd. - Co. Reg. #1134945 - Pipers Way, Swindon SN3 1RJ

All of these iterator macros require a 'temp' argument, used merely to
hold internal partial results. We can instead declare the temporary
variable inside the macro, so the caller need not provide it.

Some of the old code contained nested iterators that actually reused the
same 'temp' variable for both inner and outer instances. It's quite
surprising that this didn't introduce bugs! But it does show that the
value of 'temp' isn't required to persist during the iterated body.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 39 ++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_gtt.h | 49 +++++++++++++++++--------------------
 2 files changed, 41 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1f7e6b9..c25e8b0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -770,10 +770,10 @@  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
 					   scratch_pte);
 	} else {
-		uint64_t templ4, pml4e;
+		uint64_t pml4e;
 		struct i915_page_directory_pointer *pdp;
 
-		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
+		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
 			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
 						   scratch_pte);
 		}
@@ -839,10 +839,10 @@  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 					      cache_level);
 	} else {
 		struct i915_page_directory_pointer *pdp;
-		uint64_t templ4, pml4e;
+		uint64_t pml4e;
 		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
 
-		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
+		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
 			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
 						      start, cache_level);
 		}
@@ -1020,10 +1020,9 @@  static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 {
 	struct drm_device *dev = vm->dev;
 	struct i915_page_table *pt;
-	uint64_t temp;
 	uint32_t pde;
 
-	gen8_for_each_pde(pt, pd, start, length, temp, pde) {
+	gen8_for_each_pde(pt, pd, start, length, pde) {
 		/* Don't reallocate page tables */
 		if (test_bit(pde, pd->used_pdes)) {
 			/* Scratch is never allocated this way */
@@ -1082,13 +1081,12 @@  gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
 {
 	struct drm_device *dev = vm->dev;
 	struct i915_page_directory *pd;
-	uint64_t temp;
 	uint32_t pdpe;
 	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
 
 	WARN_ON(!bitmap_empty(new_pds, pdpes));
 
-	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		if (test_bit(pdpe, pdp->used_pdpes))
 			continue;
 
@@ -1136,12 +1134,11 @@  gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 {
 	struct drm_device *dev = vm->dev;
 	struct i915_page_directory_pointer *pdp;
-	uint64_t temp;
 	uint32_t pml4e;
 
 	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
 
-	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		if (!test_bit(pml4e, pml4->used_pml4es)) {
 			pdp = alloc_pdp(dev);
 			if (IS_ERR(pdp))
@@ -1225,7 +1222,6 @@  static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	struct i915_page_directory *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
-	uint64_t temp;
 	uint32_t pdpe;
 	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
 	int ret;
@@ -1252,7 +1248,7 @@  static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	}
 
 	/* For every page directory referenced, allocate page tables */
-	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
 						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
 		if (ret)
@@ -1264,7 +1260,7 @@  static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 	/* Allocations have completed successfully, so set the bitmaps, and do
 	 * the mappings. */
-	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		gen8_pde_t *const page_directory = kmap_px(pd);
 		struct i915_page_table *pt;
 		uint64_t pd_len = length;
@@ -1274,7 +1270,7 @@  static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 		/* Every pd should be allocated, we just did that above. */
 		WARN_ON(!pd);
 
-		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
 			/* Same reasoning as pd */
 			WARN_ON(!pt);
 			WARN_ON(!pd_len);
@@ -1311,6 +1307,8 @@  static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 err_out:
 	while (pdpe--) {
+		unsigned long temp;
+
 		for_each_set_bit(temp, new_page_tables + pdpe *
 				BITS_TO_LONGS(I915_PDES), I915_PDES)
 			free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
@@ -1333,7 +1331,7 @@  static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 			container_of(vm, struct i915_hw_ppgtt, base);
 	struct i915_page_directory_pointer *pdp;
-	uint64_t temp, pml4e;
+	uint64_t pml4e;
 	int ret = 0;
 
 	/* Do the pml4 allocations first, so we don't need to track the newly
@@ -1352,7 +1350,7 @@  static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 	     "The allocation has spanned more than 512GB. "
 	     "It is highly likely this is incorrect.");
 
-	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		WARN_ON(!pdp);
 
 		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
@@ -1392,10 +1390,9 @@  static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
 			  struct seq_file *m)
 {
 	struct i915_page_directory *pd;
-	uint64_t temp;
 	uint32_t pdpe;
 
-	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		struct i915_page_table *pt;
 		uint64_t pd_len = length;
 		uint64_t pd_start = start;
@@ -1405,7 +1402,7 @@  static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
 			continue;
 
 		seq_printf(m, "\tPDPE #%d\n", pdpe);
-		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
 			uint32_t  pte;
 			gen8_pte_t *pt_vaddr;
 
@@ -1455,11 +1452,11 @@  static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
 		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
 	} else {
-		uint64_t templ4, pml4e;
+		uint64_t pml4e;
 		struct i915_pml4 *pml4 = &ppgtt->pml4;
 		struct i915_page_directory_pointer *pdp;
 
-		gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
+		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 			if (!test_bit(pml4e, pml4->used_pml4es))
 				continue;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 877c32c..b448ad8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -455,32 +455,29 @@  static inline uint32_t gen6_pde_index(uint32_t addr)
  * between from start until start + length. On gen8+ it simply iterates
  * over every page directory entry in a page directory.
  */
-#define gen8_for_each_pde(pt, pd, start, length, temp, iter)		\
-	for (iter = gen8_pde_index(start); \
-	     length > 0 && iter < I915_PDES ? \
-			(pt = (pd)->page_table[iter]), 1 : 0; \
-	     iter++,				\
-	     temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT) - start,	\
-	     temp = min(temp, length),					\
-	     start += temp, length -= temp)
-
-#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)	\
-	for (iter = gen8_pdpe_index(start); \
-	     length > 0 && (iter < I915_PDPES_PER_PDP(dev)) ? \
-			(pd = (pdp)->page_directory[iter]), 1 : 0; \
-	     iter++,				\
-	     temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,	\
-	     temp = min(temp, length),					\
-	     start += temp, length -= temp)
-
-#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)	\
-	for (iter = gen8_pml4e_index(start);	\
-	     length > 0 && iter < GEN8_PML4ES_PER_PML4 ? \
-			(pdp = (pml4)->pdps[iter]), 1 : 0; \
-	     iter++,				\
-	     temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,	\
-	     temp = min(temp, length),					\
-	     start += temp, length -= temp)
+#define gen8_for_each_pde(pt, pd, start, length, iter)			\
+	for (iter = gen8_pde_index(start);				\
+	     length > 0 && iter < I915_PDES &&				\
+		(pt = (pd)->page_table[iter], true);			\
+	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT);		\
+		    temp = min(temp - start, length);			\
+		    start += temp, length -= temp; }), ++iter)
+
+#define gen8_for_each_pdpe(pd, pdp, start, length, iter)		\
+	for (iter = gen8_pdpe_index(start);				\
+	     length > 0 && iter < I915_PDPES_PER_PDP(dev) &&		\
+		(pd = (pdp)->page_directory[iter], true);		\
+	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT);	\
+		    temp = min(temp - start, length);			\
+		    start += temp, length -= temp; }), ++iter)
+
+#define gen8_for_each_pml4e(pdp, pml4, start, length, iter)		\
+	for (iter = gen8_pml4e_index(start);				\
+	     length > 0 && iter < GEN8_PML4ES_PER_PML4 &&		\
+		(pdp = (pml4)->pdps[iter], true);			\
+	     ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);	\
+		    temp = min(temp - start, length);			\
+		    start += temp, length -= temp; }), ++iter)
 
 static inline uint32_t gen8_pte_index(uint64_t address)
 {
-- 
1.9.1