[RFC,v4] i965/gen8+: bo in state base address must be in 32-bit address range
diff mbox

Message ID 1444053653-20856-1-git-send-email-michel.thierry@intel.com
State New
Headers show

Commit Message

Michel Thierry Oct. 5, 2015, 2 p.m. UTC
Gen8+ supports 48-bit virtual addresses, but some objects must always be
allocated inside the 32-bit address range.

Specifically, any resource used with a flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in the
32-bit address range, because the General State Offset and Instruction
State Offset are limited to 32 bits.

Use drm_intel_bo_use_48b_address_range to flag when the 4GB limit is not
necessary, and the bo can be in the full address space.

This commit introduces a dependency on libdrm 2.4.6x, which adds
the drm_intel_bo_use_48b_address_range function.

v2: s/48baddress/48b_address/,
    Only use in OUT_RELOC64 cases, OUT_RELOC implies a 32-bit address
    offset is needed (Ben)
v3: Added OUT_RELOC64_INSIDE_4G, so it stands out when a 64-bit
    relocation needs the 32-bit workaround (Chris)
v4: To use the full address space, libdrm requires the support flag to
    be set before calling emit_reloc.

References: http://lists.freedesktop.org/archives/dri-devel/2015-September/089757.html
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 configure.ac                                  |  2 +-
 src/mesa/drivers/dri/i965/gen8_misc_state.c   | 17 ++++++++++-------
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 18 ++++++++++++++----
 src/mesa/drivers/dri/i965/intel_batchbuffer.h | 16 ++++++++++++----
 4 files changed, 37 insertions(+), 16 deletions(-)

Patch
diff mbox

diff --git a/configure.ac b/configure.ac
index 217281f..e31f4d3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -71,7 +71,7 @@  dnl Versions for external dependencies
 LIBDRM_REQUIRED=2.4.60
 LIBDRM_RADEON_REQUIRED=2.4.56
 LIBDRM_AMDGPU_REQUIRED=2.4.63
-LIBDRM_INTEL_REQUIRED=2.4.61
+LIBDRM_INTEL_REQUIRED=2.4.66 #yes, it does not exist yet
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED=2.4.62
 LIBDRM_FREEDRENO_REQUIRED=2.4.65
diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c
index a46b252..7b4e448 100644
--- a/src/mesa/drivers/dri/i965/gen8_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c
@@ -28,6 +28,9 @@ 
 
 /**
  * Define the base addresses which some state is referenced from.
+ * Use OUT_RELOC64_INSIDE_4G instead of OUT_RELOC64, because the General State
+ * Offset and Instruction State Offset are limited to 32 bits by hardware, so
+ * the buffers they refer to must be located in the first 4GB.
  */
 static void
 gen8_upload_state_base_address(struct brw_context *brw)
@@ -42,18 +45,18 @@  gen8_upload_state_base_address(struct brw_context *brw)
    OUT_BATCH(0);
    OUT_BATCH(mocs_wb << 16);
    /* Surface state base address: */
-   OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
-               mocs_wb << 4 | 1);
+   OUT_RELOC64_INSIDE_4G(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                         mocs_wb << 4 | 1);
    /* Dynamic state base address: */
-   OUT_RELOC64(brw->batch.bo,
-               I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
-               mocs_wb << 4 | 1);
+   OUT_RELOC64_INSIDE_4G(brw->batch.bo,
+                         I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+                         mocs_wb << 4 | 1);
    /* Indirect object base address: MEDIA_OBJECT data */
    OUT_BATCH(mocs_wb << 4 | 1);
    OUT_BATCH(0);
    /* Instruction base address: shader kernels (incl. SIP) */
-   OUT_RELOC64(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-               mocs_wb << 4 | 1);
+   OUT_RELOC64_INSIDE_4G(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+                         mocs_wb << 4 | 1);
 
    /* General state buffer size */
    OUT_BATCH(0xfffff001);
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 0363bd3..b15d627 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -427,11 +427,21 @@  uint64_t
 intel_batchbuffer_reloc64(struct brw_context *brw,
                           drm_intel_bo *buffer, uint32_t offset,
                           uint32_t read_domains, uint32_t write_domain,
-                          uint32_t delta)
+                          uint32_t delta, uint32_t support_48bit_offset)
 {
-   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
-                                     buffer, delta,
-                                     read_domains, write_domain);
+   int ret;
+
+   /* Not all buffers can be allocated outside the first 4GB, and
+    * the offset must be limited to 32-bits.
+    */
+   if (support_48bit_offset)
+      drm_intel_bo_use_48b_address_range(buffer, 1);
+   else
+      drm_intel_bo_use_48b_address_range(buffer, 0);
+
+   ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
+                                 buffer, delta,
+                                 read_domains, write_domain);
    assert(ret == 0);
    (void) ret;
 
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 2b177d3..8816f56 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -68,7 +68,8 @@  uint64_t intel_batchbuffer_reloc64(struct brw_context *brw,
                                    uint32_t offset,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
-                                   uint32_t delta);
+                                   uint32_t delta,
+                                   uint32_t support_48bit_offset);
 
 #define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map))
 
@@ -188,17 +189,24 @@  intel_batchbuffer_advance(struct brw_context *brw)
                                      (delta)));                \
 } while (0)
 
-/* Handle 48-bit address relocations for Gen8+ */
-#define OUT_RELOC64(buf, read_domains, write_domain, delta) do {      \
+#define __OUT_RELOC64(buf, read_domains, write_domain, delta, support_48bit) do { \
    uint32_t __offset = (__map - brw->batch.map) * 4;                  \
    uint64_t reloc64 = intel_batchbuffer_reloc64(brw, (buf), __offset, \
                                                 (read_domains),       \
                                                 (write_domain),       \
-                                                (delta));             \
+                                                (delta),              \
+                                                (support_48bit));     \
    OUT_BATCH(reloc64);                                                \
    OUT_BATCH(reloc64 >> 32);                                          \
 } while (0)
 
+/* Handle 48-bit address relocations for Gen8+ */
+#define OUT_RELOC64(buf, read_domains, write_domain, delta)  \
+        __OUT_RELOC64(buf, read_domains, write_domain, delta, 1)
+/* Handle 48-bit address relocations for Gen8+, requesting 32-bit offset */
+#define OUT_RELOC64_INSIDE_4G(buf, read_domains, write_domain, delta)  \
+        __OUT_RELOC64(buf, read_domains, write_domain, delta, 0)
+
 #define ADVANCE_BATCH()                  \
    assert(__map == brw->batch.map_next); \
    intel_batchbuffer_advance(brw);       \