Message ID | 1530584549-31867-3-git-send-email-yakui.zhao@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
>-----Original Message----- >From: Zhao, Yakui >Sent: Tuesday, July 3, 2018 10:22 AM >To: intel-gfx@lists.freedesktop.org >Cc: zhenyuw@linux.intel.com; Zhao, Yakui <yakui.zhao@intel.com> >Subject: [PATCH 2/2] drm/i915: write fence reg only once on VGPU > >On VGPU scenario the read/write operation of fence_reg will be trapped by >the GVT-g. And then gvt-g follows the HW spec to write the fence_reg. >So it is unnecessary to read/write fence reg several times. This will help to >reduce the unnecessary trap of fence_reg mmio operation. > Sorry for one typo. The V2 will be sent. >Signed-off-by: Zhao Yakui <yakui.zhao@intel.com> >--- > drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +++++++++----- > 1 file changed, 9 insertions(+), 5 deletions(-) > >diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c >b/drivers/gpu/drm/i915/i915_gem_fence_reg.c >index d92fe03..55bf6d9 100644 >--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c >+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c >@@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct >drm_i915_fence_reg *fence, > > if (INTEL_GEN(fence->i915) >= 6) { > /* Use the 64-bit RW to read/write fence reg on SNB+ */ >- I915_WRITE64_FW(fence_reg_lo, 0); >- I915_READ64(fence_reg_lo); >- >- I915_WRITE64_FW(fence_reg_lo, val); >- I915_READ64(fence_reg_lo); >+ if (intel_vgpu_active(i915)) >+ I915_WRITE64_FW(fence_reg_lo, val); >+ else { >+ I915_WRITE64_FW(fence_reg_lo, 0); >+ I915_READ64(fence_reg_lo); >+ >+ I915_WRITE64_FW(fence_reg_lo, val); >+ I915_READ64(fence_reg_lo); >+ } > } else { > /* To w/a incoherency with non-atomic 64-bit register updates, > * we split the 64-bit update into two 32-bit writes. In order >-- >2.7.4
Hi Zhao, Thank you for the patch! Yet something to improve: [auto build test ERROR on drm-intel/for-linux-next] [also build test ERROR on v4.18-rc3 next-20180702] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Zhao-Yakui/drm-i915-the-Read-Write-optimization-of-fence-reg/20180703-102412 base: git://anongit.freedesktop.org/drm-intel for-linux-next config: i386-randconfig-x074-201826 (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): drivers/gpu//drm/i915/i915_gem_fence_reg.c: In function 'i965_write_fence_reg': >> drivers/gpu//drm/i915/i915_gem_fence_reg.c:98:25: error: 'i915' undeclared (first use in this function); did you mean 'to_i915'? if (intel_vgpu_active(i915)) ^~~~ to_i915 drivers/gpu//drm/i915/i915_gem_fence_reg.c:98:25: note: each undeclared identifier is reported only once for each function it appears in vim +98 drivers/gpu//drm/i915/i915_gem_fence_reg.c 59 60 static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, 61 struct i915_vma *vma) 62 { 63 i915_reg_t fence_reg_lo, fence_reg_hi; 64 int fence_pitch_shift; 65 u64 val; 66 struct drm_i915_private *dev_priv = fence->i915; 67 68 if (INTEL_GEN(fence->i915) >= 6) { 69 fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); 70 fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); 71 fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; 72 73 } else { 74 fence_reg_lo = FENCE_REG_965_LO(fence->id); 75 fence_reg_hi = FENCE_REG_965_HI(fence->id); 76 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 77 } 78 79 val = 0; 80 if (vma) { 81 unsigned int stride = i915_gem_object_get_stride(vma->obj); 82 83 GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); 84 GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE)); 85 GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE)); 86 GEM_BUG_ON(!IS_ALIGNED(stride, 128)); 87 88 val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32; 89 val |= vma->node.start; 90 val |= (u64)((stride / 128) - 1) << fence_pitch_shift; 91 if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) 92 val |= BIT(I965_FENCE_TILING_Y_SHIFT); 93 val |= I965_FENCE_REG_VALID; 94 } 95 96 if (INTEL_GEN(fence->i915) >= 6) { 97 /* Use the 64-bit RW to read/write fence reg on SNB+ */ > 98 if (intel_vgpu_active(i915)) 99 I915_WRITE64_FW(fence_reg_lo, val); 100 else { 101 I915_WRITE64_FW(fence_reg_lo, 0); 102 I915_READ64(fence_reg_lo); 103 104 I915_WRITE64_FW(fence_reg_lo, val); 105 I915_READ64(fence_reg_lo); 106 } 107 } else { 108 /* To w/a incoherency with non-atomic 64-bit register updates, 109 * we split the 64-bit update into two 32-bit writes. In order 110 * for a partial fence not to be evaluated between writes, we 111 * precede the update with write to turn off the fence register, 112 * and only enable the fence as the last step. 113 * 114 * For extra levels of paranoia, we make sure each step lands 115 * before applying the next step. 116 */ 117 I915_WRITE(fence_reg_lo, 0); 118 POSTING_READ(fence_reg_lo); 119 120 I915_WRITE(fence_reg_hi, upper_32_bits(val)); 121 I915_WRITE(fence_reg_lo, lower_32_bits(val)); 122 POSTING_READ(fence_reg_lo); 123 } 124 } 125 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi Zhao, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on drm-intel/for-linux-next] [also build test WARNING on v4.18-rc3 next-20180702] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Zhao-Yakui/drm-i915-the-Read-Write-optimization-of-fence-reg/20180703-102412 base: git://anongit.freedesktop.org/drm-intel for-linux-next config: i386-randconfig-x073-201826 (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=i386 All warnings (new ones prefixed by >>): In file included from include/linux/kernel.h:10:0, from include/linux/list.h:9, from include/linux/agp_backend.h:33, from include/drm/drmP.h:35, from drivers/gpu/drm/i915/i915_gem_fence_reg.c:24: drivers/gpu/drm/i915/i915_gem_fence_reg.c: In function 'i965_write_fence_reg': drivers/gpu/drm/i915/i915_gem_fence_reg.c:98:25: error: 'i915' undeclared (first use in this function); did you mean 'to_i915'? if (intel_vgpu_active(i915)) ^ include/linux/compiler.h:58:30: note: in definition of macro '__trace_if' if (__builtin_constant_p(!!(cond)) ? !!(cond) : \ ^~~~ >> drivers/gpu/drm/i915/i915_gem_fence_reg.c:98:3: note: in expansion of macro 'if' if (intel_vgpu_active(i915)) ^~ drivers/gpu/drm/i915/i915_gem_fence_reg.c:98:25: note: each undeclared identifier is reported only once for each function it appears in if (intel_vgpu_active(i915)) ^ include/linux/compiler.h:58:30: note: in definition of macro '__trace_if' if (__builtin_constant_p(!!(cond)) ? !!(cond) : \ ^~~~ >> drivers/gpu/drm/i915/i915_gem_fence_reg.c:98:3: note: in expansion of macro 'if' if (intel_vgpu_active(i915)) ^~ vim +/if +98 drivers/gpu/drm/i915/i915_gem_fence_reg.c > 24 #include <drm/drmP.h> 25 #include <drm/i915_drm.h> 26 #include "i915_drv.h" 27 28 /** 29 * DOC: fence register handling 30 * 31 * Important to avoid confusions: "fences" in the i915 driver are not execution 32 * fences used to track command completion but hardware detiler objects which 33 * wrap a given range of the global GTT. Each platform has only a fairly limited 34 * set of these objects. 35 * 36 * Fences are used to detile GTT memory mappings. They're also connected to the 37 * hardware frontbuffer render tracking and hence interact with frontbuffer 38 * compression. Furthermore on older platforms fences are required for tiled 39 * objects used by the display engine. They can also be used by the render 40 * engine - they're required for blitter commands and are optional for render 41 * commands. But on gen4+ both display (with the exception of fbc) and rendering 42 * have their own tiling state bits and don't need fences. 43 * 44 * Also note that fences only support X and Y tiling and hence can't be used for 45 * the fancier new tiling formats like W, Ys and Yf. 46 * 47 * Finally note that because fences are such a restricted resource they're 48 * dynamically associated with objects. Furthermore fence state is committed to 49 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must 50 * explicitly call i915_gem_object_get_fence() to synchronize fencing status 51 * for cpu access. Also note that some code wants an unfenced view, for those 52 * cases the fence can be removed forcefully with i915_gem_object_put_fence(). 53 * 54 * Internally these functions will synchronize with userspace access by removing 55 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. 56 */ 57 58 #define pipelined 0 59 60 static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, 61 struct i915_vma *vma) 62 { 63 i915_reg_t fence_reg_lo, fence_reg_hi; 64 int fence_pitch_shift; 65 u64 val; 66 struct drm_i915_private *dev_priv = fence->i915; 67 68 if (INTEL_GEN(fence->i915) >= 6) { 69 fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); 70 fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); 71 fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; 72 73 } else { 74 fence_reg_lo = FENCE_REG_965_LO(fence->id); 75 fence_reg_hi = FENCE_REG_965_HI(fence->id); 76 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 77 } 78 79 val = 0; 80 if (vma) { 81 unsigned int stride = i915_gem_object_get_stride(vma->obj); 82 83 GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); 84 GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE)); 85 GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE)); 86 GEM_BUG_ON(!IS_ALIGNED(stride, 128)); 87 88 val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32; 89 val |= vma->node.start; 90 val |= (u64)((stride / 128) - 1) << fence_pitch_shift; 91 if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) 92 val |= BIT(I965_FENCE_TILING_Y_SHIFT); 93 val |= I965_FENCE_REG_VALID; 94 } 95 96 if (INTEL_GEN(fence->i915) >= 6) { 97 /* Use the 64-bit RW to read/write fence reg on SNB+ */ > 98 if (intel_vgpu_active(i915)) 99 I915_WRITE64_FW(fence_reg_lo, val); 100 else { 101 I915_WRITE64_FW(fence_reg_lo, 0); 102 I915_READ64(fence_reg_lo); 103 104 I915_WRITE64_FW(fence_reg_lo, val); 105 I915_READ64(fence_reg_lo); 106 } 107 } else { 108 /* To w/a incoherency with non-atomic 64-bit register updates, 109 * we split the 64-bit update into two 32-bit writes. In order 110 * for a partial fence not to be evaluated between writes, we 111 * precede the update with write to turn off the fence register, 112 * and only enable the fence as the last step. 113 * 114 * For extra levels of paranoia, we make sure each step lands 115 * before applying the next step. 116 */ 117 I915_WRITE(fence_reg_lo, 0); 118 POSTING_READ(fence_reg_lo); 119 120 I915_WRITE(fence_reg_hi, upper_32_bits(val)); 121 I915_WRITE(fence_reg_lo, lower_32_bits(val)); 122 POSTING_READ(fence_reg_lo); 123 } 124 } 125 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index d92fe03..55bf6d9 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, if (INTEL_GEN(fence->i915) >= 6) { /* Use the 64-bit RW to read/write fence reg on SNB+ */ - I915_WRITE64_FW(fence_reg_lo, 0); - I915_READ64(fence_reg_lo); - - I915_WRITE64_FW(fence_reg_lo, val); - I915_READ64(fence_reg_lo); + if (intel_vgpu_active(i915)) + I915_WRITE64_FW(fence_reg_lo, val); + else { + I915_WRITE64_FW(fence_reg_lo, 0); + I915_READ64(fence_reg_lo); + + I915_WRITE64_FW(fence_reg_lo, val); + I915_READ64(fence_reg_lo); + } } else { /* To w/a incoherency with non-atomic 64-bit register updates, * we split the 64-bit update into two 32-bit writes. In order
On VGPU scenario the read/write operation of fence_reg will be trapped by the GVT-g. And then gvt-g follows the HW spec to write the fence_reg. So it is unnecessary to read/write fence reg several times. This will help to reduce the unnecessary trap of fence_reg mmio operation. Signed-off-by: Zhao Yakui <yakui.zhao@intel.com> --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-)