diff mbox

[2/2] drm/i915: write fence reg only once on VGPU

Message ID 1530584549-31867-3-git-send-email-yakui.zhao@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhao, Yakui July 3, 2018, 2:22 a.m. UTC
On VGPU scenario the read/write operation of fence_reg will be trapped
by the GVT-g. And then gvt-g follows the HW spec to write the fence_reg.
So it is unnecessary to read/write fence reg several times. This will help
to reduce the unnecessary trap of fence_reg mmio operation.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

Comments

Zhao, Yakui July 3, 2018, 2:49 a.m. UTC | #1
>-----Original Message-----
>From: Zhao, Yakui
>Sent: Tuesday, July 3, 2018 10:22 AM
>To: intel-gfx@lists.freedesktop.org
>Cc: zhenyuw@linux.intel.com; Zhao, Yakui <yakui.zhao@intel.com>
>Subject: [PATCH 2/2] drm/i915: write fence reg only once on VGPU
>
>On VGPU scenario the read/write operation of fence_reg will be trapped by
>the GVT-g. And then gvt-g follows the HW spec to write the fence_reg.
>So it is unnecessary to read/write fence reg several times. This will help to
>reduce the unnecessary trap of fence_reg mmio operation.
>

Sorry for one typo. The V2 will be sent.

>Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
>---
> drivers/gpu/drm/i915/i915_gem_fence_reg.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>index d92fe03..55bf6d9 100644
>--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
>@@ -95,11 +95,15 @@ static void i965_write_fence_reg(struct
>drm_i915_fence_reg *fence,
>
> 	if (INTEL_GEN(fence->i915) >= 6) {
> 		/* Use the 64-bit RW to read/write fence reg on SNB+ */
>-		I915_WRITE64_FW(fence_reg_lo, 0);
>-		I915_READ64(fence_reg_lo);
>-
>-		I915_WRITE64_FW(fence_reg_lo, val);
>-		I915_READ64(fence_reg_lo);
>+		if (intel_vgpu_active(i915))
>+			I915_WRITE64_FW(fence_reg_lo, val);
>+		else {
>+			I915_WRITE64_FW(fence_reg_lo, 0);
>+			I915_READ64(fence_reg_lo);
>+
>+			I915_WRITE64_FW(fence_reg_lo, val);
>+			I915_READ64(fence_reg_lo);
>+		}
> 	} else {
> 		/* To w/a incoherency with non-atomic 64-bit register updates,
> 		 * we split the 64-bit update into two 32-bit writes. In order
>--
>2.7.4
kernel test robot July 3, 2018, 3:54 a.m. UTC | #2
Hi Zhao,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on v4.18-rc3 next-20180702]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Zhao-Yakui/drm-i915-the-Read-Write-optimization-of-fence-reg/20180703-102412
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-x074-201826 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/gpu//drm/i915/i915_gem_fence_reg.c: In function 'i965_write_fence_reg':
>> drivers/gpu//drm/i915/i915_gem_fence_reg.c:98:25: error: 'i915' undeclared (first use in this function); did you mean 'to_i915'?
      if (intel_vgpu_active(i915))
                            ^~~~
                            to_i915
   drivers/gpu//drm/i915/i915_gem_fence_reg.c:98:25: note: each undeclared identifier is reported only once for each function it appears in

vim +98 drivers/gpu//drm/i915/i915_gem_fence_reg.c

    59	
    60	static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
    61					 struct i915_vma *vma)
    62	{
    63		i915_reg_t fence_reg_lo, fence_reg_hi;
    64		int fence_pitch_shift;
    65		u64 val;
    66		struct drm_i915_private *dev_priv = fence->i915;
    67	
    68		if (INTEL_GEN(fence->i915) >= 6) {
    69			fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
    70			fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
    71			fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
    72	
    73		} else {
    74			fence_reg_lo = FENCE_REG_965_LO(fence->id);
    75			fence_reg_hi = FENCE_REG_965_HI(fence->id);
    76			fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
    77		}
    78	
    79		val = 0;
    80		if (vma) {
    81			unsigned int stride = i915_gem_object_get_stride(vma->obj);
    82	
    83			GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
    84			GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE));
    85			GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE));
    86			GEM_BUG_ON(!IS_ALIGNED(stride, 128));
    87	
    88			val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32;
    89			val |= vma->node.start;
    90			val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
    91			if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
    92				val |= BIT(I965_FENCE_TILING_Y_SHIFT);
    93			val |= I965_FENCE_REG_VALID;
    94		}
    95	
    96		if (INTEL_GEN(fence->i915) >= 6) {
    97			/* Use the 64-bit RW to read/write fence reg on SNB+ */
  > 98			if (intel_vgpu_active(i915))
    99				I915_WRITE64_FW(fence_reg_lo, val);
   100			else {
   101				I915_WRITE64_FW(fence_reg_lo, 0);
   102				I915_READ64(fence_reg_lo);
   103	
   104				I915_WRITE64_FW(fence_reg_lo, val);
   105				I915_READ64(fence_reg_lo);
   106			}
   107		} else {
   108			/* To w/a incoherency with non-atomic 64-bit register updates,
   109			 * we split the 64-bit update into two 32-bit writes. In order
   110			 * for a partial fence not to be evaluated between writes, we
   111			 * precede the update with write to turn off the fence register,
   112			 * and only enable the fence as the last step.
   113			 *
   114			 * For extra levels of paranoia, we make sure each step lands
   115			 * before applying the next step.
   116			 */
   117			I915_WRITE(fence_reg_lo, 0);
   118			POSTING_READ(fence_reg_lo);
   119	
   120			I915_WRITE(fence_reg_hi, upper_32_bits(val));
   121			I915_WRITE(fence_reg_lo, lower_32_bits(val));
   122			POSTING_READ(fence_reg_lo);
   123		}
   124	}
   125	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot July 3, 2018, 4:19 a.m. UTC | #3
Hi Zhao,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on v4.18-rc3 next-20180702]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Zhao-Yakui/drm-i915-the-Read-Write-optimization-of-fence-reg/20180703-102412
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-x073-201826 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:10:0,
                    from include/linux/list.h:9,
                    from include/linux/agp_backend.h:33,
                    from include/drm/drmP.h:35,
                    from drivers/gpu/drm/i915/i915_gem_fence_reg.c:24:
   drivers/gpu/drm/i915/i915_gem_fence_reg.c: In function 'i965_write_fence_reg':
   drivers/gpu/drm/i915/i915_gem_fence_reg.c:98:25: error: 'i915' undeclared (first use in this function); did you mean 'to_i915'?
      if (intel_vgpu_active(i915))
                            ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> drivers/gpu/drm/i915/i915_gem_fence_reg.c:98:3: note: in expansion of macro 'if'
      if (intel_vgpu_active(i915))
      ^~
   drivers/gpu/drm/i915/i915_gem_fence_reg.c:98:25: note: each undeclared identifier is reported only once for each function it appears in
      if (intel_vgpu_active(i915))
                            ^
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> drivers/gpu/drm/i915/i915_gem_fence_reg.c:98:3: note: in expansion of macro 'if'
      if (intel_vgpu_active(i915))
      ^~

vim +/if +98 drivers/gpu/drm/i915/i915_gem_fence_reg.c

  > 24	#include <drm/drmP.h>
    25	#include <drm/i915_drm.h>
    26	#include "i915_drv.h"
    27	
    28	/**
    29	 * DOC: fence register handling
    30	 *
    31	 * Important to avoid confusions: "fences" in the i915 driver are not execution
    32	 * fences used to track command completion but hardware detiler objects which
    33	 * wrap a given range of the global GTT. Each platform has only a fairly limited
    34	 * set of these objects.
    35	 *
    36	 * Fences are used to detile GTT memory mappings. They're also connected to the
    37	 * hardware frontbuffer render tracking and hence interact with frontbuffer
    38	 * compression. Furthermore on older platforms fences are required for tiled
    39	 * objects used by the display engine. They can also be used by the render
    40	 * engine - they're required for blitter commands and are optional for render
    41	 * commands. But on gen4+ both display (with the exception of fbc) and rendering
    42	 * have their own tiling state bits and don't need fences.
    43	 *
    44	 * Also note that fences only support X and Y tiling and hence can't be used for
    45	 * the fancier new tiling formats like W, Ys and Yf.
    46	 *
    47	 * Finally note that because fences are such a restricted resource they're
    48	 * dynamically associated with objects. Furthermore fence state is committed to
    49	 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
    50	 * explicitly call i915_gem_object_get_fence() to synchronize fencing status
    51	 * for cpu access. Also note that some code wants an unfenced view, for those
    52	 * cases the fence can be removed forcefully with i915_gem_object_put_fence().
    53	 *
    54	 * Internally these functions will synchronize with userspace access by removing
    55	 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
    56	 */
    57	
    58	#define pipelined 0
    59	
    60	static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
    61					 struct i915_vma *vma)
    62	{
    63		i915_reg_t fence_reg_lo, fence_reg_hi;
    64		int fence_pitch_shift;
    65		u64 val;
    66		struct drm_i915_private *dev_priv = fence->i915;
    67	
    68		if (INTEL_GEN(fence->i915) >= 6) {
    69			fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
    70			fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
    71			fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
    72	
    73		} else {
    74			fence_reg_lo = FENCE_REG_965_LO(fence->id);
    75			fence_reg_hi = FENCE_REG_965_HI(fence->id);
    76			fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
    77		}
    78	
    79		val = 0;
    80		if (vma) {
    81			unsigned int stride = i915_gem_object_get_stride(vma->obj);
    82	
    83			GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
    84			GEM_BUG_ON(!IS_ALIGNED(vma->node.start, I965_FENCE_PAGE));
    85			GEM_BUG_ON(!IS_ALIGNED(vma->fence_size, I965_FENCE_PAGE));
    86			GEM_BUG_ON(!IS_ALIGNED(stride, 128));
    87	
    88			val = (vma->node.start + vma->fence_size - I965_FENCE_PAGE) << 32;
    89			val |= vma->node.start;
    90			val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
    91			if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y)
    92				val |= BIT(I965_FENCE_TILING_Y_SHIFT);
    93			val |= I965_FENCE_REG_VALID;
    94		}
    95	
    96		if (INTEL_GEN(fence->i915) >= 6) {
    97			/* Use the 64-bit RW to read/write fence reg on SNB+ */
  > 98			if (intel_vgpu_active(i915))
    99				I915_WRITE64_FW(fence_reg_lo, val);
   100			else {
   101				I915_WRITE64_FW(fence_reg_lo, 0);
   102				I915_READ64(fence_reg_lo);
   103	
   104				I915_WRITE64_FW(fence_reg_lo, val);
   105				I915_READ64(fence_reg_lo);
   106			}
   107		} else {
   108			/* To w/a incoherency with non-atomic 64-bit register updates,
   109			 * we split the 64-bit update into two 32-bit writes. In order
   110			 * for a partial fence not to be evaluated between writes, we
   111			 * precede the update with write to turn off the fence register,
   112			 * and only enable the fence as the last step.
   113			 *
   114			 * For extra levels of paranoia, we make sure each step lands
   115			 * before applying the next step.
   116			 */
   117			I915_WRITE(fence_reg_lo, 0);
   118			POSTING_READ(fence_reg_lo);
   119	
   120			I915_WRITE(fence_reg_hi, upper_32_bits(val));
   121			I915_WRITE(fence_reg_lo, lower_32_bits(val));
   122			POSTING_READ(fence_reg_lo);
   123		}
   124	}
   125	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d92fe03..55bf6d9 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -95,11 +95,15 @@  static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 
 	if (INTEL_GEN(fence->i915) >= 6) {
 		/* Use the 64-bit RW to read/write fence reg on SNB+ */
-		I915_WRITE64_FW(fence_reg_lo, 0);
-		I915_READ64(fence_reg_lo);
-
-		I915_WRITE64_FW(fence_reg_lo, val);
-		I915_READ64(fence_reg_lo);
+		if (intel_vgpu_active(i915))
+			I915_WRITE64_FW(fence_reg_lo, val);
+		else {
+			I915_WRITE64_FW(fence_reg_lo, 0);
+			I915_READ64(fence_reg_lo);
+
+			I915_WRITE64_FW(fence_reg_lo, val);
+			I915_READ64(fence_reg_lo);
+		}
 	} else {
 		/* To w/a incoherency with non-atomic 64-bit register updates,
 		 * we split the 64-bit update into two 32-bit writes. In order