diff mbox

[v3] drm/i915: Use 64-bit write to optimize writing fence_reg on VGPU

Message ID 1530624467-29569-1-git-send-email-yakui.zhao@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhao, Yakui July 3, 2018, 1:27 p.m. UTC
In the VGPU scenario, read/write operations on fence_reg are trapped
by GVT-g. GVT-g then follows the HW spec to program the fence_reg,
and it takes care of updating the fence reg correctly for any
trapped value of the fence reg.

So it is unnecessary to read/write the fence reg several times. It is
enough to write the fence reg value once in 64-bit mode. This will help
to reduce the redundant traps of fence_reg mmio operations.

V1->V2: Fix one typo error of parameter when calling intel_vgpu_active.
V2->V3: Follow Chris Wilson and Daniel Vetter to add more descriptions.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

Comments

Chris Wilson July 3, 2018, 1:32 p.m. UTC | #1
Quoting Zhao Yakui (2018-07-03 14:27:47)
> On VGPU scenario the read/write operation of fence_reg will be trapped
> by the GVT-g. Then gvt-g follows the HW spec to program the fence_reg.
> And the gvt-g takes care of updating the fence reg correctly for any
> trapped value of fence reg.
> 
> So it is unnecessary to read/write fence reg several times. It is enough 
> that the fence reg is written only value in 64-bit mode. This will help
> to reduce the redundant trap of fence_reg mmio operation.
> 
> V1->V2: Fix one typo error of parameter when calling intel_vgpu_active.
> V2->V3: Follow Chris Wilson and Daniel Vetter to add more descriptions.
> 
> Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_fence_reg.c | 15 ++++++++++++---
>  1 file changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index d548ac0..7b10bf9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -63,6 +63,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
>         i915_reg_t fence_reg_lo, fence_reg_hi;
>         int fence_pitch_shift;
>         u64 val;
> +       struct drm_i915_private *dev_priv = fence->i915;
>  
>         if (INTEL_GEN(fence->i915) >= 6) {
>                 fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
> @@ -92,9 +93,17 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
>                 val |= I965_FENCE_REG_VALID;
>         }
>  
> -       if (!pipelined) {
> -               struct drm_i915_private *dev_priv = fence->i915;

Ahem.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index d548ac0..7b10bf9 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -63,6 +63,7 @@  static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 	i915_reg_t fence_reg_lo, fence_reg_hi;
 	int fence_pitch_shift;
 	u64 val;
+	struct drm_i915_private *dev_priv = fence->i915;
 
 	if (INTEL_GEN(fence->i915) >= 6) {
 		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
@@ -92,9 +93,17 @@  static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 		val |= I965_FENCE_REG_VALID;
 	}
 
-	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
-
+	if (intel_vgpu_active(dev_priv)) {
+		/* Use the 64-bit RW to write fence reg on VGPU mode.
+		 * The GVT-g can trap the written val of VGPU to program the
+		 * fence reg. And the fence write in gvt-g follows the
+		 * sequence of off/read/double-write/read. This assures that
+		 * the fence reg is configured correctly.
+		 * At the same time the 64-bit op can help to reduce the num
+		 * of VGPU trap for the fence reg.
+		 */
+		I915_WRITE64_FW(fence_reg_lo, val);
+	} else {
 		/* To w/a incoherency with non-atomic 64-bit register updates,
 		 * we split the 64-bit update into two 32-bit writes. In order
 		 * for a partial fence not to be evaluated between writes, we