diff mbox

[v3] drm/i915: Use two 32bit reads for select 64bit REG_READ ioctls

Message ID 1437046676-31811-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson July 16, 2015, 11:37 a.m. UTC
Since the hardware sometimes mysteriously totally flummoxes the 64bit
read of a 64bit register when read using a single instruction, split the
read into two instructions. Since the read here is of automatically
incrementing timestamp counters, we also have to be very careful in
order to make sure that it does not increment between the two
instructions.

However, since userspace tried to workaround this issue and so enshrined
this ABI for a broken hardware read and in the process neglected that
the read only fails in some environments, we have to introduce a new
uABI flag for userspace to request the 2x32 bit accurate read of the
timestamp.

v2: Fix alignment check and include details of the workaround for
userspace.

Reported-by: Karol Herbst <freedesktop@karolherbst.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91317
Testcase: igt/gem_reg_read
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Micha? Winiarski <michal.winiarski@intel.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/i915/intel_uncore.c | 26 +++++++++++++++++++-------
 include/uapi/drm/i915_drm.h         |  8 ++++++++
 2 files changed, 27 insertions(+), 7 deletions(-)

Comments

MichaƂ Winiarski July 17, 2015, 3:10 p.m. UTC | #1
On Thu, Jul 16, 2015 at 12:37:56PM +0100, Chris Wilson wrote:
> Since the hardware sometimes mysteriously totally flummoxes the 64bit
> read of a 64bit register when read using a single instruction, split the
> read into two instructions. Since the read here is of automatically
> incrementing timestamp counters, we also have to be very careful in
> order to make sure that it does not increment between the two
> instructions.
> 
> However, since userspace tried to workaround this issue and so enshrined
> this ABI for a broken hardware read and in the process neglected that
> the read only fails in some environments, we have to introduce a new
> uABI flag for userspace to request the 2x32 bit accurate read of the
> timestamp.
> 
> v2: Fix alignment check and include details of the workaround for
> userspace.
> 
> Reported-by: Karol Herbst <freedesktop@karolherbst.de>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91317
> Testcase: igt/gem_reg_read
Tested-by: Micha? Winiarski <michal.winiarski@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Micha? Winiarski <michal.winiarski@intel.com>
> Cc: stable@vger.kernel.org
> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 26 +++++++++++++++++++-------
>  include/uapi/drm/i915_drm.h         |  8 ++++++++
>  2 files changed, 27 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 2c477663d378..eb244b57b3fd 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1310,10 +1310,12 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_reg_read *reg = data;
>  	struct register_whitelist const *entry = whitelist;
> +	unsigned size;
> +	u64 offset;
>  	int i, ret = 0;
>  
>  	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
> -		if (entry->offset == reg->offset &&
> +		if (entry->offset == (reg->offset & -entry->size) &&
>  		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
>  			break;
>  	}
> @@ -1321,23 +1323,33 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  	if (i == ARRAY_SIZE(whitelist))
>  		return -EINVAL;
>  
> +	/* We use the low bits to encode extra flags as the register should
> +	 * be naturally aligned (and those that are not so aligned merely
> +	 * limit the available flags for that register).
> +	 */
> +	offset = entry->offset;
> +	size = entry->size;
> +	size |= reg->offset ^ offset;
> +
>  	intel_runtime_pm_get(dev_priv);
>  
> -	switch (entry->size) {
> +	switch (size) {
> +	case 8 | 1:
> +		reg->val = I915_READ64_2x32(offset, offset+4);
> +		break;
>  	case 8:
> -		reg->val = I915_READ64(reg->offset);
> +		reg->val = I915_READ64(offset);
>  		break;
>  	case 4:
> -		reg->val = I915_READ(reg->offset);
> +		reg->val = I915_READ(offset);
>  		break;
>  	case 2:
> -		reg->val = I915_READ16(reg->offset);
> +		reg->val = I915_READ16(offset);
>  		break;
>  	case 1:
> -		reg->val = I915_READ8(reg->offset);
> +		reg->val = I915_READ8(offset);
>  		break;
>  	default:
> -		MISSING_CASE(entry->size);
>  		ret = -EINVAL;
>  		goto out;
>  	}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index b0f82ddab987..83f60f01dca2 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1087,6 +1087,14 @@ struct drm_i915_reg_read {
>  	__u64 offset;
>  	__u64 val; /* Return value */
>  };
> +/* Known registers:
> + *
> + * Render engine timestamp - 0x2358 + 64bit - gen7+
> + * - Note this register returns an invalid value if using the default
> + *   single instruction 8byte read, in order to workaround that use
> + *   offset (0x2538 | 1) instead.
> + *
> + */
>  
>  struct drm_i915_reset_stats {
>  	__u32 ctx_id;
> -- 
> 2.1.4
>
Shuang He July 19, 2015, 10:05 p.m. UTC | #2
Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6813
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
ILK                                  302/302              302/302
SNB                                  315/315              315/315
IVB                                  342/342              342/342
BYT                 -1              285/285              284/285
HSW                                  378/378              378/378
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*BYT  igt@gem_partial_pwrite_pread@reads-display      PASS(1)      FAIL(1)
Note: You need to pay more attention to line start with '*'
Daniel Vetter July 21, 2015, 6:49 a.m. UTC | #3
On Fri, Jul 17, 2015 at 05:10:25PM +0200, Micha? Winiarski wrote:
> On Thu, Jul 16, 2015 at 12:37:56PM +0100, Chris Wilson wrote:
> > Since the hardware sometimes mysteriously totally flummoxes the 64bit
> > read of a 64bit register when read using a single instruction, split the
> > read into two instructions. Since the read here is of automatically
> > incrementing timestamp counters, we also have to be very careful in
> > order to make sure that it does not increment between the two
> > instructions.
> > 
> > However, since userspace tried to workaround this issue and so enshrined
> > this ABI for a broken hardware read and in the process neglected that
> > the read only fails in some environments, we have to introduce a new
> > uABI flag for userspace to request the 2x32 bit accurate read of the
> > timestamp.
> > 
> > v2: Fix alignment check and include details of the workaround for
> > userspace.
> > 
> > Reported-by: Karol Herbst <freedesktop@karolherbst.de>
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91317
> > Testcase: igt/gem_reg_read
> Tested-by: Micha? Winiarski <michal.winiarski@intel.com>

Where are the mesa/beignet/libva patches for this?
-Daniel

> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Micha? Winiarski <michal.winiarski@intel.com>
> > Cc: stable@vger.kernel.org
> > ---
> >  drivers/gpu/drm/i915/intel_uncore.c | 26 +++++++++++++++++++-------
> >  include/uapi/drm/i915_drm.h         |  8 ++++++++
> >  2 files changed, 27 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> > index 2c477663d378..eb244b57b3fd 100644
> > --- a/drivers/gpu/drm/i915/intel_uncore.c
> > +++ b/drivers/gpu/drm/i915/intel_uncore.c
> > @@ -1310,10 +1310,12 @@ int i915_reg_read_ioctl(struct drm_device *dev,
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct drm_i915_reg_read *reg = data;
> >  	struct register_whitelist const *entry = whitelist;
> > +	unsigned size;
> > +	u64 offset;
> >  	int i, ret = 0;
> >  
> >  	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
> > -		if (entry->offset == reg->offset &&
> > +		if (entry->offset == (reg->offset & -entry->size) &&
> >  		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
> >  			break;
> >  	}
> > @@ -1321,23 +1323,33 @@ int i915_reg_read_ioctl(struct drm_device *dev,
> >  	if (i == ARRAY_SIZE(whitelist))
> >  		return -EINVAL;
> >  
> > +	/* We use the low bits to encode extra flags as the register should
> > +	 * be naturally aligned (and those that are not so aligned merely
> > +	 * limit the available flags for that register).
> > +	 */
> > +	offset = entry->offset;
> > +	size = entry->size;
> > +	size |= reg->offset ^ offset;
> > +
> >  	intel_runtime_pm_get(dev_priv);
> >  
> > -	switch (entry->size) {
> > +	switch (size) {
> > +	case 8 | 1:
> > +		reg->val = I915_READ64_2x32(offset, offset+4);
> > +		break;
> >  	case 8:
> > -		reg->val = I915_READ64(reg->offset);
> > +		reg->val = I915_READ64(offset);
> >  		break;
> >  	case 4:
> > -		reg->val = I915_READ(reg->offset);
> > +		reg->val = I915_READ(offset);
> >  		break;
> >  	case 2:
> > -		reg->val = I915_READ16(reg->offset);
> > +		reg->val = I915_READ16(offset);
> >  		break;
> >  	case 1:
> > -		reg->val = I915_READ8(reg->offset);
> > +		reg->val = I915_READ8(offset);
> >  		break;
> >  	default:
> > -		MISSING_CASE(entry->size);
> >  		ret = -EINVAL;
> >  		goto out;
> >  	}
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index b0f82ddab987..83f60f01dca2 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1087,6 +1087,14 @@ struct drm_i915_reg_read {
> >  	__u64 offset;
> >  	__u64 val; /* Return value */
> >  };
> > +/* Known registers:
> > + *
> > + * Render engine timestamp - 0x2358 + 64bit - gen7+
> > + * - Note this register returns an invalid value if using the default
> > + *   single instruction 8byte read, in order to workaround that use
> > + *   offset (0x2538 | 1) instead.
> > + *
> > + */
> >  
> >  struct drm_i915_reset_stats {
> >  	__u32 ctx_id;
> > -- 
> > 2.1.4
> > 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson July 21, 2015, 9:45 a.m. UTC | #4
On Tue, Jul 21, 2015 at 08:49:31AM +0200, Daniel Vetter wrote:
> On Fri, Jul 17, 2015 at 05:10:25PM +0200, Micha? Winiarski wrote:
> > On Thu, Jul 16, 2015 at 12:37:56PM +0100, Chris Wilson wrote:
> > > Since the hardware sometimes mysteriously totally flummoxes the 64bit
> > > read of a 64bit register when read using a single instruction, split the
> > > read into two instructions. Since the read here is of automatically
> > > incrementing timestamp counters, we also have to be very careful in
> > > order to make sure that it does not increment between the two
> > > instructions.
> > > 
> > > However, since userspace tried to workaround this issue and so enshrined
> > > this ABI for a broken hardware read and in the process neglected that
> > > the read only fails in some environments, we have to introduce a new
> > > uABI flag for userspace to request the 2x32 bit accurate read of the
> > > timestamp.
> > > 
> > > v2: Fix alignment check and include details of the workaround for
> > > userspace.
> > > 
> > > Reported-by: Karol Herbst <freedesktop@karolherbst.de>
> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91317
> > > Testcase: igt/gem_reg_read
> > Tested-by: Micha? Winiarski <michal.winiarski@intel.com>
> 
> Where are the mesa/beignet/libva patches for this?

Trivial. Absolutely trivial. Just waiting for the kernel.
-Chris
Daniel Vetter July 21, 2015, 9:49 a.m. UTC | #5
On Tue, Jul 21, 2015 at 10:45:45AM +0100, Chris Wilson wrote:
> On Tue, Jul 21, 2015 at 08:49:31AM +0200, Daniel Vetter wrote:
> > On Fri, Jul 17, 2015 at 05:10:25PM +0200, Micha? Winiarski wrote:
> > > On Thu, Jul 16, 2015 at 12:37:56PM +0100, Chris Wilson wrote:
> > > > Since the hardware sometimes mysteriously totally flummoxes the 64bit
> > > > read of a 64bit register when read using a single instruction, split the
> > > > read into two instructions. Since the read here is of automatically
> > > > incrementing timestamp counters, we also have to be very careful in
> > > > order to make sure that it does not increment between the two
> > > > instructions.
> > > > 
> > > > However, since userspace tried to workaround this issue and so enshrined
> > > > this ABI for a broken hardware read and in the process neglected that
> > > > the read only fails in some environments, we have to introduce a new
> > > > uABI flag for userspace to request the 2x32 bit accurate read of the
> > > > timestamp.
> > > > 
> > > > v2: Fix alignment check and include details of the workaround for
> > > > userspace.
> > > > 
> > > > Reported-by: Karol Herbst <freedesktop@karolherbst.de>
> > > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91317
> > > > Testcase: igt/gem_reg_read
> > > Tested-by: Micha? Winiarski <michal.winiarski@intel.com>
> > 
> > Where are the mesa/beignet/libva patches for this?
> 
> Trivial. Absolutely trivial. Just waiting for the kernel.

Well still not how it should be done, so I guess you owe me them all ;-)

Anyway, applied to -fixes.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 2c477663d378..eb244b57b3fd 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1310,10 +1310,12 @@  int i915_reg_read_ioctl(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_reg_read *reg = data;
 	struct register_whitelist const *entry = whitelist;
+	unsigned size;
+	u64 offset;
 	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
-		if (entry->offset == reg->offset &&
+		if (entry->offset == (reg->offset & -entry->size) &&
 		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
 			break;
 	}
@@ -1321,23 +1323,33 @@  int i915_reg_read_ioctl(struct drm_device *dev,
 	if (i == ARRAY_SIZE(whitelist))
 		return -EINVAL;
 
+	/* We use the low bits to encode extra flags as the register should
+	 * be naturally aligned (and those that are not so aligned merely
+	 * limit the available flags for that register).
+	 */
+	offset = entry->offset;
+	size = entry->size;
+	size |= reg->offset ^ offset;
+
 	intel_runtime_pm_get(dev_priv);
 
-	switch (entry->size) {
+	switch (size) {
+	case 8 | 1:
+		reg->val = I915_READ64_2x32(offset, offset+4);
+		break;
 	case 8:
-		reg->val = I915_READ64(reg->offset);
+		reg->val = I915_READ64(offset);
 		break;
 	case 4:
-		reg->val = I915_READ(reg->offset);
+		reg->val = I915_READ(offset);
 		break;
 	case 2:
-		reg->val = I915_READ16(reg->offset);
+		reg->val = I915_READ16(offset);
 		break;
 	case 1:
-		reg->val = I915_READ8(reg->offset);
+		reg->val = I915_READ8(offset);
 		break;
 	default:
-		MISSING_CASE(entry->size);
 		ret = -EINVAL;
 		goto out;
 	}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b0f82ddab987..83f60f01dca2 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1087,6 +1087,14 @@  struct drm_i915_reg_read {
 	__u64 offset;
 	__u64 val; /* Return value */
 };
+/* Known registers:
+ *
+ * Render engine timestamp - 0x2358 + 64bit - gen7+
+ * - Note this register returns an invalid value if using the default
+ *   single instruction 8byte read, in order to workaround that use
+ *   offset (0x2538 | 1) instead.
+ *
+ */
 
 struct drm_i915_reset_stats {
 	__u32 ctx_id;