diff mbox

drm/i915/hsw: Disable L3 caching of atomic memory operations.

Message ID 1380751423-6255-1-git-send-email-currojerez@riseup.net (mailing list archive)
State New, archived
Headers show

Commit Message

Francisco Jerez Oct. 2, 2013, 10:03 p.m. UTC
Otherwise using any atomic memory operation will lock up the GPU due
to a Haswell hardware bug.  This patch also defines a new DRM param so
userspace knows that atomics can be used safely.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
---
 drivers/gpu/drm/i915/i915_dma.c | 3 +++
 drivers/gpu/drm/i915/i915_reg.h | 7 +++++++
 drivers/gpu/drm/i915/intel_pm.c | 6 ++++++
 include/uapi/drm/i915_drm.h     | 1 +
 4 files changed, 17 insertions(+)

Comments

Ben Widawsky Oct. 2, 2013, 10:16 p.m. UTC | #1
On Wed, Oct 02, 2013 at 03:03:43PM -0700, Francisco Jerez wrote:
> Otherwise using any atomic memory operation will lock up the GPU due
> to a Haswell hardware bug.  This patch also defines a new DRM param so
> userspace knows that atomics can be used safely.
> 
> Signed-off-by: Francisco Jerez <currojerez@riseup.net>
> ---
>  drivers/gpu/drm/i915/i915_dma.c | 3 +++
>  drivers/gpu/drm/i915/i915_reg.h | 7 +++++++
>  drivers/gpu/drm/i915/intel_pm.c | 6 ++++++
>  include/uapi/drm/i915_drm.h     | 1 +
>  4 files changed, 17 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index c27a210..e4fcb3d 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1003,6 +1003,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
>  		value = 1;
>  		break;
> +	case I915_PARAM_HAS_ATOMICS:
> +		value = 1;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index c159e1a..611a863 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -3881,6 +3881,9 @@
>  #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
>  #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1<<11)
>  
> +#define HSW_SCRATCH1				0xb038
> +#define  HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE	(1<<27)
> +
>  #define HSW_FUSE_STRAP		0x42014
>  #define  HSW_CDCLK_LIMIT	(1 << 24)
>  
> @@ -4728,6 +4731,10 @@
>  #define GEN7_ROW_CHICKEN2_GT2		0xf4f4
>  #define   DOP_CLOCK_GATING_DISABLE	(1<<0)
>  
> +#define HSW_ROW_CHICKEN3		0xe49c
> +#define  HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_MASK       (1 << 22)
> +#define  HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE    (1 << 6)
> +

You do not need the mask bit. We have macros for this:
_MASKED_BIT_ENABLE

>  #define G4X_AUD_VID_DID			(dev_priv->info->display_mmio_offset + 0x62020)
>  #define INTEL_AUDIO_DEVCL		0x808629FB
>  #define INTEL_AUDIO_DEVBLC		0x80862801
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index dd176b7..47f2b2f 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -4955,6 +4955,12 @@ static void haswell_init_clock_gating(struct drm_device *dev)
>  	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
>  			GEN7_WA_L3_CHICKEN_MODE);
>  
> +	/* L3 caching of data atomics doesn't work -- disable it. */
> +	I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
> +	I915_WRITE(HSW_ROW_CHICKEN3,
> +                   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_MASK |
> +                   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE);
> +
>  	/* This is required by WaCatErrorRejectionIssue:hsw */
>  	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
>  			I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 55bb572..fe0f52e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -335,6 +335,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>  #define I915_PARAM_HAS_WT     	 	 27
> +#define I915_PARAM_HAS_ATOMICS           28
>  
>  typedef struct drm_i915_getparam {
>  	int param;

Also, AFAIK, this workaround still isn't in the bspec, so I think it's a
bit hard for people to review. I'd prefer if we had the bspec updated,
but since they're moving so slowly, and I have the context:

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Daniel Vetter Oct. 2, 2013, 10:20 p.m. UTC | #2
On Thu, Oct 3, 2013 at 12:03 AM, Francisco Jerez <currojerez@riseup.net> wrote:
> +       case I915_PARAM_HAS_ATOMICS:
> +               value = 1;
> +               break;

Generally when we do kernel fixes for gpu hangs like that we don't add
parameters (would drown in them otherwise) but simply queue it up to
-fixes and slap a cc: stable on it. Gpu hang fixes are critical enough
imo for that treatment, even when it's for brand new userspace code.

Any specific reason why we shouldn't follow this approach here? It'd
make the patch simpler and we could dump a bit of userspace code, too.
-Daniel
Francisco Jerez Oct. 2, 2013, 10:31 p.m. UTC | #3
Daniel Vetter <daniel@ffwll.ch> writes:

> On Thu, Oct 3, 2013 at 12:03 AM, Francisco Jerez <currojerez@riseup.net> wrote:
>> +       case I915_PARAM_HAS_ATOMICS:
>> +               value = 1;
>> +               break;
>
> Generally when we do kernel fixes for gpu hangs like that we don't add
> parameters (would drown in them otherwise) but simply queue it up to
> -fixes and slap a cc: stable on it. Gpu hang fixes are critical enough
> imo for that treatment, even when it's for brand new userspace code.
>
> Any specific reason why we shouldn't follow this approach here? It'd
> make the patch simpler and we could dump a bit of userspace code, too.
> -Daniel

Not really, I'm fine either way.  I'll send a revised version of this
patch without the param change.

Thanks.

> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
Ben Widawsky Oct. 2, 2013, 10:31 p.m. UTC | #4
On Thu, Oct 03, 2013 at 12:20:43AM +0200, Daniel Vetter wrote:
> On Thu, Oct 3, 2013 at 12:03 AM, Francisco Jerez <currojerez@riseup.net> wrote:
> > +       case I915_PARAM_HAS_ATOMICS:
> > +               value = 1;
> > +               break;
> 
> Generally when we do kernel fixes for gpu hangs like that we don't add
> parameters (would drown in them otherwise) but simply queue it up to
> -fixes and slap a cc: stable on it. Gpu hang fixes are critical enough
> imo for that treatment, even when it's for brand new userspace code.
> 
> Any specific reason why we shouldn't follow this approach here? It'd
> make the patch simpler and we could dump a bit of userspace code, too.
> -Daniel
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

They haven't yet enabled this feature in mesa, so it's not exactly
fixing a hang. It is preventing one from ever occurring. Mesa versions
built against an older libdrm will not use atomics.
Eric Anholt Oct. 3, 2013, 1:36 a.m. UTC | #5
Daniel Vetter <daniel@ffwll.ch> writes:

> On Thu, Oct 3, 2013 at 12:03 AM, Francisco Jerez <currojerez@riseup.net> wrote:
>> +       case I915_PARAM_HAS_ATOMICS:
>> +               value = 1;
>> +               break;
>
> Generally when we do kernel fixes for gpu hangs like that we don't add
> parameters (would drown in them otherwise) but simply queue it up to
> -fixes and slap a cc: stable on it. Gpu hang fixes are critical enough
> imo for that treatment, even when it's for brand new userspace code.
>
> Any specific reason why we shouldn't follow this approach here? It'd
> make the patch simpler and we could dump a bit of userspace code, too.

Well, what it means is that people who pull new mesa on their old kernel
will reliably get GPU hangs when running piglit, which is something
we've avoided in the past when enabling new features
(I915_PARAM_HAS_GEN7_SOL_RESET for example).
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index c27a210..e4fcb3d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1003,6 +1003,9 @@  static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_ATOMICS:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c159e1a..611a863 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3881,6 +3881,9 @@ 
 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
 #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1<<11)
 
+#define HSW_SCRATCH1				0xb038
+#define  HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE	(1<<27)
+
 #define HSW_FUSE_STRAP		0x42014
 #define  HSW_CDCLK_LIMIT	(1 << 24)
 
@@ -4728,6 +4731,10 @@ 
 #define GEN7_ROW_CHICKEN2_GT2		0xf4f4
 #define   DOP_CLOCK_GATING_DISABLE	(1<<0)
 
+#define HSW_ROW_CHICKEN3		0xe49c
+#define  HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_MASK       (1 << 22)
+#define  HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE    (1 << 6)
+
 #define G4X_AUD_VID_DID			(dev_priv->info->display_mmio_offset + 0x62020)
 #define INTEL_AUDIO_DEVCL		0x808629FB
 #define INTEL_AUDIO_DEVBLC		0x80862801
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index dd176b7..47f2b2f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4955,6 +4955,12 @@  static void haswell_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
 			GEN7_WA_L3_CHICKEN_MODE);
 
+	/* L3 caching of data atomics doesn't work -- disable it. */
+	I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
+	I915_WRITE(HSW_ROW_CHICKEN3,
+                   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_MASK |
+                   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE);
+
 	/* This is required by WaCatErrorRejectionIssue:hsw */
 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
 			I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 55bb572..fe0f52e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -335,6 +335,7 @@  typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
+#define I915_PARAM_HAS_ATOMICS           28
 
 typedef struct drm_i915_getparam {
 	int param;