diff mbox

drm/i915: Only force GGTT coherency w/a on required chipsets

Message ID 20180720075931.14362-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson July 20, 2018, 7:59 a.m. UTC
Not all chipsets have an internal buffer delaying the visibility of
writes via the GGTT being visible by other physical paths, but we use a
very heavy workaround for all. We only need to apply that workarounds to
the chipsets we know suffer from the delay and the resulting coherency
issue.

Similarly, the same inconsistent coherency fouls up our ABI promise that
a write into a mmap_gtt is immediately visible to others. Since the HW
has made that a lie, let userspace know when that contract is broken.
(Not that userspace would want to use mmap_gtt on those chipsets for
other performance reasons...)

Testcase: igt/drv_selftest/live_coherency
Testcase: igt/gem_mmap_gtt/coherency
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100587
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c          |  3 +++
 drivers/gpu/drm/i915/i915_gem.c          |  5 +++++
 drivers/gpu/drm/i915/i915_pci.c          | 10 ++++++++++
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 include/uapi/drm/i915_drm.h              | 22 ++++++++++++++++++++++
 5 files changed, 41 insertions(+)

Comments

Ville Syrjälä July 20, 2018, 10:09 a.m. UTC | #1
On Fri, Jul 20, 2018 at 08:59:31AM +0100, Chris Wilson wrote:
> Not all chipsets have an internal buffer delaying the visibility of
> writes via the GGTT being visible by other physical paths, but we use a
> very heavy workaround for all. We only need to apply that workarounds to
> the chipsets we know suffer from the delay and the resulting coherency
> issue.
> 
> Similarly, the same inconsistent coherency fouls up our ABI promise that
> a write into a mmap_gtt is immediately visible to others. Since the HW
> has made that a lie, let userspace know when that contract is broken.
> (Not that userspace would want to use mmap_gtt on those chipsets for
> other performance reasons...)
> 
> Testcase: igt/drv_selftest/live_coherency
> Testcase: igt/gem_mmap_gtt/coherency
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100587
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c          |  3 +++
>  drivers/gpu/drm/i915/i915_gem.c          |  5 +++++
>  drivers/gpu/drm/i915/i915_pci.c          | 10 ++++++++++
>  drivers/gpu/drm/i915/intel_device_info.h |  1 +
>  include/uapi/drm/i915_drm.h              | 22 ++++++++++++++++++++++
>  5 files changed, 41 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index f8cfd16be534..18a45e7a3d7c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -441,6 +441,9 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>  	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>  		value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz;
>  		break;
> +	case I915_PARAM_MMAP_GTT_COHERENT:
> +		value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fcc73a6ab503..8b52cb768a67 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -802,6 +802,11 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
>  	 * that was!).
>  	 */
>  
> +	wmb();
> +
> +	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
> +		return;
> +
>  	i915_gem_chipset_flush(dev_priv);
>  
>  	intel_runtime_pm_get(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 6a4d1388ad2d..e443fe44da3a 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -74,6 +74,7 @@

? These macros make me sad sometimes. Increase the context size a bit
for this one?

>  	.unfenced_needs_alignment = 1, \
>  	.ring_mask = RENDER_RING, \
>  	.has_snoop = true, \
> +	.has_coherent_ggtt = false, \
>  	GEN_DEFAULT_PIPEOFFSETS, \
>  	GEN_DEFAULT_PAGE_SIZES, \
>  	CURSOR_OFFSETS
> @@ -110,6 +111,7 @@ static const struct intel_device_info intel_i865g_info = {
>  	.has_gmch_display = 1, \
>  	.ring_mask = RENDER_RING, \
>  	.has_snoop = true, \
> +	.has_coherent_ggtt = true, \
>  	GEN_DEFAULT_PIPEOFFSETS, \
>  	GEN_DEFAULT_PAGE_SIZES, \
>  	CURSOR_OFFSETS
> @@ -117,6 +119,7 @@ static const struct intel_device_info intel_i865g_info = {
>  static const struct intel_device_info intel_i915g_info = {
>  	GEN3_FEATURES,
>  	PLATFORM(INTEL_I915G),
> +	.has_coherent_ggtt = false,
>  	.cursor_needs_physical = 1,
>  	.has_overlay = 1, .overlay_needs_physical = 1,
>  	.hws_needs_physical = 1,
> @@ -178,6 +181,7 @@ static const struct intel_device_info intel_pineview_info = {
>  	.has_gmch_display = 1, \
>  	.ring_mask = RENDER_RING, \
>  	.has_snoop = true, \
> +	.has_coherent_ggtt = true, \
>  	GEN_DEFAULT_PIPEOFFSETS, \
>  	GEN_DEFAULT_PAGE_SIZES, \
>  	CURSOR_OFFSETS
> @@ -220,6 +224,7 @@ static const struct intel_device_info intel_gm45_info = {
>  	.has_hotplug = 1, \
>  	.ring_mask = RENDER_RING | BSD_RING, \
>  	.has_snoop = true, \
> +	.has_coherent_ggtt = true, \
>  	/* ilk does support rc6, but we do not implement [power] contexts */ \
>  	.has_rc6 = 0, \
>  	GEN_DEFAULT_PIPEOFFSETS, \
> @@ -243,6 +248,7 @@ static const struct intel_device_info intel_ironlake_m_info = {
>  	.has_hotplug = 1, \
>  	.has_fbc = 1, \
>  	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
> +	.has_coherent_ggtt = true, \
>  	.has_llc = 1, \
>  	.has_rc6 = 1, \
>  	.has_rc6p = 1, \
> @@ -287,6 +293,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
>  	.has_hotplug = 1, \
>  	.has_fbc = 1, \
>  	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
> +	.has_coherent_ggtt = true, \
>  	.has_llc = 1, \
>  	.has_rc6 = 1, \
>  	.has_rc6p = 1, \
> @@ -347,6 +354,7 @@ static const struct intel_device_info intel_valleyview_info = {
>  	.has_aliasing_ppgtt = 1,
>  	.has_full_ppgtt = 1,
>  	.has_snoop = true,
> +	.has_coherent_ggtt = false,
>  	.ring_mask = RENDER_RING | BSD_RING | BLT_RING,
>  	.display_mmio_offset = VLV_DISPLAY_BASE,
>  	GEN_DEFAULT_PAGE_SIZES,
> @@ -441,6 +449,7 @@ static const struct intel_device_info intel_cherryview_info = {
>  	.has_full_ppgtt = 1,
>  	.has_reset_engine = 1,
>  	.has_snoop = true,
> +	.has_coherent_ggtt = false,
>  	.display_mmio_offset = VLV_DISPLAY_BASE,
>  	GEN_DEFAULT_PAGE_SIZES,
>  	GEN_CHV_PIPEOFFSETS,
> @@ -517,6 +526,7 @@ static const struct intel_device_info intel_skylake_gt4_info = {
>  	.has_full_48bit_ppgtt = 1, \
>  	.has_reset_engine = 1, \
>  	.has_snoop = true, \
> +	.has_coherent_ggtt = false, \
>  	.has_ipc = 1, \
>  	GEN9_DEFAULT_PAGE_SIZES, \
>  	GEN_DEFAULT_PIPEOFFSETS, \
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 633f9fbf72ea..07e8364d1a8c 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -106,6 +106,7 @@ enum intel_platform {
>  	func(has_resource_streamer); \
>  	func(has_runtime_pm); \
>  	func(has_snoop); \
> +	func(has_coherent_ggtt); \
>  	func(unfenced_needs_alignment); \
>  	func(cursor_needs_physical); \
>  	func(hws_needs_physical); \
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634ce8e88..8ee81e6f151c 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait {
>   */
>  #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
>  
> +/*
> + * Once upon a time we supposed that writes through the GGTT would be
> + * immediately in physical memory (once flushed out of the CPU path). However,
> + * on a few different processors and chipsets, this is not necessarily the case
> + * as the writes appear to be buffered internally. Thus a read of the backing
> + * storage (physical memory) via a different path (with different physical tags
> + * to the indirect write via the GGTT) will see stale values from before
> + * the GGTT write. Inside the kernel, we can for the most part keep track of
> + * the different read/write domains in use (e.g. set-domain), but the assumption
> + * of coherency is baked into the ABI, hence reporting its true state in this
> + * parameter.
> + *
> + * If set to true, writes via mmap_gtt are immediately visible following an
> + * lfence to flush the WCB.
> + *
> + * If set to false, writes via mmap_gtt are indeterminatnly delayed in an in
> + * internal buffer and are _not_ immediately visible to third parties accessing
> + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
> + * communications channel when set to false is strongly disadvised.
> + */
> +#define I915_PARAM_MMAP_GTT_COHERENT	52
> +
>  typedef struct drm_i915_getparam {
>  	__s32 param;
>  	/*
> -- 
> 2.18.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f8cfd16be534..18a45e7a3d7c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -441,6 +441,9 @@  static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
 		value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz;
 		break;
+	case I915_PARAM_MMAP_GTT_COHERENT:
+		value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fcc73a6ab503..8b52cb768a67 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -802,6 +802,11 @@  void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 	 * that was!).
 	 */
 
+	wmb();
+
+	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
+		return;
+
 	i915_gem_chipset_flush(dev_priv);
 
 	intel_runtime_pm_get(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 6a4d1388ad2d..e443fe44da3a 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -74,6 +74,7 @@ 
 	.unfenced_needs_alignment = 1, \
 	.ring_mask = RENDER_RING, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = false, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	CURSOR_OFFSETS
@@ -110,6 +111,7 @@  static const struct intel_device_info intel_i865g_info = {
 	.has_gmch_display = 1, \
 	.ring_mask = RENDER_RING, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = true, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	CURSOR_OFFSETS
@@ -117,6 +119,7 @@  static const struct intel_device_info intel_i865g_info = {
 static const struct intel_device_info intel_i915g_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I915G),
+	.has_coherent_ggtt = false,
 	.cursor_needs_physical = 1,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.hws_needs_physical = 1,
@@ -178,6 +181,7 @@  static const struct intel_device_info intel_pineview_info = {
 	.has_gmch_display = 1, \
 	.ring_mask = RENDER_RING, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = true, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	CURSOR_OFFSETS
@@ -220,6 +224,7 @@  static const struct intel_device_info intel_gm45_info = {
 	.has_hotplug = 1, \
 	.ring_mask = RENDER_RING | BSD_RING, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = true, \
 	/* ilk does support rc6, but we do not implement [power] contexts */ \
 	.has_rc6 = 0, \
 	GEN_DEFAULT_PIPEOFFSETS, \
@@ -243,6 +248,7 @@  static const struct intel_device_info intel_ironlake_m_info = {
 	.has_hotplug = 1, \
 	.has_fbc = 1, \
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
+	.has_coherent_ggtt = true, \
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
@@ -287,6 +293,7 @@  static const struct intel_device_info intel_sandybridge_m_gt2_info = {
 	.has_hotplug = 1, \
 	.has_fbc = 1, \
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
+	.has_coherent_ggtt = true, \
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
@@ -347,6 +354,7 @@  static const struct intel_device_info intel_valleyview_info = {
 	.has_aliasing_ppgtt = 1,
 	.has_full_ppgtt = 1,
 	.has_snoop = true,
+	.has_coherent_ggtt = false,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING,
 	.display_mmio_offset = VLV_DISPLAY_BASE,
 	GEN_DEFAULT_PAGE_SIZES,
@@ -441,6 +449,7 @@  static const struct intel_device_info intel_cherryview_info = {
 	.has_full_ppgtt = 1,
 	.has_reset_engine = 1,
 	.has_snoop = true,
+	.has_coherent_ggtt = false,
 	.display_mmio_offset = VLV_DISPLAY_BASE,
 	GEN_DEFAULT_PAGE_SIZES,
 	GEN_CHV_PIPEOFFSETS,
@@ -517,6 +526,7 @@  static const struct intel_device_info intel_skylake_gt4_info = {
 	.has_full_48bit_ppgtt = 1, \
 	.has_reset_engine = 1, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = false, \
 	.has_ipc = 1, \
 	GEN9_DEFAULT_PAGE_SIZES, \
 	GEN_DEFAULT_PIPEOFFSETS, \
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 633f9fbf72ea..07e8364d1a8c 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -106,6 +106,7 @@  enum intel_platform {
 	func(has_resource_streamer); \
 	func(has_runtime_pm); \
 	func(has_snoop); \
+	func(has_coherent_ggtt); \
 	func(unfenced_needs_alignment); \
 	func(cursor_needs_physical); \
 	func(hws_needs_physical); \
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..8ee81e6f151c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -529,6 +529,28 @@  typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
 
+/*
+ * Once upon a time we supposed that writes through the GGTT would be
+ * immediately in physical memory (once flushed out of the CPU path). However,
+ * on a few different processors and chipsets, this is not necessarily the case
+ * as the writes appear to be buffered internally. Thus a read of the backing
+ * storage (physical memory) via a different path (with different physical tags
+ * to the indirect write via the GGTT) will see stale values from before
+ * the GGTT write. Inside the kernel, we can for the most part keep track of
+ * the different read/write domains in use (e.g. set-domain), but the assumption
+ * of coherency is baked into the ABI, hence reporting its true state in this
+ * parameter.
+ *
+ * If set to true, writes via mmap_gtt are immediately visible following an
+ * lfence to flush the WCB.
+ *
+ * If set to false, writes via mmap_gtt are indeterminatnly delayed in an in
+ * internal buffer and are _not_ immediately visible to third parties accessing
+ * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
+ * communications channel when set to false is strongly disadvised.
+ */
+#define I915_PARAM_MMAP_GTT_COHERENT	52
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*