diff mbox

drm/i915: Only force GGTT coherency w/a on required chipsets

Message ID 20180720101910.11153-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson July 20, 2018, 10:19 a.m. UTC
Not all chipsets have an internal buffer delaying the visibility of
writes via the GGTT being visible by other physical paths, but we use a
very heavy workaround for all. We only need to apply that workarounds to
the chipsets we know suffer from the delay and the resulting coherency
issue.

Similarly, the same inconsistent coherency fouls up our ABI promise that
a write into a mmap_gtt is immediately visible to others. Since the HW
has made that a lie, let userspace know when that contract is broken.
(Not that userspace would want to use mmap_gtt on those chipsets for
other performance reasons...)

Testcase: igt/drv_selftest/live_coherency
Testcase: igt/gem_mmap_gtt/coherency
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100587
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
Resend with --function-context
-Chris
---
 drivers/gpu/drm/i915/i915_drv.c          |  3 +++
 drivers/gpu/drm/i915/i915_gem.c          |  5 +++++
 drivers/gpu/drm/i915/i915_pci.c          | 10 ++++++++++
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 include/uapi/drm/i915_drm.h              | 22 ++++++++++++++++++++++
 5 files changed, 41 insertions(+)

Comments

Lis, Tomasz July 20, 2018, 2:41 p.m. UTC | #1
On 2018-07-20 12:19, Chris Wilson wrote:
> Not all chipsets have an internal buffer delaying the visibility of
> writes via the GGTT being visible by other physical paths, but we use a
> very heavy workaround for all. We only need to apply that workarounds to
> the chipsets we know suffer from the delay and the resulting coherency
> issue.
>
> Similarly, the same inconsistent coherency fouls up our ABI promise that
> a write into a mmap_gtt is immediately visible to others. Since the HW
> has made that a lie, let userspace know when that contract is broken.
> (Not that userspace would want to use mmap_gtt on those chipsets for
> other performance reasons...)
>
> Testcase: igt/drv_selftest/live_coherency
> Testcase: igt/gem_mmap_gtt/coherency
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100587
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Is there any mention of this bug/feature in bspec? Would be nice to have 
a reference.

Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
> ---
> Resend with --function-context
> -Chris
> ---
>   drivers/gpu/drm/i915/i915_drv.c          |  3 +++
>   drivers/gpu/drm/i915/i915_gem.c          |  5 +++++
>   drivers/gpu/drm/i915/i915_pci.c          | 10 ++++++++++
>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>   include/uapi/drm/i915_drm.h              | 22 ++++++++++++++++++++++
>   5 files changed, 41 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index f8cfd16be534..18a45e7a3d7c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -302,152 +302,155 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv)
>   static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>   			       struct drm_file *file_priv)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(dev);
>   	struct pci_dev *pdev = dev_priv->drm.pdev;
>   	drm_i915_getparam_t *param = data;
>   	int value;
>   
>   	switch (param->param) {
>   	case I915_PARAM_IRQ_ACTIVE:
>   	case I915_PARAM_ALLOW_BATCHBUFFER:
>   	case I915_PARAM_LAST_DISPATCH:
>   	case I915_PARAM_HAS_EXEC_CONSTANTS:
>   		/* Reject all old ums/dri params. */
>   		return -ENODEV;
>   	case I915_PARAM_CHIPSET_ID:
>   		value = pdev->device;
>   		break;
>   	case I915_PARAM_REVISION:
>   		value = pdev->revision;
>   		break;
>   	case I915_PARAM_NUM_FENCES_AVAIL:
>   		value = dev_priv->num_fence_regs;
>   		break;
>   	case I915_PARAM_HAS_OVERLAY:
>   		value = dev_priv->overlay ? 1 : 0;
>   		break;
>   	case I915_PARAM_HAS_BSD:
>   		value = !!dev_priv->engine[VCS];
>   		break;
>   	case I915_PARAM_HAS_BLT:
>   		value = !!dev_priv->engine[BCS];
>   		break;
>   	case I915_PARAM_HAS_VEBOX:
>   		value = !!dev_priv->engine[VECS];
>   		break;
>   	case I915_PARAM_HAS_BSD2:
>   		value = !!dev_priv->engine[VCS2];
>   		break;
>   	case I915_PARAM_HAS_LLC:
>   		value = HAS_LLC(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_WT:
>   		value = HAS_WT(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_ALIASING_PPGTT:
>   		value = USES_PPGTT(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_SEMAPHORES:
>   		value = HAS_LEGACY_SEMAPHORES(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_SECURE_BATCHES:
>   		value = capable(CAP_SYS_ADMIN);
>   		break;
>   	case I915_PARAM_CMD_PARSER_VERSION:
>   		value = i915_cmd_parser_get_version(dev_priv);
>   		break;
>   	case I915_PARAM_SUBSLICE_TOTAL:
>   		value = sseu_subslice_total(&INTEL_INFO(dev_priv)->sseu);
>   		if (!value)
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_EU_TOTAL:
>   		value = INTEL_INFO(dev_priv)->sseu.eu_total;
>   		if (!value)
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_HAS_GPU_RESET:
>   		value = i915_modparams.enable_hangcheck &&
>   			intel_has_gpu_reset(dev_priv);
>   		if (value && intel_has_reset_engine(dev_priv))
>   			value = 2;
>   		break;
>   	case I915_PARAM_HAS_RESOURCE_STREAMER:
>   		value = HAS_RESOURCE_STREAMER(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_POOLED_EU:
>   		value = HAS_POOLED_EU(dev_priv);
>   		break;
>   	case I915_PARAM_MIN_EU_IN_POOL:
>   		value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool;
>   		break;
>   	case I915_PARAM_HUC_STATUS:
>   		value = intel_huc_check_status(&dev_priv->huc);
>   		if (value < 0)
>   			return value;
>   		break;
>   	case I915_PARAM_MMAP_GTT_VERSION:
>   		/* Though we've started our numbering from 1, and so class all
>   		 * earlier versions as 0, in effect their value is undefined as
>   		 * the ioctl will report EINVAL for the unknown param!
>   		 */
>   		value = i915_gem_mmap_gtt_version();
>   		break;
>   	case I915_PARAM_HAS_SCHEDULER:
>   		value = dev_priv->caps.scheduler;
>   		break;
>   
>   	case I915_PARAM_MMAP_VERSION:
>   		/* Remember to bump this if the version changes! */
>   	case I915_PARAM_HAS_GEM:
>   	case I915_PARAM_HAS_PAGEFLIPPING:
>   	case I915_PARAM_HAS_EXECBUF2: /* depends on GEM */
>   	case I915_PARAM_HAS_RELAXED_FENCING:
>   	case I915_PARAM_HAS_COHERENT_RINGS:
>   	case I915_PARAM_HAS_RELAXED_DELTA:
>   	case I915_PARAM_HAS_GEN7_SOL_RESET:
>   	case I915_PARAM_HAS_WAIT_TIMEOUT:
>   	case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
>   	case I915_PARAM_HAS_PINNED_BATCHES:
>   	case I915_PARAM_HAS_EXEC_NO_RELOC:
>   	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
>   	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
>   	case I915_PARAM_HAS_EXEC_SOFTPIN:
>   	case I915_PARAM_HAS_EXEC_ASYNC:
>   	case I915_PARAM_HAS_EXEC_FENCE:
>   	case I915_PARAM_HAS_EXEC_CAPTURE:
>   	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>   	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
>   		/* For the time being all of these are always true;
>   		 * if some supported hardware does not have one of these
>   		 * features this value needs to be provided from
>   		 * INTEL_INFO(), a feature macro, or similar.
>   		 */
>   		value = 1;
>   		break;
>   	case I915_PARAM_HAS_CONTEXT_ISOLATION:
>   		value = intel_engines_has_context_isolation(dev_priv);
>   		break;
>   	case I915_PARAM_SLICE_MASK:
>   		value = INTEL_INFO(dev_priv)->sseu.slice_mask;
>   		if (!value)
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_SUBSLICE_MASK:
>   		value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
>   		if (!value)
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>   		value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz;
>   		break;
> +	case I915_PARAM_MMAP_GTT_COHERENT:
> +		value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
>   	}
>   
>   	if (put_user(value, param->value))
>   		return -EFAULT;
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fcc73a6ab503..8b52cb768a67 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -784,31 +784,36 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
>   void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
>   {
>   	/*
>   	 * No actual flushing is required for the GTT write domain for reads
>   	 * from the GTT domain. Writes to it "immediately" go to main memory
>   	 * as far as we know, so there's no chipset flush. It also doesn't
>   	 * land in the GPU render cache.
>   	 *
>   	 * However, we do have to enforce the order so that all writes through
>   	 * the GTT land before any writes to the device, such as updates to
>   	 * the GATT itself.
>   	 *
>   	 * We also have to wait a bit for the writes to land from the GTT.
>   	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
>   	 * timing. This issue has only been observed when switching quickly
>   	 * between GTT writes and CPU reads from inside the kernel on recent hw,
>   	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
>   	 * system agents we cannot reproduce this behaviour, until Cannonlake
>   	 * that was!).
>   	 */
>   
> +	wmb();
> +
> +	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
> +		return;
> +
>   	i915_gem_chipset_flush(dev_priv);
>   
>   	intel_runtime_pm_get(dev_priv);
>   	spin_lock_irq(&dev_priv->uncore.lock);
>   
>   	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
>   
>   	spin_unlock_irq(&dev_priv->uncore.lock);
>   	intel_runtime_pm_put(dev_priv);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 6a4d1388ad2d..e443fe44da3a 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1,79 +1,80 @@
>   /*
>    * Copyright © 2016 Intel Corporation
>    *
>    * Permission is hereby granted, free of charge, to any person obtaining a
>    * copy of this software and associated documentation files (the "Software"),
>    * to deal in the Software without restriction, including without limitation
>    * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>    * and/or sell copies of the Software, and to permit persons to whom the
>    * Software is furnished to do so, subject to the following conditions:
>    *
>    * The above copyright notice and this permission notice (including the next
>    * paragraph) shall be included in all copies or substantial portions of the
>    * Software.
>    *
>    * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>    * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>    * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>    * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>    * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>    * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>    * IN THE SOFTWARE.
>    *
>    */
>   
>   #include <linux/console.h>
>   #include <linux/vgaarb.h>
>   #include <linux/vga_switcheroo.h>
>   
>   #include "i915_drv.h"
>   #include "i915_selftest.h"
>   
>   #define PLATFORM(x) .platform = (x), .platform_mask = BIT(x)
>   #define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1)
>   
>   #define GEN_DEFAULT_PIPEOFFSETS \
>   	.pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \
>   			  PIPE_C_OFFSET, PIPE_EDP_OFFSET }, \
>   	.trans_offsets = { TRANSCODER_A_OFFSET, TRANSCODER_B_OFFSET, \
>   			   TRANSCODER_C_OFFSET, TRANSCODER_EDP_OFFSET }, \
>   	.palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET }
>   
>   #define GEN_CHV_PIPEOFFSETS \
>   	.pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \
>   			  CHV_PIPE_C_OFFSET }, \
>   	.trans_offsets = { TRANSCODER_A_OFFSET, TRANSCODER_B_OFFSET, \
>   			   CHV_TRANSCODER_C_OFFSET, }, \
>   	.palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET, \
>   			     CHV_PALETTE_C_OFFSET }
>   
>   #define CURSOR_OFFSETS \
>   	.cursor_offsets = { CURSOR_A_OFFSET, CURSOR_B_OFFSET, CHV_CURSOR_C_OFFSET }
>   
>   #define IVB_CURSOR_OFFSETS \
>   	.cursor_offsets = { CURSOR_A_OFFSET, IVB_CURSOR_B_OFFSET, IVB_CURSOR_C_OFFSET }
>   
>   #define BDW_COLORS \
>   	.color = { .degamma_lut_size = 512, .gamma_lut_size = 512 }
>   #define CHV_COLORS \
>   	.color = { .degamma_lut_size = 65, .gamma_lut_size = 257 }
>   #define GLK_COLORS \
>   	.color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 }
>   
>   /* Keep in gen based order, and chronological order within a gen */
>   
>   #define GEN_DEFAULT_PAGE_SIZES \
>   	.page_sizes = I915_GTT_PAGE_SIZE_4K
>   
>   #define GEN2_FEATURES \
>   	GEN(2), \
>   	.num_pipes = 1, \
>   	.has_overlay = 1, .overlay_needs_physical = 1, \
>   	.has_gmch_display = 1, \
>   	.hws_needs_physical = 1, \
>   	.unfenced_needs_alignment = 1, \
>   	.ring_mask = RENDER_RING, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = false, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
> @@ -102,14 +103,15 @@ static const struct intel_device_info intel_i85x_info = {
>   static const struct intel_device_info intel_i865g_info = {
>   	GEN2_FEATURES,
>   	PLATFORM(INTEL_I865G),
>   };
>   
>   #define GEN3_FEATURES \
>   	GEN(3), \
>   	.num_pipes = 2, \
>   	.has_gmch_display = 1, \
>   	.ring_mask = RENDER_RING, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = true, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
> @@ -117,8 +119,9 @@ static const struct intel_device_info intel_i865g_info = {
>   static const struct intel_device_info intel_i915g_info = {
>   	GEN3_FEATURES,
>   	PLATFORM(INTEL_I915G),
> +	.has_coherent_ggtt = false,
>   	.cursor_needs_physical = 1,
>   	.has_overlay = 1, .overlay_needs_physical = 1,
>   	.hws_needs_physical = 1,
>   	.unfenced_needs_alignment = 1,
>   };
> @@ -166,18 +169,19 @@ static const struct intel_device_info intel_g33_info = {
>   static const struct intel_device_info intel_pineview_info = {
>   	GEN3_FEATURES,
>   	PLATFORM(INTEL_PINEVIEW),
>   	.is_mobile = 1,
>   	.has_hotplug = 1,
>   	.has_overlay = 1,
>   };
>   
>   #define GEN4_FEATURES \
>   	GEN(4), \
>   	.num_pipes = 2, \
>   	.has_hotplug = 1, \
>   	.has_gmch_display = 1, \
>   	.ring_mask = RENDER_RING, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = true, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
> @@ -209,19 +213,20 @@ static const struct intel_device_info intel_g45_info = {
>   static const struct intel_device_info intel_gm45_info = {
>   	GEN4_FEATURES,
>   	PLATFORM(INTEL_GM45),
>   	.is_mobile = 1, .has_fbc = 1,
>   	.supports_tv = 1,
>   	.ring_mask = RENDER_RING | BSD_RING,
>   };
>   
>   #define GEN5_FEATURES \
>   	GEN(5), \
>   	.num_pipes = 2, \
>   	.has_hotplug = 1, \
>   	.ring_mask = RENDER_RING | BSD_RING, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = true, \
>   	/* ilk does support rc6, but we do not implement [power] contexts */ \
>   	.has_rc6 = 0, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
> @@ -234,23 +239,24 @@ static const struct intel_device_info intel_ironlake_d_info = {
>   static const struct intel_device_info intel_ironlake_m_info = {
>   	GEN5_FEATURES,
>   	PLATFORM(INTEL_IRONLAKE),
>   	.is_mobile = 1, .has_fbc = 1,
>   };
>   
>   #define GEN6_FEATURES \
>   	GEN(6), \
>   	.num_pipes = 2, \
>   	.has_hotplug = 1, \
>   	.has_fbc = 1, \
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
> +	.has_coherent_ggtt = true, \
>   	.has_llc = 1, \
>   	.has_rc6 = 1, \
>   	.has_rc6p = 1, \
>   	.has_aliasing_ppgtt = 1, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
>   
>   #define SNB_D_PLATFORM \
>   	GEN6_FEATURES, \
>   	PLATFORM(INTEL_SANDYBRIDGE)
> @@ -279,24 +285,25 @@ static const struct intel_device_info intel_sandybridge_m_gt1_info = {
>   static const struct intel_device_info intel_sandybridge_m_gt2_info = {
>   	SNB_M_PLATFORM,
>   	.gt = 2,
>   };
>   
>   #define GEN7_FEATURES  \
>   	GEN(7), \
>   	.num_pipes = 3, \
>   	.has_hotplug = 1, \
>   	.has_fbc = 1, \
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
> +	.has_coherent_ggtt = true, \
>   	.has_llc = 1, \
>   	.has_rc6 = 1, \
>   	.has_rc6p = 1, \
>   	.has_aliasing_ppgtt = 1, \
>   	.has_full_ppgtt = 1, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	IVB_CURSOR_OFFSETS
>   
>   #define IVB_D_PLATFORM \
>   	GEN7_FEATURES, \
>   	PLATFORM(INTEL_IVYBRIDGE), \
>   	.has_l3_dpf = 1
> @@ -338,34 +345,35 @@ static const struct intel_device_info intel_ivybridge_q_info = {
>   static const struct intel_device_info intel_valleyview_info = {
>   	PLATFORM(INTEL_VALLEYVIEW),
>   	GEN(7),
>   	.is_lp = 1,
>   	.num_pipes = 2,
>   	.has_runtime_pm = 1,
>   	.has_rc6 = 1,
>   	.has_gmch_display = 1,
>   	.has_hotplug = 1,
>   	.has_aliasing_ppgtt = 1,
>   	.has_full_ppgtt = 1,
>   	.has_snoop = true,
> +	.has_coherent_ggtt = false,
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING,
>   	.display_mmio_offset = VLV_DISPLAY_BASE,
>   	GEN_DEFAULT_PAGE_SIZES,
>   	GEN_DEFAULT_PIPEOFFSETS,
>   	CURSOR_OFFSETS
>   };
>   
>   #define G75_FEATURES  \
>   	GEN7_FEATURES, \
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \
>   	.has_ddi = 1, \
>   	.has_fpga_dbg = 1, \
>   	.has_psr = 1, \
>   	.has_resource_streamer = 1, \
>   	.has_dp_mst = 1, \
>   	.has_rc6p = 0 /* RC6p removed-by HSW */, \
>   	.has_runtime_pm = 1
>   
>   #define HSW_PLATFORM \
>   	G75_FEATURES, \
>   	PLATFORM(INTEL_HASWELL), \
>   	.has_l3_dpf = 1
> @@ -427,42 +435,43 @@ static const struct intel_device_info intel_broadwell_gt3_info = {
>   static const struct intel_device_info intel_cherryview_info = {
>   	PLATFORM(INTEL_CHERRYVIEW),
>   	GEN(8),
>   	.num_pipes = 3,
>   	.has_hotplug = 1,
>   	.is_lp = 1,
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
>   	.has_64bit_reloc = 1,
>   	.has_runtime_pm = 1,
>   	.has_resource_streamer = 1,
>   	.has_rc6 = 1,
>   	.has_logical_ring_contexts = 1,
>   	.has_gmch_display = 1,
>   	.has_aliasing_ppgtt = 1,
>   	.has_full_ppgtt = 1,
>   	.has_reset_engine = 1,
>   	.has_snoop = true,
> +	.has_coherent_ggtt = false,
>   	.display_mmio_offset = VLV_DISPLAY_BASE,
>   	GEN_DEFAULT_PAGE_SIZES,
>   	GEN_CHV_PIPEOFFSETS,
>   	CURSOR_OFFSETS,
>   	CHV_COLORS,
>   };
>   
>   #define GEN9_DEFAULT_PAGE_SIZES \
>   	.page_sizes = I915_GTT_PAGE_SIZE_4K | \
>   		      I915_GTT_PAGE_SIZE_64K | \
>   		      I915_GTT_PAGE_SIZE_2M
>   
>   #define GEN9_FEATURES \
>   	GEN8_FEATURES, \
>   	GEN(9), \
>   	GEN9_DEFAULT_PAGE_SIZES, \
>   	.has_logical_ring_preemption = 1, \
>   	.has_csr = 1, \
>   	.has_guc = 1, \
>   	.has_ipc = 1, \
>   	.ddb_size = 896
>   
>   #define SKL_PLATFORM \
>   	GEN9_FEATURES, \
>   	PLATFORM(INTEL_SKYLAKE)
> @@ -490,35 +499,36 @@ static const struct intel_device_info intel_skylake_gt3_info = {
>   static const struct intel_device_info intel_skylake_gt4_info = {
>   	SKL_GT3_PLUS_PLATFORM,
>   	.gt = 4,
>   };
>   
>   #define GEN9_LP_FEATURES \
>   	GEN(9), \
>   	.is_lp = 1, \
>   	.has_hotplug = 1, \
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \
>   	.num_pipes = 3, \
>   	.has_64bit_reloc = 1, \
>   	.has_ddi = 1, \
>   	.has_fpga_dbg = 1, \
>   	.has_fbc = 1, \
>   	.has_psr = 1, \
>   	.has_runtime_pm = 1, \
>   	.has_pooled_eu = 0, \
>   	.has_csr = 1, \
>   	.has_resource_streamer = 1, \
>   	.has_rc6 = 1, \
>   	.has_dp_mst = 1, \
>   	.has_logical_ring_contexts = 1, \
>   	.has_logical_ring_preemption = 1, \
>   	.has_guc = 1, \
>   	.has_aliasing_ppgtt = 1, \
>   	.has_full_ppgtt = 1, \
>   	.has_full_48bit_ppgtt = 1, \
>   	.has_reset_engine = 1, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = false, \
>   	.has_ipc = 1, \
>   	GEN9_DEFAULT_PAGE_SIZES, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	IVB_CURSOR_OFFSETS, \
>   	BDW_COLORS
No value for gen10/gen11 yet?
-Tomasz
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 633f9fbf72ea..07e8364d1a8c 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -33,85 +33,86 @@ struct drm_i915_private;
>   /* Keep in gen based order, and chronological order within a gen */
>   enum intel_platform {
>   	INTEL_PLATFORM_UNINITIALIZED = 0,
>   	/* gen2 */
>   	INTEL_I830,
>   	INTEL_I845G,
>   	INTEL_I85X,
>   	INTEL_I865G,
>   	/* gen3 */
>   	INTEL_I915G,
>   	INTEL_I915GM,
>   	INTEL_I945G,
>   	INTEL_I945GM,
>   	INTEL_G33,
>   	INTEL_PINEVIEW,
>   	/* gen4 */
>   	INTEL_I965G,
>   	INTEL_I965GM,
>   	INTEL_G45,
>   	INTEL_GM45,
>   	/* gen5 */
>   	INTEL_IRONLAKE,
>   	/* gen6 */
>   	INTEL_SANDYBRIDGE,
>   	/* gen7 */
>   	INTEL_IVYBRIDGE,
>   	INTEL_VALLEYVIEW,
>   	INTEL_HASWELL,
>   	/* gen8 */
>   	INTEL_BROADWELL,
>   	INTEL_CHERRYVIEW,
>   	/* gen9 */
>   	INTEL_SKYLAKE,
>   	INTEL_BROXTON,
>   	INTEL_KABYLAKE,
>   	INTEL_GEMINILAKE,
>   	INTEL_COFFEELAKE,
>   	/* gen10 */
>   	INTEL_CANNONLAKE,
>   	/* gen11 */
>   	INTEL_ICELAKE,
>   	INTEL_MAX_PLATFORMS
>   };
>   
>   #define DEV_INFO_FOR_EACH_FLAG(func) \
>   	func(is_mobile); \
>   	func(is_lp); \
>   	func(is_alpha_support); \
>   	/* Keep has_* in alphabetical order */ \
>   	func(has_64bit_reloc); \
>   	func(has_aliasing_ppgtt); \
>   	func(has_csr); \
>   	func(has_ddi); \
>   	func(has_dp_mst); \
>   	func(has_reset_engine); \
>   	func(has_fbc); \
>   	func(has_fpga_dbg); \
>   	func(has_full_ppgtt); \
>   	func(has_full_48bit_ppgtt); \
>   	func(has_gmch_display); \
>   	func(has_guc); \
>   	func(has_guc_ct); \
>   	func(has_hotplug); \
>   	func(has_l3_dpf); \
>   	func(has_llc); \
>   	func(has_logical_ring_contexts); \
>   	func(has_logical_ring_elsq); \
>   	func(has_logical_ring_preemption); \
>   	func(has_overlay); \
>   	func(has_pooled_eu); \
>   	func(has_psr); \
>   	func(has_rc6); \
>   	func(has_rc6p); \
>   	func(has_resource_streamer); \
>   	func(has_runtime_pm); \
>   	func(has_snoop); \
> +	func(has_coherent_ggtt); \
>   	func(unfenced_needs_alignment); \
>   	func(cursor_needs_physical); \
>   	func(hws_needs_physical); \
>   	func(overlay_needs_physical); \
>   	func(supports_tv); \
>   	func(has_ipc);
>   
>   #define GEN_MAX_SLICES		(6) /* CNL upper bound */
>   #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634ce8e88..8ee81e6f151c 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait {
>    */
>   #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
>   
> +/*
> + * Once upon a time we supposed that writes through the GGTT would be
> + * immediately in physical memory (once flushed out of the CPU path). However,
> + * on a few different processors and chipsets, this is not necessarily the case
> + * as the writes appear to be buffered internally. Thus a read of the backing
> + * storage (physical memory) via a different path (with different physical tags
> + * to the indirect write via the GGTT) will see stale values from before
> + * the GGTT write. Inside the kernel, we can for the most part keep track of
> + * the different read/write domains in use (e.g. set-domain), but the assumption
> + * of coherency is baked into the ABI, hence reporting its true state in this
> + * parameter.
> + *
> + * If set to true, writes via mmap_gtt are immediately visible following an
> + * lfence to flush the WCB.
> + *
> + * If set to false, writes via mmap_gtt are indeterminatnly delayed in an in
> + * internal buffer and are _not_ immediately visible to third parties accessing
> + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
> + * communications channel when set to false is strongly disadvised.
> + */
> +#define I915_PARAM_MMAP_GTT_COHERENT	52
> +
>   typedef struct drm_i915_getparam {
>   	__s32 param;
>   	/*
Chris Wilson July 20, 2018, 2:55 p.m. UTC | #2
Quoting Lis, Tomasz (2018-07-20 15:41:33)
> 
> 
> On 2018-07-20 12:19, Chris Wilson wrote:
> > Not all chipsets have an internal buffer delaying the visibility of
> > writes via the GGTT being visible by other physical paths, but we use a
> > very heavy workaround for all. We only need to apply that workarounds to
> > the chipsets we know suffer from the delay and the resulting coherency
> > issue.
> >
> > Similarly, the same inconsistent coherency fouls up our ABI promise that
> > a write into a mmap_gtt is immediately visible to others. Since the HW
> > has made that a lie, let userspace know when that contract is broken.
> > (Not that userspace would want to use mmap_gtt on those chipsets for
> > other performance reasons...)
> >
> > Testcase: igt/drv_selftest/live_coherency
> > Testcase: igt/gem_mmap_gtt/coherency
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100587
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Is there any mention of this bug/feature in bspec? Would be nice to have 
> a reference.

I know that some HW bod has come across it, as they recommended the same
for ordering mem access between us using the GTT for iomaps and GuC
submission. All I know is the background chatter... We don't seem to
have a named w/a; iirc it was just concluded that was the way it was
meant to work ;)
-Chris
Tvrtko Ursulin July 20, 2018, 3:13 p.m. UTC | #3
On 20/07/2018 11:19, Chris Wilson wrote:
> Not all chipsets have an internal buffer delaying the visibility of
> writes via the GGTT being visible by other physical paths, but we use a
> very heavy workaround for all. We only need to apply that workarounds to
> the chipsets we know suffer from the delay and the resulting coherency
> issue.
> 
> Similarly, the same inconsistent coherency fouls up our ABI promise that
> a write into a mmap_gtt is immediately visible to others. Since the HW
> has made that a lie, let userspace know when that contract is broken.
> (Not that userspace would want to use mmap_gtt on those chipsets for
> other performance reasons...)
> 
> Testcase: igt/drv_selftest/live_coherency
> Testcase: igt/gem_mmap_gtt/coherency

The list of platforms to mark with false was compiled from the test results?

But then before this patch workaround was applied everywhere - so if the 
test was failing even then - that means workaround wasn't sufficient?

> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100587
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
> Resend with --function-context
> -Chris
> ---
>   drivers/gpu/drm/i915/i915_drv.c          |  3 +++
>   drivers/gpu/drm/i915/i915_gem.c          |  5 +++++
>   drivers/gpu/drm/i915/i915_pci.c          | 10 ++++++++++
>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>   include/uapi/drm/i915_drm.h              | 22 ++++++++++++++++++++++
>   5 files changed, 41 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index f8cfd16be534..18a45e7a3d7c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -302,152 +302,155 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv)
>   static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>   			       struct drm_file *file_priv)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(dev);
>   	struct pci_dev *pdev = dev_priv->drm.pdev;
>   	drm_i915_getparam_t *param = data;
>   	int value;
>   
>   	switch (param->param) {
>   	case I915_PARAM_IRQ_ACTIVE:
>   	case I915_PARAM_ALLOW_BATCHBUFFER:
>   	case I915_PARAM_LAST_DISPATCH:
>   	case I915_PARAM_HAS_EXEC_CONSTANTS:
>   		/* Reject all old ums/dri params. */
>   		return -ENODEV;
>   	case I915_PARAM_CHIPSET_ID:
>   		value = pdev->device;
>   		break;
>   	case I915_PARAM_REVISION:
>   		value = pdev->revision;
>   		break;
>   	case I915_PARAM_NUM_FENCES_AVAIL:
>   		value = dev_priv->num_fence_regs;
>   		break;
>   	case I915_PARAM_HAS_OVERLAY:
>   		value = dev_priv->overlay ? 1 : 0;
>   		break;
>   	case I915_PARAM_HAS_BSD:
>   		value = !!dev_priv->engine[VCS];
>   		break;
>   	case I915_PARAM_HAS_BLT:
>   		value = !!dev_priv->engine[BCS];
>   		break;
>   	case I915_PARAM_HAS_VEBOX:
>   		value = !!dev_priv->engine[VECS];
>   		break;
>   	case I915_PARAM_HAS_BSD2:
>   		value = !!dev_priv->engine[VCS2];
>   		break;
>   	case I915_PARAM_HAS_LLC:
>   		value = HAS_LLC(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_WT:
>   		value = HAS_WT(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_ALIASING_PPGTT:
>   		value = USES_PPGTT(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_SEMAPHORES:
>   		value = HAS_LEGACY_SEMAPHORES(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_SECURE_BATCHES:
>   		value = capable(CAP_SYS_ADMIN);
>   		break;
>   	case I915_PARAM_CMD_PARSER_VERSION:
>   		value = i915_cmd_parser_get_version(dev_priv);
>   		break;
>   	case I915_PARAM_SUBSLICE_TOTAL:
>   		value = sseu_subslice_total(&INTEL_INFO(dev_priv)->sseu);
>   		if (!value)
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_EU_TOTAL:
>   		value = INTEL_INFO(dev_priv)->sseu.eu_total;
>   		if (!value)
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_HAS_GPU_RESET:
>   		value = i915_modparams.enable_hangcheck &&
>   			intel_has_gpu_reset(dev_priv);
>   		if (value && intel_has_reset_engine(dev_priv))
>   			value = 2;
>   		break;
>   	case I915_PARAM_HAS_RESOURCE_STREAMER:
>   		value = HAS_RESOURCE_STREAMER(dev_priv);
>   		break;
>   	case I915_PARAM_HAS_POOLED_EU:
>   		value = HAS_POOLED_EU(dev_priv);
>   		break;
>   	case I915_PARAM_MIN_EU_IN_POOL:
>   		value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool;
>   		break;
>   	case I915_PARAM_HUC_STATUS:
>   		value = intel_huc_check_status(&dev_priv->huc);
>   		if (value < 0)
>   			return value;
>   		break;
>   	case I915_PARAM_MMAP_GTT_VERSION:
>   		/* Though we've started our numbering from 1, and so class all
>   		 * earlier versions as 0, in effect their value is undefined as
>   		 * the ioctl will report EINVAL for the unknown param!
>   		 */
>   		value = i915_gem_mmap_gtt_version();
>   		break;
>   	case I915_PARAM_HAS_SCHEDULER:
>   		value = dev_priv->caps.scheduler;
>   		break;
>   
>   	case I915_PARAM_MMAP_VERSION:
>   		/* Remember to bump this if the version changes! */
>   	case I915_PARAM_HAS_GEM:
>   	case I915_PARAM_HAS_PAGEFLIPPING:
>   	case I915_PARAM_HAS_EXECBUF2: /* depends on GEM */
>   	case I915_PARAM_HAS_RELAXED_FENCING:
>   	case I915_PARAM_HAS_COHERENT_RINGS:
>   	case I915_PARAM_HAS_RELAXED_DELTA:
>   	case I915_PARAM_HAS_GEN7_SOL_RESET:
>   	case I915_PARAM_HAS_WAIT_TIMEOUT:
>   	case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
>   	case I915_PARAM_HAS_PINNED_BATCHES:
>   	case I915_PARAM_HAS_EXEC_NO_RELOC:
>   	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
>   	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
>   	case I915_PARAM_HAS_EXEC_SOFTPIN:
>   	case I915_PARAM_HAS_EXEC_ASYNC:
>   	case I915_PARAM_HAS_EXEC_FENCE:
>   	case I915_PARAM_HAS_EXEC_CAPTURE:
>   	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>   	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
>   		/* For the time being all of these are always true;
>   		 * if some supported hardware does not have one of these
>   		 * features this value needs to be provided from
>   		 * INTEL_INFO(), a feature macro, or similar.
>   		 */
>   		value = 1;
>   		break;
>   	case I915_PARAM_HAS_CONTEXT_ISOLATION:
>   		value = intel_engines_has_context_isolation(dev_priv);
>   		break;
>   	case I915_PARAM_SLICE_MASK:
>   		value = INTEL_INFO(dev_priv)->sseu.slice_mask;
>   		if (!value)
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_SUBSLICE_MASK:
>   		value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
>   		if (!value)
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>   		value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz;
>   		break;
> +	case I915_PARAM_MMAP_GTT_COHERENT:
> +		value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
>   	}
>   
>   	if (put_user(value, param->value))
>   		return -EFAULT;
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fcc73a6ab503..8b52cb768a67 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -784,31 +784,36 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
>   void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
>   {
>   	/*
>   	 * No actual flushing is required for the GTT write domain for reads
>   	 * from the GTT domain. Writes to it "immediately" go to main memory
>   	 * as far as we know, so there's no chipset flush. It also doesn't
>   	 * land in the GPU render cache.
>   	 *
>   	 * However, we do have to enforce the order so that all writes through
>   	 * the GTT land before any writes to the device, such as updates to
>   	 * the GATT itself.
>   	 *
>   	 * We also have to wait a bit for the writes to land from the GTT.
>   	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
>   	 * timing. This issue has only been observed when switching quickly
>   	 * between GTT writes and CPU reads from inside the kernel on recent hw,
>   	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
>   	 * system agents we cannot reproduce this behaviour, until Cannonlake
>   	 * that was!).
>   	 */
>   
> +	wmb();
> +
> +	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
> +		return;
> +
>   	i915_gem_chipset_flush(dev_priv);
>   
>   	intel_runtime_pm_get(dev_priv);
>   	spin_lock_irq(&dev_priv->uncore.lock);
>   
>   	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
>   
>   	spin_unlock_irq(&dev_priv->uncore.lock);
>   	intel_runtime_pm_put(dev_priv);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 6a4d1388ad2d..e443fe44da3a 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1,79 +1,80 @@
>   /*
>    * Copyright © 2016 Intel Corporation
>    *
>    * Permission is hereby granted, free of charge, to any person obtaining a
>    * copy of this software and associated documentation files (the "Software"),
>    * to deal in the Software without restriction, including without limitation
>    * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>    * and/or sell copies of the Software, and to permit persons to whom the
>    * Software is furnished to do so, subject to the following conditions:
>    *
>    * The above copyright notice and this permission notice (including the next
>    * paragraph) shall be included in all copies or substantial portions of the
>    * Software.
>    *
>    * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>    * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>    * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>    * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>    * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>    * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>    * IN THE SOFTWARE.
>    *
>    */
>   
>   #include <linux/console.h>
>   #include <linux/vgaarb.h>
>   #include <linux/vga_switcheroo.h>
>   
>   #include "i915_drv.h"
>   #include "i915_selftest.h"
>   
>   #define PLATFORM(x) .platform = (x), .platform_mask = BIT(x)
>   #define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1)
>   
>   #define GEN_DEFAULT_PIPEOFFSETS \
>   	.pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \
>   			  PIPE_C_OFFSET, PIPE_EDP_OFFSET }, \
>   	.trans_offsets = { TRANSCODER_A_OFFSET, TRANSCODER_B_OFFSET, \
>   			   TRANSCODER_C_OFFSET, TRANSCODER_EDP_OFFSET }, \
>   	.palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET }
>   
>   #define GEN_CHV_PIPEOFFSETS \
>   	.pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \
>   			  CHV_PIPE_C_OFFSET }, \
>   	.trans_offsets = { TRANSCODER_A_OFFSET, TRANSCODER_B_OFFSET, \
>   			   CHV_TRANSCODER_C_OFFSET, }, \
>   	.palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET, \
>   			     CHV_PALETTE_C_OFFSET }
>   
>   #define CURSOR_OFFSETS \
>   	.cursor_offsets = { CURSOR_A_OFFSET, CURSOR_B_OFFSET, CHV_CURSOR_C_OFFSET }
>   
>   #define IVB_CURSOR_OFFSETS \
>   	.cursor_offsets = { CURSOR_A_OFFSET, IVB_CURSOR_B_OFFSET, IVB_CURSOR_C_OFFSET }
>   
>   #define BDW_COLORS \
>   	.color = { .degamma_lut_size = 512, .gamma_lut_size = 512 }
>   #define CHV_COLORS \
>   	.color = { .degamma_lut_size = 65, .gamma_lut_size = 257 }
>   #define GLK_COLORS \
>   	.color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 }
>   
>   /* Keep in gen based order, and chronological order within a gen */
>   
>   #define GEN_DEFAULT_PAGE_SIZES \
>   	.page_sizes = I915_GTT_PAGE_SIZE_4K
>   
>   #define GEN2_FEATURES \
>   	GEN(2), \
>   	.num_pipes = 1, \
>   	.has_overlay = 1, .overlay_needs_physical = 1, \
>   	.has_gmch_display = 1, \
>   	.hws_needs_physical = 1, \
>   	.unfenced_needs_alignment = 1, \
>   	.ring_mask = RENDER_RING, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = false, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
> @@ -102,14 +103,15 @@ static const struct intel_device_info intel_i85x_info = {
>   static const struct intel_device_info intel_i865g_info = {
>   	GEN2_FEATURES,
>   	PLATFORM(INTEL_I865G),
>   };
>   
>   #define GEN3_FEATURES \
>   	GEN(3), \
>   	.num_pipes = 2, \
>   	.has_gmch_display = 1, \
>   	.ring_mask = RENDER_RING, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = true, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
> @@ -117,8 +119,9 @@ static const struct intel_device_info intel_i865g_info = {
>   static const struct intel_device_info intel_i915g_info = {
>   	GEN3_FEATURES,
>   	PLATFORM(INTEL_I915G),
> +	.has_coherent_ggtt = false,
>   	.cursor_needs_physical = 1,
>   	.has_overlay = 1, .overlay_needs_physical = 1,
>   	.hws_needs_physical = 1,
>   	.unfenced_needs_alignment = 1,
>   };
> @@ -166,18 +169,19 @@ static const struct intel_device_info intel_g33_info = {
>   static const struct intel_device_info intel_pineview_info = {
>   	GEN3_FEATURES,
>   	PLATFORM(INTEL_PINEVIEW),
>   	.is_mobile = 1,
>   	.has_hotplug = 1,
>   	.has_overlay = 1,
>   };
>   
>   #define GEN4_FEATURES \
>   	GEN(4), \
>   	.num_pipes = 2, \
>   	.has_hotplug = 1, \
>   	.has_gmch_display = 1, \
>   	.ring_mask = RENDER_RING, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = true, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
> @@ -209,19 +213,20 @@ static const struct intel_device_info intel_g45_info = {
>   static const struct intel_device_info intel_gm45_info = {
>   	GEN4_FEATURES,
>   	PLATFORM(INTEL_GM45),
>   	.is_mobile = 1, .has_fbc = 1,
>   	.supports_tv = 1,
>   	.ring_mask = RENDER_RING | BSD_RING,
>   };
>   
>   #define GEN5_FEATURES \
>   	GEN(5), \
>   	.num_pipes = 2, \
>   	.has_hotplug = 1, \
>   	.ring_mask = RENDER_RING | BSD_RING, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = true, \
>   	/* ilk does support rc6, but we do not implement [power] contexts */ \
>   	.has_rc6 = 0, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
> @@ -234,23 +239,24 @@ static const struct intel_device_info intel_ironlake_d_info = {
>   static const struct intel_device_info intel_ironlake_m_info = {
>   	GEN5_FEATURES,
>   	PLATFORM(INTEL_IRONLAKE),
>   	.is_mobile = 1, .has_fbc = 1,
>   };
>   
>   #define GEN6_FEATURES \
>   	GEN(6), \
>   	.num_pipes = 2, \
>   	.has_hotplug = 1, \
>   	.has_fbc = 1, \
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
> +	.has_coherent_ggtt = true, \
>   	.has_llc = 1, \
>   	.has_rc6 = 1, \
>   	.has_rc6p = 1, \
>   	.has_aliasing_ppgtt = 1, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	CURSOR_OFFSETS
>   
>   #define SNB_D_PLATFORM \
>   	GEN6_FEATURES, \
>   	PLATFORM(INTEL_SANDYBRIDGE)
> @@ -279,24 +285,25 @@ static const struct intel_device_info intel_sandybridge_m_gt1_info = {
>   static const struct intel_device_info intel_sandybridge_m_gt2_info = {
>   	SNB_M_PLATFORM,
>   	.gt = 2,
>   };
>   
>   #define GEN7_FEATURES  \
>   	GEN(7), \
>   	.num_pipes = 3, \
>   	.has_hotplug = 1, \
>   	.has_fbc = 1, \
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
> +	.has_coherent_ggtt = true, \
>   	.has_llc = 1, \
>   	.has_rc6 = 1, \
>   	.has_rc6p = 1, \
>   	.has_aliasing_ppgtt = 1, \
>   	.has_full_ppgtt = 1, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
>   	IVB_CURSOR_OFFSETS
>   
>   #define IVB_D_PLATFORM \
>   	GEN7_FEATURES, \
>   	PLATFORM(INTEL_IVYBRIDGE), \
>   	.has_l3_dpf = 1
> @@ -338,34 +345,35 @@ static const struct intel_device_info intel_ivybridge_q_info = {
>   static const struct intel_device_info intel_valleyview_info = {
>   	PLATFORM(INTEL_VALLEYVIEW),
>   	GEN(7),
>   	.is_lp = 1,
>   	.num_pipes = 2,
>   	.has_runtime_pm = 1,
>   	.has_rc6 = 1,
>   	.has_gmch_display = 1,
>   	.has_hotplug = 1,
>   	.has_aliasing_ppgtt = 1,
>   	.has_full_ppgtt = 1,
>   	.has_snoop = true,
> +	.has_coherent_ggtt = false,
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING,
>   	.display_mmio_offset = VLV_DISPLAY_BASE,
>   	GEN_DEFAULT_PAGE_SIZES,
>   	GEN_DEFAULT_PIPEOFFSETS,
>   	CURSOR_OFFSETS
>   };
>   
>   #define G75_FEATURES  \
>   	GEN7_FEATURES, \
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \
>   	.has_ddi = 1, \
>   	.has_fpga_dbg = 1, \
>   	.has_psr = 1, \
>   	.has_resource_streamer = 1, \
>   	.has_dp_mst = 1, \
>   	.has_rc6p = 0 /* RC6p removed-by HSW */, \
>   	.has_runtime_pm = 1
>   
>   #define HSW_PLATFORM \
>   	G75_FEATURES, \
>   	PLATFORM(INTEL_HASWELL), \
>   	.has_l3_dpf = 1
> @@ -427,42 +435,43 @@ static const struct intel_device_info intel_broadwell_gt3_info = {
>   static const struct intel_device_info intel_cherryview_info = {
>   	PLATFORM(INTEL_CHERRYVIEW),
>   	GEN(8),
>   	.num_pipes = 3,
>   	.has_hotplug = 1,
>   	.is_lp = 1,
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
>   	.has_64bit_reloc = 1,
>   	.has_runtime_pm = 1,
>   	.has_resource_streamer = 1,
>   	.has_rc6 = 1,
>   	.has_logical_ring_contexts = 1,
>   	.has_gmch_display = 1,
>   	.has_aliasing_ppgtt = 1,
>   	.has_full_ppgtt = 1,
>   	.has_reset_engine = 1,
>   	.has_snoop = true,
> +	.has_coherent_ggtt = false,
>   	.display_mmio_offset = VLV_DISPLAY_BASE,
>   	GEN_DEFAULT_PAGE_SIZES,
>   	GEN_CHV_PIPEOFFSETS,
>   	CURSOR_OFFSETS,
>   	CHV_COLORS,
>   };
>   
>   #define GEN9_DEFAULT_PAGE_SIZES \
>   	.page_sizes = I915_GTT_PAGE_SIZE_4K | \
>   		      I915_GTT_PAGE_SIZE_64K | \
>   		      I915_GTT_PAGE_SIZE_2M
>   
>   #define GEN9_FEATURES \
>   	GEN8_FEATURES, \
>   	GEN(9), \
>   	GEN9_DEFAULT_PAGE_SIZES, \
>   	.has_logical_ring_preemption = 1, \
>   	.has_csr = 1, \
>   	.has_guc = 1, \
>   	.has_ipc = 1, \
>   	.ddb_size = 896
>   
>   #define SKL_PLATFORM \
>   	GEN9_FEATURES, \
>   	PLATFORM(INTEL_SKYLAKE)
> @@ -490,35 +499,36 @@ static const struct intel_device_info intel_skylake_gt3_info = {
>   static const struct intel_device_info intel_skylake_gt4_info = {
>   	SKL_GT3_PLUS_PLATFORM,
>   	.gt = 4,
>   };
>   
>   #define GEN9_LP_FEATURES \
>   	GEN(9), \
>   	.is_lp = 1, \
>   	.has_hotplug = 1, \
>   	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \
>   	.num_pipes = 3, \
>   	.has_64bit_reloc = 1, \
>   	.has_ddi = 1, \
>   	.has_fpga_dbg = 1, \
>   	.has_fbc = 1, \
>   	.has_psr = 1, \
>   	.has_runtime_pm = 1, \
>   	.has_pooled_eu = 0, \
>   	.has_csr = 1, \
>   	.has_resource_streamer = 1, \
>   	.has_rc6 = 1, \
>   	.has_dp_mst = 1, \
>   	.has_logical_ring_contexts = 1, \
>   	.has_logical_ring_preemption = 1, \
>   	.has_guc = 1, \
>   	.has_aliasing_ppgtt = 1, \
>   	.has_full_ppgtt = 1, \
>   	.has_full_48bit_ppgtt = 1, \
>   	.has_reset_engine = 1, \
>   	.has_snoop = true, \
> +	.has_coherent_ggtt = false, \
>   	.has_ipc = 1, \
>   	GEN9_DEFAULT_PAGE_SIZES, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	IVB_CURSOR_OFFSETS, \
>   	BDW_COLORS
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 633f9fbf72ea..07e8364d1a8c 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -33,85 +33,86 @@ struct drm_i915_private;
>   /* Keep in gen based order, and chronological order within a gen */
>   enum intel_platform {
>   	INTEL_PLATFORM_UNINITIALIZED = 0,
>   	/* gen2 */
>   	INTEL_I830,
>   	INTEL_I845G,
>   	INTEL_I85X,
>   	INTEL_I865G,
>   	/* gen3 */
>   	INTEL_I915G,
>   	INTEL_I915GM,
>   	INTEL_I945G,
>   	INTEL_I945GM,
>   	INTEL_G33,
>   	INTEL_PINEVIEW,
>   	/* gen4 */
>   	INTEL_I965G,
>   	INTEL_I965GM,
>   	INTEL_G45,
>   	INTEL_GM45,
>   	/* gen5 */
>   	INTEL_IRONLAKE,
>   	/* gen6 */
>   	INTEL_SANDYBRIDGE,
>   	/* gen7 */
>   	INTEL_IVYBRIDGE,
>   	INTEL_VALLEYVIEW,
>   	INTEL_HASWELL,
>   	/* gen8 */
>   	INTEL_BROADWELL,
>   	INTEL_CHERRYVIEW,
>   	/* gen9 */
>   	INTEL_SKYLAKE,
>   	INTEL_BROXTON,
>   	INTEL_KABYLAKE,
>   	INTEL_GEMINILAKE,
>   	INTEL_COFFEELAKE,
>   	/* gen10 */
>   	INTEL_CANNONLAKE,
>   	/* gen11 */
>   	INTEL_ICELAKE,
>   	INTEL_MAX_PLATFORMS
>   };
>   
>   #define DEV_INFO_FOR_EACH_FLAG(func) \
>   	func(is_mobile); \
>   	func(is_lp); \
>   	func(is_alpha_support); \
>   	/* Keep has_* in alphabetical order */ \
>   	func(has_64bit_reloc); \
>   	func(has_aliasing_ppgtt); \
>   	func(has_csr); \
>   	func(has_ddi); \
>   	func(has_dp_mst); \
>   	func(has_reset_engine); \
>   	func(has_fbc); \
>   	func(has_fpga_dbg); \
>   	func(has_full_ppgtt); \
>   	func(has_full_48bit_ppgtt); \
>   	func(has_gmch_display); \
>   	func(has_guc); \
>   	func(has_guc_ct); \
>   	func(has_hotplug); \
>   	func(has_l3_dpf); \
>   	func(has_llc); \
>   	func(has_logical_ring_contexts); \
>   	func(has_logical_ring_elsq); \
>   	func(has_logical_ring_preemption); \
>   	func(has_overlay); \
>   	func(has_pooled_eu); \
>   	func(has_psr); \
>   	func(has_rc6); \
>   	func(has_rc6p); \
>   	func(has_resource_streamer); \
>   	func(has_runtime_pm); \
>   	func(has_snoop); \
> +	func(has_coherent_ggtt); \
>   	func(unfenced_needs_alignment); \
>   	func(cursor_needs_physical); \
>   	func(hws_needs_physical); \
>   	func(overlay_needs_physical); \
>   	func(supports_tv); \
>   	func(has_ipc);
>   
>   #define GEN_MAX_SLICES		(6) /* CNL upper bound */
>   #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634ce8e88..8ee81e6f151c 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait {
>    */
>   #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
>   
> +/*
> + * Once upon a time we supposed that writes through the GGTT would be
> + * immediately in physical memory (once flushed out of the CPU path). However,
> + * on a few different processors and chipsets, this is not necessarily the case
> + * as the writes appear to be buffered internally. Thus a read of the backing
> + * storage (physical memory) via a different path (with different physical tags
> + * to the indirect write via the GGTT) will see stale values from before
> + * the GGTT write. Inside the kernel, we can for the most part keep track of
> + * the different read/write domains in use (e.g. set-domain), but the assumption
> + * of coherency is baked into the ABI, hence reporting its true state in this
> + * parameter.
> + *
> + * If set to true, writes via mmap_gtt are immediately visible following an
> + * lfence to flush the WCB.
> + *
> + * If set to false, writes via mmap_gtt are indeterminatnly delayed in an in
> + * internal buffer and are _not_ immediately visible to third parties accessing
> + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
> + * communications channel when set to false is strongly disadvised.

I would perhaps change the language from "set to true/false" to 
"reported as true/false".

> + */
> +#define I915_PARAM_MMAP_GTT_COHERENT	52
> +
>   typedef struct drm_i915_getparam {
>   	__s32 param;
>   	/*
> 

Looks fine to me in principle. Is there some userspace ready to start 
consuming it?

Regards,

Tvrtko
Chris Wilson July 20, 2018, 3:21 p.m. UTC | #4
Quoting Tvrtko Ursulin (2018-07-20 16:13:14)
> 
> On 20/07/2018 11:19, Chris Wilson wrote:
> > Not all chipsets have an internal buffer delaying the visibility of
> > writes via the GGTT being visible by other physical paths, but we use a
> > very heavy workaround for all. We only need to apply that workarounds to
> > the chipsets we know suffer from the delay and the resulting coherency
> > issue.
> > 
> > Similarly, the same inconsistent coherency fouls up our ABI promise that
> > a write into a mmap_gtt is immediately visible to others. Since the HW
> > has made that a lie, let userspace know when that contract is broken.
> > (Not that userspace would want to use mmap_gtt on those chipsets for
> > other performance reasons...)
> > 
> > Testcase: igt/drv_selftest/live_coherency
> > Testcase: igt/gem_mmap_gtt/coherency
> 
> The list of platforms to mark with false was compiled from the test results?

Yes. Coverage of older chipsets is less than ideal, as we have fewer of
them being tested and the gmch wasn't tied to any processor. So whereas
my PIIIm + i915gm passes, CI's P4 + i915g fails. That is odd.
 
> But then before this patch workaround was applied everywhere - so if the 
> test was failing even then - that means workaround wasn't sufficient?

The test is being run in userspace; bypassing any domain control in the
kernel, assuming the coherency model built into the ABI.

> > +/*
> > + * Once upon a time we supposed that writes through the GGTT would be
> > + * immediately in physical memory (once flushed out of the CPU path). However,
> > + * on a few different processors and chipsets, this is not necessarily the case
> > + * as the writes appear to be buffered internally. Thus a read of the backing
> > + * storage (physical memory) via a different path (with different physical tags
> > + * to the indirect write via the GGTT) will see stale values from before
> > + * the GGTT write. Inside the kernel, we can for the most part keep track of
> > + * the different read/write domains in use (e.g. set-domain), but the assumption
> > + * of coherency is baked into the ABI, hence reporting its true state in this
> > + * parameter.
> > + *
> > + * If set to true, writes via mmap_gtt are immediately visible following an
> > + * lfence to flush the WCB.
> > + *
> > + * If set to false, writes via mmap_gtt are indeterminatnly delayed in an in
> > + * internal buffer and are _not_ immediately visible to third parties accessing
> > + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
> > + * communications channel when set to false is strongly disadvised.
> 
> I would perhaps change the language from "set to true/false" to 
> "reported as true/false".
> 
> > + */
> > +#define I915_PARAM_MMAP_GTT_COHERENT 52
> > +
> >   typedef struct drm_i915_getparam {
> >       __s32 param;
> >       /*
> > 
> 
> Looks fine to me in principle. Is there some userspace ready to start 
> consuming it?

My only plan is for igt to know when the tests are expected to fail.
Real userspace should not be using GTT, it is slow (even slower than
intended ;) and constrained, so subject to aperture thrashing, fencing
is only usable for two out of the many tiling modes, etc, etc, etc.
-Chris
Tvrtko Ursulin July 20, 2018, 3:27 p.m. UTC | #5
On 20/07/2018 16:21, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-07-20 16:13:14)
>>
>> On 20/07/2018 11:19, Chris Wilson wrote:
>>> Not all chipsets have an internal buffer delaying the visibility of
>>> writes via the GGTT being visible by other physical paths, but we use a
>>> very heavy workaround for all. We only need to apply that workarounds to
>>> the chipsets we know suffer from the delay and the resulting coherency
>>> issue.
>>>
>>> Similarly, the same inconsistent coherency fouls up our ABI promise that
>>> a write into a mmap_gtt is immediately visible to others. Since the HW
>>> has made that a lie, let userspace know when that contract is broken.
>>> (Not that userspace would want to use mmap_gtt on those chipsets for
>>> other performance reasons...)
>>>
>>> Testcase: igt/drv_selftest/live_coherency
>>> Testcase: igt/gem_mmap_gtt/coherency
>>
>> The list of platforms to mark with false was compiled from the test results?
> 
> Yes. Coverage of older chipsets is less than ideal, as we have fewer of
> them being tested and the gmch wasn't tied to any processor. So whereas
> my PIIIm + i915gm passes, CI's P4 + i915g fails. That is odd.

Hope the oddity is not hiding something but we'll see.

>> But then before this patch workaround was applied everywhere - so if the
>> test was failing even then - that means workaround wasn't sufficient?
> 
> The test is being run in userspace; bypassing any domain control in the
> kernel, assuming the coherency model built into the ABI.

Ah yes.

>>> +/*
>>> + * Once upon a time we supposed that writes through the GGTT would be
>>> + * immediately in physical memory (once flushed out of the CPU path). However,
>>> + * on a few different processors and chipsets, this is not necessarily the case
>>> + * as the writes appear to be buffered internally. Thus a read of the backing
>>> + * storage (physical memory) via a different path (with different physical tags
>>> + * to the indirect write via the GGTT) will see stale values from before
>>> + * the GGTT write. Inside the kernel, we can for the most part keep track of
>>> + * the different read/write domains in use (e.g. set-domain), but the assumption
>>> + * of coherency is baked into the ABI, hence reporting its true state in this
>>> + * parameter.
>>> + *
>>> + * If set to true, writes via mmap_gtt are immediately visible following an
>>> + * lfence to flush the WCB.
>>> + *
>>> + * If set to false, writes via mmap_gtt are indeterminatnly delayed in an in
>>> + * internal buffer and are _not_ immediately visible to third parties accessing
>>> + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
>>> + * communications channel when set to false is strongly disadvised.
>>
>> I would perhaps change the language from "set to true/false" to
>> "reported as true/false".
>>
>>> + */
>>> +#define I915_PARAM_MMAP_GTT_COHERENT 52
>>> +
>>>    typedef struct drm_i915_getparam {
>>>        __s32 param;
>>>        /*
>>>
>>
>> Looks fine to me in principle. Is there some userspace ready to start
>> consuming it?
> 
> My only plan is for igt to know when the tests are expected to fail.
> Real userspace should not be using GTT, it is slow (even slower than
> intended ;) and constrained, so subject to aperture thrashing, fencing
> is only usable for two out of the many tiling modes, etc, etc, etc.

I'll take the view that get_param is tiny enough for this to be fine.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson July 20, 2018, 3:31 p.m. UTC | #6
Quoting Tvrtko Ursulin (2018-07-20 16:27:20)
> 
> On 20/07/2018 16:21, Chris Wilson wrote:
> > My only plan is for igt to know when the tests are expected to fail.
> > Real userspace should not be using GTT, it is slow (even slower than
> > intended ;) and constrained, so subject to aperture thrashing, fencing
> > is only usable for two out of the many tiling modes, etc, etc, etc.
> 
> I'll take the view that get_param is tiny enough for this to be fine.
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Also take a peek over at
https://patchwork.freedesktop.org/patch/239949/
-Chris
Chris Wilson July 20, 2018, 4:37 p.m. UTC | #7
Quoting Tvrtko Ursulin (2018-07-20 16:13:14)
> 
> On 20/07/2018 11:19, Chris Wilson wrote:
> > +/*
> > + * Once upon a time we supposed that writes through the GGTT would be
> > + * immediately in physical memory (once flushed out of the CPU path). However,
> > + * on a few different processors and chipsets, this is not necessarily the case
> > + * as the writes appear to be buffered internally. Thus a read of the backing
> > + * storage (physical memory) via a different path (with different physical tags
> > + * to the indirect write via the GGTT) will see stale values from before
> > + * the GGTT write. Inside the kernel, we can for the most part keep track of
> > + * the different read/write domains in use (e.g. set-domain), but the assumption
> > + * of coherency is baked into the ABI, hence reporting its true state in this
> > + * parameter.
> > + *
> > + * If set to true, writes via mmap_gtt are immediately visible following an
> > + * lfence to flush the WCB.
> > + *
> > + * If set to false, writes via mmap_gtt are indeterminatnly delayed in an in
> > + * internal buffer and are _not_ immediately visible to third parties accessing
> > + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
> > + * communications channel when set to false is strongly disadvised.
> 
> I would perhaps change the language from "set to true/false" to 
> "reported as true/false".

s/set/reported/ and pushed. Thanks for the reviews,
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f8cfd16be534..18a45e7a3d7c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -302,152 +302,155 @@  static void intel_detect_pch(struct drm_i915_private *dev_priv)
 static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file_priv)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	drm_i915_getparam_t *param = data;
 	int value;
 
 	switch (param->param) {
 	case I915_PARAM_IRQ_ACTIVE:
 	case I915_PARAM_ALLOW_BATCHBUFFER:
 	case I915_PARAM_LAST_DISPATCH:
 	case I915_PARAM_HAS_EXEC_CONSTANTS:
 		/* Reject all old ums/dri params. */
 		return -ENODEV;
 	case I915_PARAM_CHIPSET_ID:
 		value = pdev->device;
 		break;
 	case I915_PARAM_REVISION:
 		value = pdev->revision;
 		break;
 	case I915_PARAM_NUM_FENCES_AVAIL:
 		value = dev_priv->num_fence_regs;
 		break;
 	case I915_PARAM_HAS_OVERLAY:
 		value = dev_priv->overlay ? 1 : 0;
 		break;
 	case I915_PARAM_HAS_BSD:
 		value = !!dev_priv->engine[VCS];
 		break;
 	case I915_PARAM_HAS_BLT:
 		value = !!dev_priv->engine[BCS];
 		break;
 	case I915_PARAM_HAS_VEBOX:
 		value = !!dev_priv->engine[VECS];
 		break;
 	case I915_PARAM_HAS_BSD2:
 		value = !!dev_priv->engine[VCS2];
 		break;
 	case I915_PARAM_HAS_LLC:
 		value = HAS_LLC(dev_priv);
 		break;
 	case I915_PARAM_HAS_WT:
 		value = HAS_WT(dev_priv);
 		break;
 	case I915_PARAM_HAS_ALIASING_PPGTT:
 		value = USES_PPGTT(dev_priv);
 		break;
 	case I915_PARAM_HAS_SEMAPHORES:
 		value = HAS_LEGACY_SEMAPHORES(dev_priv);
 		break;
 	case I915_PARAM_HAS_SECURE_BATCHES:
 		value = capable(CAP_SYS_ADMIN);
 		break;
 	case I915_PARAM_CMD_PARSER_VERSION:
 		value = i915_cmd_parser_get_version(dev_priv);
 		break;
 	case I915_PARAM_SUBSLICE_TOTAL:
 		value = sseu_subslice_total(&INTEL_INFO(dev_priv)->sseu);
 		if (!value)
 			return -ENODEV;
 		break;
 	case I915_PARAM_EU_TOTAL:
 		value = INTEL_INFO(dev_priv)->sseu.eu_total;
 		if (!value)
 			return -ENODEV;
 		break;
 	case I915_PARAM_HAS_GPU_RESET:
 		value = i915_modparams.enable_hangcheck &&
 			intel_has_gpu_reset(dev_priv);
 		if (value && intel_has_reset_engine(dev_priv))
 			value = 2;
 		break;
 	case I915_PARAM_HAS_RESOURCE_STREAMER:
 		value = HAS_RESOURCE_STREAMER(dev_priv);
 		break;
 	case I915_PARAM_HAS_POOLED_EU:
 		value = HAS_POOLED_EU(dev_priv);
 		break;
 	case I915_PARAM_MIN_EU_IN_POOL:
 		value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool;
 		break;
 	case I915_PARAM_HUC_STATUS:
 		value = intel_huc_check_status(&dev_priv->huc);
 		if (value < 0)
 			return value;
 		break;
 	case I915_PARAM_MMAP_GTT_VERSION:
 		/* Though we've started our numbering from 1, and so class all
 		 * earlier versions as 0, in effect their value is undefined as
 		 * the ioctl will report EINVAL for the unknown param!
 		 */
 		value = i915_gem_mmap_gtt_version();
 		break;
 	case I915_PARAM_HAS_SCHEDULER:
 		value = dev_priv->caps.scheduler;
 		break;
 
 	case I915_PARAM_MMAP_VERSION:
 		/* Remember to bump this if the version changes! */
 	case I915_PARAM_HAS_GEM:
 	case I915_PARAM_HAS_PAGEFLIPPING:
 	case I915_PARAM_HAS_EXECBUF2: /* depends on GEM */
 	case I915_PARAM_HAS_RELAXED_FENCING:
 	case I915_PARAM_HAS_COHERENT_RINGS:
 	case I915_PARAM_HAS_RELAXED_DELTA:
 	case I915_PARAM_HAS_GEN7_SOL_RESET:
 	case I915_PARAM_HAS_WAIT_TIMEOUT:
 	case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
 	case I915_PARAM_HAS_PINNED_BATCHES:
 	case I915_PARAM_HAS_EXEC_NO_RELOC:
 	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
 	case I915_PARAM_HAS_EXEC_ASYNC:
 	case I915_PARAM_HAS_EXEC_FENCE:
 	case I915_PARAM_HAS_EXEC_CAPTURE:
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
 		 * INTEL_INFO(), a feature macro, or similar.
 		 */
 		value = 1;
 		break;
 	case I915_PARAM_HAS_CONTEXT_ISOLATION:
 		value = intel_engines_has_context_isolation(dev_priv);
 		break;
 	case I915_PARAM_SLICE_MASK:
 		value = INTEL_INFO(dev_priv)->sseu.slice_mask;
 		if (!value)
 			return -ENODEV;
 		break;
 	case I915_PARAM_SUBSLICE_MASK:
 		value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
 		if (!value)
 			return -ENODEV;
 		break;
 	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
 		value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz;
 		break;
+	case I915_PARAM_MMAP_GTT_COHERENT:
+		value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
 	}
 
 	if (put_user(value, param->value))
 		return -EFAULT;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fcc73a6ab503..8b52cb768a67 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -784,31 +784,36 @@  fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 {
 	/*
 	 * No actual flushing is required for the GTT write domain for reads
 	 * from the GTT domain. Writes to it "immediately" go to main memory
 	 * as far as we know, so there's no chipset flush. It also doesn't
 	 * land in the GPU render cache.
 	 *
 	 * However, we do have to enforce the order so that all writes through
 	 * the GTT land before any writes to the device, such as updates to
 	 * the GATT itself.
 	 *
 	 * We also have to wait a bit for the writes to land from the GTT.
 	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
 	 * timing. This issue has only been observed when switching quickly
 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
 	 * system agents we cannot reproduce this behaviour, until Cannonlake
 	 * that was!).
 	 */
 
+	wmb();
+
+	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
+		return;
+
 	i915_gem_chipset_flush(dev_priv);
 
 	intel_runtime_pm_get(dev_priv);
 	spin_lock_irq(&dev_priv->uncore.lock);
 
 	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
 
 	spin_unlock_irq(&dev_priv->uncore.lock);
 	intel_runtime_pm_put(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 6a4d1388ad2d..e443fe44da3a 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1,79 +1,80 @@ 
 /*
  * Copyright © 2016 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  *
  */
 
 #include <linux/console.h>
 #include <linux/vgaarb.h>
 #include <linux/vga_switcheroo.h>
 
 #include "i915_drv.h"
 #include "i915_selftest.h"
 
 #define PLATFORM(x) .platform = (x), .platform_mask = BIT(x)
 #define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1)
 
 #define GEN_DEFAULT_PIPEOFFSETS \
 	.pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \
 			  PIPE_C_OFFSET, PIPE_EDP_OFFSET }, \
 	.trans_offsets = { TRANSCODER_A_OFFSET, TRANSCODER_B_OFFSET, \
 			   TRANSCODER_C_OFFSET, TRANSCODER_EDP_OFFSET }, \
 	.palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET }
 
 #define GEN_CHV_PIPEOFFSETS \
 	.pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \
 			  CHV_PIPE_C_OFFSET }, \
 	.trans_offsets = { TRANSCODER_A_OFFSET, TRANSCODER_B_OFFSET, \
 			   CHV_TRANSCODER_C_OFFSET, }, \
 	.palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET, \
 			     CHV_PALETTE_C_OFFSET }
 
 #define CURSOR_OFFSETS \
 	.cursor_offsets = { CURSOR_A_OFFSET, CURSOR_B_OFFSET, CHV_CURSOR_C_OFFSET }
 
 #define IVB_CURSOR_OFFSETS \
 	.cursor_offsets = { CURSOR_A_OFFSET, IVB_CURSOR_B_OFFSET, IVB_CURSOR_C_OFFSET }
 
 #define BDW_COLORS \
 	.color = { .degamma_lut_size = 512, .gamma_lut_size = 512 }
 #define CHV_COLORS \
 	.color = { .degamma_lut_size = 65, .gamma_lut_size = 257 }
 #define GLK_COLORS \
 	.color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 }
 
 /* Keep in gen based order, and chronological order within a gen */
 
 #define GEN_DEFAULT_PAGE_SIZES \
 	.page_sizes = I915_GTT_PAGE_SIZE_4K
 
 #define GEN2_FEATURES \
 	GEN(2), \
 	.num_pipes = 1, \
 	.has_overlay = 1, .overlay_needs_physical = 1, \
 	.has_gmch_display = 1, \
 	.hws_needs_physical = 1, \
 	.unfenced_needs_alignment = 1, \
 	.ring_mask = RENDER_RING, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = false, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	CURSOR_OFFSETS
@@ -102,14 +103,15 @@  static const struct intel_device_info intel_i85x_info = {
 static const struct intel_device_info intel_i865g_info = {
 	GEN2_FEATURES,
 	PLATFORM(INTEL_I865G),
 };
 
 #define GEN3_FEATURES \
 	GEN(3), \
 	.num_pipes = 2, \
 	.has_gmch_display = 1, \
 	.ring_mask = RENDER_RING, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = true, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	CURSOR_OFFSETS
@@ -117,8 +119,9 @@  static const struct intel_device_info intel_i865g_info = {
 static const struct intel_device_info intel_i915g_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_I915G),
+	.has_coherent_ggtt = false,
 	.cursor_needs_physical = 1,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.hws_needs_physical = 1,
 	.unfenced_needs_alignment = 1,
 };
@@ -166,18 +169,19 @@  static const struct intel_device_info intel_g33_info = {
 static const struct intel_device_info intel_pineview_info = {
 	GEN3_FEATURES,
 	PLATFORM(INTEL_PINEVIEW),
 	.is_mobile = 1,
 	.has_hotplug = 1,
 	.has_overlay = 1,
 };
 
 #define GEN4_FEATURES \
 	GEN(4), \
 	.num_pipes = 2, \
 	.has_hotplug = 1, \
 	.has_gmch_display = 1, \
 	.ring_mask = RENDER_RING, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = true, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	CURSOR_OFFSETS
@@ -209,19 +213,20 @@  static const struct intel_device_info intel_g45_info = {
 static const struct intel_device_info intel_gm45_info = {
 	GEN4_FEATURES,
 	PLATFORM(INTEL_GM45),
 	.is_mobile = 1, .has_fbc = 1,
 	.supports_tv = 1,
 	.ring_mask = RENDER_RING | BSD_RING,
 };
 
 #define GEN5_FEATURES \
 	GEN(5), \
 	.num_pipes = 2, \
 	.has_hotplug = 1, \
 	.ring_mask = RENDER_RING | BSD_RING, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = true, \
 	/* ilk does support rc6, but we do not implement [power] contexts */ \
 	.has_rc6 = 0, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	CURSOR_OFFSETS
@@ -234,23 +239,24 @@  static const struct intel_device_info intel_ironlake_d_info = {
 static const struct intel_device_info intel_ironlake_m_info = {
 	GEN5_FEATURES,
 	PLATFORM(INTEL_IRONLAKE),
 	.is_mobile = 1, .has_fbc = 1,
 };
 
 #define GEN6_FEATURES \
 	GEN(6), \
 	.num_pipes = 2, \
 	.has_hotplug = 1, \
 	.has_fbc = 1, \
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
+	.has_coherent_ggtt = true, \
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
 	.has_aliasing_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	CURSOR_OFFSETS
 
 #define SNB_D_PLATFORM \
 	GEN6_FEATURES, \
 	PLATFORM(INTEL_SANDYBRIDGE)
@@ -279,24 +285,25 @@  static const struct intel_device_info intel_sandybridge_m_gt1_info = {
 static const struct intel_device_info intel_sandybridge_m_gt2_info = {
 	SNB_M_PLATFORM,
 	.gt = 2,
 };
 
 #define GEN7_FEATURES  \
 	GEN(7), \
 	.num_pipes = 3, \
 	.has_hotplug = 1, \
 	.has_fbc = 1, \
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
+	.has_coherent_ggtt = true, \
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
 	.has_aliasing_ppgtt = 1, \
 	.has_full_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
 	IVB_CURSOR_OFFSETS
 
 #define IVB_D_PLATFORM \
 	GEN7_FEATURES, \
 	PLATFORM(INTEL_IVYBRIDGE), \
 	.has_l3_dpf = 1
@@ -338,34 +345,35 @@  static const struct intel_device_info intel_ivybridge_q_info = {
 static const struct intel_device_info intel_valleyview_info = {
 	PLATFORM(INTEL_VALLEYVIEW),
 	GEN(7),
 	.is_lp = 1,
 	.num_pipes = 2,
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
 	.has_gmch_display = 1,
 	.has_hotplug = 1,
 	.has_aliasing_ppgtt = 1,
 	.has_full_ppgtt = 1,
 	.has_snoop = true,
+	.has_coherent_ggtt = false,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING,
 	.display_mmio_offset = VLV_DISPLAY_BASE,
 	GEN_DEFAULT_PAGE_SIZES,
 	GEN_DEFAULT_PIPEOFFSETS,
 	CURSOR_OFFSETS
 };
 
 #define G75_FEATURES  \
 	GEN7_FEATURES, \
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \
 	.has_ddi = 1, \
 	.has_fpga_dbg = 1, \
 	.has_psr = 1, \
 	.has_resource_streamer = 1, \
 	.has_dp_mst = 1, \
 	.has_rc6p = 0 /* RC6p removed-by HSW */, \
 	.has_runtime_pm = 1
 
 #define HSW_PLATFORM \
 	G75_FEATURES, \
 	PLATFORM(INTEL_HASWELL), \
 	.has_l3_dpf = 1
@@ -427,42 +435,43 @@  static const struct intel_device_info intel_broadwell_gt3_info = {
 static const struct intel_device_info intel_cherryview_info = {
 	PLATFORM(INTEL_CHERRYVIEW),
 	GEN(8),
 	.num_pipes = 3,
 	.has_hotplug = 1,
 	.is_lp = 1,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
 	.has_64bit_reloc = 1,
 	.has_runtime_pm = 1,
 	.has_resource_streamer = 1,
 	.has_rc6 = 1,
 	.has_logical_ring_contexts = 1,
 	.has_gmch_display = 1,
 	.has_aliasing_ppgtt = 1,
 	.has_full_ppgtt = 1,
 	.has_reset_engine = 1,
 	.has_snoop = true,
+	.has_coherent_ggtt = false,
 	.display_mmio_offset = VLV_DISPLAY_BASE,
 	GEN_DEFAULT_PAGE_SIZES,
 	GEN_CHV_PIPEOFFSETS,
 	CURSOR_OFFSETS,
 	CHV_COLORS,
 };
 
 #define GEN9_DEFAULT_PAGE_SIZES \
 	.page_sizes = I915_GTT_PAGE_SIZE_4K | \
 		      I915_GTT_PAGE_SIZE_64K | \
 		      I915_GTT_PAGE_SIZE_2M
 
 #define GEN9_FEATURES \
 	GEN8_FEATURES, \
 	GEN(9), \
 	GEN9_DEFAULT_PAGE_SIZES, \
 	.has_logical_ring_preemption = 1, \
 	.has_csr = 1, \
 	.has_guc = 1, \
 	.has_ipc = 1, \
 	.ddb_size = 896
 
 #define SKL_PLATFORM \
 	GEN9_FEATURES, \
 	PLATFORM(INTEL_SKYLAKE)
@@ -490,35 +499,36 @@  static const struct intel_device_info intel_skylake_gt3_info = {
 static const struct intel_device_info intel_skylake_gt4_info = {
 	SKL_GT3_PLUS_PLATFORM,
 	.gt = 4,
 };
 
 #define GEN9_LP_FEATURES \
 	GEN(9), \
 	.is_lp = 1, \
 	.has_hotplug = 1, \
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \
 	.num_pipes = 3, \
 	.has_64bit_reloc = 1, \
 	.has_ddi = 1, \
 	.has_fpga_dbg = 1, \
 	.has_fbc = 1, \
 	.has_psr = 1, \
 	.has_runtime_pm = 1, \
 	.has_pooled_eu = 0, \
 	.has_csr = 1, \
 	.has_resource_streamer = 1, \
 	.has_rc6 = 1, \
 	.has_dp_mst = 1, \
 	.has_logical_ring_contexts = 1, \
 	.has_logical_ring_preemption = 1, \
 	.has_guc = 1, \
 	.has_aliasing_ppgtt = 1, \
 	.has_full_ppgtt = 1, \
 	.has_full_48bit_ppgtt = 1, \
 	.has_reset_engine = 1, \
 	.has_snoop = true, \
+	.has_coherent_ggtt = false, \
 	.has_ipc = 1, \
 	GEN9_DEFAULT_PAGE_SIZES, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	IVB_CURSOR_OFFSETS, \
 	BDW_COLORS
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 633f9fbf72ea..07e8364d1a8c 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -33,85 +33,86 @@  struct drm_i915_private;
 /* Keep in gen based order, and chronological order within a gen */
 enum intel_platform {
 	INTEL_PLATFORM_UNINITIALIZED = 0,
 	/* gen2 */
 	INTEL_I830,
 	INTEL_I845G,
 	INTEL_I85X,
 	INTEL_I865G,
 	/* gen3 */
 	INTEL_I915G,
 	INTEL_I915GM,
 	INTEL_I945G,
 	INTEL_I945GM,
 	INTEL_G33,
 	INTEL_PINEVIEW,
 	/* gen4 */
 	INTEL_I965G,
 	INTEL_I965GM,
 	INTEL_G45,
 	INTEL_GM45,
 	/* gen5 */
 	INTEL_IRONLAKE,
 	/* gen6 */
 	INTEL_SANDYBRIDGE,
 	/* gen7 */
 	INTEL_IVYBRIDGE,
 	INTEL_VALLEYVIEW,
 	INTEL_HASWELL,
 	/* gen8 */
 	INTEL_BROADWELL,
 	INTEL_CHERRYVIEW,
 	/* gen9 */
 	INTEL_SKYLAKE,
 	INTEL_BROXTON,
 	INTEL_KABYLAKE,
 	INTEL_GEMINILAKE,
 	INTEL_COFFEELAKE,
 	/* gen10 */
 	INTEL_CANNONLAKE,
 	/* gen11 */
 	INTEL_ICELAKE,
 	INTEL_MAX_PLATFORMS
 };
 
 #define DEV_INFO_FOR_EACH_FLAG(func) \
 	func(is_mobile); \
 	func(is_lp); \
 	func(is_alpha_support); \
 	/* Keep has_* in alphabetical order */ \
 	func(has_64bit_reloc); \
 	func(has_aliasing_ppgtt); \
 	func(has_csr); \
 	func(has_ddi); \
 	func(has_dp_mst); \
 	func(has_reset_engine); \
 	func(has_fbc); \
 	func(has_fpga_dbg); \
 	func(has_full_ppgtt); \
 	func(has_full_48bit_ppgtt); \
 	func(has_gmch_display); \
 	func(has_guc); \
 	func(has_guc_ct); \
 	func(has_hotplug); \
 	func(has_l3_dpf); \
 	func(has_llc); \
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
 	func(has_rc6); \
 	func(has_rc6p); \
 	func(has_resource_streamer); \
 	func(has_runtime_pm); \
 	func(has_snoop); \
+	func(has_coherent_ggtt); \
 	func(unfenced_needs_alignment); \
 	func(cursor_needs_physical); \
 	func(hws_needs_physical); \
 	func(overlay_needs_physical); \
 	func(supports_tv); \
 	func(has_ipc);
 
 #define GEN_MAX_SLICES		(6) /* CNL upper bound */
 #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..8ee81e6f151c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -529,6 +529,28 @@  typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
 
+/*
+ * Once upon a time we supposed that writes through the GGTT would be
+ * immediately in physical memory (once flushed out of the CPU path). However,
+ * on a few different processors and chipsets, this is not necessarily the case
+ * as the writes appear to be buffered internally. Thus a read of the backing
+ * storage (physical memory) via a different path (with different physical tags
+ * to the indirect write via the GGTT) will see stale values from before
+ * the GGTT write. Inside the kernel, we can for the most part keep track of
+ * the different read/write domains in use (e.g. set-domain), but the assumption
+ * of coherency is baked into the ABI, hence reporting its true state in this
+ * parameter.
+ *
+ * If set to true, writes via mmap_gtt are immediately visible following an
+ * lfence to flush the WCB.
+ *
+ * If set to false, writes via mmap_gtt are indeterminatnly delayed in an in
+ * internal buffer and are _not_ immediately visible to third parties accessing
+ * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
+ * communications channel when set to false is strongly disadvised.
+ */
+#define I915_PARAM_MMAP_GTT_COHERENT	52
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*