diff mbox series

[v2,29/37] drm/i915: Introduce DRM_I915_GEM_MMAP_OFFSET

Message ID 20190627205633.1143-30-matthew.auld@intel.com (mailing list archive)
State New, archived
Headers show
Series Introduce memory region concept (including device local memory) | expand

Commit Message

Matthew Auld June 27, 2019, 8:56 p.m. UTC
From: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>

Add a new CPU mmap implementation that allows multiple fault handlers
that depends on the object's backing pages.

Note that we multiplex mmap_gtt and mmap_offset through the same ioctl,
and use the zero extending behaviour of drm to differentiate between
them, when we inspect the flags.

Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ioctls.h    |  2 ++
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      | 30 ++++++++++++++++++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 ++
 drivers/gpu/drm/i915/i915_drv.c               |  3 +-
 include/uapi/drm/i915_drm.h                   | 31 +++++++++++++++++++
 5 files changed, 68 insertions(+), 1 deletion(-)

Comments

Chris Wilson June 28, 2019, 12:12 a.m. UTC | #1
Quoting Matthew Auld (2019-06-27 21:56:25)
> From: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> 
> Add a new CPU mmap implementation that allows multiple fault handlers
> that depends on the object's backing pages.
> 
> Note that we multiplex mmap_gtt and mmap_offset through the same ioctl,
> and use the zero extending behaviour of drm to differentiate between
> them, when we inspect the flags.
> 
> Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_ioctls.h    |  2 ++
>  drivers/gpu/drm/i915/gem/i915_gem_mman.c      | 30 ++++++++++++++++++
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 ++
>  drivers/gpu/drm/i915/i915_drv.c               |  3 +-
>  include/uapi/drm/i915_drm.h                   | 31 +++++++++++++++++++
>  5 files changed, 68 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
> index ddc7f2a52b3e..5abd5b2172f2 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
> @@ -30,6 +30,8 @@ int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>                         struct drm_file *file);
>  int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
>                             struct drm_file *file);
> +int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
> +                              struct drm_file *file_priv);
>  int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
>                          struct drm_file *file);
>  int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index 7b46f44d9c20..cbf89e80a97b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -536,12 +536,42 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
>                         struct drm_file *file)
>  {
>         struct drm_i915_gem_mmap_offset *args = data;
> +       struct drm_i915_private *i915 = to_i915(dev);
> +
> +       if (args->flags & I915_MMAP_OFFSET_FLAGS)
> +               return i915_gem_mmap_offset_ioctl(dev, data, file);
> +
> +       if (!HAS_MAPPABLE_APERTURE(i915)) {
> +               DRM_ERROR("No aperture, cannot mmap via legacy GTT\n");
> +               return -ENODEV;
> +       }
>  
>         return __assign_gem_object_mmap_data(file, args->handle,
>                                              I915_MMAP_TYPE_GTT,
>                                              &args->offset);
>  }
>  
> +int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
> +                              struct drm_file *file)

This seems highly redundant and not correctly plugged in.
-Chris
Daniel Vetter July 30, 2019, 9:49 a.m. UTC | #2
On Thu, Jun 27, 2019 at 09:56:25PM +0100, Matthew Auld wrote:
> From: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> 
> Add a new CPU mmap implementation that allows multiple fault handlers
> that depends on the object's backing pages.
> 
> Note that we multiplex mmap_gtt and mmap_offset through the same ioctl,
> and use the zero extending behaviour of drm to differentiate between
> them, when we inspect the flags.
> 
> Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

So I thought that the plan is to reject invalid mmaps, i.e. mmap modes
which are not compatibale with all placement options. Given that, why do
we need this?

- cpu mmap with all the flags still keep working, as long as the only
  placement you select is smem.

- for lmem/stolen the only option we have is a wc mapping, either through
  the pci bar or through the gtt. So for objects only sitting in there
  also no problem, we can just keep using the current gtt mmap stuff (but
  redirect it internally).

- that leaves us with objects which can move around. Only option allows is
  WC, and the gtt mmap ioctl does that already. When the object is in smem
  we'll need to redirect it to a cpu wc mmap, but I think we need to do
  that anyway.

So not really seeing what the uapi problem is you're trying to solve here?

Can you pls explain why we need this?

Thanks, Daniel

> ---
>  drivers/gpu/drm/i915/gem/i915_gem_ioctls.h    |  2 ++
>  drivers/gpu/drm/i915/gem/i915_gem_mman.c      | 30 ++++++++++++++++++
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 ++
>  drivers/gpu/drm/i915/i915_drv.c               |  3 +-
>  include/uapi/drm/i915_drm.h                   | 31 +++++++++++++++++++
>  5 files changed, 68 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
> index ddc7f2a52b3e..5abd5b2172f2 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
> @@ -30,6 +30,8 @@ int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file);
>  int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
>  			    struct drm_file *file);
> +int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
> +			       struct drm_file *file_priv);
>  int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
>  			 struct drm_file *file);
>  int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index 7b46f44d9c20..cbf89e80a97b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -536,12 +536,42 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file)
>  {
>  	struct drm_i915_gem_mmap_offset *args = data;
> +	struct drm_i915_private *i915 = to_i915(dev);
> +
> +	if (args->flags & I915_MMAP_OFFSET_FLAGS)
> +		return i915_gem_mmap_offset_ioctl(dev, data, file);
> +
> +	if (!HAS_MAPPABLE_APERTURE(i915)) {
> +		DRM_ERROR("No aperture, cannot mmap via legacy GTT\n");
> +		return -ENODEV;
> +	}
>  
>  	return __assign_gem_object_mmap_data(file, args->handle,
>  					     I915_MMAP_TYPE_GTT,
>  					     &args->offset);
>  }
>  
> +int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
> +			       struct drm_file *file)
> +{
> +	struct drm_i915_gem_mmap_offset *args = data;
> +	enum i915_mmap_type type;
> +
> +	if ((args->flags & (I915_MMAP_OFFSET_WC | I915_MMAP_OFFSET_WB)) &&
> +	    !boot_cpu_has(X86_FEATURE_PAT))
> +		return -ENODEV;
> +
> +	if (args->flags & I915_MMAP_OFFSET_WC)
> +		type = I915_MMAP_TYPE_OFFSET_WC;
> +	else if (args->flags & I915_MMAP_OFFSET_WB)
> +		type = I915_MMAP_TYPE_OFFSET_WB;
> +	else if (args->flags & I915_MMAP_OFFSET_UC)
> +		type = I915_MMAP_TYPE_OFFSET_UC;
> +
> +	return __assign_gem_object_mmap_data(file, args->handle, type,
> +					     &args->offset);
> +}
> +
>  void i915_mmap_offset_object_release(struct kref *ref)
>  {
>  	struct i915_mmap_offset *mmo = container_of(ref,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 86f358da8085..f95e54a25426 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -63,6 +63,9 @@ struct drm_i915_gem_object_ops {
>  
>  enum i915_mmap_type {
>  	I915_MMAP_TYPE_GTT = 0,
> +	I915_MMAP_TYPE_OFFSET_WC,
> +	I915_MMAP_TYPE_OFFSET_WB,
> +	I915_MMAP_TYPE_OFFSET_UC,
>  };
>  
>  struct i915_mmap_offset {
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 0f1f3b7f3029..8dadd6b9a0a9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -459,6 +459,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>  	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>  	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
>  	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> +	case I915_PARAM_MMAP_OFFSET_VERSION:
>  		/* For the time being all of these are always true;
>  		 * if some supported hardware does not have one of these
>  		 * features this value needs to be provided from
> @@ -3176,7 +3177,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_RENDER_ALLOW),
> -	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_OFFSET, i915_gem_mmap_gtt_ioctl, DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, DRM_RENDER_ALLOW),
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 328d05e77d9f..729e729e2282 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -359,6 +359,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_QUERY			0x39
>  #define DRM_I915_GEM_VM_CREATE		0x3a
>  #define DRM_I915_GEM_VM_DESTROY		0x3b
> +#define DRM_I915_GEM_MMAP_OFFSET   	DRM_I915_GEM_MMAP_GTT
>  /* Must be kept compact -- no holes */
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> @@ -421,6 +422,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
>  #define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
>  #define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
> +#define DRM_IOCTL_I915_GEM_MMAP_OFFSET		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_OFFSET, struct drm_i915_gem_mmap_offset)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -610,6 +612,10 @@ typedef struct drm_i915_irq_wait {
>   * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
>   */
>  #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
> +
> +/* Mmap offset ioctl */
> +#define I915_PARAM_MMAP_OFFSET_VERSION	54
> +
>  /* Must be kept compact -- no holes and well documented */
>  
>  typedef struct drm_i915_getparam {
> @@ -785,6 +791,31 @@ struct drm_i915_gem_mmap_gtt {
>  	__u64 offset;
>  };
>  
> +struct drm_i915_gem_mmap_offset {
> +	/** Handle for the object being mapped. */
> +	__u32 handle;
> +	__u32 pad;
> +	/**
> +	 * Fake offset to use for subsequent mmap call
> +	 *
> +	 * This is a fixed-size type for 32/64 compatibility.
> +	 */
> +	__u64 offset;
> +
> +	/**
> +	 * Flags for extended behaviour.
> +	 *
> +	 * It is mandatory that either one of the _WC/_WB flags
> +	 * should be passed here.
> +	 */
> +	__u64 flags;
> +#define I915_MMAP_OFFSET_WC (1 << 0)
> +#define I915_MMAP_OFFSET_WB (1 << 1)
> +#define I915_MMAP_OFFSET_UC (1 << 2)
> +#define I915_MMAP_OFFSET_FLAGS \
> +	(I915_MMAP_OFFSET_WC | I915_MMAP_OFFSET_WB | I915_MMAP_OFFSET_UC)
> +};
> +
>  struct drm_i915_gem_set_domain {
>  	/** Handle for the object */
>  	__u32 handle;
> -- 
> 2.20.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Matthew Auld July 30, 2019, 2:28 p.m. UTC | #3
On 30/07/2019 10:49, Daniel Vetter wrote:
> On Thu, Jun 27, 2019 at 09:56:25PM +0100, Matthew Auld wrote:
>> From: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
>>
>> Add a new CPU mmap implementation that allows multiple fault handlers
>> that depends on the object's backing pages.
>>
>> Note that we multiplex mmap_gtt and mmap_offset through the same ioctl,
>> and use the zero extending behaviour of drm to differentiate between
>> them, when we inspect the flags.
>>
>> Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> So I thought that the plan is to reject invalid mmaps, i.e. mmap modes
> which are not compatibale with all placement options. Given that, why do
> we need this?

We are meant to reject anything !wc for LMEM. There were some patches 
for that but I guess got lost under the radar...

> 
> - cpu mmap with all the flags still keep working, as long as the only
>    placement you select is smem.
> 
> - for lmem/stolen the only option we have is a wc mapping, either through
>    the pci bar or through the gtt. So for objects only sitting in there
>    also no problem, we can just keep using the current gtt mmap stuff (but
>    redirect it internally).
> 
> - that leaves us with objects which can move around. Only option allows is
>    WC, and the gtt mmap ioctl does that already. When the object is in smem
>    we'll need to redirect it to a cpu wc mmap, but I think we need to do
>    that anyway.

So for legacy, gtt_mmap will still go through the aperture, otherwise if 
LMEM is supported then there is no aperture, so we just wc mmap via cpu 
or LMEMBAR depending on the final object placement. And cpu_mmap still 
works if we don't care about LMEM. Hmm, so do we even need most of the 
previous patch then? ALso does that mean we also have to track the 
placement of an object in igt?

gem_mmap__wc:

if (supports_lmem(dev))
	gtt_mmap();
else
	gem_mmap(wc);

gem_mmap__wc:

if (placement_contains(obj, LMEM))
	gtt_mmap();
else
	gem_mmap(wc);

?

> 
> So not really seeing what the uapi problem is you're trying to solve here?
> 
> Can you pls explain why we need this?

The naming of gtt_mmap seemed confusing, since there is no aperture, and 
having one mmap ioctl to cover both smem and lmem seemed like a nice 
idea...also I think umd's stopped using gtt_mmap(or were told to?) but 
maybe those aren't good enough reasons.
Daniel Vetter July 30, 2019, 4:22 p.m. UTC | #4
On Tue, Jul 30, 2019 at 03:28:11PM +0100, Matthew Auld wrote:
> On 30/07/2019 10:49, Daniel Vetter wrote:
> > On Thu, Jun 27, 2019 at 09:56:25PM +0100, Matthew Auld wrote:
> > > From: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> > > 
> > > Add a new CPU mmap implementation that allows multiple fault handlers
> > > that depends on the object's backing pages.
> > > 
> > > Note that we multiplex mmap_gtt and mmap_offset through the same ioctl,
> > > and use the zero extending behaviour of drm to differentiate between
> > > them, when we inspect the flags.
> > > 
> > > Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> > > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > 
> > So I thought that the plan is to reject invalid mmaps, i.e. mmap modes
> > which are not compatibale with all placement options. Given that, why do
> > we need this?
> 
> We are meant to reject anything !wc for LMEM. There were some patches for
> that but I guess got lost under the radar...
> 
> > 
> > - cpu mmap with all the flags still keep working, as long as the only
> >    placement you select is smem.
> > 
> > - for lmem/stolen the only option we have is a wc mapping, either through
> >    the pci bar or through the gtt. So for objects only sitting in there
> >    also no problem, we can just keep using the current gtt mmap stuff (but
> >    redirect it internally).
> > 
> > - that leaves us with objects which can move around. Only option allows is
> >    WC, and the gtt mmap ioctl does that already. When the object is in smem
> >    we'll need to redirect it to a cpu wc mmap, but I think we need to do
> >    that anyway.
> 
> So for legacy, gtt_mmap will still go through the aperture, otherwise if
> LMEM is supported then there is no aperture, so we just wc mmap via cpu or
> LMEMBAR depending on the final object placement. And cpu_mmap still works if
> we don't care about LMEM. Hmm, so do we even need most of the previous patch
> then? ALso does that mean we also have to track the placement of an object
> in igt?
> 
> gem_mmap__wc:
> 
> if (supports_lmem(dev))
> 	gtt_mmap();
> else
> 	gem_mmap(wc);
> 
> gem_mmap__wc:
> 
> if (placement_contains(obj, LMEM))
> 	gtt_mmap();
> else
> 	gem_mmap(wc);
> 
> ?

Well if you want cpu wc mmaps, then just allocate it as smem ... we might
need a new gem_mmap__lmem I guess to exercise all the possible ways to get
at stuff in lmem (including when it migrates around underneath us while we
access it through the mmap). I wouldn't try too hard to smash all these
use/testcases into one.

> > So not really seeing what the uapi problem is you're trying to solve here?
> > 
> > Can you pls explain why we need this?
> 
> The naming of gtt_mmap seemed confusing, since there is no aperture, and
> having one mmap ioctl to cover both smem and lmem seemed like a nice
> idea...also I think umd's stopped using gtt_mmap(or were told to?) but maybe
> those aren't good enough reasons.

We stopped using gtt mmap because for many cases cpu WC mmap is faster.

Wrt having a clean slate: Not sure why this would benefit us, we just
diverge a bit more from how this works on !lmem, so a bit more complexity
(not much) everywhere for not much gain.

I'm also not sure whether there will be a whole lot of uses of such a
magic LMEMBAR wc mapping. It's probably slow for the exact same reasons
gtt mmap is slow.
-Daniel
Daniel Vetter Aug. 12, 2019, 4:18 p.m. UTC | #5
On Tue, Jul 30, 2019 at 6:22 PM Daniel Vetter <daniel@ffwll.ch> wrote:
> On Tue, Jul 30, 2019 at 03:28:11PM +0100, Matthew Auld wrote:
> > On 30/07/2019 10:49, Daniel Vetter wrote:
> > > On Thu, Jun 27, 2019 at 09:56:25PM +0100, Matthew Auld wrote:
> > > > From: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> > > >
> > > > Add a new CPU mmap implementation that allows multiple fault handlers
> > > > that depends on the object's backing pages.
> > > >
> > > > Note that we multiplex mmap_gtt and mmap_offset through the same ioctl,
> > > > and use the zero extending behaviour of drm to differentiate between
> > > > them, when we inspect the flags.
> > > >
> > > > Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> > > > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > >
> > > So I thought that the plan is to reject invalid mmaps, i.e. mmap modes
> > > which are not compatibale with all placement options. Given that, why do
> > > we need this?
> >
> > We are meant to reject anything !wc for LMEM. There were some patches for
> > that but I guess got lost under the radar...
> >
> > >
> > > - cpu mmap with all the flags still keep working, as long as the only
> > >    placement you select is smem.
> > >
> > > - for lmem/stolen the only option we have is a wc mapping, either through
> > >    the pci bar or through the gtt. So for objects only sitting in there
> > >    also no problem, we can just keep using the current gtt mmap stuff (but
> > >    redirect it internally).
> > >
> > > - that leaves us with objects which can move around. Only option allows is
> > >    WC, and the gtt mmap ioctl does that already. When the object is in smem
> > >    we'll need to redirect it to a cpu wc mmap, but I think we need to do
> > >    that anyway.
> >
> > So for legacy, gtt_mmap will still go through the aperture, otherwise if
> > LMEM is supported then there is no aperture, so we just wc mmap via cpu or
> > LMEMBAR depending on the final object placement. And cpu_mmap still works if
> > we don't care about LMEM. Hmm, so do we even need most of the previous patch
> > then? ALso does that mean we also have to track the placement of an object
> > in igt?
> >
> > gem_mmap__wc:
> >
> > if (supports_lmem(dev))
> >       gtt_mmap();
> > else
> >       gem_mmap(wc);
> >
> > gem_mmap__wc:
> >
> > if (placement_contains(obj, LMEM))
> >       gtt_mmap();
> > else
> >       gem_mmap(wc);
> >
> > ?
>
> Well if you want cpu wc mmaps, then just allocate it as smem ... we might
> need a new gem_mmap__lmem I guess to exercise all the possible ways to get
> at stuff in lmem (including when it migrates around underneath us while we
> access it through the mmap). I wouldn't try too hard to smash all these
> use/testcases into one.

Chatted a lot with Joonas today, and realized I missread outright what
this does. Looking at the end result I think it's all nicely aligned
with other (discrete/ttm) drivers, so all good from that point of
view. Still not sure whether it's really a good idea to do this fairly
minor uapi cleanup tied in with lmem. But I guess we committed to that
now, so welp ...
-Daniel

> > > So not really seeing what the uapi problem is you're trying to solve here?
> > >
> > > Can you pls explain why we need this?
> >
> > The naming of gtt_mmap seemed confusing, since there is no aperture, and
> > having one mmap ioctl to cover both smem and lmem seemed like a nice
> > idea...also I think umd's stopped using gtt_mmap(or were told to?) but maybe
> > those aren't good enough reasons.
>
> We stopped using gtt mmap because for many cases cpu WC mmap is faster.
>
> Wrt having a clean slate: Not sure why this would benefit us, we just
> diverge a bit more from how this works on !lmem, so a bit more complexity
> (not much) everywhere for not much gain.
>
> I'm also not sure whether there will be a whole lot of uses of such a
> magic LMEMBAR wc mapping. It's probably slow for the exact same reasons
> gtt mmap is slow.
> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
index ddc7f2a52b3e..5abd5b2172f2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -30,6 +30,8 @@  int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
 int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file);
+int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv);
 int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file);
 int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 7b46f44d9c20..cbf89e80a97b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -536,12 +536,42 @@  i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file)
 {
 	struct drm_i915_gem_mmap_offset *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
+
+	if (args->flags & I915_MMAP_OFFSET_FLAGS)
+		return i915_gem_mmap_offset_ioctl(dev, data, file);
+
+	if (!HAS_MAPPABLE_APERTURE(i915)) {
+		DRM_ERROR("No aperture, cannot mmap via legacy GTT\n");
+		return -ENODEV;
+	}
 
 	return __assign_gem_object_mmap_data(file, args->handle,
 					     I915_MMAP_TYPE_GTT,
 					     &args->offset);
 }
 
+int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_gem_mmap_offset *args = data;
+	enum i915_mmap_type type;
+
+	if ((args->flags & (I915_MMAP_OFFSET_WC | I915_MMAP_OFFSET_WB)) &&
+	    !boot_cpu_has(X86_FEATURE_PAT))
+		return -ENODEV;
+
+	if (args->flags & I915_MMAP_OFFSET_WC)
+		type = I915_MMAP_TYPE_OFFSET_WC;
+	else if (args->flags & I915_MMAP_OFFSET_WB)
+		type = I915_MMAP_TYPE_OFFSET_WB;
+	else if (args->flags & I915_MMAP_OFFSET_UC)
+		type = I915_MMAP_TYPE_OFFSET_UC;
+
+	return __assign_gem_object_mmap_data(file, args->handle, type,
+					     &args->offset);
+}
+
 void i915_mmap_offset_object_release(struct kref *ref)
 {
 	struct i915_mmap_offset *mmo = container_of(ref,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 86f358da8085..f95e54a25426 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -63,6 +63,9 @@  struct drm_i915_gem_object_ops {
 
 enum i915_mmap_type {
 	I915_MMAP_TYPE_GTT = 0,
+	I915_MMAP_TYPE_OFFSET_WC,
+	I915_MMAP_TYPE_OFFSET_WB,
+	I915_MMAP_TYPE_OFFSET_UC,
 };
 
 struct i915_mmap_offset {
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0f1f3b7f3029..8dadd6b9a0a9 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -459,6 +459,7 @@  static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
 	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+	case I915_PARAM_MMAP_OFFSET_VERSION:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -3176,7 +3177,7 @@  static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_OFFSET, i915_gem_mmap_gtt_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, DRM_RENDER_ALLOW),
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 328d05e77d9f..729e729e2282 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -359,6 +359,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_I915_QUERY			0x39
 #define DRM_I915_GEM_VM_CREATE		0x3a
 #define DRM_I915_GEM_VM_DESTROY		0x3b
+#define DRM_I915_GEM_MMAP_OFFSET   	DRM_I915_GEM_MMAP_GTT
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -421,6 +422,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
 #define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
 #define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_MMAP_OFFSET		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_OFFSET, struct drm_i915_gem_mmap_offset)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -610,6 +612,10 @@  typedef struct drm_i915_irq_wait {
  * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
  */
 #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
+
+/* Mmap offset ioctl */
+#define I915_PARAM_MMAP_OFFSET_VERSION	54
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -785,6 +791,31 @@  struct drm_i915_gem_mmap_gtt {
 	__u64 offset;
 };
 
+struct drm_i915_gem_mmap_offset {
+	/** Handle for the object being mapped. */
+	__u32 handle;
+	__u32 pad;
+	/**
+	 * Fake offset to use for subsequent mmap call
+	 *
+	 * This is a fixed-size type for 32/64 compatibility.
+	 */
+	__u64 offset;
+
+	/**
+	 * Flags for extended behaviour.
+	 *
+	 * It is mandatory that either one of the _WC/_WB flags
+	 * should be passed here.
+	 */
+	__u64 flags;
+#define I915_MMAP_OFFSET_WC (1 << 0)
+#define I915_MMAP_OFFSET_WB (1 << 1)
+#define I915_MMAP_OFFSET_UC (1 << 2)
+#define I915_MMAP_OFFSET_FLAGS \
+	(I915_MMAP_OFFSET_WC | I915_MMAP_OFFSET_WB | I915_MMAP_OFFSET_UC)
+};
+
 struct drm_i915_gem_set_domain {
 	/** Handle for the object */
 	__u32 handle;