diff mbox series

drm/i915/gem: Calculate object page offset for partial memory mapping

Message ID 20240325134033.287913-1-andi.shyti@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/gem: Calculate object page offset for partial memory mapping | expand

Commit Message

Andi Shyti March 25, 2024, 1:40 p.m. UTC
To enable partial memory mapping of GPU virtual memory, it's
necessary to introduce an offset to the object's memory
(obj->mm.pages) scatterlist. This adjustment compensates for
instances when userspace mappings do not start from the beginning
of the object.

Based on a patch by Chris Wilson
<chris.p.wilson@linux.intel.com>.

Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c |  8 +++++---
 drivers/gpu/drm/i915/i915_mm.c           | 12 +++++++++++-
 drivers/gpu/drm/i915/i915_mm.h           |  3 ++-
 3 files changed, 18 insertions(+), 5 deletions(-)

Comments

Nirmoy Das March 25, 2024, 11:19 p.m. UTC | #1
Hi Andi,

I have too many questions :) I think the patch makes sense, but I need more
context; see below:

On 3/25/2024 2:40 PM, Andi Shyti wrote:
> To enable partial memory mapping of GPU virtual memory, it's
> necessary to introduce an offset to the object's memory
> (obj->mm.pages) scatterlist. This adjustment compensates for
> instances when userspace mappings do not start from the beginning
> of the object.
>
> Based on a patch by Chris Wilson
> <chris.p.wilson@linux.intel.com>.
>
> Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_mman.c |  8 +++++---
>   drivers/gpu/drm/i915/i915_mm.c           | 12 +++++++++++-
>   drivers/gpu/drm/i915/i915_mm.h           |  3 ++-
>   3 files changed, 18 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index a2195e28b625..57a2dda2c3cc 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -276,7 +276,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
>   	/* PTEs are revoked in obj->ops->put_pages() */
>   	err = remap_io_sg(area,
>   			  area->vm_start, area->vm_end - area->vm_start,
> -			  obj->mm.pages->sgl, iomap);
> +			  obj->mm.pages->sgl, 0, iomap);

Why don't we need partial mmap for CPU but only for GTT ?

Sounds like this also needs to be covered by an IGT test. Don't we need a
"Fixes" tag for this?

Regards,

Nirmoy

>   
>   	if (area->vm_flags & VM_WRITE) {
>   		GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> @@ -302,14 +302,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
>   	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
>   	bool write = area->vm_flags & VM_WRITE;
>   	struct i915_gem_ww_ctx ww;
> +	unsigned long obj_offset;
>   	intel_wakeref_t wakeref;
>   	struct i915_vma *vma;
>   	pgoff_t page_offset;
>   	int srcu;
>   	int ret;
>   
> -	/* We don't use vmf->pgoff since that has the fake offset */
> +	obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
>   	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
> +	page_offset += obj_offset;
>   
>   	trace_i915_gem_object_fault(obj, page_offset, true, write);
>   
> @@ -404,7 +406,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
>   
>   	/* Finally, remap it using the new GTT offset */
>   	ret = remap_io_mapping(area,
> -			       area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT),
> +			       area->vm_start + ((vma->gtt_view.partial.offset - obj_offset) << PAGE_SHIFT),
>   			       (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT,
>   			       min_t(u64, vma->size, area->vm_end - area->vm_start),
>   			       &ggtt->iomap);
> diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
> index 7998bc74ab49..f5c97a620962 100644
> --- a/drivers/gpu/drm/i915/i915_mm.c
> +++ b/drivers/gpu/drm/i915/i915_mm.c
> @@ -122,13 +122,15 @@ int remap_io_mapping(struct vm_area_struct *vma,
>    * @addr: target user address to start at
>    * @size: size of map area
>    * @sgl: Start sg entry
> + * @offset: offset from the start of the page
>    * @iobase: Use stored dma address offset by this address or pfn if -1
>    *
>    *  Note: this is only safe if the mm semaphore is held when called.
>    */
>   int remap_io_sg(struct vm_area_struct *vma,
>   		unsigned long addr, unsigned long size,
> -		struct scatterlist *sgl, resource_size_t iobase)
> +		struct scatterlist *sgl, unsigned long offset,
> +		resource_size_t iobase)
>   {
>   	struct remap_pfn r = {
>   		.mm = vma->vm_mm,
> @@ -141,6 +143,14 @@ int remap_io_sg(struct vm_area_struct *vma,
>   	/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
>   	GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
>   
> +	while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) {
> +		offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT;
> +		r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase));
> +		if (!r.sgt.sgp)
> +			return -EINVAL;
> +	}
> +	r.sgt.curr = offset << PAGE_SHIFT;
> +
>   	if (!use_dma(iobase))
>   		flush_cache_range(vma, addr, size);
>   
> diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h
> index 04c8974d822b..69f9351b1a1c 100644
> --- a/drivers/gpu/drm/i915/i915_mm.h
> +++ b/drivers/gpu/drm/i915/i915_mm.h
> @@ -30,6 +30,7 @@ int remap_io_mapping(struct vm_area_struct *vma,
>   
>   int remap_io_sg(struct vm_area_struct *vma,
>   		unsigned long addr, unsigned long size,
> -		struct scatterlist *sgl, resource_size_t iobase);
> +		struct scatterlist *sgl, unsigned long offset,
> +		resource_size_t iobase);
>   
>   #endif /* __I915_MM_H__ */
Andi Shyti March 26, 2024, 11:12 a.m. UTC | #2
Hi Nirmoy,

...

> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > index a2195e28b625..57a2dda2c3cc 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > @@ -276,7 +276,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
> >   	/* PTEs are revoked in obj->ops->put_pages() */
> >   	err = remap_io_sg(area,
> >   			  area->vm_start, area->vm_end - area->vm_start,
> > -			  obj->mm.pages->sgl, iomap);
> > +			  obj->mm.pages->sgl, 0, iomap);
> 
> Why don't we need partial mmap for CPU but only for GTT ?

As far as I understood we don't. I have a version with the CPU
offset as well in trybot[*]

But without support for segmented buffer objects, I don't know
how much of an effect this has.

> Sounds like this also need to be cover by a IGT tests.

Yes, it does need some IGT work; I'm working on it.

> Don't we need "Fixes" tag for this?

Why should we? I'm not fixing anything here, I'm just
recalculating the mapping not starting from the beginning of the
scatter page.

Andi

[*] https://patchwork.freedesktop.org/patch/584474/?series=131539&rev=2
Nirmoy Das March 26, 2024, 12:05 p.m. UTC | #3
Hi Andi,

On 3/26/2024 12:12 PM, Andi Shyti wrote:
> Hi Nirmoy,
>
> ...
>
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
>>> index a2195e28b625..57a2dda2c3cc 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
>>> @@ -276,7 +276,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
>>>    	/* PTEs are revoked in obj->ops->put_pages() */
>>>    	err = remap_io_sg(area,
>>>    			  area->vm_start, area->vm_end - area->vm_start,
>>> -			  obj->mm.pages->sgl, iomap);
>>> +			  obj->mm.pages->sgl, 0, iomap);
>> Why don't we need partial mmap for CPU but only for GTT ?
> As far as I understood we don't. I have a version with the CPU
> offset as well in trybot[*]
>
> But without support for segmented buffer objects, I don't know
> how much this has any effect.

You confused me more :) Why is a segmented buffer object needed for
partial CPU mmap but not for GTT?

From a high level, GTT and CPU should both support partial mmap, unless I am
missing something here.

>
>> Sounds like this also need to be cover by a IGT tests.
> Yes, I it does need some igt work, working on it.
>
>> Don't we need "Fixes" tag for this?
> Why should we? I'm not fixing anything here,

If userspace expects partial mmap to work, then this is a bug/gap in
i915, so we need to backport this as far as possible. Could you share some
information about the requirement, i.e. why we suddenly need this patch?


Regards,

Nirmoy

>   I'm just
> recalculating the mapping not starting from the beginning of the
> scatter page.
>
> Andi
>
> [*] https://patchwork.freedesktop.org/patch/584474/?series=131539&rev=2
Andi Shyti March 28, 2024, 10:53 a.m. UTC | #4
Hi Nirmoy,

On Tue, Mar 26, 2024 at 01:05:37PM +0100, Nirmoy Das wrote:
> On 3/26/2024 12:12 PM, Andi Shyti wrote:

> > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > > > index a2195e28b625..57a2dda2c3cc 100644
> > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> > > > @@ -276,7 +276,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
> > > >    	/* PTEs are revoked in obj->ops->put_pages() */
> > > >    	err = remap_io_sg(area,
> > > >    			  area->vm_start, area->vm_end - area->vm_start,
> > > > -			  obj->mm.pages->sgl, iomap);
> > > > +			  obj->mm.pages->sgl, 0, iomap);
> > > Why don't we need partial mmap for CPU but only for GTT ?
> > As far as I understood we don't. I have a version with the CPU
> > offset as well in trybot[*]
> > 
> > But without support for segmented buffer objects, I don't know
> > how much this has any effect.
> 
> You confused me more :) Why segmented buffer object is needed for partial
> CPU mmap but not for GTT  ?

Actually, segmented BOs were introduced to support single DMA
buffers instead of fragmented buffers. But this goes beyond the
scope of this patch.

> From high level,  GTT and CPU both should support partial mmap unless I
> missing something here.

But yes, we could take the patch I linked which adds some offset
to the cpu memory. I will add it in V2.

> > 
> > > Sounds like this also need to be cover by a IGT tests.
> > Yes, I it does need some igt work, working on it.
> > 
> > > Don't we need "Fixes" tag for this?
> > Why should we? I'm not fixing anything here,
> 
> If userspace  expects partial mmap to work then this is a bug/gap in i915 so
> we need to
> 
> backport this as far as possible. Need some information about the
> requirement about  why we need this patch suddenly?

But a gap is not a bug. Theoretically we are adding a feature.

On the other hand, it would be a bug if the API promises to add
the offset but in reality it doesn't. I will check whether this is the
case; if so, it needs to be well described in the commit message.

Thanks,
Andi
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index a2195e28b625..57a2dda2c3cc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -276,7 +276,7 @@  static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
 	/* PTEs are revoked in obj->ops->put_pages() */
 	err = remap_io_sg(area,
 			  area->vm_start, area->vm_end - area->vm_start,
-			  obj->mm.pages->sgl, iomap);
+			  obj->mm.pages->sgl, 0, iomap);
 
 	if (area->vm_flags & VM_WRITE) {
 		GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
@@ -302,14 +302,16 @@  static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	bool write = area->vm_flags & VM_WRITE;
 	struct i915_gem_ww_ctx ww;
+	unsigned long obj_offset;
 	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	pgoff_t page_offset;
 	int srcu;
 	int ret;
 
-	/* We don't use vmf->pgoff since that has the fake offset */
+	obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
 	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+	page_offset += obj_offset;
 
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
@@ -404,7 +406,7 @@  static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
-			       area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT),
+			       area->vm_start + ((vma->gtt_view.partial.offset - obj_offset) << PAGE_SHIFT),
 			       (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT,
 			       min_t(u64, vma->size, area->vm_end - area->vm_start),
 			       &ggtt->iomap);
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 7998bc74ab49..f5c97a620962 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -122,13 +122,15 @@  int remap_io_mapping(struct vm_area_struct *vma,
  * @addr: target user address to start at
  * @size: size of map area
  * @sgl: Start sg entry
+ * @offset: offset from the start of the page
  * @iobase: Use stored dma address offset by this address or pfn if -1
  *
  *  Note: this is only safe if the mm semaphore is held when called.
  */
 int remap_io_sg(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long size,
-		struct scatterlist *sgl, resource_size_t iobase)
+		struct scatterlist *sgl, unsigned long offset,
+		resource_size_t iobase)
 {
 	struct remap_pfn r = {
 		.mm = vma->vm_mm,
@@ -141,6 +143,14 @@  int remap_io_sg(struct vm_area_struct *vma,
 	/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
 	GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
 
+	while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) {
+		offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT;
+		r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase));
+		if (!r.sgt.sgp)
+			return -EINVAL;
+	}
+	r.sgt.curr = offset << PAGE_SHIFT;
+
 	if (!use_dma(iobase))
 		flush_cache_range(vma, addr, size);
 
diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h
index 04c8974d822b..69f9351b1a1c 100644
--- a/drivers/gpu/drm/i915/i915_mm.h
+++ b/drivers/gpu/drm/i915/i915_mm.h
@@ -30,6 +30,7 @@  int remap_io_mapping(struct vm_area_struct *vma,
 
 int remap_io_sg(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long size,
-		struct scatterlist *sgl, resource_size_t iobase);
+		struct scatterlist *sgl, unsigned long offset,
+		resource_size_t iobase);
 
 #endif /* __I915_MM_H__ */