[v8,21/22] drm/i915/vm_bind: Properly build persistent map sg table

Message ID 20221129072635.847-22-niranjana.vishwanathapura@intel.com (mailing list archive)
State New, archived
Series drm/i915/vm_bind: Add VM_BIND functionality

Commit Message

Niranjana Vishwanathapura Nov. 29, 2022, 7:26 a.m. UTC
Properly build the sg table for a persistent mapping, which can be
a partial map of the underlying object. Ensure the sg pages are
properly set for page backed regions; the dump capture support
requires this.

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
---
 drivers/gpu/drm/i915/i915_vma.c | 120 +++++++++++++++++++++++++++++++-
 1 file changed, 119 insertions(+), 1 deletion(-)

Comments

Matthew Auld Dec. 12, 2022, 6:17 p.m. UTC | #1
On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:
> Properly build the sg table for a persistent mapping, which can be
> a partial map of the underlying object. Ensure the sg pages are
> properly set for page backed regions; the dump capture support
> requires this.
> 
> Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_vma.c | 120 +++++++++++++++++++++++++++++++-
>   1 file changed, 119 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 1b9033865768..68a9ac77b4f2 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -1298,6 +1298,120 @@ intel_partial_pages(const struct i915_gtt_view *view,
>   	return ERR_PTR(ret);
>   }
>   
> +static unsigned int
> +intel_copy_dma_sg(struct sg_table *src_st, struct sg_table *dst_st,
> +		  u64 offset, u64 length, bool dry_run)
> +{
> +	struct scatterlist *dst_sg, *src_sg;
> +	unsigned int i, len, nents = 0;
> +
> +	dst_sg = dst_st->sgl;
> +	for_each_sgtable_dma_sg(src_st, src_sg, i) {
> +		if (sg_dma_len(src_sg) <= offset) {
> +			offset -= sg_dma_len(src_sg);
> +			continue;
> +		}
> +
> +		nents++;
> +		len = min(sg_dma_len(src_sg) - offset, length);
> +		if (!dry_run) {
> +			sg_dma_address(dst_sg) = sg_dma_address(src_sg) + offset;
> +			sg_dma_len(dst_sg) = len;
> +			dst_sg = sg_next(dst_sg);
> +		}
> +
> +		length -= len;
> +		offset = 0;
> +		if (!length)
> +			break;
> +	}
> +	WARN_ON_ONCE(length);
> +
> +	return nents;
> +}
> +
> +static unsigned int
> +intel_copy_sg(struct sg_table *src_st, struct sg_table *dst_st,
> +	      u64 offset, u64 length, bool dry_run)
> +{
> +	struct scatterlist *dst_sg, *src_sg;
> +	unsigned int i, len, nents = 0;
> +
> +	dst_sg = dst_st->sgl;
> +	for_each_sgtable_sg(src_st, src_sg, i) {
> +		if (src_sg->length <= offset) {
> +			offset -= src_sg->length;
> +			continue;
> +		}
> +
> +		nents++;
> +		len = min(src_sg->length - offset, length);
> +		if (!dry_run) {
> +			unsigned long pfn;
> +
> +			pfn = page_to_pfn(sg_page(src_sg)) + offset / PAGE_SIZE;
> +			sg_set_page(dst_sg, pfn_to_page(pfn), len, 0);
> +			dst_sg = sg_next(dst_sg);
> +		}
> +
> +		length -= len;
> +		offset = 0;
> +		if (!length)
> +			break;
> +	}
> +	WARN_ON_ONCE(length);
> +
> +	return nents;
> +}
> +
> +static noinline struct sg_table *
> +intel_persistent_partial_pages(const struct i915_gtt_view *view,
> +			       struct drm_i915_gem_object *obj)
> +{
> +	u64 offset = view->partial.offset << PAGE_SHIFT;
> +	struct sg_table *st, *obj_st = obj->mm.pages;
> +	u64 length = view->partial.size << PAGE_SHIFT;
> +	struct scatterlist *sg;
> +	unsigned int nents;
> +	int ret = -ENOMEM;
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (!st)
> +		goto err_st_alloc;
> +
> +	/* Get required sg_table size */
> +	nents = intel_copy_dma_sg(obj_st, st, offset, length, true);
> +	if (i915_gem_object_has_struct_page(obj)) {
> +		unsigned int pg_nents;
> +
> +		pg_nents = intel_copy_sg(obj_st, st, offset, length, true);
> +		if (nents < pg_nents)
> +			nents = pg_nents;
> +	}
> +
> +	ret = sg_alloc_table(st, nents, GFP_KERNEL);
> +	if (ret)
> +		goto err_sg_alloc;
> +
> +	/* Build sg_table for specified <offset, length> section */
> +	intel_copy_dma_sg(obj_st, st, offset, length, false);
> +	if (i915_gem_object_has_struct_page(obj))
> +		intel_copy_sg(obj_st, st, offset, length, false);
> +
> +	/* Mark last sg */
> +	sg = st->sgl;
> +	while (sg_next(sg))
> +		sg = sg_next(sg);
> +	sg_mark_end(sg);

Do we need this bit? The nents is exactly orig_nents, and sg_alloc_table 
will already mark the end for you.
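
From a quick look at lib/scatterlist.c, __sg_alloc_table() runs
sg_init_table() on every chunk it allocates, which does (roughly):

static inline void sg_init_marker(struct scatterlist *sgl,
				  unsigned int nents)
{
	sg_mark_end(&sgl[nents - 1]);
}

void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
	memset(sgl, 0, sizeof(*sgl) * nents);
	sg_init_marker(sgl, nents);
}

so the last entry of the freshly allocated table is already marked.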

Is it not possible to re-use remap_contiguous_pages() somehow? Also, do
we need the dry_run bit if we use i915_sg_trim()? Maybe something like:

dst = sg_alloc_table(partial.size);

remap_contiguous_pages_sg(dst, src);
i915_sg_trim(dst);

dst->nents = 0;
sg = remap_contiguous_pages_dma_sg(dst, src);
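
A very rough, untested sketch of how intel_persistent_partial_pages()
could then look (the remap_contiguous_pages_sg()/_dma_sg() names are
made up here, i.e. something split out along the lines of the existing
remap_contiguous_pages()):

	struct sg_table *st;
	int ret;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return ERR_PTR(-ENOMEM);

	/* One entry per page of the partial view is the upper bound. */
	ret = sg_alloc_table(st, view->partial.size, GFP_KERNEL);
	if (ret) {
		kfree(st);
		return ERR_PTR(ret);
	}

	/* Fill the page entries, then drop the unused tail. */
	remap_contiguous_pages_sg(obj, view->partial.offset,
				  view->partial.size, st);
	i915_sg_trim(st);

	/* Re-walk the same entries to fill in the DMA addresses. */
	st->nents = 0;
	remap_contiguous_pages_dma_sg(obj, view->partial.offset,
				      view->partial.size, st);

	return st;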

> +
> +	return st;
> +
> +err_sg_alloc:
> +	kfree(st);
> +err_st_alloc:
> +	return ERR_PTR(ret);
> +}
> +
>   static int
>   __i915_vma_get_pages(struct i915_vma *vma)
>   {
> @@ -1330,7 +1444,11 @@ __i915_vma_get_pages(struct i915_vma *vma)
>   		break;
>   
>   	case I915_GTT_VIEW_PARTIAL:
> -		pages = intel_partial_pages(&vma->gtt_view, vma->obj);
> +		if (i915_vma_is_persistent(vma))
> +			pages = intel_persistent_partial_pages(&vma->gtt_view,
> +							       vma->obj);
> +		else
> +			pages = intel_partial_pages(&vma->gtt_view, vma->obj);
>   		break;
>   	}
>

Niranjana Vishwanathapura Dec. 14, 2022, 4:58 a.m. UTC | #2
On Mon, Dec 12, 2022 at 06:17:01PM +0000, Matthew Auld wrote:
>On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:
>>Properly build the sg table for a persistent mapping, which can be
>>a partial map of the underlying object. Ensure the sg pages are
>>properly set for page backed regions; the dump capture support
>>requires this.
>>
>>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
>>---
>>  drivers/gpu/drm/i915/i915_vma.c | 120 +++++++++++++++++++++++++++++++-
>>  1 file changed, 119 insertions(+), 1 deletion(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>>index 1b9033865768..68a9ac77b4f2 100644
>>--- a/drivers/gpu/drm/i915/i915_vma.c
>>+++ b/drivers/gpu/drm/i915/i915_vma.c
>>@@ -1298,6 +1298,120 @@ intel_partial_pages(const struct i915_gtt_view *view,
>>  	return ERR_PTR(ret);
>>  }
>>+static unsigned int
>>+intel_copy_dma_sg(struct sg_table *src_st, struct sg_table *dst_st,
>>+		  u64 offset, u64 length, bool dry_run)
>>+{
>>+	struct scatterlist *dst_sg, *src_sg;
>>+	unsigned int i, len, nents = 0;
>>+
>>+	dst_sg = dst_st->sgl;
>>+	for_each_sgtable_dma_sg(src_st, src_sg, i) {
>>+		if (sg_dma_len(src_sg) <= offset) {
>>+			offset -= sg_dma_len(src_sg);
>>+			continue;
>>+		}
>>+
>>+		nents++;
>>+		len = min(sg_dma_len(src_sg) - offset, length);
>>+		if (!dry_run) {
>>+			sg_dma_address(dst_sg) = sg_dma_address(src_sg) + offset;
>>+			sg_dma_len(dst_sg) = len;
>>+			dst_sg = sg_next(dst_sg);
>>+		}
>>+
>>+		length -= len;
>>+		offset = 0;
>>+		if (!length)
>>+			break;
>>+	}
>>+	WARN_ON_ONCE(length);
>>+
>>+	return nents;
>>+}
>>+
>>+static unsigned int
>>+intel_copy_sg(struct sg_table *src_st, struct sg_table *dst_st,
>>+	      u64 offset, u64 length, bool dry_run)
>>+{
>>+	struct scatterlist *dst_sg, *src_sg;
>>+	unsigned int i, len, nents = 0;
>>+
>>+	dst_sg = dst_st->sgl;
>>+	for_each_sgtable_sg(src_st, src_sg, i) {
>>+		if (src_sg->length <= offset) {
>>+			offset -= src_sg->length;
>>+			continue;
>>+		}
>>+
>>+		nents++;
>>+		len = min(src_sg->length - offset, length);
>>+		if (!dry_run) {
>>+			unsigned long pfn;
>>+
>>+			pfn = page_to_pfn(sg_page(src_sg)) + offset / PAGE_SIZE;
>>+			sg_set_page(dst_sg, pfn_to_page(pfn), len, 0);
>>+			dst_sg = sg_next(dst_sg);
>>+		}
>>+
>>+		length -= len;
>>+		offset = 0;
>>+		if (!length)
>>+			break;
>>+	}
>>+	WARN_ON_ONCE(length);
>>+
>>+	return nents;
>>+}
>>+
>>+static noinline struct sg_table *
>>+intel_persistent_partial_pages(const struct i915_gtt_view *view,
>>+			       struct drm_i915_gem_object *obj)
>>+{
>>+	u64 offset = view->partial.offset << PAGE_SHIFT;
>>+	struct sg_table *st, *obj_st = obj->mm.pages;
>>+	u64 length = view->partial.size << PAGE_SHIFT;
>>+	struct scatterlist *sg;
>>+	unsigned int nents;
>>+	int ret = -ENOMEM;
>>+
>>+	st = kmalloc(sizeof(*st), GFP_KERNEL);
>>+	if (!st)
>>+		goto err_st_alloc;
>>+
>>+	/* Get required sg_table size */
>>+	nents = intel_copy_dma_sg(obj_st, st, offset, length, true);
>>+	if (i915_gem_object_has_struct_page(obj)) {
>>+		unsigned int pg_nents;
>>+
>>+		pg_nents = intel_copy_sg(obj_st, st, offset, length, true);
>>+		if (nents < pg_nents)
>>+			nents = pg_nents;
>>+	}
>>+
>>+	ret = sg_alloc_table(st, nents, GFP_KERNEL);
>>+	if (ret)
>>+		goto err_sg_alloc;
>>+
>>+	/* Build sg_table for specified <offset, length> section */
>>+	intel_copy_dma_sg(obj_st, st, offset, length, false);
>>+	if (i915_gem_object_has_struct_page(obj))
>>+		intel_copy_sg(obj_st, st, offset, length, false);
>>+
>>+	/* Mark last sg */
>>+	sg = st->sgl;
>>+	while (sg_next(sg))
>>+		sg = sg_next(sg);
>>+	sg_mark_end(sg);
>
>Do we need this bit? The nents is exactly orig_nents, and 
>sg_alloc_table will already mark the end for you.
>

Ok, looks like we don't need it as sg_alloc_table() sets it.
While looking at sg_alloc_table(), it looks like it is possible for it
to return -ENOMEM with a partially built table, but we are not
cleaning that up anywhere. Something to consider later, maybe.
https://elixir.bootlin.com/linux/latest/source/lib/scatterlist.c#L330

>Is it not possible to re-use remap_contiguous_pages() somehow? Also,
>do we need the dry_run bit if we use i915_sg_trim()? Maybe something like:
>
>dst = sg_alloc_table(partial.size);
>
>remap_contiguous_pages_sg(dst, src);
>i915_sg_trim(dst);
>
>dst->nents = 0;
>sg = remap_contiguous_pages_dma_sg(dst, src);
>

But then those remap_contiguous_pages[_dma]_sg() would look just like
our intel_copy[_dma]_sg().
And the problem I have with i915_sg_trim() is that it uses the _sg
iterator only and not the _dma_sg iterator. I think, at least in theory,
it is possible to have more dma_sg elements than sg (page) elements
(e.g. if the DMA layer splits a large contiguous page entry into
multiple segments). Right?
That is why I am doing a dry run of both above and taking the max of the two.

Niranjana

>>+
>>+	return st;
>>+
>>+err_sg_alloc:
>>+	kfree(st);
>>+err_st_alloc:
>>+	return ERR_PTR(ret);
>>+}
>>+
>>  static int
>>  __i915_vma_get_pages(struct i915_vma *vma)
>>  {
>>@@ -1330,7 +1444,11 @@ __i915_vma_get_pages(struct i915_vma *vma)
>>  		break;
>>  	case I915_GTT_VIEW_PARTIAL:
>>-		pages = intel_partial_pages(&vma->gtt_view, vma->obj);
>>+		if (i915_vma_is_persistent(vma))
>>+			pages = intel_persistent_partial_pages(&vma->gtt_view,
>>+							       vma->obj);
>>+		else
>>+			pages = intel_partial_pages(&vma->gtt_view, vma->obj);
>>  		break;
>>  	}

Patch

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1b9033865768..68a9ac77b4f2 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1298,6 +1298,120 @@  intel_partial_pages(const struct i915_gtt_view *view,
 	return ERR_PTR(ret);
 }
 
+static unsigned int
+intel_copy_dma_sg(struct sg_table *src_st, struct sg_table *dst_st,
+		  u64 offset, u64 length, bool dry_run)
+{
+	struct scatterlist *dst_sg, *src_sg;
+	unsigned int i, len, nents = 0;
+
+	dst_sg = dst_st->sgl;
+	for_each_sgtable_dma_sg(src_st, src_sg, i) {
+		if (sg_dma_len(src_sg) <= offset) {
+			offset -= sg_dma_len(src_sg);
+			continue;
+		}
+
+		nents++;
+		len = min(sg_dma_len(src_sg) - offset, length);
+		if (!dry_run) {
+			sg_dma_address(dst_sg) = sg_dma_address(src_sg) + offset;
+			sg_dma_len(dst_sg) = len;
+			dst_sg = sg_next(dst_sg);
+		}
+
+		length -= len;
+		offset = 0;
+		if (!length)
+			break;
+	}
+	WARN_ON_ONCE(length);
+
+	return nents;
+}
+
+static unsigned int
+intel_copy_sg(struct sg_table *src_st, struct sg_table *dst_st,
+	      u64 offset, u64 length, bool dry_run)
+{
+	struct scatterlist *dst_sg, *src_sg;
+	unsigned int i, len, nents = 0;
+
+	dst_sg = dst_st->sgl;
+	for_each_sgtable_sg(src_st, src_sg, i) {
+		if (src_sg->length <= offset) {
+			offset -= src_sg->length;
+			continue;
+		}
+
+		nents++;
+		len = min(src_sg->length - offset, length);
+		if (!dry_run) {
+			unsigned long pfn;
+
+			pfn = page_to_pfn(sg_page(src_sg)) + offset / PAGE_SIZE;
+			sg_set_page(dst_sg, pfn_to_page(pfn), len, 0);
+			dst_sg = sg_next(dst_sg);
+		}
+
+		length -= len;
+		offset = 0;
+		if (!length)
+			break;
+	}
+	WARN_ON_ONCE(length);
+
+	return nents;
+}
+
+static noinline struct sg_table *
+intel_persistent_partial_pages(const struct i915_gtt_view *view,
+			       struct drm_i915_gem_object *obj)
+{
+	u64 offset = view->partial.offset << PAGE_SHIFT;
+	struct sg_table *st, *obj_st = obj->mm.pages;
+	u64 length = view->partial.size << PAGE_SHIFT;
+	struct scatterlist *sg;
+	unsigned int nents;
+	int ret = -ENOMEM;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	/* Get required sg_table size */
+	nents = intel_copy_dma_sg(obj_st, st, offset, length, true);
+	if (i915_gem_object_has_struct_page(obj)) {
+		unsigned int pg_nents;
+
+		pg_nents = intel_copy_sg(obj_st, st, offset, length, true);
+		if (nents < pg_nents)
+			nents = pg_nents;
+	}
+
+	ret = sg_alloc_table(st, nents, GFP_KERNEL);
+	if (ret)
+		goto err_sg_alloc;
+
+	/* Build sg_table for specified <offset, length> section */
+	intel_copy_dma_sg(obj_st, st, offset, length, false);
+	if (i915_gem_object_has_struct_page(obj))
+		intel_copy_sg(obj_st, st, offset, length, false);
+
+	/* Mark last sg */
+	sg = st->sgl;
+	while (sg_next(sg))
+		sg = sg_next(sg);
+	sg_mark_end(sg);
+
+	return st;
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+	return ERR_PTR(ret);
+}
+
 static int
 __i915_vma_get_pages(struct i915_vma *vma)
 {
@@ -1330,7 +1444,11 @@  __i915_vma_get_pages(struct i915_vma *vma)
 		break;
 
 	case I915_GTT_VIEW_PARTIAL:
-		pages = intel_partial_pages(&vma->gtt_view, vma->obj);
+		if (i915_vma_is_persistent(vma))
+			pages = intel_persistent_partial_pages(&vma->gtt_view,
+							       vma->obj);
+		else
+			pages = intel_partial_pages(&vma->gtt_view, vma->obj);
 		break;
 	}