[3/6] drm/i915: use vmap in shmem_pin_map

Message ID 20200918163724.2511-4-hch@lst.de (mailing list archive)
State New, archived
Series [1/6] zsmalloc: switch from alloc_vm_area to get_vm_area

Commit Message

Christoph Hellwig Sept. 18, 2020, 4:37 p.m. UTC
shmem_pin_map somewhat awkwardly reimplements vmap using
alloc_vm_area and manual pte setup.  The only practical difference
is that alloc_vm_area prefaults the vmalloc area PTEs, which doesn't
seem to be required here (and could be added to vmap using a flag
if actually required).

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/gpu/drm/i915/gt/shmem_utils.c | 90 +++++++++++----------------
 1 file changed, 38 insertions(+), 52 deletions(-)

Comments

Matthew Wilcox Sept. 21, 2020, 7:11 p.m. UTC | #1
On Fri, Sep 18, 2020 at 06:37:21PM +0200, Christoph Hellwig wrote:
>  void shmem_unpin_map(struct file *file, void *ptr)
>  {
> +	long i = shmem_npages(file);
> +
>  	mapping_clear_unevictable(file->f_mapping);
> -	__shmem_unpin_map(file, ptr, shmem_npte(file));
> +	vunmap(ptr);
> +
> +	for (i = 0; i < shmem_npages(file); i++) {
> +		struct page *page;
> +
> +		page = shmem_read_mapping_page_gfp(file->f_mapping, i,
> +						   GFP_KERNEL);
> +		if (!WARN_ON(IS_ERR(page))) {
> +			put_page(page);
> +			put_page(page);
> +		}
> +	}
>  }

This is awkward.  I'd like it if we had a vfree() variant which called
put_page() instead of __free_pages().  I'd like it even more if we
used release_pages() instead of our own loop that called put_page().

Perhaps something like this ...

+++ b/mm/vmalloc.c
@@ -2262,7 +2262,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 
        vm_remove_mappings(area, deallocate_pages);
 
-       if (deallocate_pages) {
+       if (deallocate_pages == 1) {
                int i;
 
                for (i = 0; i < area->nr_pages; i++) {
@@ -2271,8 +2271,12 @@ static void __vunmap(const void *addr, int deallocate_pages)
                        BUG_ON(!page);
                        __free_pages(page, 0);
                }
-               atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
+       } else if (deallocate_pages == 2) {
+               release_pages(area->pages, area->nr_pages);
+       }
 
+       if (deallocate_pages) {
+               atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
                kvfree(area->pages);
        }
@@ -2369,6 +2373,14 @@ void vunmap(const void *addr)
 }
 EXPORT_SYMBOL(vunmap);
 
+void vunmap_put_pages(const void *addr)
+{
+       BUG_ON(in_interrupt());
+       might_sleep();
+       if (addr)
+               __vunmap(addr, 2);
+}
+
 /**
  * vmap - map an array of pages into virtually contiguous space
  * @pages: array of page pointers

only with kernel-doc and so on.  I bet somebody has a better idea for a name.
Christoph Hellwig Sept. 22, 2020, 6:22 a.m. UTC | #2
On Mon, Sep 21, 2020 at 08:11:57PM +0100, Matthew Wilcox wrote:
> This is awkward.  I'd like it if we had a vfree() variant which called
> put_page() instead of __free_pages().  I'd like it even more if we
> used release_pages() instead of our own loop that called put_page().

Note that we don't need a new vfree variant, we can do this manually if
we want, take a look at kernel/dma/remap.c.  But I thought this code
intentionally doesn't want to do that to avoid locking in the memory
for the pages array.  Maybe the i915 maintainers can clarify.
Tvrtko Ursulin Sept. 22, 2020, 8:23 a.m. UTC | #3
On 22/09/2020 07:22, Christoph Hellwig wrote:
> On Mon, Sep 21, 2020 at 08:11:57PM +0100, Matthew Wilcox wrote:
>> This is awkward.  I'd like it if we had a vfree() variant which called
>> put_page() instead of __free_pages().  I'd like it even more if we
>> used release_pages() instead of our own loop that called put_page().
> 
> Note that we don't need a new vfree variant, we can do this manually if
> we want, take a look at kernel/dma/remap.c.  But I thought this code
> intentionally doesn't want to do that to avoid locking in the memory
> for the pages array.  Maybe the i915 maintainers can clarify.

+ Chris & Matt who were involved with this part of i915.

If I understood this sub-thread correctly, iterating over and freeing
the pages via the vmapped ptes, so that no shmem_read_mapping_page_gfp
loop is needed in shmem_unpin_map, looks plausible to me.

I did not get the reference to kernel/dma/remap.c though, and I'm also
not sure how to do the error unwind path in shmem_pin_map, at which
point the allocated vm area hasn't been fully populated yet. Hand-roll
a loop walking the vm area struct in there?

Regards,

Tvrtko
Matthew Wilcox Sept. 22, 2020, 11:21 a.m. UTC | #4
On Tue, Sep 22, 2020 at 08:22:49AM +0200, Christoph Hellwig wrote:
> On Mon, Sep 21, 2020 at 08:11:57PM +0100, Matthew Wilcox wrote:
> > This is awkward.  I'd like it if we had a vfree() variant which called
> > put_page() instead of __free_pages().  I'd like it even more if we
> > used release_pages() instead of our own loop that called put_page().
> 
> Note that we don't need a new vfree variant, we can do this manually if
> we want, take a look at kernel/dma/remap.c.  But I thought this code
> intentionally doesn't want to do that to avoid locking in the memory
> for the pages array.  Maybe the i915 maintainers can clarify.

Actually, vfree() will work today; I cc'd you on a documentation update
to make it clear that this is permitted.

From my current experience with the i915 shmem code, I think that the
i915 maintainers are experts at graphics, and are unfamiliar with the MM.
There are a number of places where they do things the hard way.
Christoph Hellwig Sept. 22, 2020, 2:31 p.m. UTC | #5
On Tue, Sep 22, 2020 at 09:23:59AM +0100, Tvrtko Ursulin wrote:
> If I understood this sub-thread correctly, iterating and freeing the pages 
> via the vmapped ptes, so no need for a
> shmem_read_mapping_page_gfp loop in shmem_unpin_map looks plausible to me.
>
> I did not get the reference to kernel/dma/remap.c though,

What I mean is the code in dma_common_find_pages, which returns the
page array for freeing.
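
For context, dma_common_find_pages() is roughly the following pattern (a
from-memory sketch, not a verbatim copy of kernel/dma/remap.c; the exact
flag check may differ): look up the vm_struct behind a vmapped address and
hand back the page array that vmap() recorded there, so the free side needs
no second page-cache lookup.

struct page **dma_common_find_pages(void *cpu_addr)
{
	struct vm_struct *area = find_vm_area(cpu_addr);

	/* only trust the pages array for mappings we created ourselves */
	if (!area || !(area->flags & VM_DMA_COHERENT))
		return NULL;
	return area->pages;
}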

>
> and also not sure 
> how to do the error unwind path in shmem_pin_map at which point the 
> allocated vm area hasn't been fully populated yet. Hand-roll the loop 
> walking vm area struct in there?

Yes.  What I originally did (re-created as I didn't save it) would be
something like this:

---
From 5605e77cda246df6dd7ded99ec22cb3f341ef5d5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Sep 2020 13:54:04 +0200
Subject: drm/i915: use vmap in shmem_pin_map

shmem_pin_map somewhat awkwardly reimplements vmap using
alloc_vm_area and manual pte setup.  The only practical difference
is that alloc_vm_area prefaults the vmalloc area PTEs, which doesn't
seem to be required here (and could be added to vmap using a flag
if actually required).

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/gpu/drm/i915/gt/shmem_utils.c | 81 +++++++++------------------
 1 file changed, 27 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index 43c7acbdc79dea..7ec6ba4c1065b2 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -49,80 +49,53 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
 	return file;
 }
 
-static size_t shmem_npte(struct file *file)
+static size_t shmem_npages(struct file *file)
 {
 	return file->f_mapping->host->i_size >> PAGE_SHIFT;
 }
 
-static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
-{
-	unsigned long pfn;
-
-	vunmap(ptr);
-
-	for (pfn = 0; pfn < n_pte; pfn++) {
-		struct page *page;
-
-		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
-						   GFP_KERNEL);
-		if (!WARN_ON(IS_ERR(page))) {
-			put_page(page);
-			put_page(page);
-		}
-	}
-}
-
 void *shmem_pin_map(struct file *file)
 {
-	const size_t n_pte = shmem_npte(file);
-	pte_t *stack[32], **ptes, **mem;
-	struct vm_struct *area;
-	unsigned long pfn;
-
-	mem = stack;
-	if (n_pte > ARRAY_SIZE(stack)) {
-		mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
-		if (!mem)
-			return NULL;
-	}
+	size_t n_pages = shmem_npages(file), i;
+	struct page **pages;
+	void *vaddr;
 
-	area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
-	if (!area) {
-		if (mem != stack)
-			kvfree(mem);
+	pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
 		return NULL;
-	}
-
-	ptes = mem;
-	for (pfn = 0; pfn < n_pte; pfn++) {
-		struct page *page;
 
-		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
-						   GFP_KERNEL);
-		if (IS_ERR(page))
+	for (i = 0; i < n_pages; i++) {
+		pages[i] = shmem_read_mapping_page_gfp(file->f_mapping, i,
+						       GFP_KERNEL);
+		if (IS_ERR(pages[i]))
 			goto err_page;
-
-		**ptes++ = mk_pte(page,  PAGE_KERNEL);
 	}
 
-	if (mem != stack)
-		kvfree(mem);
-
+	vaddr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+	if (!vaddr)
+		goto err_page;
 	mapping_set_unevictable(file->f_mapping);
-	return area->addr;
-
+	return vaddr;
 err_page:
-	if (mem != stack)
-		kvfree(mem);
-
-	__shmem_unpin_map(file, area->addr, pfn);
+	while (--i >= 0)
+		put_page(pages[i]);
+	kvfree(pages);
 	return NULL;
 }
 
 void shmem_unpin_map(struct file *file, void *ptr)
 {
+	struct vm_struct *area = find_vm_area(ptr);
+	size_t i = shmem_npages(file);
+
+	if (WARN_ON_ONCE(!area || !area->pages))
+		return;
+
 	mapping_clear_unevictable(file->f_mapping);
-	__shmem_unpin_map(file, ptr, shmem_npte(file));
+	for (i = 0; i < shmem_npages(file); i++)
+		put_page(area->pages[i]);
+	kvfree(area->pages);
+	vunmap(ptr);
 }
 
 static int __shmem_rw(struct file *file, loff_t off,
Christoph Hellwig Sept. 22, 2020, 2:39 p.m. UTC | #6
On Tue, Sep 22, 2020 at 12:21:44PM +0100, Matthew Wilcox wrote:
> Actually, vfree() will work today; I cc'd you on a documentation update
> to make it clear that this is permitted.

vfree calls __free_pages, while i915 and a lot of other code call
put_page.  They are mostly the same, but not quite, and every time I
look into that mess I'm more confused than before.

Can someone in the know write sensible documentation on when to use
__free_page(s) vs put_page?
Matthew Wilcox Sept. 22, 2020, 2:53 p.m. UTC | #7
On Tue, Sep 22, 2020 at 04:39:06PM +0200, Christoph Hellwig wrote:
> On Tue, Sep 22, 2020 at 12:21:44PM +0100, Matthew Wilcox wrote:
> > Actually, vfree() will work today; I cc'd you on a documentation update
> > to make it clear that this is permitted.
> 
> vfree calls __free_pages, the i915 and a lot of other code calls
> put_page.  They are mostly the same, but not quite and everytime I
> look into that mess I'm more confused than before.
> 
> Can someone in the know write sensible documentation on when to use
> __free_page(s) vs put_page?

I started on that, and then I found a bug that's been lurking for 12
years, so that delayed the documentation somewhat.  The short answer is
that __free_pages() lets you free non-compound high-order pages while
put_page() can only free order-0 and compound pages.
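
A contrived sketch (not from the patch, and order_vs_put_example is a
made-up name) of what that distinction means in practice, using only
alloc_pages()/__free_pages()/put_page():

static void order_vs_put_example(void)
{
	/* Non-compound high-order allocation: the order exists only in the
	 * caller's head, so it has to go back via __free_pages(page, order). */
	struct page *page = alloc_pages(GFP_KERNEL, 2);

	if (page)
		__free_pages(page, 2);	/* put_page() here would free only the head page */

	/* Compound allocation: the order is recorded in the page itself, so
	 * dropping the last reference with put_page() frees the whole thing. */
	page = alloc_pages(GFP_KERNEL | __GFP_COMP, 2);
	if (page)
		put_page(page);
}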

I would really like to overhaul our memory allocation APIs:

current			new
__get_free_page(s)	alloc_page(s)
free_page(s)		free_page(s)
alloc_page(s)		get_free_page(s)
__free_pages		put_page_order

Then put_page() and put_page_order() are more obviously friends.

But I cannot imagine a world in which Linus says yes to that upheaval.
He's previous expressed dislike of the get_free_page() family of APIs,
and thinks all those callers should just use kmalloc().  Maybe we can
make that transition happen, now that kmalloc() aligns larger allocations.
Tvrtko Ursulin Sept. 22, 2020, 4:13 p.m. UTC | #8
On 22/09/2020 15:31, Christoph Hellwig wrote:
> On Tue, Sep 22, 2020 at 09:23:59AM +0100, Tvrtko Ursulin wrote:
>> If I understood this sub-thread correctly, iterating and freeing the pages
>> via the vmapped ptes, so no need for a
>> shmem_read_mapping_page_gfp loop in shmem_unpin_map looks plausible to me.
>>
>> I did not get the reference to kernel/dma/remap.c though,
> 
> What I mean is the code in dma_common_find_pages, which returns the
> page array for freeing.

Got it.

>> and also not sure
>> how to do the error unwind path in shmem_pin_map at which point the
>> allocated vm area hasn't been fully populated yet. Hand-roll the loop
>> walking vm area struct in there?
> 
> Yes.  What I originally did (re-created as I didn't save it) would be
> something like this:
> 
> ---
> From 5605e77cda246df6dd7ded99ec22cb3f341ef5d5 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Wed, 16 Sep 2020 13:54:04 +0200
> Subject: drm/i915: use vmap in shmem_pin_map
> 
> shmem_pin_map somewhat awkwardly reimplements vmap using
> alloc_vm_area and manual pte setup.  The only practical difference
> is that alloc_vm_area prefaults the vmalloc area PTEs, which doesn't
> seem to be required here (and could be added to vmap using a flag
> if actually required).
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   drivers/gpu/drm/i915/gt/shmem_utils.c | 81 +++++++++------------------
>   1 file changed, 27 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
> index 43c7acbdc79dea..7ec6ba4c1065b2 100644
> --- a/drivers/gpu/drm/i915/gt/shmem_utils.c
> +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
> @@ -49,80 +49,53 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
>   	return file;
>   }
>   
> -static size_t shmem_npte(struct file *file)
> +static size_t shmem_npages(struct file *file)
>   {
>   	return file->f_mapping->host->i_size >> PAGE_SHIFT;
>   }
>   
> -static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
> -{
> -	unsigned long pfn;
> -
> -	vunmap(ptr);
> -
> -	for (pfn = 0; pfn < n_pte; pfn++) {
> -		struct page *page;
> -
> -		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> -						   GFP_KERNEL);
> -		if (!WARN_ON(IS_ERR(page))) {
> -			put_page(page);
> -			put_page(page);
> -		}
> -	}
> -}
> -
>   void *shmem_pin_map(struct file *file)
>   {
> -	const size_t n_pte = shmem_npte(file);
> -	pte_t *stack[32], **ptes, **mem;

Chris can comment on how much he'd miss the 32-entry stack shortcut.

> -	struct vm_struct *area;
> -	unsigned long pfn;
> -
> -	mem = stack;
> -	if (n_pte > ARRAY_SIZE(stack)) {
> -		mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
> -		if (!mem)
> -			return NULL;
> -	}
> +	size_t n_pages = shmem_npages(file), i;
> +	struct page **pages;
> +	void *vaddr;
>   
> -	area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
> -	if (!area) {
> -		if (mem != stack)
> -			kvfree(mem);
> +	pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
> +	if (!pages)
>   		return NULL;
> -	}
> -
> -	ptes = mem;
> -	for (pfn = 0; pfn < n_pte; pfn++) {
> -		struct page *page;
>   
> -		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> -						   GFP_KERNEL);
> -		if (IS_ERR(page))
> +	for (i = 0; i < n_pages; i++) {
> +		pages[i] = shmem_read_mapping_page_gfp(file->f_mapping, i,
> +						       GFP_KERNEL);
> +		if (IS_ERR(pages[i]))
>   			goto err_page;
> -
> -		**ptes++ = mk_pte(page,  PAGE_KERNEL);
>   	}
>   
> -	if (mem != stack)
> -		kvfree(mem);
> -
> +	vaddr = vmap(pages, n_pages, 0, PAGE_KERNEL);
> +	if (!vaddr)
> +		goto err_page;
>   	mapping_set_unevictable(file->f_mapping);
> -	return area->addr;
> -
> +	return vaddr;

Is there something in vmap() preventing us from freeing the pages array
here? I can't spot anything that is holding on to the pointer. Or was it
just a sketch before you realized we could walk the vm_area?

Also, I may be totally misunderstanding something, but I think you need 
to assign area->pages manually so shmem_unpin_map can access it below.

>   err_page:
> -	if (mem != stack)
> -		kvfree(mem);
> -
> -	__shmem_unpin_map(file, area->addr, pfn);
> +	while (--i >= 0)
> +		put_page(pages[i]);
> +	kvfree(pages);
>   	return NULL;
>   }
>   
>   void shmem_unpin_map(struct file *file, void *ptr)
>   {
> +	struct vm_struct *area = find_vm_area(ptr);
> +	size_t i = shmem_npages(file);
> +
> +	if (WARN_ON_ONCE(!area || !area->pages))
> +		return;
> +
>   	mapping_clear_unevictable(file->f_mapping);
> -	__shmem_unpin_map(file, ptr, shmem_npte(file));
> +	for (i = 0; i < shmem_npages(file); i++)
> +		put_page(area->pages[i]);
> +	kvfree(area->pages);
> +	vunmap(ptr);

Is the verdict from mm experts that we can't use vfree due to the
__free_pages vs put_page differences?

Could we get from ptes to pages, so that we don't have to keep the 
area->pages array allocated for the duration of the pin?

Regards,

Tvrtko

>   }
>   
>   static int __shmem_rw(struct file *file, loff_t off,
>
Christoph Hellwig Sept. 22, 2020, 4:33 p.m. UTC | #9
On Tue, Sep 22, 2020 at 05:13:45PM +0100, Tvrtko Ursulin wrote:
>>   void *shmem_pin_map(struct file *file)
>>   {
>> -	const size_t n_pte = shmem_npte(file);
>> -	pte_t *stack[32], **ptes, **mem;
>
> Chris can comment how much he'd miss the 32 page stack shortcut.

I'd like to see a profile that claims that kmalloc matters in a
path that does a vmap and reads pages through the page cache.
Especially when the kmalloc saves doing another page cache lookup
on the free side.

> Is there something in vmap() preventing us from freeing the pages array 
> here? I can't spot anything that is holding on to the pointer. Or it was 
> just a sketch before you realized we could walk the vm_area?
>
> Also, I may be totally misunderstanding something, but I think you need to 
> assign area->pages manually so shmem_unpin_map can access it below.

We need area->pages to hold the pages for the free side.  That being
said, the patch I posted is broken because it never assigned to that.
As said, it was a sketch.  This is the patch I just rebooted into on
my laptop:

http://git.infradead.org/users/hch/misc.git/commitdiff/048522dfa26b6667adfb0371ff530dc263abe829

it needs extra prep patches from the series:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/alloc_vm_area

>>   	mapping_clear_unevictable(file->f_mapping);
>> -	__shmem_unpin_map(file, ptr, shmem_npte(file));
>> +	for (i = 0; i < shmem_npages(file); i++)
>> +		put_page(area->pages[i]);
>> +	kvfree(area->pages);
>> +	vunmap(ptr);
>
> Is the verdict from mm experts that we can't use vfree due __free_pages vs 
> put_page differences?

Switched to vfree now.

> Could we get from ptes to pages, so that we don't have to keep the 
> area->pages array allocated for the duration of the pin?

We could do vmalloc_to_page, but that is fairly expensive (not as bad
as reading from the page cache..).  Are you really worried about the
allocation?
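
For reference, the vmalloc_to_page() variant being discussed would look
roughly like this (a hypothetical sketch, neither posted nor merged;
shmem_npages() is the helper from the patch above):

void shmem_unpin_map(struct file *file, void *ptr)
{
	size_t i, n_pages = shmem_npages(file);

	mapping_clear_unevictable(file->f_mapping);
	/* One page-table walk per page instead of a long-lived pages array;
	 * this has to happen before vunmap() tears down the PTEs. */
	for (i = 0; i < n_pages; i++)
		put_page(vmalloc_to_page(ptr + i * PAGE_SIZE));
	vunmap(ptr);
}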
Tvrtko Ursulin Sept. 22, 2020, 5:04 p.m. UTC | #10
On 22/09/2020 17:33, Christoph Hellwig wrote:
> On Tue, Sep 22, 2020 at 05:13:45PM +0100, Tvrtko Ursulin wrote:
>>>    void *shmem_pin_map(struct file *file)
>>>    {
>>> -	const size_t n_pte = shmem_npte(file);
>>> -	pte_t *stack[32], **ptes, **mem;
>>
>> Chris can comment how much he'd miss the 32 page stack shortcut.
> 
> I'd like to see a profile that claim that kmalloc matters in a
> path that does a vmap and reads pages through the page cache.
> Especially when the kmalloc saves doing another page cache lookup
> on the free side.

The only reason I can come up with now is if the mapping side is on a
latency-sensitive path while un-mapping is lazy/delayed and so can be
more costly.  Then a fast map and extra cost on unmap may make sense.

This applies more to the other i915 patch, which implements a much more
heavily used API, but whether or not we could demonstrate any difference
in the perf profiles I couldn't tell you without trying to collect some.

>> Is there something in vmap() preventing us from freeing the pages array
>> here? I can't spot anything that is holding on to the pointer. Or it was
>> just a sketch before you realized we could walk the vm_area?
>>
>> Also, I may be totally misunderstanding something, but I think you need to
>> assign area->pages manually so shmem_unpin_map can access it below.
> 
> We need area->pages to hold the pages for the free side.  That being
> said the patch I posted is broken because it never assigned to that.
> As said it was a sketch.  This is the patch I just rebooted into on
> my Laptop:
> 
> http://git.infradead.org/users/hch/misc.git/commitdiff/048522dfa26b6667adfb0371ff530dc263abe829
> 
> it needs extra prep patches from the series:
> 
> http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/alloc_vm_area
> 
>>>    	mapping_clear_unevictable(file->f_mapping);
>>> -	__shmem_unpin_map(file, ptr, shmem_npte(file));
>>> +	for (i = 0; i < shmem_npages(file); i++)
>>> +		put_page(area->pages[i]);
>>> +	kvfree(area->pages);
>>> +	vunmap(ptr);
>>
>> Is the verdict from mm experts that we can't use vfree due __free_pages vs
>> put_page differences?
> 
> Switched to vfree now.
> 
>> Could we get from ptes to pages, so that we don't have to keep the
>> area->pages array allocated for the duration of the pin?
> 
> We could do vmalloc_to_page, but that is fairly expensive (not as bad
> as reading from the page cache..).  Are you really worried about the
> allocation?

Not so much, given that we don't even use shmem_pin_map outside
selftests.

If we start using it I expect it will be for tiny objects anyway. Only
if they end up being pinned for the lifetime of the driver might it be
a pointless waste of memory compared to the downsides of
vmalloc_to_page. But we can revisit this particular edge-case
optimization if the need arises.

I'll look at your other i915 patch tomorrow.

Regards,

Tvrtko
Christoph Hellwig Sept. 23, 2020, 6:11 a.m. UTC | #11
On Tue, Sep 22, 2020 at 06:04:37PM +0100, Tvrtko Ursulin wrote:
> Only reason I can come up with now is if mapping side is on a latency 
> sensitive path, while un-mapping is lazy/delayed so can be more costly. 
> Then fast map and extra cost on unmap may make sense.

In general yes.  But compared to the overall operations a small kmalloc
is in the noise, so I'd really like to see numbers.

> It more applies to the other i915 patch, which implements a much more used 
> API, but whether or not we can demonstrate any difference in the perf 
> profiles I couldn't tell you without trying to collect some.

The other patch keeps the stack, as avoiding it would not simplify the
code as significantly.  I still doubt it is all that useful, though.


>> We could do vmalloc_to_page, but that is fairly expensive (not as bad
>> as reading from the page cache..).  Are you really worried about the
>> allocation?
>
> Not so much given how we don't even use shmem_pin_map outside selftests.
>
> If we start using it I expect it will be for tiny objects anyway. Only if 
> they end up being pinned for the lifetime of the driver, it may be a 
> pointless waste of memory compared to the downsides of vmalloc_to_page. But 
> we can revisit this particular edge case optimization if the need arises.

For tiny objects we could either look into using kmap, or in fact
ensure the shmem files aren't in highmem, in which case you could
always use the single pages directly without any extra mapping.

Patch

diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index 43c7acbdc79dea..77410091597f19 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -49,80 +49,66 @@  struct file *shmem_create_from_object(struct drm_i915_gem_object *obj)
 	return file;
 }
 
-static size_t shmem_npte(struct file *file)
+static size_t shmem_npages(struct file *file)
 {
 	return file->f_mapping->host->i_size >> PAGE_SHIFT;
 }
 
-static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte)
-{
-	unsigned long pfn;
-
-	vunmap(ptr);
-
-	for (pfn = 0; pfn < n_pte; pfn++) {
-		struct page *page;
-
-		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
-						   GFP_KERNEL);
-		if (!WARN_ON(IS_ERR(page))) {
-			put_page(page);
-			put_page(page);
-		}
-	}
-}
-
 void *shmem_pin_map(struct file *file)
 {
-	const size_t n_pte = shmem_npte(file);
-	pte_t *stack[32], **ptes, **mem;
-	struct vm_struct *area;
-	unsigned long pfn;
-
-	mem = stack;
-	if (n_pte > ARRAY_SIZE(stack)) {
-		mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL);
-		if (!mem)
+	const size_t n_pages = shmem_npages(file);
+	struct page **pages, *stack[32];
+	void *vaddr;
+	long i;
+
+	pages = stack;
+	if (n_pages > ARRAY_SIZE(stack)) {
+		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+		if (!pages)
 			return NULL;
 	}
 
-	area = alloc_vm_area(n_pte << PAGE_SHIFT, mem);
-	if (!area) {
-		if (mem != stack)
-			kvfree(mem);
-		return NULL;
-	}
-
-	ptes = mem;
-	for (pfn = 0; pfn < n_pte; pfn++) {
-		struct page *page;
-
-		page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
-						   GFP_KERNEL);
-		if (IS_ERR(page))
+	for (i = 0; i < n_pages; i++) {
+		pages[i] = shmem_read_mapping_page_gfp(file->f_mapping, i,
+						       GFP_KERNEL);
+		if (IS_ERR(pages[i]))
 			goto err_page;
-
-		**ptes++ = mk_pte(page,  PAGE_KERNEL);
 	}
 
-	if (mem != stack)
-		kvfree(mem);
+	vaddr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+	if (!vaddr)
+		goto err_page;
 
+	if (pages != stack)
+		kvfree(pages);
 	mapping_set_unevictable(file->f_mapping);
-	return area->addr;
+	return vaddr;
 
 err_page:
-	if (mem != stack)
-		kvfree(mem);
-
-	__shmem_unpin_map(file, area->addr, pfn);
+	while (--i >= 0)
+		put_page(pages[i]);
+	if (pages != stack)
+		kvfree(pages);
 	return NULL;
 }
 
 void shmem_unpin_map(struct file *file, void *ptr)
 {
+	long i = shmem_npages(file);
+
 	mapping_clear_unevictable(file->f_mapping);
-	__shmem_unpin_map(file, ptr, shmem_npte(file));
+	vunmap(ptr);
+
+	for (i = 0; i < shmem_npages(file); i++) {
+		struct page *page;
+
+		page = shmem_read_mapping_page_gfp(file->f_mapping, i,
+						   GFP_KERNEL);
+		if (!WARN_ON(IS_ERR(page))) {
+			put_page(page);
+			put_page(page);
+		}
+	}
 }
 
 static int __shmem_rw(struct file *file, loff_t off,