diff mbox series

[v4,1/5] udmabuf: direct map pfn when first page fault

Message ID 20240822084342.1574914-2-link@vivo.com (mailing list archive)
State New, archived
Headers show
Series udmbuf bug fix and some improvements | expand

Commit Message

Huan Yang Aug. 22, 2024, 8:43 a.m. UTC
The current udmabuf mmap uses a page fault to populate the vma.

However, the current udmabuf has already obtained and pinned the folio
upon completion of the creation.This means that the physical memory has
already been acquired, rather than being accessed dynamically.

As a result, the page fault has lost its purpose as a demanding
page. Due to the fact that page fault requires trapping into kernel mode
and filling in when accessing the corresponding virtual address in mmap,
when creating a large size udmabuf, this represents a considerable
overhead.

This patch fill vma area with pfn when the first page fault trigger, so,
any other access will not enter page fault.

Suggested-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Huan Yang <link@vivo.com>
---
 drivers/dma-buf/udmabuf.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

Comments

Kasireddy, Vivek Aug. 29, 2024, 6:08 a.m. UTC | #1
Hi Huan,

> Subject: [PATCH v4 1/5] udmabuf: direct map pfn when first page fault
> 
> The current udmabuf mmap uses a page fault to populate the vma.
> 
> However, the current udmabuf has already obtained and pinned the folio
> upon completion of the creation.This means that the physical memory has
> already been acquired, rather than being accessed dynamically.
> 
> As a result, the page fault has lost its purpose as a demanding
> page. Due to the fact that page fault requires trapping into kernel mode
> and filling in when accessing the corresponding virtual address in mmap,
> when creating a large size udmabuf, this represents a considerable
> overhead.
> 
> This patch fill vma area with pfn when the first page fault trigger, so,
> any other access will not enter page fault.
> 
> Suggested-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> Signed-off-by: Huan Yang <link@vivo.com>
> ---
>  drivers/dma-buf/udmabuf.c | 26 ++++++++++++++++++++++++--
>  1 file changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> index 047c3cd2ceff..0e33d25310ec 100644
> --- a/drivers/dma-buf/udmabuf.c
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -43,7 +43,8 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault
> *vmf)
>  	struct vm_area_struct *vma = vmf->vma;
>  	struct udmabuf *ubuf = vma->vm_private_data;
>  	pgoff_t pgoff = vmf->pgoff;
> -	unsigned long pfn;
> +	unsigned long addr, end, pfn;
> +	vm_fault_t ret;
> 
>  	if (pgoff >= ubuf->pagecount)
>  		return VM_FAULT_SIGBUS;
> @@ -51,7 +52,28 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault
> *vmf)
>  	pfn = folio_pfn(ubuf->folios[pgoff]);
>  	pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
> 
> -	return vmf_insert_pfn(vma, vmf->address, pfn);
> +	ret = vmf_insert_pfn(vma, vmf->address, pfn);
> +	if (ret & VM_FAULT_ERROR)
> +		return ret;
> +
> +	/* pre fault */
> +	pgoff = vma->vm_pgoff;
> +	end = vma->vm_end;
> +	addr = vma->vm_start;
> +
> +	for (; addr < end; pgoff++, addr += PAGE_SIZE) {
Although unlikely, I think we should also check for pgoff < ubuf->pagecount.

> +		if (addr == vmf->address)
> +			continue;
> +
> +		pfn = folio_pfn(ubuf->folios[pgoff]);
> +
> +		pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
> +
> +		if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
Shouldn't you store the return value of vmf_insert_pfn in ret? Otherwise, we'll
return success when the above call fails.

Anyway, I am wondering if it is more optimal to just iterate over pages instead
of addresses. Something like below:

+       unsigned long nr_pages = vma_pages(vma);
+       unsigned long addr = vma->vm_start;

-       if (pgoff >= ubuf->pagecount)
-               return VM_FAULT_SIGBUS;
+       WARN_ON(nr_pages != ubuf->pagecount);

-       pfn = folio_pfn(ubuf->folios[pgoff]);
-       pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
+       for (pg = 0; pg < nr_pages && pg < ubuf->pagecount; pg++) {
+               pfn = folio_pfn(ubuf->folios[pg]);
+               pfn += ubuf->offsets[pg] >> PAGE_SHIFT;

-       return vmf_insert_pfn(vma, vmf->address, pfn);
+               ret = vmf_insert_pfn(vma, addr, pfn);
+               addr += PAGE_SIZE;
+ }

Thanks,
Vivek

> +			break;
> +	}
> +
> +	return ret;
>  }
> 
>  static const struct vm_operations_struct udmabuf_vm_ops = {
> --
> 2.45.2
Huan Yang Aug. 29, 2024, 6:35 a.m. UTC | #2
在 2024/8/29 14:08, Kasireddy, Vivek 写道:
> Hi Huan,
>
>> Subject: [PATCH v4 1/5] udmabuf: direct map pfn when first page fault
>>
>> The current udmabuf mmap uses a page fault to populate the vma.
>>
>> However, the current udmabuf has already obtained and pinned the folio
>> upon completion of the creation.This means that the physical memory has
>> already been acquired, rather than being accessed dynamically.
>>
>> As a result, the page fault has lost its purpose as a demanding
>> page. Due to the fact that page fault requires trapping into kernel mode
>> and filling in when accessing the corresponding virtual address in mmap,
>> when creating a large size udmabuf, this represents a considerable
>> overhead.
>>
>> This patch fill vma area with pfn when the first page fault trigger, so,
>> any other access will not enter page fault.
>>
>> Suggested-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
>> Signed-off-by: Huan Yang <link@vivo.com>
>> ---
>>   drivers/dma-buf/udmabuf.c | 26 ++++++++++++++++++++++++--
>>   1 file changed, 24 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
>> index 047c3cd2ceff..0e33d25310ec 100644
>> --- a/drivers/dma-buf/udmabuf.c
>> +++ b/drivers/dma-buf/udmabuf.c
>> @@ -43,7 +43,8 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault
>> *vmf)
>>   	struct vm_area_struct *vma = vmf->vma;
>>   	struct udmabuf *ubuf = vma->vm_private_data;
>>   	pgoff_t pgoff = vmf->pgoff;
>> -	unsigned long pfn;
>> +	unsigned long addr, end, pfn;
>> +	vm_fault_t ret;
>>
>>   	if (pgoff >= ubuf->pagecount)
>>   		return VM_FAULT_SIGBUS;
>> @@ -51,7 +52,28 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault
>> *vmf)
>>   	pfn = folio_pfn(ubuf->folios[pgoff]);
>>   	pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
>>
>> -	return vmf_insert_pfn(vma, vmf->address, pfn);
>> +	ret = vmf_insert_pfn(vma, vmf->address, pfn);
>> +	if (ret & VM_FAULT_ERROR)
>> +		return ret;
>> +
>> +	/* pre fault */
>> +	pgoff = vma->vm_pgoff;
>> +	end = vma->vm_end;
>> +	addr = vma->vm_start;
>> +
>> +	for (; addr < end; pgoff++, addr += PAGE_SIZE) {
> Although unlikely, I think we should also check for pgoff < ubuf->pagecount.
Ohh, yes.
>
>> +		if (addr == vmf->address)
>> +			continue;
>> +
>> +		pfn = folio_pfn(ubuf->folios[pgoff]);
>> +
>> +		pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
>> +
>> +		if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
> Shouldn't you store the return value of vmf_insert_pfn in ret? Otherwise, we'll
> return success when the above call fails.

If anything wrong in there, let it report when it true access and then 
report. Not in there.

It's pre-fault, 'this' page's fault is already dealed success. No reason 
to report in it's fault.

>
> Anyway, I am wondering if it is more optimal to just iterate over pages instead
> of addresses. Something like below:
>
> +       unsigned long nr_pages = vma_pages(vma);
> +       unsigned long addr = vma->vm_start;
>
> -       if (pgoff >= ubuf->pagecount)
> -               return VM_FAULT_SIGBUS;
> +       WARN_ON(nr_pages != ubuf->pagecount);
>
> -       pfn = folio_pfn(ubuf->folios[pgoff]);
> -       pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
> +       for (pg = 0; pg < nr_pages && pg < ubuf->pagecount; pg++) {
> +               pfn = folio_pfn(ubuf->folios[pg]);
> +               pfn += ubuf->offsets[pg] >> PAGE_SHIFT;
>
> -       return vmf_insert_pfn(vma, vmf->address, pfn);
> +               ret = vmf_insert_pfn(vma, addr, pfn);
> +               addr += PAGE_SIZE;
> + }

Maybe mmap with a offset? just start from 0 not too good.

This seems clear, but is it not good to handle insert pfn error elsewhere?

>
> Thanks,
> Vivek
>
>> +			break;
>> +	}
>> +
>> +	return ret;
>>   }
>>
>>   static const struct vm_operations_struct udmabuf_vm_ops = {
>> --
>> 2.45.2
diff mbox series

Patch

diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 047c3cd2ceff..0e33d25310ec 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -43,7 +43,8 @@  static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	struct udmabuf *ubuf = vma->vm_private_data;
 	pgoff_t pgoff = vmf->pgoff;
-	unsigned long pfn;
+	unsigned long addr, end, pfn;
+	vm_fault_t ret;
 
 	if (pgoff >= ubuf->pagecount)
 		return VM_FAULT_SIGBUS;
@@ -51,7 +52,28 @@  static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
 	pfn = folio_pfn(ubuf->folios[pgoff]);
 	pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
 
-	return vmf_insert_pfn(vma, vmf->address, pfn);
+	ret = vmf_insert_pfn(vma, vmf->address, pfn);
+	if (ret & VM_FAULT_ERROR)
+		return ret;
+
+	/* pre fault */
+	pgoff = vma->vm_pgoff;
+	end = vma->vm_end;
+	addr = vma->vm_start;
+
+	for (; addr < end; pgoff++, addr += PAGE_SIZE) {
+		if (addr == vmf->address)
+			continue;
+
+		pfn = folio_pfn(ubuf->folios[pgoff]);
+
+		pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
+
+		if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
+			break;
+	}
+
+	return ret;
 }
 
 static const struct vm_operations_struct udmabuf_vm_ops = {