diff mbox series

[v2,1/4] udmabuf: cancel mmap page fault, direct map it

Message ID 20240805032550.3912454-2-link@vivo.com (mailing list archive)
State New
Headers show
Series udmbuf bug fix and some improvements | expand

Commit Message

Huan Yang Aug. 5, 2024, 3:25 a.m. UTC
The current udmabuf mmap uses a page fault mechanism to populate the vma.

However, the current udmabuf has already obtained and pinned the folio
upon completion of the creation.This means that the physical memory has
already been acquired, rather than being accessed dynamically. The
current page fault method only saves some page table memory.

As a result, the page fault mechanism has lost its purpose as a demanding
page. Due to the fact that page fault requires trapping into kernel mode
and filling in when accessing the corresponding virtual address in mmap,
this means that user mode access to virtual addresses needs to trap into
kernel mode.

Therefore, when creating a large size udmabuf, this represents a
considerable overhead.

The current patch removes the page fault method of mmap and
instead fills it directly when mmap is triggered.

Signed-off-by: Huan Yang <link@vivo.com>
---
 drivers/dma-buf/udmabuf.c | 39 ++++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

Comments

Kasireddy, Vivek Aug. 10, 2024, 1:28 a.m. UTC | #1
Hi Huan,

> 
> The current udmabuf mmap uses a page fault mechanism to populate the
> vma.
> 
> However, the current udmabuf has already obtained and pinned the folio
> upon completion of the creation.This means that the physical memory has
> already been acquired, rather than being accessed dynamically. The
> current page fault method only saves some page table memory.
> 
> As a result, the page fault mechanism has lost its purpose as a demanding
> page. Due to the fact that page fault requires trapping into kernel mode
> and filling in when accessing the corresponding virtual address in mmap,
> this means that user mode access to virtual addresses needs to trap into
> kernel mode.
> 
> Therefore, when creating a large size udmabuf, this represents a
> considerable overhead.
> 
> The current patch removes the page fault method of mmap and
> instead fills it directly when mmap is triggered.
I think it makes sense to populate the vma when the first fault is triggered
instead of doing it during mmap. This is because the userspace may call
mmap but does not actually use the data. Qemu works this way depending on
whether opengl is available in the environment or not.

> 
> Signed-off-by: Huan Yang <link@vivo.com>
> ---
>  drivers/dma-buf/udmabuf.c | 39 ++++++++++++++++-----------------------
>  1 file changed, 16 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> index 047c3cd2ceff..475268d4ebb1 100644
> --- a/drivers/dma-buf/udmabuf.c
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -38,36 +38,29 @@ struct udmabuf_folio {
>  	struct list_head list;
>  };
> 
> -static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
> -{
> -	struct vm_area_struct *vma = vmf->vma;
> -	struct udmabuf *ubuf = vma->vm_private_data;
> -	pgoff_t pgoff = vmf->pgoff;
> -	unsigned long pfn;
> -
> -	if (pgoff >= ubuf->pagecount)
> -		return VM_FAULT_SIGBUS;
> -
> -	pfn = folio_pfn(ubuf->folios[pgoff]);
> -	pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
> -
> -	return vmf_insert_pfn(vma, vmf->address, pfn);
> -}
> -
> -static const struct vm_operations_struct udmabuf_vm_ops = {
> -	.fault = udmabuf_vm_fault,
> -};
> -
>  static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct
> *vma)
>  {
>  	struct udmabuf *ubuf = buf->priv;
> +	unsigned long addr;
> +	unsigned long end;
> +	unsigned long pgoff;
> +	int ret;
> 
>  	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
>  		return -EINVAL;
> 
> -	vma->vm_ops = &udmabuf_vm_ops;
> -	vma->vm_private_data = ubuf;
> -	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND |
> VM_DONTDUMP);
> +	for (pgoff = vma->vm_pgoff, end = vma->vm_end, addr = vma-
> >vm_start;
> +	     addr < end; pgoff++, addr += PAGE_SIZE) {
> +		struct page *page =
> +			folio_page(ubuf->folios[pgoff],
> +				   ubuf->offsets[pgoff] >> PAGE_SHIFT);
Please don't use struct page pointers, given the recent conversion to use
only folios in udmabuf driver. I think what you are trying to do above can
be done using only folios.

> +
> +		ret = remap_pfn_range(vma, addr, page_to_pfn(page),
> PAGE_SIZE,
> +				      vma->vm_page_prot);
Could you please retain the use of vmf_insert_pfn() here, given the simplicity,
among other reasons?

Thanks,
Vivek

> +		if (ret)
> +			return ret;
> +	}
> +
>  	return 0;
>  }
> 
> --
> 2.45.2
Huan Yang Aug. 12, 2024, 2:49 a.m. UTC | #2
在 2024/8/10 9:28, Kasireddy, Vivek 写道:
> [Some people who received this message don't often get email from vivek.kasireddy@intel.com. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]
>
> Hi Huan,
>
>> The current udmabuf mmap uses a page fault mechanism to populate the
>> vma.
>>
>> However, the current udmabuf has already obtained and pinned the folio
>> upon completion of the creation.This means that the physical memory has
>> already been acquired, rather than being accessed dynamically. The
>> current page fault method only saves some page table memory.
>>
>> As a result, the page fault mechanism has lost its purpose as a demanding
>> page. Due to the fact that page fault requires trapping into kernel mode
>> and filling in when accessing the corresponding virtual address in mmap,
>> this means that user mode access to virtual addresses needs to trap into
>> kernel mode.
>>
>> Therefore, when creating a large size udmabuf, this represents a
>> considerable overhead.
>>
>> The current patch removes the page fault method of mmap and
>> instead fills it directly when mmap is triggered.
> I think it makes sense to populate the vma when the first fault is triggered
> instead of doing it during mmap. This is because the userspace may call
> mmap but does not actually use the data. Qemu works this way depending on
Yes, the idea of this is also related to the concept of page fault.

However, the folio has already been pinned during creation. I think 
using the page fault

again is theoretically sound, but it may not save memory, only increase 
context switch overhead.


> whether opengl is available in the environment or not.
>
>> Signed-off-by: Huan Yang <link@vivo.com>
>> ---
>>   drivers/dma-buf/udmabuf.c | 39 ++++++++++++++++-----------------------
>>   1 file changed, 16 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
>> index 047c3cd2ceff..475268d4ebb1 100644
>> --- a/drivers/dma-buf/udmabuf.c
>> +++ b/drivers/dma-buf/udmabuf.c
>> @@ -38,36 +38,29 @@ struct udmabuf_folio {
>>        struct list_head list;
>>   };
>>
>> -static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
>> -{
>> -     struct vm_area_struct *vma = vmf->vma;
>> -     struct udmabuf *ubuf = vma->vm_private_data;
>> -     pgoff_t pgoff = vmf->pgoff;
>> -     unsigned long pfn;
>> -
>> -     if (pgoff >= ubuf->pagecount)
>> -             return VM_FAULT_SIGBUS;
>> -
>> -     pfn = folio_pfn(ubuf->folios[pgoff]);
>> -     pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
>> -
>> -     return vmf_insert_pfn(vma, vmf->address, pfn);
>> -}
>> -
>> -static const struct vm_operations_struct udmabuf_vm_ops = {
>> -     .fault = udmabuf_vm_fault,
>> -};
>> -
>>   static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct
>> *vma)
>>   {
>>        struct udmabuf *ubuf = buf->priv;
>> +     unsigned long addr;
>> +     unsigned long end;
>> +     unsigned long pgoff;
>> +     int ret;
>>
>>        if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
>>                return -EINVAL;
>>
>> -     vma->vm_ops = &udmabuf_vm_ops;
>> -     vma->vm_private_data = ubuf;
>> -     vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND |
>> VM_DONTDUMP);
>> +     for (pgoff = vma->vm_pgoff, end = vma->vm_end, addr = vma-
>>> vm_start;
>> +          addr < end; pgoff++, addr += PAGE_SIZE) {
>> +             struct page *page =
>> +                     folio_page(ubuf->folios[pgoff],
>> +                                ubuf->offsets[pgoff] >> PAGE_SHIFT);
> Please don't use struct page pointers, given the recent conversion to use
> only folios in udmabuf driver. I think what you are trying to do above can
> be done using only folios.
Yes, just use pfn. Consider of HVO, must use this.
>
>> +
>> +             ret = remap_pfn_range(vma, addr, page_to_pfn(page),
>> PAGE_SIZE,
>> +                                   vma->vm_page_prot);
> Could you please retain the use of vmf_insert_pfn() here, given the simplicity,
> among other reasons?
I will make the correction.

Thanks.
>
> Thanks,
> Vivek
>
>> +             if (ret)
>> +                     return ret;
>> +     }
>> +
>>        return 0;
>>   }
>>
>> --
>> 2.45.2
Christian König Aug. 22, 2024, 8:11 a.m. UTC | #3
Am 12.08.24 um 04:49 schrieb Huan Yang:
>
> 在 2024/8/10 9:28, Kasireddy, Vivek 写道:
>> [Some people who received this message don't often get email from 
>> vivek.kasireddy@intel.com. Learn why this is important at 
>> https://aka.ms/LearnAboutSenderIdentification ]
>>
>> Hi Huan,
>>
>>> The current udmabuf mmap uses a page fault mechanism to populate the
>>> vma.
>>>
>>> However, the current udmabuf has already obtained and pinned the folio
>>> upon completion of the creation.This means that the physical memory has
>>> already been acquired, rather than being accessed dynamically. The
>>> current page fault method only saves some page table memory.
>>>
>>> As a result, the page fault mechanism has lost its purpose as a 
>>> demanding
>>> page. Due to the fact that page fault requires trapping into kernel 
>>> mode
>>> and filling in when accessing the corresponding virtual address in 
>>> mmap,
>>> this means that user mode access to virtual addresses needs to trap 
>>> into
>>> kernel mode.
>>>
>>> Therefore, when creating a large size udmabuf, this represents a
>>> considerable overhead.
>>>
>>> The current patch removes the page fault method of mmap and
>>> instead fills it directly when mmap is triggered.
>> I think it makes sense to populate the vma when the first fault is 
>> triggered
>> instead of doing it during mmap. This is because the userspace may call
>> mmap but does not actually use the data. Qemu works this way 
>> depending on
> Yes, the idea of this is also related to the concept of page fault.
>
> However, the folio has already been pinned during creation. I think 
> using the page fault
>
> again is theoretically sound, but it may not save memory, only 
> increase context switch overhead.

This is not about saving memory but rather correctness and desired handling.

A mmap() operation is for creating the VMA and *not* filling the page 
tables. That might work but is not really a desired approach.

Regards,
Christian.

>
>
>> whether opengl is available in the environment or not.
>>
>>> Signed-off-by: Huan Yang <link@vivo.com>
>>> ---
>>>   drivers/dma-buf/udmabuf.c | 39 
>>> ++++++++++++++++-----------------------
>>>   1 file changed, 16 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
>>> index 047c3cd2ceff..475268d4ebb1 100644
>>> --- a/drivers/dma-buf/udmabuf.c
>>> +++ b/drivers/dma-buf/udmabuf.c
>>> @@ -38,36 +38,29 @@ struct udmabuf_folio {
>>>        struct list_head list;
>>>   };
>>>
>>> -static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
>>> -{
>>> -     struct vm_area_struct *vma = vmf->vma;
>>> -     struct udmabuf *ubuf = vma->vm_private_data;
>>> -     pgoff_t pgoff = vmf->pgoff;
>>> -     unsigned long pfn;
>>> -
>>> -     if (pgoff >= ubuf->pagecount)
>>> -             return VM_FAULT_SIGBUS;
>>> -
>>> -     pfn = folio_pfn(ubuf->folios[pgoff]);
>>> -     pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
>>> -
>>> -     return vmf_insert_pfn(vma, vmf->address, pfn);
>>> -}
>>> -
>>> -static const struct vm_operations_struct udmabuf_vm_ops = {
>>> -     .fault = udmabuf_vm_fault,
>>> -};
>>> -
>>>   static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct
>>> *vma)
>>>   {
>>>        struct udmabuf *ubuf = buf->priv;
>>> +     unsigned long addr;
>>> +     unsigned long end;
>>> +     unsigned long pgoff;
>>> +     int ret;
>>>
>>>        if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
>>>                return -EINVAL;
>>>
>>> -     vma->vm_ops = &udmabuf_vm_ops;
>>> -     vma->vm_private_data = ubuf;
>>> -     vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND |
>>> VM_DONTDUMP);
>>> +     for (pgoff = vma->vm_pgoff, end = vma->vm_end, addr = vma-
>>>> vm_start;
>>> +          addr < end; pgoff++, addr += PAGE_SIZE) {
>>> +             struct page *page =
>>> +                     folio_page(ubuf->folios[pgoff],
>>> +                                ubuf->offsets[pgoff] >> PAGE_SHIFT);
>> Please don't use struct page pointers, given the recent conversion to 
>> use
>> only folios in udmabuf driver. I think what you are trying to do 
>> above can
>> be done using only folios.
> Yes, just use pfn. Consider of HVO, must use this.
>>
>>> +
>>> +             ret = remap_pfn_range(vma, addr, page_to_pfn(page),
>>> PAGE_SIZE,
>>> +                                   vma->vm_page_prot);
>> Could you please retain the use of vmf_insert_pfn() here, given the 
>> simplicity,
>> among other reasons?
> I will make the correction.
>
> Thanks.
>>
>> Thanks,
>> Vivek
>>
>>> +             if (ret)
>>> +                     return ret;
>>> +     }
>>> +
>>>        return 0;
>>>   }
>>>
>>> -- 
>>> 2.45.2
diff mbox series

Patch

diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 047c3cd2ceff..475268d4ebb1 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -38,36 +38,29 @@  struct udmabuf_folio {
 	struct list_head list;
 };
 
-static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
-{
-	struct vm_area_struct *vma = vmf->vma;
-	struct udmabuf *ubuf = vma->vm_private_data;
-	pgoff_t pgoff = vmf->pgoff;
-	unsigned long pfn;
-
-	if (pgoff >= ubuf->pagecount)
-		return VM_FAULT_SIGBUS;
-
-	pfn = folio_pfn(ubuf->folios[pgoff]);
-	pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
-
-	return vmf_insert_pfn(vma, vmf->address, pfn);
-}
-
-static const struct vm_operations_struct udmabuf_vm_ops = {
-	.fault = udmabuf_vm_fault,
-};
-
 static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
 {
 	struct udmabuf *ubuf = buf->priv;
+	unsigned long addr;
+	unsigned long end;
+	unsigned long pgoff;
+	int ret;
 
 	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
 		return -EINVAL;
 
-	vma->vm_ops = &udmabuf_vm_ops;
-	vma->vm_private_data = ubuf;
-	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+	for (pgoff = vma->vm_pgoff, end = vma->vm_end, addr = vma->vm_start;
+	     addr < end; pgoff++, addr += PAGE_SIZE) {
+		struct page *page =
+			folio_page(ubuf->folios[pgoff],
+				   ubuf->offsets[pgoff] >> PAGE_SHIFT);
+
+		ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE,
+				      vma->vm_page_prot);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }