Message ID | 20190820065830.GA11255@AMDC3964.digital.local (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/vgem: Added page prefaulting | expand |
+CC: $(./script/get_maintainers.pl drivers/gpu/drm/vgem/vgem_drv.c) On 20.08.2019 08:58, Szymon Andrzejuk wrote: > Page fault handler inside vgem driver now preallocates pages in advance > when fault occurs for the first time. Pages can be allocated in > direction of increasing/decreasing addresses, depending on memory access > profile. In case of random access no preallocation occurs. > > Synthetic benchmark showed over 8x bandwidth increase when copying data > from mmapped vgem buffer with memcpy and ~160 times when accessing mapped > buffer sequentially. Compiled with gcc 8.2.0 with -O2 flag. > Unigine Heaven running on custom virgl vtest virtual GPU with vgem buffers > sees ~17% FPS increase. > > This performance increase only occurs when accessing vgem buffer mapped > using DRM_IOCTL_MODE_MAP_DUMB ioctl. When accessing buffer imported > from prime fd the vgem page fault handler is not invoked. It's advised > to use vector streaming copy instructions and avoid sequential accesses > in this case. Streaming copy brings the performance to be on par with > similar buffer allocated with memfd_create(2) syscall. > > Signed-off-by: Szymon Andrzejuk <s.andrzejuk@samsung.com> > --- > drivers/gpu/drm/vgem/vgem_drv.c | 177 ++++++++++++++++++++++++++------ > 1 file changed, 143 insertions(+), 34 deletions(-) > > diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c > index 11a8f99ba18c..739ba841e89c 100644 > --- a/drivers/gpu/drm/vgem/vgem_drv.c > +++ b/drivers/gpu/drm/vgem/vgem_drv.c > @@ -34,6 +34,7 @@ > #include <linux/ramfs.h> > #include <linux/shmem_fs.h> > #include <linux/dma-buf.h> > +#include <linux/pfn_t.h> > #include "vgem_drv.h" > > #define DRIVER_NAME "vgem" > @@ -50,8 +51,21 @@ static struct vgem_device { > static void vgem_gem_free_object(struct drm_gem_object *obj) > { > struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj); > + int i; > + > + mutex_lock(&vgem_obj->pages_lock); > + if (vgem_obj->pages) { > + int num_pages = obj->size >> PAGE_SHIFT; > + > + for (i = 0; i < num_pages; i++) { > + if (vgem_obj->pages[i]) > + put_page(vgem_obj->pages[i]); > + } > + kvfree(vgem_obj->pages); > + vgem_obj->pages = NULL; > + } > + mutex_unlock(&vgem_obj->pages_lock); > > - kvfree(vgem_obj->pages); > mutex_destroy(&vgem_obj->pages_lock); > > if (obj->import_attach) > @@ -61,6 +75,72 @@ static void vgem_gem_free_object(struct drm_gem_object *obj) > kfree(vgem_obj); > } > > +static int __vgem_alloc_page(struct page *page, struct vm_area_struct *vma, > + unsigned long vaddr, int page_num) unused page_num > +{ > + unsigned long pfn; > + int insert_ret; > + > + pfn = page_to_pfn(page); > + insert_ret = vmf_insert_mixed(vma, vaddr, __pfn_to_pfn_t(pfn, PFN_DEV)); > + > + if (insert_ret & VM_FAULT_ERROR) > + return VM_FAULT_ERROR; Is it OK to return mask? instead of insert_ret. > + > + return 0; > +} > + > +static int __vgem_read_mapping_page(struct drm_vgem_gem_object *obj, > + int page_num, struct page **page) > +{ > + int ret; > + struct page *mapped_page; > + > + mapped_page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping, > + page_num); > + if (IS_ERR(page)) { > + switch (PTR_ERR(page)) { > + case -ENOSPC: > + case -ENOMEM: > + ret = VM_FAULT_OOM; > + break; > + case -EBUSY: > + ret = VM_FAULT_RETRY; > + break; > + case -EFAULT: > + case -EINVAL: > + ret = VM_FAULT_SIGBUS; > + break; > + default: > + WARN_ON(PTR_ERR(page)); > + ret = VM_FAULT_SIGBUS; > + break; > + } > + > + return ret; > + } > + > + *page = mapped_page; > + return 0; > +} > + > +static int __vgem_prepare_single_page(struct drm_vgem_gem_object *obj, > + struct vm_area_struct *vma, > + int page_num, unsigned long vaddr) > +{ > + int ret; > + > + ret = __vgem_read_mapping_page(obj, page_num, &obj->pages[page_num]); > + if (ret) > + return ret; > + > + ret = __vgem_alloc_page(obj->pages[page_num], vma, vaddr, page_num); > + if (ret) > + return ret; One can use return __vgem_alloc_page(...), up to you. > + > + return 0; > +} > + > static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) > { > struct vm_area_struct *vma = vmf->vma; > @@ -70,6 +150,8 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) > vm_fault_t ret = VM_FAULT_SIGBUS; > loff_t num_pages; > pgoff_t page_offset; > + int page_num, page_prep_ret; > + const int PREFAULT_PAGES = 8; > page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT; > > num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE); > @@ -77,41 +159,60 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) > if (page_offset >= num_pages) > return VM_FAULT_SIGBUS; > > + ret = VM_FAULT_NOPAGE; > + > mutex_lock(&obj->pages_lock); > - if (obj->pages) { > - get_page(obj->pages[page_offset]); > - vmf->page = obj->pages[page_offset]; > - ret = 0; > - } > - mutex_unlock(&obj->pages_lock); > - if (ret) { > - struct page *page; > - > - page = shmem_read_mapping_page( > - file_inode(obj->base.filp)->i_mapping, > - page_offset); > - if (!IS_ERR(page)) { > - vmf->page = page; > - ret = 0; > - } else switch (PTR_ERR(page)) { > - case -ENOSPC: > - case -ENOMEM: > - ret = VM_FAULT_OOM; > - break; > - case -EBUSY: > - ret = VM_FAULT_RETRY; > - break; > - case -EFAULT: > - case -EINVAL: > - ret = VM_FAULT_SIGBUS; > - break; > - default: > - WARN_ON(PTR_ERR(page)); > - ret = VM_FAULT_SIGBUS; > - break; > + > + if (num_pages > 1) { > + bool forward = true; > + bool random = false; > + > + // Determine prefaulting direction. If adjacent pages are both > + // allocated/not allocated then we have random access. > + // Always try to prefault on first and last page. > + if (page_offset != 0 && page_offset != num_pages - 1) { > + struct page *next, *prev; > + > + next = obj->pages[page_offset + 1]; > + prev = obj->pages[page_offset - 1]; > + if (!((uintptr_t)next ^ (uintptr_t)prev)) > + random = true; > + else if (!prev) > + forward = false; > + } else { > + forward = (page_offset == 0); > } Quite complicated, maybe sth like this: bool next, prev; next = obj->pages[page_offset + 1]; prev = obj->pages[page_offset - 1]; if (prev == next) random = true; > > + if (!random) { > + for (page_num = page_offset; > + forward ? page_num < page_offset + PREFAULT_PAGES && page_num < num_pages : > + page_offset - page_num < PREFAULT_PAGES && page_num >= 0; > + forward ? page_num++ : page_num--) { Again complicated, try pre-calculate boundaries and: start_page = ...; end_page = ...; step = forward ? 1 : -1; for (page_num = start_page; page_num < end_page; page_num += step) ... > + if (!obj->pages[page_num]) { > + page_prep_ret = __vgem_prepare_single_page(obj, vma, page_num, vaddr); > + if (page_prep_ret) { > + ret = page_prep_ret; > + break; > + } > + } else { > + // random access, exit loop > + break; > + } > + > + vaddr = vaddr + (forward ? 1 : -1) * PAGE_SIZE; vaddr += step * PAGE_SIZE; Regards Andrzej > + } > + } else { > + page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr); > + if (page_prep_ret) > + ret = page_prep_ret; > + } > + } else { > + page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr); > + if (page_prep_ret) > + ret = page_prep_ret; > } > + > + mutex_unlock(&obj->pages_lock); > return ret; > } > > @@ -182,7 +283,7 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, > unsigned long size) > { > struct drm_vgem_gem_object *obj; > - int ret; > + int ret, num_pages; > > obj = __vgem_gem_create(dev, size); > if (IS_ERR(obj)) > @@ -193,6 +294,13 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, > if (ret) > return ERR_PTR(ret); > > + mutex_lock(&obj->pages_lock); > + > + num_pages = obj->base.size >> PAGE_SHIFT; > + obj->pages = kvcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); > + > + mutex_unlock(&obj->pages_lock); > + > return &obj->base; > } > > @@ -262,7 +370,8 @@ static int vgem_mmap(struct file *filp, struct vm_area_struct *vma) > /* Keep the WC mmaping set by drm_gem_mmap() but our pages > * are ordinary and not special. > */ > - vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP; > + vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP; > + > return 0; > } > > > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel
Thank you for the review Andrzej. I'll update the patch to v2 shortly, it should cover all your comments. FYI, I'll be on holiday until September 16 so I might not be able to respond in the following days. Regards, Szymon On 03.09.2019 15:19, Andrzej Hajda wrote: > +CC: $(./script/get_maintainers.pl drivers/gpu/drm/vgem/vgem_drv.c) > > > On 20.08.2019 08:58, Szymon Andrzejuk wrote: >> Page fault handler inside vgem driver now preallocates pages in advance >> when fault occurs for the first time. Pages can be allocated in >> direction of increasing/decreasing addresses, depending on memory access >> profile. In case of random access no preallocation occurs. >> >> Synthetic benchmark showed over 8x bandwidth increase when copying data >> from mmapped vgem buffer with memcpy and ~160 times when accessing mapped >> buffer sequentially. Compiled with gcc 8.2.0 with -O2 flag. >> Unigine Heaven running on custom virgl vtest virtual GPU with vgem buffers >> sees ~17% FPS increase. >> >> This performance increase only occurs when accessing vgem buffer mapped >> using DRM_IOCTL_MODE_MAP_DUMB ioctl. When accessing buffer imported >> from prime fd the vgem page fault handler is not invoked. It's advised >> to use vector streaming copy instructions and avoid sequential accesses >> in this case. Streaming copy brings the performance to be on par with >> similar buffer allocated with memfd_create(2) syscall. >> >> Signed-off-by: Szymon Andrzejuk <s.andrzejuk@samsung.com> >> --- >> drivers/gpu/drm/vgem/vgem_drv.c | 177 ++++++++++++++++++++++++++------ >> 1 file changed, 143 insertions(+), 34 deletions(-) >> >> diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c >> index 11a8f99ba18c..739ba841e89c 100644 >> --- a/drivers/gpu/drm/vgem/vgem_drv.c >> +++ b/drivers/gpu/drm/vgem/vgem_drv.c >> @@ -34,6 +34,7 @@ >> #include <linux/ramfs.h> >> #include <linux/shmem_fs.h> >> #include <linux/dma-buf.h> >> +#include <linux/pfn_t.h> >> #include "vgem_drv.h" >> >> #define DRIVER_NAME "vgem" >> @@ -50,8 +51,21 @@ static struct vgem_device { >> static void vgem_gem_free_object(struct drm_gem_object *obj) >> { >> struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj); >> + int i; >> + >> + mutex_lock(&vgem_obj->pages_lock); >> + if (vgem_obj->pages) { >> + int num_pages = obj->size >> PAGE_SHIFT; >> + >> + for (i = 0; i < num_pages; i++) { >> + if (vgem_obj->pages[i]) >> + put_page(vgem_obj->pages[i]); >> + } >> + kvfree(vgem_obj->pages); >> + vgem_obj->pages = NULL; >> + } >> + mutex_unlock(&vgem_obj->pages_lock); >> >> - kvfree(vgem_obj->pages); >> mutex_destroy(&vgem_obj->pages_lock); >> >> if (obj->import_attach) >> @@ -61,6 +75,72 @@ static void vgem_gem_free_object(struct drm_gem_object *obj) >> kfree(vgem_obj); >> } >> >> +static int __vgem_alloc_page(struct page *page, struct vm_area_struct *vma, >> + unsigned long vaddr, int page_num) > > unused page_num > > >> +{ >> + unsigned long pfn; >> + int insert_ret; >> + >> + pfn = page_to_pfn(page); >> + insert_ret = vmf_insert_mixed(vma, vaddr, __pfn_to_pfn_t(pfn, PFN_DEV)); >> + >> + if (insert_ret & VM_FAULT_ERROR) >> + return VM_FAULT_ERROR; > > Is it OK to return mask? instead of insert_ret. > > >> + >> + return 0; >> +} >> + >> +static int __vgem_read_mapping_page(struct drm_vgem_gem_object *obj, >> + int page_num, struct page **page) >> +{ >> + int ret; >> + struct page *mapped_page; >> + >> + mapped_page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping, >> + page_num); >> + if (IS_ERR(page)) { >> + switch (PTR_ERR(page)) { >> + case -ENOSPC: >> + case -ENOMEM: >> + ret = VM_FAULT_OOM; >> + break; >> + case -EBUSY: >> + ret = VM_FAULT_RETRY; >> + break; >> + case -EFAULT: >> + case -EINVAL: >> + ret = VM_FAULT_SIGBUS; >> + break; >> + default: >> + WARN_ON(PTR_ERR(page)); >> + ret = VM_FAULT_SIGBUS; >> + break; >> + } >> + >> + return ret; >> + } >> + >> + *page = mapped_page; >> + return 0; >> +} >> + >> +static int __vgem_prepare_single_page(struct drm_vgem_gem_object *obj, >> + struct vm_area_struct *vma, >> + int page_num, unsigned long vaddr) >> +{ >> + int ret; >> + >> + ret = __vgem_read_mapping_page(obj, page_num, &obj->pages[page_num]); >> + if (ret) >> + return ret; >> + >> + ret = __vgem_alloc_page(obj->pages[page_num], vma, vaddr, page_num); >> + if (ret) >> + return ret; > > One can use return __vgem_alloc_page(...), up to you. > > >> + >> + return 0; >> +} >> + >> static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) >> { >> struct vm_area_struct *vma = vmf->vma; >> @@ -70,6 +150,8 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) >> vm_fault_t ret = VM_FAULT_SIGBUS; >> loff_t num_pages; >> pgoff_t page_offset; >> + int page_num, page_prep_ret; >> + const int PREFAULT_PAGES = 8; >> page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT; >> >> num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE); >> @@ -77,41 +159,60 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) >> if (page_offset >= num_pages) >> return VM_FAULT_SIGBUS; >> >> + ret = VM_FAULT_NOPAGE; >> + >> mutex_lock(&obj->pages_lock); >> - if (obj->pages) { >> - get_page(obj->pages[page_offset]); >> - vmf->page = obj->pages[page_offset]; >> - ret = 0; >> - } >> - mutex_unlock(&obj->pages_lock); >> - if (ret) { >> - struct page *page; >> - >> - page = shmem_read_mapping_page( >> - file_inode(obj->base.filp)->i_mapping, >> - page_offset); >> - if (!IS_ERR(page)) { >> - vmf->page = page; >> - ret = 0; >> - } else switch (PTR_ERR(page)) { >> - case -ENOSPC: >> - case -ENOMEM: >> - ret = VM_FAULT_OOM; >> - break; >> - case -EBUSY: >> - ret = VM_FAULT_RETRY; >> - break; >> - case -EFAULT: >> - case -EINVAL: >> - ret = VM_FAULT_SIGBUS; >> - break; >> - default: >> - WARN_ON(PTR_ERR(page)); >> - ret = VM_FAULT_SIGBUS; >> - break; >> + >> + if (num_pages > 1) { >> + bool forward = true; >> + bool random = false; >> + >> + // Determine prefaulting direction. If adjacent pages are both >> + // allocated/not allocated then we have random access. >> + // Always try to prefault on first and last page. >> + if (page_offset != 0 && page_offset != num_pages - 1) { >> + struct page *next, *prev; >> + >> + next = obj->pages[page_offset + 1]; >> + prev = obj->pages[page_offset - 1]; >> + if (!((uintptr_t)next ^ (uintptr_t)prev)) >> + random = true; >> + else if (!prev) >> + forward = false; >> + } else { >> + forward = (page_offset == 0); >> } > > Quite complicated, maybe sth like this: > > bool next, prev; > > next = obj->pages[page_offset + 1]; > > prev = obj->pages[page_offset - 1]; > > if (prev == next) > > random = true; > > >> >> + if (!random) { >> + for (page_num = page_offset; >> + forward ? page_num < page_offset + PREFAULT_PAGES && page_num < num_pages : >> + page_offset - page_num < PREFAULT_PAGES && page_num >= 0; >> + forward ? page_num++ : page_num--) { > > Again complicated, try pre-calculate boundaries and: > > start_page = ...; > > end_page = ...; > > step = forward ? 1 : -1; > > for (page_num = start_page; page_num < end_page; page_num += step) > > ... > > >> + if (!obj->pages[page_num]) { >> + page_prep_ret = __vgem_prepare_single_page(obj, vma, page_num, vaddr); >> + if (page_prep_ret) { >> + ret = page_prep_ret; >> + break; >> + } >> + } else { >> + // random access, exit loop >> + break; >> + } >> + >> + vaddr = vaddr + (forward ? 1 : -1) * PAGE_SIZE; > > vaddr += step * PAGE_SIZE; > > > Regards > > Andrzej > > >> + } >> + } else { >> + page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr); >> + if (page_prep_ret) >> + ret = page_prep_ret; >> + } >> + } else { >> + page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr); >> + if (page_prep_ret) >> + ret = page_prep_ret; >> } >> + >> + mutex_unlock(&obj->pages_lock); >> return ret; >> } >> >> @@ -182,7 +283,7 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, >> unsigned long size) >> { >> struct drm_vgem_gem_object *obj; >> - int ret; >> + int ret, num_pages; >> >> obj = __vgem_gem_create(dev, size); >> if (IS_ERR(obj)) >> @@ -193,6 +294,13 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, >> if (ret) >> return ERR_PTR(ret); >> >> + mutex_lock(&obj->pages_lock); >> + >> + num_pages = obj->base.size >> PAGE_SHIFT; >> + obj->pages = kvcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); >> + >> + mutex_unlock(&obj->pages_lock); >> + >> return &obj->base; >> } >> >> @@ -262,7 +370,8 @@ static int vgem_mmap(struct file *filp, struct vm_area_struct *vma) >> /* Keep the WC mmaping set by drm_gem_mmap() but our pages >> * are ordinary and not special. >> */ >> - vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP; >> + vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP; >> + >> return 0; >> } >> >> >> >> _______________________________________________ >> dri-devel mailing list >> dri-devel@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/dri-devel > > >
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 11a8f99ba18c..739ba841e89c 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -34,6 +34,7 @@ #include <linux/ramfs.h> #include <linux/shmem_fs.h> #include <linux/dma-buf.h> +#include <linux/pfn_t.h> #include "vgem_drv.h" #define DRIVER_NAME "vgem" @@ -50,8 +51,21 @@ static struct vgem_device { static void vgem_gem_free_object(struct drm_gem_object *obj) { struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj); + int i; + + mutex_lock(&vgem_obj->pages_lock); + if (vgem_obj->pages) { + int num_pages = obj->size >> PAGE_SHIFT; + + for (i = 0; i < num_pages; i++) { + if (vgem_obj->pages[i]) + put_page(vgem_obj->pages[i]); + } + kvfree(vgem_obj->pages); + vgem_obj->pages = NULL; + } + mutex_unlock(&vgem_obj->pages_lock); - kvfree(vgem_obj->pages); mutex_destroy(&vgem_obj->pages_lock); if (obj->import_attach) @@ -61,6 +75,72 @@ static void vgem_gem_free_object(struct drm_gem_object *obj) kfree(vgem_obj); } +static int __vgem_alloc_page(struct page *page, struct vm_area_struct *vma, + unsigned long vaddr, int page_num) +{ + unsigned long pfn; + int insert_ret; + + pfn = page_to_pfn(page); + insert_ret = vmf_insert_mixed(vma, vaddr, __pfn_to_pfn_t(pfn, PFN_DEV)); + + if (insert_ret & VM_FAULT_ERROR) + return VM_FAULT_ERROR; + + return 0; +} + +static int __vgem_read_mapping_page(struct drm_vgem_gem_object *obj, + int page_num, struct page **page) +{ + int ret; + struct page *mapped_page; + + mapped_page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping, + page_num); + if (IS_ERR(page)) { + switch (PTR_ERR(page)) { + case -ENOSPC: + case -ENOMEM: + ret = VM_FAULT_OOM; + break; + case -EBUSY: + ret = VM_FAULT_RETRY; + break; + case -EFAULT: + case -EINVAL: + ret = VM_FAULT_SIGBUS; + break; + default: + WARN_ON(PTR_ERR(page)); + ret = VM_FAULT_SIGBUS; + break; + } + + return ret; + } + + *page = mapped_page; + return 0; +} + +static int __vgem_prepare_single_page(struct drm_vgem_gem_object *obj, + struct vm_area_struct *vma, + int page_num, unsigned long vaddr) +{ + int ret; + + ret = __vgem_read_mapping_page(obj, page_num, &obj->pages[page_num]); + if (ret) + return ret; + + ret = __vgem_alloc_page(obj->pages[page_num], vma, vaddr, page_num); + if (ret) + return ret; + + return 0; +} + static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -70,6 +150,8 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) vm_fault_t ret = VM_FAULT_SIGBUS; loff_t num_pages; pgoff_t page_offset; + int page_num, page_prep_ret; + const int PREFAULT_PAGES = 8; page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT; num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE); @@ -77,41 +159,60 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf) if (page_offset >= num_pages) return VM_FAULT_SIGBUS; + ret = VM_FAULT_NOPAGE; + mutex_lock(&obj->pages_lock); - if (obj->pages) { - get_page(obj->pages[page_offset]); - vmf->page = obj->pages[page_offset]; - ret = 0; - } - mutex_unlock(&obj->pages_lock); - if (ret) { - struct page *page; - - page = shmem_read_mapping_page( - file_inode(obj->base.filp)->i_mapping, - page_offset); - if (!IS_ERR(page)) { - vmf->page = page; - ret = 0; - } else switch (PTR_ERR(page)) { - case -ENOSPC: - case -ENOMEM: - ret = VM_FAULT_OOM; - break; - case -EBUSY: - ret = VM_FAULT_RETRY; - break; - case -EFAULT: - case -EINVAL: - ret = VM_FAULT_SIGBUS; - break; - default: - WARN_ON(PTR_ERR(page)); - ret = VM_FAULT_SIGBUS; - break; + + if (num_pages > 1) { + bool forward = true; + bool random = false; + + // Determine prefaulting direction. If adjacent pages are both + // allocated/not allocated then we have random access. + // Always try to prefault on first and last page. + if (page_offset != 0 && page_offset != num_pages - 1) { + struct page *next, *prev; + + next = obj->pages[page_offset + 1]; + prev = obj->pages[page_offset - 1]; + if (!((uintptr_t)next ^ (uintptr_t)prev)) + random = true; + else if (!prev) + forward = false; + } else { + forward = (page_offset == 0); } + if (!random) { + for (page_num = page_offset; + forward ? page_num < page_offset + PREFAULT_PAGES && page_num < num_pages : + page_offset - page_num < PREFAULT_PAGES && page_num >= 0; + forward ? page_num++ : page_num--) { + if (!obj->pages[page_num]) { + page_prep_ret = __vgem_prepare_single_page(obj, vma, page_num, vaddr); + if (page_prep_ret) { + ret = page_prep_ret; + break; + } + } else { + // random access, exit loop + break; + } + + vaddr = vaddr + (forward ? 1 : -1) * PAGE_SIZE; + } + } else { + page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr); + if (page_prep_ret) + ret = page_prep_ret; + } + } else { + page_prep_ret = __vgem_prepare_single_page(obj, vma, page_offset, vaddr); + if (page_prep_ret) + ret = page_prep_ret; } + + mutex_unlock(&obj->pages_lock); return ret; } @@ -182,7 +283,7 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, unsigned long size) { struct drm_vgem_gem_object *obj; - int ret; + int ret, num_pages; obj = __vgem_gem_create(dev, size); if (IS_ERR(obj)) @@ -193,6 +294,13 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, if (ret) return ERR_PTR(ret); + mutex_lock(&obj->pages_lock); + + num_pages = obj->base.size >> PAGE_SHIFT; + obj->pages = kvcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + + mutex_unlock(&obj->pages_lock); + return &obj->base; } @@ -262,7 +370,8 @@ static int vgem_mmap(struct file *filp, struct vm_area_struct *vma) /* Keep the WC mmaping set by drm_gem_mmap() but our pages * are ordinary and not special. */ - vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP; + return 0; }
Page fault handler inside vgem driver now preallocates pages in advance when fault occurs for the first time. Pages can be allocated in direction of increasing/decreasing addresses, depending on memory access profile. In case of random access no preallocation occurs. Synthetic benchmark showed over 8x bandwidth increase when copying data from mmapped vgem buffer with memcpy and ~160 times when accessing mapped buffer sequentially. Compiled with gcc 8.2.0 with -O2 flag. Unigine Heaven running on custom virgl vtest virtual GPU with vgem buffers sees ~17% FPS increase. This performance increase only occurs when accessing vgem buffer mapped using DRM_IOCTL_MODE_MAP_DUMB ioctl. When accessing buffer imported from prime fd the vgem page fault handler is not invoked. It's advised to use vector streaming copy instructions and avoid sequential accesses in this case. Streaming copy brings the performance to be on par with similar buffer allocated with memfd_create(2) syscall. Signed-off-by: Szymon Andrzejuk <s.andrzejuk@samsung.com> --- drivers/gpu/drm/vgem/vgem_drv.c | 177 ++++++++++++++++++++++++++------ 1 file changed, 143 insertions(+), 34 deletions(-)