Message ID | 6b6a4e1c-a9e9-9592-d5b4-3c9210c8b650@collabora.com (mailing list archive)
---|---
State | New, archived |
Series | WIP: Performance improvements
On Fri, Jul 28, 2023 at 04:02:02PM +0500, Muhammad Usama Anjum wrote:
> We are optimizing for more performance. Please find the change-set below
> for easy review before the next revision and post your comments:
>
> - Replace memcpy() with direct copy, as it is proving to be very expensive
> - Don't check PAGE_IS_FILE if no mask needs it, as it is very
>   expensive to check per pte
> - Add a question in a comment for discussion purposes
> - Add a fast path for exclusive WP for ptes

Please read the kernel documentation for how to properly submit patches
(i.e. don't mush them all together into one), and then properly describe
what you are doing and why you are doing it. Also, actually test the
patch and prove (or disprove) whether it matters for performance.

good luck!

greg k-h
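[As an illustrative aside on the first item in the change-set: the memcpy()-to-assignment change amounts to letting the compiler copy a small fixed-size struct directly. A minimal standalone sketch of the idea, where struct region is a stand-in for the kernel's struct page_region, not its real layout:]

        #include <stdio.h>
        #include <string.h>

        /* Illustrative stand-in for the kernel's struct page_region. */
        struct region {
                unsigned long long start;
                unsigned long long end;
                unsigned long long categories;
        };

        int main(void)
        {
                struct region buf[2] = { { 0x1000, 0x2000, 0x3 }, { 0, 0, 0 } };

                /* Before: copy one element with an explicit memcpy()... */
                memcpy(&buf[1], &buf[0], sizeof(buf[0]));

                /* ...after: plain struct assignment, which the compiler may
                 * lower to the same loads and stores without a libc call. */
                buf[1] = buf[0];

                printf("start=%llx end=%llx cat=%llx\n",
                       buf[1].start, buf[1].end, buf[1].categories);
                return 0;
        }

[Whether the assignment form is actually faster is exactly the kind of claim the reply above asks to see measured; compilers routinely inline small constant-size memcpy() calls into the same few moves.]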
On Fri, Jul 28, 2023 at 4:02 AM Muhammad Usama Anjum
<usama.anjum@collabora.com> wrote:
>
> We are optimizing for more performance. Please find the change-set below
> for easy review before the next revision and post your comments:
>
> - Replace memcpy() with direct copy, as it is proving to be very expensive
> - Don't check PAGE_IS_FILE if no mask needs it, as it is very
>   expensive to check per pte
> - Add a question in a comment for discussion purposes
> - Add a fast path for exclusive WP for ptes
> ---
>  fs/proc/task_mmu.c | 54 ++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 43 insertions(+), 11 deletions(-)
>
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 7e92c33635cab..879baf896ed0b 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1757,37 +1757,51 @@ static int pagemap_release(struct inode *inode, struct file *file)
>                                       PAGE_IS_HUGE)
>  #define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
>
> +#define MASKS_OF_INTEREST(a) (a.category_inverted | a.category_mask | \
> +                              a.category_anyof_mask | a.return_mask)
> +
>  struct pagemap_scan_private {
>         struct pm_scan_arg arg;
> +       unsigned long masks_of_interest;
>         unsigned long cur_vma_category;
>         struct page_region *vec_buf, cur_buf;
>         unsigned long vec_buf_len, vec_buf_index, found_pages, end_addr;
>         struct page_region __user *vec_out;
>  };
>
> -static unsigned long pagemap_page_category(struct vm_area_struct *vma,
> +static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
> +                                           struct vm_area_struct *vma,
>                                             unsigned long addr, pte_t pte)
>  {
>         unsigned long categories = 0;
>
>         if (pte_present(pte)) {
> -               struct page *page = vm_normal_page(vma, addr, pte);
> +               struct page *page;
>
>                 categories |= PAGE_IS_PRESENT;
>                 if (!pte_uffd_wp(pte))
>                         categories |= PAGE_IS_WRITTEN;
> -               if (page && !PageAnon(page))
> -                       categories |= PAGE_IS_FILE;
> +
> +               if (p->masks_of_interest & PAGE_IS_FILE) {
> +                       page = vm_normal_page(vma, addr, pte);
> +                       if (page && !PageAnon(page))
> +                               categories |= PAGE_IS_FILE;
> +               }
> +
>                 if (is_zero_pfn(pte_pfn(pte)))
>                         categories |= PAGE_IS_PFNZERO;
>         } else if (is_swap_pte(pte)) {
> -               swp_entry_t swp = pte_to_swp_entry(pte);
> +               swp_entry_t swp;
>
>                 categories |= PAGE_IS_SWAPPED;
>                 if (!pte_swp_uffd_wp_any(pte))
>                         categories |= PAGE_IS_WRITTEN;
> -               if (is_pfn_swap_entry(swp) && !PageAnon(pfn_swap_entry_to_page(swp)))
> -                       categories |= PAGE_IS_FILE;
> +
> +               if (p->masks_of_interest & PAGE_IS_FILE) {
> +                       swp = pte_to_swp_entry(pte);
> +                       if (is_pfn_swap_entry(swp) && !PageAnon(pfn_swap_entry_to_page(swp)))
> +                               categories |= PAGE_IS_FILE;
> +               }
>         }
>
>         return categories;
> @@ -1957,9 +1971,7 @@ static bool pagemap_scan_push_range(unsigned long categories,
>         if (p->vec_buf_index >= p->vec_buf_len)
>                 return false;
>
> -       memcpy(&p->vec_buf[p->vec_buf_index], cur_buf,
> -              sizeof(*p->vec_buf));
> -       ++p->vec_buf_index;
> +       p->vec_buf[p->vec_buf_index++] = *cur_buf;
>         }
>
>         cur_buf->start = addr;
> @@ -2095,9 +2107,24 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
>                 return 0;
>         }
>
> +       if (!p->vec_buf) {
> +               /* Fast path for performing exclusive WP */
> +               for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
> +                       if (pte_uffd_wp(ptep_get(pte)))
> +                               continue;
> +                       make_uffd_wp_pte(vma, addr, pte);
> +                       if (!flush) {
> +                               start = addr;
> +                               flush = true;
> +                       }
> +               }
> +               ret = 0;
> +               goto flush_and_return;
> +       }
> +
>         for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
>                 unsigned long categories = p->cur_vma_category |
> -                       pagemap_page_category(vma, addr, ptep_get(pte));
> +                       pagemap_page_category(p, vma, addr, ptep_get(pte));
>                 unsigned long next = addr + PAGE_SIZE;
>
>                 ret = pagemap_scan_output(categories, p, addr, &next);
> @@ -2119,6 +2146,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
>                 }
>         }
>
> +flush_and_return:
>         if (flush)
>                 flush_tlb_range(vma, start, addr);
>
> @@ -2284,6 +2312,9 @@ static int pagemap_scan_init_bounce_buffer(struct pagemap_scan_private *p)
>          * consecutive ranges that have the same categories returned across
>          * walk_page_range() calls.
>          */
> +       // Question: Increasing the vec_buf_len increases the execution speed.
> +       // But it'll increase the memory needed to run the IOCTL. Can we do something here?
> +       // Right now we only have space for 512 entries of page_region.

The main problem here is that walk_page_range() is executed for only 512
pages at a time. I think we need to execute it for the whole range and
interrupt it when we need to drain the bounce buffer. For a trivial
program that scans its address space, the time is reduced from 20 seconds
to 0.001 seconds. The test program and perf data are here:
https://gist.github.com/avagin/c5a22f3c78f8cb34281602dfe9c43d10

>         p->vec_buf_len = min_t(size_t, PAGEMAP_WALK_SIZE >> PAGE_SHIFT,
>                                p->arg.vec_len - 1);
>         p->vec_buf = kmalloc_array(p->vec_buf_len, sizeof(*p->vec_buf),
> @@ -2329,6 +2360,7 @@ static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
>         if (ret)
>                 return ret;
>
> +       p.masks_of_interest = MASKS_OF_INTEREST(p.arg);
>         ret = pagemap_scan_init_bounce_buffer(&p);
>         if (ret)
>                 return ret;
> --
> 2.39.2
>
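[To make the "trivial program that scans its address space" concrete, here is a rough sketch of such a scanner. It is written against the PAGEMAP_SCAN UAPI as it was eventually merged into <linux/fs.h> (struct pm_scan_arg, struct page_region); the interface in this WIP revision may differ, and the 0x7ffffffff000 end address is an x86-64 assumption:]

        #include <fcntl.h>
        #include <stdio.h>
        #include <stdint.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <linux/fs.h>  /* struct pm_scan_arg, struct page_region, PAGEMAP_SCAN */

        #define VEC_LEN 512    /* mirrors the kernel's 512-entry bounce buffer */

        int main(void)
        {
                static struct page_region regions[VEC_LEN];
                struct pm_scan_arg arg = {
                        .size        = sizeof(arg),
                        .start       = 0,
                        /* Assumed upper bound: top of the x86-64 user VA range. */
                        .end         = 0x7ffffffff000ULL,
                        .vec         = (uintptr_t)regions,
                        .vec_len     = VEC_LEN,
                        .return_mask = PAGE_IS_PRESENT | PAGE_IS_SWAPPED,
                };
                int fd = open("/proc/self/pagemap", O_RDONLY);

                if (fd < 0)
                        return 1;

                /* Each call fills at most VEC_LEN regions and reports in
                 * walk_end how far it got; resume from there until the
                 * whole requested range has been walked. */
                while (arg.start < arg.end) {
                        long n = ioctl(fd, PAGEMAP_SCAN, &arg);

                        if (n < 0)
                                break;
                        for (long i = 0; i < n; i++)
                                printf("%llx-%llx categories=%llx\n",
                                       (unsigned long long)regions[i].start,
                                       (unsigned long long)regions[i].end,
                                       (unsigned long long)regions[i].categories);
                        if (arg.walk_end <= arg.start || arg.walk_end >= arg.end)
                                break;
                        arg.start = arg.walk_end;
                }
                close(fd);
                return 0;
        }

[A full scan of a large, mostly sparse address space takes many such calls, which is why the per-call walk_page_range() granularity dominates; timing this loop before and after a change is the kind of measurement being asked for in this thread.]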