
WIP: Performance improvements

Message ID 6b6a4e1c-a9e9-9592-d5b4-3c9210c8b650@collabora.com (mailing list archive)
State New, archived
Series WIP: Performance improvements

Commit Message

Muhammad Usama Anjum July 28, 2023, 11:02 a.m. UTC
We are optimizing for better performance. Please find the change-set below
for easy review before the next revision and post your comments:

- Replace memcpy() with a direct struct copy, as memcpy() is proving to be
  very expensive
- Don't check PAGE_IS_FILE if no mask needs it, as it is very expensive to
  check per PTE
- Add a question in a comment for discussion purposes
- Add a fast path for exclusive WP of PTEs
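
For context on the last point: the fast path only triggers when p->vec_buf is
NULL, i.e. when user space does a pure write-protect pass and supplies no
output vector, so the bounce buffer is never allocated. A minimal user-space
sketch of such a call is below; it assumes the pm_scan_arg layout proposed in
this series (the size/start/end/vec field names may still change) and an
already open /proc/<pid>/pagemap file descriptor:

#include <sys/ioctl.h>
#include <linux/fs.h>	/* PAGEMAP_SCAN, struct pm_scan_arg (from this series) */

/*
 * Sketch only: write-protect all written pages in [start, end) without
 * requesting any output regions, so the scan should take the vec_buf == NULL
 * fast path.  'pagemap_fd' is an open /proc/<pid>/pagemap file descriptor.
 */
static int wp_range(int pagemap_fd, unsigned long start, unsigned long end)
{
	struct pm_scan_arg arg = {
		.size          = sizeof(arg),
		.flags         = PM_SCAN_WP_MATCHING,	/* WP the matching pages */
		.start         = start,			/* assumed field names */
		.end           = end,
		.vec           = 0,			/* no output buffer ... */
		.vec_len       = 0,			/* ... so vec_buf stays NULL */
		.category_mask = PAGE_IS_WRITTEN,	/* match only written pages */
		.return_mask   = PAGE_IS_WRITTEN,
	};

	return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
}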
---
 fs/proc/task_mmu.c | 54 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 11 deletions(-)

 	return categories;
@@ -1957,9 +1971,7 @@ static bool pagemap_scan_push_range(unsigned long categories,
 		if (p->vec_buf_index >= p->vec_buf_len)
 			return false;

-		memcpy(&p->vec_buf[p->vec_buf_index], cur_buf,
-		       sizeof(*p->vec_buf));
-		++p->vec_buf_index;
+		p->vec_buf[p->vec_buf_index++] = *cur_buf;
 	}

 	cur_buf->start = addr;
@@ -2095,9 +2107,24 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
 		return 0;
 	}

+	if (!p->vec_buf) {
+		/* Fast path for performing exclusive WP */
+		for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
+			if (pte_uffd_wp(ptep_get(pte)))
+				continue;
+			make_uffd_wp_pte(vma, addr, pte);
+			if (!flush) {
+				start = addr;
+				flush = true;
+			}
+		}
+		ret = 0;
+		goto flush_and_return;
+	}
+
 	for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
 		unsigned long categories = p->cur_vma_category |
-					   pagemap_page_category(vma, addr, ptep_get(pte));
+					   pagemap_page_category(p, vma, addr, ptep_get(pte));
 		unsigned long next = addr + PAGE_SIZE;

 		ret = pagemap_scan_output(categories, p, addr, &next);
@@ -2119,6 +2146,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
 		}
 	}

+flush_and_return:
 	if (flush)
 		flush_tlb_range(vma, start, addr);

@@ -2284,6 +2312,9 @@ static int pagemap_scan_init_bounce_buffer(struct pagemap_scan_private *p)
 	 * consecutive ranges that have the same categories returned across
 	 * walk_page_range() calls.
 	 */
+	// Question: Increasing the vec_buf_len increases the execution speed.
+	// But it'll increase the memory needed to run the IOCTL. Can we do something here?
+	// Right now we only have space for 512 entries of page_region
 	p->vec_buf_len = min_t(size_t, PAGEMAP_WALK_SIZE >> PAGE_SHIFT,
 			       p->arg.vec_len - 1);
 	p->vec_buf = kmalloc_array(p->vec_buf_len, sizeof(*p->vec_buf),
@@ -2329,6 +2360,7 @@ static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
 	if (ret)
 		return ret;

+	p.masks_of_interest = MASKS_OF_INTEREST(p.arg);
 	ret = pagemap_scan_init_bounce_buffer(&p);
 	if (ret)
 		return ret;

Comments

Greg Kroah-Hartman July 28, 2023, 11:13 a.m. UTC | #1
On Fri, Jul 28, 2023 at 04:02:02PM +0500, Muhammad Usama Anjum wrote:
> We are optimizing for better performance. Please find the change-set below
> for easy review before the next revision and post your comments:
> 
> - Replace memcpy() with a direct struct copy, as memcpy() is proving to be
>   very expensive
> - Don't check PAGE_IS_FILE if no mask needs it, as it is very expensive to
>   check per PTE
> - Add a question in a comment for discussion purposes
> - Add a fast path for exclusive WP of PTEs

Please read the kernel documentation for how to properly submit patches
(i.e. don't mush them all together into one, and do properly describe
what you are doing and why you are doing it).

Also, actually test the patch and prove (or disprove) whether it really
matters for performance.

good luck!

greg k-h
Andrei Vagin Aug. 7, 2023, 3:32 a.m. UTC | #2
On Fri, Jul 28, 2023 at 4:02 AM Muhammad Usama Anjum
<usama.anjum@collabora.com> wrote:
>
> We are optimizing for better performance. Please find the change-set below
> for easy review before the next revision and post your comments:
>
> - Replace memcpy() with a direct struct copy, as memcpy() is proving to be
>   very expensive
> - Don't check PAGE_IS_FILE if no mask needs it, as it is very expensive to
>   check per PTE
> - Add a question in a comment for discussion purposes
> - Add a fast path for exclusive WP of PTEs
> ---
>  fs/proc/task_mmu.c | 54 ++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 43 insertions(+), 11 deletions(-)
>
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 7e92c33635cab..879baf896ed0b 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1757,37 +1757,51 @@ static int pagemap_release(struct inode *inode, struct file *file)
>                                  PAGE_IS_HUGE)
>  #define PM_SCAN_FLAGS          (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
>
> +#define MASKS_OF_INTEREST(a)   (a.category_inverted | a.category_mask | \
> +                                a.category_anyof_mask | a.return_mask)
> +
>  struct pagemap_scan_private {
>         struct pm_scan_arg arg;
> +       unsigned long masks_of_interest;
>         unsigned long cur_vma_category;
>         struct page_region *vec_buf, cur_buf;
>         unsigned long vec_buf_len, vec_buf_index, found_pages, end_addr;
>         struct page_region __user *vec_out;
>  };
>
> -static unsigned long pagemap_page_category(struct vm_area_struct *vma,
> +static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
> +                                          struct vm_area_struct *vma,
>                                            unsigned long addr, pte_t pte)
>  {
>         unsigned long categories = 0;
>
>         if (pte_present(pte)) {
> -               struct page *page = vm_normal_page(vma, addr, pte);
> +               struct page *page;
>
>                 categories |= PAGE_IS_PRESENT;
>                 if (!pte_uffd_wp(pte))
>                         categories |= PAGE_IS_WRITTEN;
> -               if (page && !PageAnon(page))
> -                       categories |= PAGE_IS_FILE;
> +
> +               if (p->masks_of_interest & PAGE_IS_FILE) {
> +                       page = vm_normal_page(vma, addr, pte);
> +                       if (page && !PageAnon(page))
> +                               categories |= PAGE_IS_FILE;
> +               }
> +
>                 if (is_zero_pfn(pte_pfn(pte)))
>                         categories |= PAGE_IS_PFNZERO;
>         } else if (is_swap_pte(pte)) {
> -               swp_entry_t swp = pte_to_swp_entry(pte);
> +               swp_entry_t swp;
>
>                 categories |= PAGE_IS_SWAPPED;
>                 if (!pte_swp_uffd_wp_any(pte))
>                         categories |= PAGE_IS_WRITTEN;
> -               if (is_pfn_swap_entry(swp) && !PageAnon(pfn_swap_entry_to_page(swp)))
> -                       categories |= PAGE_IS_FILE;
> +
> +               if (p->masks_of_interest & PAGE_IS_FILE) {
> +                       swp = pte_to_swp_entry(pte);
> +                       if (is_pfn_swap_entry(swp) && !PageAnon(pfn_swap_entry_to_page(swp)))
> +                               categories |= PAGE_IS_FILE;
> +               }
>         }
>
>         return categories;
> @@ -1957,9 +1971,7 @@ static bool pagemap_scan_push_range(unsigned long categories,
>                 if (p->vec_buf_index >= p->vec_buf_len)
>                         return false;
>
> -               memcpy(&p->vec_buf[p->vec_buf_index], cur_buf,
> -                      sizeof(*p->vec_buf));
> -               ++p->vec_buf_index;
> +               p->vec_buf[p->vec_buf_index++] = *cur_buf;
>         }
>
>         cur_buf->start = addr;
> @@ -2095,9 +2107,24 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
>                 return 0;
>         }
>
> +       if (!p->vec_buf) {
> +               /* Fast path for performing exclusive WP */
> +               for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
> +                       if (pte_uffd_wp(ptep_get(pte)))
> +                               continue;
> +                       make_uffd_wp_pte(vma, addr, pte);
> +                       if (!flush) {
> +                               start = addr;
> +                               flush = true;
> +                       }
> +               }
> +               ret = 0;
> +               goto flush_and_return;
> +       }
> +
>         for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
>                 unsigned long categories = p->cur_vma_category |
> -                                          pagemap_page_category(vma, addr, ptep_get(pte));
> +                                          pagemap_page_category(p, vma, addr, ptep_get(pte));
>                 unsigned long next = addr + PAGE_SIZE;
>
>                 ret = pagemap_scan_output(categories, p, addr, &next);
> @@ -2119,6 +2146,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
>                 }
>         }
>
> +flush_and_return:
>         if (flush)
>                 flush_tlb_range(vma, start, addr);
>
> @@ -2284,6 +2312,9 @@ static int pagemap_scan_init_bounce_buffer(struct pagemap_scan_private *p)
>          * consecutive ranges that have the same categories returned across
>          * walk_page_range() calls.
>          */
> +       // Question: Increasing the vec_buf_len increases the execution speed.
> +       // But it'll increase the memory needed to run the IOCTL. Can we do something here?
> +       // Right now we only have space for 512 entries of page_region

The main problem here is that walk_page_range() is executed for only 512
pages at a time. I think we need to execute it for the whole range and
interrupt it when we need to drain the bounce buffer.

For a trivial program that scans its own address space, the time is reduced
from 20 seconds to 0.001 seconds. The test program and perf data are here:
https://gist.github.com/avagin/c5a22f3c78f8cb34281602dfe9c43d10
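
Roughly, that control flow could look like the sketch below. Illustrative
only, not code from this series: pagemap_scan_ops, pagemap_scan_flush_buffer()
and the -ENOSPC convention are made-up names for the sketch, and
p->arg.start/end are assumed field names.

/*
 * Illustrative sketch: walk the whole requested range once and let the
 * pmd/pte callback stop the walk (here via -ENOSPC) whenever vec_buf fills
 * up, so the caller can drain the bounce buffer to user space and resume
 * from the address where the walk stopped.
 */
static long pagemap_scan_whole_range(struct mm_struct *mm,
				     struct pagemap_scan_private *p)
{
	unsigned long walk_start = p->arg.start;
	long ret = 0;

	while (walk_start < p->arg.end) {
		mmap_read_lock(mm);
		ret = walk_page_range(mm, walk_start, p->arg.end,
				      &pagemap_scan_ops, p);
		mmap_read_unlock(mm);

		/* Hypothetical helper: copy vec_buf to p->vec_out, reset index. */
		if (pagemap_scan_flush_buffer(p) < 0)
			return -EFAULT;

		if (ret != -ENOSPC)		/* finished, or a real error */
			break;

		ret = 0;
		walk_start = p->end_addr;	/* resume where the walk stopped */
	}

	return ret;
}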

>         p->vec_buf_len = min_t(size_t, PAGEMAP_WALK_SIZE >> PAGE_SHIFT,
>                                p->arg.vec_len - 1);
>         p->vec_buf = kmalloc_array(p->vec_buf_len, sizeof(*p->vec_buf),
> @@ -2329,6 +2360,7 @@ static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
>         if (ret)
>                 return ret;
>
> +       p.masks_of_interest = MASKS_OF_INTEREST(p.arg);
>         ret = pagemap_scan_init_bounce_buffer(&p);
>         if (ret)
>                 return ret;
> --
> 2.39.2
>

Patch

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7e92c33635cab..879baf896ed0b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1757,37 +1757,51 @@  static int pagemap_release(struct inode *inode, struct file *file)
 				 PAGE_IS_HUGE)
 #define PM_SCAN_FLAGS		(PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)

+#define MASKS_OF_INTEREST(a)	(a.category_inverted | a.category_mask | \
+				 a.category_anyof_mask | a.return_mask)
+
 struct pagemap_scan_private {
 	struct pm_scan_arg arg;
+	unsigned long masks_of_interest;
 	unsigned long cur_vma_category;
 	struct page_region *vec_buf, cur_buf;
 	unsigned long vec_buf_len, vec_buf_index, found_pages, end_addr;
 	struct page_region __user *vec_out;
 };

-static unsigned long pagemap_page_category(struct vm_area_struct *vma,
+static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
+					   struct vm_area_struct *vma,
 					   unsigned long addr, pte_t pte)
 {
 	unsigned long categories = 0;

 	if (pte_present(pte)) {
-		struct page *page = vm_normal_page(vma, addr, pte);
+		struct page *page;

 		categories |= PAGE_IS_PRESENT;
 		if (!pte_uffd_wp(pte))
 			categories |= PAGE_IS_WRITTEN;
-		if (page && !PageAnon(page))
-			categories |= PAGE_IS_FILE;
+
+		if (p->masks_of_interest & PAGE_IS_FILE) {
+			page = vm_normal_page(vma, addr, pte);
+			if (page && !PageAnon(page))
+				categories |= PAGE_IS_FILE;
+		}
+
 		if (is_zero_pfn(pte_pfn(pte)))
 			categories |= PAGE_IS_PFNZERO;
 	} else if (is_swap_pte(pte)) {
-		swp_entry_t swp = pte_to_swp_entry(pte);
+		swp_entry_t swp;

 		categories |= PAGE_IS_SWAPPED;
 		if (!pte_swp_uffd_wp_any(pte))
 			categories |= PAGE_IS_WRITTEN;
-		if (is_pfn_swap_entry(swp) && !PageAnon(pfn_swap_entry_to_page(swp)))
-			categories |= PAGE_IS_FILE;
+
+		if (p->masks_of_interest & PAGE_IS_FILE) {
+			swp = pte_to_swp_entry(pte);
+			if (is_pfn_swap_entry(swp) && !PageAnon(pfn_swap_entry_to_page(swp)))
+				categories |= PAGE_IS_FILE;
+		}
 	}