
mm/vmstat: add events for ksm cow

Message ID: 20220323031730.2342930-1-yang.yang29@zte.com.cn
State New

Commit Message

CGEL March 23, 2022, 3:17 a.m. UTC
From: Yang Yang <yang.yang29@zte.com.cn>

Users may enable KSM via madvise(addr, length, MADV_MERGEABLE) when they
want to save memory; the tradeoff is the delay suffered on KSM COW, i.e.
when a write fault has to break a shared KSM page by copying it. Users
can find out how much memory KSM saves by reading
/sys/kernel/mm/ksm/pages_sharing, but they cannot see what KSM COW
costs, and that matters for delay-sensitive tasks.

So add ksm cow events to help users evaluate whether or how to use ksm.

Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Reviewed-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
---
 include/linux/vm_event_item.h |  2 ++
 mm/memory.c                   | 18 +++++++++++++++---
 mm/vmstat.c                   |  2 ++
 3 files changed, 19 insertions(+), 3 deletions(-)
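
For reference (illustrative only, not part of the patch): once this is
applied and CONFIG_KSM is enabled, the two counters appear in
/proc/vmstat under the names added to vmstat_text[] in mm/vmstat.c,
"ksm_cow_success" and "ksm_cow_fail". A minimal userspace sketch for
reading them might look like:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[128];
		FILE *f = fopen("/proc/vmstat", "r");

		if (!f)
			return 1;
		/* Print only the KSM COW counters added by this patch. */
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "ksm_cow_", 8))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}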

Comments

kernel test robot March 23, 2022, 6:30 a.m. UTC | #1
Hi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on hnaz-mm/master]

url:    https://github.com/0day-ci/linux/commits/cgel-zte-gmail-com/mm-vmstat-add-events-for-ksm-cow/20220323-111932
base:   https://github.com/hnaz/linux-mm master
config: nios2-defconfig (https://download.01.org/0day-ci/archive/20220323/202203231454.DszZMI5Y-lkp@intel.com/config)
compiler: nios2-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/cc1a5f6c95b38b1bebb673c7fe3a2b64e2362acc
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review cgel-zte-gmail-com/mm-vmstat-add-events-for-ksm-cow/20220323-111932
        git checkout cc1a5f6c95b38b1bebb673c7fe3a2b64e2362acc
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=nios2 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/memory.c: In function 'do_wp_page':
>> mm/memory.c:3341:40: error: 'KSM_COW_FAIL' undeclared (first use in this function)
    3341 |                         count_vm_event(KSM_COW_FAIL);
         |                                        ^~~~~~~~~~~~
   mm/memory.c:3341:40: note: each undeclared identifier is reported only once for each function it appears in
>> mm/memory.c:3343:40: error: 'KSM_COW_SUCCESS' undeclared (first use in this function)
    3343 |                         count_vm_event(KSM_COW_SUCCESS);
         |                                        ^~~~~~~~~~~~~~~


vim +/KSM_COW_FAIL +3341 mm/memory.c

  3229	
  3230	/*
  3231	 * This routine handles present pages, when users try to write
  3232	 * to a shared page. It is done by copying the page to a new address
  3233	 * and decrementing the shared-page counter for the old page.
  3234	 *
  3235	 * Note that this routine assumes that the protection checks have been
  3236	 * done by the caller (the low-level page fault routine in most cases).
  3237	 * Thus we can safely just mark it writable once we've done any necessary
  3238	 * COW.
  3239	 *
  3240	 * We also mark the page dirty at this point even though the page will
  3241	 * change only once the write actually happens. This avoids a few races,
  3242	 * and potentially makes it more efficient.
  3243	 *
  3244	 * We enter with non-exclusive mmap_lock (to exclude vma changes,
  3245	 * but allow concurrent faults), with pte both mapped and locked.
  3246	 * We return with mmap_lock still held, but pte unmapped and unlocked.
  3247	 */
  3248	static vm_fault_t do_wp_page(struct vm_fault *vmf)
  3249		__releases(vmf->ptl)
  3250	{
  3251		struct vm_area_struct *vma = vmf->vma;
  3252		vm_fault_t ret = 0;
  3253		bool ksm = 0;
  3254	
  3255		if (userfaultfd_pte_wp(vma, *vmf->pte)) {
  3256			pte_unmap_unlock(vmf->pte, vmf->ptl);
  3257			return handle_userfault(vmf, VM_UFFD_WP);
  3258		}
  3259	
  3260		/*
  3261		 * Userfaultfd write-protect can defer flushes. Ensure the TLB
  3262		 * is flushed in this case before copying.
  3263		 */
  3264		if (unlikely(userfaultfd_wp(vmf->vma) &&
  3265			     mm_tlb_flush_pending(vmf->vma->vm_mm)))
  3266			flush_tlb_page(vmf->vma, vmf->address);
  3267	
  3268		vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
  3269		if (!vmf->page) {
  3270			/*
  3271			 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
  3272			 * VM_PFNMAP VMA.
  3273			 *
  3274			 * We should not cow pages in a shared writeable mapping.
  3275			 * Just mark the pages writable and/or call ops->pfn_mkwrite.
  3276			 */
  3277			if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
  3278					     (VM_WRITE|VM_SHARED))
  3279				return wp_pfn_shared(vmf);
  3280	
  3281			pte_unmap_unlock(vmf->pte, vmf->ptl);
  3282			return wp_page_copy(vmf);
  3283		}
  3284	
  3285		/*
  3286		 * Take out anonymous pages first, anonymous shared vmas are
  3287		 * not dirty accountable.
  3288		 */
  3289		if (PageAnon(vmf->page)) {
  3290			struct page *page = vmf->page;
  3291			ksm = PageKsm(page);
  3292	
  3293			/*
  3294			 * We have to verify under page lock: these early checks are
  3295			 * just an optimization to avoid locking the page and freeing
  3296			 * the swapcache if there is little hope that we can reuse.
  3297			 *
  3298			 * PageKsm() doesn't necessarily raise the page refcount.
  3299			 */
  3300			if (ksm || page_count(page) > 3)
  3301				goto copy;
  3302			if (!PageLRU(page))
  3303				/*
  3304				 * Note: We cannot easily detect+handle references from
  3305				 * remote LRU pagevecs or references to PageLRU() pages.
  3306				 */
  3307				lru_add_drain();
  3308			if (page_count(page) > 1 + PageSwapCache(page))
  3309				goto copy;
  3310			if (!trylock_page(page))
  3311				goto copy;
  3312			if (PageSwapCache(page))
  3313				try_to_free_swap(page);
  3314			if (ksm || page_count(page) != 1) {
  3315				unlock_page(page);
  3316				goto copy;
  3317			}
  3318			/*
  3319			 * Ok, we've got the only page reference from our mapping
  3320			 * and the page is locked, it's dark out, and we're wearing
  3321			 * sunglasses. Hit it.
  3322			 */
  3323			unlock_page(page);
  3324			wp_page_reuse(vmf);
  3325			return VM_FAULT_WRITE;
  3326		} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
  3327						(VM_WRITE|VM_SHARED))) {
  3328			return wp_page_shared(vmf);
  3329		}
  3330	copy:
  3331		/*
  3332		 * Ok, we need to copy. Oh, well..
  3333		 */
  3334		get_page(vmf->page);
  3335	
  3336		pte_unmap_unlock(vmf->pte, vmf->ptl);
  3337		ret = wp_page_copy(vmf);
  3338	
  3339		if (ksm) {
  3340			if (unlikely(ret & VM_FAULT_ERROR))
> 3341				count_vm_event(KSM_COW_FAIL);
  3342			else
> 3343				count_vm_event(KSM_COW_SUCCESS);
  3344		}
  3345	
  3346		return ret;
  3347	}
  3348
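
The build failure above comes from KSM_COW_SUCCESS and KSM_COW_FAIL
being declared inside the #ifdef CONFIG_KSM block of
include/linux/vm_event_item.h (itself nested in an outer #ifdef, as the
double #endif in the hunk below shows), while do_wp_page() uses them
unconditionally; a config such as nios2-defconfig that leaves those
options disabled then sees undeclared identifiers. One possible way out,
sketched here rather than taken from any follow-up patch, is to compile
the counting out along with the events:

	/* Sketch of a possible fix (not from this series): only count
	 * when CONFIG_KSM is built in, since the vm_event_item enum
	 * entries only exist in that case.
	 */
	#ifdef CONFIG_KSM
		if (ksm) {
			if (unlikely(ret & VM_FAULT_ERROR))
				count_vm_event(KSM_COW_FAIL);
			else
				count_vm_event(KSM_COW_SUCCESS);
		}
	#endif

Alternatively, the two enum entries could be declared under their own
#ifdef CONFIG_KSM outside the outer block (the one containing
SWAP_RA_HIT), so they exist whenever CONFIG_KSM does.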

Patch

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 16a0a4fd000b..6f32be04212f 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -131,6 +131,8 @@  enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		SWAP_RA_HIT,
 #ifdef CONFIG_KSM
 		KSM_SWPIN_COPY,
+		KSM_COW_SUCCESS,
+		KSM_COW_FAIL,
 #endif
 #endif
 #ifdef CONFIG_X86
diff --git a/mm/memory.c b/mm/memory.c
index 4111f97c91a0..06c92f322cdb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3257,6 +3257,8 @@  static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
+	vm_fault_t ret = 0;
+	bool ksm = 0;
 
 	if (userfaultfd_pte_wp(vma, *vmf->pte)) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -3294,6 +3296,7 @@  static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	 */
 	if (PageAnon(vmf->page)) {
 		struct page *page = vmf->page;
+		ksm = PageKsm(page);
 
 		/*
 		 * We have to verify under page lock: these early checks are
@@ -3302,7 +3305,7 @@  static vm_fault_t do_wp_page(struct vm_fault *vmf)
 		 *
 		 * PageKsm() doesn't necessarily raise the page refcount.
 		 */
-		if (PageKsm(page) || page_count(page) > 3)
+		if (ksm || page_count(page) > 3)
 			goto copy;
 		if (!PageLRU(page))
 			/*
@@ -3316,7 +3319,7 @@  static vm_fault_t do_wp_page(struct vm_fault *vmf)
 			goto copy;
 		if (PageSwapCache(page))
 			try_to_free_swap(page);
-		if (PageKsm(page) || page_count(page) != 1) {
+		if (ksm || page_count(page) != 1) {
 			unlock_page(page);
 			goto copy;
 		}
@@ -3339,7 +3342,16 @@  static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	get_page(vmf->page);
 
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	return wp_page_copy(vmf);
+	ret = wp_page_copy(vmf);
+
+	if (ksm) {
+		if (unlikely(ret & VM_FAULT_ERROR))
+			count_vm_event(KSM_COW_FAIL);
+		else
+			count_vm_event(KSM_COW_SUCCESS);
+	}
+
+	return ret;
 }
 
 static void unmap_mapping_range_vma(struct vm_area_struct *vma,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d5cc8d739fac..a2c29a5206ec 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1390,6 +1390,8 @@  const char * const vmstat_text[] = {
 	"swap_ra_hit",
 #ifdef CONFIG_KSM
 	"ksm_swpin_copy",
+	"ksm_cow_success",
+	"ksm_cow_fail",
 #endif
 #endif
 #ifdef CONFIG_X86