[v2,2/3] mm/damon: profiling enhancement

Message ID 20240318132848.82686-3-aravinda.prasad@intel.com (mailing list archive)
State New
Series mm/damon: Profiling enhancements for DAMON

Commit Message

Prasad, Aravinda March 18, 2024, 1:28 p.m. UTC
This patch adds a profiling enhancement to DAMON. Given
the sampling_addr and its region bounds, it picks the
highest page table tree level (P*D) whose covered address
range falls entirely within the region's bounds. Once a
level is picked, accessed-bit setting and checking are
done at that level. Because higher levels of the page
table tree cover a larger address space, a set accessed
bit implies that one or more pages in the given region
have been accessed. This helps quickly identify hot
regions when the region size is large (e.g., several GBs),
which is common for applications with a large memory
footprint.

Signed-off-by: Alan Nair <alan.nair@intel.com>
Signed-off-by: Sandeep Kumar <sandeep4.kumar@intel.com>
Signed-off-by: Aravinda Prasad <aravinda.prasad@intel.com>
---
 mm/damon/vaddr.c | 233 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 221 insertions(+), 12 deletions(-)
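
For a sense of scale, on x86-64 with 4 KiB base pages one entry covers
4 KiB at PTE, 2 MiB at PMD, 1 GiB at PUD, 512 GiB at P4D, and 256 TiB
at PGD (the last two with 5-level paging). The minimal standalone
sketch below is an editor's illustration, not part of the patch; the
shift values are hardcoded assumptions for x86-64, where the kernel
itself would use PMD_SHIFT and friends.

	#include <stdio.h>

	int main(void)
	{
		/* Address span covered by one entry at each page table
		 * level, assuming x86-64 with 4 KiB base pages.
		 */
		const struct { const char *name; int shift; } levels[] = {
			{ "PTE", 12 },	/* 4 KiB */
			{ "PMD", 21 },	/* 2 MiB */
			{ "PUD", 30 },	/* 1 GiB */
			{ "P4D", 39 },	/* 512 GiB, 5-level paging */
			{ "PGD", 48 },	/* 256 TiB, 5-level paging */
		};

		for (int i = 0; i < 5; i++)
			printf("%s entry covers 2^%d bytes\n",
			       levels[i].name, levels[i].shift);
		return 0;
	}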

Comments

kernel test robot March 18, 2024, 6:23 p.m. UTC | #1
Hi Aravinda,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Aravinda-Prasad/mm-damon-mm-infrastructure-support/20240318-212723
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20240318132848.82686-3-aravinda.prasad%40intel.com
patch subject: [PATCH v2 2/3] mm/damon: profiling enhancement
config: m68k-allmodconfig (https://download.01.org/0day-ci/archive/20240319/202403190218.1tBSAJpX-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240319/202403190218.1tBSAJpX-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202403190218.1tBSAJpX-lkp@intel.com/

All errors (new ones prefixed by >>):

   mm/damon/vaddr.c: In function 'pick_profile_level':
>> mm/damon/vaddr.c:78:13: error: implicit declaration of function 'pgtable_l5_enabled' [-Werror=implicit-function-declaration]
      78 |         if (pgtable_l5_enabled()) {
         |             ^~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/pgtable_l5_enabled +78 mm/damon/vaddr.c

    54	
    55	/* Pick the highest possible page table profiling level for addr
    56	 * in the region defined by start and end
    57	 */
    58	static int pick_profile_level(unsigned long start, unsigned long end,
    59			unsigned long addr)
    60	{
    61		/* Start with PTE and check if higher levels can be picked */
    62		int level = 0;
    63	
    64		if (!arch_has_hw_nonleaf_pmd_young())
    65			return level;
    66	
    67		/* Check if PMD or higher can be picked, else use PTE */
    68		if (pmd_addr_start(addr, (start) - 1) < start
    69				|| pmd_addr_end(addr, (end) + 1) > end)
    70			return level;
    71	
    72		level++;
    73		/* Check if PUD or higher can be picked, else use PMD */
    74		if (pud_addr_start(addr, (start) - 1) < start
    75				|| pud_addr_end(addr, (end) + 1) > end)
    76			return level;
    77	
  > 78		if (pgtable_l5_enabled()) {
    79			level++;
    80			/* Check if P4D or higher can be picked, else use PUD */
    81			if (p4d_addr_start(addr, (start) - 1) < start
    82					|| p4d_addr_end(addr, (end) + 1) > end)
    83				return level;
    84		}
    85	
    86		level++;
    87		/* Check if PGD can be picked, else return PUD level */
    88		if (pgd_addr_start(addr, (start) - 1) < start
    89				|| pgd_addr_end(addr, (end) + 1) > end)
    90			return level;
    91
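
The failure above occurs because pgtable_l5_enabled() is declared only
by some architectures (x86 among them), so on m68k the call is an
implicit declaration. One possible direction, offered here as a hedged
sketch rather than the author's posted fix (the CONFIG_X86_64 guard is
purely an assumption for illustration), is to compile the P4D check
only where the helper is known to exist:

	/* Sketch: build the 5-level check only where
	 * pgtable_l5_enabled() is available; elsewhere fall through
	 * to the PGD check with p4d treated as folded.
	 */
	#ifdef CONFIG_X86_64
		if (pgtable_l5_enabled()) {
			level++;
			/* Check if P4D or higher can be picked, else use PUD */
			if (p4d_addr_start(addr, start - 1) < start
					|| p4d_addr_end(addr, end + 1) > end)
				return level;
		}
	#endif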
kernel test robot March 18, 2024, 9:59 p.m. UTC | #2
Hi Aravinda,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Aravinda-Prasad/mm-damon-mm-infrastructure-support/20240318-212723
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20240318132848.82686-3-aravinda.prasad%40intel.com
patch subject: [PATCH v2 2/3] mm/damon: profiling enhancement
config: hexagon-allmodconfig (https://download.01.org/0day-ci/archive/20240319/202403190550.8chO4Zt4-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 8f68022f8e6e54d1aeae4ed301f5a015963089b7)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240319/202403190550.8chO4Zt4-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202403190550.8chO4Zt4-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from mm/damon/vaddr.c:10:
   In file included from include/linux/highmem.h:10:
   In file included from include/linux/mm.h:2194:
   include/linux/vmstat.h:522:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     522 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from mm/damon/vaddr.c:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:328:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     547 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     560 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
         |                                                   ^
   In file included from mm/damon/vaddr.c:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:328:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     573 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
         |                                                   ^
   In file included from mm/damon/vaddr.c:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:328:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     584 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     594 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     604 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
>> mm/damon/vaddr.c:78:6: error: call to undeclared function 'pgtable_l5_enabled'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
      78 |         if (pgtable_l5_enabled()) {
         |             ^
   7 warnings and 1 error generated.


Patch

diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 381559e4a1fa..daa1a2aedab6 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -52,6 +52,53 @@  static struct mm_struct *damon_get_mm(struct damon_target *t)
 	return mm;
 }
 
+/* Pick the highest possible page table profiling level for addr
+ * in the region defined by start and end
+ */
+static int pick_profile_level(unsigned long start, unsigned long end,
+		unsigned long addr)
+{
+	/* Start with PTE and check if higher levels can be picked */
+	int level = 0;
+
+	if (!arch_has_hw_nonleaf_pmd_young())
+		return level;
+
+	/* Check if PMD or higher can be picked, else use PTE */
+	if (pmd_addr_start(addr, (start) - 1) < start
+			|| pmd_addr_end(addr, (end) + 1) > end)
+		return level;
+
+	level++;
+	/* Check if PUD or higher can be picked, else use PMD */
+	if (pud_addr_start(addr, (start) - 1) < start
+			|| pud_addr_end(addr, (end) + 1) > end)
+		return level;
+
+	if (pgtable_l5_enabled()) {
+		level++;
+		/* Check if P4D or higher can be picked, else use PUD */
+		if (p4d_addr_start(addr, (start) - 1) < start
+				|| p4d_addr_end(addr, (end) + 1) > end)
+			return level;
+	}
+
+	level++;
+	/* Check if PGD can be picked, else return PUD level */
+	if (pgd_addr_start(addr, (start) - 1) < start
+			|| pgd_addr_end(addr, (end) + 1) > end)
+		return level;
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/* Do not pick PGD level if PTI is enabled */
+	if (static_cpu_has(X86_FEATURE_PTI))
+		return level;
+#endif
+
+	/* Return PGD level */
+	return ++level;
+}
+
 /*
  * Functions for the initial monitoring target regions construction
  */
@@ -387,16 +434,90 @@  static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
 #define damon_mkold_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static const struct mm_walk_ops damon_mkold_ops = {
-	.pmd_entry = damon_mkold_pmd_entry,
+
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+static int damon_mkold_pmd(pmd_t *pmd, unsigned long addr,
+	unsigned long next, struct mm_walk *walk)
+{
+	spinlock_t *ptl;
+
+	if (!pmd_present(*pmd))
+		return 0;
+
+	ptl = pmd_lock(walk->mm, pmd);
+	pmdp_clear_young_notify(walk->vma, addr, pmd);
+	spin_unlock(ptl);
+
+	return 0;
+}
+
+static int damon_mkold_pud(pud_t *pud, unsigned long addr,
+	unsigned long next, struct mm_walk *walk)
+{
+	spinlock_t *ptl;
+
+	if (!pud_present(*pud))
+		return 0;
+
+	ptl = pud_lock(walk->mm, pud);
+	pudp_clear_young_notify(walk->vma, addr, pud);
+	spin_unlock(ptl);
+
+	return 0;
+}
+
+static int damon_mkold_p4d(p4d_t *p4d, unsigned long addr,
+	unsigned long next, struct mm_walk *walk)
+{
+	struct mm_struct *mm = walk->mm;
+
+	if (!p4d_present(*p4d))
+		return 0;
+
+	spin_lock(&mm->page_table_lock);
+	p4dp_clear_young_notify(walk->vma, addr, p4d);
+	spin_unlock(&mm->page_table_lock);
+
+	return 0;
+}
+
+static int damon_mkold_pgd(pgd_t *pgd, unsigned long addr,
+	unsigned long next, struct mm_walk *walk)
+{
+	struct mm_struct *mm = walk->mm;
+
+	if (!pgd_present(*pgd))
+		return 0;
+
+	spin_lock(&mm->page_table_lock);
+	pgdp_clear_young_notify(walk->vma, addr, pgd);
+	spin_unlock(&mm->page_table_lock);
+
+	return 0;
+}
+#endif
+
+static const struct mm_walk_ops damon_mkold_ops[] = {
+	{.pmd_entry = damon_mkold_pmd_entry,
 	.hugetlb_entry = damon_mkold_hugetlb_entry,
-	.walk_lock = PGWALK_RDLOCK,
+	.walk_lock = PGWALK_RDLOCK},
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+	{.pmd_entry = damon_mkold_pmd},
+	{.pud_entry = damon_mkold_pud},
+	{.p4d_entry = damon_mkold_p4d},
+	{.pgd_entry = damon_mkold_pgd},
+#endif
 };
 
-static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
+static void damon_va_mkold(struct mm_struct *mm, struct damon_region *r)
 {
+	unsigned long addr = r->sampling_addr;
+	int profile_level;
+
+	profile_level = pick_profile_level(r->ar.start, r->ar.end, addr);
+
 	mmap_read_lock(mm);
-	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
+	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops[profile_level], NULL);
 	mmap_read_unlock(mm);
 }
 
@@ -409,7 +530,7 @@  static void __damon_va_prepare_access_check(struct mm_struct *mm,
 {
 	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
 
-	damon_va_mkold(mm, r->sampling_addr);
+	damon_va_mkold(mm, r);
 }
 
 static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
@@ -531,22 +652,110 @@  static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
 #define damon_young_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static const struct mm_walk_ops damon_young_ops = {
-	.pmd_entry = damon_young_pmd_entry,
+
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+static int damon_young_pmd(pmd_t *pmd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	spinlock_t *ptl;
+	struct damon_young_walk_private *priv = walk->private;
+
+	if (!pmd_present(*pmd))
+		return 0;
+
+	ptl = pmd_lock(walk->mm, pmd);
+	if (pmd_young(*pmd) || mmu_notifier_test_young(walk->mm, addr))
+		priv->young = true;
+
+	*priv->folio_sz = (1UL << PMD_SHIFT);
+	spin_unlock(ptl);
+
+	return 0;
+}
+
+static int damon_young_pud(pud_t *pud, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	spinlock_t *ptl;
+	struct damon_young_walk_private *priv = walk->private;
+
+	if (!pud_present(*pud))
+		return 0;
+
+	ptl = pud_lock(walk->mm, pud);
+	if (pud_young(*pud) || mmu_notifier_test_young(walk->mm, addr))
+		priv->young = true;
+
+	*priv->folio_sz = (1UL << PUD_SHIFT);
+	spin_unlock(ptl);
+
+	return 0;
+}
+
+static int damon_young_p4d(p4d_t *p4d, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct mm_struct *mm = walk->mm;
+	struct damon_young_walk_private *priv = walk->private;
+
+	if (!p4d_present(*p4d))
+		return 0;
+
+	spin_lock(&mm->page_table_lock);
+	if (p4d_young(*p4d) || mmu_notifier_test_young(walk->mm, addr))
+		priv->young = true;
+
+	*priv->folio_sz = (1UL << P4D_SHIFT);
+	spin_unlock(&mm->page_table_lock);
+
+	return 0;
+}
+
+static int damon_young_pgd(pgd_t *pgd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damon_young_walk_private *priv = walk->private;
+
+	if (!pgd_present(*pgd))
+		return 0;
+
+	spin_lock(&pgd_lock);
+	if (pgd_young(*pgd) || mmu_notifier_test_young(walk->mm, addr))
+		priv->young = true;
+
+	*priv->folio_sz = (1UL << PGDIR_SHIFT);
+	spin_unlock(&pgd_lock);
+
+	return 0;
+}
+#endif
+
+static const struct mm_walk_ops damon_young_ops[] = {
+	{.pmd_entry = damon_young_pmd_entry,
 	.hugetlb_entry = damon_young_hugetlb_entry,
-	.walk_lock = PGWALK_RDLOCK,
+	.walk_lock = PGWALK_RDLOCK},
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+	{.pmd_entry = damon_young_pmd},
+	{.pud_entry = damon_young_pud},
+	{.p4d_entry = damon_young_p4d},
+	{.pgd_entry = damon_young_pgd},
+#endif
 };
 
-static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
+static bool damon_va_young(struct mm_struct *mm, struct damon_region *r,
 		unsigned long *folio_sz)
 {
+	unsigned long addr = r->sampling_addr;
+	int profile_level;
 	struct damon_young_walk_private arg = {
 		.folio_sz = folio_sz,
 		.young = false,
 	};
 
+	profile_level = pick_profile_level(r->ar.start, r->ar.end, addr);
+
 	mmap_read_lock(mm);
-	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
+	walk_page_range(mm, addr, addr + 1, &damon_young_ops[profile_level], &arg);
 	mmap_read_unlock(mm);
 	return arg.young;
 }
@@ -577,7 +786,7 @@  static void __damon_va_check_access(struct mm_struct *mm,
 		return;
 	}
 
-	last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
+	last_accessed = damon_va_young(mm, r, &last_folio_sz);
 	damon_update_region_access_rate(r, last_accessed, attrs);
 
 	last_addr = r->sampling_addr;