diff mbox series

[v2,2/2] fork: group allocation of per-cpu counters for mm struct

Message ID 20230822184152.2194558-3-mjguzik@gmail.com (mailing list archive)
State New
Headers show
Series execve scalability issues, part 1 | expand

Commit Message

Mateusz Guzik Aug. 22, 2023, 6:41 p.m. UTC
A trivial execve scalability test which tries to be very friendly
(statically linked binaries, all separate) is predominantly bottlenecked
by back-to-back per-cpu counter allocations which serialize on global
locks.

Ease the pain by allocating and freeing them in one go.

Bench can be found here:
http://apollo.backplane.com/DFlyMisc/doexec.c

$ cc -static -O2 -o static-doexec doexec.c
$ ./static-doexec $(nproc)

Even at a very modest scale of 26 cores (ops/s):
before:	133543.63
after:	186061.81 (+39%)

While with the patch these allocations remain a significant problem,
the primary bottleneck shifts to:

    __pv_queued_spin_lock_slowpath+1
    _raw_spin_lock_irqsave+57
    folio_lruvec_lock_irqsave+91
    release_pages+590
    tlb_batch_pages_flush+61
    tlb_finish_mmu+101
    exit_mmap+327
    __mmput+61
    begin_new_exec+1245
    load_elf_binary+712
    bprm_execve+644
    do_execveat_common.isra.0+429
    __x64_sys_execve+50
    do_syscall_64+46
    entry_SYSCALL_64_after_hwframe+110

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
 kernel/fork.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

Comments

kernel test robot Aug. 23, 2023, 2:22 a.m. UTC | #1
Hi Mateusz,

kernel test robot noticed the following build errors:

[auto build test ERROR on dennis-percpu/for-next]
[also build test ERROR on linus/master v6.5-rc7 next-20230822]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Mateusz-Guzik/pcpcntr-add-group-allocation-free/20230823-024312
base:   https://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git for-next
patch link:    https://lore.kernel.org/r/20230822184152.2194558-3-mjguzik%40gmail.com
patch subject: [PATCH v2 2/2] fork: group allocation of per-cpu counters for mm struct
config: um-allyesconfig (https://download.01.org/0day-ci/archive/20230823/202308231004.tg0zQ8e9-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce: (https://download.01.org/0day-ci/archive/20230823/202308231004.tg0zQ8e9-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308231004.tg0zQ8e9-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from kernel/fork.c:34:
   In file included from include/linux/mempolicy.h:15:
   In file included from include/linux/pagemap.h:11:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __raw_readb(PCI_IOBASE + addr);
                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
   #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
                                                     ^
   In file included from kernel/fork.c:34:
   In file included from include/linux/mempolicy.h:15:
   In file included from include/linux/pagemap.h:11:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
   #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
                                                     ^
   In file included from kernel/fork.c:34:
   In file included from include/linux/mempolicy.h:15:
   In file included from include/linux/pagemap.h:11:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writeb(value, PCI_IOBASE + addr);
                               ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsb(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsw(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsl(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesb(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesw(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesl(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
>> kernel/fork.c:926:2: error: implicit declaration of function 'percpu_counter_destroy_many' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
           percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
           ^
   kernel/fork.c:926:2: note: did you mean 'percpu_counter_destroy'?
   include/linux/percpu_counter.h:136:20: note: 'percpu_counter_destroy' declared here
   static inline void percpu_counter_destroy(struct percpu_counter *fbc)
                      ^
>> kernel/fork.c:1299:6: error: implicit declaration of function 'percpu_counter_init_many' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
           if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT, NR_MM_COUNTERS))
               ^
   kernel/fork.c:1299:6: note: did you mean 'percpu_counter_init'?
   include/linux/percpu_counter.h:129:19: note: 'percpu_counter_init' declared here
   static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount,
                     ^
   12 warnings and 2 errors generated.


vim +/percpu_counter_destroy_many +926 kernel/fork.c

   904	
   905	/*
   906	 * Called when the last reference to the mm
   907	 * is dropped: either by a lazy thread or by
   908	 * mmput. Free the page directory and the mm.
   909	 */
   910	void __mmdrop(struct mm_struct *mm)
   911	{
   912		BUG_ON(mm == &init_mm);
   913		WARN_ON_ONCE(mm == current->mm);
   914	
   915		/* Ensure no CPUs are using this as their lazy tlb mm */
   916		cleanup_lazy_tlbs(mm);
   917	
   918		WARN_ON_ONCE(mm == current->active_mm);
   919		mm_free_pgd(mm);
   920		destroy_context(mm);
   921		mmu_notifier_subscriptions_destroy(mm);
   922		check_mm(mm);
   923		put_user_ns(mm->user_ns);
   924		mm_pasid_drop(mm);
   925		mm_destroy_cid(mm);
 > 926		percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
   927	
   928		free_mm(mm);
   929	}
   930	EXPORT_SYMBOL_GPL(__mmdrop);
   931
kernel test robot Aug. 23, 2023, 4:05 a.m. UTC | #2
Hi Mateusz,

kernel test robot noticed the following build errors:

[auto build test ERROR on dennis-percpu/for-next]
[also build test ERROR on linus/master v6.5-rc7 next-20230822]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Mateusz-Guzik/pcpcntr-add-group-allocation-free/20230823-024312
base:   https://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git for-next
patch link:    https://lore.kernel.org/r/20230822184152.2194558-3-mjguzik%40gmail.com
patch subject: [PATCH v2 2/2] fork: group allocation of per-cpu counters for mm struct
config: arm-randconfig-r005-20230823 (https://download.01.org/0day-ci/archive/20230823/202308231154.SM8fedb1-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 13.2.0
reproduce: (https://download.01.org/0day-ci/archive/20230823/202308231154.SM8fedb1-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308231154.SM8fedb1-lkp@intel.com/

All errors (new ones prefixed by >>):

   kernel/fork.c: In function '__mmdrop':
>> kernel/fork.c:926:9: error: implicit declaration of function 'percpu_counter_destroy_many'; did you mean 'percpu_counter_destroy'? [-Werror=implicit-function-declaration]
     926 |         percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~
         |         percpu_counter_destroy
   kernel/fork.c: In function 'mm_init':
>> kernel/fork.c:1299:13: error: implicit declaration of function 'percpu_counter_init_many'; did you mean 'percpu_counter_init'? [-Werror=implicit-function-declaration]
    1299 |         if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT, NR_MM_COUNTERS))
         |             ^~~~~~~~~~~~~~~~~~~~~~~~
         |             percpu_counter_init
   cc1: some warnings being treated as errors


vim +926 kernel/fork.c

   904	
   905	/*
   906	 * Called when the last reference to the mm
   907	 * is dropped: either by a lazy thread or by
   908	 * mmput. Free the page directory and the mm.
   909	 */
   910	void __mmdrop(struct mm_struct *mm)
   911	{
   912		BUG_ON(mm == &init_mm);
   913		WARN_ON_ONCE(mm == current->mm);
   914	
   915		/* Ensure no CPUs are using this as their lazy tlb mm */
   916		cleanup_lazy_tlbs(mm);
   917	
   918		WARN_ON_ONCE(mm == current->active_mm);
   919		mm_free_pgd(mm);
   920		destroy_context(mm);
   921		mmu_notifier_subscriptions_destroy(mm);
   922		check_mm(mm);
   923		put_user_ns(mm->user_ns);
   924		mm_pasid_drop(mm);
   925		mm_destroy_cid(mm);
 > 926		percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
   927	
   928		free_mm(mm);
   929	}
   930	EXPORT_SYMBOL_GPL(__mmdrop);
   931
diff mbox series

Patch

diff --git a/kernel/fork.c b/kernel/fork.c
index d2e12b6d2b18..4f0ada33457e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -909,8 +909,6 @@  static void cleanup_lazy_tlbs(struct mm_struct *mm)
  */
 void __mmdrop(struct mm_struct *mm)
 {
-	int i;
-
 	BUG_ON(mm == &init_mm);
 	WARN_ON_ONCE(mm == current->mm);
 
@@ -925,9 +923,8 @@  void __mmdrop(struct mm_struct *mm)
 	put_user_ns(mm->user_ns);
 	mm_pasid_drop(mm);
 	mm_destroy_cid(mm);
+	percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
 
-	for (i = 0; i < NR_MM_COUNTERS; i++)
-		percpu_counter_destroy(&mm->rss_stat[i]);
 	free_mm(mm);
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1252,8 +1249,6 @@  static void mm_init_uprobes_state(struct mm_struct *mm)
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	struct user_namespace *user_ns)
 {
-	int i;
-
 	mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
 	mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
 	atomic_set(&mm->mm_users, 1);
@@ -1301,17 +1296,14 @@  static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	if (mm_alloc_cid(mm))
 		goto fail_cid;
 
-	for (i = 0; i < NR_MM_COUNTERS; i++)
-		if (percpu_counter_init(&mm->rss_stat[i], 0, GFP_KERNEL_ACCOUNT))
-			goto fail_pcpu;
+	if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT, NR_MM_COUNTERS))
+		goto fail_pcpu;
 
 	mm->user_ns = get_user_ns(user_ns);
 	lru_gen_init_mm(mm);
 	return mm;
 
 fail_pcpu:
-	while (i > 0)
-		percpu_counter_destroy(&mm->rss_stat[--i]);
 	mm_destroy_cid(mm);
 fail_cid:
 	destroy_context(mm);