diff mbox series

[RFC,5/5] test: add sysctl for folio copy tests and adjust NR_MAX_BATCHED_MIGRATION

Message ID 20250103172419.4148674-6-ziy@nvidia.com
State RFC
Headers show
Series Accelerate page migration with batching and multi threads | expand

Commit Message

Zi Yan Jan. 3, 2025, 5:24 p.m. UTC
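Add sysctl knobs under /proc/sys/vm for testing folio copy and raise the
migration batch limit:
1. use_mt_copy: enable multi-threaded copy
2. limit_mt_num: specify how many CPU threads to use
3. push_0_pull_1: push from local CPUs (0) or pull from remote CPUs (1)
4. change NR_MAX_BATCHED_MIGRATION to HPAGE_PUD_NR to allow batching THP
copies.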

These are for testing purposes only.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 include/linux/mm.h     |  4 ++++
 include/linux/sysctl.h |  1 +
 kernel/sysctl.c        | 29 ++++++++++++++++++++++++++++-
 mm/copy_pages.c        | 10 +++++++---
 mm/migrate.c           |  6 ++++--
 5 files changed, 44 insertions(+), 6 deletions(-)

Comments

Gregory Price Jan. 3, 2025, 10:21 p.m. UTC | #1
On Fri, Jan 03, 2025 at 12:24:19PM -0500, Zi Yan wrote:
... snip ...
> +	{
> +		.procname	= "use_mt_copy",
> +		.data		= &use_mt_copy,
> +		.maxlen		= sizeof(use_mt_copy),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "limit_mt_num",
> +		.data		= &limit_mt_num,
> +		.maxlen		= sizeof(limit_mt_num),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ONE,
> +		.extra2		= SYSCTL_32,
> +	},
> +	{
> +		.procname	= "push_0_pull_1",
> +		.data		= &push_0_pull_1,
> +		.maxlen		= sizeof(push_0_pull_1),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
>  	{
>  		.procname	= "drop_caches",
>  		.data		= &sysctl_drop_caches,

Build errors here

~Gregory

---

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f9ba48cd6e09..bca82e6132b3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2093,8 +2093,8 @@ static struct ctl_table vm_table[] = {
 #endif
        {
                .procname       = "use_mt_copy",
-               .data           = &use_mt_copy,
-               .maxlen         = sizeof(use_mt_copy),
+               .data           = &sysctl_use_mt_copy,
+               .maxlen         = sizeof(sysctl_use_mt_copy),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
@@ -2102,8 +2102,8 @@ static struct ctl_table vm_table[] = {
        },
        {
                .procname       = "limit_mt_num",
-               .data           = &limit_mt_num,
-               .maxlen         = sizeof(limit_mt_num),
+               .data           = &sysctl_limit_mt_num,
+               .maxlen         = sizeof(sysctl_limit_mt_num),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ONE,
@@ -2111,8 +2111,8 @@ static struct ctl_table vm_table[] = {
        },
        {
                .procname       = "push_0_pull_1",
-               .data           = &push_0_pull_1,
-               .maxlen         = sizeof(push_0_pull_1),
+               .data           = &sysctl_push_0_pull_1,
+               .maxlen         = sizeof(sysctl_push_0_pull_1),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
Zi Yan Jan. 3, 2025, 10:56 p.m. UTC | #2
On 3 Jan 2025, at 17:21, Gregory Price wrote:

> On Fri, Jan 03, 2025 at 12:24:19PM -0500, Zi Yan wrote:
> ... snip ...
>> +	{
>> +		.procname	= "use_mt_copy",
>> +		.data		= &use_mt_copy,
>> +		.maxlen		= sizeof(use_mt_copy),
>> +		.mode		= 0644,
>> +		.proc_handler	= proc_dointvec_minmax,
>> +		.extra1		= SYSCTL_ZERO,
>> +		.extra2		= SYSCTL_ONE,
>> +	},
>> +	{
>> +		.procname	= "limit_mt_num",
>> +		.data		= &limit_mt_num,
>> +		.maxlen		= sizeof(limit_mt_num),
>> +		.mode		= 0644,
>> +		.proc_handler	= proc_dointvec_minmax,
>> +		.extra1		= SYSCTL_ONE,
>> +		.extra2		= SYSCTL_32,
>> +	},
>> +	{
>> +		.procname	= "push_0_pull_1",
>> +		.data		= &push_0_pull_1,
>> +		.maxlen		= sizeof(push_0_pull_1),
>> +		.mode		= 0644,
>> +		.proc_handler	= proc_dointvec_minmax,
>> +		.extra1		= SYSCTL_ZERO,
>> +		.extra2		= SYSCTL_ONE,
>> +	},
>>  	{
>>  		.procname	= "drop_caches",
>>  		.data		= &sysctl_drop_caches,
>
> Build errors here

Thanks, these changes must have been lost during my patch cleanup.

>
> ~Gregory
>
> ---
>
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index f9ba48cd6e09..bca82e6132b3 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2093,8 +2093,8 @@ static struct ctl_table vm_table[] = {
>  #endif
>         {
>                 .procname       = "use_mt_copy",
> -               .data           = &use_mt_copy,
> -               .maxlen         = sizeof(use_mt_copy),
> +               .data           = &sysctl_use_mt_copy,
> +               .maxlen         = sizeof(sysctl_use_mt_copy),
>                 .mode           = 0644,
>                 .proc_handler   = proc_dointvec_minmax,
>                 .extra1         = SYSCTL_ZERO,
> @@ -2102,8 +2102,8 @@ static struct ctl_table vm_table[] = {
>         },
>         {
>                 .procname       = "limit_mt_num",
> -               .data           = &limit_mt_num,
> -               .maxlen         = sizeof(limit_mt_num),
> +               .data           = &sysctl_limit_mt_num,
> +               .maxlen         = sizeof(sysctl_limit_mt_num),
>                 .mode           = 0644,
>                 .proc_handler   = proc_dointvec_minmax,
>                 .extra1         = SYSCTL_ONE,
> @@ -2111,8 +2111,8 @@ static struct ctl_table vm_table[] = {
>         },
>         {
>                 .procname       = "push_0_pull_1",
> -               .data           = &push_0_pull_1,
> -               .maxlen         = sizeof(push_0_pull_1),
> +               .data           = &sysctl_push_0_pull_1,
> +               .maxlen         = sizeof(sysctl_push_0_pull_1),
>                 .mode           = 0644,
>                 .proc_handler   = proc_dointvec_minmax,
>                 .extra1         = SYSCTL_ZERO,


Best Regards,
Yan, Zi
kernel test robot Jan. 4, 2025, 4:51 a.m. UTC | #3
Hi Zi,

[This is a private test report for your RFC patch.]
kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master sysctl/sysctl-next v6.13-rc5 next-20241220]
[cannot apply to mcgrof/sysctl-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zi-Yan/mm-separate-move-undo-doing-on-folio-list-from-migrate_pages_batch/20250104-012955
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20250103172419.4148674-6-ziy%40nvidia.com
patch subject: [RFC PATCH 5/5] test: add sysctl for folio copy tests and adjust NR_MAX_BATCHED_MIGRATION
config: i386-buildonly-randconfig-001-20250104 (https://download.01.org/0day-ci/archive/20250104/202501041224.wjWZ3pHL-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250104/202501041224.wjWZ3pHL-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501041224.wjWZ3pHL-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/sysctl.c:2096:13: error: use of undeclared identifier 'use_mt_copy'
    2096 |                 .data           = &use_mt_copy,
         |                                    ^
   kernel/sysctl.c:2097:21: error: use of undeclared identifier 'use_mt_copy'
    2097 |                 .maxlen         = sizeof(use_mt_copy),
         |                                          ^
>> kernel/sysctl.c:2105:13: error: use of undeclared identifier 'limit_mt_num'
    2105 |                 .data           = &limit_mt_num,
         |                                    ^
   kernel/sysctl.c:2106:21: error: use of undeclared identifier 'limit_mt_num'
    2106 |                 .maxlen         = sizeof(limit_mt_num),
         |                                          ^
>> kernel/sysctl.c:2114:13: error: use of undeclared identifier 'push_0_pull_1'
    2114 |                 .data           = &push_0_pull_1,
         |                                    ^
   kernel/sysctl.c:2115:21: error: use of undeclared identifier 'push_0_pull_1'
    2115 |                 .maxlen         = sizeof(push_0_pull_1),
         |                                          ^
>> kernel/sysctl.c:2268:2: error: invalid application of 'sizeof' to an incomplete type 'struct ctl_table[]'
    2268 |         register_sysctl_init("vm", vm_table);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/sysctl.h:234:46: note: expanded from macro 'register_sysctl_init'
     234 |         __register_sysctl_init(path, table, #table, ARRAY_SIZE(table))
         |                                                     ^~~~~~~~~~~~~~~~~
   include/linux/array_size.h:11:32: note: expanded from macro 'ARRAY_SIZE'
      11 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
         |                                ^~~~~
   7 errors generated.


vim +/use_mt_copy +2096 kernel/sysctl.c

  2032	
  2033	static struct ctl_table vm_table[] = {
  2034		{
  2035			.procname	= "overcommit_memory",
  2036			.data		= &sysctl_overcommit_memory,
  2037			.maxlen		= sizeof(sysctl_overcommit_memory),
  2038			.mode		= 0644,
  2039			.proc_handler	= overcommit_policy_handler,
  2040			.extra1		= SYSCTL_ZERO,
  2041			.extra2		= SYSCTL_TWO,
  2042		},
  2043		{
  2044			.procname	= "overcommit_ratio",
  2045			.data		= &sysctl_overcommit_ratio,
  2046			.maxlen		= sizeof(sysctl_overcommit_ratio),
  2047			.mode		= 0644,
  2048			.proc_handler	= overcommit_ratio_handler,
  2049		},
  2050		{
  2051			.procname	= "overcommit_kbytes",
  2052			.data		= &sysctl_overcommit_kbytes,
  2053			.maxlen		= sizeof(sysctl_overcommit_kbytes),
  2054			.mode		= 0644,
  2055			.proc_handler	= overcommit_kbytes_handler,
  2056		},
  2057		{
  2058			.procname	= "page-cluster",
  2059			.data		= &page_cluster,
  2060			.maxlen		= sizeof(int),
  2061			.mode		= 0644,
  2062			.proc_handler	= proc_dointvec_minmax,
  2063			.extra1		= SYSCTL_ZERO,
  2064			.extra2		= (void *)&page_cluster_max,
  2065		},
  2066		{
  2067			.procname	= "dirtytime_expire_seconds",
  2068			.data		= &dirtytime_expire_interval,
  2069			.maxlen		= sizeof(dirtytime_expire_interval),
  2070			.mode		= 0644,
  2071			.proc_handler	= dirtytime_interval_handler,
  2072			.extra1		= SYSCTL_ZERO,
  2073		},
  2074		{
  2075			.procname	= "swappiness",
  2076			.data		= &vm_swappiness,
  2077			.maxlen		= sizeof(vm_swappiness),
  2078			.mode		= 0644,
  2079			.proc_handler	= proc_dointvec_minmax,
  2080			.extra1		= SYSCTL_ZERO,
  2081			.extra2		= SYSCTL_TWO_HUNDRED,
  2082		},
  2083	#ifdef CONFIG_NUMA
  2084		{
  2085			.procname	= "numa_stat",
  2086			.data		= &sysctl_vm_numa_stat,
  2087			.maxlen		= sizeof(int),
  2088			.mode		= 0644,
  2089			.proc_handler	= sysctl_vm_numa_stat_handler,
  2090			.extra1		= SYSCTL_ZERO,
  2091			.extra2		= SYSCTL_ONE,
  2092		},
  2093	#endif
  2094		{
  2095			.procname	= "use_mt_copy",
> 2096			.data		= &use_mt_copy,
  2097			.maxlen		= sizeof(use_mt_copy),
  2098			.mode		= 0644,
  2099			.proc_handler	= proc_dointvec_minmax,
  2100			.extra1		= SYSCTL_ZERO,
  2101			.extra2		= SYSCTL_ONE,
  2102		},
  2103		{
  2104			.procname	= "limit_mt_num",
> 2105			.data		= &limit_mt_num,
  2106			.maxlen		= sizeof(limit_mt_num),
  2107			.mode		= 0644,
  2108			.proc_handler	= proc_dointvec_minmax,
  2109			.extra1		= SYSCTL_ONE,
  2110			.extra2		= SYSCTL_32,
  2111		},
  2112		{
  2113			.procname	= "push_0_pull_1",
> 2114			.data		= &push_0_pull_1,
  2115			.maxlen		= sizeof(push_0_pull_1),
  2116			.mode		= 0644,
  2117			.proc_handler	= proc_dointvec_minmax,
  2118			.extra1		= SYSCTL_ZERO,
  2119			.extra2		= SYSCTL_ONE,
  2120		},
  2121		{
  2122			.procname	= "drop_caches",
  2123			.data		= &sysctl_drop_caches,
  2124			.maxlen		= sizeof(int),
  2125			.mode		= 0200,
  2126			.proc_handler	= drop_caches_sysctl_handler,
  2127			.extra1		= SYSCTL_ONE,
  2128			.extra2		= SYSCTL_FOUR,
  2129		},
  2130		{
  2131			.procname	= "page_lock_unfairness",
  2132			.data		= &sysctl_page_lock_unfairness,
  2133			.maxlen		= sizeof(sysctl_page_lock_unfairness),
  2134			.mode		= 0644,
  2135			.proc_handler	= proc_dointvec_minmax,
  2136			.extra1		= SYSCTL_ZERO,
  2137		},
  2138	#ifdef CONFIG_MMU
  2139		{
  2140			.procname	= "max_map_count",
  2141			.data		= &sysctl_max_map_count,
  2142			.maxlen		= sizeof(sysctl_max_map_count),
  2143			.mode		= 0644,
  2144			.proc_handler	= proc_dointvec_minmax,
  2145			.extra1		= SYSCTL_ZERO,
  2146		},
  2147	#else
  2148		{
  2149			.procname	= "nr_trim_pages",
  2150			.data		= &sysctl_nr_trim_pages,
  2151			.maxlen		= sizeof(sysctl_nr_trim_pages),
  2152			.mode		= 0644,
  2153			.proc_handler	= proc_dointvec_minmax,
  2154			.extra1		= SYSCTL_ZERO,
  2155		},
  2156	#endif
  2157		{
  2158			.procname	= "vfs_cache_pressure",
  2159			.data		= &sysctl_vfs_cache_pressure,
  2160			.maxlen		= sizeof(sysctl_vfs_cache_pressure),
  2161			.mode		= 0644,
  2162			.proc_handler	= proc_dointvec_minmax,
  2163			.extra1		= SYSCTL_ZERO,
  2164		},
  2165	#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
  2166	    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
  2167		{
  2168			.procname	= "legacy_va_layout",
  2169			.data		= &sysctl_legacy_va_layout,
  2170			.maxlen		= sizeof(sysctl_legacy_va_layout),
  2171			.mode		= 0644,
  2172			.proc_handler	= proc_dointvec_minmax,
  2173			.extra1		= SYSCTL_ZERO,
  2174		},
  2175	#endif
  2176	#ifdef CONFIG_NUMA
  2177		{
  2178			.procname	= "zone_reclaim_mode",
  2179			.data		= &node_reclaim_mode,
  2180			.maxlen		= sizeof(node_reclaim_mode),
  2181			.mode		= 0644,
  2182			.proc_handler	= proc_dointvec_minmax,
  2183			.extra1		= SYSCTL_ZERO,
  2184		},
  2185	#endif
  2186	#ifdef CONFIG_SMP
  2187		{
  2188			.procname	= "stat_interval",
  2189			.data		= &sysctl_stat_interval,
  2190			.maxlen		= sizeof(sysctl_stat_interval),
  2191			.mode		= 0644,
  2192			.proc_handler	= proc_dointvec_jiffies,
  2193		},
  2194		{
  2195			.procname	= "stat_refresh",
  2196			.data		= NULL,
  2197			.maxlen		= 0,
  2198			.mode		= 0600,
  2199			.proc_handler	= vmstat_refresh,
  2200		},
  2201	#endif
  2202	#ifdef CONFIG_MMU
  2203		{
  2204			.procname	= "mmap_min_addr",
  2205			.data		= &dac_mmap_min_addr,
  2206			.maxlen		= sizeof(unsigned long),
  2207			.mode		= 0644,
  2208			.proc_handler	= mmap_min_addr_handler,
  2209		},
  2210	#endif
  2211	#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
  2212	   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
  2213		{
  2214			.procname	= "vdso_enabled",
  2215	#ifdef CONFIG_X86_32
  2216			.data		= &vdso32_enabled,
  2217			.maxlen		= sizeof(vdso32_enabled),
  2218	#else
  2219			.data		= &vdso_enabled,
  2220			.maxlen		= sizeof(vdso_enabled),
  2221	#endif
  2222			.mode		= 0644,
  2223			.proc_handler	= proc_dointvec,
  2224			.extra1		= SYSCTL_ZERO,
  2225		},
  2226	#endif
  2227		{
  2228			.procname	= "user_reserve_kbytes",
  2229			.data		= &sysctl_user_reserve_kbytes,
  2230			.maxlen		= sizeof(sysctl_user_reserve_kbytes),
  2231			.mode		= 0644,
  2232			.proc_handler	= proc_doulongvec_minmax,
  2233		},
  2234		{
  2235			.procname	= "admin_reserve_kbytes",
  2236			.data		= &sysctl_admin_reserve_kbytes,
  2237			.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
  2238			.mode		= 0644,
  2239			.proc_handler	= proc_doulongvec_minmax,
  2240		},
  2241	#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
  2242		{
  2243			.procname	= "mmap_rnd_bits",
  2244			.data		= &mmap_rnd_bits,
  2245			.maxlen		= sizeof(mmap_rnd_bits),
  2246			.mode		= 0600,
  2247			.proc_handler	= proc_dointvec_minmax,
  2248			.extra1		= (void *)&mmap_rnd_bits_min,
  2249			.extra2		= (void *)&mmap_rnd_bits_max,
  2250		},
  2251	#endif
  2252	#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
  2253		{
  2254			.procname	= "mmap_rnd_compat_bits",
  2255			.data		= &mmap_rnd_compat_bits,
  2256			.maxlen		= sizeof(mmap_rnd_compat_bits),
  2257			.mode		= 0600,
  2258			.proc_handler	= proc_dointvec_minmax,
  2259			.extra1		= (void *)&mmap_rnd_compat_bits_min,
  2260			.extra2		= (void *)&mmap_rnd_compat_bits_max,
  2261		},
  2262	#endif
  2263	};
  2264	
  2265	int __init sysctl_init_bases(void)
  2266	{
  2267		register_sysctl_init("kernel", kern_table);
> 2268		register_sysctl_init("vm", vm_table);
kernel test robot Jan. 4, 2025, 5:24 a.m. UTC | #4
Hi Zi,

[This is a private test report for your RFC patch.]
kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master sysctl/sysctl-next v6.13-rc5 next-20241220]
[cannot apply to mcgrof/sysctl-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zi-Yan/mm-separate-move-undo-doing-on-folio-list-from-migrate_pages_batch/20250104-012955
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20250103172419.4148674-6-ziy%40nvidia.com
patch subject: [RFC PATCH 5/5] test: add sysctl for folio copy tests and adjust NR_MAX_BATCHED_MIGRATION
config: i386-buildonly-randconfig-004-20250104 (https://download.01.org/0day-ci/archive/20250104/202501041307.zfuQDHd5-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250104/202501041307.zfuQDHd5-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501041307.zfuQDHd5-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/sysctl.c:2096:36: error: 'use_mt_copy' undeclared here (not in a function)
    2096 |                 .data           = &use_mt_copy,
         |                                    ^~~~~~~~~~~
>> kernel/sysctl.c:2105:36: error: 'limit_mt_num' undeclared here (not in a function)
    2105 |                 .data           = &limit_mt_num,
         |                                    ^~~~~~~~~~~~
>> kernel/sysctl.c:2114:36: error: 'push_0_pull_1' undeclared here (not in a function)
    2114 |                 .data           = &push_0_pull_1,
         |                                    ^~~~~~~~~~~~~


vim +/use_mt_copy +2096 kernel/sysctl.c

  2032	
  2033	static struct ctl_table vm_table[] = {
  2034		{
  2035			.procname	= "overcommit_memory",
  2036			.data		= &sysctl_overcommit_memory,
  2037			.maxlen		= sizeof(sysctl_overcommit_memory),
  2038			.mode		= 0644,
  2039			.proc_handler	= overcommit_policy_handler,
  2040			.extra1		= SYSCTL_ZERO,
  2041			.extra2		= SYSCTL_TWO,
  2042		},
  2043		{
  2044			.procname	= "overcommit_ratio",
  2045			.data		= &sysctl_overcommit_ratio,
  2046			.maxlen		= sizeof(sysctl_overcommit_ratio),
  2047			.mode		= 0644,
  2048			.proc_handler	= overcommit_ratio_handler,
  2049		},
  2050		{
  2051			.procname	= "overcommit_kbytes",
  2052			.data		= &sysctl_overcommit_kbytes,
  2053			.maxlen		= sizeof(sysctl_overcommit_kbytes),
  2054			.mode		= 0644,
  2055			.proc_handler	= overcommit_kbytes_handler,
  2056		},
  2057		{
  2058			.procname	= "page-cluster",
  2059			.data		= &page_cluster,
  2060			.maxlen		= sizeof(int),
  2061			.mode		= 0644,
  2062			.proc_handler	= proc_dointvec_minmax,
  2063			.extra1		= SYSCTL_ZERO,
  2064			.extra2		= (void *)&page_cluster_max,
  2065		},
  2066		{
  2067			.procname	= "dirtytime_expire_seconds",
  2068			.data		= &dirtytime_expire_interval,
  2069			.maxlen		= sizeof(dirtytime_expire_interval),
  2070			.mode		= 0644,
  2071			.proc_handler	= dirtytime_interval_handler,
  2072			.extra1		= SYSCTL_ZERO,
  2073		},
  2074		{
  2075			.procname	= "swappiness",
  2076			.data		= &vm_swappiness,
  2077			.maxlen		= sizeof(vm_swappiness),
  2078			.mode		= 0644,
  2079			.proc_handler	= proc_dointvec_minmax,
  2080			.extra1		= SYSCTL_ZERO,
  2081			.extra2		= SYSCTL_TWO_HUNDRED,
  2082		},
  2083	#ifdef CONFIG_NUMA
  2084		{
  2085			.procname	= "numa_stat",
  2086			.data		= &sysctl_vm_numa_stat,
  2087			.maxlen		= sizeof(int),
  2088			.mode		= 0644,
  2089			.proc_handler	= sysctl_vm_numa_stat_handler,
  2090			.extra1		= SYSCTL_ZERO,
  2091			.extra2		= SYSCTL_ONE,
  2092		},
  2093	#endif
  2094		{
  2095			.procname	= "use_mt_copy",
> 2096			.data		= &use_mt_copy,
  2097			.maxlen		= sizeof(use_mt_copy),
  2098			.mode		= 0644,
  2099			.proc_handler	= proc_dointvec_minmax,
  2100			.extra1		= SYSCTL_ZERO,
  2101			.extra2		= SYSCTL_ONE,
  2102		},
  2103		{
  2104			.procname	= "limit_mt_num",
> 2105			.data		= &limit_mt_num,
  2106			.maxlen		= sizeof(limit_mt_num),
  2107			.mode		= 0644,
  2108			.proc_handler	= proc_dointvec_minmax,
  2109			.extra1		= SYSCTL_ONE,
  2110			.extra2		= SYSCTL_32,
  2111		},
  2112		{
  2113			.procname	= "push_0_pull_1",
> 2114			.data		= &push_0_pull_1,
  2115			.maxlen		= sizeof(push_0_pull_1),
  2116			.mode		= 0644,
  2117			.proc_handler	= proc_dointvec_minmax,
  2118			.extra1		= SYSCTL_ZERO,
  2119			.extra2		= SYSCTL_ONE,
  2120		},
  2121		{
  2122			.procname	= "drop_caches",
  2123			.data		= &sysctl_drop_caches,
  2124			.maxlen		= sizeof(int),
  2125			.mode		= 0200,
  2126			.proc_handler	= drop_caches_sysctl_handler,
  2127			.extra1		= SYSCTL_ONE,
  2128			.extra2		= SYSCTL_FOUR,
  2129		},
  2130		{
  2131			.procname	= "page_lock_unfairness",
  2132			.data		= &sysctl_page_lock_unfairness,
  2133			.maxlen		= sizeof(sysctl_page_lock_unfairness),
  2134			.mode		= 0644,
  2135			.proc_handler	= proc_dointvec_minmax,
  2136			.extra1		= SYSCTL_ZERO,
  2137		},
  2138	#ifdef CONFIG_MMU
  2139		{
  2140			.procname	= "max_map_count",
  2141			.data		= &sysctl_max_map_count,
  2142			.maxlen		= sizeof(sysctl_max_map_count),
  2143			.mode		= 0644,
  2144			.proc_handler	= proc_dointvec_minmax,
  2145			.extra1		= SYSCTL_ZERO,
  2146		},
  2147	#else
  2148		{
  2149			.procname	= "nr_trim_pages",
  2150			.data		= &sysctl_nr_trim_pages,
  2151			.maxlen		= sizeof(sysctl_nr_trim_pages),
  2152			.mode		= 0644,
  2153			.proc_handler	= proc_dointvec_minmax,
  2154			.extra1		= SYSCTL_ZERO,
  2155		},
  2156	#endif
  2157		{
  2158			.procname	= "vfs_cache_pressure",
  2159			.data		= &sysctl_vfs_cache_pressure,
  2160			.maxlen		= sizeof(sysctl_vfs_cache_pressure),
  2161			.mode		= 0644,
  2162			.proc_handler	= proc_dointvec_minmax,
  2163			.extra1		= SYSCTL_ZERO,
  2164		},
  2165	#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
  2166	    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
  2167		{
  2168			.procname	= "legacy_va_layout",
  2169			.data		= &sysctl_legacy_va_layout,
  2170			.maxlen		= sizeof(sysctl_legacy_va_layout),
  2171			.mode		= 0644,
  2172			.proc_handler	= proc_dointvec_minmax,
  2173			.extra1		= SYSCTL_ZERO,
  2174		},
  2175	#endif
  2176	#ifdef CONFIG_NUMA
  2177		{
  2178			.procname	= "zone_reclaim_mode",
  2179			.data		= &node_reclaim_mode,
  2180			.maxlen		= sizeof(node_reclaim_mode),
  2181			.mode		= 0644,
  2182			.proc_handler	= proc_dointvec_minmax,
  2183			.extra1		= SYSCTL_ZERO,
  2184		},
  2185	#endif
  2186	#ifdef CONFIG_SMP
  2187		{
  2188			.procname	= "stat_interval",
  2189			.data		= &sysctl_stat_interval,
  2190			.maxlen		= sizeof(sysctl_stat_interval),
  2191			.mode		= 0644,
  2192			.proc_handler	= proc_dointvec_jiffies,
  2193		},
  2194		{
  2195			.procname	= "stat_refresh",
  2196			.data		= NULL,
  2197			.maxlen		= 0,
  2198			.mode		= 0600,
  2199			.proc_handler	= vmstat_refresh,
  2200		},
  2201	#endif
  2202	#ifdef CONFIG_MMU
  2203		{
  2204			.procname	= "mmap_min_addr",
  2205			.data		= &dac_mmap_min_addr,
  2206			.maxlen		= sizeof(unsigned long),
  2207			.mode		= 0644,
  2208			.proc_handler	= mmap_min_addr_handler,
  2209		},
  2210	#endif
  2211	#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
  2212	   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
  2213		{
  2214			.procname	= "vdso_enabled",
  2215	#ifdef CONFIG_X86_32
  2216			.data		= &vdso32_enabled,
  2217			.maxlen		= sizeof(vdso32_enabled),
  2218	#else
  2219			.data		= &vdso_enabled,
  2220			.maxlen		= sizeof(vdso_enabled),
  2221	#endif
  2222			.mode		= 0644,
  2223			.proc_handler	= proc_dointvec,
  2224			.extra1		= SYSCTL_ZERO,
  2225		},
  2226	#endif
  2227		{
  2228			.procname	= "user_reserve_kbytes",
  2229			.data		= &sysctl_user_reserve_kbytes,
  2230			.maxlen		= sizeof(sysctl_user_reserve_kbytes),
  2231			.mode		= 0644,
  2232			.proc_handler	= proc_doulongvec_minmax,
  2233		},
  2234		{
  2235			.procname	= "admin_reserve_kbytes",
  2236			.data		= &sysctl_admin_reserve_kbytes,
  2237			.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
  2238			.mode		= 0644,
  2239			.proc_handler	= proc_doulongvec_minmax,
  2240		},
  2241	#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
  2242		{
  2243			.procname	= "mmap_rnd_bits",
  2244			.data		= &mmap_rnd_bits,
  2245			.maxlen		= sizeof(mmap_rnd_bits),
  2246			.mode		= 0600,
  2247			.proc_handler	= proc_dointvec_minmax,
  2248			.extra1		= (void *)&mmap_rnd_bits_min,
  2249			.extra2		= (void *)&mmap_rnd_bits_max,
  2250		},
  2251	#endif
  2252	#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
  2253		{
  2254			.procname	= "mmap_rnd_compat_bits",
  2255			.data		= &mmap_rnd_compat_bits,
  2256			.maxlen		= sizeof(mmap_rnd_compat_bits),
  2257			.mode		= 0600,
  2258			.proc_handler	= proc_dointvec_minmax,
  2259			.extra1		= (void *)&mmap_rnd_compat_bits_min,
  2260			.extra2		= (void *)&mmap_rnd_compat_bits_max,
  2261		},
  2262	#endif
  2263	};
  2264
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1a11f9df5c2d..277b12b9ef0d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -42,6 +42,10 @@  struct pt_regs;
 struct folio_batch;
 
 extern int sysctl_page_lock_unfairness;
+extern int sysctl_use_mt_copy;
+extern unsigned int sysctl_limit_mt_num;
+extern unsigned int sysctl_push_0_pull_1;
+
 
 void mm_core_init(void);
 void init_mm_internals(void);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 40a6ac6c9713..f33dafea2533 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -52,6 +52,7 @@  struct ctl_dir;
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 #define SYSCTL_MAXOLDUID		((void *)&sysctl_vals[10])
 #define SYSCTL_NEG_ONE			((void *)&sysctl_vals[11])
+#define SYSCTL_32			((void *)&sysctl_vals[12])
 
 extern const int sysctl_vals[];
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5c9202cb8f59..f9ba48cd6e09 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -82,7 +82,7 @@ 
 #endif
 
 /* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
+const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1, 32 };
 EXPORT_SYMBOL(sysctl_vals);
 
 const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
@@ -2091,6 +2091,33 @@  static struct ctl_table vm_table[] = {
 		.extra2		= SYSCTL_ONE,
 	},
 #endif
+	{
+		.procname	= "use_mt_copy",
+		.data		= &use_mt_copy,
+		.maxlen		= sizeof(use_mt_copy),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "limit_mt_num",
+		.data		= &limit_mt_num,
+		.maxlen		= sizeof(limit_mt_num),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_32,
+	},
+	{
+		.procname	= "push_0_pull_1",
+		.data		= &push_0_pull_1,
+		.maxlen		= sizeof(push_0_pull_1),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 	{
 		.procname	= "drop_caches",
 		.data		= &sysctl_drop_caches,
diff --git a/mm/copy_pages.c b/mm/copy_pages.c
index 0e2231199f66..257034550c86 100644
--- a/mm/copy_pages.c
+++ b/mm/copy_pages.c
@@ -10,7 +10,9 @@ 
 #include <linux/migrate.h>
 
 
-unsigned int limit_mt_num = 4;
+unsigned int sysctl_limit_mt_num = 4;
+/* push by default */
+unsigned int sysctl_push_0_pull_1;
 
 struct copy_item {
 	char *to;
@@ -45,11 +47,13 @@  int copy_page_lists_mt(struct list_head *dst_folios,
 		struct list_head *src_folios, int nr_items)
 {
 	int err = 0;
-	unsigned int total_mt_num = limit_mt_num;
+	unsigned int total_mt_num = sysctl_limit_mt_num;
 	int to_node = folio_nid(list_first_entry(dst_folios, struct folio, lru));
+	int from_node = folio_nid(list_first_entry(src_folios, struct folio, lru));
 	int i;
 	struct copy_page_info *work_items[32] = {0};
-	const struct cpumask *per_node_cpumask = cpumask_of_node(to_node);
+	const struct cpumask *per_node_cpumask =
+		cpumask_of_node(sysctl_push_0_pull_1 ? to_node : from_node);
 	int cpu_id_list[32] = {0};
 	int cpu;
 	int max_items_per_thread;
diff --git a/mm/migrate.c b/mm/migrate.c
index 18440180d747..0f7a4b09acda 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -51,6 +51,7 @@ 
 
 #include "internal.h"
 
+int sysctl_use_mt_copy;
 
 bool isolate_movable_page(struct page *page, isolate_mode_t mode)
 {
@@ -1621,7 +1622,7 @@  static inline int try_split_folio(struct folio *folio, struct list_head *split_f
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define NR_MAX_BATCHED_MIGRATION	HPAGE_PMD_NR
+#define NR_MAX_BATCHED_MIGRATION	HPAGE_PUD_NR
 #else
 #define NR_MAX_BATCHED_MIGRATION	512
 #endif
@@ -1868,7 +1869,8 @@  static void migrate_folios_batch_move(struct list_head *src_folios,
 		goto out;
 
 	/* Batch copy the folios */
-	if (total_nr_pages > 32) {
+	/* if (total_nr_pages > 32) { */
+	if (sysctl_use_mt_copy) {
 		copy_page_lists_mt(dst_folios, src_folios, total_nr_folios);
 	} else {
 		dst = list_first_entry(dst_folios, struct folio, lru);