diff mbox series

[1/5] Memcgroup: force empty after memcgroup offline

Message ID 1547955021-11520-2-git-send-email-duanxiongchun@bytedance.com (mailing list archive)
State New, archived
Headers show
Series fix offline memcgroup still hold in memory | expand

Commit Message

Xiongchun Duan Jan. 20, 2019, 3:30 a.m. UTC
After a memcgroup goes offline, if pages are still charged to it, the memcgroup
stays in memory. On systems with a lot of memory (such as 256G), more than
100000 offline memcgroups can accumulate, and their memory cannot be freed promptly.
Using a workqueue and a timer to repeatedly trigger force_empty on offline
memcgroups solves this problem. Repeated triggering is needed because
force_empty fails to reclaim a page while that page is locked.

Signed-off-by: Xiongchun Duan <duanxiongchun@bytedance.com>
---
 Documentation/cgroup-v1/memory.txt |  7 +++++--
 Documentation/sysctl/kernel.txt    | 10 ++++++++++
 include/linux/memcontrol.h         |  6 ++++++
 kernel/sysctl.c                    |  9 +++++++++
 mm/memcontrol.c                    | 17 +++++++++++++++++
 5 files changed, 47 insertions(+), 2 deletions(-)

Comments

kernel test robot Jan. 21, 2019, 6:52 p.m. UTC | #1
Hi Xiongchun,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.0-rc2 next-20190116]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Xiongchun-Duan/Memcgroup-force-empty-after-memcgroup-offline/20190122-014721
config: x86_64-randconfig-x005-201903 (attached as .config)
compiler: gcc-8 (Debian 8.2.0-14) 8.2.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

>> kernel/sysctl.c:1257:22: error: 'sysctl_cgroup_default_retry' undeclared here (not in a function); did you mean 'sysctl_rmem_default'?
      .data           = &sysctl_cgroup_default_retry,
                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~
                         sysctl_rmem_default
>> kernel/sysctl.c:1261:22: error: 'sysctl_cgroup_default_retry_min' undeclared here (not in a function); did you mean 'sysctl_rmem_default'?
      .extra1         = &sysctl_cgroup_default_retry_min,
                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                         sysctl_rmem_default
>> kernel/sysctl.c:1262:22: error: 'sysctl_cgroup_default_retry_max' undeclared here (not in a function); did you mean 'sysctl_rmem_default'?
      .extra2         = &sysctl_cgroup_default_retry_max,
                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                         sysctl_rmem_default

vim +1257 kernel/sysctl.c

   977	
   978	#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
   979		{
   980			.procname       = "unknown_nmi_panic",
   981			.data           = &unknown_nmi_panic,
   982			.maxlen         = sizeof (int),
   983			.mode           = 0644,
   984			.proc_handler   = proc_dointvec,
   985		},
   986	#endif
   987	#if defined(CONFIG_X86)
   988		{
   989			.procname	= "panic_on_unrecovered_nmi",
   990			.data		= &panic_on_unrecovered_nmi,
   991			.maxlen		= sizeof(int),
   992			.mode		= 0644,
   993			.proc_handler	= proc_dointvec,
   994		},
   995		{
   996			.procname	= "panic_on_io_nmi",
   997			.data		= &panic_on_io_nmi,
   998			.maxlen		= sizeof(int),
   999			.mode		= 0644,
  1000			.proc_handler	= proc_dointvec,
  1001		},
  1002	#ifdef CONFIG_DEBUG_STACKOVERFLOW
  1003		{
  1004			.procname	= "panic_on_stackoverflow",
  1005			.data		= &sysctl_panic_on_stackoverflow,
  1006			.maxlen		= sizeof(int),
  1007			.mode		= 0644,
  1008			.proc_handler	= proc_dointvec,
  1009		},
  1010	#endif
  1011		{
  1012			.procname	= "bootloader_type",
  1013			.data		= &bootloader_type,
  1014			.maxlen		= sizeof (int),
  1015			.mode		= 0444,
  1016			.proc_handler	= proc_dointvec,
  1017		},
  1018		{
  1019			.procname	= "bootloader_version",
  1020			.data		= &bootloader_version,
  1021			.maxlen		= sizeof (int),
  1022			.mode		= 0444,
  1023			.proc_handler	= proc_dointvec,
  1024		},
  1025		{
  1026			.procname	= "io_delay_type",
  1027			.data		= &io_delay_type,
  1028			.maxlen		= sizeof(int),
  1029			.mode		= 0644,
  1030			.proc_handler	= proc_dointvec,
  1031		},
  1032	#endif
  1033	#if defined(CONFIG_MMU)
  1034		{
  1035			.procname	= "randomize_va_space",
  1036			.data		= &randomize_va_space,
  1037			.maxlen		= sizeof(int),
  1038			.mode		= 0644,
  1039			.proc_handler	= proc_dointvec,
  1040		},
  1041	#endif
  1042	#if defined(CONFIG_S390) && defined(CONFIG_SMP)
  1043		{
  1044			.procname	= "spin_retry",
  1045			.data		= &spin_retry,
  1046			.maxlen		= sizeof (int),
  1047			.mode		= 0644,
  1048			.proc_handler	= proc_dointvec,
  1049		},
  1050	#endif
  1051	#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
  1052		{
  1053			.procname	= "acpi_video_flags",
  1054			.data		= &acpi_realmode_flags,
  1055			.maxlen		= sizeof (unsigned long),
  1056			.mode		= 0644,
  1057			.proc_handler	= proc_doulongvec_minmax,
  1058		},
  1059	#endif
  1060	#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
  1061		{
  1062			.procname	= "ignore-unaligned-usertrap",
  1063			.data		= &no_unaligned_warning,
  1064			.maxlen		= sizeof (int),
  1065		 	.mode		= 0644,
  1066			.proc_handler	= proc_dointvec,
  1067		},
  1068	#endif
  1069	#ifdef CONFIG_IA64
  1070		{
  1071			.procname	= "unaligned-dump-stack",
  1072			.data		= &unaligned_dump_stack,
  1073			.maxlen		= sizeof (int),
  1074			.mode		= 0644,
  1075			.proc_handler	= proc_dointvec,
  1076		},
  1077	#endif
  1078	#ifdef CONFIG_DETECT_HUNG_TASK
  1079		{
  1080			.procname	= "hung_task_panic",
  1081			.data		= &sysctl_hung_task_panic,
  1082			.maxlen		= sizeof(int),
  1083			.mode		= 0644,
  1084			.proc_handler	= proc_dointvec_minmax,
  1085			.extra1		= &zero,
  1086			.extra2		= &one,
  1087		},
  1088		{
  1089			.procname	= "hung_task_check_count",
  1090			.data		= &sysctl_hung_task_check_count,
  1091			.maxlen		= sizeof(int),
  1092			.mode		= 0644,
  1093			.proc_handler	= proc_dointvec_minmax,
  1094			.extra1		= &zero,
  1095		},
  1096		{
  1097			.procname	= "hung_task_timeout_secs",
  1098			.data		= &sysctl_hung_task_timeout_secs,
  1099			.maxlen		= sizeof(unsigned long),
  1100			.mode		= 0644,
  1101			.proc_handler	= proc_dohung_task_timeout_secs,
  1102			.extra2		= &hung_task_timeout_max,
  1103		},
  1104		{
  1105			.procname	= "hung_task_check_interval_secs",
  1106			.data		= &sysctl_hung_task_check_interval_secs,
  1107			.maxlen		= sizeof(unsigned long),
  1108			.mode		= 0644,
  1109			.proc_handler	= proc_dohung_task_timeout_secs,
  1110			.extra2		= &hung_task_timeout_max,
  1111		},
  1112		{
  1113			.procname	= "hung_task_warnings",
  1114			.data		= &sysctl_hung_task_warnings,
  1115			.maxlen		= sizeof(int),
  1116			.mode		= 0644,
  1117			.proc_handler	= proc_dointvec_minmax,
  1118			.extra1		= &neg_one,
  1119		},
  1120	#endif
  1121	#ifdef CONFIG_RT_MUTEXES
  1122		{
  1123			.procname	= "max_lock_depth",
  1124			.data		= &max_lock_depth,
  1125			.maxlen		= sizeof(int),
  1126			.mode		= 0644,
  1127			.proc_handler	= proc_dointvec,
  1128		},
  1129	#endif
  1130		{
  1131			.procname	= "poweroff_cmd",
  1132			.data		= &poweroff_cmd,
  1133			.maxlen		= POWEROFF_CMD_PATH_LEN,
  1134			.mode		= 0644,
  1135			.proc_handler	= proc_dostring,
  1136		},
  1137	#ifdef CONFIG_KEYS
  1138		{
  1139			.procname	= "keys",
  1140			.mode		= 0555,
  1141			.child		= key_sysctls,
  1142		},
  1143	#endif
  1144	#ifdef CONFIG_PERF_EVENTS
  1145		/*
  1146		 * User-space scripts rely on the existence of this file
  1147		 * as a feature check for perf_events being enabled.
  1148		 *
  1149		 * So it's an ABI, do not remove!
  1150		 */
  1151		{
  1152			.procname	= "perf_event_paranoid",
  1153			.data		= &sysctl_perf_event_paranoid,
  1154			.maxlen		= sizeof(sysctl_perf_event_paranoid),
  1155			.mode		= 0644,
  1156			.proc_handler	= proc_dointvec,
  1157		},
  1158		{
  1159			.procname	= "perf_event_mlock_kb",
  1160			.data		= &sysctl_perf_event_mlock,
  1161			.maxlen		= sizeof(sysctl_perf_event_mlock),
  1162			.mode		= 0644,
  1163			.proc_handler	= proc_dointvec,
  1164		},
  1165		{
  1166			.procname	= "perf_event_max_sample_rate",
  1167			.data		= &sysctl_perf_event_sample_rate,
  1168			.maxlen		= sizeof(sysctl_perf_event_sample_rate),
  1169			.mode		= 0644,
  1170			.proc_handler	= perf_proc_update_handler,
  1171			.extra1		= &one,
  1172		},
  1173		{
  1174			.procname	= "perf_cpu_time_max_percent",
  1175			.data		= &sysctl_perf_cpu_time_max_percent,
  1176			.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
  1177			.mode		= 0644,
  1178			.proc_handler	= perf_cpu_time_max_percent_handler,
  1179			.extra1		= &zero,
  1180			.extra2		= &one_hundred,
  1181		},
  1182		{
  1183			.procname	= "perf_event_max_stack",
  1184			.data		= &sysctl_perf_event_max_stack,
  1185			.maxlen		= sizeof(sysctl_perf_event_max_stack),
  1186			.mode		= 0644,
  1187			.proc_handler	= perf_event_max_stack_handler,
  1188			.extra1		= &zero,
  1189			.extra2		= &six_hundred_forty_kb,
  1190		},
  1191		{
  1192			.procname	= "perf_event_max_contexts_per_stack",
  1193			.data		= &sysctl_perf_event_max_contexts_per_stack,
  1194			.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
  1195			.mode		= 0644,
  1196			.proc_handler	= perf_event_max_stack_handler,
  1197			.extra1		= &zero,
  1198			.extra2		= &one_thousand,
  1199		},
  1200	#endif
  1201		{
  1202			.procname	= "panic_on_warn",
  1203			.data		= &panic_on_warn,
  1204			.maxlen		= sizeof(int),
  1205			.mode		= 0644,
  1206			.proc_handler	= proc_dointvec_minmax,
  1207			.extra1		= &zero,
  1208			.extra2		= &one,
  1209		},
  1210	#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
  1211		{
  1212			.procname	= "timer_migration",
  1213			.data		= &sysctl_timer_migration,
  1214			.maxlen		= sizeof(unsigned int),
  1215			.mode		= 0644,
  1216			.proc_handler	= timer_migration_handler,
  1217			.extra1		= &zero,
  1218			.extra2		= &one,
  1219		},
  1220	#endif
  1221	#ifdef CONFIG_BPF_SYSCALL
  1222		{
  1223			.procname	= "unprivileged_bpf_disabled",
  1224			.data		= &sysctl_unprivileged_bpf_disabled,
  1225			.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
  1226			.mode		= 0644,
  1227			/* only handle a transition from default "0" to "1" */
  1228			.proc_handler	= proc_dointvec_minmax,
  1229			.extra1		= &one,
  1230			.extra2		= &one,
  1231		},
  1232	#endif
  1233	#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
  1234		{
  1235			.procname	= "panic_on_rcu_stall",
  1236			.data		= &sysctl_panic_on_rcu_stall,
  1237			.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
  1238			.mode		= 0644,
  1239			.proc_handler	= proc_dointvec_minmax,
  1240			.extra1		= &zero,
  1241			.extra2		= &one,
  1242		},
  1243	#endif
  1244	#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
  1245		{
  1246			.procname	= "stack_erasing",
  1247			.data		= NULL,
  1248			.maxlen		= sizeof(int),
  1249			.mode		= 0600,
  1250			.proc_handler	= stack_erasing_sysctl,
  1251			.extra1		= &zero,
  1252			.extra2		= &one,
  1253		},
  1254	#endif
  1255		{
  1256			.procname       = "cgroup_default_retry",
> 1257			.data           = &sysctl_cgroup_default_retry,
  1258			.maxlen         = sizeof(unsigned int),
  1259			.mode           = 0644,
  1260			.proc_handler   = proc_dointvec_minmax,
> 1261			.extra1         = &sysctl_cgroup_default_retry_min,
> 1262			.extra2         = &sysctl_cgroup_default_retry_max,
  1263		},
  1264		{ }
  1265	};
  1266	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Jan. 21, 2019, 7:09 p.m. UTC | #2
Hi Xiongchun,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.0-rc2 next-20190116]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Xiongchun-Duan/Memcgroup-force-empty-after-memcgroup-offline/20190122-014721
config: i386-randconfig-s2-01210338 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

>> kernel/sysctl.c:1257:22: error: 'sysctl_cgroup_default_retry' undeclared here (not in a function)
      .data           = &sysctl_cgroup_default_retry,
                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~
>> kernel/sysctl.c:1261:22: error: 'sysctl_cgroup_default_retry_min' undeclared here (not in a function)
      .extra1         = &sysctl_cgroup_default_retry_min,
                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> kernel/sysctl.c:1262:22: error: 'sysctl_cgroup_default_retry_max' undeclared here (not in a function)
      .extra2         = &sysctl_cgroup_default_retry_max,
                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

vim +/sysctl_cgroup_default_retry +1257 kernel/sysctl.c

   977	
   978	#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
   979		{
   980			.procname       = "unknown_nmi_panic",
   981			.data           = &unknown_nmi_panic,
   982			.maxlen         = sizeof (int),
   983			.mode           = 0644,
   984			.proc_handler   = proc_dointvec,
   985		},
   986	#endif
   987	#if defined(CONFIG_X86)
   988		{
   989			.procname	= "panic_on_unrecovered_nmi",
   990			.data		= &panic_on_unrecovered_nmi,
   991			.maxlen		= sizeof(int),
   992			.mode		= 0644,
   993			.proc_handler	= proc_dointvec,
   994		},
   995		{
   996			.procname	= "panic_on_io_nmi",
   997			.data		= &panic_on_io_nmi,
   998			.maxlen		= sizeof(int),
   999			.mode		= 0644,
  1000			.proc_handler	= proc_dointvec,
  1001		},
  1002	#ifdef CONFIG_DEBUG_STACKOVERFLOW
  1003		{
  1004			.procname	= "panic_on_stackoverflow",
  1005			.data		= &sysctl_panic_on_stackoverflow,
  1006			.maxlen		= sizeof(int),
  1007			.mode		= 0644,
  1008			.proc_handler	= proc_dointvec,
  1009		},
  1010	#endif
  1011		{
  1012			.procname	= "bootloader_type",
  1013			.data		= &bootloader_type,
  1014			.maxlen		= sizeof (int),
  1015			.mode		= 0444,
  1016			.proc_handler	= proc_dointvec,
  1017		},
  1018		{
  1019			.procname	= "bootloader_version",
  1020			.data		= &bootloader_version,
  1021			.maxlen		= sizeof (int),
  1022			.mode		= 0444,
  1023			.proc_handler	= proc_dointvec,
  1024		},
  1025		{
  1026			.procname	= "io_delay_type",
  1027			.data		= &io_delay_type,
  1028			.maxlen		= sizeof(int),
  1029			.mode		= 0644,
  1030			.proc_handler	= proc_dointvec,
  1031		},
  1032	#endif
  1033	#if defined(CONFIG_MMU)
  1034		{
  1035			.procname	= "randomize_va_space",
  1036			.data		= &randomize_va_space,
  1037			.maxlen		= sizeof(int),
  1038			.mode		= 0644,
  1039			.proc_handler	= proc_dointvec,
  1040		},
  1041	#endif
  1042	#if defined(CONFIG_S390) && defined(CONFIG_SMP)
  1043		{
  1044			.procname	= "spin_retry",
  1045			.data		= &spin_retry,
  1046			.maxlen		= sizeof (int),
  1047			.mode		= 0644,
  1048			.proc_handler	= proc_dointvec,
  1049		},
  1050	#endif
  1051	#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
  1052		{
  1053			.procname	= "acpi_video_flags",
  1054			.data		= &acpi_realmode_flags,
  1055			.maxlen		= sizeof (unsigned long),
  1056			.mode		= 0644,
  1057			.proc_handler	= proc_doulongvec_minmax,
  1058		},
  1059	#endif
  1060	#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
  1061		{
  1062			.procname	= "ignore-unaligned-usertrap",
  1063			.data		= &no_unaligned_warning,
  1064			.maxlen		= sizeof (int),
  1065		 	.mode		= 0644,
  1066			.proc_handler	= proc_dointvec,
  1067		},
  1068	#endif
  1069	#ifdef CONFIG_IA64
  1070		{
  1071			.procname	= "unaligned-dump-stack",
  1072			.data		= &unaligned_dump_stack,
  1073			.maxlen		= sizeof (int),
  1074			.mode		= 0644,
  1075			.proc_handler	= proc_dointvec,
  1076		},
  1077	#endif
  1078	#ifdef CONFIG_DETECT_HUNG_TASK
  1079		{
  1080			.procname	= "hung_task_panic",
  1081			.data		= &sysctl_hung_task_panic,
  1082			.maxlen		= sizeof(int),
  1083			.mode		= 0644,
  1084			.proc_handler	= proc_dointvec_minmax,
  1085			.extra1		= &zero,
  1086			.extra2		= &one,
  1087		},
  1088		{
  1089			.procname	= "hung_task_check_count",
  1090			.data		= &sysctl_hung_task_check_count,
  1091			.maxlen		= sizeof(int),
  1092			.mode		= 0644,
  1093			.proc_handler	= proc_dointvec_minmax,
  1094			.extra1		= &zero,
  1095		},
  1096		{
  1097			.procname	= "hung_task_timeout_secs",
  1098			.data		= &sysctl_hung_task_timeout_secs,
  1099			.maxlen		= sizeof(unsigned long),
  1100			.mode		= 0644,
  1101			.proc_handler	= proc_dohung_task_timeout_secs,
  1102			.extra2		= &hung_task_timeout_max,
  1103		},
  1104		{
  1105			.procname	= "hung_task_check_interval_secs",
  1106			.data		= &sysctl_hung_task_check_interval_secs,
  1107			.maxlen		= sizeof(unsigned long),
  1108			.mode		= 0644,
  1109			.proc_handler	= proc_dohung_task_timeout_secs,
  1110			.extra2		= &hung_task_timeout_max,
  1111		},
  1112		{
  1113			.procname	= "hung_task_warnings",
  1114			.data		= &sysctl_hung_task_warnings,
  1115			.maxlen		= sizeof(int),
  1116			.mode		= 0644,
  1117			.proc_handler	= proc_dointvec_minmax,
  1118			.extra1		= &neg_one,
  1119		},
  1120	#endif
  1121	#ifdef CONFIG_RT_MUTEXES
  1122		{
  1123			.procname	= "max_lock_depth",
  1124			.data		= &max_lock_depth,
  1125			.maxlen		= sizeof(int),
  1126			.mode		= 0644,
  1127			.proc_handler	= proc_dointvec,
  1128		},
  1129	#endif
  1130		{
  1131			.procname	= "poweroff_cmd",
  1132			.data		= &poweroff_cmd,
  1133			.maxlen		= POWEROFF_CMD_PATH_LEN,
  1134			.mode		= 0644,
  1135			.proc_handler	= proc_dostring,
  1136		},
  1137	#ifdef CONFIG_KEYS
  1138		{
  1139			.procname	= "keys",
  1140			.mode		= 0555,
  1141			.child		= key_sysctls,
  1142		},
  1143	#endif
  1144	#ifdef CONFIG_PERF_EVENTS
  1145		/*
  1146		 * User-space scripts rely on the existence of this file
  1147		 * as a feature check for perf_events being enabled.
  1148		 *
  1149		 * So it's an ABI, do not remove!
  1150		 */
  1151		{
  1152			.procname	= "perf_event_paranoid",
  1153			.data		= &sysctl_perf_event_paranoid,
  1154			.maxlen		= sizeof(sysctl_perf_event_paranoid),
  1155			.mode		= 0644,
  1156			.proc_handler	= proc_dointvec,
  1157		},
  1158		{
  1159			.procname	= "perf_event_mlock_kb",
  1160			.data		= &sysctl_perf_event_mlock,
  1161			.maxlen		= sizeof(sysctl_perf_event_mlock),
  1162			.mode		= 0644,
  1163			.proc_handler	= proc_dointvec,
  1164		},
  1165		{
  1166			.procname	= "perf_event_max_sample_rate",
  1167			.data		= &sysctl_perf_event_sample_rate,
  1168			.maxlen		= sizeof(sysctl_perf_event_sample_rate),
  1169			.mode		= 0644,
  1170			.proc_handler	= perf_proc_update_handler,
  1171			.extra1		= &one,
  1172		},
  1173		{
  1174			.procname	= "perf_cpu_time_max_percent",
  1175			.data		= &sysctl_perf_cpu_time_max_percent,
  1176			.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
  1177			.mode		= 0644,
  1178			.proc_handler	= perf_cpu_time_max_percent_handler,
  1179			.extra1		= &zero,
  1180			.extra2		= &one_hundred,
  1181		},
  1182		{
  1183			.procname	= "perf_event_max_stack",
  1184			.data		= &sysctl_perf_event_max_stack,
  1185			.maxlen		= sizeof(sysctl_perf_event_max_stack),
  1186			.mode		= 0644,
  1187			.proc_handler	= perf_event_max_stack_handler,
  1188			.extra1		= &zero,
  1189			.extra2		= &six_hundred_forty_kb,
  1190		},
  1191		{
  1192			.procname	= "perf_event_max_contexts_per_stack",
  1193			.data		= &sysctl_perf_event_max_contexts_per_stack,
  1194			.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
  1195			.mode		= 0644,
  1196			.proc_handler	= perf_event_max_stack_handler,
  1197			.extra1		= &zero,
  1198			.extra2		= &one_thousand,
  1199		},
  1200	#endif
  1201		{
  1202			.procname	= "panic_on_warn",
  1203			.data		= &panic_on_warn,
  1204			.maxlen		= sizeof(int),
  1205			.mode		= 0644,
  1206			.proc_handler	= proc_dointvec_minmax,
  1207			.extra1		= &zero,
  1208			.extra2		= &one,
  1209		},
  1210	#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
  1211		{
  1212			.procname	= "timer_migration",
  1213			.data		= &sysctl_timer_migration,
  1214			.maxlen		= sizeof(unsigned int),
  1215			.mode		= 0644,
  1216			.proc_handler	= timer_migration_handler,
  1217			.extra1		= &zero,
  1218			.extra2		= &one,
  1219		},
  1220	#endif
  1221	#ifdef CONFIG_BPF_SYSCALL
  1222		{
  1223			.procname	= "unprivileged_bpf_disabled",
  1224			.data		= &sysctl_unprivileged_bpf_disabled,
  1225			.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
  1226			.mode		= 0644,
  1227			/* only handle a transition from default "0" to "1" */
  1228			.proc_handler	= proc_dointvec_minmax,
  1229			.extra1		= &one,
  1230			.extra2		= &one,
  1231		},
  1232	#endif
  1233	#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
  1234		{
  1235			.procname	= "panic_on_rcu_stall",
  1236			.data		= &sysctl_panic_on_rcu_stall,
  1237			.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
  1238			.mode		= 0644,
  1239			.proc_handler	= proc_dointvec_minmax,
  1240			.extra1		= &zero,
  1241			.extra2		= &one,
  1242		},
  1243	#endif
  1244	#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
  1245		{
  1246			.procname	= "stack_erasing",
  1247			.data		= NULL,
  1248			.maxlen		= sizeof(int),
  1249			.mode		= 0600,
  1250			.proc_handler	= stack_erasing_sysctl,
  1251			.extra1		= &zero,
  1252			.extra2		= &one,
  1253		},
  1254	#endif
  1255		{
  1256			.procname       = "cgroup_default_retry",
> 1257			.data           = &sysctl_cgroup_default_retry,
  1258			.maxlen         = sizeof(unsigned int),
  1259			.mode           = 0644,
  1260			.proc_handler   = proc_dointvec_minmax,
> 1261			.extra1         = &sysctl_cgroup_default_retry_min,
> 1262			.extra2         = &sysctl_cgroup_default_retry_max,
  1263		},
  1264		{ }
  1265	};
  1266	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox series

Patch

diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
index 3682e99..bdba86f 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.txt
@@ -452,11 +452,14 @@  About use_hierarchy, see Section 6.
 
 5.1 force_empty
   memory.force_empty interface is provided to make cgroup's memory usage empty.
-  When writing anything to this
+  When writing 0, 1, or a value of 18 or more to this
 
   # echo 0 > memory.force_empty
 
-  the cgroup will be reclaimed and as many pages reclaimed as possible.
+  the cgroup will be reclaimed and as many pages reclaimed as possible
+  synchronously.
+  When writing a value from 2 to 17 instead, memory reclaim is deferred until css offline.
+  If a reclaim attempt fails, it is retried from a workqueue; the retry limit is (value - 1).
 
   The typical use case for this interface is before calling rmdir().
   Because rmdir() moves all pages to parent, some out-of-use page caches can be
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index c0527d8..fc0b9b1 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -99,6 +99,7 @@  show up in /proc/sys/kernel:
 - unknown_nmi_panic
 - watchdog
 - watchdog_thresh
+- cgroup_default_retry
 - version
 
 ==============================================================
@@ -1137,3 +1138,12 @@  The softlockup threshold is (2 * watchdog_thresh). Setting this
 tunable to zero will disable lockup detection altogether.
 
 ==============================================================
+
+cgroup_default_retry:
+
+This value sets the default number of reclaim retries for a memory cgroup.
+The default value is 0.
+
+The maximum value is 16 and the minimum value is 0.
+
+==============================================================
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 83ae11c..d6fbb77 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -311,9 +311,15 @@  struct mem_cgroup {
 	struct list_head event_list;
 	spinlock_t event_list_lock;
 
+	int max_retry;
+	int current_retry;
+
 	struct mem_cgroup_per_node *nodeinfo[0];
 	/* WARNING: nodeinfo must be the last member here */
 };
+extern int sysctl_cgroup_default_retry;
+extern int sysctl_cgroup_default_retry_min;
+extern int sysctl_cgroup_default_retry_max;
 
 /*
  * size of first charge trial. "32" comes from vmscan.c's magic value.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ba4d9e8..b6dbb10 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1252,6 +1252,15 @@  static int sysrq_sysctl_handler(struct ctl_table *table, int write,
 		.extra2		= &one,
 	},
 #endif
+	{
+		.procname       = "cgroup_default_retry",
+		.data           = &sysctl_cgroup_default_retry,
+		.maxlen         = sizeof(unsigned int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_minmax,
+		.extra1         = &sysctl_cgroup_default_retry_min,
+		.extra2         = &sysctl_cgroup_default_retry_max,
+	},
 	{ }
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index af7f18b..2b13c2b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -77,6 +77,10 @@ 
 struct cgroup_subsys memory_cgrp_subsys __read_mostly;
 EXPORT_SYMBOL(memory_cgrp_subsys);
 
+int sysctl_cgroup_default_retry __read_mostly;
+int sysctl_cgroup_default_retry_min;
+int sysctl_cgroup_default_retry_max = 16;
+
 struct mem_cgroup *root_mem_cgroup __read_mostly;
 
 #define MEM_CGROUP_RECLAIM_RETRIES	5
@@ -2911,10 +2915,21 @@  static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of,
 					    char *buf, size_t nbytes,
 					    loff_t off)
 {
+	unsigned long val;
+	ssize_t ret;
 	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 
 	if (mem_cgroup_is_root(memcg))
 		return -EINVAL;
+
+	buf = strstrip(buf);
+	ret = kstrtoul(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+	if (val > 1 && val < 18) {
+		memcg->max_retry = val - 1;
+		return nbytes;
+	}
 	return mem_cgroup_force_empty(memcg) ?: nbytes;
 }
 
@@ -4521,6 +4536,8 @@  static struct mem_cgroup *mem_cgroup_alloc(void)
 
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
 		static_branch_inc(&memcg_sockets_enabled_key);
+	memcg->max_retry = sysctl_cgroup_default_retry;
+	memcg->current_retry  = 0;
 
 	return &memcg->css;
 fail: