@@ -14,17 +14,23 @@
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
+#ifndef MADV_COLLAPSE
+#define MADV_COLLAPSE 25 /* fallback when the included headers lack it */
+#endif
#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;
static unsigned long page_size;
static int hpage_pmd_nr;
+static int num_khugepaged_wakeups; /* assigned from __COUNTER__ in main() */
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define PID_SMAPS "/proc/self/smaps"
struct collapse_context {
const char *name;
+ bool (*init_context)(void); /* optional; false makes main() skip this context */
+ bool (*cleanup_context)(void); /* optional; false makes main() stop iterating */
void (*collapse)(const char *msg, char *p, bool expect);
bool enforce_pte_scan_limits;
};
@@ -264,6 +270,17 @@ static void write_num(const char *name, unsigned long num)
}
}
+/*
+ * Tests must use this macro instead of calling write_settings() directly,
+ * and it must run at most once per callsite.
+ *
+ * Hack: every expansion bumps __COUNTER__, which gives a static count of the
+ * times khugepaged is woken up due to writes to
+ * /sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs.
+ * The (void) cast keeps the bare counter from tripping -Wunused-value.
+ */
+#define WRITE_SETTINGS(s) do { (void)__COUNTER__; write_settings(s); } while (0)
+
static void write_settings(struct settings *settings)
{
struct khugepaged_settings *khugepaged = &settings->khugepaged;
@@ -332,7 +349,7 @@ static void adjust_settings(void)
{
printf("Adjust settings...");
- write_settings(&default_settings);
+ WRITE_SETTINGS(&default_settings);
success("OK");
}
@@ -440,20 +457,25 @@ static bool check_swap(void *addr, unsigned long size)
return swap;
}
-static void *alloc_mapping(void)
+static void *alloc_mapping_at(void *at, size_t size)
{
void *p;
- p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (p != BASE_ADDR) {
- printf("Failed to allocate VMA at %p\n", BASE_ADDR);
+ p = mmap(at, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE,
+ -1, 0);
+ if (p != at) { /* mmap() failed, or placed the mapping somewhere else */
+ printf("Failed to allocate VMA at %p\n", at);
exit(EXIT_FAILURE);
}
return p;
}
+static void *alloc_mapping(void)
+{ /* Default test mapping: hpage_pmd_size bytes requested at BASE_ADDR. */
+ return alloc_mapping_at(BASE_ADDR, hpage_pmd_size);
+}
+
static void fill_memory(int *p, unsigned long start, unsigned long end)
{
int i;
@@ -573,7 +595,7 @@ static void collapse_max_ptes_none(struct collapse_context *context)
void *p;
settings.khugepaged.max_ptes_none = max_ptes_none;
- write_settings(&settings);
+ WRITE_SETTINGS(&settings);
p = alloc_mapping();
@@ -591,7 +613,7 @@ static void collapse_max_ptes_none(struct collapse_context *context)
}
munmap(p, hpage_pmd_size);
- write_settings(&default_settings);
+ WRITE_SETTINGS(&default_settings);
}
static void collapse_swapin_single_pte(struct collapse_context *context)
@@ -947,6 +969,87 @@ static void collapse_max_ptes_shared(struct collapse_context *context)
munmap(p, hpage_pmd_size);
}
+/* MADV_COLLAPSE one hugepage at @p; passes iff the outcome matches @expect. */
+static void madvise_collapse(const char *msg, char *p, bool expect)
+{
+	bool collapsed;
+
+	printf("%s...", msg);
+	if (check_huge(p)) {	/* sanity: the range must start out non-huge */
+		printf("Unexpected huge page\n");
+		exit(EXIT_FAILURE);
+	}
+
+	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+	collapsed = (madvise(p, hpage_pmd_size, MADV_COLLAPSE) == 0);
+	if (collapsed != expect)
+		fail("Fail: Bad return value");
+	else if (check_huge(p) != expect)
+		fail("Fail: check_huge()");
+	else
+		success("OK");
+}
+
+static struct khugepaged_disable_state {
+ void *p; /* mapping recorded by disable_khugepaged() */
+ size_t map_size; /* its length; enable_khugepaged() munmap()s p with it */
+} khugepaged_disable_state;
+
+static bool disable_khugepaged(void)
+{
+	/*
+	 * Hack to "disable" khugepaged by setting
+	 * /transparent_hugepage/khugepaged/scan_sleep_millisecs to some large
+	 * value, then feeding it enough suitable VMAs to scan and sleep on:
+	 * one hugepage-sized VMA up front plus one per expected wakeup.
+	 *
+	 * khugepaged is woken up on writes to scan_sleep_millisecs, so care
+	 * must be taken to not inadvertently wake khugepaged in these tests.
+	 *
+	 * Returns true and keeps the mapping for enable_khugepaged() unless
+	 * khugepaged completes two more full scans within the timeout.
+	 * In that case the mapping is dropped here: main() skips cleanup.
+	 */
+	size_t map_size = (num_khugepaged_wakeups + 1) * hpage_pmd_size;
+	void *p;
+	int full_scans;
+	int timeout = 6; /* 3 seconds */
+
+	default_settings.khugepaged.scan_sleep_millisecs = 1000 * 60 * 10;
+	default_settings.khugepaged.pages_to_scan = 1;
+	write_settings(&default_settings);
+
+	p = alloc_mapping_at(((char *)BASE_ADDR) + (1UL << 30), map_size);
+	fill_memory(p, 0, map_size);
+
+	full_scans = read_num("khugepaged/full_scans") + 2;
+
+	printf("disabling khugepaged...");
+	while (timeout--) {
+		if (read_num("khugepaged/full_scans") >= full_scans) {
+			/* Not sleeping: report it once, drop the VMAs. */
+			fail("Fail");
+			munmap(p, map_size);
+			return false;
+		}
+		printf(".");
+		usleep(TICK);
+	}
+	success("OK");
+	khugepaged_disable_state.p = p;
+	khugepaged_disable_state.map_size = map_size;
+	return true;
+}
+
+static bool enable_khugepaged(void)
+{ /* Undo disable_khugepaged(): unmap its mapping, then write saved_settings. */
+ printf("enabling khugepaged...");
+ munmap(khugepaged_disable_state.p, khugepaged_disable_state.map_size);
+ write_settings(&saved_settings);
+ success("OK");
+ return true; /* unconditional: no call above has its result checked */
+}
+
static void khugepaged_collapse(const char *msg, char *p, bool expect)
{
if (wait_for_scan(msg, p))
@@ -962,9 +1065,18 @@ int main(void)
struct collapse_context contexts[] = {
{
.name = "khugepaged",
+ .init_context = NULL,
+ .cleanup_context = NULL,
.collapse = &khugepaged_collapse,
.enforce_pte_scan_limits = true,
},
+ {
+ .name = "madvise",
+ .init_context = &disable_khugepaged,
+ .cleanup_context = &enable_khugepaged,
+ .collapse = &madvise_collapse,
+ .enforce_pte_scan_limits = false,
+ },
};
int i;
@@ -973,6 +1085,7 @@ int main(void)
page_size = getpagesize();
hpage_pmd_size = read_num("hpage_pmd_size");
hpage_pmd_nr = hpage_pmd_size / page_size;
+ num_khugepaged_wakeups = __COUNTER__;
default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
@@ -988,6 +1101,8 @@ int main(void)
struct collapse_context *c = &contexts[i];
printf("\n*** Testing context: %s ***\n", c->name);
+ if (c->init_context && !c->init_context())
+ continue;
collapse_full(c);
collapse_empty(c);
collapse_single_pte_entry(c);
@@ -1000,6 +1115,8 @@ int main(void)
collapse_fork(c);
collapse_fork_compound(c);
collapse_max_ptes_shared(c);
+ if (c->cleanup_context && !c->cleanup_context())
+ break;
}
restore_settings(0);
Add MADV_COLLAPSE selftests. Extend struct collapse_context to support
context initialization/cleanup. This is used by the madvise collapse
context to "disable" and "enable" khugepaged, since it would otherwise
interfere with the tests.

The mechanism used to "disable" khugepaged is a hack: it sets
/sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs to a
large value and feeds khugepaged enough suitable VMAs/pages to keep
khugepaged sleeping for the duration of the madvise collapse tests. Since
khugepaged is woken when this file is written, enough VMAs must be queued
to put khugepaged back to sleep when the tests write to this file in
write_settings().

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 tools/testing/selftests/vm/khugepaged.c | 133 ++++++++++++++++++++++--
 1 file changed, 125 insertions(+), 8 deletions(-)