--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -141,38 +141,6 @@ static inline bool transhuge_vma_enabled(struct vm_area_struct *vma,
return true;
}
-/*
- * to be used on vmas which are known to support THP.
- * Use transparent_hugepage_active otherwise
- */
-static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
-{
-
- /*
- * If the hardware/firmware marked hugepage support disabled.
- */
- if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
- return false;
-
- if (!transhuge_vma_enabled(vma, vma->vm_flags))
- return false;
-
- if (vma_is_temporary_stack(vma))
- return false;
-
- if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
- return true;
-
- if (vma_is_dax(vma))
- return true;
-
- if (transparent_hugepage_flags &
- (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
- return !!(vma->vm_flags & VM_HUGEPAGE);
-
- return false;
-}
-
bool transparent_hugepage_active(struct vm_area_struct *vma);
#define transparent_hugepage_use_zero_page() \
@@ -302,6 +270,7 @@ static inline struct list_head *page_deferred_list(struct page *page)
*/
return &page[2].deferred_list;
}
+bool __transparent_hugepage_enabled(struct vm_area_struct *vma);
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -26,16 +26,9 @@ static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
}
#endif
-#define khugepaged_enabled() \
- (transparent_hugepage_flags & \
- ((1<<TRANSPARENT_HUGEPAGE_FLAG) | \
- (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
-#define khugepaged_always() \
- (transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_FLAG))
-#define khugepaged_req_madv() \
- (transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
+extern int khugepaged_enabled(void);
+extern int khugepaged_always(struct vm_area_struct *vma);
+extern int khugepaged_req_madv(struct vm_area_struct *vma);
#define khugepaged_defrag() \
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG))
@@ -57,9 +50,9 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
unsigned long vm_flags)
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
- if ((khugepaged_always() ||
- (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) ||
- (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
+ if ((khugepaged_always(vma) ||
+ (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) ||
+ (khugepaged_req_madv(vma) && (vm_flags & VM_HUGEPAGE))) &&
!(vm_flags & VM_NOHUGEPAGE) &&
!test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
if (__khugepaged_enter(vma->vm_mm))
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -28,6 +28,13 @@ struct page;
struct mm_struct;
struct kmem_cache;
+/*
+ * Incremented when a non-root cgroup enables transparent hugepage and
+ * decremented when it disables it again; khugepaged uses this to decide
+ * whether it needs to run at all.
+ */
+extern atomic_t sub_thp_count;
+
/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
@@ -342,6 +349,7 @@ struct mem_cgroup {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct deferred_split deferred_split_queue;
+ unsigned long thp_flag;
#endif
struct mem_cgroup_per_node *nodeinfo[];
@@ -1127,6 +1135,34 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long mem_cgroup_thp_flag(struct mem_cgroup *memcg)
+{
+	if (unlikely(!memcg) || mem_cgroup_disabled() ||
+	    mem_cgroup_is_root(memcg))
+ return transparent_hugepage_flags;
+
+ return memcg->thp_flag;
+}
+
+static inline int memcg_sub_thp_enabled(void)
+{
+ return atomic_read(&sub_thp_count) != 0;
+}
+
+static inline void memcg_sub_thp_enable(struct mem_cgroup *memcg)
+{
+ if (!mem_cgroup_is_root(memcg))
+ atomic_inc(&sub_thp_count);
+}
+
+static inline void memcg_sub_thp_disable(struct mem_cgroup *memcg)
+{
+ if (!mem_cgroup_is_root(memcg))
+ atomic_dec(&sub_thp_count);
+}
+#endif
+
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
@@ -1524,6 +1560,27 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
{
return 0;
}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long mem_cgroup_thp_flag(struct mem_cgroup *memcg)
+{
+ return transparent_hugepage_flags;
+}
+
+static inline int memcg_sub_thp_enabled(void)
+{
+ return 0;
+}
+
+static inline void memcg_sub_thp_enable(struct mem_cgroup *memcg)
+{
+}
+
+static inline void memcg_sub_thp_disable(struct mem_cgroup *memcg)
+{
+}
+#endif
+
#endif /* CONFIG_MEMCG */
static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3174,3 +3174,39 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
trace_remove_migration_pmd(address, pmd_val(pmde));
}
#endif
+
+/*
+ * to be used on vmas which are known to support THP.
+ * Use transparent_hugepage_active otherwise
+ */
+bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(vma->vm_mm);
+	unsigned long thp_flag = mem_cgroup_thp_flag(memcg);
+
+	/* drop the reference taken by get_mem_cgroup_from_mm() */
+	mem_cgroup_put(memcg);
+
+	/*
+	 * If the hardware/firmware marked hugepage support disabled.
+	 */
+	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
+		return false;
+
+	if (!transhuge_vma_enabled(vma, vma->vm_flags))
+		return false;
+
+	if (vma_is_temporary_stack(vma))
+		return false;
+
+	if (thp_flag & (1 << TRANSPARENT_HUGEPAGE_FLAG))
+		return true;
+
+	if (vma_is_dax(vma))
+		return true;
+
+	if (thp_flag & (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
+		return !!(vma->vm_flags & VM_HUGEPAGE);
+
+	return false;
+}
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -454,7 +454,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
return shmem_huge_enabled(vma);
/* THP settings require madvise. */
- if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
+ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always(vma))
return false;
/* Only regular file is valid */
@@ -1537,6 +1537,36 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
goto drop_hpage;
}
+int khugepaged_enabled(void)
+{
+	return (transparent_hugepage_flags &
+		((1<<TRANSPARENT_HUGEPAGE_FLAG) |
+		 (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))) ||
+	       memcg_sub_thp_enabled();
+}
+
+int khugepaged_req_madv(struct vm_area_struct *vma)
+{
+	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(vma->vm_mm);
+	unsigned long thp_flag = mem_cgroup_thp_flag(memcg);
+
+	/* drop the reference taken by get_mem_cgroup_from_mm() */
+	mem_cgroup_put(memcg);
+
+	return !!(thp_flag & (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG));
+}
+
+int khugepaged_always(struct vm_area_struct *vma)
+{
+	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(vma->vm_mm);
+	unsigned long thp_flag = mem_cgroup_thp_flag(memcg);
+
+	/* drop the reference taken by get_mem_cgroup_from_mm() */
+	mem_cgroup_put(memcg);
+
+	return !!(thp_flag & (1<<TRANSPARENT_HUGEPAGE_FLAG));
+}
+
static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
{
struct mm_struct *mm = mm_slot->mm;
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -63,6 +63,7 @@
#include <linux/resume_user_mode.h>
#include <linux/psi.h>
#include <linux/seq_buf.h>
+#include <linux/khugepaged.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
@@ -99,6 +100,8 @@ static bool cgroup_memory_noswap __ro_after_init;
static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
#endif
+atomic_t sub_thp_count __read_mostly = ATOMIC_INIT(0);
+
/* Whether legacy memory+swap accounting is active */
static bool do_memsw_account(void)
{
@@ -4823,6 +4826,73 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p)
}
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int mem_cgroup_thp_flag_show(struct seq_file *sf, void *v)
+{
+ const char *output;
+ struct mem_cgroup *memcg = mem_cgroup_from_seq(sf);
+ unsigned long flag = mem_cgroup_thp_flag(memcg);
+
+ if (test_bit(TRANSPARENT_HUGEPAGE_FLAG, &flag))
+ output = "[always] madvise never";
+ else if (test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &flag))
+ output = "always [madvise] never";
+ else
+ output = "always madvise [never]";
+
+ seq_printf(sf, "%s\n", output);
+ return 0;
+}
+
+static ssize_t mem_cgroup_thp_flag_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ ssize_t ret = nbytes;
+ unsigned long *flag;
+
+ if (!mem_cgroup_is_root(memcg))
+ flag = &memcg->thp_flag;
+ else
+ flag = &transparent_hugepage_flags;
+
+	if (sysfs_streq(buf, "always")) {
+		if (!test_bit(TRANSPARENT_HUGEPAGE_FLAG, flag)) {
+			/* change disable to enable, keep the modes exclusive */
+			if (!test_and_clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
+						flag))
+				memcg_sub_thp_enable(memcg);
+			set_bit(TRANSPARENT_HUGEPAGE_FLAG, flag);
+		}
+	} else if (sysfs_streq(buf, "madvise")) {
+		if (!test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, flag)) {
+			/* change disable to enable, keep the modes exclusive */
+			if (!test_and_clear_bit(TRANSPARENT_HUGEPAGE_FLAG, flag))
+				memcg_sub_thp_enable(memcg);
+			set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, flag);
+		}
+	} else if (sysfs_streq(buf, "never")) {
+		/* change enable to disable */
+		if (test_bit(TRANSPARENT_HUGEPAGE_FLAG, flag) ||
+		    test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, flag)) {
+			clear_bit(TRANSPARENT_HUGEPAGE_FLAG, flag);
+			clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, flag);
+			memcg_sub_thp_disable(memcg);
+		}
+	} else {
+		ret = -EINVAL;
+	}
+
+ if (ret > 0) {
+ int err = start_stop_khugepaged();
+
+ if (err)
+ ret = err;
+ }
+ return ret;
+}
+#endif
+
static struct cftype mem_cgroup_legacy_files[] = {
{
.name = "usage_in_bytes",
@@ -4948,6 +5016,13 @@ static struct cftype mem_cgroup_legacy_files[] = {
.write = mem_cgroup_reset,
.read_u64 = mem_cgroup_read_u64,
},
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ {
+ .name = "transparent_hugepage.enabled",
+ .seq_show = mem_cgroup_thp_flag_show,
+ .write = mem_cgroup_thp_flag_write,
+ },
+#endif
{ }, /* terminate */
};
@@ -5145,6 +5220,13 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
if (parent) {
memcg->swappiness = mem_cgroup_swappiness(parent);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ memcg->thp_flag = mem_cgroup_thp_flag(parent);
+ if (memcg->thp_flag &
+ ((1<<TRANSPARENT_HUGEPAGE_FLAG) |
+ (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
+ memcg_sub_thp_enable(memcg);
+#endif
memcg->oom_kill_disable = parent->oom_kill_disable;
page_counter_init(&memcg->memory, &parent->memory);
@@ -5220,7 +5302,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
memcg_offline_kmem(memcg);
reparent_shrinker_deferred(memcg);
wb_memcg_offline(memcg);
-
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* drop the sub_thp_count reference only if this memcg still holds one */
+	if (memcg->thp_flag &
+	    ((1<<TRANSPARENT_HUGEPAGE_FLAG) |
+	     (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
+		memcg_sub_thp_disable(memcg);
+#endif
drain_all_stock(memcg);
mem_cgroup_id_put(memcg);
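
For reference, a minimal userspace sketch of how the new per-memcg knob could
be exercised (not part of the patch; the cgroup v1 mount point and the group
name "test" are assumptions for illustration, while the file name follows from
the cftype entry "transparent_hugepage.enabled" plus the legacy "memory."
prefix):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/fs/cgroup/memory/test/"
				   "memory.transparent_hugepage.enabled";
		char buf[64];
		ssize_t n;
		int fd = open(path, O_RDWR);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* restrict this cgroup's tasks to madvise()-only THP */
		if (write(fd, "madvise", strlen("madvise")) < 0)
			perror("write");
		/* read back: the active mode is shown in brackets */
		if (lseek(fd, 0, SEEK_SET) == 0 &&
		    (n = read(fd, buf, sizeof(buf) - 1)) > 0) {
			buf[n] = '\0';
			fputs(buf, stdout);	/* e.g. "always [madvise] never" */
		}
		close(fd);
		return 0;
	}

Tasks in that cgroup then see THP only for VM_HUGEPAGE regions, since
__transparent_hugepage_enabled() and khugepaged_always()/khugepaged_req_madv()
now consult mem_cgroup_thp_flag() for the owning memcg instead of the global
transparent_hugepage_flags.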