@@ -782,6 +782,10 @@ void __mod_lruvec_kmem_state(void *p, en
rcu_read_unlock();
}
+/*
+ * mod_objcg_mlstate() may be called with irq enabled, so
+ * mod_memcg_lruvec_state() should be used.
+ */
static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
struct pglist_data *pgdat,
enum node_stat_item idx, int nr)
@@ -792,7 +796,7 @@ static inline void mod_objcg_mlstate(str
rcu_read_lock();
memcg = obj_cgroup_memcg(objcg);
lruvec = mem_cgroup_lruvec(memcg, pgdat);
- __mod_memcg_lruvec_state(lruvec, idx, nr);
+ mod_memcg_lruvec_state(lruvec, idx, nr);
rcu_read_unlock();
}
@@ -2054,17 +2058,23 @@ void unlock_page_memcg(struct page *page
}
EXPORT_SYMBOL(unlock_page_memcg);
-struct memcg_stock_pcp {
- struct mem_cgroup *cached; /* this never be root cgroup */
- unsigned int nr_pages;
-
+struct obj_stock {
#ifdef CONFIG_MEMCG_KMEM
struct obj_cgroup *cached_objcg;
struct pglist_data *cached_pgdat;
unsigned int nr_bytes;
int nr_slab_reclaimable_b;
int nr_slab_unreclaimable_b;
+#else
+ int dummy[0];
#endif
+};
+
+struct memcg_stock_pcp {
+ struct mem_cgroup *cached; /* this never be root cgroup */
+ unsigned int nr_pages;
+ struct obj_stock task_obj;
+ struct obj_stock irq_obj;
struct work_struct work;
unsigned long flags;
@@ -2074,12 +2084,12 @@ static DEFINE_PER_CPU(struct memcg_stock
static DEFINE_MUTEX(percpu_charge_mutex);
#ifdef CONFIG_MEMCG_KMEM
-static void drain_obj_stock(struct memcg_stock_pcp *stock);
+static void drain_obj_stock(struct obj_stock *stock);
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
struct mem_cgroup *root_memcg);
#else
-static inline void drain_obj_stock(struct memcg_stock_pcp *stock)
+static inline void drain_obj_stock(struct obj_stock *stock)
{
}
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
@@ -2089,6 +2099,41 @@ static bool obj_stock_flush_required(str
}
#endif
+/*
+ * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
+ * sequence used in this case to access content from object stock is slow.
+ * To optimize for user context access, there are now two object stocks for
+ * task context and interrupt context access respectively.
+ *
+ * The task context object stock can be accessed by disabling preemption only
+ * which is cheap in non-preempt kernel. The interrupt context object stock
+ * can only be accessed after disabling interrupt. User context code can
+ * access interrupt object stock, but not vice versa.
+ */
+static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
+{
+ struct memcg_stock_pcp *stock;
+
+ if (likely(in_task())) {
+ *pflags = 0UL;
+ preempt_disable();
+ stock = this_cpu_ptr(&memcg_stock);
+ return &stock->task_obj;
+ }
+
+ local_irq_save(*pflags);
+ stock = this_cpu_ptr(&memcg_stock);
+ return &stock->irq_obj;
+}
+
+static inline void put_obj_stock(unsigned long flags)
+{
+ if (likely(in_task()))
+ preempt_enable();
+ else
+ local_irq_restore(flags);
+}
+
/**
* consume_stock: Try to consume stocked charge on this cpu.
* @memcg: memcg to consume from.
@@ -2155,7 +2200,9 @@ static void drain_local_stock(struct wor
local_irq_save(flags);
stock = this_cpu_ptr(&memcg_stock);
- drain_obj_stock(stock);
+ drain_obj_stock(&stock->irq_obj);
+ if (in_task())
+ drain_obj_stock(&stock->task_obj);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
@@ -3015,13 +3062,10 @@ void __memcg_kmem_uncharge_page(struct p
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr)
{
- struct memcg_stock_pcp *stock;
unsigned long flags;
+ struct obj_stock *stock = get_obj_stock(&flags);
int *bytes;
- local_irq_save(flags);
- stock = this_cpu_ptr(&memcg_stock);
-
/*
* Save vmstat data in stock and skip vmstat array update unless
* accumulating over a page of vmstat data or when pgdat or idx
@@ -3070,29 +3114,26 @@ void mod_objcg_state(struct obj_cgroup *
if (nr)
mod_objcg_mlstate(objcg, pgdat, idx, nr);
- local_irq_restore(flags);
+ put_obj_stock(flags);
}
static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
{
- struct memcg_stock_pcp *stock;
unsigned long flags;
+ struct obj_stock *stock = get_obj_stock(&flags);
bool ret = false;
- local_irq_save(flags);
-
- stock = this_cpu_ptr(&memcg_stock);
if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
stock->nr_bytes -= nr_bytes;
ret = true;
}
- local_irq_restore(flags);
+ put_obj_stock(flags);
return ret;
}
-static void drain_obj_stock(struct memcg_stock_pcp *stock)
+static void drain_obj_stock(struct obj_stock *stock)
{
struct obj_cgroup *old = stock->cached_objcg;
@@ -3148,8 +3189,13 @@ static bool obj_stock_flush_required(str
{
struct mem_cgroup *memcg;
- if (stock->cached_objcg) {
- memcg = obj_cgroup_memcg(stock->cached_objcg);
+ if (in_task() && stock->task_obj.cached_objcg) {
+ memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
+ if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
+ return true;
+ }
+ if (stock->irq_obj.cached_objcg) {
+ memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
return true;
}
@@ -3160,13 +3206,10 @@ static bool obj_stock_flush_required(str
static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
bool allow_uncharge)
{
- struct memcg_stock_pcp *stock;
unsigned long flags;
+ struct obj_stock *stock = get_obj_stock(&flags);
unsigned int nr_pages = 0;
- local_irq_save(flags);
-
- stock = this_cpu_ptr(&memcg_stock);
if (stock->cached_objcg != objcg) { /* reset if necessary */
drain_obj_stock(stock);
obj_cgroup_get(objcg);
@@ -3182,7 +3225,7 @@ static void refill_obj_stock(struct obj_
stock->nr_bytes &= (PAGE_SIZE - 1);
}
- local_irq_restore(flags);
+ put_obj_stock(flags);
if (nr_pages)
obj_cgroup_uncharge_pages(objcg, nr_pages);
@@ -6790,6 +6833,7 @@ static void uncharge_page(struct page *p
unsigned long nr_pages;
struct mem_cgroup *memcg;
struct obj_cgroup *objcg;
+ bool use_objcg = PageMemcgKmem(page);
VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -6798,7 +6842,7 @@ static void uncharge_page(struct page *p
* page memcg or objcg at this point, we have fully
* exclusive access to the page.
*/
- if (PageMemcgKmem(page)) {
+ if (use_objcg) {
objcg = __page_objcg(page);
/*
* This get matches the put at the end of the function and
@@ -6826,7 +6870,7 @@ static void uncharge_page(struct page *p
nr_pages = compound_nr(page);
- if (PageMemcgKmem(page)) {
+ if (use_objcg) {
ug->nr_memory += nr_pages;
ug->nr_kmem += nr_pages;