diff mbox series

[2/4] numa: append per-node execution info in memory.numa_stat

Message ID 825ebaf0-9f71-bbe1-f054-7fa585d61af1@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series per cpu cgroup numa suite | expand

Commit Message

王贇 July 3, 2019, 3:29 a.m. UTC
This patch introduces NUMA execution information, to indicate the NUMA
efficiency.

By doing 'cat /sys/fs/cgroup/memory/CGROUP_PATH/memory.numa_stat', we
see a new output line beginning with 'exectime', like:

  exectime 311900 407166

which means the tasks of this cgroup executed for 311900 milliseconds on
node 0, and 407166 milliseconds on node 1.

Combined with the memory node info, we can estimate the numa efficiency,
for example if the node memory info is:

  total=206892 N0=21933 N1=185171

By monitoring the increments, if the topology stays this way and
locality is poor, it implies that NUMA balancing can't migrate the
memory being accessed by tasks on node 0 from node 1 to node 0, or that
the tasks can't migrate to node 1 for some reason; you may then consider
binding the cgroup to the CPUs of node 1.

Signed-off-by: Michael Wang <yun.wang@linux.alibaba.com>
---
 include/linux/memcontrol.h |  1 +
 mm/memcontrol.c            | 13 +++++++++++++
 2 files changed, 14 insertions(+)

Comments

Peter Zijlstra July 11, 2019, 1:45 p.m. UTC | #1
On Wed, Jul 03, 2019 at 11:29:15AM +0800, 王贇 wrote:

> +++ b/include/linux/memcontrol.h
> @@ -190,6 +190,7 @@ enum memcg_numa_locality_interval {
> 
>  struct memcg_stat_numa {
>  	u64 locality[NR_NL_INTERVAL];
> +	u64 exectime;

Maybe call the field jiffies, because that's what it counts.

>  };
> 
>  #endif
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 2edf3f5ac4b9..d5f48365770f 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3575,6 +3575,18 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
>  		seq_printf(m, " %u", jiffies_to_msecs(sum));
>  	}
>  	seq_putc(m, '\n');
> +
> +	seq_puts(m, "exectime");
> +	for_each_online_node(nr) {
> +		int cpu;
> +		u64 sum = 0;
> +
> +		for_each_cpu(cpu, cpumask_of_node(nr))
> +			sum += per_cpu(memcg->stat_numa->exectime, cpu);
> +
> +		seq_printf(m, " %llu", jiffies_to_msecs(sum));
> +	}
> +	seq_putc(m, '\n');
>  #endif
> 
>  	return 0;
王贇 July 12, 2019, 3:17 a.m. UTC | #2
On 2019/7/11 下午9:45, Peter Zijlstra wrote:
> On Wed, Jul 03, 2019 at 11:29:15AM +0800, 王贇 wrote:
> 
>> +++ b/include/linux/memcontrol.h
>> @@ -190,6 +190,7 @@ enum memcg_numa_locality_interval {
>>
>>  struct memcg_stat_numa {
>>  	u64 locality[NR_NL_INTERVAL];
>> +	u64 exectime;
> 
> Maybe call the field jiffies, because that's what it counts.

Sure, will be in next version.

Regards,
Michael Wang

> 
>>  };
>>
>>  #endif
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 2edf3f5ac4b9..d5f48365770f 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -3575,6 +3575,18 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
>>  		seq_printf(m, " %u", jiffies_to_msecs(sum));
>>  	}
>>  	seq_putc(m, '\n');
>> +
>> +	seq_puts(m, "exectime");
>> +	for_each_online_node(nr) {
>> +		int cpu;
>> +		u64 sum = 0;
>> +
>> +		for_each_cpu(cpu, cpumask_of_node(nr))
>> +			sum += per_cpu(memcg->stat_numa->exectime, cpu);
>> +
>> +		seq_printf(m, " %llu", jiffies_to_msecs(sum));
>> +	}
>> +	seq_putc(m, '\n');
>>  #endif
>>
>>  	return 0;
diff mbox series

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0a30d14c9f43..deeca9db17d8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -190,6 +190,7 @@  enum memcg_numa_locality_interval {

 struct memcg_stat_numa {
 	u64 locality[NR_NL_INTERVAL];
+	u64 exectime;
 };

 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2edf3f5ac4b9..d5f48365770f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3575,6 +3575,18 @@  static int memcg_numa_stat_show(struct seq_file *m, void *v)
 		seq_printf(m, " %u", jiffies_to_msecs(sum));
 	}
 	seq_putc(m, '\n');
+
+	seq_puts(m, "exectime");
+	for_each_online_node(nr) {
+		int cpu;
+		u64 sum = 0;
+
+		for_each_cpu(cpu, cpumask_of_node(nr))
+			sum += per_cpu(memcg->stat_numa->exectime, cpu);
+
+		seq_printf(m, " %llu", jiffies_to_msecs(sum));
+	}
+	seq_putc(m, '\n');
 #endif

 	return 0;
@@ -3606,6 +3618,7 @@  void memcg_stat_numa_update(struct task_struct *p)
 	memcg = mem_cgroup_from_task(p);
 	if (idx != -1)
 		this_cpu_inc(memcg->stat_numa->locality[idx]);
+	this_cpu_inc(memcg->stat_numa->exectime);
 	rcu_read_unlock();
 }
 #endif