diff mbox series

[RFC,v2,3/5] mm: Add a tracepoint for when OOM victim selection fails

Message ID 20230810081319.65668-4-zhouchuyi@bytedance.com (mailing list archive)
State RFC
Headers show
Series mm: Select victim using bpf_oom_evaluate_task | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 fail Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for veristat
netdev/tree_selection success Not a local patch, async

Commit Message

Chuyi Zhou Aug. 10, 2023, 8:13 a.m. UTC
This patch adds a tracepoint to mark the scenario where nothing was
chosen by the OOM killer. This allows BPF programs to detect that the
BPF OOM policy did not work well.

Suggested-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
---
 include/trace/events/oom.h | 18 ++++++++++++++++++
 mm/oom_kill.c              |  1 +
 2 files changed, 19 insertions(+)

Comments

Alan Maguire Aug. 16, 2023, 11:54 a.m. UTC | #1
On 10/08/2023 09:13, Chuyi Zhou wrote:
> This patch add a tracepoint to mark the scenario where nothing was
> chosen for OOM killer. This would allow BPF programs to catch the fact
> that the BPF OOM policy didn't work well.
> 
> Suggested-by: Alan Maguire <alan.maguire@oracle.com>
> Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
> ---
>  include/trace/events/oom.h | 18 ++++++++++++++++++
>  mm/oom_kill.c              |  1 +
>  2 files changed, 19 insertions(+)
> 
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index 26a11e4a2c36..b6ae1134229c 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -6,6 +6,7 @@
>  #define _TRACE_OOM_H
>  #include <linux/tracepoint.h>
>  #include <trace/events/mmflags.h>
> +#include <linux/oom.h>
>  
>  TRACE_EVENT(oom_score_adj_update,
>  
> @@ -151,6 +152,23 @@ TRACE_EVENT(skip_task_reaping,
>  	TP_printk("pid=%d", __entry->pid)
>  );
>  
> +TRACE_EVENT(select_bad_process_end,
> +

Would oom_select_bad_process_fail be a better name here?
"_end" is fairly neutral, whereas "_fail" indicates that something
unexpected happened.

> +	TP_PROTO(struct oom_control *oc),
> +
> +	TP_ARGS(oc),
> +
> +	TP_STRUCT__entry(
> +		__array(char, policy_name, POLICY_NAME_LEN)
> +	),
> +
> +	TP_fast_assign(
> +		memcpy(__entry->policy_name, oc->policy_name, POLICY_NAME_LEN);
> +	),
> +
> +	TP_printk("policy_name=%s", __entry->policy_name)
> +);
> +
>  #ifdef CONFIG_COMPACTION
>  TRACE_EVENT(compact_retry,
>  
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 3239dcdba4d7..af40a1b750fa 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -1235,6 +1235,7 @@ bool out_of_memory(struct oom_control *oc)
>  	select_bad_process(oc);
>  	/* Found nothing?!?! */
>  	if (!oc->chosen) {
> +		trace_select_bad_process_end(oc);
>  		dump_header(oc, NULL);
>  		pr_warn("Out of memory and no killable processes...\n");
>  		/*
diff mbox series

Patch

diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 26a11e4a2c36..b6ae1134229c 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -6,6 +6,7 @@ 
 #define _TRACE_OOM_H
 #include <linux/tracepoint.h>
 #include <trace/events/mmflags.h>
+#include <linux/oom.h>
 
 TRACE_EVENT(oom_score_adj_update,
 
@@ -151,6 +152,23 @@  TRACE_EVENT(skip_task_reaping,
 	TP_printk("pid=%d", __entry->pid)
 );
 
+TRACE_EVENT(select_bad_process_end,
+
+	TP_PROTO(struct oom_control *oc),
+
+	TP_ARGS(oc),
+
+	TP_STRUCT__entry(
+		__array(char, policy_name, POLICY_NAME_LEN)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->policy_name, oc->policy_name, POLICY_NAME_LEN);
+	),
+
+	TP_printk("policy_name=%s", __entry->policy_name)
+);
+
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(compact_retry,
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3239dcdba4d7..af40a1b750fa 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1235,6 +1235,7 @@  bool out_of_memory(struct oom_control *oc)
 	select_bad_process(oc);
 	/* Found nothing?!?! */
 	if (!oc->chosen) {
+		trace_select_bad_process_end(oc);
 		dump_header(oc, NULL);
 		pr_warn("Out of memory and no killable processes...\n");
 		/*