diff mbox series

[RFC,v2,2/5] mm: Add policy_name to identify OOM policies

Message ID 20230810081319.65668-3-zhouchuyi@bytedance.com (mailing list archive)
State RFC
Headers show
Series mm: Select victim using bpf_oom_evaluate_task | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 fail Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for veristat
netdev/tree_selection success Not a local patch, async

Commit Message

Chuyi Zhou Aug. 10, 2023, 8:13 a.m. UTC
This patch adds a new metadata policy_name in oom_control and report it
in dump_header(), so we can know what has been the selection policy. In
BPF program, we can call kfunc set_oom_policy_name to set the current
user-defined policy name. The in-kernel policy_name is "default".

Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
---
 include/linux/oom.h |  7 +++++++
 mm/oom_kill.c       | 42 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 46 insertions(+), 3 deletions(-)

Comments

Jonathan Corbet Aug. 14, 2023, 8:51 p.m. UTC | #1
Chuyi Zhou <zhouchuyi@bytedance.com> writes:

> This patch adds a new metadata policy_name in oom_control and report it
> in dump_header(), so we can know what has been the selection policy. In
> BPF program, we can call kfunc set_oom_policy_name to set the current
> user-defined policy name. The in-kernel policy_name is "default".
>
> Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
> ---
>  include/linux/oom.h |  7 +++++++
>  mm/oom_kill.c       | 42 +++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 46 insertions(+), 3 deletions(-)

So I have a possibly dumb question here...

> diff --git a/include/linux/oom.h b/include/linux/oom.h
> index 7d0c9c48a0c5..69d0f2ec6ea6 100644
> --- a/include/linux/oom.h
> +++ b/include/linux/oom.h
> @@ -22,6 +22,10 @@ enum oom_constraint {
>  	CONSTRAINT_MEMCG,
>  };
>  
> +enum {
> +	POLICY_NAME_LEN = 16,
> +};

We've defined our name length, fine...

>  /*
>   * Details of the page allocation that triggered the oom killer that are used to
>   * determine what should be killed.
> @@ -52,6 +56,9 @@ struct oom_control {
>  
>  	/* Used to print the constraint info. */
>  	enum oom_constraint constraint;
> +
> +	/* Used to report the policy info. */
> +	char policy_name[POLICY_NAME_LEN];
>  };

...that is the length of the array, appended to the structure...

>  extern struct mutex oom_lock;
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 255c9ef1d808..3239dcdba4d7 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -443,6 +443,35 @@ static int dump_task(struct task_struct *p, void *arg)
>  	return 0;
>  }
>  
> +__bpf_kfunc void set_oom_policy_name(struct oom_control *oc, const char *src, size_t sz)
> +{
> +	memset(oc->policy_name, 0, sizeof(oc->policy_name));
> +
> +	if (sz > POLICY_NAME_LEN)
> +		sz = POLICY_NAME_LEN;
> +
> +	memcpy(oc->policy_name, src, sz);
> +}

This truncates the name, possibly leaving it without a terminating NUL
character, right?

> +__diag_push();
> +__diag_ignore_all("-Wmissing-prototypes",
> +		  "kfuncs which will be used in BPF programs");
> +
> +__weak noinline void bpf_set_policy_name(struct oom_control *oc)
> +{
> +}
> +
> +__diag_pop();
> +
> +BTF_SET8_START(bpf_oom_policy_kfunc_ids)
> +BTF_ID_FLAGS(func, set_oom_policy_name)
> +BTF_SET8_END(bpf_oom_policy_kfunc_ids)
> +
> +static const struct btf_kfunc_id_set bpf_oom_policy_kfunc_set = {
> +	.owner          = THIS_MODULE,
> +	.set            = &bpf_oom_policy_kfunc_ids,
> +};
> +
>  /**
>   * dump_tasks - dump current memory state of all system tasks
>   * @oc: pointer to struct oom_control
> @@ -484,8 +513,8 @@ static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
>  
>  static void dump_header(struct oom_control *oc, struct task_struct *p)
>  {
> -	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
> -		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
> +	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, policy_name=%s, oom_score_adj=%hd\n",
> +		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, oc->policy_name,

...and if the policy name is unterminated, this print will run off the
end of the structure.

Am I missing something here?

Thanks,

jon
Chuyi Zhou Aug. 15, 2023, 2:28 a.m. UTC | #2
Hello,

在 2023/8/15 04:51, Jonathan Corbet 写道:
> Chuyi Zhou <zhouchuyi@bytedance.com> writes:
> 
>> This patch adds a new metadata policy_name in oom_control and report it
>> in dump_header(), so we can know what has been the selection policy. In
>> BPF program, we can call kfunc set_oom_policy_name to set the current
>> user-defined policy name. The in-kernel policy_name is "default".
>>
>> Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
>> ---
>>   include/linux/oom.h |  7 +++++++
>>   mm/oom_kill.c       | 42 +++++++++++++++++++++++++++++++++++++++---
>>   2 files changed, 46 insertions(+), 3 deletions(-)
> 
> So I have a possibly dumb question here...
> 
>> diff --git a/include/linux/oom.h b/include/linux/oom.h
>> index 7d0c9c48a0c5..69d0f2ec6ea6 100644
>> --- a/include/linux/oom.h
>> +++ b/include/linux/oom.h
>> @@ -22,6 +22,10 @@ enum oom_constraint {
>>   	CONSTRAINT_MEMCG,
>>   };
>>   
>> +enum {
>> +	POLICY_NAME_LEN = 16,
>> +};
> 
> We've defined our name length, fine...
> 
>>   /*
>>    * Details of the page allocation that triggered the oom killer that are used to
>>    * determine what should be killed.
>> @@ -52,6 +56,9 @@ struct oom_control {
>>   
>>   	/* Used to print the constraint info. */
>>   	enum oom_constraint constraint;
>> +
>> +	/* Used to report the policy info. */
>> +	char policy_name[POLICY_NAME_LEN];
>>   };
> 
> ...that is the length of the array, appended to the structure...
> 
>>   extern struct mutex oom_lock;
>> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
>> index 255c9ef1d808..3239dcdba4d7 100644
>> --- a/mm/oom_kill.c
>> +++ b/mm/oom_kill.c
>> @@ -443,6 +443,35 @@ static int dump_task(struct task_struct *p, void *arg)
>>   	return 0;
>>   }
>>   
>> +__bpf_kfunc void set_oom_policy_name(struct oom_control *oc, const char *src, size_t sz)
>> +{
>> +	memset(oc->policy_name, 0, sizeof(oc->policy_name));
>> +
>> +	if (sz > POLICY_NAME_LEN)
>> +		sz = POLICY_NAME_LEN;
>> +
>> +	memcpy(oc->policy_name, src, sz);
>> +}
> 
> This truncates the name, possibly leaving it without a terminating NUL
> character, right?
> 

Yes, indeed. We should guarantee that the policy_name is terminated with 
a NULL character to avoid some undefined behavior.

Thanks for your helpful review.

------
Chuyi Zhou
Bixuan Cui Sept. 14, 2023, 12:02 p.m. UTC | #3
在 2023/8/15 4:51, Jonathan Corbet 写道:
>>   /**
>>    * dump_tasks - dump current memory state of all system tasks
>>    * @oc: pointer to struct oom_control
>> @@ -484,8 +513,8 @@ static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
>>   
>>   static void dump_header(struct oom_control *oc, struct task_struct *p)
>>   {
>> -	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
>> -		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
>> +	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, policy_name=%s, oom_score_adj=%hd\n",
>> +		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, oc->policy_name,
> ...and if the policy name is unterminated, this print will run off the
> end of the structure.
> 
> Am I missing something here?
Perhaps it is inaccurate to use policy name in this way. For example, 
some one use BPF_PROG(bpf_oom_evaluate_task, ...) but do not set the 
policy name through bpf_set_policy_name. In this way, the result is 
still policy name=default, which ultimately leads to error print in the 
dump_header.
I think a better way:

+static const char *const policy_select[] = {
+    "OOM_DEFAULT";
+    "BPF_ABORT",
+    "BPF_NEXT",
+    "BPF_SELECT",
+};

struct oom_control {

  	/* Used to print the constraint info. */
  	enum oom_constraint constraint;
+
+	/* Used to report the policy select. */
+	int policy_select;
  };

static int oom_evaluate_task(struct task_struct *task, void *arg)
{
...

+	switch (bpf_oom_evaluate_task(task, oc)) {
+	case BPF_EVAL_ABORT:
+              oc->policy_select = BPF_EVAL_ABORT;
+		goto abort; /* abort search process */
+	case BPF_EVAL_NEXT:
+              oc->policy_select = BPF_EVAL_NEXT;
+		goto next; /* ignore the task */
+	case BPF_EVAL_SELECT:
+             oc->policy_select = BPF_EVAL_SELECT;
+		goto select; /* select the task */
+	default:
+		break; /* No BPF policy */
+	}

  static void dump_header(struct oom_control *oc, struct task_struct *p)
  {
-	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, 
oom_score_adj=%hd\n",
-		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
+	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, 
policy_name=%s, oom_score_adj=%hd\n",
+		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, 
policy_select[oc->policy_select],
  			current->signal->oom_score_adj);


And all definitions of oc should be added
struct oom_control oc = {
      .select = NO_BPF_POLICY,
}

Delete set_oom_policy_name, it makes no sense for users to set policy 
names. :-)

Thanks
Bixuan Cui
Bixuan Cui Sept. 14, 2023, 12:04 p.m. UTC | #4
在 2023/8/15 4:51, Jonathan Corbet 写道:
>>   /**
>>    * dump_tasks - dump current memory state of all system tasks
>>    * @oc: pointer to struct oom_control
>> @@ -484,8 +513,8 @@ static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
>>   
>>   static void dump_header(struct oom_control *oc, struct task_struct *p)
>>   {
>> -	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
>> -		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
>> +	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, policy_name=%s, oom_score_adj=%hd\n",
>> +		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, oc->policy_name,
> ...and if the policy name is unterminated, this print will run off the
> end of the structure.
> 
> Am I missing something here?
Perhaps it is inaccurate to use policy name in this way. For example, 
some one use BPF_PROG(bpf_oom_evaluate_task, ...) but do not set the 
policy name through bpf_set_policy_name. In this way, the result is 
still policy name=default, which ultimately leads to error print in the 
dump_header.
I think a better way:

+static const char *const policy_select[] = {
+    "OOM_DEFAULT";
+    "BPF_ABORT",
+    "BPF_NEXT",
+    "BPF_SELECT",
+};

struct oom_control {

  	/* Used to print the constraint info. */
  	enum oom_constraint constraint;
+
+	/* Used to report the policy select. */
+	int policy_select;
  };

static int oom_evaluate_task(struct task_struct *task, void *arg)
{
...

+	switch (bpf_oom_evaluate_task(task, oc)) {
+	case BPF_EVAL_ABORT:
+              oc->policy_select = BPF_EVAL_ABORT;
+		goto abort; /* abort search process */
+	case BPF_EVAL_NEXT:
+              oc->policy_select = BPF_EVAL_NEXT;
+		goto next; /* ignore the task */
+	case BPF_EVAL_SELECT:
+             oc->policy_select = BPF_EVAL_SELECT;
+		goto select; /* select the task */
+	default:
+		break; /* No BPF policy */
+	}

  static void dump_header(struct oom_control *oc, struct task_struct *p)
  {
-	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, 
oom_score_adj=%hd\n",
-		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
+	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, 
policy_name=%s, oom_score_adj=%hd\n",
+		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, 
policy_select[oc->policy_select],
  			current->signal->oom_score_adj);


And all definitions of oc should be added
struct oom_control oc = {
      .select = NO_BPF_POLICY,
}

Delete set_oom_policy_name, it makes no sense for users to set policy 
names. :-)

Thanks
Bixuan Cui
Chuyi Zhou Sept. 14, 2023, 12:50 p.m. UTC | #5
Hello,

在 2023/9/14 20:02, Bixuan Cui 写道:
> 
> 
> 在 2023/8/15 4:51, Jonathan Corbet 写道:
>>>   /**
>>>    * dump_tasks - dump current memory state of all system tasks
>>>    * @oc: pointer to struct oom_control
>>> @@ -484,8 +513,8 @@ static void dump_oom_summary(struct oom_control 
>>> *oc, struct task_struct *victim)
>>>   static void dump_header(struct oom_control *oc, struct task_struct *p)
>>>   {
>>> -    pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, 
>>> oom_score_adj=%hd\n",
>>> -        current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
>>> +    pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, 
>>> policy_name=%s, oom_score_adj=%hd\n",
>>> +        current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, 
>>> oc->policy_name,
>> ...and if the policy name is unterminated, this print will run off the
>> end of the structure.
>>
>> Am I missing something here?
> Perhaps it is inaccurate to use policy name in this way. For example, 
> some one use BPF_PROG(bpf_oom_evaluate_task, ...) but do not set the 
> policy name through bpf_set_policy_name. In this way, the result is 
> still policy name=default, which ultimately leads to error print in the 
> dump_header.
> I think a better way:
> 
> +static const char *const policy_select[] = {
> +    "OOM_DEFAULT";
> +    "BPF_ABORT",
> +    "BPF_NEXT",
> +    "BPF_SELECT",
> +};
> 
> struct oom_control {
> 
>       /* Used to print the constraint info. */
>       enum oom_constraint constraint;
> +
> +    /* Used to report the policy select. */
> +    int policy_select;
>   };
> 
> static int oom_evaluate_task(struct task_struct *task, void *arg)
> {
> ...
> 
> +    switch (bpf_oom_evaluate_task(task, oc)) {
> +    case BPF_EVAL_ABORT:
> +              oc->policy_select = BPF_EVAL_ABORT;
> +        goto abort; /* abort search process */
> +    case BPF_EVAL_NEXT:
> +              oc->policy_select = BPF_EVAL_NEXT;
> +        goto next; /* ignore the task */
> +    case BPF_EVAL_SELECT:
> +             oc->policy_select = BPF_EVAL_SELECT;
> +        goto select; /* select the task */
> +    default:
> +        break; /* No BPF policy */
> +    }
> 
>   static void dump_header(struct oom_control *oc, struct task_struct *p)
>   {
> -    pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, 
> oom_score_adj=%hd\n",
> -        current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
> +    pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, 
> policy_name=%s, oom_score_adj=%hd\n",
> +        current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, 
> policy_select[oc->policy_select],
>               current->signal->oom_score_adj);
> 
> 

The policy_name may be different from the previous OOM reporting, even 
though they are using the same policy.

> And all definitions of oc should be added
> struct oom_control oc = {
>       .select = NO_BPF_POLICY,
> }
> 
> Delete set_oom_policy_name, it makes no sense for users to set policy 
> names. :-)
> 

There can be multiple OOM policy in the system at the same time.

If we need to apply different OOM policies to different memcgs based on 
different scenarios, we can use this hook(set_oom_policy_name) to set 
name to identify which policy in invoked at that time.

Just some thoughts.

Thanks.

> Thanks
> Bixuan Cui
> 
> 
> 
> 
>
Bixuan Cui Sept. 15, 2023, 2:28 a.m. UTC | #6
在 2023/9/14 20:50, Chuyi Zhou 写道:
>>
>> Delete set_oom_policy_name, it makes no sense for users to set policy 
>> names. 
Chuyi Zhou Sept. 15, 2023, 3:31 a.m. UTC | #7
在 2023/9/15 10:28, Bixuan Cui 写道:
> 
> 
> 在 2023/9/14 20:50, Chuyi Zhou 写道:
>>>
>>> Delete set_oom_policy_name, it makes no sense for users to set policy 
>>> names. 
diff mbox series

Patch

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 7d0c9c48a0c5..69d0f2ec6ea6 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -22,6 +22,10 @@  enum oom_constraint {
 	CONSTRAINT_MEMCG,
 };
 
+enum {
+	POLICY_NAME_LEN = 16,
+};
+
 /*
  * Details of the page allocation that triggered the oom killer that are used to
  * determine what should be killed.
@@ -52,6 +56,9 @@  struct oom_control {
 
 	/* Used to print the constraint info. */
 	enum oom_constraint constraint;
+
+	/* Used to report the policy info. */
+	char policy_name[POLICY_NAME_LEN];
 };
 
 extern struct mutex oom_lock;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 255c9ef1d808..3239dcdba4d7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -443,6 +443,35 @@  static int dump_task(struct task_struct *p, void *arg)
 	return 0;
 }
 
+__bpf_kfunc void set_oom_policy_name(struct oom_control *oc, const char *src, size_t sz)
+{
+	memset(oc->policy_name, 0, sizeof(oc->policy_name));
+
+	if (sz > POLICY_NAME_LEN)
+		sz = POLICY_NAME_LEN;
+
+	memcpy(oc->policy_name, src, sz);
+}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "kfuncs which will be used in BPF programs");
+
+__weak noinline void bpf_set_policy_name(struct oom_control *oc)
+{
+}
+
+__diag_pop();
+
+BTF_SET8_START(bpf_oom_policy_kfunc_ids)
+BTF_ID_FLAGS(func, set_oom_policy_name)
+BTF_SET8_END(bpf_oom_policy_kfunc_ids)
+
+static const struct btf_kfunc_id_set bpf_oom_policy_kfunc_set = {
+	.owner          = THIS_MODULE,
+	.set            = &bpf_oom_policy_kfunc_ids,
+};
+
 /**
  * dump_tasks - dump current memory state of all system tasks
  * @oc: pointer to struct oom_control
@@ -484,8 +513,8 @@  static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
 
 static void dump_header(struct oom_control *oc, struct task_struct *p)
 {
-	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
-		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
+	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, policy_name=%s, oom_score_adj=%hd\n",
+		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, oc->policy_name,
 			current->signal->oom_score_adj);
 	if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
 		pr_warn("COMPACTION is disabled!!!\n");
@@ -775,8 +804,11 @@  static int __init oom_init(void)
 	err = register_btf_fmodret_id_set(&oom_bpf_fmodret_set);
 	if (err)
 		pr_warn("error while registering oom fmodret entrypoints: %d", err);
+	err = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
+					&bpf_oom_policy_kfunc_set);
+	if (err)
+		pr_warn("error while registering oom kfunc entrypoints: %d", err);
 #endif
-
 	return 0;
 }
 subsys_initcall(oom_init)
@@ -1196,6 +1228,10 @@  bool out_of_memory(struct oom_control *oc)
 		return true;
 	}
 
+	set_oom_policy_name(oc, "default", sizeof("default"));
+
+	bpf_set_policy_name(oc);
+
 	select_bad_process(oc);
 	/* Found nothing?!?! */
 	if (!oc->chosen) {