diff mbox series

[bpf-next,v8,3/4] bpf: Add kfunc bpf_rcu_read_lock/unlock()

Message ID 20221122195335.1782147-1-yhs@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Add bpf_rcu_read_lock() support | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1396 this patch: 1402
netdev/cc_maintainers warning 7 maintainers not CCed: sdf@google.com martin.lau@linux.dev kpsingh@kernel.org haoluo@google.com jolsa@kernel.org song@kernel.org john.fastabend@gmail.com
netdev/build_clang fail Errors and warnings before: 166 this patch: 168
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1386 this patch: 1392
netdev/checkpatch warning WARNING: line length of 101 exceeds 80 columns WARNING: line length of 109 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 fail Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc

Commit Message

Yonghong Song Nov. 22, 2022, 7:53 p.m. UTC
Add two kfunc's bpf_rcu_read_lock() and bpf_rcu_read_unlock(). These two kfunc's
can be used for all program types. The following is an example about how
rcu pointer are used w.r.t. bpf_rcu_read_lock()/bpf_rcu_read_unlock().

  struct task_struct {
    ...
    struct task_struct              *last_wakee;
    struct task_struct __rcu        *real_parent;
    ...
  };

Let us say prog does 'task = bpf_get_current_task_btf()' to get a
'task' pointer. The basic rules are:
  - 'real_parent = task->real_parent' should be inside bpf_rcu_read_lock
    region.  this is to simulate rcu_dereference() operation. The
    'real_parent' is marked as MEM_RCU only if (1). task->real_parent is
    inside bpf_rcu_read_lock region, and (2). task is a trusted ptr,
    marked with PTR_TRUSTED or ref_obj_id != 0. In other words,
    MEM_RCU marked ptr can be 'trusted' inside the bpf_rcu_read_lock region.
  - 'last_wakee = real_parent->last_wakee' should be inside bpf_rcu_read_lock
    region since it tries to access rcu protected memory.
  - the ptr 'last_wakee' will be marked as PTR_UNTRUSTED since in general
    it is not clear whether the object pointed by 'last_wakee' is valid or
    not even inside bpf_rcu_read_lock region.

To prevent rcu pointer leaks outside the rcu read lock region.
The verifier will clear all rcu pointer register state to unknown, i.e.,
scalar_value, at bpf_rcu_read_unlock() kfunc call site,
so later dereference becomes impossible.

The current implementation does not support nested rcu read lock
region in the prog.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h          |   3 +
 include/linux/bpf_verifier.h |   4 +-
 kernel/bpf/btf.c             |   3 +
 kernel/bpf/helpers.c         |  12 ++++
 kernel/bpf/verifier.c        | 118 ++++++++++++++++++++++++++++++++---
 5 files changed, 129 insertions(+), 11 deletions(-)

Comments

Martin KaFai Lau Nov. 23, 2022, 12:24 a.m. UTC | #1
On 11/22/22 11:53 AM, Yonghong Song wrote:
> +	if (flag & MEM_RCU) {
> +		/* Mark value register as MEM_RCU only if it is protected by
> +		 * bpf_rcu_read_lock() and the ptr reg is trusted (PTR_TRUSTED or
> +		 * ref_obj_id != 0). MEM_RCU itself can already indicate
> +		 * trustedness inside the rcu read lock region. But Mark it
> +		 * as PTR_TRUSTED as well similar to MEM_ALLOC.
> +		 */
> +		if (!env->cur_state->active_rcu_lock ||
> +		    (!(reg->type & PTR_TRUSTED) && !reg->ref_obj_id))

Can is_trusted_reg() be reused or MEM_ALLOC is not applicable here?

> +			flag &= ~MEM_RCU;
> +		else
> +			flag |= PTR_TRUSTED;
> +	} else if (reg->type & MEM_RCU) {
> +		/* ptr (reg) is marked as MEM_RCU, but value reg is not marked as MEM_RCU.
> +		 * Mark the value reg as PTR_UNTRUSTED conservatively.
> +		 */
> +		flag |= PTR_UNTRUSTED;

Should PTR_UNTRUSTED tagging be limited to ret == PTR_TO_BTF_ID instead of 
tagging SCALAR also?

[ ... ]

> @@ -11754,6 +11840,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
>   		return -EINVAL;
>   	}
>   
> +	if (env->prog->aux->sleepable && env->cur_state->active_rcu_lock) {

I don't know the details about ld_abs :).  Why sleepable check is needed here?

Others lgtm.
Yonghong Song Nov. 23, 2022, 1:06 a.m. UTC | #2
On 11/22/22 4:24 PM, Martin KaFai Lau wrote:
> On 11/22/22 11:53 AM, Yonghong Song wrote:
>> +    if (flag & MEM_RCU) {
>> +        /* Mark value register as MEM_RCU only if it is protected by
>> +         * bpf_rcu_read_lock() and the ptr reg is trusted 
>> (PTR_TRUSTED or
>> +         * ref_obj_id != 0). MEM_RCU itself can already indicate
>> +         * trustedness inside the rcu read lock region. But Mark it
>> +         * as PTR_TRUSTED as well similar to MEM_ALLOC.
>> +         */
>> +        if (!env->cur_state->active_rcu_lock ||
>> +            (!(reg->type & PTR_TRUSTED) && !reg->ref_obj_id))
> 
> Can is_trusted_reg() be reused or MEM_ALLOC is not applicable here?

Currently MEM_ALLOC is returned by bpf_obj_new() which takes prog btf.
currently MEM_RCU is only enabled with btf_struct_access() for
vmlinux btf. So if MEM_ALLOC is in reg->type, then MEM_RCU
should not appear.

But I guess we could still use is_trusted_reg() here since it
does cover the use case here.

> 
>> +            flag &= ~MEM_RCU;
>> +        else
>> +            flag |= PTR_TRUSTED;
>> +    } else if (reg->type & MEM_RCU) {
>> +        /* ptr (reg) is marked as MEM_RCU, but value reg is not 
>> marked as MEM_RCU.
>> +         * Mark the value reg as PTR_UNTRUSTED conservatively.
>> +         */
>> +        flag |= PTR_UNTRUSTED;
> 
> Should PTR_UNTRUSTED tagging be limited to ret == PTR_TO_BTF_ID instead 
> of tagging SCALAR also?

We should be okay here. flag is a local variable. It is used in
below function when reg_type is not SCALAR_VALUE.

static void mark_btf_ld_reg(struct bpf_verifier_env *env,
                             struct bpf_reg_state *regs, u32 regno,
                             enum bpf_reg_type reg_type,
                             struct btf *btf, u32 btf_id,
                             enum bpf_type_flag flag)
{
         if (reg_type == SCALAR_VALUE) {
                 mark_reg_unknown(env, regs, regno);
                 return;
         }
         mark_reg_known_zero(env, regs, regno);
         regs[regno].type = PTR_TO_BTF_ID | flag;
         regs[regno].btf = btf;
         regs[regno].btf_id = btf_id;
}

> 
> [ ... ]
> 
>> @@ -11754,6 +11840,11 @@ static int check_ld_abs(struct 
>> bpf_verifier_env *env, struct bpf_insn *insn)
>>           return -EINVAL;
>>       }
>> +    if (env->prog->aux->sleepable && env->cur_state->active_rcu_lock) {
> 
> I don't know the details about ld_abs :).  Why sleepable check is needed 
> here?

Do we still care about ld_abs??

Actually I added this since spin_lock excludes this. But taking a deep
look at the function convert_bpf_ld_abs, it is converted to a bunch of
bpf insns and bpf_skb_load_helper_{8,16,32}() which eventually calls
skb_copy_bits(). Checking skb_copy_bits(), it seems the function is not 
sleepable. Will remove this in the next revision.

> 
> Others lgtm.
Martin KaFai Lau Nov. 23, 2022, 1:19 a.m. UTC | #3
On 11/22/22 5:06 PM, Yonghong Song wrote:
> We should be okay here. flag is a local variable. It is used in
> below function when reg_type is not SCALAR_VALUE.
> 
> static void mark_btf_ld_reg(struct bpf_verifier_env *env,
>                              struct bpf_reg_state *regs, u32 regno,
>                              enum bpf_reg_type reg_type,
>                              struct btf *btf, u32 btf_id,
>                              enum bpf_type_flag flag)
> {
>          if (reg_type == SCALAR_VALUE) {
>                  mark_reg_unknown(env, regs, regno);

Ah, got it.

>>> @@ -11754,6 +11840,11 @@ static int check_ld_abs(struct bpf_verifier_env 
>>> *env, struct bpf_insn *insn)
>>>           return -EINVAL;
>>>       }
>>> +    if (env->prog->aux->sleepable && env->cur_state->active_rcu_lock) {
>>
>> I don't know the details about ld_abs :).  Why sleepable check is needed here?
> 
> Do we still care about ld_abs??
> 
> Actually I added this since spin_lock excludes this. But taking a deep

 From looking at check_ld_abs() again, I just noticed this comment:

         /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
          * gen_ld_abs() may terminate the program at runtime, leading to
          * reference leak.
          */

I think active_rcu_lock should be tested.  My question was more on why the 
env->prog->aux->sleepable test is also needed.
Yonghong Song Nov. 23, 2022, 1:24 a.m. UTC | #4
On 11/22/22 5:19 PM, Martin KaFai Lau wrote:
> On 11/22/22 5:06 PM, Yonghong Song wrote:
>> We should be okay here. flag is a local variable. It is used in
>> below function when reg_type is not SCALAR_VALUE.
>>
>> static void mark_btf_ld_reg(struct bpf_verifier_env *env,
>>                              struct bpf_reg_state *regs, u32 regno,
>>                              enum bpf_reg_type reg_type,
>>                              struct btf *btf, u32 btf_id,
>>                              enum bpf_type_flag flag)
>> {
>>          if (reg_type == SCALAR_VALUE) {
>>                  mark_reg_unknown(env, regs, regno);
> 
> Ah, got it.
> 
>>>> @@ -11754,6 +11840,11 @@ static int check_ld_abs(struct 
>>>> bpf_verifier_env *env, struct bpf_insn *insn)
>>>>           return -EINVAL;
>>>>       }
>>>> +    if (env->prog->aux->sleepable && 
>>>> env->cur_state->active_rcu_lock) {
>>>
>>> I don't know the details about ld_abs :).  Why sleepable check is 
>>> needed here?
>>
>> Do we still care about ld_abs??
>>
>> Actually I added this since spin_lock excludes this. But taking a deep
> 
>  From looking at check_ld_abs() again, I just noticed this comment:
> 
>          /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
>           * gen_ld_abs() may terminate the program at runtime, leading to
>           * reference leak.
>           */
> 
> I think active_rcu_lock should be tested.  My question was more on why 
> the env->prog->aux->sleepable test is also needed.

Will remove env->prog->aux->sleepable in the next version. It is a 
leftover missed with v8 not to focus on sleepable aspect of the lock.
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 43fd7eeeeabb..c6aa6912ea16 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -572,6 +572,9 @@  enum bpf_type_flag {
 	 */
 	PTR_TRUSTED		= BIT(12 + BPF_BASE_TYPE_BITS),
 
+	/* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
+	MEM_RCU			= BIT(13 + BPF_BASE_TYPE_BITS),
+
 	__BPF_TYPE_FLAG_MAX,
 	__BPF_TYPE_LAST_FLAG	= __BPF_TYPE_FLAG_MAX - 1,
 };
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 545152ac136c..1f3ce54e50ed 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -344,6 +344,7 @@  struct bpf_verifier_state {
 		u32 id;
 	} active_lock;
 	bool speculative;
+	bool active_rcu_lock;
 
 	/* first and last insn idx of this verifier state */
 	u32 first_insn_idx;
@@ -445,6 +446,7 @@  struct bpf_insn_aux_data {
 	u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
 	bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
 	bool zext_dst; /* this insn zero extends dst reg */
+	bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
 	u8 alu_state; /* used in combination with alu_limit */
 
 	/* below fields are initialized once */
@@ -680,7 +682,7 @@  static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
 	}
 }
 
-#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
+#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
 
 static inline bool bpf_type_has_unsafe_modifiers(u32 type)
 {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 1a59cc7ad730..68df0df27302 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6237,6 +6237,9 @@  static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
 				/* check __percpu tag */
 				if (strcmp(tag_value, "percpu") == 0)
 					tmp_flag = MEM_PERCPU;
+				/* check __rcu tag */
+				if (strcmp(tag_value, "rcu") == 0)
+					tmp_flag = MEM_RCU;
 			}
 
 			stype = btf_type_skip_modifiers(btf, mtype->type, &id);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index c3b798a1e3e9..3c5b41604c7b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1891,6 +1891,16 @@  void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
 	return obj__ign;
 }
 
+void bpf_rcu_read_lock(void)
+{
+	rcu_read_lock();
+}
+
+void bpf_rcu_read_unlock(void)
+{
+	rcu_read_unlock();
+}
+
 __diag_pop();
 
 BTF_SET8_START(generic_btf_ids)
@@ -1921,6 +1931,8 @@  BTF_ID(func, bpf_task_release)
 BTF_SET8_START(common_btf_ids)
 BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
 BTF_ID_FLAGS(func, bpf_rdonly_cast)
+BTF_ID_FLAGS(func, bpf_rcu_read_lock)
+BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
 BTF_SET8_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 068cc885903c..f6105723d6ca 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -527,6 +527,14 @@  static bool is_callback_calling_function(enum bpf_func_id func_id)
 	       func_id == BPF_FUNC_user_ringbuf_drain;
 }
 
+static bool is_storage_get_function(enum bpf_func_id func_id)
+{
+	return func_id == BPF_FUNC_sk_storage_get ||
+	       func_id == BPF_FUNC_inode_storage_get ||
+	       func_id == BPF_FUNC_task_storage_get ||
+	       func_id == BPF_FUNC_cgrp_storage_get;
+}
+
 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
 					const struct bpf_map *map)
 {
@@ -589,11 +597,12 @@  static const char *reg_type_str(struct bpf_verifier_env *env,
 			strncpy(postfix, "_or_null", 16);
 	}
 
-	snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s",
+	snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
 		 type & MEM_RDONLY ? "rdonly_" : "",
 		 type & MEM_RINGBUF ? "ringbuf_" : "",
 		 type & MEM_USER ? "user_" : "",
 		 type & MEM_PERCPU ? "percpu_" : "",
+		 type & MEM_RCU ? "rcu_" : "",
 		 type & PTR_UNTRUSTED ? "untrusted_" : "",
 		 type & PTR_TRUSTED ? "trusted_" : ""
 	);
@@ -1220,6 +1229,7 @@  static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 		dst_state->frame[i] = NULL;
 	}
 	dst_state->speculative = src->speculative;
+	dst_state->active_rcu_lock = src->active_rcu_lock;
 	dst_state->curframe = src->curframe;
 	dst_state->active_lock.ptr = src->active_lock.ptr;
 	dst_state->active_lock.id = src->active_lock.id;
@@ -4737,9 +4747,30 @@  static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 	if (type_flag(reg->type) & PTR_UNTRUSTED)
 		flag |= PTR_UNTRUSTED;
 
-	/* Any pointer obtained from walking a trusted pointer is no longer trusted. */
+	/* By default any pointer obtained from walking a trusted pointer is
+	 * no longer trusted except the rcu case below.
+	 */
 	flag &= ~PTR_TRUSTED;
 
+	if (flag & MEM_RCU) {
+		/* Mark value register as MEM_RCU only if it is protected by
+		 * bpf_rcu_read_lock() and the ptr reg is trusted (PTR_TRUSTED or
+		 * ref_obj_id != 0). MEM_RCU itself can already indicate
+		 * trustedness inside the rcu read lock region. But Mark it
+		 * as PTR_TRUSTED as well similar to MEM_ALLOC.
+		 */
+		if (!env->cur_state->active_rcu_lock ||
+		    (!(reg->type & PTR_TRUSTED) && !reg->ref_obj_id))
+			flag &= ~MEM_RCU;
+		else
+			flag |= PTR_TRUSTED;
+	} else if (reg->type & MEM_RCU) {
+		/* ptr (reg) is marked as MEM_RCU, but value reg is not marked as MEM_RCU.
+		 * Mark the value reg as PTR_UNTRUSTED conservatively.
+		 */
+		flag |= PTR_UNTRUSTED;
+	}
+
 	if (atype == BPF_READ && value_regno >= 0)
 		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
 
@@ -5897,6 +5928,7 @@  static const struct bpf_reg_types btf_ptr_types = {
 	.types = {
 		PTR_TO_BTF_ID,
 		PTR_TO_BTF_ID | PTR_TRUSTED,
+		PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
 	},
 };
 static const struct bpf_reg_types percpu_btf_ptr_types = {
@@ -6075,6 +6107,7 @@  int check_func_arg_reg_off(struct bpf_verifier_env *env,
 	case PTR_TO_BTF_ID:
 	case PTR_TO_BTF_ID | MEM_ALLOC:
 	case PTR_TO_BTF_ID | PTR_TRUSTED:
+	case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
 	case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
 		/* When referenced PTR_TO_BTF_ID is passed to release function,
 		 * it's fixed offset must be 0.	In the other cases, fixed offset
@@ -7539,6 +7572,17 @@  static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		return err;
 	}
 
+	if (env->cur_state->active_rcu_lock) {
+		if (fn->might_sleep) {
+			verbose(env, "sleepable helper %s#%din rcu_read_lock region\n",
+				func_id_name(func_id), func_id);
+			return -EINVAL;
+		}
+
+		if (env->prog->aux->sleepable && is_storage_get_function(func_id))
+			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
+	}
+
 	meta.func_id = func_id;
 	/* check args */
 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
@@ -8163,6 +8207,8 @@  enum special_kfunc_type {
 	KF_bpf_list_pop_back,
 	KF_bpf_cast_to_kern_ctx,
 	KF_bpf_rdonly_cast,
+	KF_bpf_rcu_read_lock,
+	KF_bpf_rcu_read_unlock,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -8185,6 +8231,18 @@  BTF_ID(func, bpf_list_pop_front)
 BTF_ID(func, bpf_list_pop_back)
 BTF_ID(func, bpf_cast_to_kern_ctx)
 BTF_ID(func, bpf_rdonly_cast)
+BTF_ID(func, bpf_rcu_read_lock)
+BTF_ID(func, bpf_rcu_read_unlock)
+
+static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
+}
+
+static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
+}
 
 static enum kfunc_ptr_arg_type
 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
@@ -8817,6 +8875,7 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	const struct btf_type *t, *func, *func_proto, *ptr_type;
 	struct bpf_reg_state *regs = cur_regs(env);
 	const char *func_name, *ptr_type_name;
+	bool sleepable, rcu_lock, rcu_unlock;
 	struct bpf_kfunc_call_arg_meta meta;
 	u32 i, nargs, func_id, ptr_type_id;
 	int err, insn_idx = *insn_idx_p;
@@ -8858,11 +8917,38 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -EACCES;
 	}
 
-	if (is_kfunc_sleepable(&meta) && !env->prog->aux->sleepable) {
+	sleepable = is_kfunc_sleepable(&meta);
+	if (sleepable && !env->prog->aux->sleepable) {
 		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
 		return -EACCES;
 	}
 
+	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
+	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
+	if (env->cur_state->active_rcu_lock) {
+		struct bpf_func_state *state;
+		struct bpf_reg_state *reg;
+
+		if (rcu_lock) {
+			verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
+			return -EINVAL;
+		} else if (rcu_unlock) {
+			bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
+				if (reg->type & MEM_RCU)
+					__mark_reg_unknown(env, reg);
+			}));
+			env->cur_state->active_rcu_lock = false;
+		} else if (sleepable) {
+			verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
+			return -EACCES;
+		}
+	} else if (rcu_lock) {
+		env->cur_state->active_rcu_lock = true;
+	} else if (rcu_unlock) {
+		verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
+		return -EINVAL;
+	}
+
 	/* Check the arguments */
 	err = check_kfunc_args(env, &meta);
 	if (err < 0)
@@ -11754,6 +11840,11 @@  static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return -EINVAL;
 	}
 
+	if (env->prog->aux->sleepable && env->cur_state->active_rcu_lock) {
+		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
+		return -EINVAL;
+	}
+
 	if (regs[ctx_reg].type != PTR_TO_CTX) {
 		verbose(env,
 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -13019,6 +13110,9 @@  static bool states_equal(struct bpf_verifier_env *env,
 	    old->active_lock.id != cur->active_lock.id)
 		return false;
 
+	if (old->active_rcu_lock != cur->active_rcu_lock)
+		return false;
+
 	/* for states to be equal callsites have to be the same
 	 * and all frame states need to be equivalent
 	 */
@@ -13706,6 +13800,11 @@  static int do_check(struct bpf_verifier_env *env)
 					return -EINVAL;
 				}
 
+				if (env->cur_state->active_rcu_lock) {
+					verbose(env, "bpf_rcu_read_unlock is missing\n");
+					return -EINVAL;
+				}
+
 				/* We must do check_reference_leak here before
 				 * prepare_func_exit to handle the case when
 				 * state->curframe > 0, it may be a callback
@@ -14802,6 +14901,7 @@  static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		case PTR_TO_BTF_ID:
 		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
 		case PTR_TO_BTF_ID | PTR_TRUSTED:
+		case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
 		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
 		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
 		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
@@ -15494,14 +15594,12 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
-		if (insn->imm == BPF_FUNC_task_storage_get ||
-		    insn->imm == BPF_FUNC_sk_storage_get ||
-		    insn->imm == BPF_FUNC_inode_storage_get ||
-		    insn->imm == BPF_FUNC_cgrp_storage_get) {
-			if (env->prog->aux->sleepable)
-				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
-			else
+		if (is_storage_get_function(insn->imm)) {
+			if (!env->prog->aux->sleepable ||
+			    env->insn_aux_data[i + delta].storage_get_func_atomic)
 				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
+			else
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
 			insn_buf[1] = *insn;
 			cnt = 2;