diff mbox series

[bpf-next,v5,1/5] bpf: Add bpf_link support for sk_msg and sk_skb progs

Message ID 20240406160404.177055-1-yonghong.song@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Add bpf_link support for sk_msg and sk_skb progs | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7516 this patch: 7516
netdev/build_tools success Errors and warnings before: 1 this patch: 1
netdev/cc_maintainers warning 11 maintainers not CCed: sdf@google.com kpsingh@kernel.org netdev@vger.kernel.org song@kernel.org martin.lau@linux.dev kuba@kernel.org haoluo@google.com jolsa@kernel.org edumazet@google.com eddyz87@gmail.com pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 1228 this patch: 1228
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7898 this patch: 7898
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0

Commit Message

Yonghong Song April 6, 2024, 4:04 p.m. UTC
Add bpf_link support for sk_msg and sk_skb programs. We have an
internal request to support bpf_link for sk_msg programs so user
space can have a uniform handling with bpf_link based libbpf
APIs. Using bpf_link based libbpf API also has a benefit which
makes system robust by decoupling prog life cycle and
attachment life cycle.

Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 include/linux/bpf.h            |   6 +
 include/linux/skmsg.h          |   4 +
 include/uapi/linux/bpf.h       |   5 +
 kernel/bpf/syscall.c           |   4 +
 net/core/sock_map.c            | 270 ++++++++++++++++++++++++++++++---
 tools/include/uapi/linux/bpf.h |   5 +
 6 files changed, 277 insertions(+), 17 deletions(-)

Comments

Andrii Nakryiko April 6, 2024, 6:47 p.m. UTC | #1
On Sat, Apr 6, 2024 at 9:04 AM Yonghong Song <yonghong.song@linux.dev> wrote:
>
> Add bpf_link support for sk_msg and sk_skb programs. We have an
> internal request to support bpf_link for sk_msg programs so user
> space can have a uniform handling with bpf_link based libbpf
> APIs. Using bpf_link based libbpf API also has a benefit which
> makes system robust by decoupling prog life cycle and
> attachment life cycle.
>
> Reviewed-by: John Fastabend <john.fastabend@gmail.com>
> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> ---
>  include/linux/bpf.h            |   6 +
>  include/linux/skmsg.h          |   4 +
>  include/uapi/linux/bpf.h       |   5 +
>  kernel/bpf/syscall.c           |   4 +
>  net/core/sock_map.c            | 270 ++++++++++++++++++++++++++++++---
>  tools/include/uapi/linux/bpf.h |   5 +
>  6 files changed, 277 insertions(+), 17 deletions(-)
>

Please check bpf_prog_attach_check_attach_type(), it probably should
be updated as well. Other than that looks good.

[...]

>  static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> -                               struct bpf_prog *old, u32 which)
> +                               struct bpf_prog *old, struct bpf_link *link,
> +                               u32 which)
>  {
>         struct bpf_prog **pprog;
> +       struct bpf_link **plink;
>         int ret;
>
> -       ret = sock_map_prog_lookup(map, &pprog, which);
> +       ret = sock_map_prog_link_lookup(map, &pprog, &plink, NULL, link && !prog, which);
>         if (ret)
> -               return ret;
> +               goto out;

probably could have kept `return ret;` here?

>
> -       if (old)
> -               return psock_replace_prog(pprog, prog, old);
> +       if (old) {
> +               ret = psock_replace_prog(pprog, prog, old);
> +               if (!ret)
> +                       *plink = NULL;
> +       } else {
> +               psock_set_prog(pprog, prog);
> +               if (link)
> +                       *plink = link;
> +       }
>
> -       psock_set_prog(pprog, prog);
> -       return 0;
> +out:

and wouldn't need out: then

> +       return ret;
>  }
>

[...]
Eduard Zingerman April 6, 2024, 11:18 p.m. UTC | #2
On Sat, 2024-04-06 at 09:04 -0700, Yonghong Song wrote:

[...]

> @@ -1454,55 +1466,95 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
>  	return NULL;
>  }
>  
> -static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
> -				u32 which)
> +static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
> +				     struct bpf_link ***plink, struct bpf_link *link,
> +				     bool skip_link_check, u32 which)
>  {
>  	struct sk_psock_progs *progs = sock_map_progs(map);
> +	struct bpf_prog **cur_pprog;
> +	struct bpf_link **cur_plink;
>  
>  	if (!progs)
>  		return -EOPNOTSUPP;
>  
>  	switch (which) {
>  	case BPF_SK_MSG_VERDICT:
> -		*pprog = &progs->msg_parser;
> +		cur_pprog = &progs->msg_parser;
> +		cur_plink = &progs->msg_parser_link;
>  		break;
>  #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
>  	case BPF_SK_SKB_STREAM_PARSER:
> -		*pprog = &progs->stream_parser;
> +		cur_pprog = &progs->stream_parser;
> +		cur_plink = &progs->stream_parser_link;
>  		break;
>  #endif
>  	case BPF_SK_SKB_STREAM_VERDICT:
>  		if (progs->skb_verdict)
>  			return -EBUSY;
> -		*pprog = &progs->stream_verdict;
> +		cur_pprog = &progs->stream_verdict;
> +		cur_plink = &progs->stream_verdict_link;
>  		break;
>  	case BPF_SK_SKB_VERDICT:
>  		if (progs->stream_verdict)
>  			return -EBUSY;
> -		*pprog = &progs->skb_verdict;
> +		cur_pprog = &progs->skb_verdict;
> +		cur_plink = &progs->skb_verdict_link;
>  		break;
>  	default:
>  		return -EOPNOTSUPP;
>  	}
>  
> +	/* The link check will be skipped for link_detach case. */
> +	if (!skip_link_check) {
> +		/* for prog_attach/prog_detach/link_attach, return error if a bpf_link
> +		 * exists for that prog.
> +		 */
> +		if (!link && *cur_plink)
> +			return -EBUSY;
> +
> +		/* for bpf_link based prog_update, return error if the stored bpf_link
> +		 * does not match the incoming bpf_link.
> +		 */
> +		if (link && link != *cur_plink)
> +			return -EBUSY;
> +	}

I still think that this check should be factored out to callers,
this allows to reduce the number of function parameters,
and better separate unrelated logical error conditions.
E.g. like in the patch at the end of this email
(applied on top of the current patch).

[...]

---

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 4af44277568e..a642215faa20 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1467,8 +1467,7 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
 }
 
 static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
-				     struct bpf_link ***plink, struct bpf_link *link,
-				     bool skip_link_check, u32 which)
+				     struct bpf_link ***plink, u32 which)
 {
 	struct sk_psock_progs *progs = sock_map_progs(map);
 	struct bpf_prog **cur_pprog;
@@ -1504,21 +1503,6 @@ static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***ppr
 		return -EOPNOTSUPP;
 	}
 
-	/* The link check will be skipped for link_detach case. */
-	if (!skip_link_check) {
-		/* for prog_attach/prog_detach/link_attach, return error if a bpf_link
-		 * exists for that prog.
-		 */
-		if (!link && *cur_plink)
-			return -EBUSY;
-
-		/* for bpf_link based prog_update, return error if the stored bpf_link
-		 * does not match the incoming bpf_link.
-		 */
-		if (link && link != *cur_plink)
-			return -EBUSY;
-	}
-
 	*pprog = cur_pprog;
 	if (plink)
 		*plink = cur_plink;
@@ -1539,9 +1523,14 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 	struct bpf_link **plink;
 	int ret;
 
-	ret = sock_map_prog_link_lookup(map, &pprog, &plink, NULL, link && !prog, which);
+	ret = sock_map_prog_link_lookup(map, &pprog, &plink, which);
 	if (ret)
-		goto out;
+		return ret;
+	/* for prog_attach/prog_detach/link_attach, return error if a bpf_link
+	 * exists for that prog.
+	 */
+	if ((!link || prog) && *plink)
+		return -EBUSY;
 
 	if (old) {
 		ret = psock_replace_prog(pprog, prog, old);
@@ -1553,8 +1542,7 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 			*plink = link;
 	}
 
-out:
-	return ret;
+	return 0;
 }
 
 int sock_map_bpf_prog_query(const union bpf_attr *attr,
@@ -1579,7 +1567,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr,
 
 	rcu_read_lock();
 
-	ret = sock_map_prog_link_lookup(map, &pprog, NULL, NULL, true, attr->query.attach_type);
+	ret = sock_map_prog_link_lookup(map, &pprog, NULL, attr->query.attach_type);
 	if (ret)
 		goto end;
 
@@ -1770,10 +1758,15 @@ static int sock_map_link_update_prog(struct bpf_link *link,
 		goto out;
 	}
 
-	ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink, link, false,
+	ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
 					sockmap_link->attach_type);
 	if (ret)
 		goto out;
+	/* for bpf_link based prog_update, return error if the stored bpf_link
+	 * does not match the incoming bpf_link.
+	 */
+	if (link != *plink)
+		return -EBUSY;
 
 	if (old) {
 		ret = psock_replace_prog(pprog, prog, old);
Yonghong Song April 8, 2024, 4:24 a.m. UTC | #3
On 4/6/24 11:47 AM, Andrii Nakryiko wrote:
> On Sat, Apr 6, 2024 at 9:04 AM Yonghong Song <yonghong.song@linux.dev> wrote:
>> Add bpf_link support for sk_msg and sk_skb programs. We have an
>> internal request to support bpf_link for sk_msg programs so user
>> space can have a uniform handling with bpf_link based libbpf
>> APIs. Using bpf_link based libbpf API also has a benefit which
>> makes system robust by decoupling prog life cycle and
>> attachment life cycle.
>>
>> Reviewed-by: John Fastabend <john.fastabend@gmail.com>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   include/linux/bpf.h            |   6 +
>>   include/linux/skmsg.h          |   4 +
>>   include/uapi/linux/bpf.h       |   5 +
>>   kernel/bpf/syscall.c           |   4 +
>>   net/core/sock_map.c            | 270 ++++++++++++++++++++++++++++++---
>>   tools/include/uapi/linux/bpf.h |   5 +
>>   6 files changed, 277 insertions(+), 17 deletions(-)
>>
> Please check bpf_prog_attach_check_attach_type(), it probably should
> be updated as well. Other than that looks good.

We are fine here. In function attach_type_to_prog_type(), we already
have checking:
         case BPF_SK_MSG_VERDICT:
                 return BPF_PROG_TYPE_SK_MSG;
         case BPF_SK_SKB_STREAM_PARSER:
         case BPF_SK_SKB_STREAM_VERDICT:
         case BPF_SK_SKB_VERDICT:
                 return BPF_PROG_TYPE_SK_SKB;

>
> [...]
>
>>   static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
>> -                               struct bpf_prog *old, u32 which)
>> +                               struct bpf_prog *old, struct bpf_link *link,
>> +                               u32 which)
>>   {
>>          struct bpf_prog **pprog;
>> +       struct bpf_link **plink;
>>          int ret;
>>
>> -       ret = sock_map_prog_lookup(map, &pprog, which);
>> +       ret = sock_map_prog_link_lookup(map, &pprog, &plink, NULL, link && !prog, which);
>>          if (ret)
>> -               return ret;
>> +               goto out;
> probably could have kept `return ret;` here?
>
>> -       if (old)
>> -               return psock_replace_prog(pprog, prog, old);
>> +       if (old) {
>> +               ret = psock_replace_prog(pprog, prog, old);
>> +               if (!ret)
>> +                       *plink = NULL;
>> +       } else {
>> +               psock_set_prog(pprog, prog);
>> +               if (link)
>> +                       *plink = link;
>> +       }
>>
>> -       psock_set_prog(pprog, prog);
>> -       return 0;
>> +out:
> and wouldn't need out: then

Ack. I can make this change.

>
>> +       return ret;
>>   }
>>
> [...]
Yonghong Song April 8, 2024, 5:01 a.m. UTC | #4
On 4/6/24 4:18 PM, Eduard Zingerman wrote:
> On Sat, 2024-04-06 at 09:04 -0700, Yonghong Song wrote:
>
> [...]
>
>> @@ -1454,55 +1466,95 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
>>   	return NULL;
>>   }
>>   
>> -static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
>> -				u32 which)
>> +static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
>> +				     struct bpf_link ***plink, struct bpf_link *link,
>> +				     bool skip_link_check, u32 which)
>>   {
>>   	struct sk_psock_progs *progs = sock_map_progs(map);
>> +	struct bpf_prog **cur_pprog;
>> +	struct bpf_link **cur_plink;
>>   
>>   	if (!progs)
>>   		return -EOPNOTSUPP;
>>   
>>   	switch (which) {
>>   	case BPF_SK_MSG_VERDICT:
>> -		*pprog = &progs->msg_parser;
>> +		cur_pprog = &progs->msg_parser;
>> +		cur_plink = &progs->msg_parser_link;
>>   		break;
>>   #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
>>   	case BPF_SK_SKB_STREAM_PARSER:
>> -		*pprog = &progs->stream_parser;
>> +		cur_pprog = &progs->stream_parser;
>> +		cur_plink = &progs->stream_parser_link;
>>   		break;
>>   #endif
>>   	case BPF_SK_SKB_STREAM_VERDICT:
>>   		if (progs->skb_verdict)
>>   			return -EBUSY;
>> -		*pprog = &progs->stream_verdict;
>> +		cur_pprog = &progs->stream_verdict;
>> +		cur_plink = &progs->stream_verdict_link;
>>   		break;
>>   	case BPF_SK_SKB_VERDICT:
>>   		if (progs->stream_verdict)
>>   			return -EBUSY;
>> -		*pprog = &progs->skb_verdict;
>> +		cur_pprog = &progs->skb_verdict;
>> +		cur_plink = &progs->skb_verdict_link;
>>   		break;
>>   	default:
>>   		return -EOPNOTSUPP;
>>   	}
>>   
>> +	/* The link check will be skipped for link_detach case. */
>> +	if (!skip_link_check) {
>> +		/* for prog_attach/prog_detach/link_attach, return error if a bpf_link
>> +		 * exists for that prog.
>> +		 */
>> +		if (!link && *cur_plink)
>> +			return -EBUSY;
>> +
>> +		/* for bpf_link based prog_update, return error if the stored bpf_link
>> +		 * does not match the incoming bpf_link.
>> +		 */
>> +		if (link && link != *cur_plink)
>> +			return -EBUSY;
>> +	}
> I still think that this check should be factored out to callers,
> this allows to reduce the number of function parameters,
> and better separate unrelated logical error conditions.
> E.g. like in the patch at the end of this email
> (applied on top of the current patch).

Thanks Eduard. I also bothered with too many arguments for
the function. I guess your suggested change below indeed
better as we still keep the attach type enum in the function
and factored following code in different situations.
Will make the change in the next revision!

>
> [...]
>
> ---
>
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index 4af44277568e..a642215faa20 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -1467,8 +1467,7 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
>   }
>   
>   static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
> -				     struct bpf_link ***plink, struct bpf_link *link,
> -				     bool skip_link_check, u32 which)
> +				     struct bpf_link ***plink, u32 which)
>   {
>   	struct sk_psock_progs *progs = sock_map_progs(map);
>   	struct bpf_prog **cur_pprog;
> @@ -1504,21 +1503,6 @@ static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***ppr
>   		return -EOPNOTSUPP;
>   	}
>   
> -	/* The link check will be skipped for link_detach case. */
> -	if (!skip_link_check) {
> -		/* for prog_attach/prog_detach/link_attach, return error if a bpf_link
> -		 * exists for that prog.
> -		 */
> -		if (!link && *cur_plink)
> -			return -EBUSY;
> -
> -		/* for bpf_link based prog_update, return error if the stored bpf_link
> -		 * does not match the incoming bpf_link.
> -		 */
> -		if (link && link != *cur_plink)
> -			return -EBUSY;
> -	}
> -
>   	*pprog = cur_pprog;
>   	if (plink)
>   		*plink = cur_plink;
> @@ -1539,9 +1523,14 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
>   	struct bpf_link **plink;
>   	int ret;
>   
> -	ret = sock_map_prog_link_lookup(map, &pprog, &plink, NULL, link && !prog, which);
> +	ret = sock_map_prog_link_lookup(map, &pprog, &plink, which);
>   	if (ret)
> -		goto out;
> +		return ret;
> +	/* for prog_attach/prog_detach/link_attach, return error if a bpf_link
> +	 * exists for that prog.
> +	 */
> +	if ((!link || prog) && *plink)
> +		return -EBUSY;
>   
>   	if (old) {
>   		ret = psock_replace_prog(pprog, prog, old);
> @@ -1553,8 +1542,7 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
>   			*plink = link;
>   	}
>   
> -out:
> -	return ret;
> +	return 0;
>   }
>   
>   int sock_map_bpf_prog_query(const union bpf_attr *attr,
> @@ -1579,7 +1567,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr,
>   
>   	rcu_read_lock();
>   
> -	ret = sock_map_prog_link_lookup(map, &pprog, NULL, NULL, true, attr->query.attach_type);
> +	ret = sock_map_prog_link_lookup(map, &pprog, NULL, attr->query.attach_type);
>   	if (ret)
>   		goto end;
>   
> @@ -1770,10 +1758,15 @@ static int sock_map_link_update_prog(struct bpf_link *link,
>   		goto out;
>   	}
>   
> -	ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink, link, false,
> +	ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
>   					sockmap_link->attach_type);
>   	if (ret)
>   		goto out;
> +	/* for bpf_link based prog_update, return error if the stored bpf_link
> +	 * does not match the incoming bpf_link.
> +	 */
> +	if (link != *plink)
> +		return -EBUSY;
>   
>   	if (old) {
>   		ret = psock_replace_prog(pprog, prog, old);
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 62762390c93d..5034c1b4ded7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2996,6 +2996,7 @@  int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
 int sock_map_bpf_prog_query(const union bpf_attr *attr,
 			    union bpf_attr __user *uattr);
+int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog);
 
 void sock_map_unhash(struct sock *sk);
 void sock_map_destroy(struct sock *sk);
@@ -3094,6 +3095,11 @@  static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
 {
 	return -EINVAL;
 }
+
+static inline int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index e65ec3fd2799..9c8dd4c01412 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -58,6 +58,10 @@  struct sk_psock_progs {
 	struct bpf_prog			*stream_parser;
 	struct bpf_prog			*stream_verdict;
 	struct bpf_prog			*skb_verdict;
+	struct bpf_link			*msg_parser_link;
+	struct bpf_link			*stream_parser_link;
+	struct bpf_link			*stream_verdict_link;
+	struct bpf_link			*skb_verdict_link;
 };
 
 enum sk_psock_state_bits {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6fe9f11c8abe..cee0a7915c08 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1135,6 +1135,7 @@  enum bpf_link_type {
 	BPF_LINK_TYPE_TCX = 11,
 	BPF_LINK_TYPE_UPROBE_MULTI = 12,
 	BPF_LINK_TYPE_NETKIT = 13,
+	BPF_LINK_TYPE_SOCKMAP = 14,
 	__MAX_BPF_LINK_TYPE,
 };
 
@@ -6724,6 +6725,10 @@  struct bpf_link_info {
 			__u32 ifindex;
 			__u32 attach_type;
 		} netkit;
+		struct {
+			__u32 map_id;
+			__u32 attach_type;
+		} sockmap;
 	};
 } __attribute__((aligned(8)));
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e44c276e8617..7d392ec83655 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5213,6 +5213,10 @@  static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 	case BPF_PROG_TYPE_SK_LOOKUP:
 		ret = netns_bpf_link_create(attr, prog);
 		break;
+	case BPF_PROG_TYPE_SK_MSG:
+	case BPF_PROG_TYPE_SK_SKB:
+		ret = sock_map_link_create(attr, prog);
+		break;
 #ifdef CONFIG_NET
 	case BPF_PROG_TYPE_XDP:
 		ret = bpf_xdp_link_attach(attr, prog);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 27d733c0f65e..4af44277568e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -24,8 +24,16 @@  struct bpf_stab {
 #define SOCK_CREATE_FLAG_MASK				\
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
+/* This mutex is used to
+ *  - protect race between prog/link attach/detach and link prog update, and
+ *  - protect race between releasing and accessing map in bpf_link.
+ * A single global mutex lock is used since it is expected contention is low.
+ */
+static DEFINE_MUTEX(sockmap_mutex);
+
 static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-				struct bpf_prog *old, u32 which);
+				struct bpf_prog *old, struct bpf_link *link,
+				u32 which);
 static struct sk_psock_progs *sock_map_progs(struct bpf_map *map);
 
 static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
@@ -71,7 +79,9 @@  int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-	ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
+	mutex_lock(&sockmap_mutex);
+	ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type);
+	mutex_unlock(&sockmap_mutex);
 	fdput(f);
 	return ret;
 }
@@ -103,7 +113,9 @@  int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
 		goto put_prog;
 	}
 
-	ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
+	mutex_lock(&sockmap_mutex);
+	ret = sock_map_prog_update(map, NULL, prog, NULL, attr->attach_type);
+	mutex_unlock(&sockmap_mutex);
 put_prog:
 	bpf_prog_put(prog);
 put_map:
@@ -1454,55 +1466,95 @@  static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
 	return NULL;
 }
 
-static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
-				u32 which)
+static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+				     struct bpf_link ***plink, struct bpf_link *link,
+				     bool skip_link_check, u32 which)
 {
 	struct sk_psock_progs *progs = sock_map_progs(map);
+	struct bpf_prog **cur_pprog;
+	struct bpf_link **cur_plink;
 
 	if (!progs)
 		return -EOPNOTSUPP;
 
 	switch (which) {
 	case BPF_SK_MSG_VERDICT:
-		*pprog = &progs->msg_parser;
+		cur_pprog = &progs->msg_parser;
+		cur_plink = &progs->msg_parser_link;
 		break;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 	case BPF_SK_SKB_STREAM_PARSER:
-		*pprog = &progs->stream_parser;
+		cur_pprog = &progs->stream_parser;
+		cur_plink = &progs->stream_parser_link;
 		break;
 #endif
 	case BPF_SK_SKB_STREAM_VERDICT:
 		if (progs->skb_verdict)
 			return -EBUSY;
-		*pprog = &progs->stream_verdict;
+		cur_pprog = &progs->stream_verdict;
+		cur_plink = &progs->stream_verdict_link;
 		break;
 	case BPF_SK_SKB_VERDICT:
 		if (progs->stream_verdict)
 			return -EBUSY;
-		*pprog = &progs->skb_verdict;
+		cur_pprog = &progs->skb_verdict;
+		cur_plink = &progs->skb_verdict_link;
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
+	/* The link check will be skipped for link_detach case. */
+	if (!skip_link_check) {
+		/* for prog_attach/prog_detach/link_attach, return error if a bpf_link
+		 * exists for that prog.
+		 */
+		if (!link && *cur_plink)
+			return -EBUSY;
+
+		/* for bpf_link based prog_update, return error if the stored bpf_link
+		 * does not match the incoming bpf_link.
+		 */
+		if (link && link != *cur_plink)
+			return -EBUSY;
+	}
+
+	*pprog = cur_pprog;
+	if (plink)
+		*plink = cur_plink;
 	return 0;
 }
 
+/* Handle the following four cases:
+ * prog_attach: prog != NULL, old == NULL, link == NULL
+ * prog_detach: prog == NULL, old != NULL, link == NULL
+ * link_attach: prog != NULL, old == NULL, link != NULL
+ * link_detach: prog == NULL, old != NULL, link != NULL
+ */
 static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-				struct bpf_prog *old, u32 which)
+				struct bpf_prog *old, struct bpf_link *link,
+				u32 which)
 {
 	struct bpf_prog **pprog;
+	struct bpf_link **plink;
 	int ret;
 
-	ret = sock_map_prog_lookup(map, &pprog, which);
+	ret = sock_map_prog_link_lookup(map, &pprog, &plink, NULL, link && !prog, which);
 	if (ret)
-		return ret;
+		goto out;
 
-	if (old)
-		return psock_replace_prog(pprog, prog, old);
+	if (old) {
+		ret = psock_replace_prog(pprog, prog, old);
+		if (!ret)
+			*plink = NULL;
+	} else {
+		psock_set_prog(pprog, prog);
+		if (link)
+			*plink = link;
+	}
 
-	psock_set_prog(pprog, prog);
-	return 0;
+out:
+	return ret;
 }
 
 int sock_map_bpf_prog_query(const union bpf_attr *attr,
@@ -1527,7 +1579,7 @@  int sock_map_bpf_prog_query(const union bpf_attr *attr,
 
 	rcu_read_lock();
 
-	ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+	ret = sock_map_prog_link_lookup(map, &pprog, NULL, NULL, true, attr->query.attach_type);
 	if (ret)
 		goto end;
 
@@ -1657,6 +1709,190 @@  void sock_map_close(struct sock *sk, long timeout)
 }
 EXPORT_SYMBOL_GPL(sock_map_close);
 
+struct sockmap_link {
+	struct bpf_link link;
+	struct bpf_map *map;
+	enum bpf_attach_type attach_type;
+};
+
+static void sock_map_link_release(struct bpf_link *link)
+{
+	struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+
+	mutex_lock(&sockmap_mutex);
+	if (!sockmap_link->map)
+		goto out;
+
+	WARN_ON_ONCE(sock_map_prog_update(sockmap_link->map, NULL, link->prog, link,
+					  sockmap_link->attach_type));
+
+	bpf_map_put_with_uref(sockmap_link->map);
+	sockmap_link->map = NULL;
+out:
+	mutex_unlock(&sockmap_mutex);
+}
+
+static int sock_map_link_detach(struct bpf_link *link)
+{
+	sock_map_link_release(link);
+	return 0;
+}
+
+static void sock_map_link_dealloc(struct bpf_link *link)
+{
+	kfree(link);
+}
+
+/* Handle the following two cases:
+ * case 1: link != NULL, prog != NULL, old != NULL
+ * case 2: link != NULL, prog != NULL, old == NULL
+ */
+static int sock_map_link_update_prog(struct bpf_link *link,
+				     struct bpf_prog *prog,
+				     struct bpf_prog *old)
+{
+	const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+	struct bpf_prog **pprog, *old_link_prog;
+	struct bpf_link **plink;
+	int ret = 0;
+
+	mutex_lock(&sockmap_mutex);
+
+	/* If old prog is not NULL, ensure old prog is the same as link->prog. */
+	if (old && link->prog != old) {
+		ret = -EPERM;
+		goto out;
+	}
+	/* Ensure link->prog has the same type/attach_type as the new prog. */
+	if (link->prog->type != prog->type ||
+	    link->prog->expected_attach_type != prog->expected_attach_type) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink, link, false,
+					sockmap_link->attach_type);
+	if (ret)
+		goto out;
+
+	if (old) {
+		ret = psock_replace_prog(pprog, prog, old);
+		if (ret)
+			goto out;
+	} else {
+		psock_set_prog(pprog, prog);
+	}
+
+	bpf_prog_inc(prog);
+	old_link_prog = xchg(&link->prog, prog);
+	bpf_prog_put(old_link_prog);
+
+out:
+	mutex_unlock(&sockmap_mutex);
+	return ret;
+}
+
+static u32 sock_map_link_get_map_id(const struct sockmap_link *sockmap_link)
+{
+	u32 map_id = 0;
+
+	mutex_lock(&sockmap_mutex);
+	if (sockmap_link->map)
+		map_id = sockmap_link->map->id;
+	mutex_unlock(&sockmap_mutex);
+	return map_id;
+}
+
+static int sock_map_link_fill_info(const struct bpf_link *link,
+				   struct bpf_link_info *info)
+{
+	const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+	u32 map_id = sock_map_link_get_map_id(sockmap_link);
+
+	info->sockmap.map_id = map_id;
+	info->sockmap.attach_type = sockmap_link->attach_type;
+	return 0;
+}
+
+static void sock_map_link_show_fdinfo(const struct bpf_link *link,
+				      struct seq_file *seq)
+{
+	const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link);
+	u32 map_id = sock_map_link_get_map_id(sockmap_link);
+
+	seq_printf(seq, "map_id:\t%u\n", map_id);
+	seq_printf(seq, "attach_type:\t%u\n", sockmap_link->attach_type);
+}
+
+static const struct bpf_link_ops sock_map_link_ops = {
+	.release = sock_map_link_release,
+	.dealloc = sock_map_link_dealloc,
+	.detach = sock_map_link_detach,
+	.update_prog = sock_map_link_update_prog,
+	.fill_link_info = sock_map_link_fill_info,
+	.show_fdinfo = sock_map_link_show_fdinfo,
+};
+
+int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	struct bpf_link_primer link_primer;
+	struct sockmap_link *sockmap_link;
+	enum bpf_attach_type attach_type;
+	struct bpf_map *map;
+	int ret;
+
+	if (attr->link_create.flags)
+		return -EINVAL;
+
+	map = bpf_map_get_with_uref(attr->link_create.target_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	if (map->map_type != BPF_MAP_TYPE_SOCKMAP && map->map_type != BPF_MAP_TYPE_SOCKHASH) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	sockmap_link = kzalloc(sizeof(*sockmap_link), GFP_USER);
+	if (!sockmap_link) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	attach_type = attr->link_create.attach_type;
+	bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog);
+	sockmap_link->map = map;
+	sockmap_link->attach_type = attach_type;
+
+	ret = bpf_link_prime(&sockmap_link->link, &link_primer);
+	if (ret) {
+		kfree(sockmap_link);
+		goto out;
+	}
+
+	mutex_lock(&sockmap_mutex);
+	ret = sock_map_prog_update(map, prog, NULL, &sockmap_link->link, attach_type);
+	mutex_unlock(&sockmap_mutex);
+	if (ret) {
+		bpf_link_cleanup(&link_primer);
+		goto out;
+	}
+
+	/* Increase refcnt for the prog since when old prog is replaced with
+	 * psock_replace_prog() and psock_set_prog() its refcnt will be decreased.
+	 *
+	 * Actually, we do not need to increase refcnt for the prog since bpf_link
+	 * will hold a reference. But in order to have less complexity w.r.t.
+	 * replacing/setting prog, let us increase the refcnt to make things simpler.
+	 */
+	bpf_prog_inc(prog);
+
+	return bpf_link_settle(&link_primer);
+
+out:
+	bpf_map_put_with_uref(map);
+	return ret;
+}
+
 static int sock_map_iter_attach_target(struct bpf_prog *prog,
 				       union bpf_iter_link_info *linfo,
 				       struct bpf_iter_aux_info *aux)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6fe9f11c8abe..cee0a7915c08 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1135,6 +1135,7 @@  enum bpf_link_type {
 	BPF_LINK_TYPE_TCX = 11,
 	BPF_LINK_TYPE_UPROBE_MULTI = 12,
 	BPF_LINK_TYPE_NETKIT = 13,
+	BPF_LINK_TYPE_SOCKMAP = 14,
 	__MAX_BPF_LINK_TYPE,
 };
 
@@ -6724,6 +6725,10 @@  struct bpf_link_info {
 			__u32 ifindex;
 			__u32 attach_type;
 		} netkit;
+		struct {
+			__u32 map_id;
+			__u32 attach_type;
+		} sockmap;
 	};
 } __attribute__((aligned(8)));