diff mbox series

[RFC,net-next,v6,04/13] bpf: stop UDP sock accessing TCP fields in sock_op BPF CALLs

Message ID 20250121012901.87763-5-kerneljasonxing@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net-timestamp: bpf extension to equip applications transparently | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 18 of 18 maintainers
netdev/build_clang success Errors and warnings before: 40 this patch: 40
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 25 this patch: 25
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 49 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0

Commit Message

Jason Xing Jan. 21, 2025, 1:28 a.m. UTC
In the next round, we will support the UDP proto for SO_TIMESTAMPING
bpf extension, so we need to ensure there is no safety problem, which
is usually caused by UDP socket trying to access TCP fields.

These approaches can be categorized into two groups:
1. add TCP protocol check
2. add sock op check

Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
---
 net/core/filter.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

Comments

Martin KaFai Lau Jan. 25, 2025, 12:28 a.m. UTC | #1
On 1/20/25 5:28 PM, Jason Xing wrote:
> In the next round, we will support the UDP proto for SO_TIMESTAMPING
> bpf extension, so we need to ensure there is no safety problem, which
> is ususally caused by UDP socket trying to access TCP fields.
> 
> These approaches can be categorized into two groups:
> 1. add TCP protocol check
> 2. add sock op check

Same as patch 3. The commit message needs adjustment. I would combine patch 3 
and patch 4 because ...

> 
> Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
> ---
>   net/core/filter.c | 19 +++++++++++++++++--
>   1 file changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/net/core/filter.c b/net/core/filter.c
> index fdd305b4cfbb..934431886876 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5523,6 +5523,11 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
>   	return -EINVAL;
>   }
>   
> +static bool is_locked_tcp_sock_ops(struct bpf_sock_ops_kern *bpf_sock)
> +{
> +	return bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB;

More bike shedding...

After sleeping on it again, I think it can just test the 
bpf_sock->allow_tcp_access instead.


> +}
> +
>   static int _bpf_setsockopt(struct sock *sk, int level, int optname,
>   			   char *optval, int optlen)
>   {
> @@ -5673,7 +5678,12 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
>   BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
>   	   int, level, int, optname, char *, optval, int, optlen)
>   {
> -	return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
> +	struct sock *sk = bpf_sock->sk;
> +
> +	if (is_locked_tcp_sock_ops(bpf_sock) && sk_fullsock(sk))

afaict, the new timestamping callbacks still can do setsockopt and it is 
incorrect. It should be:

	if (!bpf_sock->allow_tcp_access)
		return -EOPNOTSUPP;

I recalled I have asked in v5 but it may be buried in the long thread, so asking 
here again. Please add test(s) to check that the new timestamping callbacks 
cannot call setsockopt and read/write to some of the tcp_sock fields through the 
bpf_sock_ops.

> +		sock_owned_by_me(sk);

Not needed and instead...

> +
> +	return __bpf_setsockopt(sk, level, optname, optval, optlen);

keep the original _bpf_setsockopt().

>   }
>   
>   static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
> @@ -5759,6 +5769,7 @@ BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
>   	   int, level, int, optname, char *, optval, int, optlen)
>   {
>   	if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
> +	    bpf_sock->sk->sk_protocol == IPPROTO_TCP &&
>   	    optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {

No need to allow getsockopt regardless what SOL_* it is asking. To keep it 
simple, I would just disable both getsockopt and setsockopt for all SOL_* for 
the new timestamping callbacks. Nothing is lost, the bpf prog can directly read 
the sk.

>   		int ret, copy_len = 0;
>   		const u8 *start;
> @@ -5800,7 +5811,8 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
>   	struct sock *sk = bpf_sock->sk;
>   	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
>   
> -	if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
> +	if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk) ||
> +	    sk->sk_protocol != IPPROTO_TCP)

Same here. It should disallow this "set" helper for the timestamping callbacks 
which do not hold the lock.

>   		return -EINVAL;
>   
>   	tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
> @@ -7609,6 +7621,9 @@ BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
>   	u8 search_kind, search_len, copy_len, magic_len;
>   	int ret;
>   
> +	if (!is_locked_tcp_sock_ops(bpf_sock))
> +		return -EOPNOTSUPP;

This is correct, just change it to "!bpf_sock->allow_tcp_access".

All the above changed helpers should use the same test and the same return handling.

> +
>   	/* 2 byte is the minimal option len except TCPOPT_NOP and
>   	 * TCPOPT_EOL which are useless for the bpf prog to learn
>   	 * and this helper disallow loading them also.
Jason Xing Jan. 25, 2025, 1:15 a.m. UTC | #2
On Sat, Jan 25, 2025 at 8:28 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 1/20/25 5:28 PM, Jason Xing wrote:
> > In the next round, we will support the UDP proto for SO_TIMESTAMPING
> > bpf extension, so we need to ensure there is no safety problem, which
> > is ususally caused by UDP socket trying to access TCP fields.
> >
> > These approaches can be categorized into two groups:
> > 1. add TCP protocol check
> > 2. add sock op check
>
> Same as patch 3. The commit message needs adjustment. I would combine patch 3
> and patch 4 because ...

I wonder if you refer to "squashing" patch 4 into patch 3?

>
> >
> > Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
> > ---
> >   net/core/filter.c | 19 +++++++++++++++++--
> >   1 file changed, 17 insertions(+), 2 deletions(-)
> >
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index fdd305b4cfbb..934431886876 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -5523,6 +5523,11 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
> >       return -EINVAL;
> >   }
> >
> > +static bool is_locked_tcp_sock_ops(struct bpf_sock_ops_kern *bpf_sock)
> > +{
> > +     return bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB;
>
> More bike shedding...
>
> After sleeping on it again, I think it can just test the
> bpf_sock->allow_tcp_access instead.

Sorry, I don't think it can work for all the cases because:
1) please see BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB,
if req exists, there is no allow_tcp_access initialization. Then
calling some function like bpf_sock_ops_setsockopt will be rejected
because allow_tcp_access is zero.
2) tcp_call_bpf() only set allow_tcp_access only when the socket is
fullsock. As far as I know, all the callers have the full sock for
now, but in the future it might not.

If we should use allow_tcp_access to test, then the following patch
should be folded into patch 3, right?
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0e5b9a654254..9cd7d4446617 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -525,6 +525,7 @@ static void bpf_skops_hdr_opt_len(struct sock *sk,
struct sk_buff *skb,
                sock_ops.sk = sk;
        }

+       sock_ops.allow_tcp_access = 1;
        sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type);
        sock_ops.remaining_opt_len = *remaining;
        /* tcp_current_mss() does not pass a skb */


>
>
> > +}
> > +
> >   static int _bpf_setsockopt(struct sock *sk, int level, int optname,
> >                          char *optval, int optlen)
> >   {
> > @@ -5673,7 +5678,12 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
> >   BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
> >          int, level, int, optname, char *, optval, int, optlen)
> >   {
> > -     return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
> > +     struct sock *sk = bpf_sock->sk;
> > +
> > +     if (is_locked_tcp_sock_ops(bpf_sock) && sk_fullsock(sk))
>
> afaict, the new timestamping callbacks still can do setsockopt and it is
> incorrect. It should be:
>
>         if (!bpf_sock->allow_tcp_access)
>                 return -EOPNOTSUPP;
>
> I recalled I have asked in v5 but it may be buried in the long thread, so asking
> here again. Please add test(s) to check that the new timestamping callbacks
> cannot call setsockopt and read/write to some of the tcp_sock fields through the
> bpf_sock_ops.
>
> > +             sock_owned_by_me(sk);
>
> Not needed and instead...

Sorry I don't get you here. What I was doing was letting non
timestamping callbacks be checked by the sock_owned_by_me() function.

If the callback belongs to timestamping, we will skip the check.

>
> > +
> > +     return __bpf_setsockopt(sk, level, optname, optval, optlen);
>
> keep the original _bpf_setsockopt().

Oh, I remembered we've already assumed/agreed the timestamping socket
must be full sock. I will use it.

>
> >   }
> >
> >   static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
> > @@ -5759,6 +5769,7 @@ BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
> >          int, level, int, optname, char *, optval, int, optlen)
> >   {
> >       if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
> > +         bpf_sock->sk->sk_protocol == IPPROTO_TCP &&
> >           optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
>
> No need to allow getsockopt regardless what SOL_* it is asking. To keep it
> simple, I would just disable both getsockopt and setsockopt for all SOL_* for

Really? I'm shocked because the selftests in this series call
bpf_sock_ops_getsockopt() and bpf_sock_ops_setsockopt() in patch
[13/13]:
...
if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
...

> the new timestamping callbacks. Nothing is lost, the bpf prog can directly read
> the sk.
>
> >               int ret, copy_len = 0;
> >               const u8 *start;
> > @@ -5800,7 +5811,8 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
> >       struct sock *sk = bpf_sock->sk;
> >       int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
> >
> > -     if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
> > +     if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk) ||
> > +         sk->sk_protocol != IPPROTO_TCP)
>
> Same here. It should disallow this "set" helper for the timestamping callbacks
> which do not hold the lock.
>
> >               return -EINVAL;
> >
> >       tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
> > @@ -7609,6 +7621,9 @@ BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
> >       u8 search_kind, search_len, copy_len, magic_len;
> >       int ret;
> >
> > +     if (!is_locked_tcp_sock_ops(bpf_sock))
> > +             return -EOPNOTSUPP;
>
> This is correct, just change it to "!bpf_sock->allow_tcp_access".
>
> All the above changed helpers should use the same test and the same return handling.
>
> > +
> >       /* 2 byte is the minimal option len except TCPOPT_NOP and
> >        * TCPOPT_EOL which are useless for the bpf prog to learn
> >        * and this helper disallow loading them also.
>
Jason Xing Jan. 25, 2025, 1:32 a.m. UTC | #3
On Sat, Jan 25, 2025 at 9:15 AM Jason Xing <kerneljasonxing@gmail.com> wrote:
>
> On Sat, Jan 25, 2025 at 8:28 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
> >
> > On 1/20/25 5:28 PM, Jason Xing wrote:
> > > In the next round, we will support the UDP proto for SO_TIMESTAMPING
> > > bpf extension, so we need to ensure there is no safety problem, which
> > > is ususally caused by UDP socket trying to access TCP fields.
> > >
> > > These approaches can be categorized into two groups:
> > > 1. add TCP protocol check
> > > 2. add sock op check
> >
> > Same as patch 3. The commit message needs adjustment. I would combine patch 3
> > and patch 4 because ...
>
> I wonder if you refer to "squashing" patch 4 into patch 3?
>
> >
> > >
> > > Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
> > > ---
> > >   net/core/filter.c | 19 +++++++++++++++++--
> > >   1 file changed, 17 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/net/core/filter.c b/net/core/filter.c
> > > index fdd305b4cfbb..934431886876 100644
> > > --- a/net/core/filter.c
> > > +++ b/net/core/filter.c
> > > @@ -5523,6 +5523,11 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
> > >       return -EINVAL;
> > >   }
> > >
> > > +static bool is_locked_tcp_sock_ops(struct bpf_sock_ops_kern *bpf_sock)
> > > +{
> > > +     return bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB;
> >
> > More bike shedding...
> >
> > After sleeping on it again, I think it can just test the
> > bpf_sock->allow_tcp_access instead.
>
> Sorry, I don't think it can work for all the cases because:
> 1) please see BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB,
> if req exists, there is no allow_tcp_access initialization. Then
> calling some function like bpf_sock_ops_setsockopt will be rejected
> because allow_tcp_access is zero.
> 2) tcp_call_bpf() only set allow_tcp_access only when the socket is
> fullsock. As far as I know, all the callers have the full stock for
> now, but in the future it might not.
>
> If we should use allow_tcp_access to test, then the following patch
> should be folded into patch 3, right?
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 0e5b9a654254..9cd7d4446617 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -525,6 +525,7 @@ static void bpf_skops_hdr_opt_len(struct sock *sk,
> struct sk_buff *skb,
>                 sock_ops.sk = sk;
>         }
>
> +       sock_ops.allow_tcp_access = 1;
>         sock_ops.args[0] = bpf_skops_write_hdr_opt_arg0(skb, synack_type);
>         sock_ops.remaining_opt_len = *remaining;
>         /* tcp_current_mss() does not pass a skb */
>
>
> >
> >
> > > +}
> > > +
> > >   static int _bpf_setsockopt(struct sock *sk, int level, int optname,
> > >                          char *optval, int optlen)
> > >   {
> > > @@ -5673,7 +5678,12 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
> > >   BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
> > >          int, level, int, optname, char *, optval, int, optlen)
> > >   {
> > > -     return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
> > > +     struct sock *sk = bpf_sock->sk;
> > > +
> > > +     if (is_locked_tcp_sock_ops(bpf_sock) && sk_fullsock(sk))
> >
> > afaict, the new timestamping callbacks still can do setsockopt and it is
> > incorrect. It should be:
> >
> >         if (!bpf_sock->allow_tcp_access)
> >                 return -EOPNOTSUPP;
> >
> > I recalled I have asked in v5 but it may be buried in the long thread, so asking
> > here again. Please add test(s) to check that the new timestamping callbacks
> > cannot call setsockopt and read/write to some of the tcp_sock fields through the
> > bpf_sock_ops.
> >
> > > +             sock_owned_by_me(sk);
> >
> > Not needed and instead...
>
> Sorry I don't get you here. What I was doing was letting non
> timestamping callbacks be checked by the sock_owned_by_me() function.
>
> If the callback belongs to timestamping, we will skip the check.
>
> >
> > > +
> > > +     return __bpf_setsockopt(sk, level, optname, optval, optlen);
> >
> > keep the original _bpf_setsockopt().
>
> Oh, I remembered we've already assumed/agreed the timestamping socket
> must be full sock. I will use it.

Oh, no. We cannot use it because it will WARN us if the socket is not held:
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                           char *optval, int optlen)
{
        if (sk_fullsock(sk))
                sock_owned_by_me(sk);
        return __bpf_setsockopt(sk, level, optname, optval, optlen);
}

Let me rephrase what I know about the TCP and UDP cases:
1) the sockets are full socket.
2) the sockets are under the protection of socket lock, but in the
future they might not.

So we need to check if it's a fullsock but we don't expect to get any
warnings because the socket is not locked.

Am I right about those two?

Thanks,
Jason

>
> >
> > >   }
> > >
> > >   static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
> > > @@ -5759,6 +5769,7 @@ BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
> > >          int, level, int, optname, char *, optval, int, optlen)
> > >   {
> > >       if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
> > > +         bpf_sock->sk->sk_protocol == IPPROTO_TCP &&
> > >           optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
> >
> > No need to allow getsockopt regardless what SOL_* it is asking. To keep it
> > simple, I would just disable both getsockopt and setsockopt for all SOL_* for
>
> Really? I'm shocked because the selftests in this series call
> bpf_sock_ops_getsockopt() and bpf_sock_ops_setsockopt() in patch
> [13/13]:
> ...
> if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
> ...
>
> > the new timestamping callbacks. Nothing is lost, the bpf prog can directly read
> > the sk.
> >
> > >               int ret, copy_len = 0;
> > >               const u8 *start;
> > > @@ -5800,7 +5811,8 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
> > >       struct sock *sk = bpf_sock->sk;
> > >       int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
> > >
> > > -     if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
> > > +     if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk) ||
> > > +         sk->sk_protocol != IPPROTO_TCP)
> >
> > Same here. It should disallow this "set" helper for the timestamping callbacks
> > which do not hold the lock.
> >
> > >               return -EINVAL;
> > >
> > >       tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
> > > @@ -7609,6 +7621,9 @@ BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
> > >       u8 search_kind, search_len, copy_len, magic_len;
> > >       int ret;
> > >
> > > +     if (!is_locked_tcp_sock_ops(bpf_sock))
> > > +             return -EOPNOTSUPP;
> >
> > This is correct, just change it to "!bpf_sock->allow_tcp_access".
> >
> > All the above changed helpers should use the same test and the same return handling.
> >
> > > +
> > >       /* 2 byte is the minimal option len except TCPOPT_NOP and
> > >        * TCPOPT_EOL which are useless for the bpf prog to learn
> > >        * and this helper disallow loading them also.
> >
Martin KaFai Lau Jan. 25, 2025, 2:25 a.m. UTC | #4
On 1/24/25 5:15 PM, Jason Xing wrote:
>>> +static bool is_locked_tcp_sock_ops(struct bpf_sock_ops_kern *bpf_sock)
>>> +{
>>> +     return bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB;
>>
>> More bike shedding...
>>
>> After sleeping on it again, I think it can just test the
>> bpf_sock->allow_tcp_access instead.
> 
> Sorry, I don't think it can work for all the cases because:
> 1) please see BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB,
> if req exists, there is no allow_tcp_access initialization. Then
> calling some function like bpf_sock_ops_setsockopt will be rejected
> because allow_tcp_access is zero.
> 2) tcp_call_bpf() only set allow_tcp_access only when the socket is
> fullsock. As far as I know, all the callers have the full stock for
> now, but in the future it might not.

Note that the existing helper bpf_sock_ops_cb_flags_set and 
bpf_sock_ops_{set,get}sockopt itself have done the sk_fullsock() test and then 
return -EINVAL. bpf_sock->sk is fullsock or not does not matter to these helpers.

You are right on the BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB 
but the only helper left that testing allow_tcp_access is not enough is 
bpf_sock_ops_load_hdr_opt(). Potentially, it can test "if 
(!bpf_sock->allow_tcp_access && !bpf_sock->syn_skb) { return -EOPNOTSUPP; }".

Agree to stay with the current "bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB" 
as in this patch. It is cleaner.

>>> @@ -5673,7 +5678,12 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
>>>    BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
>>>           int, level, int, optname, char *, optval, int, optlen)
>>>    {
>>> -     return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
>>> +     struct sock *sk = bpf_sock->sk;
>>> +
>>> +     if (is_locked_tcp_sock_ops(bpf_sock) && sk_fullsock(sk))
>>
>> afaict, the new timestamping callbacks still can do setsockopt and it is
>> incorrect. It should be:
>>
>>          if (!bpf_sock->allow_tcp_access)
>>                  return -EOPNOTSUPP;
>>
>> I recalled I have asked in v5 but it may be buried in the long thread, so asking
>> here again. Please add test(s) to check that the new timestamping callbacks
>> cannot call setsockopt and read/write to some of the tcp_sock fields through the
>> bpf_sock_ops.
>>
>>> +             sock_owned_by_me(sk);
>>
>> Not needed and instead...
> 
> Sorry I don't get you here. What I was doing was letting non
> timestamping callbacks be checked by the sock_owned_by_me() function.
> 
> If the callback belongs to timestamping, we will skip the check.

It will skip the sock_owned_by_me() test and
continue to do the following __bpf_setsockopt() which the new timestamping 
callback should not do, no?

It should be just this at the very beginning of bpf_sock_ops_setsockopt:

	if (!is_locked_tcp_sock_ops(bpf_sock))
		return -EOPNOTSUPP;
> 
>>
>>> +
>>> +     return __bpf_setsockopt(sk, level, optname, optval, optlen);
>>
>> keep the original _bpf_setsockopt().
> 
> Oh, I remembered we've already assumed/agreed the timestamping socket
> must be full sock. I will use it.
> 
>>
>>>    }
>>>
>>>    static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
>>> @@ -5759,6 +5769,7 @@ BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
>>>           int, level, int, optname, char *, optval, int, optlen)
>>>    {
>>>        if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
>>> +         bpf_sock->sk->sk_protocol == IPPROTO_TCP &&
>>>            optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
>>
>> No need to allow getsockopt regardless what SOL_* it is asking. To keep it
>> simple, I would just disable both getsockopt and setsockopt for all SOL_* for
> 
> Really? I'm shocked because the selftests in this series call
> bpf_sock_ops_getsockopt() and bpf_sock_ops_setsockopt() in patch
> [13/13]:

Yes, really. It may be late Friday for me here. Please double check your test if 
the bpf_set/getsockopt is called from the new timestamp callback or it is only 
called from the existing BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB callback.

Note that I am only asking to disable the set/getsockopt, 
bpf_sock_ops_cb_flags_set, and the bpf_sock_ops_load_hdr_opt for the new 
timestamping callbacks.
Jason Xing Jan. 25, 2025, 2:58 a.m. UTC | #5
On Sat, Jan 25, 2025 at 10:26 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 1/24/25 5:15 PM, Jason Xing wrote:
> >>> +static bool is_locked_tcp_sock_ops(struct bpf_sock_ops_kern *bpf_sock)
> >>> +{
> >>> +     return bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB;
> >>
> >> More bike shedding...
> >>
> >> After sleeping on it again, I think it can just test the
> >> bpf_sock->allow_tcp_access instead.
> >
> > Sorry, I don't think it can work for all the cases because:
> > 1) please see BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB,
> > if req exists, there is no allow_tcp_access initialization. Then
> > calling some function like bpf_sock_ops_setsockopt will be rejected
> > because allow_tcp_access is zero.
> > 2) tcp_call_bpf() only set allow_tcp_access only when the socket is
> > fullsock. As far as I know, all the callers have the full stock for
> > now, but in the future it might not.
>
> Note that the existing helper bpf_sock_ops_cb_flags_set and
> bpf_sock_ops_{set,get}sockopt itself have done the sk_fullsock() test and then
> return -EINVAL. bpf_sock->sk is fullsock or not does not matter to these helpers.
>
> You are right on the BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB
> but the only helper left that testing allow_tcp_access is not enough is
> bpf_sock_ops_load_hdr_opt(). Potentially, it can test "if
> (!bpf_sock->allow_tcp_access && !bpf_sock->syn_skb) { return -EOPNOTSUPP; }".
>
> Agree to stay with the current "bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB"
> as in this patch. It is cleaner.
>
> >>> @@ -5673,7 +5678,12 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
> >>>    BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
> >>>           int, level, int, optname, char *, optval, int, optlen)
> >>>    {
> >>> -     return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
> >>> +     struct sock *sk = bpf_sock->sk;
> >>> +
> >>> +     if (is_locked_tcp_sock_ops(bpf_sock) && sk_fullsock(sk))
> >>
> >> afaict, the new timestamping callbacks still can do setsockopt and it is
> >> incorrect. It should be:
> >>
> >>          if (!bpf_sock->allow_tcp_access)
> >>                  return -EOPNOTSUPP;
> >>
> >> I recalled I have asked in v5 but it may be buried in the long thread, so asking
> >> here again. Please add test(s) to check that the new timestamping callbacks
> >> cannot call setsockopt and read/write to some of the tcp_sock fields through the
> >> bpf_sock_ops.
> >>
> >>> +             sock_owned_by_me(sk);
> >>
> >> Not needed and instead...
> >
> > Sorry I don't get you here. What I was doing was letting non
> > timestamping callbacks be checked by the sock_owned_by_me() function.
> >
> > If the callback belongs to timestamping, we will skip the check.
>
> It will skip the sock_owned_by_me() test and
> continue to do the following __bpf_setsockopt() which the new timetamping
> callback should not do, no?
>
> It should be just this at the very beginning of bpf_sock_ops_setsockopt:
>
>         if (!is_locked_tcp_sock_ops(bpf_sock))
>                 return -EOPNOTSUPP;
> >
> >>
> >>> +
> >>> +     return __bpf_setsockopt(sk, level, optname, optval, optlen);
> >>
> >> keep the original _bpf_setsockopt().
> >
> > Oh, I remembered we've already assumed/agreed the timestamping socket
> > must be full sock. I will use it.
> >
> >>
> >>>    }
> >>>
> >>>    static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
> >>> @@ -5759,6 +5769,7 @@ BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
> >>>           int, level, int, optname, char *, optval, int, optlen)
> >>>    {
> >>>        if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
> >>> +         bpf_sock->sk->sk_protocol == IPPROTO_TCP &&
> >>>            optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
> >>
> >> No need to allow getsockopt regardless what SOL_* it is asking. To keep it
> >> simple, I would just disable both getsockopt and setsockopt for all SOL_* for
> >
> > Really? I'm shocked because the selftests in this series call
> > bpf_sock_ops_getsockopt() and bpf_sock_ops_setsockopt() in patch
> > [13/13]:
>
> Yes, really. It may be late Friday for me here. Please double check your test if
> the bpf_set/getsockopt is called from the new timestamp callback or it is only
> called from the existing BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB callback.

Oh, after reading the above a few times, I finally realized you're
right. I'll do it. Thanks!!

>
> Note that I am only asking to disable the set/getsockopt,
> bpf_sock_ops_cb_flags_set, and the bpf_sock_ops_load_hdr_opt for the new
> timestamping callbacks.

Right, right!

Thanks,
Jason
Martin KaFai Lau Jan. 25, 2025, 3:12 a.m. UTC | #6
On 1/24/25 6:25 PM, Martin KaFai Lau wrote:
>>
>> Sorry, I don't think it can work for all the cases because:
>> 1) please see BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB,
>> if req exists, there is no allow_tcp_access initialization. Then
>> calling some function like bpf_sock_ops_setsockopt will be rejected
>> because allow_tcp_access is zero.
>> 2) tcp_call_bpf() only set allow_tcp_access only when the socket is
>> fullsock. As far as I know, all the callers have the full stock for
>> now, but in the future it might not.
> 
> Note that the existing helper bpf_sock_ops_cb_flags_set and 
> bpf_sock_ops_{set,get}sockopt itself have done the sk_fullsock() test and then 
> return -EINVAL. bpf_sock->sk is fullsock or not does not matter to these helpers.
> 
> You are right on the BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB 
> but the only helper left that testing allow_tcp_access is not enough is 
> bpf_sock_ops_load_hdr_opt(). Potentially, it can test "if (!bpf_sock- 
>  >allow_tcp_access && !bpf_sock->syn_skb) { return -EOPNOTSUPP; }".
> 
> Agree to stay with the current "bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB" 
> as in this patch. It is cleaner.

Also ignore my earlier comment on merging patch 3 and 4. Better keep patch 4 on 
its own since it is not reusing the allow_tcp_access test. Instead, stay with 
the "bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB" test.
Jason Xing Jan. 25, 2025, 3:43 a.m. UTC | #7
On Sat, Jan 25, 2025 at 11:12 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 1/24/25 6:25 PM, Martin KaFai Lau wrote:
> >>
> >> Sorry, I don't think it can work for all the cases because:
> >> 1) please see BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB,
> >> if req exists, there is no allow_tcp_access initialization. Then
> >> calling some function like bpf_sock_ops_setsockopt will be rejected
> >> because allow_tcp_access is zero.
> >> 2) tcp_call_bpf() only set allow_tcp_access only when the socket is
> >> fullsock. As far as I know, all the callers have the full stock for
> >> now, but in the future it might not.
> >
> > Note that the existing helper bpf_sock_ops_cb_flags_set and
> > bpf_sock_ops_{set,get}sockopt itself have done the sk_fullsock() test and then
> > return -EINVAL. bpf_sock->sk is fullsock or not does not matter to these helpers.
> >
> > You are right on the BPF_SOCK_OPS_WRITE_HDR_OPT_CB/BPF_SOCK_OPS_HDR_OPT_LEN_CB
> > but the only helper left that testing allow_tcp_access is not enough is
> > bpf_sock_ops_load_hdr_opt(). Potentially, it can test "if (!bpf_sock-
> >  >allow_tcp_access && !bpf_sock->syn_skb) { return -EOPNOTSUPP; }".
> >
> > Agree to stay with the current "bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB"
> > as in this patch. It is cleaner.
>
> Also ignore my earlier comment on merging patch 3 and 4. Better keep patch 4 on
> its own since it is not reusing the allow_tcp_access test. Instead, stay with
> the "bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB" test.

Got it!

Thanks,
Jason
diff mbox series

Patch

diff --git a/net/core/filter.c b/net/core/filter.c
index fdd305b4cfbb..934431886876 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5523,6 +5523,11 @@  static int __bpf_setsockopt(struct sock *sk, int level, int optname,
 	return -EINVAL;
 }
 
+static bool is_locked_tcp_sock_ops(struct bpf_sock_ops_kern *bpf_sock)
+{
+	return bpf_sock->op <= BPF_SOCK_OPS_WRITE_HDR_OPT_CB;
+}
+
 static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 			   char *optval, int optlen)
 {
@@ -5673,7 +5678,12 @@  static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
 BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	   int, level, int, optname, char *, optval, int, optlen)
 {
-	return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
+	struct sock *sk = bpf_sock->sk;
+
+	if (is_locked_tcp_sock_ops(bpf_sock) && sk_fullsock(sk))
+		sock_owned_by_me(sk);
+
+	return __bpf_setsockopt(sk, level, optname, optval, optlen);
 }
 
 static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
@@ -5759,6 +5769,7 @@  BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	   int, level, int, optname, char *, optval, int, optlen)
 {
 	if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
+	    bpf_sock->sk->sk_protocol == IPPROTO_TCP &&
 	    optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
 		int ret, copy_len = 0;
 		const u8 *start;
@@ -5800,7 +5811,8 @@  BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
 	struct sock *sk = bpf_sock->sk;
 	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
 
-	if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
+	if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk) ||
+	    sk->sk_protocol != IPPROTO_TCP)
 		return -EINVAL;
 
 	tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
@@ -7609,6 +7621,9 @@  BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
 	u8 search_kind, search_len, copy_len, magic_len;
 	int ret;
 
+	if (!is_locked_tcp_sock_ops(bpf_sock))
+		return -EOPNOTSUPP;
+
 	/* 2 byte is the minimal option len except TCPOPT_NOP and
 	 * TCPOPT_EOL which are useless for the bpf prog to learn
 	 * and this helper disallow loading them also.