diff mbox series

[v8,08/26] tcp: authopt: Disable via sysctl by default

Message ID 298e4e87ce3a822b4217b309438483959082e6bb.1662361354.git.cdleonard@gmail.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series tcp: Initial support for RFC5925 auth option | expand

Commit Message

Leonard Crestez Sept. 5, 2022, 7:05 a.m. UTC
This is mainly intended to protect against local privilege escalations
through a rarely used feature so it is deliberately not namespaced.

Enforcement is only at the setsockopt level, this should be enough to
ensure that the tcp_authopt_needed static key never turns on.

No effort is made to handle disabling when the feature is already in
use.

Signed-off-by: Leonard Crestez <cdleonard@gmail.com>
---
 Documentation/networking/ip-sysctl.rst |  6 ++++
 include/net/tcp_authopt.h              |  1 +
 net/ipv4/sysctl_net_ipv4.c             | 39 ++++++++++++++++++++++++++
 net/ipv4/tcp_authopt.c                 | 25 +++++++++++++++++
 4 files changed, 71 insertions(+)

Comments

Eric Dumazet Sept. 6, 2022, 11:11 p.m. UTC | #1
On Mon, Sep 5, 2022 at 12:06 AM Leonard Crestez <cdleonard@gmail.com> wrote:
>
> This is mainly intended to protect against local privilege escalations
> through a rarely used feature so it is deliberately not namespaced.
>
> Enforcement is only at the setsockopt level, this should be enough to
> ensure that the tcp_authopt_needed static key never turns on.
>
> No effort is made to handle disabling when the feature is already in
> use.
>
> Signed-off-by: Leonard Crestez <cdleonard@gmail.com>
> ---
>  Documentation/networking/ip-sysctl.rst |  6 ++++
>  include/net/tcp_authopt.h              |  1 +
>  net/ipv4/sysctl_net_ipv4.c             | 39 ++++++++++++++++++++++++++
>  net/ipv4/tcp_authopt.c                 | 25 +++++++++++++++++
>  4 files changed, 71 insertions(+)
>
> diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
> index a759872a2883..41be0e69d767 100644
> --- a/Documentation/networking/ip-sysctl.rst
> +++ b/Documentation/networking/ip-sysctl.rst
> @@ -1038,10 +1038,16 @@ tcp_challenge_ack_limit - INTEGER
>         Note that this per netns rate limit can allow some side channel
>         attacks and probably should not be enabled.
>         TCP stack implements per TCP socket limits anyway.
>         Default: INT_MAX (unlimited)
>
> +tcp_authopt - BOOLEAN
> +       Enable the TCP Authentication Option (RFC5925), a replacement for TCP
> +       MD5 Signatures (RFC2835).
> +
> +       Default: 0
> +
>  UDP variables
>  =============
>
>  udp_l3mdev_accept - BOOLEAN
>         Enabling this option allows a "global" bound socket to work
> diff --git a/include/net/tcp_authopt.h b/include/net/tcp_authopt.h
> index 7ad34a6987ec..1f5020b790dd 100644
> --- a/include/net/tcp_authopt.h
> +++ b/include/net/tcp_authopt.h
> @@ -80,10 +80,11 @@ struct tcphdr_authopt {
>  };
>
>  #ifdef CONFIG_TCP_AUTHOPT
>  DECLARE_STATIC_KEY_FALSE(tcp_authopt_needed_key);
>  #define tcp_authopt_needed (static_branch_unlikely(&tcp_authopt_needed_key))
> +extern int sysctl_tcp_authopt;
>  void tcp_authopt_free(struct sock *sk, struct tcp_authopt_info *info);
>  void tcp_authopt_clear(struct sock *sk);
>  int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen);
>  int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *key);
>  int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen);
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 5490c285668b..908a3ef15b47 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -17,10 +17,11 @@
>  #include <net/udp.h>
>  #include <net/cipso_ipv4.h>
>  #include <net/ping.h>
>  #include <net/protocol.h>
>  #include <net/netevent.h>
> +#include <net/tcp_authopt.h>
>
>  static int tcp_retr1_max = 255;
>  static int ip_local_port_range_min[] = { 1, 1 };
>  static int ip_local_port_range_max[] = { 65535, 65535 };
>  static int tcp_adv_win_scale_min = -31;
> @@ -413,10 +414,37 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
>
>         return ret;
>  }
>  #endif
>
> +#ifdef CONFIG_TCP_AUTHOPT
> +static int proc_tcp_authopt(struct ctl_table *ctl,
> +                           int write, void *buffer, size_t *lenp,
> +                           loff_t *ppos)
> +{
> +       int val = sysctl_tcp_authopt;

val = READ_ONCE(sysctl_tcp_authopt);

> +       struct ctl_table tmp = {
> +               .data = &val,
> +               .mode = ctl->mode,
> +               .maxlen = sizeof(val),
> +               .extra1 = SYSCTL_ZERO,
> +               .extra2 = SYSCTL_ONE,
> +       };
> +       int err;
> +
> +       err = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
> +       if (err)
> +               return err;
> +       if (sysctl_tcp_authopt && !val) {

READ_ONCE(sysctl_tcp_authopt)

Note that this test would still be racy, because another cpu might
change sysctl_tcp_authopt right after the read.

> +               net_warn_ratelimited("Enabling TCP Authentication Option is permanent\n");
> +               return -EINVAL;
> +       }
> +       sysctl_tcp_authopt = val;

WRITE_ONCE(sysctl_tcp_authopt, val),  or even better:

if (val)
     cmpxchg(&sysctl_tcp_authopt, 0, val);

> +       return 0;
> +}
> +#endif
> +
>  static struct ctl_table ipv4_table[] = {
>         {
>                 .procname       = "tcp_max_orphans",
>                 .data           = &sysctl_tcp_max_orphans,
>                 .maxlen         = sizeof(int),
> @@ -524,10 +552,21 @@ static struct ctl_table ipv4_table[] = {
>                 .mode           = 0644,
>                 .proc_handler   = proc_douintvec_minmax,
>                 .extra1         = &sysctl_fib_sync_mem_min,
>                 .extra2         = &sysctl_fib_sync_mem_max,
>         },
> +#ifdef CONFIG_TCP_AUTHOPT
> +       {
> +               .procname       = "tcp_authopt",
> +               .data           = &sysctl_tcp_authopt,
> +               .maxlen         = sizeof(int),
> +               .mode           = 0644,
> +               .proc_handler   = proc_tcp_authopt,
> +               .extra1         = SYSCTL_ZERO,
> +               .extra2         = SYSCTL_ONE,
> +       },
> +#endif
>         { }
>  };
>
>  static struct ctl_table ipv4_net_table[] = {
>         /* tcp_max_tw_buckets must be first in this table. */
> diff --git a/net/ipv4/tcp_authopt.c b/net/ipv4/tcp_authopt.c
> index 4f7cbe1e17f3..9d02da8d6964 100644
> --- a/net/ipv4/tcp_authopt.c
> +++ b/net/ipv4/tcp_authopt.c
> @@ -4,10 +4,15 @@
>  #include <net/ipv6.h>
>  #include <net/tcp.h>
>  #include <linux/kref.h>
>  #include <crypto/hash.h>
>
> +/* This is mainly intended to protect against local privilege escalations through
> + * a rarely used feature so it is deliberately not namespaced.
> + */
> +int sysctl_tcp_authopt;
> +
>  /* This is enabled when first struct tcp_authopt_info is allocated and never released */
>  DEFINE_STATIC_KEY_FALSE(tcp_authopt_needed_key);
>  EXPORT_SYMBOL(tcp_authopt_needed_key);
>
>  /* All current algorithms have a mac length of 12 but crypto API digestsize can be larger */
> @@ -437,17 +442,30 @@ static int _copy_from_sockptr_tolerant(u8 *dst,
>                 memset(dst + srclen, 0, dstlen - srclen);
>
>         return err;
>  }
>
> +static int check_sysctl_tcp_authopt(void)
> +{
> +       if (!sysctl_tcp_authopt) {

READ_ONCE(...)

> +               net_warn_ratelimited("TCP Authentication Option disabled by sysctl.\n");
> +               return -EPERM;
> +       }
> +
> +       return 0;
> +}
> +
>  int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
>  {
>         struct tcp_authopt opt;
>         struct tcp_authopt_info *info;
>         int err;
>
>         sock_owned_by_me(sk);
> +       err = check_sysctl_tcp_authopt();
> +       if (err)
> +               return err;
>
>         err = _copy_from_sockptr_tolerant((u8 *)&opt, sizeof(opt), optval, optlen);
>         if (err)
>                 return err;
>
> @@ -465,13 +483,17 @@ int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
>
>  int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *opt)
>  {
>         struct tcp_sock *tp = tcp_sk(sk);
>         struct tcp_authopt_info *info;
> +       int err;
>
>         memset(opt, 0, sizeof(*opt));
>         sock_owned_by_me(sk);
> +       err = check_sysctl_tcp_authopt();
> +       if (err)
> +               return err;
>
>         info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk));
>         if (!info)
>                 return -ENOENT;
>
> @@ -493,10 +515,13 @@ int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
>         struct netns_tcp_authopt *net = sock_net_tcp_authopt(sk);
>         struct tcp_authopt_alg_imp *alg;
>         int err;
>
>         sock_owned_by_me(sk);
> +       err = check_sysctl_tcp_authopt();
> +       if (err)
> +               return err;
>         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
>                 return -EPERM;
>
>         err = _copy_from_sockptr_tolerant((u8 *)&opt, sizeof(opt), optval, optlen);
>         if (err)
> --
> 2.25.1
>
Leonard Crestez Sept. 7, 2022, 4:53 p.m. UTC | #2
On 9/7/22 02:11, Eric Dumazet wrote:
> On Mon, Sep 5, 2022 at 12:06 AM Leonard Crestez <cdleonard@gmail.com> wrote:
>>
>> This is mainly intended to protect against local privilege escalations
>> through a rarely used feature so it is deliberately not namespaced.
>>
>> Enforcement is only at the setsockopt level, this should be enough to
>> ensure that the tcp_authopt_needed static key never turns on.
>>
>> No effort is made to handle disabling when the feature is already in
>> use.
>>
>> Signed-off-by: Leonard Crestez <cdleonard@gmail.com>
>> ---
>>   Documentation/networking/ip-sysctl.rst |  6 ++++
>>   include/net/tcp_authopt.h              |  1 +
>>   net/ipv4/sysctl_net_ipv4.c             | 39 ++++++++++++++++++++++++++
>>   net/ipv4/tcp_authopt.c                 | 25 +++++++++++++++++
>>   4 files changed, 71 insertions(+)
>>
>> diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
>> index a759872a2883..41be0e69d767 100644
>> --- a/Documentation/networking/ip-sysctl.rst
>> +++ b/Documentation/networking/ip-sysctl.rst
>> @@ -1038,10 +1038,16 @@ tcp_challenge_ack_limit - INTEGER
>>          Note that this per netns rate limit can allow some side channel
>>          attacks and probably should not be enabled.
>>          TCP stack implements per TCP socket limits anyway.
>>          Default: INT_MAX (unlimited)
>>
>> +tcp_authopt - BOOLEAN
>> +       Enable the TCP Authentication Option (RFC5925), a replacement for TCP
>> +       MD5 Signatures (RFC2835).
>> +
>> +       Default: 0
>> +

...

>> +#ifdef CONFIG_TCP_AUTHOPT
>> +static int proc_tcp_authopt(struct ctl_table *ctl,
>> +                           int write, void *buffer, size_t *lenp,
>> +                           loff_t *ppos)
>> +{
>> +       int val = sysctl_tcp_authopt;
> 
> val = READ_ONCE(sysctl_tcp_authopt);
> 
>> +       struct ctl_table tmp = {
>> +               .data = &val,
>> +               .mode = ctl->mode,
>> +               .maxlen = sizeof(val),
>> +               .extra1 = SYSCTL_ZERO,
>> +               .extra2 = SYSCTL_ONE,
>> +       };
>> +       int err;
>> +
>> +       err = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
>> +       if (err)
>> +               return err;
>> +       if (sysctl_tcp_authopt && !val) {
> 
> READ_ONCE(sysctl_tcp_authopt)
> 
> Note that this test would still be racy, because another cpu might
> change sysctl_tcp_authopt right after the read.

What meaningful races are possible here? This is a variable that changes 
from 0 to 1 at most once.

In theory if two processes attempt to assign "non-zero" at the same time 
then one will "win" and the other will get an error but races between 
userspace writing different values are possible for any sysctl. The 
solution seems to be "write sysctls from a single place".

All the checks are in sockopts - in theory if the sysctl is written on 
one CPU then a sockopt can still fail on another CPU until caches are 
flushed. Is this what you're worried about?

In theory doing READ_ONCE might incur a slight penalty on sockopt but 
not noticeable.

> 
>> +               net_warn_ratelimited("Enabling TCP Authentication Option is permanent\n");
>> +               return -EINVAL;
>> +       }
>> +       sysctl_tcp_authopt = val;
> 
> WRITE_ONCE(sysctl_tcp_authopt, val),  or even better:
> 
> if (val)
>       cmpxchg(&sysctl_tcp_authopt, 0, val);
> 
>> +       return 0;
>> +}
>> +#endif
>> +

This would be useful if we did any sort of initialization here but we 
don't. Crypto is initialized somewhere completely different.
Eric Dumazet Sept. 7, 2022, 5:04 p.m. UTC | #3
On Wed, Sep 7, 2022 at 9:53 AM Leonard Crestez <cdleonard@gmail.com> wrote:
>
> On 9/7/22 02:11, Eric Dumazet wrote:
> > On Mon, Sep 5, 2022 at 12:06 AM Leonard Crestez <cdleonard@gmail.com> wrote:
> >>
> >> This is mainly intended to protect against local privilege escalations
> >> through a rarely used feature so it is deliberately not namespaced.
> >>
> >> Enforcement is only at the setsockopt level, this should be enough to
> >> ensure that the tcp_authopt_needed static key never turns on.
> >>
> >> No effort is made to handle disabling when the feature is already in
> >> use.
> >>
> >> Signed-off-by: Leonard Crestez <cdleonard@gmail.com>
> >> ---
> >>   Documentation/networking/ip-sysctl.rst |  6 ++++
> >>   include/net/tcp_authopt.h              |  1 +
> >>   net/ipv4/sysctl_net_ipv4.c             | 39 ++++++++++++++++++++++++++
> >>   net/ipv4/tcp_authopt.c                 | 25 +++++++++++++++++
> >>   4 files changed, 71 insertions(+)
> >>
> >> diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
> >> index a759872a2883..41be0e69d767 100644
> >> --- a/Documentation/networking/ip-sysctl.rst
> >> +++ b/Documentation/networking/ip-sysctl.rst
> >> @@ -1038,10 +1038,16 @@ tcp_challenge_ack_limit - INTEGER
> >>          Note that this per netns rate limit can allow some side channel
> >>          attacks and probably should not be enabled.
> >>          TCP stack implements per TCP socket limits anyway.
> >>          Default: INT_MAX (unlimited)
> >>
> >> +tcp_authopt - BOOLEAN
> >> +       Enable the TCP Authentication Option (RFC5925), a replacement for TCP
> >> +       MD5 Signatures (RFC2835).
> >> +
> >> +       Default: 0
> >> +
>
> ...
>
> >> +#ifdef CONFIG_TCP_AUTHOPT
> >> +static int proc_tcp_authopt(struct ctl_table *ctl,
> >> +                           int write, void *buffer, size_t *lenp,
> >> +                           loff_t *ppos)
> >> +{
> >> +       int val = sysctl_tcp_authopt;
> >
> > val = READ_ONCE(sysctl_tcp_authopt);
> >
> >> +       struct ctl_table tmp = {
> >> +               .data = &val,
> >> +               .mode = ctl->mode,
> >> +               .maxlen = sizeof(val),
> >> +               .extra1 = SYSCTL_ZERO,
> >> +               .extra2 = SYSCTL_ONE,
> >> +       };
> >> +       int err;
> >> +
> >> +       err = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
> >> +       if (err)
> >> +               return err;
> >> +       if (sysctl_tcp_authopt && !val) {
> >
> > READ_ONCE(sysctl_tcp_authopt)
> >
> > Note that this test would still be racy, because another cpu might
> > change sysctl_tcp_authopt right after the read.
>
> What meaningful races are possible here? This is a variable that changes
> from 0 to 1 at most once.

Two cpus can issue writes of 0 and 1 values at the same time.

Depending on scheduling writing the 0 can 'win' the race and overwrite
the value back to 0.

This is in clear violation of the claim you are making (that the
sysctl can only go once from 0 to 1)

>
> In theory if two processes attempt to assign "non-zero" at the same time
> then one will "win" and the other will get an error but races between
> userspace writing different values are possible for any sysctl. The
> solution seems to be "write sysctls from a single place".
>
> All the checks are in sockopts - in theory if the sysctl is written on
> one CPU then a sockopt can still fail on another CPU until caches are
> flushed. Is this what you're worried about?
>
> In theory doing READ_ONCE might incur a slight penalty on sockopt but
> not noticeable.

Not at all. There is _no_ penalty using READ_ONCE(). Unless it is done
in a loop and this prevents some compiler optimization.

Please use WRITE_ONCE() and READ_ONCE() for all sysctl values used in
TCP stack (and elsewhere)

See all the silly patches we had recently.

>
> >
> >> +               net_warn_ratelimited("Enabling TCP Authentication Option is permanent\n");
> >> +               return -EINVAL;
> >> +       }
> >> +       sysctl_tcp_authopt = val;
> >
> > WRITE_ONCE(sysctl_tcp_authopt, val),  or even better:
> >
> > if (val)
> >       cmpxchg(&sysctl_tcp_authopt, 0, val);
> >
> >> +       return 0;
> >> +}
> >> +#endif
> >> +
>
> This would be useful if we did any sort of initialization here but we
> don't. Crypto is initialized somewhere completely different.
Leonard Crestez Sept. 7, 2022, 5:58 p.m. UTC | #4
On 9/7/22 20:04, Eric Dumazet wrote:
> On Wed, Sep 7, 2022 at 9:53 AM Leonard Crestez <cdleonard@gmail.com> wrote:
>> On 9/7/22 02:11, Eric Dumazet wrote:
>>> On Mon, Sep 5, 2022 at 12:06 AM Leonard Crestez <cdleonard@gmail.com> wrote:
>>>>
>>>> This is mainly intended to protect against local privilege escalations
>>>> through a rarely used feature so it is deliberately not namespaced.
>>>>
>>>> Enforcement is only at the setsockopt level, this should be enough to
>>>> ensure that the tcp_authopt_needed static key never turns on.
>>>>
>>>> No effort is made to handle disabling when the feature is already in
>>>> use.
>>>>
>>>> Signed-off-by: Leonard Crestez <cdleonard@gmail.com>
>>>> ---
>>>>    Documentation/networking/ip-sysctl.rst |  6 ++++
>>>>    include/net/tcp_authopt.h              |  1 +
>>>>    net/ipv4/sysctl_net_ipv4.c             | 39 ++++++++++++++++++++++++++
>>>>    net/ipv4/tcp_authopt.c                 | 25 +++++++++++++++++
>>>>    4 files changed, 71 insertions(+)
>>>>
>>>> diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
>>>> index a759872a2883..41be0e69d767 100644
>>>> --- a/Documentation/networking/ip-sysctl.rst
>>>> +++ b/Documentation/networking/ip-sysctl.rst
>>>> @@ -1038,10 +1038,16 @@ tcp_challenge_ack_limit - INTEGER
>>>>           Note that this per netns rate limit can allow some side channel
>>>>           attacks and probably should not be enabled.
>>>>           TCP stack implements per TCP socket limits anyway.
>>>>           Default: INT_MAX (unlimited)
>>>>
>>>> +tcp_authopt - BOOLEAN
>>>> +       Enable the TCP Authentication Option (RFC5925), a replacement for TCP
>>>> +       MD5 Signatures (RFC2835).
>>>> +
>>>> +       Default: 0
>>>> +
>>
>> ...
>>
>>>> +#ifdef CONFIG_TCP_AUTHOPT
>>>> +static int proc_tcp_authopt(struct ctl_table *ctl,
>>>> +                           int write, void *buffer, size_t *lenp,
>>>> +                           loff_t *ppos)
>>>> +{
>>>> +       int val = sysctl_tcp_authopt;
>>>
>>> val = READ_ONCE(sysctl_tcp_authopt);
>>>
>>>> +       struct ctl_table tmp = {
>>>> +               .data = &val,
>>>> +               .mode = ctl->mode,
>>>> +               .maxlen = sizeof(val),
>>>> +               .extra1 = SYSCTL_ZERO,
>>>> +               .extra2 = SYSCTL_ONE,
>>>> +       };
>>>> +       int err;
>>>> +
>>>> +       err = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
>>>> +       if (err)
>>>> +               return err;
>>>> +       if (sysctl_tcp_authopt && !val) {
>>>
>>> READ_ONCE(sysctl_tcp_authopt)
>>>
>>> Note that this test would still be racy, because another cpu might
>>> change sysctl_tcp_authopt right after the read.
>>
>> What meaningful races are possible here? This is a variable that changes
>> from 0 to 1 at most once.
> 
> Two cpus can issue writes of 0 and 1 values at the same time.
> 
> Depending on scheduling writing the 0 can 'win' the race and overwrite
> the value back to 0.
> 
> This is in clear violation of the claim you are making (that the
> sysctl can only go once from 0 to 1)

Not clear why anyone would attempt to write 0, maybe to ensure that it's 
still disabled?

But you're right that userspace CAN do that and the kernel CAN misbehave 
in this scenario so it would be better to just make the changes you 
suggested.

>> In theory if two processes attempt to assign "non-zero" at the same time
>> then one will "win" and the other will get an error but races between
>> userspace writing different values are possible for any sysctl. The
>> solution seems to be "write sysctls from a single place".
>>
>> All the checks are in sockopts - in theory if the sysctl is written on
>> one CPU then a sockopt can still fail on another CPU until caches are
>> flushed. Is this what you're worried about?
>>
>> In theory doing READ_ONCE might incur a slight penalty on sockopt but
>> not noticeable.
> 
> Not at all. There is _no_ penalty using READ_ONCE(). Unless it is done
> in a loop and this prevents some compiler optimization.
> 
> Please use WRITE_ONCE() and READ_ONCE() for all sysctl values used in
> TCP stack (and elsewhere)
> 
> See all the silly patches we had recently.

OK
Herbert Xu Sept. 7, 2022, 10:49 p.m. UTC | #5
On Tue, Sep 06, 2022 at 04:11:58PM -0700, Eric Dumazet wrote:
>
> WRITE_ONCE(sysctl_tcp_authopt, val),  or even better:
> 
> if (val)
>      cmpxchg(&sysctl_tcp_authopt, 0, val);

What's the point of the cmpxchg? Since you're simply trying to prevent
sysctl_tcp_authopt from going back to zero, then the if clause
by itself is enough:

	if (val)
		WRITE_ONCE(sysctl_tcp_authopt, val);

Cheers,
Eric Dumazet Sept. 7, 2022, 10:58 p.m. UTC | #6
On Wed, Sep 7, 2022 at 3:50 PM Herbert Xu <herbert@gondor.apana.org.au> wrote:
>
> On Tue, Sep 06, 2022 at 04:11:58PM -0700, Eric Dumazet wrote:
> >
> > WRITE_ONCE(sysctl_tcp_authopt, val),  or even better:
> >
> > if (val)
> >      cmpxchg(&sysctl_tcp_authopt, 0, val);
>
> What's the point of the cmpxchg? Since you're simply trying to prevent
> sysctl_tcp_authopt from going back to zero, then the if clause
> by itself is enough:
>
>         if (val)
>                 WRITE_ONCE(sysctl_tcp_authopt, val);
>

Ack.

Original patch was doing something racy, I have not though about the
most efficient way to deal with it.
diff mbox series

Patch

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index a759872a2883..41be0e69d767 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1038,10 +1038,16 @@  tcp_challenge_ack_limit - INTEGER
 	Note that this per netns rate limit can allow some side channel
 	attacks and probably should not be enabled.
 	TCP stack implements per TCP socket limits anyway.
 	Default: INT_MAX (unlimited)
 
+tcp_authopt - BOOLEAN
+	Enable the TCP Authentication Option (RFC5925), a replacement for TCP
+	MD5 Signatures (RFC2835).
+
+	Default: 0
+
 UDP variables
 =============
 
 udp_l3mdev_accept - BOOLEAN
 	Enabling this option allows a "global" bound socket to work
diff --git a/include/net/tcp_authopt.h b/include/net/tcp_authopt.h
index 7ad34a6987ec..1f5020b790dd 100644
--- a/include/net/tcp_authopt.h
+++ b/include/net/tcp_authopt.h
@@ -80,10 +80,11 @@  struct tcphdr_authopt {
 };
 
 #ifdef CONFIG_TCP_AUTHOPT
 DECLARE_STATIC_KEY_FALSE(tcp_authopt_needed_key);
 #define tcp_authopt_needed (static_branch_unlikely(&tcp_authopt_needed_key))
+extern int sysctl_tcp_authopt;
 void tcp_authopt_free(struct sock *sk, struct tcp_authopt_info *info);
 void tcp_authopt_clear(struct sock *sk);
 int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen);
 int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *key);
 int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5490c285668b..908a3ef15b47 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -17,10 +17,11 @@ 
 #include <net/udp.h>
 #include <net/cipso_ipv4.h>
 #include <net/ping.h>
 #include <net/protocol.h>
 #include <net/netevent.h>
+#include <net/tcp_authopt.h>
 
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
 static int tcp_adv_win_scale_min = -31;
@@ -413,10 +414,37 @@  static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
 
 	return ret;
 }
 #endif
 
+#ifdef CONFIG_TCP_AUTHOPT
+static int proc_tcp_authopt(struct ctl_table *ctl,
+			    int write, void *buffer, size_t *lenp,
+			    loff_t *ppos)
+{
+	int val = sysctl_tcp_authopt;
+	struct ctl_table tmp = {
+		.data = &val,
+		.mode = ctl->mode,
+		.maxlen = sizeof(val),
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	};
+	int err;
+
+	err = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (err)
+		return err;
+	if (sysctl_tcp_authopt && !val) {
+		net_warn_ratelimited("Enabling TCP Authentication Option is permanent\n");
+		return -EINVAL;
+	}
+	sysctl_tcp_authopt = val;
+	return 0;
+}
+#endif
+
 static struct ctl_table ipv4_table[] = {
 	{
 		.procname	= "tcp_max_orphans",
 		.data		= &sysctl_tcp_max_orphans,
 		.maxlen		= sizeof(int),
@@ -524,10 +552,21 @@  static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_douintvec_minmax,
 		.extra1		= &sysctl_fib_sync_mem_min,
 		.extra2		= &sysctl_fib_sync_mem_max,
 	},
+#ifdef CONFIG_TCP_AUTHOPT
+	{
+		.procname	= "tcp_authopt",
+		.data		= &sysctl_tcp_authopt,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_tcp_authopt,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
 	{ }
 };
 
 static struct ctl_table ipv4_net_table[] = {
 	/* tcp_max_tw_buckets must be first in this table. */
diff --git a/net/ipv4/tcp_authopt.c b/net/ipv4/tcp_authopt.c
index 4f7cbe1e17f3..9d02da8d6964 100644
--- a/net/ipv4/tcp_authopt.c
+++ b/net/ipv4/tcp_authopt.c
@@ -4,10 +4,15 @@ 
 #include <net/ipv6.h>
 #include <net/tcp.h>
 #include <linux/kref.h>
 #include <crypto/hash.h>
 
+/* This is mainly intended to protect against local privilege escalations through
+ * a rarely used feature so it is deliberately not namespaced.
+ */
+int sysctl_tcp_authopt;
+
 /* This is enabled when first struct tcp_authopt_info is allocated and never released */
 DEFINE_STATIC_KEY_FALSE(tcp_authopt_needed_key);
 EXPORT_SYMBOL(tcp_authopt_needed_key);
 
 /* All current algorithms have a mac length of 12 but crypto API digestsize can be larger */
@@ -437,17 +442,30 @@  static int _copy_from_sockptr_tolerant(u8 *dst,
 		memset(dst + srclen, 0, dstlen - srclen);
 
 	return err;
 }
 
+static int check_sysctl_tcp_authopt(void)
+{
+	if (!sysctl_tcp_authopt) {
+		net_warn_ratelimited("TCP Authentication Option disabled by sysctl.\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+
 int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
 {
 	struct tcp_authopt opt;
 	struct tcp_authopt_info *info;
 	int err;
 
 	sock_owned_by_me(sk);
+	err = check_sysctl_tcp_authopt();
+	if (err)
+		return err;
 
 	err = _copy_from_sockptr_tolerant((u8 *)&opt, sizeof(opt), optval, optlen);
 	if (err)
 		return err;
 
@@ -465,13 +483,17 @@  int tcp_set_authopt(struct sock *sk, sockptr_t optval, unsigned int optlen)
 
 int tcp_get_authopt_val(struct sock *sk, struct tcp_authopt *opt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_authopt_info *info;
+	int err;
 
 	memset(opt, 0, sizeof(*opt));
 	sock_owned_by_me(sk);
+	err = check_sysctl_tcp_authopt();
+	if (err)
+		return err;
 
 	info = rcu_dereference_check(tp->authopt_info, lockdep_sock_is_held(sk));
 	if (!info)
 		return -ENOENT;
 
@@ -493,10 +515,13 @@  int tcp_set_authopt_key(struct sock *sk, sockptr_t optval, unsigned int optlen)
 	struct netns_tcp_authopt *net = sock_net_tcp_authopt(sk);
 	struct tcp_authopt_alg_imp *alg;
 	int err;
 
 	sock_owned_by_me(sk);
+	err = check_sysctl_tcp_authopt();
+	if (err)
+		return err;
 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	err = _copy_from_sockptr_tolerant((u8 *)&opt, sizeof(opt), optval, optlen);
 	if (err)