
[net-next,4/7] net: implement per-cpu reserves for memory_allocated

Message ID 20220609063412.2205738-5-eric.dumazet@gmail.com (mailing list archive)
State Accepted
Commit 3cd3399dd7a84ada85cb839989cdf7310e302c7d
Delegated to: Netdev Maintainers
Series net: reduce tcp_memory_allocated inflation

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2885 this patch: 2885
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 631 this patch: 631
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3015 this patch: 3015
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet June 9, 2022, 6:34 a.m. UTC
From: Eric Dumazet <edumazet@google.com>

We plan to keep sk->sk_forward_alloc as small as possible
in future patches.

This means we are going to call sk_memory_allocated_add()
and sk_memory_allocated_sub() more often.

Implement a per-cpu cache of +1/-1 MB to reduce the number
of changes to sk->sk_prot->memory_allocated, which
would otherwise be a source of false sharing.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/sock.h | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)
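
As a rough analog of the scheme described in the commit message (a minimal
user-space sketch assuming C11 atomics; BATCH, local_reserve and
shared_allocated are illustrative names, not from the patch), the batching
pattern looks like this:

	#include <stdatomic.h>
	#include <threads.h>

	#define BATCH 256	/* ~1 MB in 4 KB page units, like SK_MEMORY_PCPU_RESERVE */

	static atomic_long shared_allocated;	/* plays the role of memory_allocated */
	static thread_local long local_reserve;	/* plays the role of per_cpu_fw_alloc */

	static void reserve_adjust(long amt)
	{
		local_reserve += amt;		/* cheap, uncontended update */
		if (local_reserve >= BATCH || local_reserve <= -BATCH) {
			/* flush the accumulated delta to the shared counter */
			atomic_fetch_add(&shared_allocated, local_reserve);
			local_reserve = 0;
		}
	}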

Comments

Soheil Hassas Yeganeh June 9, 2022, 1:33 p.m. UTC | #1
On Thu, Jun 9, 2022 at 2:34 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> From: Eric Dumazet <edumazet@google.com>
>
> We plan keeping sk->sk_forward_alloc as small as possible
> in future patches.
>
> This means we are going to call sk_memory_allocated_add()
> and sk_memory_allocated_sub() more often.
>
> Implement a per-cpu cache of +1/-1 MB, to reduce number
> of changes to sk->sk_prot->memory_allocated, which
> would otherwise be cause of false sharing.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Acked-by: Soheil Hassas Yeganeh <soheil@google.com>

> ---
>  include/net/sock.h | 38 +++++++++++++++++++++++++++++---------
>  1 file changed, 29 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 825f8cbf791f02d798f17dd4f7a2659cebb0e98a..59040fee74e7de8d63fbf719f46e172906c134bb 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1397,22 +1397,48 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
>         return !!*sk->sk_prot->memory_pressure;
>  }
>
> +static inline long
> +proto_memory_allocated(const struct proto *prot)
> +{
> +       return max(0L, atomic_long_read(prot->memory_allocated));
> +}
> +
>  static inline long
>  sk_memory_allocated(const struct sock *sk)
>  {
> -       return atomic_long_read(sk->sk_prot->memory_allocated);
> +       return proto_memory_allocated(sk->sk_prot);
>  }
>
> +/* 1 MB per cpu, in page units */
> +#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
> +
>  static inline long
>  sk_memory_allocated_add(struct sock *sk, int amt)
>  {
> -       return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
> +       int local_reserve;
> +
> +       preempt_disable();
> +       local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> +       if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
> +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);

This is just a nitpick, but we could
__this_cpu_write(*sk->sk_prot->per_cpu_fw_alloc, 0) instead which
should be slightly faster.
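
For illustration only, the suggested variant of the flush would read as
follows (see Eric's reply below for why this is racy against interrupts):

	if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
		/* racy: an interrupt may have changed the per-cpu value */
		__this_cpu_write(*sk->sk_prot->per_cpu_fw_alloc, 0);
		atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
	}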

> +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> +       }
> +       preempt_enable();
> +       return sk_memory_allocated(sk);
>  }
>
>  static inline void
>  sk_memory_allocated_sub(struct sock *sk, int amt)
>  {
> -       atomic_long_sub(amt, sk->sk_prot->memory_allocated);
> +       int local_reserve;
> +
> +       preempt_disable();
> +       local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> +       if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
> +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> +       }
> +       preempt_enable();
>  }
>
>  #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
> @@ -1441,12 +1467,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
>         return percpu_counter_sum_positive(prot->sockets_allocated);
>  }
>
> -static inline long
> -proto_memory_allocated(struct proto *prot)
> -{
> -       return atomic_long_read(prot->memory_allocated);
> -}
> -
>  static inline bool
>  proto_memory_pressure(struct proto *prot)
>  {
> --
> 2.36.1.255.ge46751e96f-goog
>
Eric Dumazet June 9, 2022, 1:47 p.m. UTC | #2
On Thu, Jun 9, 2022 at 6:34 AM Soheil Hassas Yeganeh <soheil@google.com> wrote:
>
> On Thu, Jun 9, 2022 at 2:34 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> > From: Eric Dumazet <edumazet@google.com>
> >
> > We plan keeping sk->sk_forward_alloc as small as possible
> > in future patches.
> >
> > This means we are going to call sk_memory_allocated_add()
> > and sk_memory_allocated_sub() more often.
> >
> > Implement a per-cpu cache of +1/-1 MB, to reduce number
> > of changes to sk->sk_prot->memory_allocated, which
> > would otherwise be cause of false sharing.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
>
> Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
>
> > ---
> >  include/net/sock.h | 38 +++++++++++++++++++++++++++++---------
> >  1 file changed, 29 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/net/sock.h b/include/net/sock.h
> > index 825f8cbf791f02d798f17dd4f7a2659cebb0e98a..59040fee74e7de8d63fbf719f46e172906c134bb 100644
> > --- a/include/net/sock.h
> > +++ b/include/net/sock.h
> > @@ -1397,22 +1397,48 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
> >         return !!*sk->sk_prot->memory_pressure;
> >  }
> >
> > +static inline long
> > +proto_memory_allocated(const struct proto *prot)
> > +{
> > +       return max(0L, atomic_long_read(prot->memory_allocated));
> > +}
> > +
> >  static inline long
> >  sk_memory_allocated(const struct sock *sk)
> >  {
> > -       return atomic_long_read(sk->sk_prot->memory_allocated);
> > +       return proto_memory_allocated(sk->sk_prot);
> >  }
> >
> > +/* 1 MB per cpu, in page units */
> > +#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
> > +
> >  static inline long
> >  sk_memory_allocated_add(struct sock *sk, int amt)
> >  {
> > -       return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
> > +       int local_reserve;
> > +
> > +       preempt_disable();
> > +       local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> > +       if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
> > +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
>
> This is just a nitpick, but we could
> __this_cpu_write(*sk->sk_prot->per_cpu_fw_alloc, 0) instead which
> should be slightly faster.

This would require us to block irqs, not only preempt_disable()/preempt_enable().

Otherwise, when doing the write, there is no guarantee we replace the
intended value, as an interrupt could have changed this cpu's
per_cpu_fw_alloc.

A __this_cpu_cmpxchg() would make sure of that, but would be more
expensive than __this_cpu_sub() and would require a loop.

With my change, there is a tiny possibility that
*sk->sk_prot->per_cpu_fw_alloc is not in the -1MB/+1MB range,
but there are no lasting consequences: the next update will consolidate
things, and tcp_memory_allocated will not drift.
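
A sketch of such a __this_cpu_cmpxchg() loop, for illustration only (not
proposed code): it atomically swaps the per-cpu reserve to 0 and transfers
the old value to the shared counter.

	int old;

	do {
		old = __this_cpu_read(*sk->sk_prot->per_cpu_fw_alloc);
		/* retry if an interrupt changed the per-cpu value meanwhile */
	} while (__this_cpu_cmpxchg(*sk->sk_prot->per_cpu_fw_alloc, old, 0) != old);
	atomic_long_add(old, sk->sk_prot->memory_allocated);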

>
> > +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> > +       }
> > +       preempt_enable();
> > +       return sk_memory_allocated(sk);
> >  }
> >
> >  static inline void
> >  sk_memory_allocated_sub(struct sock *sk, int amt)
> >  {
> > -       atomic_long_sub(amt, sk->sk_prot->memory_allocated);
> > +       int local_reserve;
> > +
> > +       preempt_disable();
> > +       local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> > +       if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
> > +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> > +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> > +       }
> > +       preempt_enable();
> >  }
> >
> >  #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
> > @@ -1441,12 +1467,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
> >         return percpu_counter_sum_positive(prot->sockets_allocated);
> >  }
> >
> > -static inline long
> > -proto_memory_allocated(struct proto *prot)
> > -{
> > -       return atomic_long_read(prot->memory_allocated);
> > -}
> > -
> >  static inline bool
> >  proto_memory_pressure(struct proto *prot)
> >  {
> > --
> > 2.36.1.255.ge46751e96f-goog
> >
Soheil Hassas Yeganeh June 9, 2022, 1:48 p.m. UTC | #3
On Thu, Jun 9, 2022 at 9:47 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Jun 9, 2022 at 6:34 AM Soheil Hassas Yeganeh <soheil@google.com> wrote:
> >
> > On Thu, Jun 9, 2022 at 2:34 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > >
> > > From: Eric Dumazet <edumazet@google.com>
> > >
> > > We plan keeping sk->sk_forward_alloc as small as possible
> > > in future patches.
> > >
> > > This means we are going to call sk_memory_allocated_add()
> > > and sk_memory_allocated_sub() more often.
> > >
> > > Implement a per-cpu cache of +1/-1 MB, to reduce number
> > > of changes to sk->sk_prot->memory_allocated, which
> > > would otherwise be cause of false sharing.
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> >
> > Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
> >
> > > ---
> > >  include/net/sock.h | 38 +++++++++++++++++++++++++++++---------
> > >  1 file changed, 29 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/include/net/sock.h b/include/net/sock.h
> > > index 825f8cbf791f02d798f17dd4f7a2659cebb0e98a..59040fee74e7de8d63fbf719f46e172906c134bb 100644
> > > --- a/include/net/sock.h
> > > +++ b/include/net/sock.h
> > > @@ -1397,22 +1397,48 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
> > >         return !!*sk->sk_prot->memory_pressure;
> > >  }
> > >
> > > +static inline long
> > > +proto_memory_allocated(const struct proto *prot)
> > > +{
> > > +       return max(0L, atomic_long_read(prot->memory_allocated));
> > > +}
> > > +
> > >  static inline long
> > >  sk_memory_allocated(const struct sock *sk)
> > >  {
> > > -       return atomic_long_read(sk->sk_prot->memory_allocated);
> > > +       return proto_memory_allocated(sk->sk_prot);
> > >  }
> > >
> > > +/* 1 MB per cpu, in page units */
> > > +#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
> > > +
> > >  static inline long
> > >  sk_memory_allocated_add(struct sock *sk, int amt)
> > >  {
> > > -       return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
> > > +       int local_reserve;
> > > +
> > > +       preempt_disable();
> > > +       local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> > > +       if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
> > > +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> >
> > This is just a nitpick, but we could
> > __this_cpu_write(*sk->sk_prot->per_cpu_fw_alloc, 0) instead which
> > should be slightly faster.
>
> This would require us to block irqs, not only preempt_disable()/preempt_enable()
>
> Otherwise when doing the write, there is no guarantee we replace the
> intended value,
> as an interrupt could have changed this cpu per_cpu_fw_alloc.
>
> A __this_cpu_cmpxchg() would make sure of that, but would be more
> expensive than __this_cpu_sub() and would require a loop.
>
>  With my change, there is a tiny possibility that
> *sk->sk_prot->per_cpu_fw_alloc, is not in the -1/+1 1MB range,
> but no lasting consequences, next update will consolidate things, and
> tcp_memory_allocated will not drift.

Ah that makes sense. Thank you for the explanation!

> >
> > > +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> > > +       }
> > > +       preempt_enable();
> > > +       return sk_memory_allocated(sk);
> > >  }
> > >
> > >  static inline void
> > >  sk_memory_allocated_sub(struct sock *sk, int amt)
> > >  {
> > > -       atomic_long_sub(amt, sk->sk_prot->memory_allocated);
> > > +       int local_reserve;
> > > +
> > > +       preempt_disable();
> > > +       local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> > > +       if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
> > > +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> > > +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> > > +       }
> > > +       preempt_enable();
> > >  }
> > >
> > >  #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
> > > @@ -1441,12 +1467,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
> > >         return percpu_counter_sum_positive(prot->sockets_allocated);
> > >  }
> > >
> > > -static inline long
> > > -proto_memory_allocated(struct proto *prot)
> > > -{
> > > -       return atomic_long_read(prot->memory_allocated);
> > > -}
> > > -
> > >  static inline bool
> > >  proto_memory_pressure(struct proto *prot)
> > >  {
> > > --
> > > 2.36.1.255.ge46751e96f-goog
> > >
Neal Cardwell June 9, 2022, 2:46 p.m. UTC | #4
On Thu, Jun 9, 2022 at 2:34 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> From: Eric Dumazet <edumazet@google.com>
>
> We plan keeping sk->sk_forward_alloc as small as possible
> in future patches.
>
> This means we are going to call sk_memory_allocated_add()
> and sk_memory_allocated_sub() more often.
>
> Implement a per-cpu cache of +1/-1 MB, to reduce number
> of changes to sk->sk_prot->memory_allocated, which
> would otherwise be cause of false sharing.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/net/sock.h | 38 +++++++++++++++++++++++++++++---------
>  1 file changed, 29 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 825f8cbf791f02d798f17dd4f7a2659cebb0e98a..59040fee74e7de8d63fbf719f46e172906c134bb 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1397,22 +1397,48 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
>         return !!*sk->sk_prot->memory_pressure;
>  }
>
> +static inline long
> +proto_memory_allocated(const struct proto *prot)
> +{
> +       return max(0L, atomic_long_read(prot->memory_allocated));
> +}
> +
>  static inline long
>  sk_memory_allocated(const struct sock *sk)
>  {
> -       return atomic_long_read(sk->sk_prot->memory_allocated);
> +       return proto_memory_allocated(sk->sk_prot);
>  }
>
> +/* 1 MB per cpu, in page units */
> +#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
> +
>  static inline long
>  sk_memory_allocated_add(struct sock *sk, int amt)
>  {
> -       return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
> +       int local_reserve;
> +
> +       preempt_disable();
> +       local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> +       if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
> +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> +       }
> +       preempt_enable();
> +       return sk_memory_allocated(sk);
>  }
>
>  static inline void
>  sk_memory_allocated_sub(struct sock *sk, int amt)
>  {
> -       atomic_long_sub(amt, sk->sk_prot->memory_allocated);
> +       int local_reserve;
> +
> +       preempt_disable();
> +       local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> +       if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
> +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);

I would have thought these last two lines would be:

               __this_cpu_add(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
               atomic_long_sub(local_reserve, sk->sk_prot->memory_allocated);

Otherwise I don't see how sk->sk_prot->memory_allocated ever
decreases in these sk_memory_allocated_add()/sk_memory_allocated_sub()
functions?

That is, is there a copy-and-paste/typo issue in these two lines? Or
is my understanding backwards? (In which case I apologize for the
noise!)

thanks,
neal

> +       }
> +       preempt_enable();
>  }
>
>  #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
> @@ -1441,12 +1467,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
>         return percpu_counter_sum_positive(prot->sockets_allocated);
>  }
>
> -static inline long
> -proto_memory_allocated(struct proto *prot)
> -{
> -       return atomic_long_read(prot->memory_allocated);
> -}
> -
>  static inline bool
>  proto_memory_pressure(struct proto *prot)
>  {
> --
> 2.36.1.255.ge46751e96f-goog
>
Shakeel Butt June 9, 2022, 3:07 p.m. UTC | #5
On Thu, Jun 9, 2022 at 7:46 AM Neal Cardwell <ncardwell@google.com> wrote:
>
> /
>
>
> On Thu, Jun 9, 2022 at 2:34 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> > From: Eric Dumazet <edumazet@google.com>
> >
> > We plan keeping sk->sk_forward_alloc as small as possible
> > in future patches.
> >
> > This means we are going to call sk_memory_allocated_add()
> > and sk_memory_allocated_sub() more often.
> >
> > Implement a per-cpu cache of +1/-1 MB, to reduce number
> > of changes to sk->sk_prot->memory_allocated, which
> > would otherwise be cause of false sharing.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  include/net/sock.h | 38 +++++++++++++++++++++++++++++---------
> >  1 file changed, 29 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/net/sock.h b/include/net/sock.h
> > index 825f8cbf791f02d798f17dd4f7a2659cebb0e98a..59040fee74e7de8d63fbf719f46e172906c134bb 100644
> > --- a/include/net/sock.h
> > +++ b/include/net/sock.h
> > @@ -1397,22 +1397,48 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
> >         return !!*sk->sk_prot->memory_pressure;
> >  }
> >
> > +static inline long
> > +proto_memory_allocated(const struct proto *prot)
> > +{
> > +       return max(0L, atomic_long_read(prot->memory_allocated));
> > +}
> > +
> >  static inline long
> >  sk_memory_allocated(const struct sock *sk)
> >  {
> > -       return atomic_long_read(sk->sk_prot->memory_allocated);
> > +       return proto_memory_allocated(sk->sk_prot);
> >  }
> >
> > +/* 1 MB per cpu, in page units */
> > +#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
> > +
> >  static inline long
> >  sk_memory_allocated_add(struct sock *sk, int amt)
> >  {
> > -       return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
> > +       int local_reserve;
> > +
> > +       preempt_disable();
> > +       local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> > +       if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
> > +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> > +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> > +       }
> > +       preempt_enable();
> > +       return sk_memory_allocated(sk);
> >  }
> >
> >  static inline void
> >  sk_memory_allocated_sub(struct sock *sk, int amt)
> >  {
> > -       atomic_long_sub(amt, sk->sk_prot->memory_allocated);
> > +       int local_reserve;
> > +
> > +       preempt_disable();
> > +       local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> > +       if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
> > +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> > +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
>
> I would have thought these last two lines would be:
>
>                __this_cpu_add(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
>                atomic_long_sub(local_reserve, sk->sk_prot->memory_allocated);
>
> Otherwise I don't see how sk->sk_prot->memory_allocated) ever
> decreases in these sk_memory_allocated_add/sk_memory_allocated_sub
> functions?
>
> That is, is there a copy-and-paste/typo issue in these two lines? Or
> is my understanding backwards? (In which case I apologize for the
> noise!)

local_reserve is negative in that case and adding a negative number is
subtraction.
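
For instance (illustrative numbers only): with local_reserve == -300 pages at
that point, the two calls become

	__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, -300);	/* per-cpu counter back up to 0 */
	atomic_long_add(-300, sk->sk_prot->memory_allocated);	/* shared counter drops by 300 pages */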
Neal Cardwell June 9, 2022, 3:09 p.m. UTC | #6
On Thu, Jun 9, 2022 at 11:07 AM Shakeel Butt <shakeelb@google.com> wrote:
>
> On Thu, Jun 9, 2022 at 7:46 AM Neal Cardwell <ncardwell@google.com> wrote:
> >
> > /
> >
> >
> > On Thu, Jun 9, 2022 at 2:34 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > >
> > > From: Eric Dumazet <edumazet@google.com>
> > >
> > > We plan keeping sk->sk_forward_alloc as small as possible
> > > in future patches.
> > >
> > > This means we are going to call sk_memory_allocated_add()
> > > and sk_memory_allocated_sub() more often.
> > >
> > > Implement a per-cpu cache of +1/-1 MB, to reduce number
> > > of changes to sk->sk_prot->memory_allocated, which
> > > would otherwise be cause of false sharing.
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > ---
> > >  include/net/sock.h | 38 +++++++++++++++++++++++++++++---------
> > >  1 file changed, 29 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/include/net/sock.h b/include/net/sock.h
> > > index 825f8cbf791f02d798f17dd4f7a2659cebb0e98a..59040fee74e7de8d63fbf719f46e172906c134bb 100644
> > > --- a/include/net/sock.h
> > > +++ b/include/net/sock.h
> > > @@ -1397,22 +1397,48 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
> > >         return !!*sk->sk_prot->memory_pressure;
> > >  }
> > >
> > > +static inline long
> > > +proto_memory_allocated(const struct proto *prot)
> > > +{
> > > +       return max(0L, atomic_long_read(prot->memory_allocated));
> > > +}
> > > +
> > >  static inline long
> > >  sk_memory_allocated(const struct sock *sk)
> > >  {
> > > -       return atomic_long_read(sk->sk_prot->memory_allocated);
> > > +       return proto_memory_allocated(sk->sk_prot);
> > >  }
> > >
> > > +/* 1 MB per cpu, in page units */
> > > +#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
> > > +
> > >  static inline long
> > >  sk_memory_allocated_add(struct sock *sk, int amt)
> > >  {
> > > -       return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
> > > +       int local_reserve;
> > > +
> > > +       preempt_disable();
> > > +       local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> > > +       if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
> > > +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> > > +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> > > +       }
> > > +       preempt_enable();
> > > +       return sk_memory_allocated(sk);
> > >  }
> > >
> > >  static inline void
> > >  sk_memory_allocated_sub(struct sock *sk, int amt)
> > >  {
> > > -       atomic_long_sub(amt, sk->sk_prot->memory_allocated);
> > > +       int local_reserve;
> > > +
> > > +       preempt_disable();
> > > +       local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
> > > +       if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
> > > +               __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> > > +               atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
> >
> > I would have thought these last two lines would be:
> >
> >                __this_cpu_add(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
> >                atomic_long_sub(local_reserve, sk->sk_prot->memory_allocated);
> >
> > Otherwise I don't see how sk->sk_prot->memory_allocated) ever
> > decreases in these sk_memory_allocated_add/sk_memory_allocated_sub
> > functions?
> >
> > That is, is there a copy-and-paste/typo issue in these two lines? Or
> > is my understanding backwards? (In which case I apologize for the
> > noise!)
>
> local_reserve is negative in that case and adding a negative number is
> subtraction.

Yes, sorry about that. In parallel Soheil just pointed out to me OOB
that the code is correct because at that point in the code we know
that local_reserve is negative...

Sorry for the noise!

neal
Shakeel Butt June 9, 2022, 3:12 p.m. UTC | #7
On Wed, Jun 8, 2022 at 11:34 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> From: Eric Dumazet <edumazet@google.com>
>
> We plan keeping sk->sk_forward_alloc as small as possible
> in future patches.
>
> This means we are going to call sk_memory_allocated_add()
> and sk_memory_allocated_sub() more often.
>
> Implement a per-cpu cache of +1/-1 MB, to reduce number
> of changes to sk->sk_prot->memory_allocated, which
> would otherwise be cause of false sharing.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Shakeel Butt <shakeelb@google.com>
Eric Dumazet June 9, 2022, 3:43 p.m. UTC | #8
On Thu, Jun 9, 2022 at 8:09 AM Neal Cardwell <ncardwell@google.com> wrote:

> Yes, sorry about that. In parallel Soheil just pointed out to me OOB
> that the code is correct because at that point in the code we know
> that local_reserve is negative...
>
> Sorry for the noise!

No worries Neal, I made the same mistake when writing the function :)

Once we have determined that the new pcpu reserve X is outside the
(-1MB .. +1MB) range, we have to transfer it to the shared memory_allocated.

Whatever the value of X, the transfer is the same regardless of the
initial increase/decrease intent:

pcpu_reserve -= X;       // using a this_cpu op, which is IRQ safe
memory_allocated += X;   // using an atomic op, IRQ and SMP safe
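
For example (an illustrative trace assuming 4 KB pages, so
SK_MEMORY_PCPU_RESERVE = 1 << (20 - 12) = 256 pages; the numbers are made up):

	/* add path: per-cpu reserve at 200 pages, amt = 80
	 *   local_reserve = 280   -> 280 >= 256, flush:
	 *   per_cpu_fw_alloc -= 280    (back to 0)
	 *   memory_allocated += 280    (shared counter grows by 280 pages)
	 *
	 * sub path: per-cpu reserve at -250 pages, amt = 20
	 *   local_reserve = -270  -> -270 <= -256, flush:
	 *   per_cpu_fw_alloc -= -270   (back to 0)
	 *   memory_allocated += -270   (shared counter shrinks by 270 pages)
	 */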

Patch

diff --git a/include/net/sock.h b/include/net/sock.h
index 825f8cbf791f02d798f17dd4f7a2659cebb0e98a..59040fee74e7de8d63fbf719f46e172906c134bb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1397,22 +1397,48 @@  static inline bool sk_under_memory_pressure(const struct sock *sk)
 	return !!*sk->sk_prot->memory_pressure;
 }
 
+static inline long
+proto_memory_allocated(const struct proto *prot)
+{
+	return max(0L, atomic_long_read(prot->memory_allocated));
+}
+
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
-	return atomic_long_read(sk->sk_prot->memory_allocated);
+	return proto_memory_allocated(sk->sk_prot);
 }
 
+/* 1 MB per cpu, in page units */
+#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+
 static inline long
 sk_memory_allocated_add(struct sock *sk, int amt)
 {
-	return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
+	int local_reserve;
+
+	preempt_disable();
+	local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+	if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+		__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+		atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+	}
+	preempt_enable();
+	return sk_memory_allocated(sk);
 }
 
 static inline void
 sk_memory_allocated_sub(struct sock *sk, int amt)
 {
-	atomic_long_sub(amt, sk->sk_prot->memory_allocated);
+	int local_reserve;
+
+	preempt_disable();
+	local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
+	if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+		__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
+		atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
+	}
+	preempt_enable();
 }
 
 #define SK_ALLOC_PERCPU_COUNTER_BATCH 16
@@ -1441,12 +1467,6 @@  proto_sockets_allocated_sum_positive(struct proto *prot)
 	return percpu_counter_sum_positive(prot->sockets_allocated);
 }
 
-static inline long
-proto_memory_allocated(struct proto *prot)
-{
-	return atomic_long_read(prot->memory_allocated);
-}
-
 static inline bool
 proto_memory_pressure(struct proto *prot)
 {