
[net-next,1/5] net: napi: Make napi_defer_hard_irqs per-NAPI

Message ID 20240829131214.169977-2-jdamato@fastly.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series Add support for per-NAPI config via netlink

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl fail Tree is dirty after regen; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 54 this patch: 54
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang fail Errors and warnings before: 100 this patch: 102
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 4063 this patch: 4056
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc fail Errors and warnings before: 101 this patch: 102
netdev/source_inline success Was 0 now: 0

Commit Message

Joe Damato Aug. 29, 2024, 1:11 p.m. UTC
Allow per-NAPI defer_hard_irqs setting.

The existing sysfs parameter is respected; writes to sysfs will write to
all NAPI structs for the device and to the net_device napi_defer_hard_irqs
field. Reads from sysfs will read from the net_device field.

The sysfs code was updated to guard against a potential overflow, as the
field is an int but the value passed in is an unsigned long.

The ability to set defer_hard_irqs on specific NAPI instances will be
added in a later commit, via netdev-genl.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Martin Karsten <mkarsten@uwaterloo.ca>
Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
---
 include/linux/netdevice.h | 23 +++++++++++++++++++++++
 net/core/dev.c            | 29 ++++++++++++++++++++++++++---
 net/core/net-sysfs.c      |  5 ++++-
 3 files changed, 53 insertions(+), 4 deletions(-)
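
For illustration, a minimal sketch of the semantics described above, using
the helpers this patch adds (dev, napi and n are placeholder pointers; the
per-NAPI netdev-genl setter only lands later in the series):

	/* sysfs write: fan the value out to the device and every NAPI */
	netdev_set_defer_hard_irqs(dev, 10);

	/* future per-NAPI path (netdev-genl): touch a single instance only */
	napi_set_defer_hard_irqs(napi, 0);

	/* fast-path readers now consult the per-NAPI copy, not the netdev */
	n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);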

Comments

Eric Dumazet Aug. 29, 2024, 1:46 p.m. UTC | #1
On Thu, Aug 29, 2024 at 3:13 PM Joe Damato <jdamato@fastly.com> wrote:
>
> Allow per-NAPI defer_hard_irqs setting.
>
> The existing sysfs parameter is respected; writes to sysfs will write to
> all NAPI structs for the device and the net_device defer_hard_irq field.
> Reads from sysfs will read from the net_device field.
>
> sysfs code was updated to guard against what appears to be a potential
> overflow as the field is an int, but the value passed in is an unsigned
> long.
>
> The ability to set defer_hard_irqs on specific NAPI instances will be
> added in a later commit, via netdev-genl.
>
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> Reviewed-by: Martin Karsten <mkarsten@uwaterloo.ca>
> Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
> ---
>  include/linux/netdevice.h | 23 +++++++++++++++++++++++
>  net/core/dev.c            | 29 ++++++++++++++++++++++++++---
>  net/core/net-sysfs.c      |  5 ++++-
>  3 files changed, 53 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index fce70990b209..7d53380da4c0 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -371,6 +371,7 @@ struct napi_struct {
>         struct list_head        rx_list; /* Pending GRO_NORMAL skbs */
>         int                     rx_count; /* length of rx_list */
>         unsigned int            napi_id;
> +       int                     defer_hard_irqs;
>         struct hrtimer          timer;
>         struct task_struct      *thread;
>         /* control-path-only fields follow */
> @@ -534,6 +535,28 @@ static inline void napi_schedule_irqoff(struct napi_struct *n)
>                 __napi_schedule_irqoff(n);
>  }
>

Since dev->napi_defer_hard_irqs is no longer used in fast path,
please move it outside of the net_device_read_rx group.

Also update Documentation/networking/net_cachelines/net_device.rst

> +/**
> + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
> + * @n: napi struct to get the defer_hard_irqs field from
> + *
> + * Returns the per-NAPI value of the defar_hard_irqs field.
> + */
> +int napi_get_defer_hard_irqs(const struct napi_struct *n);
> +
> +/**
> + * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
> + * @n: napi_struct to set the defer_hard_irqs field
> + * @defer: the value the field should be set to
> + */
> +void napi_set_defer_hard_irqs(struct napi_struct *n, int defer);
> +
> +/**
> + * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev
> + * @netdev: the net_device for which all NAPIs will have their defer_hard_irqs set
> + * @defer: the defer_hard_irqs value to set
> + */
> +void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer);
> +
>  /**
>   * napi_complete_done - NAPI processing complete
>   * @n: NAPI context
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 63987b8b7c85..f7baff0da057 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6212,6 +6212,28 @@ void __napi_schedule_irqoff(struct napi_struct *n)
>  }
>  EXPORT_SYMBOL(__napi_schedule_irqoff);
>
> +int napi_get_defer_hard_irqs(const struct napi_struct *n)
> +{
> +       return READ_ONCE(n->defer_hard_irqs);
> +}
> +EXPORT_SYMBOL_GPL(napi_get_defer_hard_irqs);
> +
> +void napi_set_defer_hard_irqs(struct napi_struct *n, int defer)
> +{
> +       WRITE_ONCE(n->defer_hard_irqs, defer);
> +}
> +EXPORT_SYMBOL_GPL(napi_set_defer_hard_irqs);
> +
> +void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer)
> +{
> +       struct napi_struct *napi;
> +
> +       WRITE_ONCE(netdev->napi_defer_hard_irqs, defer);
> +       list_for_each_entry(napi, &netdev->napi_list, dev_list)
> +               napi_set_defer_hard_irqs(napi, defer);
> +}
> +EXPORT_SYMBOL_GPL(netdev_set_defer_hard_irqs);
> +
>  bool napi_complete_done(struct napi_struct *n, int work_done)
>  {
>         unsigned long flags, val, new, timeout = 0;
> @@ -6230,7 +6252,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
>         if (work_done) {
>                 if (n->gro_bitmask)
>                         timeout = READ_ONCE(n->dev->gro_flush_timeout);
> -               n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
> +               n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
>         }
>         if (n->defer_hard_irqs_count > 0) {
>                 n->defer_hard_irqs_count--;
> @@ -6368,7 +6390,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
>         bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
>
>         if (flags & NAPI_F_PREFER_BUSY_POLL) {
> -               napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
> +               napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
>                 timeout = READ_ONCE(napi->dev->gro_flush_timeout);
>                 if (napi->defer_hard_irqs_count && timeout) {
>                         hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
> @@ -6650,6 +6672,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
>         INIT_HLIST_NODE(&napi->napi_hash_node);
>         hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
>         napi->timer.function = napi_watchdog;
> +       napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
>         init_gro_hash(napi);
>         napi->skb = NULL;
>         INIT_LIST_HEAD(&napi->rx_list);
> @@ -11032,7 +11055,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
>
>         if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
>                 dev->gro_flush_timeout = 20000;
> -               dev->napi_defer_hard_irqs = 1;
> +               netdev_set_defer_hard_irqs(dev, 1);
>         }
>  }
>  EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index 0e2084ce7b75..8272f0144d81 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -425,7 +425,10 @@ NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
>
>  static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
>  {
> -       WRITE_ONCE(dev->napi_defer_hard_irqs, val);
> +       if (val > S32_MAX)
> +               return -EINVAL;
> +
> +       netdev_set_defer_hard_irqs(dev, (int)val);
>         return 0;
>  }
>
> --
> 2.25.1
>
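For context, a rough editorial sketch of the follow-up Eric is asking for
(not code from this thread, and assuming the existing __cacheline_group
markers, the CACHELINE_ASSERT_GROUP_MEMBER() helper and the current int
type of the field): the declaration leaves the net_device_read_rx fast-path
group in struct net_device, and the matching assertion in net/core/dev.c
goes away, along the lines of:

	--- a/include/linux/netdevice.h
	+++ b/include/linux/netdevice.h
	 	__cacheline_group_begin(net_device_read_rx);
	 	...
	-	int			napi_defer_hard_irqs;
	 	__cacheline_group_end(net_device_read_rx);
	 	...
	+	/* control path only, no longer read in the fast path */
	+	int			napi_defer_hard_irqs;

	--- a/net/core/dev.c
	+++ b/net/core/dev.c
	-	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx,
	-				      napi_defer_hard_irqs);

Documentation/networking/net_cachelines/net_device.rst would then be
updated to match the new layout.
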
Jakub Kicinski Aug. 29, 2024, 10:05 p.m. UTC | #2
On Thu, 29 Aug 2024 13:11:57 +0000 Joe Damato wrote:
> +/**
> + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
> + * @n: napi struct to get the defer_hard_irqs field from
> + *
> + * Returns the per-NAPI value of the defar_hard_irqs field.
> + */
> +int napi_get_defer_hard_irqs(const struct napi_struct *n);
> +
> +/**
> + * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
> + * @n: napi_struct to set the defer_hard_irqs field
> + * @defer: the value the field should be set to
> + */
> +void napi_set_defer_hard_irqs(struct napi_struct *n, int defer);
> +
> +/**
> + * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev
> + * @netdev: the net_device for which all NAPIs will have their defer_hard_irqs set
> + * @defer: the defer_hard_irqs value to set
> + */
> +void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer);

Do you expect drivers or modules to call these?
I'm not sure we need the wrappers just to cover up the READ/WRITE_ONCE()
but if you do want to keep them they can be static inlines in
net/core/dev.h

nit: IIUC the kdoc should go on the definition, not the declaration.
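
For reference, the static-inline variant Jakub suggests would look roughly
like this in net/core/dev.h (a sketch derived from the exported helpers in
this patch, not necessarily the code that gets merged):

	static inline int napi_get_defer_hard_irqs(const struct napi_struct *n)
	{
		return READ_ONCE(n->defer_hard_irqs);
	}

	static inline void napi_set_defer_hard_irqs(struct napi_struct *n, int defer)
	{
		WRITE_ONCE(n->defer_hard_irqs, defer);
	}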
Simon Horman Aug. 30, 2024, 8:36 a.m. UTC | #3
On Thu, Aug 29, 2024 at 01:11:57PM +0000, Joe Damato wrote:
> Allow per-NAPI defer_hard_irqs setting.
> 
> The existing sysfs parameter is respected; writes to sysfs will write to
> all NAPI structs for the device and the net_device defer_hard_irq field.
> Reads from sysfs will read from the net_device field.
> 
> sysfs code was updated to guard against what appears to be a potential
> overflow as the field is an int, but the value passed in is an unsigned
> long.
> 
> The ability to set defer_hard_irqs on specific NAPI instances will be
> added in a later commit, via netdev-genl.
> 
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> Reviewed-by: Martin Karsten <mkarsten@uwaterloo.ca>
> Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
> ---
>  include/linux/netdevice.h | 23 +++++++++++++++++++++++
>  net/core/dev.c            | 29 ++++++++++++++++++++++++++---
>  net/core/net-sysfs.c      |  5 ++++-
>  3 files changed, 53 insertions(+), 4 deletions(-)

...

> @@ -534,6 +535,28 @@ static inline void napi_schedule_irqoff(struct napi_struct *n)
>  		__napi_schedule_irqoff(n);
>  }
>  
> +/**
> + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
> + * @n: napi struct to get the defer_hard_irqs field from
> + *
> + * Returns the per-NAPI value of the defar_hard_irqs field.
> + */
> +int napi_get_defer_hard_irqs(const struct napi_struct *n);

Hi Joe,

As it looks like there will be a v2 anyway, a minor nit from my side.

Thanks for documenting the return value, but I believe that
./scripts/kernel-doc -none -Wall expects "Return: " or "Returns: "

Likewise in patch 3/5.

...
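
Concretely, kernel-doc expects a dedicated "Return:" (or "Returns:")
section, so the comment would become something like this sketch:

	/**
	 * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
	 * @n: napi struct to get the defer_hard_irqs field from
	 *
	 * Return: the per-NAPI value of the defer_hard_irqs field.
	 */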
Joe Damato Aug. 30, 2024, 9:11 a.m. UTC | #4
On Fri, Aug 30, 2024 at 09:36:41AM +0100, Simon Horman wrote:
> On Thu, Aug 29, 2024 at 01:11:57PM +0000, Joe Damato wrote:
> > Allow per-NAPI defer_hard_irqs setting.
> > 
> > The existing sysfs parameter is respected; writes to sysfs will write to
> > all NAPI structs for the device and the net_device defer_hard_irq field.
> > Reads from sysfs will read from the net_device field.
> > 
> > sysfs code was updated to guard against what appears to be a potential
> > overflow as the field is an int, but the value passed in is an unsigned
> > long.
> > 
> > The ability to set defer_hard_irqs on specific NAPI instances will be
> > added in a later commit, via netdev-genl.
> > 
> > Signed-off-by: Joe Damato <jdamato@fastly.com>
> > Reviewed-by: Martin Karsten <mkarsten@uwaterloo.ca>
> > Tested-by: Martin Karsten <mkarsten@uwaterloo.ca>
> > ---
> >  include/linux/netdevice.h | 23 +++++++++++++++++++++++
> >  net/core/dev.c            | 29 ++++++++++++++++++++++++++---
> >  net/core/net-sysfs.c      |  5 ++++-
> >  3 files changed, 53 insertions(+), 4 deletions(-)
> 
> ...
> 
> > @@ -534,6 +535,28 @@ static inline void napi_schedule_irqoff(struct napi_struct *n)
> >  		__napi_schedule_irqoff(n);
> >  }
> >  
> > +/**
> > + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
> > + * @n: napi struct to get the defer_hard_irqs field from
> > + *
> > + * Returns the per-NAPI value of the defar_hard_irqs field.
> > + */
> > +int napi_get_defer_hard_irqs(const struct napi_struct *n);
> 
> Hi Joe,
> 
> As it looks like there will be a v2 anyway, a minor nit from my side.
> 
> Thanks for documenting the return value, but I believe that
> ./scripts/kernel-doc -none -Wall expects "Return: " or "Returns: "
> 
> Likewise in patch 3/5.

Thanks Simon, will make sure to take care of this in the v2.
Joe Damato Aug. 30, 2024, 9:14 a.m. UTC | #5
On Thu, Aug 29, 2024 at 03:05:02PM -0700, Jakub Kicinski wrote:
> On Thu, 29 Aug 2024 13:11:57 +0000 Joe Damato wrote:
> > +/**
> > + * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
> > + * @n: napi struct to get the defer_hard_irqs field from
> > + *
> > + * Returns the per-NAPI value of the defar_hard_irqs field.
> > + */
> > +int napi_get_defer_hard_irqs(const struct napi_struct *n);
> > +
> > +/**
> > + * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
> > + * @n: napi_struct to set the defer_hard_irqs field
> > + * @defer: the value the field should be set to
> > + */
> > +void napi_set_defer_hard_irqs(struct napi_struct *n, int defer);
> > +
> > +/**
> > + * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev
> > + * @netdev: the net_device for which all NAPIs will have their defer_hard_irqs set
> > + * @defer: the defer_hard_irqs value to set
> > + */
> > +void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer);
> 
> Do you expect drivers or modules to call these?
> I'm not sure we need the wrappers just to cover up the READ/WRITE_ONCE()
> but if you do want to keep them they can be static inlines in
> net/core/dev.h

It looked like there were a few call sites for these in
net/core/dev.c, the sysfs code, and the netlink code.

I figured having it all wrapped up somewhere might be better than
repeating the READ/WRITE_ONCE() stuff.

I have no preference on whether there are wrappers or not, though.
If you'd like me to drop the wrappers for the v2, let me know.

Otherwise: I'll make them static inlines as you suggested.

Let me know if you have a preference here because I am neutral.

> nit: IIUC the kdoc should go on the definition, not the declaration.

My mistake; thanks. I suppose if I move them as static inlines, I'll
just move the kdoc as well and the problem solves itself :)
kernel test robot Aug. 30, 2024, 4:50 p.m. UTC | #6
Hi Joe,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Joe-Damato/net-napi-Make-napi_defer_hard_irqs-per-NAPI/20240829-211617
base:   net-next/main
patch link:    https://lore.kernel.org/r/20240829131214.169977-2-jdamato%40fastly.com
patch subject: [PATCH net-next 1/5] net: napi: Make napi_defer_hard_irqs per-NAPI
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20240831/202408310038.VztWz8YR-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240831/202408310038.VztWz8YR-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408310038.VztWz8YR-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from drivers/net/ethernet/intel/idpf/idpf_dev.c:4:
   In file included from drivers/net/ethernet/intel/idpf/idpf.h:22:
>> drivers/net/ethernet/intel/idpf/idpf_txrx.h:475:1: error: static assertion failed due to requirement '__builtin_offsetof(struct idpf_q_vector, __cacheline_group_end__read_write) - (__builtin_offsetof(struct idpf_q_vector, __cacheline_group_begin__read_write) + sizeof ((((struct idpf_q_vector *)0)->__cacheline_group_begin__read_write))) == (424 + 2 * sizeof(struct dim))': offsetof(struct idpf_q_vector, __cacheline_group_end__read_write) - offsetofend(struct idpf_q_vector, __cacheline_group_begin__read_write) == (424 + 2 * sizeof(struct dim))
     475 | libeth_cacheline_set_assert(struct idpf_q_vector, 104,
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     476 |                             424 + 2 * sizeof(struct dim),
         |                             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     477 |                             8 + sizeof(cpumask_var_t));
         |                             ~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/net/libeth/cache.h:62:2: note: expanded from macro 'libeth_cacheline_set_assert'
      62 |         libeth_cacheline_group_assert(type, read_write, rw);                  \
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/net/libeth/cache.h:17:16: note: expanded from macro 'libeth_cacheline_group_assert'
      17 |         static_assert(offsetof(type, __cacheline_group_end__##grp) -          \
         |         ~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      18 |                       offsetofend(type, __cacheline_group_begin__##grp) ==    \
         |                       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      19 |                       (sz))
         |                       ~~~~~
   include/linux/stddef.h:16:32: note: expanded from macro 'offsetof'
      16 | #define offsetof(TYPE, MEMBER)  __builtin_offsetof(TYPE, MEMBER)
         |                                 ^
   include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
      77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
         |                                  ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   drivers/net/ethernet/intel/idpf/idpf_txrx.h:475:1: note: expression evaluates to '768 == 760'
     475 | libeth_cacheline_set_assert(struct idpf_q_vector, 104,
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     476 |                             424 + 2 * sizeof(struct dim),
         |                             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     477 |                             8 + sizeof(cpumask_var_t));
         |                             ~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/net/libeth/cache.h:62:2: note: expanded from macro 'libeth_cacheline_set_assert'
      62 |         libeth_cacheline_group_assert(type, read_write, rw);                  \
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/net/libeth/cache.h:18:59: note: expanded from macro 'libeth_cacheline_group_assert'
      17 |         static_assert(offsetof(type, __cacheline_group_end__##grp) -          \
         |         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      18 |                       offsetofend(type, __cacheline_group_begin__##grp) ==    \
         |                       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~
      19 |                       (sz))
         |                       ~~~~~
   include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
      77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
         |                                  ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   1 error generated.
--
   In file included from drivers/net/ethernet/intel/idpf/idpf_main.c:4:
   In file included from drivers/net/ethernet/intel/idpf/idpf.h:22:
>> drivers/net/ethernet/intel/idpf/idpf_txrx.h:475:1: error: static assertion failed due to requirement '__builtin_offsetof(struct idpf_q_vector, __cacheline_group_end__read_write) - (__builtin_offsetof(struct idpf_q_vector, __cacheline_group_begin__read_write) + sizeof ((((struct idpf_q_vector *)0)->__cacheline_group_begin__read_write))) == (424 + 2 * sizeof(struct dim))': offsetof(struct idpf_q_vector, __cacheline_group_end__read_write) - offsetofend(struct idpf_q_vector, __cacheline_group_begin__read_write) == (424 + 2 * sizeof(struct dim))
     475 | libeth_cacheline_set_assert(struct idpf_q_vector, 104,
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     476 |                             424 + 2 * sizeof(struct dim),
         |                             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     477 |                             8 + sizeof(cpumask_var_t));
         |                             ~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/net/libeth/cache.h:62:2: note: expanded from macro 'libeth_cacheline_set_assert'
      62 |         libeth_cacheline_group_assert(type, read_write, rw);                  \
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/net/libeth/cache.h:17:16: note: expanded from macro 'libeth_cacheline_group_assert'
      17 |         static_assert(offsetof(type, __cacheline_group_end__##grp) -          \
         |         ~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      18 |                       offsetofend(type, __cacheline_group_begin__##grp) ==    \
         |                       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      19 |                       (sz))
         |                       ~~~~~
   include/linux/stddef.h:16:32: note: expanded from macro 'offsetof'
      16 | #define offsetof(TYPE, MEMBER)  __builtin_offsetof(TYPE, MEMBER)
         |                                 ^
   include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
      77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
         |                                  ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   drivers/net/ethernet/intel/idpf/idpf_txrx.h:475:1: note: expression evaluates to '768 == 760'
     475 | libeth_cacheline_set_assert(struct idpf_q_vector, 104,
         | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     476 |                             424 + 2 * sizeof(struct dim),
         |                             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     477 |                             8 + sizeof(cpumask_var_t));
         |                             ~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/net/libeth/cache.h:62:2: note: expanded from macro 'libeth_cacheline_set_assert'
      62 |         libeth_cacheline_group_assert(type, read_write, rw);                  \
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/net/libeth/cache.h:18:59: note: expanded from macro 'libeth_cacheline_group_assert'
      17 |         static_assert(offsetof(type, __cacheline_group_end__##grp) -          \
         |         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      18 |                       offsetofend(type, __cacheline_group_begin__##grp) ==    \
         |                       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~
      19 |                       (sz))
         |                       ~~~~~
   include/linux/build_bug.h:77:50: note: expanded from macro 'static_assert'
      77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
         |                                  ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/build_bug.h:78:56: note: expanded from macro '__static_assert'
      78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
         |                                                        ^~~~
   drivers/net/ethernet/intel/idpf/idpf_main.c:167:39: warning: shift count >= width of type [-Wshift-count-overflow]
     167 |         err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
         |                                              ^~~~~~~~~~~~~~~~
   include/linux/dma-mapping.h:77:54: note: expanded from macro 'DMA_BIT_MASK'
      77 | #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
         |                                                      ^ ~~~
   1 warning and 1 error generated.


vim +475 drivers/net/ethernet/intel/idpf/idpf_txrx.h

4930fbf419a72d Pavan Kumar Linga 2023-08-07  412  
4930fbf419a72d Pavan Kumar Linga 2023-08-07  413  /**
4930fbf419a72d Pavan Kumar Linga 2023-08-07  414   * struct idpf_q_vector
1c325aac10a82f Alan Brady        2023-08-07  415   * @vport: Vport back pointer
5a816aae2d463d Alexander Lobakin 2024-06-20  416   * @num_rxq: Number of RX queues
d4d5587182664b Pavan Kumar Linga 2023-08-07  417   * @num_txq: Number of TX queues
5a816aae2d463d Alexander Lobakin 2024-06-20  418   * @num_bufq: Number of buffer queues
e4891e4687c8dd Alexander Lobakin 2024-06-20  419   * @num_complq: number of completion queues
5a816aae2d463d Alexander Lobakin 2024-06-20  420   * @rx: Array of RX queues to service
1c325aac10a82f Alan Brady        2023-08-07  421   * @tx: Array of TX queues to service
5a816aae2d463d Alexander Lobakin 2024-06-20  422   * @bufq: Array of buffer queues to service
e4891e4687c8dd Alexander Lobakin 2024-06-20  423   * @complq: array of completion queues
5a816aae2d463d Alexander Lobakin 2024-06-20  424   * @intr_reg: See struct idpf_intr_reg
5a816aae2d463d Alexander Lobakin 2024-06-20  425   * @napi: napi handler
5a816aae2d463d Alexander Lobakin 2024-06-20  426   * @total_events: Number of interrupts processed
c2d548cad1508d Joshua Hay        2023-08-07  427   * @tx_dim: Data for TX net_dim algorithm
1c325aac10a82f Alan Brady        2023-08-07  428   * @tx_itr_value: TX interrupt throttling rate
1c325aac10a82f Alan Brady        2023-08-07  429   * @tx_intr_mode: Dynamic ITR or not
1c325aac10a82f Alan Brady        2023-08-07  430   * @tx_itr_idx: TX ITR index
3a8845af66edb3 Alan Brady        2023-08-07  431   * @rx_dim: Data for RX net_dim algorithm
95af467d9a4e3b Alan Brady        2023-08-07  432   * @rx_itr_value: RX interrupt throttling rate
95af467d9a4e3b Alan Brady        2023-08-07  433   * @rx_intr_mode: Dynamic ITR or not
95af467d9a4e3b Alan Brady        2023-08-07  434   * @rx_itr_idx: RX ITR index
5a816aae2d463d Alexander Lobakin 2024-06-20  435   * @v_idx: Vector index
bf9bf7042a38eb Alexander Lobakin 2024-06-20  436   * @affinity_mask: CPU affinity mask
4930fbf419a72d Pavan Kumar Linga 2023-08-07  437   */
4930fbf419a72d Pavan Kumar Linga 2023-08-07  438  struct idpf_q_vector {
5a816aae2d463d Alexander Lobakin 2024-06-20  439  	__cacheline_group_begin_aligned(read_mostly);
1c325aac10a82f Alan Brady        2023-08-07  440  	struct idpf_vport *vport;
1c325aac10a82f Alan Brady        2023-08-07  441  
5a816aae2d463d Alexander Lobakin 2024-06-20  442  	u16 num_rxq;
d4d5587182664b Pavan Kumar Linga 2023-08-07  443  	u16 num_txq;
5a816aae2d463d Alexander Lobakin 2024-06-20  444  	u16 num_bufq;
e4891e4687c8dd Alexander Lobakin 2024-06-20  445  	u16 num_complq;
5a816aae2d463d Alexander Lobakin 2024-06-20  446  	struct idpf_rx_queue **rx;
e4891e4687c8dd Alexander Lobakin 2024-06-20  447  	struct idpf_tx_queue **tx;
5a816aae2d463d Alexander Lobakin 2024-06-20  448  	struct idpf_buf_queue **bufq;
e4891e4687c8dd Alexander Lobakin 2024-06-20  449  	struct idpf_compl_queue **complq;
e4891e4687c8dd Alexander Lobakin 2024-06-20  450  
5a816aae2d463d Alexander Lobakin 2024-06-20  451  	struct idpf_intr_reg intr_reg;
5a816aae2d463d Alexander Lobakin 2024-06-20  452  	__cacheline_group_end_aligned(read_mostly);
5a816aae2d463d Alexander Lobakin 2024-06-20  453  
5a816aae2d463d Alexander Lobakin 2024-06-20  454  	__cacheline_group_begin_aligned(read_write);
5a816aae2d463d Alexander Lobakin 2024-06-20  455  	struct napi_struct napi;
5a816aae2d463d Alexander Lobakin 2024-06-20  456  	u16 total_events;
5a816aae2d463d Alexander Lobakin 2024-06-20  457  
c2d548cad1508d Joshua Hay        2023-08-07  458  	struct dim tx_dim;
1c325aac10a82f Alan Brady        2023-08-07  459  	u16 tx_itr_value;
1c325aac10a82f Alan Brady        2023-08-07  460  	bool tx_intr_mode;
1c325aac10a82f Alan Brady        2023-08-07  461  	u32 tx_itr_idx;
1c325aac10a82f Alan Brady        2023-08-07  462  
3a8845af66edb3 Alan Brady        2023-08-07  463  	struct dim rx_dim;
95af467d9a4e3b Alan Brady        2023-08-07  464  	u16 rx_itr_value;
95af467d9a4e3b Alan Brady        2023-08-07  465  	bool rx_intr_mode;
95af467d9a4e3b Alan Brady        2023-08-07  466  	u32 rx_itr_idx;
5a816aae2d463d Alexander Lobakin 2024-06-20  467  	__cacheline_group_end_aligned(read_write);
95af467d9a4e3b Alan Brady        2023-08-07  468  
5a816aae2d463d Alexander Lobakin 2024-06-20  469  	__cacheline_group_begin_aligned(cold);
5a816aae2d463d Alexander Lobakin 2024-06-20  470  	u16 v_idx;
bf9bf7042a38eb Alexander Lobakin 2024-06-20  471  
bf9bf7042a38eb Alexander Lobakin 2024-06-20  472  	cpumask_var_t affinity_mask;
5a816aae2d463d Alexander Lobakin 2024-06-20  473  	__cacheline_group_end_aligned(cold);
4930fbf419a72d Pavan Kumar Linga 2023-08-07  474  };
5a816aae2d463d Alexander Lobakin 2024-06-20 @475  libeth_cacheline_set_assert(struct idpf_q_vector, 104,
5a816aae2d463d Alexander Lobakin 2024-06-20  476  			    424 + 2 * sizeof(struct dim),
5a816aae2d463d Alexander Lobakin 2024-06-20  477  			    8 + sizeof(cpumask_var_t));
0fe45467a1041e Pavan Kumar Linga 2023-08-07  478
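
The failure itself is mechanical: the new int in struct napi_struct grows
(with padding) the read_write cacheline group of struct idpf_q_vector by 8
bytes, so the hard-coded size in the libeth_cacheline_set_assert() above no
longer matches (768 vs. the expected 760). A respin would need to bump the
expected size accordingly; a sketch, with the exact arithmetic left to the
idpf maintainers:

	libeth_cacheline_set_assert(struct idpf_q_vector, 104,
				    424 + 2 * sizeof(struct dim) + 8,
				    8 + sizeof(cpumask_var_t));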
Jakub Kicinski Aug. 30, 2024, 8:21 p.m. UTC | #7
On Fri, 30 Aug 2024 10:14:41 +0100 Joe Damato wrote:
> Otherwise: I'll make them static inlines as you suggested.
> 
> Let me know if you have a preference here because I am neutral.

No strong preference either, static inlines seem like a good
middle ground :)
Joe Damato Aug. 30, 2024, 8:23 p.m. UTC | #8
On Fri, Aug 30, 2024 at 01:21:12PM -0700, Jakub Kicinski wrote:
> On Fri, 30 Aug 2024 10:14:41 +0100 Joe Damato wrote:
> > Otherwise: I'll make them static inlines as you suggested.
> > 
> > Let me know if you have a preference here because I am neutral.
> 
> No strong preference either, static inlines seem like a good
> middle ground :)

Ack. Already queued up in my v2 branch.

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fce70990b209..7d53380da4c0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -371,6 +371,7 @@  struct napi_struct {
 	struct list_head	rx_list; /* Pending GRO_NORMAL skbs */
 	int			rx_count; /* length of rx_list */
 	unsigned int		napi_id;
+	int			defer_hard_irqs;
 	struct hrtimer		timer;
 	struct task_struct	*thread;
 	/* control-path-only fields follow */
@@ -534,6 +535,28 @@  static inline void napi_schedule_irqoff(struct napi_struct *n)
 		__napi_schedule_irqoff(n);
 }
 
+/**
+ * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
+ * @n: napi struct to get the defer_hard_irqs field from
+ *
+ * Returns the per-NAPI value of the defer_hard_irqs field.
+ */
+int napi_get_defer_hard_irqs(const struct napi_struct *n);
+
+/**
+ * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
+ * @n: napi_struct to set the defer_hard_irqs field
+ * @defer: the value the field should be set to
+ */
+void napi_set_defer_hard_irqs(struct napi_struct *n, int defer);
+
+/**
+ * netdev_set_defer_hard_irqs - set defer_hard_irqs for all NAPIs of a netdev
+ * @netdev: the net_device for which all NAPIs will have their defer_hard_irqs set
+ * @defer: the defer_hard_irqs value to set
+ */
+void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer);
+
 /**
  * napi_complete_done - NAPI processing complete
  * @n: NAPI context
diff --git a/net/core/dev.c b/net/core/dev.c
index 63987b8b7c85..f7baff0da057 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6212,6 +6212,28 @@  void __napi_schedule_irqoff(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule_irqoff);
 
+int napi_get_defer_hard_irqs(const struct napi_struct *n)
+{
+	return READ_ONCE(n->defer_hard_irqs);
+}
+EXPORT_SYMBOL_GPL(napi_get_defer_hard_irqs);
+
+void napi_set_defer_hard_irqs(struct napi_struct *n, int defer)
+{
+	WRITE_ONCE(n->defer_hard_irqs, defer);
+}
+EXPORT_SYMBOL_GPL(napi_set_defer_hard_irqs);
+
+void netdev_set_defer_hard_irqs(struct net_device *netdev, int defer)
+{
+	struct napi_struct *napi;
+
+	WRITE_ONCE(netdev->napi_defer_hard_irqs, defer);
+	list_for_each_entry(napi, &netdev->napi_list, dev_list)
+		napi_set_defer_hard_irqs(napi, defer);
+}
+EXPORT_SYMBOL_GPL(netdev_set_defer_hard_irqs);
+
 bool napi_complete_done(struct napi_struct *n, int work_done)
 {
 	unsigned long flags, val, new, timeout = 0;
@@ -6230,7 +6252,7 @@  bool napi_complete_done(struct napi_struct *n, int work_done)
 	if (work_done) {
 		if (n->gro_bitmask)
 			timeout = READ_ONCE(n->dev->gro_flush_timeout);
-		n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
+		n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
 	}
 	if (n->defer_hard_irqs_count > 0) {
 		n->defer_hard_irqs_count--;
@@ -6368,7 +6390,7 @@  static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
 	bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
 
 	if (flags & NAPI_F_PREFER_BUSY_POLL) {
-		napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
+		napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
 		timeout = READ_ONCE(napi->dev->gro_flush_timeout);
 		if (napi->defer_hard_irqs_count && timeout) {
 			hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
@@ -6650,6 +6672,7 @@  void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 	INIT_HLIST_NODE(&napi->napi_hash_node);
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
+	napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
 	init_gro_hash(napi);
 	napi->skb = NULL;
 	INIT_LIST_HEAD(&napi->rx_list);
@@ -11032,7 +11055,7 @@  void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
 
 	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
 		dev->gro_flush_timeout = 20000;
-		dev->napi_defer_hard_irqs = 1;
+		netdev_set_defer_hard_irqs(dev, 1);
 	}
 }
 EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 0e2084ce7b75..8272f0144d81 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -425,7 +425,10 @@  NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
 
 static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
 {
-	WRITE_ONCE(dev->napi_defer_hard_irqs, val);
+	if (val > S32_MAX)
+		return -EINVAL;
+
+	netdev_set_defer_hard_irqs(dev, (int)val);
 	return 0;
 }
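
With this guard in place, an oversized sysfs write is rejected before any
NAPI state is touched; a minimal illustration of the behaviour (hypothetical
values; change_napi_defer_hard_irqs() is static and only reachable through
the sysfs store path):

	change_napi_defer_hard_irqs(dev, 0x80000000UL); /* > S32_MAX, returns -EINVAL */
	change_napi_defer_hard_irqs(dev, 100);          /* fans out to all NAPIs, returns 0 */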