diff mbox series

[RFC,net-next,v2,3/9] netdev: add netdev_rx_queue_restart()

Message ID 20240502045410.3524155-4-dw@davidwei.uk (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series bnxt: implement queue api | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 955 this patch: 955
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 938 this patch: 938
netdev/verify_signedoff fail author Signed-off-by missing
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 967 this patch: 967
netdev/checkpatch warning WARNING: Improper SPDX comment style for 'net/core/netdev_rx_queue.c', please use '//' instead WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

David Wei May 2, 2024, 4:54 a.m. UTC
From: Mina Almasry <almasrymina@google.com>

Add netdev_rx_queue_restart() function to netdev_rx_queue.h. This is
taken from Mina's work in [1] with a slight modification of taking
rtnl_lock() during the queue stop and start ops.

For bnxt specifically, if the firmware doesn't support
BNXT_RST_RING_SP_EVENT, then ndo_queue_stop() returns -EOPNOTSUPP and
the whole restart fails. Unlike bnxt_rx_ring_reset(), there is no
attempt to reset the whole device.

[1]: https://lore.kernel.org/linux-kernel/20240403002053.2376017-6-almasrymina@google.com/#t

Signed-off-by: David Wei <dw@davidwei.uk>
---
 include/net/netdev_rx_queue.h |  3 ++
 net/core/Makefile             |  1 +
 net/core/netdev_rx_queue.c    | 58 +++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+)
 create mode 100644 net/core/netdev_rx_queue.c

Comments

Mina Almasry May 2, 2024, 5:27 p.m. UTC | #1
On Wed, May 1, 2024 at 9:54 PM David Wei <dw@davidwei.uk> wrote:
>
> From: Mina Almasry <almasrymina@google.com>
>
> Add netdev_rx_queue_restart() function to netdev_rx_queue.h. This is
> taken from Mina's work in [1] with a slight modification of taking
> rtnl_lock() during the queue stop and start ops.
>
> For bnxt specifically, if the firmware doesn't support
> BNXT_RST_RING_SP_EVENT, then ndo_queue_stop() returns -EOPNOTSUPP and
> the whole restart fails. Unlike bnxt_rx_ring_reset(), there is no
> attempt to reset the whole device.
>
> [1]: https://lore.kernel.org/linux-kernel/20240403002053.2376017-6-almasrymina@google.com/#t
>
> Signed-off-by: David Wei <dw@davidwei.uk>
> ---
>  include/net/netdev_rx_queue.h |  3 ++
>  net/core/Makefile             |  1 +
>  net/core/netdev_rx_queue.c    | 58 +++++++++++++++++++++++++++++++++++
>  3 files changed, 62 insertions(+)
>  create mode 100644 net/core/netdev_rx_queue.c
>
> diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
> index aa1716fb0e53..e78ca52d67fb 100644
> --- a/include/net/netdev_rx_queue.h
> +++ b/include/net/netdev_rx_queue.h
> @@ -54,4 +54,7 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
>         return index;
>  }
>  #endif
> +
> +int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
> +
>  #endif
> diff --git a/net/core/Makefile b/net/core/Makefile
> index 21d6fbc7e884..f2aa63c167a3 100644
> --- a/net/core/Makefile
> +++ b/net/core/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
>
>  obj-y += net-sysfs.o
>  obj-y += hotdata.o
> +obj-y += netdev_rx_queue.o
>  obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
>  obj-$(CONFIG_PROC_FS) += net-procfs.o
>  obj-$(CONFIG_NET_PKTGEN) += pktgen.o
> diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
> new file mode 100644
> index 000000000000..9633fb36f6d1
> --- /dev/null
> +++ b/net/core/netdev_rx_queue.c
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include <linux/netdevice.h>
> +#include <net/netdev_queues.h>
> +#include <net/netdev_rx_queue.h>
> +
> +int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq)
> +{
> +       void *new_mem;
> +       void *old_mem;
> +       int err;
> +
> +       if (!dev->queue_mgmt_ops->ndo_queue_stop ||
> +           !dev->queue_mgmt_ops->ndo_queue_mem_free ||
> +           !dev->queue_mgmt_ops->ndo_queue_mem_alloc ||
> +           !dev->queue_mgmt_ops->ndo_queue_start)
> +               return -EOPNOTSUPP;
> +
> +       new_mem = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, rxq);
> +       if (!new_mem)
> +               return -ENOMEM;
> +
> +       rtnl_lock();
> +       err = dev->queue_mgmt_ops->ndo_queue_stop(dev, rxq, &old_mem);
> +       if (err)
> +               goto err_free_new_mem;
> +
> +       err = dev->queue_mgmt_ops->ndo_queue_start(dev, rxq, new_mem);
> +       if (err)
> +               goto err_start_queue;
> +       rtnl_unlock();
> +
> +       dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
> +
> +       return 0;
> +
> +err_start_queue:
> +       /* Restarting the queue with old_mem should be successful as we haven't
> +        * changed any of the queue configuration, and there is not much we can
> +        * do to recover from a failure here.
> +        *
> +        * WARN if the we fail to recover the old rx queue, and at least free
> +        * old_mem so we don't also leak that.
> +        */
> +       if (dev->queue_mgmt_ops->ndo_queue_start(dev, rxq, old_mem)) {
> +               WARN(1,
> +                    "Failed to restart old queue in error path. RX queue %d may be unhealthy.",
> +                    rxq);
> +               dev->queue_mgmt_ops->ndo_queue_mem_free(dev, &old_mem);
> +       }
> +
> +err_free_new_mem:
> +       dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem);
> +       rtnl_unlock();
> +
> +       return err;
> +}

The function looks good to me. It's very similar to what we are doing with GVE.

> +EXPORT_SYMBOL_GPL(netdev_rx_queue_restart);

I would still prefer not to export this, unless necessary, and it
seems it's not at the moment (we only need to call it from core net
and core io uring which doesn't need an export).

Unexporting later, as far as my primitive understanding goes, is maybe
tricky because it may break out of tree drivers that decided to call
this. I don't feel strongly about unexporting, but someone else may.
Simon Horman May 4, 2024, 12:20 p.m. UTC | #2
On Wed, May 01, 2024 at 09:54:04PM -0700, David Wei wrote:
> From: Mina Almasry <almasrymina@google.com>
> 
> Add netdev_rx_queue_restart() function to netdev_rx_queue.h. This is
> taken from Mina's work in [1] with a slight modification of taking
> rtnl_lock() during the queue stop and start ops.
> 
> For bnxt specifically, if the firmware doesn't support
> BNXT_RST_RING_SP_EVENT, then ndo_queue_stop() returns -EOPNOTSUPP and
> the whole restart fails. Unlike bnxt_rx_ring_reset(), there is no
> attempt to reset the whole device.
> 
> [1]: https://lore.kernel.org/linux-kernel/20240403002053.2376017-6-almasrymina@google.com/#t
> 
> Signed-off-by: David Wei <dw@davidwei.uk>

nit: Mina's From line is above, but there is no corresponding Signed-off-by
     line here.

> ---
>  include/net/netdev_rx_queue.h |  3 ++
>  net/core/Makefile             |  1 +
>  net/core/netdev_rx_queue.c    | 58 +++++++++++++++++++++++++++++++++++
>  3 files changed, 62 insertions(+)
>  create mode 100644 net/core/netdev_rx_queue.c
> 
> diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
> index aa1716fb0e53..e78ca52d67fb 100644
> --- a/include/net/netdev_rx_queue.h
> +++ b/include/net/netdev_rx_queue.h
> @@ -54,4 +54,7 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
>  	return index;
>  }
>  #endif
> +
> +int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
> +
>  #endif
> diff --git a/net/core/Makefile b/net/core/Makefile
> index 21d6fbc7e884..f2aa63c167a3 100644
> --- a/net/core/Makefile
> +++ b/net/core/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
>  
>  obj-y += net-sysfs.o
>  obj-y += hotdata.o
> +obj-y += netdev_rx_queue.o
>  obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
>  obj-$(CONFIG_PROC_FS) += net-procfs.o
>  obj-$(CONFIG_NET_PKTGEN) += pktgen.o
> diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
> new file mode 100644
> index 000000000000..9633fb36f6d1
> --- /dev/null
> +++ b/net/core/netdev_rx_queue.c
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0 */

nit: my understanding is that, as a .c file, the correct SPDX format is:

// SPDX-License-Identifier: GPL-2.0

...
David Wei May 6, 2024, 12:38 a.m. UTC | #3
On 2024-05-02 10:27, Mina Almasry wrote:
> On Wed, May 1, 2024 at 9:54 PM David Wei <dw@davidwei.uk> wrote:
>>

...

>> +int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq)
>> +{
>> +       void *new_mem;
>> +       void *old_mem;
>> +       int err;
>> +
>> +       if (!dev->queue_mgmt_ops->ndo_queue_stop ||
>> +           !dev->queue_mgmt_ops->ndo_queue_mem_free ||
>> +           !dev->queue_mgmt_ops->ndo_queue_mem_alloc ||
>> +           !dev->queue_mgmt_ops->ndo_queue_start)
>> +               return -EOPNOTSUPP;
>> +
>> +       new_mem = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, rxq);
>> +       if (!new_mem)
>> +               return -ENOMEM;
>> +
>> +       rtnl_lock();
>> +       err = dev->queue_mgmt_ops->ndo_queue_stop(dev, rxq, &old_mem);
>> +       if (err)
>> +               goto err_free_new_mem;
>> +
>> +       err = dev->queue_mgmt_ops->ndo_queue_start(dev, rxq, new_mem);
>> +       if (err)
>> +               goto err_start_queue;
>> +       rtnl_unlock();
>> +
>> +       dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
>> +
>> +       return 0;
>> +
>> +err_start_queue:
>> +       /* Restarting the queue with old_mem should be successful as we haven't
>> +        * changed any of the queue configuration, and there is not much we can
>> +        * do to recover from a failure here.
>> +        *
>> +        * WARN if the we fail to recover the old rx queue, and at least free
>> +        * old_mem so we don't also leak that.
>> +        */
>> +       if (dev->queue_mgmt_ops->ndo_queue_start(dev, rxq, old_mem)) {
>> +               WARN(1,
>> +                    "Failed to restart old queue in error path. RX queue %d may be unhealthy.",
>> +                    rxq);
>> +               dev->queue_mgmt_ops->ndo_queue_mem_free(dev, &old_mem);
>> +       }
>> +
>> +err_free_new_mem:
>> +       dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem);
>> +       rtnl_unlock();
>> +
>> +       return err;
>> +}
> 
> The function looks good to me. It's very similar to what we are doing with GVE.
> 
>> +EXPORT_SYMBOL_GPL(netdev_rx_queue_restart);
> 
> I would still prefer not to export this, unless necessary, and it
> seems it's not at the moment (we only need to call it from core net
> and core io uring which doesn't need an export).
> 
> Unexporting later, as far as my primitive understanding goes, is maybe
> tricky because it may break out of tree drivers that decided to call
> this. I don't feel strongly about unexporting, but someone else may.

Sorry, I didn't mean to ignore you, I forgot to do it. :(

I'll change it for the next series.
David Wei May 6, 2024, 12:41 a.m. UTC | #4
On 2024-05-04 05:20, Simon Horman wrote:
> On Wed, May 01, 2024 at 09:54:04PM -0700, David Wei wrote:
>> From: Mina Almasry <almasrymina@google.com>
>>
>> Add netdev_rx_queue_restart() function to netdev_rx_queue.h. This is
>> taken from Mina's work in [1] with a slight modification of taking
>> rtnl_lock() during the queue stop and start ops.
>>
>> For bnxt specifically, if the firmware doesn't support
>> BNXT_RST_RING_SP_EVENT, then ndo_queue_stop() returns -EOPNOTSUPP and
>> the whole restart fails. Unlike bnxt_rx_ring_reset(), there is no
>> attempt to reset the whole device.
>>
>> [1]: https://lore.kernel.org/linux-kernel/20240403002053.2376017-6-almasrymina@google.com/#t
>>
>> Signed-off-by: David Wei <dw@davidwei.uk>
> 
> nit: Mina's From line is above, but there is no corresponding Signed-off-by
>      line here.

This patch isn't a clean cherry pick, I pulled the core logic of
netdev_rx_queue_restart() from the middle of another patch. In these
cases should I be manually adding Signed-off-by tag?

...

>> diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
>> new file mode 100644
>> index 000000000000..9633fb36f6d1
>> --- /dev/null
>> +++ b/net/core/netdev_rx_queue.c
>> @@ -0,0 +1,58 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
> 
> nit: my understanding is that, as a .c file, the correct SPDX format is:
> 
> // SPDX-License-Identifier: GPL-2.0

Thanks, I will fix this.

> 
> ...
Simon Horman May 7, 2024, 4:46 p.m. UTC | #5
On Sun, May 05, 2024 at 05:41:14PM -0700, David Wei wrote:
> On 2024-05-04 05:20, Simon Horman wrote:
> > On Wed, May 01, 2024 at 09:54:04PM -0700, David Wei wrote:
> >> From: Mina Almasry <almasrymina@google.com>
> >>
> >> Add netdev_rx_queue_restart() function to netdev_rx_queue.h. This is
> >> taken from Mina's work in [1] with a slight modification of taking
> >> rtnl_lock() during the queue stop and start ops.
> >>
> >> For bnxt specifically, if the firmware doesn't support
> >> BNXT_RST_RING_SP_EVENT, then ndo_queue_stop() returns -EOPNOTSUPP and
> >> the whole restart fails. Unlike bnxt_rx_ring_reset(), there is no
> >> attempt to reset the whole device.
> >>
> >> [1]: https://lore.kernel.org/linux-kernel/20240403002053.2376017-6-almasrymina@google.com/#t
> >>
> >> Signed-off-by: David Wei <dw@davidwei.uk>
> > 
> > nit: Mina's From line is above, but there is no corresponding Signed-off-by
> >      line here.
> 
> This patch isn't a clean cherry pick, I pulled the core logic of
> netdev_rx_queue_restart() from the middle of another patch. In these
> cases should I be manually adding Signed-off-by tag?

As you asked:

I think if the patch is materially Mina's work - lets say more than 80% -
then a From line and a Signed-off-by tag is appropriate. N.B. this
implies Mina supplied a Signed-off-by tag at some point.

Otherwise I think it's fine to drop both the From line and Signed-off-by tag.
And as a courtesy acknowledge Mina's work some other way.

e.g. based on work by Mina Almasry <almasrymina@google.com>

But perhaps it's as well to as Mina what he thinks :)

...
Mina Almasry May 8, 2024, 5:13 p.m. UTC | #6
On Tue, May 7, 2024 at 9:47 AM Simon Horman <horms@kernel.org> wrote:
>
> On Sun, May 05, 2024 at 05:41:14PM -0700, David Wei wrote:
> > On 2024-05-04 05:20, Simon Horman wrote:
> > > On Wed, May 01, 2024 at 09:54:04PM -0700, David Wei wrote:
> > >> From: Mina Almasry <almasrymina@google.com>
> > >>
> > >> Add netdev_rx_queue_restart() function to netdev_rx_queue.h. This is
> > >> taken from Mina's work in [1] with a slight modification of taking
> > >> rtnl_lock() during the queue stop and start ops.
> > >>
> > >> For bnxt specifically, if the firmware doesn't support
> > >> BNXT_RST_RING_SP_EVENT, then ndo_queue_stop() returns -EOPNOTSUPP and
> > >> the whole restart fails. Unlike bnxt_rx_ring_reset(), there is no
> > >> attempt to reset the whole device.
> > >>
> > >> [1]: https://lore.kernel.org/linux-kernel/20240403002053.2376017-6-almasrymina@google.com/#t
> > >>
> > >> Signed-off-by: David Wei <dw@davidwei.uk>
> > >
> > > nit: Mina's From line is above, but there is no corresponding Signed-off-by
> > >      line here.
> >
> > This patch isn't a clean cherry pick, I pulled the core logic of
> > netdev_rx_queue_restart() from the middle of another patch. In these
> > cases should I be manually adding Signed-off-by tag?
>
> As you asked:
>
> I think if the patch is materially Mina's work - lets say more than 80% -
> then a From line and a Signed-off-by tag is appropriate. N.B. this
> implies Mina supplied a Signed-off-by tag at some point.
>
> Otherwise I think it's fine to drop both the From line and Signed-off-by tag.
> And as a courtesy acknowledge Mina's work some other way.
>
> e.g. based on work by Mina Almasry <almasrymina@google.com>
>
> But perhaps it's as well to as Mina what he thinks :)
>

I'm fine with whatever here. This work is mostly off of Jakub's design
anyway. Either Signed-off-by or Suggested-by or 'based on work by' is
fine with me.

However from the other thread, it looks like David is delegating me to
send follow up versions of this, possibly with the Devmem TCP series,
which works better for me anyway :D
diff mbox series

Patch

diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index aa1716fb0e53..e78ca52d67fb 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -54,4 +54,7 @@  get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
 	return index;
 }
 #endif
+
+int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
+
 #endif
diff --git a/net/core/Makefile b/net/core/Makefile
index 21d6fbc7e884..f2aa63c167a3 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -19,6 +19,7 @@  obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
 
 obj-y += net-sysfs.o
 obj-y += hotdata.o
+obj-y += netdev_rx_queue.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
new file mode 100644
index 000000000000..9633fb36f6d1
--- /dev/null
+++ b/net/core/netdev_rx_queue.c
@@ -0,0 +1,58 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/netdevice.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+
+int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq)
+{
+	void *new_mem;
+	void *old_mem;
+	int err;
+
+	if (!dev->queue_mgmt_ops->ndo_queue_stop ||
+	    !dev->queue_mgmt_ops->ndo_queue_mem_free ||
+	    !dev->queue_mgmt_ops->ndo_queue_mem_alloc ||
+	    !dev->queue_mgmt_ops->ndo_queue_start)
+		return -EOPNOTSUPP;
+
+	new_mem = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, rxq);
+	if (!new_mem)
+		return -ENOMEM;
+
+	rtnl_lock();
+	err = dev->queue_mgmt_ops->ndo_queue_stop(dev, rxq, &old_mem);
+	if (err)
+		goto err_free_new_mem;
+
+	err = dev->queue_mgmt_ops->ndo_queue_start(dev, rxq, new_mem);
+	if (err)
+		goto err_start_queue;
+	rtnl_unlock();
+
+	dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+
+	return 0;
+
+err_start_queue:
+	/* Restarting the queue with old_mem should be successful as we haven't
+	 * changed any of the queue configuration, and there is not much we can
+	 * do to recover from a failure here.
+	 *
+	 * WARN if the we fail to recover the old rx queue, and at least free
+	 * old_mem so we don't also leak that.
+	 */
+	if (dev->queue_mgmt_ops->ndo_queue_start(dev, rxq, old_mem)) {
+		WARN(1,
+		     "Failed to restart old queue in error path. RX queue %d may be unhealthy.",
+		     rxq);
+		dev->queue_mgmt_ops->ndo_queue_mem_free(dev, &old_mem);
+	}
+
+err_free_new_mem:
+	dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem);
+	rtnl_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_queue_restart);