diff mbox series

[net-next,3/3] ibmveth: Ethtool set queue support

Message ID 20220921215056.113516-3-nnac123@linux.ibm.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next,1/3] ibmveth: Copy tx skbs into a premapped buffer | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter warning Series does not have a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 8 maintainers not CCed: linuxppc-dev@lists.ozlabs.org kuba@kernel.org mpe@ellerman.id.au davem@davemloft.net npiggin@gmail.com christophe.leroy@csgroup.eu edumazet@google.com pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: Possible unnecessary 'out of memory' message
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Nick Child Sept. 21, 2022, 9:50 p.m. UTC
Implement channel management functions to allow dynamic addition and
removal of transmit queues. The `ethtool --show-channels` and
`ethtool --set-channels` commands can be used to get and set the
number of queues, respectively. Allow adding as many transmit queues
as there are available processors, but never more than the hard
maximum of 16. The number of receive queues is one and cannot be
modified.

Depending on whether the requested number of queues is larger or
smaller than the current value, either allocate or free long term
buffers. Since long term buffer construction and destruction can
occur in two different areas, from either channel set requests or
device open/close, define functions for performing this work. If
allocation of a new buffer fails, then attempt to revert back to the
previous number of queues.

Signed-off-by: Nick Child <nnac123@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmveth.c | 168 ++++++++++++++++++++++++-----
 drivers/net/ethernet/ibm/ibmveth.h |   2 +-
 2 files changed, 140 insertions(+), 30 deletions(-)

Comments

Jakub Kicinski Sept. 26, 2022, 6:44 p.m. UTC | #1
On Wed, 21 Sep 2022 16:50:56 -0500 Nick Child wrote:
> diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
> index 7abd67c2336e..2c5ded4f3b67 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.c
> +++ b/drivers/net/ethernet/ibm/ibmveth.c
> @@ -141,6 +141,13 @@ static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
>  	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
>  }
>  
> +static unsigned int ibmveth_real_max_tx_queues(void)
> +{
> +	unsigned int n_cpu =  num_online_cpus();

nit: double space after =

> +	return n_cpu > IBMVETH_MAX_QUEUES ? IBMVETH_MAX_QUEUES : n_cpu;

min()

> +static void ibmveth_get_channels(struct net_device *netdev,
> +				 struct ethtool_channels *channels)
> +{
> +	channels->max_tx = ibmveth_real_max_tx_queues();
> +	channels->tx_count = netdev->real_num_tx_queues;
> +
> +	channels->max_rx = netdev->real_num_rx_queues;
> +	channels->rx_count = netdev->real_num_rx_queues;

Which is 1, right?

> +	channels->max_other = 0;
> +	channels->other_count = 0;
> +	channels->max_combined = 0;
> +	channels->combined_count = 0;

AFAIR the core zeros the input, no need to explicitly set to 0 here.

> +static int ibmveth_set_channels(struct net_device *netdev,
> +				struct ethtool_channels *channels)
> +{
> +	struct ibmveth_adapter *adapter = netdev_priv(netdev);
> +	int rc, rc2, i;
> +	unsigned int fallback_num, goal;
> +
> +	/* Higher levels will catch basic input errors */
> +	if (channels->tx_count > ibmveth_real_max_tx_queues())
> +		return -EINVAL;

AFAIR core validates this.

> +	if (channels->tx_count == netdev->real_num_tx_queues)
> +		return 0;

And this.

> +	/* We have IBMVETH_MAX_QUEUES netdev_queue's allocated
> +	 * but we may need to alloc/free the ltb's.
> +	 */
> +	netif_tx_stop_all_queues(netdev);

What if the device is not UP?

> +	fallback_num = netdev->real_num_tx_queues;

fallback is not great naming. Why not old or prev?

> +	goal = channels->tx_count;
> +
> +setup_tx_queues:
> +	/* Allocate any queue that we need */
> +	for (i = 0; i < goal; i++) {
> +		if (adapter->tx_ltb_ptr[i])
> +			continue;
> +
> +		rc = ibmveth_allocate_tx_ltb(adapter, i);
> +		if (!rc)
> +			continue;

I don't understand why you start from zero here. The first real_num_tx
should already be allocated. If you assume this it removes the need for
the error handling below. Either the number of queues is increased and
on failure we go back to the old one, or it's decreased which can't
fail.

> +		if (goal == fallback_num)
> +			goto full_restart;
> +
> +		netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n",
> +			   fallback_num);
> +		goal = fallback_num;
> +		goto setup_tx_queues;
> +	}
> +	/* Free any that are no longer needed */
> +	for (; i < fallback_num; i++) {
> +		if (adapter->tx_ltb_ptr[i])
> +			ibmveth_free_tx_ltb(adapter, i);
> +	}
> +
> +	rc = netif_set_real_num_tx_queues(netdev, goal);

You can assume this doesn't fail on decrease.

> +	if (rc) {
> +		if (goal == fallback_num)
> +			goto full_restart;
> +		netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n",
> +			   fallback_num);
> +		goal = fallback_num;
> +		goto setup_tx_queues;
> +	}

>  #define IBMVETH_MAX_BUF_SIZE (1024 * 128)
>  #define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64)
> -#define IBMVETH_MAX_QUEUES 8
> +#define IBMVETH_MAX_QUEUES 16

Why does the same series introduce the max as 8 and then bump to 16?
Why can't patch 1 just use 16 from the start?
Nick Child Sept. 27, 2022, 7:42 p.m. UTC | #2
On 9/26/22 13:44, Jakub Kicinski wrote:
> On Wed, 21 Sep 2022 16:50:56 -0500 Nick Child wrote:
>> diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
>> index 7abd67c2336e..2c5ded4f3b67 100644
>> --- a/drivers/net/ethernet/ibm/ibmveth.c
>> +++ b/drivers/net/ethernet/ibm/ibmveth.c

>> +static void ibmveth_get_channels(struct net_device *netdev,
>> +				 struct ethtool_channels *channels)
>> +{
>> +	channels->max_tx = ibmveth_real_max_tx_queues();
>> +	channels->tx_count = netdev->real_num_tx_queues;
>> +
>> +	channels->max_rx = netdev->real_num_rx_queues;
>> +	channels->rx_count = netdev->real_num_rx_queues;
> 
> Which is 1, right?

Correct, there will only be one rx queue. I chose doing this over using
hard-coded values to ensure that the values presented to the user are
actually being implemented in the networking layers.

>> +static int ibmveth_set_channels(struct net_device *netdev,
>> +				struct ethtool_channels *channels)

>> +	/* We have IBMVETH_MAX_QUEUES netdev_queue's allocated
>> +	 * but we may need to alloc/free the ltb's.
>> +	 */
>> +	netif_tx_stop_all_queues(netdev);
> 
> What if the device is not UP?

 From my understanding this will just set the __QUEUE_STATE_DRV_XOFF bit
of all of the queues. I don't think this will cause any issues if the
device is DOWN. Please let me know if you are worried about anything
in particular here.

Just as a side note, ibmveth never sets carrier state to UP since it
cannot determine the link status of the physical device being
virtualized. The state is instead left as UNKNOWN when not DOWN.


The other comments from Jakub's review are left out because I fully
agree with them and will apply them to a v2 soon (as well as adding
anything else that comes up before then).

Thank you very much for the review Jakub :)

--Nick Child
Jakub Kicinski Sept. 27, 2022, 11:43 p.m. UTC | #3
On Tue, 27 Sep 2022 14:42:39 -0500 Nick Child wrote:
> >> +static int ibmveth_set_channels(struct net_device *netdev,
> >> +				struct ethtool_channels *channels)  
> 
> >> +	/* We have IBMVETH_MAX_QUEUES netdev_queue's allocated
> >> +	 * but we may need to alloc/free the ltb's.
> >> +	 */
> >> +	netif_tx_stop_all_queues(netdev);  
> > 
> > What if the device is not UP?  
> 
>  From my understanding this will just set the __QUEUE_STATE_DRV_XOFF bit
> of all of the queues. I don't think this will cause any issues if the
> device is DOWN. Please let me know if you are worried about anything
> in particular here.

My concern was that the memory is allocated on open and freed on close.
So we shouldn't be doing any extra work if the device is closed, just
record the desired number of queues and return success.

> Just as a side note, ibmveth never sets carrier state to UP since it
> cannot determine the link status of the physical device being
> virtualized. The state is instead left as UNKNOWN when not DOWN.

Sorry for lack of clarity - UP as in IFF_UP, not IFF_LOWER_UP.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 7abd67c2336e..2c5ded4f3b67 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -141,6 +141,13 @@  static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
 	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
 }
 
+static unsigned int ibmveth_real_max_tx_queues(void)
+{
+	unsigned int n_cpu =  num_online_cpus();
+
+	return n_cpu > IBMVETH_MAX_QUEUES ? IBMVETH_MAX_QUEUES : n_cpu;
+}
+
 /* setup the initial settings for a buffer pool */
 static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
 				     u32 pool_index, u32 pool_size,
@@ -456,6 +463,38 @@  static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
 	}
 }
 
+static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx)
+{
+	dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx],
+			 adapter->tx_ltb_size, DMA_TO_DEVICE);
+	kfree(adapter->tx_ltb_ptr[idx]);
+	adapter->tx_ltb_ptr[idx] = NULL;
+}
+
+static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter *adapter, int idx)
+{
+	adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size,
+					   GFP_KERNEL);
+	if (!adapter->tx_ltb_ptr[idx]) {
+		netdev_err(adapter->netdev,
+			   "unable to allocate tx long term buffer\n");
+		return -ENOMEM;
+	}
+	adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev,
+						  adapter->tx_ltb_ptr[idx],
+						  adapter->tx_ltb_size,
+						  DMA_TO_DEVICE);
+	if (dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) {
+		netdev_err(adapter->netdev,
+			   "unable to DMA map tx long term buffer\n");
+		kfree(adapter->tx_ltb_ptr[idx]);
+		adapter->tx_ltb_ptr[idx] = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
         union ibmveth_buf_desc rxq_desc, u64 mac_address)
 {
@@ -538,22 +577,9 @@  static int ibmveth_open(struct net_device *netdev)
 		goto out_unmap_buffer_list;
 	}
 
-	for (i = 0; i < IBMVETH_MAX_QUEUES; i++) {
-		adapter->tx_ltb_ptr[i] = kzalloc(adapter->tx_ltb_size,
-						 GFP_KERNEL);
-		if (!adapter->tx_ltb_ptr[i]) {
-			netdev_err(netdev,
-				   "unable to allocate transmit long term buffer\n");
-			goto out_free_tx_ltb_ptrs;
-		}
-		adapter->tx_ltb_dma[i] = dma_map_single(dev,
-							adapter->tx_ltb_ptr[i],
-							adapter->tx_ltb_size,
-							DMA_TO_DEVICE);
-		if (dma_mapping_error(dev, adapter->tx_ltb_dma[i])) {
-			netdev_err(netdev, "unable to DMA map transmit long term buffer\n");
-			goto out_unmap_tx_dma;
-		}
+	for (i = 0; i < netdev->real_num_tx_queues; i++) {
+		if (ibmveth_allocate_tx_ltb(adapter, i))
+			goto out_free_tx_ltb;
 	}
 
 	adapter->rx_queue.index = 0;
@@ -632,14 +658,9 @@  static int ibmveth_open(struct net_device *netdev)
 	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
 			 DMA_BIDIRECTIONAL);
 
-out_unmap_tx_dma:
-	kfree(adapter->tx_ltb_ptr[i]);
-
-out_free_tx_ltb_ptrs:
+out_free_tx_ltb:
 	while (--i >= 0) {
-		dma_unmap_single(dev, adapter->tx_ltb_dma[i],
-				 adapter->tx_ltb_size, DMA_TO_DEVICE);
-		kfree(adapter->tx_ltb_ptr[i]);
+		ibmveth_free_tx_ltb(adapter, i);
 	}
 
 out_unmap_buffer_list:
@@ -704,11 +725,8 @@  static int ibmveth_close(struct net_device *netdev)
 			ibmveth_free_buffer_pool(adapter,
 						 &adapter->rx_buff_pool[i]);
 
-	for (i = 0; i < IBMVETH_MAX_QUEUES; i++) {
-		dma_unmap_single(dev, adapter->tx_ltb_dma[i],
-				 adapter->tx_ltb_size, DMA_TO_DEVICE);
-		kfree(adapter->tx_ltb_ptr[i]);
-	}
+	for (i = 0; i < netdev->real_num_tx_queues; i++)
+		ibmveth_free_tx_ltb(adapter, i);
 
 	netdev_dbg(netdev, "close complete\n");
 
@@ -974,6 +992,88 @@  static void ibmveth_get_ethtool_stats(struct net_device *dev,
 		data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
 }
 
+static void ibmveth_get_channels(struct net_device *netdev,
+				 struct ethtool_channels *channels)
+{
+	channels->max_tx = ibmveth_real_max_tx_queues();
+	channels->tx_count = netdev->real_num_tx_queues;
+
+	channels->max_rx = netdev->real_num_rx_queues;
+	channels->rx_count = netdev->real_num_rx_queues;
+
+	channels->max_other = 0;
+	channels->other_count = 0;
+	channels->max_combined = 0;
+	channels->combined_count = 0;
+}
+
+static int ibmveth_set_channels(struct net_device *netdev,
+				struct ethtool_channels *channels)
+{
+	struct ibmveth_adapter *adapter = netdev_priv(netdev);
+	int rc, rc2, i;
+	unsigned int fallback_num, goal;
+
+	/* Higher levels will catch basic input errors */
+	if (channels->tx_count > ibmveth_real_max_tx_queues())
+		return -EINVAL;
+
+	if (channels->tx_count == netdev->real_num_tx_queues)
+		return 0;
+
+	/* We have IBMVETH_MAX_QUEUES netdev_queue's allocated
+	 * but we may need to alloc/free the ltb's.
+	 */
+	netif_tx_stop_all_queues(netdev);
+	fallback_num = netdev->real_num_tx_queues;
+	goal = channels->tx_count;
+
+setup_tx_queues:
+	/* Allocate any queue that we need */
+	for (i = 0; i < goal; i++) {
+		if (adapter->tx_ltb_ptr[i])
+			continue;
+
+		rc = ibmveth_allocate_tx_ltb(adapter, i);
+		if (!rc)
+			continue;
+
+		if (goal == fallback_num)
+			goto full_restart;
+
+		netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n",
+			   fallback_num);
+		goal = fallback_num;
+		goto setup_tx_queues;
+	}
+	/* Free any that are no longer needed */
+	for (; i < fallback_num; i++) {
+		if (adapter->tx_ltb_ptr[i])
+			ibmveth_free_tx_ltb(adapter, i);
+	}
+
+	rc = netif_set_real_num_tx_queues(netdev, goal);
+	if (rc) {
+		if (goal == fallback_num)
+			goto full_restart;
+		netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n",
+			   fallback_num);
+		goal = fallback_num;
+		goto setup_tx_queues;
+	}
+
+	netif_tx_wake_all_queues(netdev);
+	return rc;
+
+full_restart:
+	netdev_err(netdev, "Failed to fallback to old number of queues, restarting\n");
+	ibmveth_close(netdev);
+	rc2 = ibmveth_open(netdev);
+	if (rc2)
+		return rc2;
+	return rc;
+}
+
 static const struct ethtool_ops netdev_ethtool_ops = {
 	.get_drvinfo		         = netdev_get_drvinfo,
 	.get_link		         = ethtool_op_get_link,
@@ -982,6 +1082,8 @@  static const struct ethtool_ops netdev_ethtool_ops = {
 	.get_ethtool_stats	         = ibmveth_get_ethtool_stats,
 	.get_link_ksettings	         = ibmveth_get_link_ksettings,
 	.set_link_ksettings              = ibmveth_set_link_ksettings,
+	.get_channels			 = ibmveth_get_channels,
+	.set_channels			 = ibmveth_set_channels
 };
 
 static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -1609,7 +1711,6 @@  static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	}
 
 	netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1);
-
 	if (!netdev)
 		return -ENOMEM;
 
@@ -1675,7 +1776,16 @@  static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 			kobject_uevent(kobj, KOBJ_ADD);
 	}
 
+	rc = netif_set_real_num_tx_queues(netdev, ibmveth_real_max_tx_queues());
+	if (rc) {
+		netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n",
+			   rc);
+		free_netdev(netdev);
+		return rc;
+	}
 	adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
+	for (i = 0; i < IBMVETH_MAX_QUEUES; i++)
+		adapter->tx_ltb_ptr[i] = NULL;
 
 	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
 	netdev_dbg(netdev, "registering netdev...\n");
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 7f058a551577..610d7a8be28a 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -99,7 +99,7 @@  static inline long h_illan_attributes(unsigned long unit_address,
 #define IBMVETH_FILT_LIST_SIZE 4096
 #define IBMVETH_MAX_BUF_SIZE (1024 * 128)
 #define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64)
-#define IBMVETH_MAX_QUEUES 8
+#define IBMVETH_MAX_QUEUES 16
 
 static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
 static int pool_count[] = { 256, 512, 256, 256, 256 };