Message ID | 20250129181703.148027-1-sankararaman.jayaraman@broadcom.com (mailing list archive)
---|---
State | Changes Requested
Delegated to | Netdev Maintainers
Series | [net,v2] vmxnet3: Fix tx queue race condition with XDP
On Wed, 29 Jan 2025 23:47:03 +0530 Sankararaman Jayaraman wrote:
> If XDP traffic runs on a CPU which is greater than or equal to
> the number of the Tx queues of the NIC, then vmxnet3_xdp_get_tq()
> always picks up queue 0 for transmission, as it uses reciprocal scale
> instead of a simple modulo operation.
>
> vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() use the above
> returned queue without any locking, which can lead to race conditions
> when multiple XDP xmits run in parallel on different CPUs.
>
> This patch uses a simple modulo scheme when the current CPU equals or
> exceeds the number of Tx queues on the NIC. It also adds locking in
> the vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions.
>
> Fixes: 54f00cce1178 ("vmxnet3: Add XDP support.")
> Signed-off-by: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
> Signed-off-by: Ronak Doshi <ronak.doshi@broadcom.com>

Please add a --- separator between the commit message and the change log.

> Changes v1 -> v2:
> Retained the copyright dates as is.
> Used spin_lock()/spin_unlock() instead of spin_lock_irqsave().

Wrong way around, AFAICT. The lock is taken on the xmit path, and the
driver supports netpoll. But this path won't be called from IRQ, so the
right type of call is very likely _irq().

Please do not post the next version of the patch in reply to the previous
posting. Instead, add a lore link to the previous posting to the change
log. See:

https://www.kernel.org/doc/html/next/process/maintainer-netdev.html#changes-requested

Actually, also make sure you read at least the tl;dr section, too.

> @@ -226,6 +231,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
>  	struct vmxnet3_adapter *adapter = netdev_priv(dev);
>  	struct vmxnet3_tx_queue *tq;
>  	int i;
> +	struct netdev_queue *nq;

Reverse length order. So:

	struct vmxnet3_adapter *adapter = netdev_priv(dev);
	struct vmxnet3_tx_queue *tq;
+	struct netdev_queue *nq;
	int i;
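[Editor's note on the first fix: reciprocal_scale() is a multiply-shift
helper meant for inputs that span the full 32-bit range (e.g. packet
hashes), not for small integers like CPU ids. A minimal user-space
sketch, with the reciprocal_scale() body copied from the kernel's
include/linux/kernel.h and an arbitrary example queue count of 4, shows
why the fallback path always lands on queue 0:

#include <stdint.h>
#include <stdio.h>

/* Same math as the kernel's reciprocal_scale(): scale val into
 * [0, ep_ro) by multiplying and keeping the top 32 bits. For small
 * val the 64-bit product never reaches 2^32, so the result is
 * always 0.
 */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

int main(void)
{
	unsigned int tq_number = 4;	/* hypothetical NIC with 4 Tx queues */
	unsigned int cpu;

	for (cpu = 0; cpu < 16; cpu++)
		printf("cpu %2u -> reciprocal_scale: %u, modulo: %u\n",
		       cpu, reciprocal_scale(cpu, tq_number),
		       cpu % tq_number);
	return 0;
}

Every CPU id that takes the fallback branch (cpu >= tq_number) maps to
queue 0 under reciprocal_scale(), so all such XDP transmitters pile onto
one ring; cpu % tq_number spreads them across the queues as intended.]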
diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c
index 1341374a4588..e3f94b3374f9 100644
--- a/drivers/net/vmxnet3/vmxnet3_xdp.c
+++ b/drivers/net/vmxnet3/vmxnet3_xdp.c
@@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
 	if (likely(cpu < tq_number))
 		tq = &adapter->tx_queue[cpu];
 	else
-		tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];
+		tq = &adapter->tx_queue[cpu % tq_number];
 
 	return tq;
 }
@@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 	u32 buf_size;
 	u32 dw2;
 
+	spin_lock(&tq->tx_lock);
 	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
 	dw2 |= xdpf->len;
 	ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
@@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 
 	if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
 		tq->stats.tx_ring_full++;
+		spin_unlock(&tq->tx_lock);
 		return -ENOSPC;
 	}
 
@@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
 					       xdpf->data, buf_size,
 					       DMA_TO_DEVICE);
-		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
+		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) {
+			spin_unlock(&tq->tx_lock);
 			return -EFAULT;
+		}
 		tbi->map_type |= VMXNET3_MAP_SINGLE;
 	} else { /* XDP buffer from page pool */
 		page = virt_to_page(xdpf->data);
@@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 	dma_wmb();
 	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
 						  VMXNET3_TXD_GEN);
+	spin_unlock(&tq->tx_lock);
 
 	/* No need to handle the case when tx_num_deferred doesn't reach
 	 * threshold. Backend driver at hypervisor side will poll and reset
@@ -226,6 +231,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 	struct vmxnet3_adapter *adapter = netdev_priv(dev);
 	struct vmxnet3_tx_queue *tq;
 	int i;
+	struct netdev_queue *nq;
 
 	if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
 		return -ENETDOWN;
@@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 	if (tq->stopped)
 		return -ENETDOWN;
 
+	nq = netdev_get_tx_queue(adapter->netdev, tq->qid);
+
+	__netif_tx_lock(nq, smp_processor_id());
 	for (i = 0; i < n; i++) {
 		if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
 			tq->stats.xdp_xmit_err++;
@@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 		}
 	}
 	tq->stats.xdp_xmit += i;
+	__netif_tx_unlock(nq);
 
 	return i;
 }
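[Editor's note on the second fix: the race the new tq->tx_lock closes is
a lost update on the ring's fill index: two CPUs read the same
next2fill, build descriptors in the same slot, and both advance the
index. A self-contained user-space sketch (not driver code: pthreads
stand in for concurrent CPUs, and next2fill/tx_lock merely borrow the
driver's field names; build with cc -O2 -pthread on Linux):

#include <pthread.h>
#include <stdio.h>

#define XMITS_PER_THREAD 1000000UL

/* Analogue of tq->tx_ring.next2fill, the descriptor index that
 * vmxnet3_xdp_xmit_frame() advances for every transmitted frame. */
static unsigned long next2fill;
static pthread_spinlock_t tx_lock;	/* analogue of tq->tx_lock */
static int use_lock;

static void *xmit_worker(void *unused)
{
	unsigned long i;

	(void)unused;
	for (i = 0; i < XMITS_PER_THREAD; i++) {
		if (use_lock)
			pthread_spin_lock(&tx_lock);
		/* Deliberate data race when unlocked: two threads can
		 * read the same value and overwrite each other's
		 * increment, i.e. claim the same "descriptor slot". */
		next2fill++;
		if (use_lock)
			pthread_spin_unlock(&tx_lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_spin_init(&tx_lock, PTHREAD_PROCESS_PRIVATE);
	for (use_lock = 0; use_lock <= 1; use_lock++) {
		next2fill = 0;
		pthread_create(&a, NULL, xmit_worker, NULL);
		pthread_create(&b, NULL, xmit_worker, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		printf("%s: next2fill = %lu (expected %lu)\n",
		       use_lock ? "locked  " : "unlocked",
		       next2fill, 2 * XMITS_PER_THREAD);
	}
	pthread_spin_destroy(&tx_lock);
	return 0;
}

With the lock held the final count reaches exactly 2000000 on every
run; without it, updates are routinely lost, mirroring how two unlocked
XDP xmits on different CPUs can clobber the same Tx descriptor.]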