@@ -111,6 +111,8 @@ struct igc_ring {
struct sk_buff *skb;
};
};
+
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_internodealigned_in_smp;
/* Board specific private data structure */
@@ -375,6 +377,8 @@ enum igc_tx_flags {
/* olinfo flags */
IGC_TX_FLAGS_IPV4 = 0x10,
IGC_TX_FLAGS_CSUM = 0x20,
+
+ IGC_TX_FLAGS_XDP = 0x100,
};
enum igc_boards {
@@ -397,7 +401,10 @@ enum igc_boards {
struct igc_tx_buffer {
union igc_adv_tx_desc *next_to_watch;
unsigned long time_stamp;
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ };
unsigned int bytecount;
u16 gso_segs;
__be16 protocol;
@@ -25,6 +25,7 @@
#define IGC_XDP_PASS 0
#define IGC_XDP_CONSUMED BIT(0)
+#define IGC_XDP_TX BIT(1)
static int debug = -1;
@@ -181,8 +182,10 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
while (i != tx_ring->next_to_use) {
union igc_adv_tx_desc *eop_desc, *tx_desc;
- /* Free all the Tx ring sk_buffs */
- dev_kfree_skb_any(tx_buffer->skb);
+ if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP)
+ xdp_return_frame(tx_buffer->xdpf);
+ else
+ dev_kfree_skb_any(tx_buffer->skb);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -410,6 +413,8 @@ void igc_free_rx_resources(struct igc_ring *rx_ring)
{
igc_clean_rx_ring(rx_ring);
+ igc_xdp_unregister_rxq_info(rx_ring);
+
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
@@ -447,7 +452,11 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
{
struct net_device *ndev = rx_ring->netdev;
struct device *dev = rx_ring->dev;
- int size, desc_len;
+ int size, desc_len, res;
+
+ res = igc_xdp_register_rxq_info(rx_ring);
+ if (res < 0)
+ return res;
size = sizeof(struct igc_rx_buffer) * rx_ring->count;
rx_ring->rx_buffer_info = vzalloc(size);
@@ -473,6 +482,7 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
return 0;
err:
+ igc_xdp_unregister_rxq_info(rx_ring);
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
@@ -1909,6 +1919,101 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
}
}
+static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
+ struct xdp_frame *xdpf,
+ struct igc_ring *ring)
+{
+ dma_addr_t dma;
+
+ dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(ring->dev, dma)) {
+ netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
+ return -ENOMEM;
+ }
+
+ buffer->xdpf = xdpf;
+ buffer->tx_flags = IGC_TX_FLAGS_XDP;
+ buffer->protocol = 0;
+ buffer->bytecount = xdpf->len;
+ buffer->gso_segs = 1;
+ buffer->time_stamp = jiffies;
+ dma_unmap_len_set(buffer, len, xdpf->len);
+ dma_unmap_addr_set(buffer, dma, dma);
+ return 0;
+}
+
+/* This function requires __netif_tx_lock is held by the caller. */
+static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
+ struct xdp_frame *xdpf)
+{
+ struct igc_tx_buffer *buffer;
+ union igc_adv_tx_desc *desc;
+ u32 cmd_type, olinfo_status;
+ int err;
+
+ if (!igc_desc_unused(ring))
+ return -EBUSY;
+
+ buffer = &ring->tx_buffer_info[ring->next_to_use];
+ err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
+ if (err)
+ return err;
+
+ cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+ IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
+ buffer->bytecount;
+ olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
+
+ desc = IGC_TX_DESC(ring, ring->next_to_use);
+ desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+ desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
+
+ netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
+
+ buffer->next_to_watch = desc;
+
+ ring->next_to_use++;
+ if (ring->next_to_use == ring->count)
+ ring->next_to_use = 0;
+
+ return 0;
+}
+
+static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
+ int cpu)
+{
+ int index = cpu;
+
+ if (unlikely(index < 0))
+ index = 0;
+
+ while (index >= adapter->num_tx_queues)
+ index -= adapter->num_tx_queues;
+
+ return adapter->tx_ring[index];
+}
+
+static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
+{
+ struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+ int cpu = smp_processor_id();
+ struct netdev_queue *nq;
+ struct igc_ring *ring;
+ int res;
+
+ if (unlikely(!xdpf))
+ return -EFAULT;
+
+ ring = igc_xdp_get_tx_ring(adapter, cpu);
+ nq = txring_txq(ring);
+
+ __netif_tx_lock(nq, cpu);
+ res = igc_xdp_init_tx_descriptor(ring, xdpf);
+ __netif_tx_unlock(nq);
+ return res;
+}
+
static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
struct xdp_buff *xdp)
{
@@ -1929,6 +2034,12 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
case XDP_PASS:
res = IGC_XDP_PASS;
break;
+ case XDP_TX:
+ if (igc_xdp_xmit_back(adapter, xdp) < 0)
+ res = IGC_XDP_CONSUMED;
+ else
+ res = IGC_XDP_TX;
+ break;
default:
bpf_warn_invalid_xdp_action(act);
fallthrough;
@@ -1945,20 +2056,49 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
return ERR_PTR(-res);
}
+/* This function assumes __netif_tx_lock is held by the caller. */
+static void igc_flush_tx_descriptors(struct igc_ring *ring)
+{
+ /* Once tail pointer is updated, hardware can fetch the descriptors
+ * any time so we issue a write membar here to ensure all memory
+ * writes are complete before the tail pointer is updated.
+ */
+ wmb();
+ writel(ring->next_to_use, ring->tail);
+}
+
+static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
+{
+ int cpu = smp_processor_id();
+ struct netdev_queue *nq;
+ struct igc_ring *ring;
+
+ if (status & IGC_XDP_TX) {
+ ring = igc_xdp_get_tx_ring(adapter, cpu);
+ nq = txring_txq(ring);
+
+ __netif_tx_lock(nq, cpu);
+ igc_flush_tx_descriptors(ring);
+ __netif_tx_unlock(nq);
+ }
+}
+
static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
{
unsigned int total_bytes = 0, total_packets = 0;
+ struct igc_adapter *adapter = q_vector->adapter;
struct igc_ring *rx_ring = q_vector->rx.ring;
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = igc_desc_unused(rx_ring);
+ int xdp_status = 0;
while (likely(total_packets < budget)) {
union igc_adv_rx_desc *rx_desc;
struct igc_rx_buffer *rx_buffer;
+ unsigned int size, truesize;
ktime_t timestamp = 0;
struct xdp_buff xdp;
int pkt_offset = 0;
- unsigned int size;
void *pktbuf;
/* return some buffers to hardware, one at a time is too slow */
@@ -1979,6 +2119,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
dma_rmb();
rx_buffer = igc_get_rx_buffer(rx_ring, size);
+ truesize = igc_get_rx_frame_truesize(rx_ring, size);
pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
@@ -1990,19 +2131,29 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
}
if (!skb) {
- struct igc_adapter *adapter = q_vector->adapter;
-
xdp.data = pktbuf + pkt_offset;
xdp.data_end = xdp.data + size;
xdp.data_hard_start = pktbuf - igc_rx_offset(rx_ring);
xdp_set_data_meta_invalid(&xdp);
- xdp.frame_sz = igc_get_rx_frame_truesize(rx_ring, size);
+ xdp.frame_sz = truesize;
+ xdp.rxq = &rx_ring->xdp_rxq;
skb = igc_xdp_run_prog(adapter, &xdp);
}
if (IS_ERR(skb)) {
- rx_buffer->pagecnt_bias++;
+ unsigned int xdp_res = -PTR_ERR(skb);
+
+ switch (xdp_res) {
+ case IGC_XDP_CONSUMED:
+ rx_buffer->pagecnt_bias++;
+ break;
+ case IGC_XDP_TX:
+ igc_rx_buffer_flip(rx_buffer, truesize);
+ xdp_status |= xdp_res;
+ break;
+ }
+
total_packets++;
total_bytes += size;
} else if (skb)
@@ -2048,6 +2199,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
total_packets++;
}
+ if (xdp_status)
+ igc_finalize_xdp(adapter, xdp_status);
+
/* place incomplete frames back on ring for completion */
rx_ring->skb = skb;
@@ -2109,8 +2263,10 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
total_bytes += tx_buffer->bytecount;
total_packets += tx_buffer->gso_segs;
- /* free the skb */
- napi_consume_skb(tx_buffer->skb, napi_budget);
+ if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP)
+ xdp_return_frame(tx_buffer->xdpf);
+ else
+ napi_consume_skb(tx_buffer->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -31,3 +31,30 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
return 0;
}
+
+int igc_xdp_register_rxq_info(struct igc_ring *ring)
+{
+ struct net_device *dev = ring->netdev;
+ int err;
+
+ err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, ring->queue_index, 0);
+ if (err) {
+ netdev_err(dev, "Failed to register xdp rxq info\n");
+ return err;
+ }
+
+ err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err) {
+ netdev_err(dev, "Failed to register xdp rxq mem model\n");
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
+ return err;
+ }
+
+ return 0;
+}
+
+void igc_xdp_unregister_rxq_info(struct igc_ring *ring)
+{
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
+}
@@ -7,4 +7,7 @@
int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
struct netlink_ext_ack *extack);
+int igc_xdp_register_rxq_info(struct igc_ring *ring);
+void igc_xdp_unregister_rxq_info(struct igc_ring *ring);
+
#endif /* _IGC_XDP_H_ */