diff mbox series

[RFC,bpf-next,1/8] xsk: add struct xdp_sock to netdev_rx_queue

Message ID 20211116073742.7941-2-ciara.loftus@intel.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series XDP_REDIRECT_XSK and Batched AF_XDP Rx | expand

Checks

Context Check Description
bpf/vmtest-bpf-next fail VM_Test
bpf/vmtest-bpf-next-PR fail PR summary
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4757 this patch: 4757
netdev/cc_maintainers warning 5 maintainers not CCed: kafai@fb.com andrii@kernel.org yhs@fb.com songliubraving@fb.com kpsingh@kernel.org
netdev/build_clang success Errors and warnings before: 863 this patch: 863
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4914 this patch: 4914
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Ciara Loftus Nov. 16, 2021, 7:37 a.m. UTC
Storing a reference to the XDP socket in the netdev_rx_queue structure
makes a single socket accessible without requiring a lookup in the XSKMAP.
A future commit will introduce the XDP_REDIRECT_XSK action, which
indicates that this reference should be used instead of performing the
lookup. Since an rx ring is required for redirection, only store the
reference if an rx ring is configured.

When multiple sockets exist for a given context (netdev, qid), a
reference is not stored because in this case we fall back to the default
behavior of using the XSKMAP to redirect the packets.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 include/linux/netdevice.h |  2 ++
 net/xdp/xsk.c             | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec42495a43a..1ad2491f0391 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -736,6 +736,8 @@  struct netdev_rx_queue {
 	struct net_device		*dev;
 #ifdef CONFIG_XDP_SOCKETS
 	struct xsk_buff_pool            *pool;
+	struct xdp_sock			*xsk;
+	refcount_t			xsk_refcnt;
 #endif
 } ____cacheline_aligned_in_smp;
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index f16074eb53c7..94ee524b9ca8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -728,6 +728,30 @@  static void xsk_unbind_dev(struct xdp_sock *xs)
 
 	/* Wait for driver to stop using the xdp socket. */
 	xp_del_xsk(xs->pool, xs);
+	if (xs->rx) {
+		if (refcount_read(&dev->_rx[xs->queue_id].xsk_refcnt) == 1) {
+			refcount_set(&dev->_rx[xs->queue_id].xsk_refcnt, 0);
+			WRITE_ONCE(xs->dev->_rx[xs->queue_id].xsk, NULL);
+		} else {
+			refcount_dec(&dev->_rx[xs->queue_id].xsk_refcnt);
+			/* If the refcnt returns to one again store the reference to the
+			 * remaining socket in the netdev_rx_queue.
+			 */
+			if (refcount_read(&dev->_rx[xs->queue_id].xsk_refcnt) == 1) {
+				struct net *net = dev_net(dev);
+				struct xdp_sock *xsk;
+				struct sock *sk;
+
+				mutex_lock(&net->xdp.lock);
+				sk = sk_head(&net->xdp.list);
+				xsk = xdp_sk(sk);
+				mutex_lock(&xsk->mutex);
+				WRITE_ONCE(xs->dev->_rx[xs->queue_id].xsk, xsk);
+				mutex_unlock(&xsk->mutex);
+				mutex_unlock(&net->xdp.lock);
+			}
+		}
+	}
 	xs->dev = NULL;
 	synchronize_net();
 	dev_put(dev);
@@ -972,6 +996,16 @@  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	xs->queue_id = qid;
 	xp_add_xsk(xs->pool, xs);
 
+	if (xs->rx) {
+		if (refcount_read(&dev->_rx[xs->queue_id].xsk_refcnt) == 0) {
+			WRITE_ONCE(dev->_rx[qid].xsk, xs);
+			refcount_set(&dev->_rx[qid].xsk_refcnt, 1);
+		} else {
+			refcount_inc(&dev->_rx[qid].xsk_refcnt);
+			WRITE_ONCE(dev->_rx[qid].xsk, NULL);
+		}
+	}
+
 out_unlock:
 	if (err) {
 		dev_put(dev);