diff mbox series

[RFC,v2,net-next,2/2] net: Move per-CPU flush-lists to bpf_net_context on PREEMPT_RT.

Message ID 20240229183109.646865-3-bigeasy@linutronix.de (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series Use per-task storage for XDP-redirects on PREEMPT_RT | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/apply fail Patch does not apply to net-next

Commit Message

Sebastian Andrzej Siewior Feb. 29, 2024, 6:17 p.m. UTC
The per-CPU flush lists, which are accessed from within the NAPI callback
(xdp_do_flush() for instance), are per-CPU. There are subject to the
same problem as struct bpf_redirect_info.

Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and
xskmap_map_flush_list to struct bpf_net_context. Add wrappers for the
access.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/filter.h | 38 ++++++++++++++++++++++++++++++++++++++
 kernel/bpf/cpumap.c    | 24 ++++++++----------------
 kernel/bpf/devmap.c    | 16 ++++++++--------
 net/xdp/xsk.c          | 19 +++++++++++--------
 4 files changed, 65 insertions(+), 32 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6d065991f9e9c..41c3dbbc4d163 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -706,6 +706,9 @@  struct bpf_redirect_info {
 
 struct bpf_net_context {
 	struct bpf_redirect_info ri;
+	struct list_head cpu_map_flush_list;
+	struct list_head dev_map_flush_list;
+	struct list_head xskmap_map_flush_list;
 };
 
 #ifndef CONFIG_PREEMPT_RT
@@ -718,6 +721,10 @@  static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bp
 	ctx = this_cpu_read(bpf_net_context);
 	if (ctx != NULL)
 		return NULL;
+	INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list);
+	INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list);
+	INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list);
+
 	this_cpu_write(bpf_net_context, bpf_net_ctx);
 	return bpf_net_ctx;
 }
@@ -748,6 +755,10 @@  static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bp
 
 	if (tsk->bpf_net_context != NULL)
 		return NULL;
+	INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list);
+	INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list);
+	INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list);
+
 	tsk->bpf_net_context = bpf_net_ctx;
 	return bpf_net_ctx;
 }
@@ -780,6 +791,33 @@  static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
 	return &bpf_net_ctx->ri;
 }
 
+static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!bpf_net_ctx)
+		return NULL;
+	return &bpf_net_ctx->cpu_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!bpf_net_ctx)
+		return NULL;
+	return &bpf_net_ctx->dev_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!bpf_net_ctx)
+		return NULL;
+	return &bpf_net_ctx->xskmap_map_flush_list;
+}
+
 DEFINE_FREE(bpf_net_ctx_clear, struct bpf_net_context *, if (_T) bpf_net_ctx_clear(_T));
 
 /* flags for bpf_redirect_info kern_flags */
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 6d140f0769d4c..59677cea28e18 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -78,8 +78,6 @@  struct bpf_cpu_map {
 	struct bpf_cpu_map_entry __rcu **cpu_map;
 };
 
-static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
-
 static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 {
 	u32 value_size = attr->value_size;
@@ -703,7 +701,7 @@  static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
  */
 static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
-	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
@@ -755,9 +753,12 @@  int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
 
 void __cpu_map_flush(void)
 {
-	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
 	struct xdp_bulk_queue *bq, *tmp;
 
+	if (!flush_list)
+		return;
+
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
 		bq_flush_to_queue(bq);
 
@@ -769,20 +770,11 @@  void __cpu_map_flush(void)
 #ifdef CONFIG_DEBUG_NET
 bool cpu_map_check_flush(void)
 {
-	if (list_empty(this_cpu_ptr(&cpu_map_flush_list)))
+	struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
+
+	if (!flush_list || list_empty(bpf_net_ctx_get_cpu_map_flush_list()))
 		return false;
 	__cpu_map_flush();
 	return true;
 }
 #endif
-
-static int __init cpu_map_init(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu));
-	return 0;
-}
-
-subsys_initcall(cpu_map_init);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a936c704d4e77..463f3c5eaa895 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -83,7 +83,6 @@  struct bpf_dtab {
 	u32 n_buckets;
 };
 
-static DEFINE_PER_CPU(struct list_head, dev_flush_list);
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
@@ -407,9 +406,12 @@  static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
  */
 void __dev_flush(void)
 {
-	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
 	struct xdp_dev_bulk_queue *bq, *tmp;
 
+	if (!flush_list)
+		return;
+
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
 		bq_xmit_all(bq, XDP_XMIT_FLUSH);
 		bq->dev_rx = NULL;
@@ -421,7 +423,9 @@  void __dev_flush(void)
 #ifdef CONFIG_DEBUG_NET
 bool dev_check_flush(void)
 {
-	if (list_empty(this_cpu_ptr(&dev_flush_list)))
+	struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
+
+	if (!flush_list || list_empty(bpf_net_ctx_get_dev_flush_list()))
 		return false;
 	__dev_flush();
 	return true;
@@ -452,7 +456,7 @@  static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
-	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
 	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
@@ -1155,15 +1159,11 @@  static struct notifier_block dev_map_notifier = {
 
 static int __init dev_map_init(void)
 {
-	int cpu;
-
 	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
 	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
 		     offsetof(struct _bpf_dtab_netdev, dev));
 	register_netdevice_notifier(&dev_map_notifier);
 
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
 	return 0;
 }
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b78c0e095e221..7a130d0871de1 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,8 +35,6 @@ 
 #define TX_BATCH_SIZE 32
 #define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
 
-static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
-
 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -372,9 +370,12 @@  static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
-	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
 	int err;
 
+	if (!flush_list)
+		return -EINVAL;
+
 	err = xsk_rcv(xs, xdp);
 	if (err)
 		return err;
@@ -387,9 +388,11 @@  int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 void __xsk_map_flush(void)
 {
-	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
 	struct xdp_sock *xs, *tmp;
 
+	if (!flush_list)
+		return;
 	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
 		xsk_flush(xs);
 		__list_del_clearprev(&xs->flush_node);
@@ -399,7 +402,9 @@  void __xsk_map_flush(void)
 #ifdef CONFIG_DEBUG_NET
 bool xsk_map_check_flush(void)
 {
-	if (list_empty(this_cpu_ptr(&xskmap_flush_list)))
+	struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
+
+	if (!flush_list || list_empty(flush_list))
 		return false;
 	__xsk_map_flush();
 	return true;
@@ -1770,7 +1775,7 @@  static struct pernet_operations xsk_net_ops = {
 
 static int __init xsk_init(void)
 {
-	int err, cpu;
+	int err;
 
 	err = proto_register(&xsk_proto, 0 /* no slab */);
 	if (err)
@@ -1788,8 +1793,6 @@  static int __init xsk_init(void)
 	if (err)
 		goto out_pernet;
 
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
 	return 0;
 
 out_pernet: