@@ -1513,7 +1513,8 @@ bool dev_map_can_have_prog(struct bpf_map *map);
void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
-bool cpu_map_prog_allowed(struct bpf_map *map);
+int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+ struct sk_buff *skb);
/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1710,6 +1711,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
return 0;
}
+static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+ struct sk_buff *skb)
+{
+ return -EOPNOTSUPP;
+}
+
static inline bool cpu_map_prog_allowed(struct bpf_map *map)
{
return false;
@@ -863,8 +863,8 @@ struct sk_buff {
__u8 tc_skip_classify:1;
__u8 tc_at_ingress:1;
#endif
-#ifdef CONFIG_NET_REDIRECT
__u8 redirected:1;
+#ifdef CONFIG_NET_REDIRECT
__u8 from_ingress:1;
#endif
#ifdef CONFIG_TLS_DEVICE
@@ -4664,17 +4664,13 @@ static inline __wsum lco_csum(struct sk_buff *skb)
static inline bool skb_is_redirected(const struct sk_buff *skb)
{
-#ifdef CONFIG_NET_REDIRECT
return skb->redirected;
-#else
- return false;
-#endif
}
static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
{
-#ifdef CONFIG_NET_REDIRECT
skb->redirected = 1;
+#ifdef CONFIG_NET_REDIRECT
skb->from_ingress = from_ingress;
if (skb->from_ingress)
skb->tstamp = 0;
@@ -4683,9 +4679,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
static inline void skb_reset_redirect(struct sk_buff *skb)
{
-#ifdef CONFIG_NET_REDIRECT
skb->redirected = 0;
-#endif
}
static inline bool skb_csum_is_sctp(struct sk_buff *skb)
@@ -16,6 +16,7 @@
* netstack, and assigning dedicated CPUs for this stage. This
* basically allows for 10G wirespeed pre-filtering via bpf.
*/
+#include <linux/bitops.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ptr_ring.h>
@@ -168,6 +169,49 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
}
}
+static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+ struct list_head *listp,
+ struct xdp_cpumap_stats *stats)
+{
+ struct sk_buff *skb, *tmp;
+ struct xdp_buff xdp;
+ u32 act;
+ int err;
+
+ if (!rcpu->prog)
+ return;
+
+ list_for_each_entry_safe(skb, tmp, listp, list) {
+ act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_REDIRECT:
+ skb_list_del_init(skb);
+ err = xdp_do_generic_redirect(skb->dev, skb, &xdp,
+ rcpu->prog);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ stats->drop++;
+ } else {
+ stats->redirect++;
+ }
+ return;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(skb->dev, rcpu->prog, act);
+ fallthrough;
+ case XDP_DROP:
+ skb_list_del_init(skb);
+ kfree_skb(skb);
+ stats->drop++;
+ return;
+ }
+ }
+}
+
static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
void **frames, int n,
struct xdp_cpumap_stats *stats)
@@ -179,8 +223,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
if (!rcpu->prog)
return n;
- rcu_read_lock_bh();
-
xdp_set_return_frame_no_direct();
xdp.rxq = &rxq;
@@ -227,17 +269,34 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
}
}
+ xdp_clear_return_frame_no_direct();
+
+ return nframes;
+}
+
+#define CPUMAP_BATCH 8
+
+static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
+ int xdp_n, struct xdp_cpumap_stats *stats,
+ struct list_head *list)
+{
+ int nframes;
+
+ rcu_read_lock_bh();
+
+ nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats);
+
if (stats->redirect)
- xdp_do_flush_map();
+ xdp_do_flush();
- xdp_clear_return_frame_no_direct();
+ if (unlikely(!list_empty(list)))
+ cpu_map_bpf_prog_run_skb(rcpu, list, stats);
- rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
+ rcu_read_unlock_bh();
return nframes;
}
-#define CPUMAP_BATCH 8
static int cpu_map_kthread_run(void *data)
{
@@ -254,9 +313,9 @@ static int cpu_map_kthread_run(void *data)
struct xdp_cpumap_stats stats = {}; /* zero stats */
unsigned int kmem_alloc_drops = 0, sched = 0;
gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+ int i, n, m, nframes, xdp_n;
void *frames[CPUMAP_BATCH];
void *skbs[CPUMAP_BATCH];
- int i, n, m, nframes;
LIST_HEAD(list);
/* Release CPU reschedule checks */
@@ -280,9 +339,20 @@ static int cpu_map_kthread_run(void *data)
*/
n = __ptr_ring_consume_batched(rcpu->queue, frames,
CPUMAP_BATCH);
- for (i = 0; i < n; i++) {
+ for (i = 0, xdp_n = 0; i < n; i++) {
void *f = frames[i];
- struct page *page = virt_to_page(f);
+ struct page *page;
+
+ if (unlikely(__ptr_test_bit(0, &f))) {
+ struct sk_buff *skb = f;
+
+ __ptr_clear_bit(0, &skb);
+ list_add_tail(&skb->list, &list);
+ continue;
+ }
+
+ frames[xdp_n++] = f;
+ page = virt_to_page(f);
/* Bring struct page memory area to curr CPU. Read by
* build_skb_around via page_is_pfmemalloc(), and when
@@ -292,7 +362,7 @@ static int cpu_map_kthread_run(void *data)
}
/* Support running another XDP prog on this CPU */
- nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
+ nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
if (nframes) {
m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
if (unlikely(m == 0)) {
@@ -330,12 +400,6 @@ static int cpu_map_kthread_run(void *data)
return 0;
}
-bool cpu_map_prog_allowed(struct bpf_map *map)
-{
- return map->map_type == BPF_MAP_TYPE_CPUMAP &&
- map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
-}
-
static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
{
struct bpf_prog *prog;
@@ -696,6 +760,25 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
return 0;
}
+int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+ struct sk_buff *skb)
+{
+ int ret;
+
+ __skb_pull(skb, skb->mac_len);
+ skb_set_redirected(skb, false);
+ __ptr_set_bit(0, &skb);
+
+ ret = ptr_ring_produce(rcpu->queue, skb);
+ if (ret < 0)
+ goto trace;
+
+ wake_up_process(rcpu->kthread);
+trace:
+ trace_xdp_cpumap_enqueue(rcpu->map_id, !ret, !!ret, rcpu->cpu);
+ return ret;
+}
+
void __cpu_map_flush(void)
{
struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
@@ -5665,8 +5665,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
* have a bpf_prog installed on an entry
*/
for (i = 0; i < new->aux->used_map_cnt; i++) {
- if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
- cpu_map_prog_allowed(new->aux->used_maps[i])) {
+ if (dev_map_can_have_prog(new->aux->used_maps[i])) {
mutex_unlock(&new->aux->used_maps_mutex);
return -EINVAL;
}
@@ -4038,8 +4038,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
goto err;
consume_skb(skb);
break;
+ case BPF_MAP_TYPE_CPUMAP:
+ err = cpu_map_generic_redirect(fwd, skb);
+ if (unlikely(err))
+ goto err;
+ break;
default:
- /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
err = -EBADRQC;
goto err;
}