--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -23,7 +23,6 @@ struct net_hotdata {
struct net_offload udpv6_offload;
#endif
struct list_head offload_base;
- struct list_head ptype_all;
struct kmem_cache *skbuff_cache;
struct kmem_cache *skbuff_fclone_cache;
struct kmem_cache *skb_small_head_cache;
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -83,6 +83,9 @@ struct net {
struct llist_node defer_free_list;
struct llist_node cleanup_list; /* namespaces on death row */

+ struct list_head ptype_all;
+ struct list_head ptype_specific;
+
#ifdef CONFIG_KEYS
struct key_tag *key_domain; /* Key domain of operation tag */
#endif
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -572,11 +572,19 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
- if (pt->type == htons(ETH_P_ALL))
- return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all;
- else
- return pt->dev ? &pt->dev->ptype_specific :
- &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+ if (pt->type == htons(ETH_P_ALL)) {
+ if (!pt->af_packet_net && !pt->dev)
+ return NULL;
+
+ return pt->dev ? &pt->dev->ptype_all :
+ &pt->af_packet_net->ptype_all;
+ }
+
+ if (pt->dev)
+ return &pt->dev->ptype_specific;
+
+ return pt->af_packet_net ? &pt->af_packet_net->ptype_specific :
+ &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

/**
@@ -596,6 +604,9 @@ void dev_add_pack(struct packet_type *pt)
{
struct list_head *head = ptype_head(pt);

+ if (WARN_ON_ONCE(!head))
+ return;
+
spin_lock(&ptype_lock);
list_add_rcu(&pt->list, head);
spin_unlock(&ptype_lock);
@@ -620,6 +631,9 @@ void __dev_remove_pack(struct packet_type *pt)
struct list_head *head = ptype_head(pt);
struct packet_type *pt1;

+ if (!head)
+ return;
+
spin_lock(&ptype_lock);

list_for_each_entry(pt1, head, list) {
@@ -2469,7 +2483,7 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
*/
bool dev_nit_active(struct net_device *dev)
{
- return !list_empty(&net_hotdata.ptype_all) ||
+ return !list_empty(&dev_net(dev)->ptype_all) ||
!list_empty(&dev->ptype_all);
}
EXPORT_SYMBOL_GPL(dev_nit_active);
@@ -2481,7 +2495,7 @@ EXPORT_SYMBOL_GPL(dev_nit_active);
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
- struct list_head *ptype_list = &net_hotdata.ptype_all;
+ struct list_head *ptype_list = &dev_net(dev)->ptype_all;
struct packet_type *ptype, *pt_prev = NULL;
struct sk_buff *skb2 = NULL;
@@ -2529,7 +2543,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
pt_prev = ptype;
}

- if (ptype_list == &net_hotdata.ptype_all) {
+ if (ptype_list == &dev_net(dev)->ptype_all) {
ptype_list = &dev->ptype_all;
goto again;
}
@@ -5718,7 +5732,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
if (pfmemalloc)
goto skip_taps;

- list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
+ list_for_each_entry_rcu(ptype, &dev_net(skb->dev)->ptype_all, list) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -5830,6 +5844,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
&ptype_base[ntohs(type) &
PTYPE_HASH_MASK]);
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &dev_net(orig_dev)->ptype_specific);
}

deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -7,7 +7,6 @@
struct net_hotdata net_hotdata __cacheline_aligned = {
.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
- .ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all),
.gro_normal_batch = 8,

.netdev_budget = 300,
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -185,7 +185,7 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
}
}

- list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) {
+ list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) {
if (i == pos)
return pt;
++i;
@@ -210,6 +210,7 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
struct net_device *dev;
struct packet_type *pt;
struct list_head *nxt;
@@ -232,16 +233,15 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
goto found;
}
}
-
- nxt = net_hotdata.ptype_all.next;
- goto ptype_all;
+ nxt = net->ptype_all.next;
+ goto net_ptype_all;
}

- if (pt->type == htons(ETH_P_ALL)) {
-ptype_all:
- if (nxt != &net_hotdata.ptype_all)
+ if (pt->af_packet_net) {
+net_ptype_all:
+ if (nxt != &net->ptype_all)
goto found;
hash = 0;
nxt = ptype_base[0].next;
} else
hash = ntohs(pt->type) & PTYPE_HASH_MASK;
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -340,6 +340,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ns)
lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL);
#endif

+ INIT_LIST_HEAD(&net->ptype_all);
+ INIT_LIST_HEAD(&net->ptype_specific);
preinit_net_sysctl(net);
}
Currently, network taps unbound to any interface are linked in the
global ptype_all list, affecting performance in all network
namespaces.

Add per-netns ptype chains, so that in the above case only the netns
owning the packet socket(s) is affected.

While at it, drop the global ptype_all list: no in-kernel user
registers a tap on "any" type without specifying either the target
device or the target namespace (and IMHO doing so would not make any
sense).

Note that this adds a conditional to the fast path (to check the
per-netns ptype_specific list) and grows the dataset size by a
cacheline (owing to the per-netns lists). The next patch will try to
address the above.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/hotdata.h       |  1 -
 include/net/net_namespace.h |  3 +++
 net/core/dev.c              | 32 ++++++++++++++++++++++++--------
 net/core/hotdata.c          |  1 -
 net/core/net-procfs.c       | 16 ++++++++--------
 net/core/net_namespace.c    |  2 ++
 6 files changed, 37 insertions(+), 18 deletions(-)
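For reference, the list selection implemented by the reworked
ptype_head() condenses to the decision table below; this is an
illustrative sketch restating the dev.c hunk above, not extra patch
content:

/* Sketch: list selection done by the reworked ptype_head().
 * A NULL result means the registration is rejected: an ETH_P_ALL
 * tap must be bound to either a device or a netns.
 *
 *   pt->type    pt->dev  pt->af_packet_net  resulting list
 *   ETH_P_ALL   set      (any)              pt->dev->ptype_all
 *   ETH_P_ALL   NULL     set                af_packet_net->ptype_all
 *   ETH_P_ALL   NULL     NULL               NULL (rejected)
 *   specific    set      (any)              pt->dev->ptype_specific
 *   specific    NULL     set                af_packet_net->ptype_specific
 *   specific    NULL     NULL               ptype_base[hash]
 */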
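The scenario this patch optimizes is easy to reproduce from userspace:
an AF_PACKET socket opened with ETH_P_ALL and never bound to a
specific ifindex is exactly a tap "unbound to any interface". A
minimal sketch follows (assumes CAP_NET_RAW, error handling trimmed);
kernel side, packet_create() points prot_hook.af_packet_net at the
opener's netns, so with this patch the hook lands on that netns'
ptype_all list only:

#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	/* ETH_P_ALL and no bind() to an ifindex: an "any device" tap. */
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	char buf[2048];
	ssize_t len;

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Sniff a single frame from whatever device delivers first. */
	len = recvfrom(fd, buf, sizeof(buf), 0, NULL, NULL);
	if (len >= 0)
		printf("got %zd bytes\n", len);

	close(fd);
	return 0;
}

Before this series every such socket was visited from the fast path of
all namespaces; afterwards only the owning netns walks its own
ptype_all list.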