@@ -395,6 +395,7 @@ config TUN
tristate "Universal TUN/TAP device driver support"
depends on INET
select CRC32
+ select SKB_EXTENSIONS
help
TUN/TAP provides packet reception and transmission for user space
programs. It can be viewed as a simple Point-to-Point or Ethernet
@@ -209,6 +209,7 @@ struct tun_struct {
struct bpf_prog __rcu *xdp_prog;
struct tun_prog __rcu *steering_prog;
struct tun_prog __rcu *filter_prog;
+ struct tun_vnet_hash_container __rcu *vnet_hash;
struct ethtool_link_ksettings link_ksettings;
/* init args */
struct file *file;
@@ -451,9 +452,14 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
e->rps_rxhash = hash;
}
+static struct virtio_net_hash *tun_add_hash(struct sk_buff *skb) /* adds the SKB_EXT_TUN_VNET_HASH extension to @skb; handed as a callback to tun_vnet_hash_report() and tun_vnet_rss_select_queue() */
+{
+ return skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH); /* NOTE(review): presumably NULL if the extension cannot be allocated - confirm callers check */
+}
+
static const struct virtio_net_hash *tun_find_hash(const struct sk_buff *skb)
{
- return NULL;
+ return skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH); /* read-only lookup of the extension id that tun_add_hash() attaches; NOTE(review): presumably NULL when @skb has none - confirm */
}
/* We try to identify a flow through its rxhash. The reason that
@@ -462,14 +468,21 @@ static const struct virtio_net_hash *tun_find_hash(const struct sk_buff *skb)
* the userspace application move between processors, we may get a
* different rxq no. here.
*/
-static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
+static u16 tun_automq_select_queue(struct tun_struct *tun,
+ const struct tun_vnet_hash_container *vnet_hash,
+ struct sk_buff *skb)
{
+ struct flow_keys keys;
+ struct flow_keys_basic keys_basic;
struct tun_flow_entry *e;
u32 txq, numqueues;
numqueues = READ_ONCE(tun->numqueues);
- txq = __skb_get_hash_symmetric(skb);
+ memset(&keys, 0, sizeof(keys));
+ skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
+
+ txq = flow_hash_from_keys(&keys);
e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
if (e) {
tun_flow_save_rps_rxhash(e, txq);
@@ -478,6 +491,13 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
txq = reciprocal_scale(txq, numqueues);
}
+ keys_basic = (struct flow_keys_basic) {
+ .control = keys.control,
+ .basic = keys.basic
+ };
+ tun_vnet_hash_report(vnet_hash, skb, &keys_basic, skb->l4_hash ? skb->hash : txq,
+ tun_add_hash);
+
return txq;
}
@@ -513,8 +533,15 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
u16 ret;
rcu_read_lock();
- if (!tun_ebpf_select_queue(tun, skb, &ret))
- ret = tun_automq_select_queue(tun, skb);
+ if (!tun_ebpf_select_queue(tun, skb, &ret)) {
+ struct tun_vnet_hash_container *vnet_hash = rcu_dereference(tun->vnet_hash);
+
+ if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_RSS))
+ ret = tun_vnet_rss_select_queue(READ_ONCE(tun->numqueues), vnet_hash,
+ skb, tun_add_hash);
+ else
+ ret = tun_automq_select_queue(tun, vnet_hash, skb);
+ }
rcu_read_unlock();
return ret;
@@ -2235,6 +2262,7 @@ static void tun_free_netdev(struct net_device *dev)
security_tun_dev_free_security(tun->security);
__tun_set_ebpf(tun, &tun->steering_prog, NULL);
__tun_set_ebpf(tun, &tun->filter_prog, NULL);
+ kfree_rcu_mightsleep(rcu_access_pointer(tun->vnet_hash));
}
static void tun_setup(struct net_device *dev)
@@ -3014,16 +3042,22 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
} else {
memset(&ifr, 0, sizeof(ifr));
}
- if (cmd == TUNGETFEATURES) {
+ switch (cmd) {
+ case TUNGETFEATURES:
/* Currently this just means: "what IFF flags are valid?".
* This is needed because we never checked for invalid flags on
* TUNSETIFF.
*/
return put_user(IFF_TUN | IFF_TAP | IFF_NO_CARRIER |
TUN_FEATURES, (unsigned int __user*)argp);
- } else if (cmd == TUNSETQUEUE) {
+
+ case TUNSETQUEUE:
return tun_set_queue(file, &ifr);
- } else if (cmd == SIOCGSKNS) {
+
+ case TUNGETVNETHASHCAP:
+ return tun_vnet_ioctl_gethashcap(argp);
+
+ case SIOCGSKNS:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
return open_related_ns(&net->ns, get_net_ns);
@@ -3264,6 +3298,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ret = open_related_ns(&net->ns, get_net_ns);
break;
+ case TUNSETVNETHASH:
+ ret = tun_vnet_ioctl_sethash(&tun->vnet_hash, argp);
+ break;
+
default:
ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
break;
@@ -4842,6 +4842,9 @@ enum skb_ext_id {
#endif
#if IS_ENABLED(CONFIG_MCTP_FLOWS)
SKB_EXT_MCTP,
+#endif
+#if IS_ENABLED(CONFIG_TUN)
+ SKB_EXT_TUN_VNET_HASH,
#endif
SKB_EXT_NUM, /* must be last */
};
@@ -64,6 +64,7 @@
#include <linux/mpls.h>
#include <linux/kcov.h>
#include <linux/iov_iter.h>
+#include <linux/virtio_net.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -4969,6 +4970,9 @@ static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_MCTP_FLOWS)
[SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow),
#endif
+#if IS_ENABLED(CONFIG_TUN)
+ [SKB_EXT_TUN_VNET_HASH] = SKB_EXT_CHUNKSIZEOF(struct virtio_net_hash),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
Add ioctls and storage required for the virtio-net hash feature to TUN.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
 drivers/net/Kconfig    |  1 +
 drivers/net/tun.c      | 54 ++++++++++++++++++++++++++++++++++++++++++--------
 include/linux/skbuff.h |  3 +++
 net/core/skbuff.c      |  4 ++++
 4 files changed, 54 insertions(+), 8 deletions(-)