diff mbox series

[RFC,v3,6/9] tun: Introduce virtio-net hash reporting feature

Message ID 20240915-rss-v3-6-c630015db082@daynix.com (mailing list archive)
State New
Headers show
Series tun: Introduce virtio-net hashing feature | expand

Commit Message

Akihiko Odaki Sept. 15, 2024, 1:17 a.m. UTC
Allow the guest to reuse the hash value to make receive steering
consistent between the host and guest, and to save hash computation.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
 Documentation/networking/tuntap.rst |   7 ++
 drivers/net/Kconfig                 |   1 +
 drivers/net/tun.c                   | 146 +++++++++++++++++++++++++++++++-----
 include/uapi/linux/if_tun.h         |  44 +++++++++++
 4 files changed, 180 insertions(+), 18 deletions(-)

Comments

Willem de Bruijn Sept. 18, 2024, 1:17 p.m. UTC | #1
Akihiko Odaki wrote:
> Allow the guest to reuse the hash value to make receive steering
> consistent between the host and guest, and to save hash computation.
> 
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
>  Documentation/networking/tuntap.rst |   7 ++
>  drivers/net/Kconfig                 |   1 +
>  drivers/net/tun.c                   | 146 +++++++++++++++++++++++++++++++-----
>  include/uapi/linux/if_tun.h         |  44 +++++++++++
>  4 files changed, 180 insertions(+), 18 deletions(-)
> 
> diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
> index 4d7087f727be..86b4ae8caa8a 100644
> --- a/Documentation/networking/tuntap.rst
> +++ b/Documentation/networking/tuntap.rst
> @@ -206,6 +206,13 @@ enable is true we enable it, otherwise we disable it::
>        return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
>    }
>  
> +3.4 Reference
> +-------------
> +
> +``linux/if_tun.h`` defines the interface described below:
> +
> +.. kernel-doc:: include/uapi/linux/if_tun.h
> +
>  Universal TUN/TAP device driver Frequently Asked Question
>  =========================================================
>  
> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> index 9920b3a68ed1..e2a7bd703550 100644
> --- a/drivers/net/Kconfig
> +++ b/drivers/net/Kconfig
> @@ -395,6 +395,7 @@ config TUN
>  	tristate "Universal TUN/TAP device driver support"
>  	depends on INET
>  	select CRC32
> +	select SKB_EXTENSIONS
>  	help
>  	  TUN/TAP provides packet reception and transmission for user space
>  	  programs.  It can be viewed as a simple Point-to-Point or Ethernet
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 9d93ab9ee58f..b8fcd71becac 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -173,6 +173,10 @@ struct tun_prog {
>  	struct bpf_prog *prog;
>  };
>  
> +struct tun_vnet_hash_container {
> +	struct tun_vnet_hash common;
> +};
> +
>  /* Since the socket were moved to tun_file, to preserve the behavior of persist
>   * device, socket filter, sndbuf and vnet header size were restore when the
>   * file were attached to a persist device.
> @@ -210,6 +214,7 @@ struct tun_struct {
>  	struct bpf_prog __rcu *xdp_prog;
>  	struct tun_prog __rcu *steering_prog;
>  	struct tun_prog __rcu *filter_prog;
> +	struct tun_vnet_hash_container __rcu *vnet_hash;

This is just

+struct tun_vnet_hash {
+       u32 value;
+       u16 report;
+};

Can just be fields in the struct directly.

Also, only one bit is really used for report, so this can probably be
condensed further.

>  	struct ethtool_link_ksettings link_ksettings;
>  	/* init args */
>  	struct file *file;
> @@ -221,6 +226,11 @@ struct veth {
>  	__be16 h_vlan_TCI;
>  };
>  
> +static const struct tun_vnet_hash tun_vnet_hash_cap = {
> +	.flags = TUN_VNET_HASH_REPORT,
> +	.types = VIRTIO_NET_SUPPORTED_HASH_TYPES
> +};
> +
>  static void tun_flow_init(struct tun_struct *tun);
>  static void tun_flow_uninit(struct tun_struct *tun);
>  
> @@ -322,10 +332,17 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
>  	if (get_user(be, argp))
>  		return -EFAULT;
>  
> -	if (be)
> +	if (be) {
> +		struct tun_vnet_hash_container *vnet_hash = rtnl_dereference(tun->vnet_hash);
> +
> +		if (!(tun->flags & TUN_VNET_LE) &&
> +		    vnet_hash && (vnet_hash->flags & TUN_VNET_HASH_REPORT))
> +			return -EBUSY;
> +

Doesn't "be" being set here imply !(tun->flags & TUN_VNET_LE)? Same again below.

>  		tun->flags |= TUN_VNET_BE;
> -	else
> +	} else {
>  		tun->flags &= ~TUN_VNET_BE;
> +	}
>  
>  	return 0;
>  }
> @@ -522,14 +539,20 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
>   * the userspace application move between processors, we may get a
>   * different rxq no. here.
>   */
> -static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
> +static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb,
> +				   const struct tun_vnet_hash_container *vnet_hash)
>  {
> +	struct tun_vnet_hash_ext *ext;
> +	struct flow_keys keys;
>  	struct tun_flow_entry *e;
>  	u32 txq, numqueues;
>  
>  	numqueues = READ_ONCE(tun->numqueues);
>  
> -	txq = __skb_get_hash_symmetric(skb);
> +	memset(&keys, 0, sizeof(keys));
> +	skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
> +
> +	txq = flow_hash_from_keys(&keys);
>  	e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
>  	if (e) {
>  		tun_flow_save_rps_rxhash(e, txq);
> @@ -538,6 +561,16 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
>  		txq = reciprocal_scale(txq, numqueues);
>  	}
>  
> +	if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT)) {
> +		ext = skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
> +		if (ext) {
> +			u32 types = vnet_hash->common.types;
> +
> +			ext->report = virtio_net_hash_report(types, keys.basic);
> +			ext->value = skb->l4_hash ? skb->hash : txq;
> +		}
> +	}
> +
>  	return txq;
>  }
>  
> @@ -565,10 +598,13 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
>  	u16 ret;
>  
>  	rcu_read_lock();
> -	if (rcu_dereference(tun->steering_prog))
> +	if (rcu_dereference(tun->steering_prog)) {
>  		ret = tun_ebpf_select_queue(tun, skb);
> -	else
> -		ret = tun_automq_select_queue(tun, skb);
> +	} else {
> +		struct tun_vnet_hash_container *vnet_hash = rcu_dereference(tun->vnet_hash);
> +
> +		ret = tun_automq_select_queue(tun, skb, vnet_hash);

Already passing tun, no need to pass tun->vnet_hash separately.
> +	}
>  	rcu_read_unlock();
>  
>  	return ret;
> @@ -2120,33 +2156,63 @@ static ssize_t tun_put_user(struct tun_struct *tun,
>  	}
>  
>  	if (vnet_hdr_sz) {
> -		struct virtio_net_hdr gso;
> +		struct tun_vnet_hash_ext *ext;
> +		size_t vnet_hdr_content_sz = sizeof(struct virtio_net_hdr);
> +		union {
> +			struct virtio_net_hdr hdr;
> +			struct virtio_net_hdr_v1_hash hdr_v1_hash;
> +		} vnet_hdr;
> +		int ret;
>  
>  		if (iov_iter_count(iter) < vnet_hdr_sz)
>  			return -EINVAL;
>  
> -		if (virtio_net_hdr_from_skb(skb, &gso,
> -					    tun_is_little_endian(tun), true,
> -					    vlan_hlen)) {
> +		ext = vnet_hdr_sz < sizeof(vnet_hdr.hdr_v1_hash) ?
> +		      NULL : skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
> +
> +		if (ext) {
> +			struct virtio_net_hash hash = {
> +				.value = ext->value,
> +				.report = ext->report,
> +			};
> +
> +			vnet_hdr_content_sz = sizeof(vnet_hdr.hdr_v1_hash);
> +			ret = virtio_net_hdr_v1_hash_from_skb(skb,
> +							      &vnet_hdr.hdr_v1_hash,
> +							      true,
> +							      vlan_hlen,
> +							      &hash);
> +		} else {
> +			vnet_hdr_content_sz = sizeof(struct virtio_net_hdr);
> +			ret = virtio_net_hdr_from_skb(skb,
> +						      &vnet_hdr.hdr,
> +						      tun_is_little_endian(tun),
> +						      true,
> +						      vlan_hlen);
> +		}
> +

This is why just setting the fields directly rather than adding
virtio_net_hdr_v1_hash_from_skb is actually simpler.

> +		if (ret) {
>  			struct skb_shared_info *sinfo = skb_shinfo(skb);
>  
>  			if (net_ratelimit()) {
>  				netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
> -					   sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
> -					   tun16_to_cpu(tun, gso.hdr_len));
> +					   sinfo->gso_type,
> +					   tun16_to_cpu(tun, vnet_hdr.hdr.gso_size),
> +					   tun16_to_cpu(tun, vnet_hdr.hdr.hdr_len));
>  				print_hex_dump(KERN_ERR, "tun: ",
>  					       DUMP_PREFIX_NONE,
>  					       16, 1, skb->head,
> -					       min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
> +					       min(tun16_to_cpu(tun, vnet_hdr.hdr.hdr_len), 64),
> +					       true);
>  			}
>  			WARN_ON_ONCE(1);
>  			return -EINVAL;
>  		}
>  
> -		if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
> +		if (copy_to_iter(&vnet_hdr, vnet_hdr_content_sz, iter) != vnet_hdr_content_sz)
>  			return -EFAULT;
>  
> -		iov_iter_zero(vnet_hdr_sz - sizeof(gso), iter);
> +		iov_iter_zero(vnet_hdr_sz - vnet_hdr_content_sz, iter);
>  	}
>  
>  	if (vlan_hlen) {
> @@ -3094,6 +3160,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  	int le;
>  	int ret;
>  	bool do_notify = false;
> +	struct tun_vnet_hash vnet_hash_common;
> +	struct tun_vnet_hash_container *vnet_hash;
>  
>  	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
>  	    (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
> @@ -3115,6 +3183,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
>  			return -EPERM;
>  		return open_related_ns(&net->ns, get_net_ns);
> +	} else if (cmd == TUNGETVNETHASHCAP) {
> +		return copy_to_user(argp, &tun_vnet_hash_cap, sizeof(tun_vnet_hash_cap)) ?
> +		       -EFAULT : 0;
>  	}
>  
>  	rtnl_lock();
> @@ -3314,6 +3385,13 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  			break;
>  		}
>  
> +		vnet_hash = rtnl_dereference(tun->vnet_hash);
> +		if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT) &&
> +		    vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr_v1_hash)) {
> +			ret = -EBUSY;
> +			break;
> +		}
> +
>  		tun->vnet_hdr_sz = vnet_hdr_sz;
>  		break;
>  
> @@ -3328,10 +3406,18 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  			ret = -EFAULT;
>  			break;
>  		}
> -		if (le)
> +		if (le) {
>  			tun->flags |= TUN_VNET_LE;
> -		else
> +		} else {
> +			vnet_hash = rtnl_dereference(tun->vnet_hash);
> +			if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT) &&
> +			    !tun_legacy_is_little_endian(tun)) {
> +				ret = -EBUSY;
> +				break;
> +			}
> +
>  			tun->flags &= ~TUN_VNET_LE;
> +		}
>  		break;
>  
>  	case TUNGETVNETBE:
> @@ -3396,6 +3482,30 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  		ret = open_related_ns(&net->ns, get_net_ns);
>  		break;
>  
> +	case TUNSETVNETHASH:
> +		if (copy_from_user(&vnet_hash_common, argp, sizeof(vnet_hash_common))) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +		argp = (struct tun_vnet_hash __user *)argp + 1;
> +
> +		if ((vnet_hash_common.flags & TUN_VNET_HASH_REPORT) &&
> +		    (tun->vnet_hdr_sz < sizeof(struct virtio_net_hdr_v1_hash) ||
> +		     !tun_is_little_endian(tun))) {
> +			ret = -EBUSY;
> +			break;
> +		}
> +
> +		vnet_hash = kmalloc(sizeof(vnet_hash->common), GFP_KERNEL);
> +		if (!vnet_hash) {
> +			ret = -ENOMEM;
> +			break;
> +		}
> +
> +		vnet_hash->common = vnet_hash_common;
> +		kfree_rcu_mightsleep(rcu_replace_pointer_rtnl(tun->vnet_hash, vnet_hash));
> +		break;
> +
>  	default:
>  		ret = -EINVAL;
>  		break;
> diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
> index 287cdc81c939..1561e8ce0a0a 100644
> --- a/include/uapi/linux/if_tun.h
> +++ b/include/uapi/linux/if_tun.h
> @@ -62,6 +62,30 @@
>  #define TUNSETCARRIER _IOW('T', 226, int)
>  #define TUNGETDEVNETNS _IO('T', 227)
>  
> +/**
> + * define TUNGETVNETHASHCAP - ioctl to get virtio_net hashing capability.
> + *
> + * The argument is a pointer to &struct tun_vnet_hash which will store the
> + * maximal virtio_net hashing configuration.
> + */
> +#define TUNGETVNETHASHCAP _IOR('T', 228, struct tun_vnet_hash)
> +
> +/**
> + * define TUNSETVNETHASH - ioctl to configure virtio_net hashing
> + *
> + * The argument is a pointer to &struct tun_vnet_hash.
> + *
> + * %TUNSETVNETHDRSZ ioctl must be called with a number greater than or equal to
> + * the size of &struct virtio_net_hdr_v1_hash before calling this ioctl with
> + * %TUN_VNET_HASH_REPORT.
> + *
> + * The virtio_net header must be configured as little-endian before calling this
> + * ioctl with %TUN_VNET_HASH_REPORT.
> + *
> + * This ioctl currently has no effect on XDP packets.
> + */
> +#define TUNSETVNETHASH _IOW('T', 229, struct tun_vnet_hash)
> +
>  /* TUNSETIFF ifr flags */
>  #define IFF_TUN		0x0001
>  #define IFF_TAP		0x0002
> @@ -115,4 +139,24 @@ struct tun_filter {
>  	__u8   addr[][ETH_ALEN];
>  };
>  
> +/**
> + * define TUN_VNET_HASH_REPORT - Request virtio_net hash reporting for vhost
> + */
> +#define TUN_VNET_HASH_REPORT	0x0001
> +
> +/**
> + * struct tun_vnet_hash - virtio_net hashing configuration
> + * @flags:
> + *		Bitmask consists of %TUN_VNET_HASH_REPORT and %TUN_VNET_HASH_RSS
> + * @pad:
> + *		Should be filled with zero before passing to %TUNSETVNETHASH
> + * @types:
> + *		Bitmask of allowed hash types
> + */
> +struct tun_vnet_hash {
> +	__u16 flags;
> +	__u8 pad[2];
> +	__u32 types;
> +};
> +

The values for flags and types should probably be defined here.

>  #endif /* _UAPI__IF_TUN_H */
> 
> -- 
> 2.46.0
>
Akihiko Odaki Sept. 23, 2024, 6:35 p.m. UTC | #2
On 2024/09/18 15:17, Willem de Bruijn wrote:
> Akihiko Odaki wrote:
>> Allow the guest to reuse the hash value to make receive steering
>> consistent between the host and guest, and to save hash computation.
>>
>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>> ---
>>   Documentation/networking/tuntap.rst |   7 ++
>>   drivers/net/Kconfig                 |   1 +
>>   drivers/net/tun.c                   | 146 +++++++++++++++++++++++++++++++-----
>>   include/uapi/linux/if_tun.h         |  44 +++++++++++
>>   4 files changed, 180 insertions(+), 18 deletions(-)
>>
>> diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
>> index 4d7087f727be..86b4ae8caa8a 100644
>> --- a/Documentation/networking/tuntap.rst
>> +++ b/Documentation/networking/tuntap.rst
>> @@ -206,6 +206,13 @@ enable is true we enable it, otherwise we disable it::
>>         return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
>>     }
>>   
>> +3.4 Reference
>> +-------------
>> +
>> +``linux/if_tun.h`` defines the interface described below:
>> +
>> +.. kernel-doc:: include/uapi/linux/if_tun.h
>> +
>>   Universal TUN/TAP device driver Frequently Asked Question
>>   =========================================================
>>   
>> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
>> index 9920b3a68ed1..e2a7bd703550 100644
>> --- a/drivers/net/Kconfig
>> +++ b/drivers/net/Kconfig
>> @@ -395,6 +395,7 @@ config TUN
>>   	tristate "Universal TUN/TAP device driver support"
>>   	depends on INET
>>   	select CRC32
>> +	select SKB_EXTENSIONS
>>   	help
>>   	  TUN/TAP provides packet reception and transmission for user space
>>   	  programs.  It can be viewed as a simple Point-to-Point or Ethernet
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 9d93ab9ee58f..b8fcd71becac 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -173,6 +173,10 @@ struct tun_prog {
>>   	struct bpf_prog *prog;
>>   };
>>   
>> +struct tun_vnet_hash_container {
>> +	struct tun_vnet_hash common;
>> +};
>> +
>>   /* Since the socket were moved to tun_file, to preserve the behavior of persist
>>    * device, socket filter, sndbuf and vnet header size were restore when the
>>    * file were attached to a persist device.
>> @@ -210,6 +214,7 @@ struct tun_struct {
>>   	struct bpf_prog __rcu *xdp_prog;
>>   	struct tun_prog __rcu *steering_prog;
>>   	struct tun_prog __rcu *filter_prog;
>> +	struct tun_vnet_hash_container __rcu *vnet_hash;
> 
> This is just
> 
> +struct tun_vnet_hash {
> +       u32 value;
> +       u16 report;
> +};
> 
> Can just be fields in the struct directly.

I will change to store struct tun_vnet_hash directly.

> 
> Also, only one bit is really used for report, so this can probably be
> condensed further.

It is more than one bit; the report types are defined as follows:
#define VIRTIO_NET_HASH_REPORT_NONE            0
#define VIRTIO_NET_HASH_REPORT_IPv4            1
#define VIRTIO_NET_HASH_REPORT_TCPv4           2
#define VIRTIO_NET_HASH_REPORT_UDPv4           3
#define VIRTIO_NET_HASH_REPORT_IPv6            4
#define VIRTIO_NET_HASH_REPORT_TCPv6           5
#define VIRTIO_NET_HASH_REPORT_UDPv6           6
#define VIRTIO_NET_HASH_REPORT_IPv6_EX         7
#define VIRTIO_NET_HASH_REPORT_TCPv6_EX        8
#define VIRTIO_NET_HASH_REPORT_UDPv6_EX        9

> 
>>   	struct ethtool_link_ksettings link_ksettings;
>>   	/* init args */
>>   	struct file *file;
>> @@ -221,6 +226,11 @@ struct veth {
>>   	__be16 h_vlan_TCI;
>>   };
>>   
>> +static const struct tun_vnet_hash tun_vnet_hash_cap = {
>> +	.flags = TUN_VNET_HASH_REPORT,
>> +	.types = VIRTIO_NET_SUPPORTED_HASH_TYPES
>> +};
>> +
>>   static void tun_flow_init(struct tun_struct *tun);
>>   static void tun_flow_uninit(struct tun_struct *tun);
>>   
>> @@ -322,10 +332,17 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
>>   	if (get_user(be, argp))
>>   		return -EFAULT;
>>   
>> -	if (be)
>> +	if (be) {
>> +		struct tun_vnet_hash_container *vnet_hash = rtnl_dereference(tun->vnet_hash);
>> +
>> +		if (!(tun->flags & TUN_VNET_LE) &&
>> +		    vnet_hash && (vnet_hash->flags & TUN_VNET_HASH_REPORT))
>> +			return -EBUSY;
>> +
> 
> Doesn't "be" being set here imply !(tun->flags & TUN_VNET_LE)? Same again below.

Unfortunately no. TUN_VNET_LE and TUN_VNET_BE can both be set at the same
time, in which case TUN_VNET_LE takes precedence.

> 
>>   		tun->flags |= TUN_VNET_BE;
>> -	else
>> +	} else {
>>   		tun->flags &= ~TUN_VNET_BE;
>> +	}
>>   
>>   	return 0;
>>   }
>> @@ -522,14 +539,20 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
>>    * the userspace application move between processors, we may get a
>>    * different rxq no. here.
>>    */
>> -static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
>> +static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb,
>> +				   const struct tun_vnet_hash_container *vnet_hash)
>>   {
>> +	struct tun_vnet_hash_ext *ext;
>> +	struct flow_keys keys;
>>   	struct tun_flow_entry *e;
>>   	u32 txq, numqueues;
>>   
>>   	numqueues = READ_ONCE(tun->numqueues);
>>   
>> -	txq = __skb_get_hash_symmetric(skb);
>> +	memset(&keys, 0, sizeof(keys));
>> +	skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
>> +
>> +	txq = flow_hash_from_keys(&keys);
>>   	e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
>>   	if (e) {
>>   		tun_flow_save_rps_rxhash(e, txq);
>> @@ -538,6 +561,16 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
>>   		txq = reciprocal_scale(txq, numqueues);
>>   	}
>>   
>> +	if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT)) {
>> +		ext = skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
>> +		if (ext) {
>> +			u32 types = vnet_hash->common.types;
>> +
>> +			ext->report = virtio_net_hash_report(types, keys.basic);
>> +			ext->value = skb->l4_hash ? skb->hash : txq;
>> +		}
>> +	}
>> +
>>   	return txq;
>>   }
>>   
>> @@ -565,10 +598,13 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
>>   	u16 ret;
>>   
>>   	rcu_read_lock();
>> -	if (rcu_dereference(tun->steering_prog))
>> +	if (rcu_dereference(tun->steering_prog)) {
>>   		ret = tun_ebpf_select_queue(tun, skb);
>> -	else
>> -		ret = tun_automq_select_queue(tun, skb);
>> +	} else {
>> +		struct tun_vnet_hash_container *vnet_hash = rcu_dereference(tun->vnet_hash);
>> +
>> +		ret = tun_automq_select_queue(tun, skb, vnet_hash);
> 
> Already passing tun, no need to pass tun->vnet_hash separately.

I will remove the parameter in v4.

>> +	}
>>   	rcu_read_unlock();
>>   
>>   	return ret;
>> @@ -2120,33 +2156,63 @@ static ssize_t tun_put_user(struct tun_struct *tun,
>>   	}
>>   
>>   	if (vnet_hdr_sz) {
>> -		struct virtio_net_hdr gso;
>> +		struct tun_vnet_hash_ext *ext;
>> +		size_t vnet_hdr_content_sz = sizeof(struct virtio_net_hdr);
>> +		union {
>> +			struct virtio_net_hdr hdr;
>> +			struct virtio_net_hdr_v1_hash hdr_v1_hash;
>> +		} vnet_hdr;
>> +		int ret;
>>   
>>   		if (iov_iter_count(iter) < vnet_hdr_sz)
>>   			return -EINVAL;
>>   
>> -		if (virtio_net_hdr_from_skb(skb, &gso,
>> -					    tun_is_little_endian(tun), true,
>> -					    vlan_hlen)) {
>> +		ext = vnet_hdr_sz < sizeof(vnet_hdr.hdr_v1_hash) ?
>> +		      NULL : skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
>> +
>> +		if (ext) {
>> +			struct virtio_net_hash hash = {
>> +				.value = ext->value,
>> +				.report = ext->report,
>> +			};
>> +
>> +			vnet_hdr_content_sz = sizeof(vnet_hdr.hdr_v1_hash);
>> +			ret = virtio_net_hdr_v1_hash_from_skb(skb,
>> +							      &vnet_hdr.hdr_v1_hash,
>> +							      true,
>> +							      vlan_hlen,
>> +							      &hash);
>> +		} else {
>> +			vnet_hdr_content_sz = sizeof(struct virtio_net_hdr);
>> +			ret = virtio_net_hdr_from_skb(skb,
>> +						      &vnet_hdr.hdr,
>> +						      tun_is_little_endian(tun),
>> +						      true,
>> +						      vlan_hlen);
>> +		}
>> +
> 
> This is why just setting the fields directly rather than adding
> virtio_net_hdr_v1_hash_from_skb is actually simpler.

I'll make a change accordingly in v4.

> 
>> +		if (ret) {
>>   			struct skb_shared_info *sinfo = skb_shinfo(skb);
>>   
>>   			if (net_ratelimit()) {
>>   				netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
>> -					   sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
>> -					   tun16_to_cpu(tun, gso.hdr_len));
>> +					   sinfo->gso_type,
>> +					   tun16_to_cpu(tun, vnet_hdr.hdr.gso_size),
>> +					   tun16_to_cpu(tun, vnet_hdr.hdr.hdr_len));
>>   				print_hex_dump(KERN_ERR, "tun: ",
>>   					       DUMP_PREFIX_NONE,
>>   					       16, 1, skb->head,
>> -					       min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
>> +					       min(tun16_to_cpu(tun, vnet_hdr.hdr.hdr_len), 64),
>> +					       true);
>>   			}
>>   			WARN_ON_ONCE(1);
>>   			return -EINVAL;
>>   		}
>>   
>> -		if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
>> +		if (copy_to_iter(&vnet_hdr, vnet_hdr_content_sz, iter) != vnet_hdr_content_sz)
>>   			return -EFAULT;
>>   
>> -		iov_iter_zero(vnet_hdr_sz - sizeof(gso), iter);
>> +		iov_iter_zero(vnet_hdr_sz - vnet_hdr_content_sz, iter);
>>   	}
>>   
>>   	if (vlan_hlen) {
>> @@ -3094,6 +3160,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>>   	int le;
>>   	int ret;
>>   	bool do_notify = false;
>> +	struct tun_vnet_hash vnet_hash_common;
>> +	struct tun_vnet_hash_container *vnet_hash;
>>   
>>   	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
>>   	    (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
>> @@ -3115,6 +3183,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>>   		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
>>   			return -EPERM;
>>   		return open_related_ns(&net->ns, get_net_ns);
>> +	} else if (cmd == TUNGETVNETHASHCAP) {
>> +		return copy_to_user(argp, &tun_vnet_hash_cap, sizeof(tun_vnet_hash_cap)) ?
>> +		       -EFAULT : 0;
>>   	}
>>   
>>   	rtnl_lock();
>> @@ -3314,6 +3385,13 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>>   			break;
>>   		}
>>   
>> +		vnet_hash = rtnl_dereference(tun->vnet_hash);
>> +		if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT) &&
>> +		    vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr_v1_hash)) {
>> +			ret = -EBUSY;
>> +			break;
>> +		}
>> +
>>   		tun->vnet_hdr_sz = vnet_hdr_sz;
>>   		break;
>>   
>> @@ -3328,10 +3406,18 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>>   			ret = -EFAULT;
>>   			break;
>>   		}
>> -		if (le)
>> +		if (le) {
>>   			tun->flags |= TUN_VNET_LE;
>> -		else
>> +		} else {
>> +			vnet_hash = rtnl_dereference(tun->vnet_hash);
>> +			if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT) &&
>> +			    !tun_legacy_is_little_endian(tun)) {
>> +				ret = -EBUSY;
>> +				break;
>> +			}
>> +
>>   			tun->flags &= ~TUN_VNET_LE;
>> +		}
>>   		break;
>>   
>>   	case TUNGETVNETBE:
>> @@ -3396,6 +3482,30 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>>   		ret = open_related_ns(&net->ns, get_net_ns);
>>   		break;
>>   
>> +	case TUNSETVNETHASH:
>> +		if (copy_from_user(&vnet_hash_common, argp, sizeof(vnet_hash_common))) {
>> +			ret = -EFAULT;
>> +			break;
>> +		}
>> +		argp = (struct tun_vnet_hash __user *)argp + 1;
>> +
>> +		if ((vnet_hash_common.flags & TUN_VNET_HASH_REPORT) &&
>> +		    (tun->vnet_hdr_sz < sizeof(struct virtio_net_hdr_v1_hash) ||
>> +		     !tun_is_little_endian(tun))) {
>> +			ret = -EBUSY;
>> +			break;
>> +		}
>> +
>> +		vnet_hash = kmalloc(sizeof(vnet_hash->common), GFP_KERNEL);
>> +		if (!vnet_hash) {
>> +			ret = -ENOMEM;
>> +			break;
>> +		}
>> +
>> +		vnet_hash->common = vnet_hash_common;
>> +		kfree_rcu_mightsleep(rcu_replace_pointer_rtnl(tun->vnet_hash, vnet_hash));
>> +		break;
>> +
>>   	default:
>>   		ret = -EINVAL;
>>   		break;
>> diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
>> index 287cdc81c939..1561e8ce0a0a 100644
>> --- a/include/uapi/linux/if_tun.h
>> +++ b/include/uapi/linux/if_tun.h
>> @@ -62,6 +62,30 @@
>>   #define TUNSETCARRIER _IOW('T', 226, int)
>>   #define TUNGETDEVNETNS _IO('T', 227)
>>   
>> +/**
>> + * define TUNGETVNETHASHCAP - ioctl to get virtio_net hashing capability.
>> + *
>> + * The argument is a pointer to &struct tun_vnet_hash which will store the
>> + * maximal virtio_net hashing configuration.
>> + */
>> +#define TUNGETVNETHASHCAP _IOR('T', 228, struct tun_vnet_hash)
>> +
>> +/**
>> + * define TUNSETVNETHASH - ioctl to configure virtio_net hashing
>> + *
>> + * The argument is a pointer to &struct tun_vnet_hash.
>> + *
>> + * %TUNSETVNETHDRSZ ioctl must be called with a number greater than or equal to
>> + * the size of &struct virtio_net_hdr_v1_hash before calling this ioctl with
>> + * %TUN_VNET_HASH_REPORT.
>> + *
>> + * The virtio_net header must be configured as little-endian before calling this
>> + * ioctl with %TUN_VNET_HASH_REPORT.
>> + *
>> + * This ioctl currently has no effect on XDP packets.
>> + */
>> +#define TUNSETVNETHASH _IOW('T', 229, struct tun_vnet_hash)
>> +
>>   /* TUNSETIFF ifr flags */
>>   #define IFF_TUN		0x0001
>>   #define IFF_TAP		0x0002
>> @@ -115,4 +139,24 @@ struct tun_filter {
>>   	__u8   addr[][ETH_ALEN];
>>   };
>>   
>> +/**
>> + * define TUN_VNET_HASH_REPORT - Request virtio_net hash reporting for vhost
>> + */
>> +#define TUN_VNET_HASH_REPORT	0x0001
>> +
>> +/**
>> + * struct tun_vnet_hash - virtio_net hashing configuration
>> + * @flags:
>> + *		Bitmask consists of %TUN_VNET_HASH_REPORT and %TUN_VNET_HASH_RSS
>> + * @pad:
>> + *		Should be filled with zero before passing to %TUNSETVNETHASH
>> + * @types:
>> + *		Bitmask of allowed hash types
>> + */
>> +struct tun_vnet_hash {
>> +	__u16 flags;
>> +	__u8 pad[2];
>> +	__u32 types;
>> +};
>> +
> 
> The values for flags and types should probably be defined here.

I put TUN_VNET_HASH_REPORT before struct tun_vnet_hash following the 
examples of TUN_PKT_STRIP/struct tun_pi and TUN_FLT_ALLMULTI/struct 
tun_filter. The types are defined in: include/uapi/linux/virtio_net.h

Regards,
Akihiko Odaki

> 
>>   #endif /* _UAPI__IF_TUN_H */
>>
>> -- 
>> 2.46.0
>>
> 
>
diff mbox series

Patch

diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
index 4d7087f727be..86b4ae8caa8a 100644
--- a/Documentation/networking/tuntap.rst
+++ b/Documentation/networking/tuntap.rst
@@ -206,6 +206,13 @@  enable is true we enable it, otherwise we disable it::
       return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
   }
 
+3.4 Reference
+-------------
+
+``linux/if_tun.h`` defines the interface described below:
+
+.. kernel-doc:: include/uapi/linux/if_tun.h
+
 Universal TUN/TAP device driver Frequently Asked Question
 =========================================================
 
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 9920b3a68ed1..e2a7bd703550 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -395,6 +395,7 @@  config TUN
 	tristate "Universal TUN/TAP device driver support"
 	depends on INET
 	select CRC32
+	select SKB_EXTENSIONS
 	help
 	  TUN/TAP provides packet reception and transmission for user space
 	  programs.  It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9d93ab9ee58f..b8fcd71becac 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -173,6 +173,10 @@  struct tun_prog {
 	struct bpf_prog *prog;
 };
 
+struct tun_vnet_hash_container {
+	struct tun_vnet_hash common;
+};
+
 /* Since the socket were moved to tun_file, to preserve the behavior of persist
  * device, socket filter, sndbuf and vnet header size were restore when the
  * file were attached to a persist device.
@@ -210,6 +214,7 @@  struct tun_struct {
 	struct bpf_prog __rcu *xdp_prog;
 	struct tun_prog __rcu *steering_prog;
 	struct tun_prog __rcu *filter_prog;
+	struct tun_vnet_hash_container __rcu *vnet_hash;
 	struct ethtool_link_ksettings link_ksettings;
 	/* init args */
 	struct file *file;
@@ -221,6 +226,11 @@  struct veth {
 	__be16 h_vlan_TCI;
 };
 
+static const struct tun_vnet_hash tun_vnet_hash_cap = {
+	.flags = TUN_VNET_HASH_REPORT,
+	.types = VIRTIO_NET_SUPPORTED_HASH_TYPES
+};
+
 static void tun_flow_init(struct tun_struct *tun);
 static void tun_flow_uninit(struct tun_struct *tun);
 
@@ -322,10 +332,17 @@  static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
 	if (get_user(be, argp))
 		return -EFAULT;
 
-	if (be)
+	if (be) {
+		struct tun_vnet_hash_container *vnet_hash = rtnl_dereference(tun->vnet_hash);
+
+		if (!(tun->flags & TUN_VNET_LE) &&
+		    vnet_hash && (vnet_hash->flags & TUN_VNET_HASH_REPORT))
+			return -EBUSY;
+
 		tun->flags |= TUN_VNET_BE;
-	else
+	} else {
 		tun->flags &= ~TUN_VNET_BE;
+	}
 
 	return 0;
 }
@@ -522,14 +539,20 @@  static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
  * the userspace application move between processors, we may get a
  * different rxq no. here.
  */
-static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
+static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb,
+				   const struct tun_vnet_hash_container *vnet_hash)
 {
+	struct tun_vnet_hash_ext *ext;
+	struct flow_keys keys;
 	struct tun_flow_entry *e;
 	u32 txq, numqueues;
 
 	numqueues = READ_ONCE(tun->numqueues);
 
-	txq = __skb_get_hash_symmetric(skb);
+	memset(&keys, 0, sizeof(keys));
+	skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
+
+	txq = flow_hash_from_keys(&keys);
 	e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
 	if (e) {
 		tun_flow_save_rps_rxhash(e, txq);
@@ -538,6 +561,16 @@  static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 		txq = reciprocal_scale(txq, numqueues);
 	}
 
+	if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT)) {
+		ext = skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
+		if (ext) {
+			u32 types = vnet_hash->common.types;
+
+			ext->report = virtio_net_hash_report(types, keys.basic);
+			ext->value = skb->l4_hash ? skb->hash : txq;
+		}
+	}
+
 	return txq;
 }
 
@@ -565,10 +598,13 @@  static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
 	u16 ret;
 
 	rcu_read_lock();
-	if (rcu_dereference(tun->steering_prog))
+	if (rcu_dereference(tun->steering_prog)) {
 		ret = tun_ebpf_select_queue(tun, skb);
-	else
-		ret = tun_automq_select_queue(tun, skb);
+	} else {
+		struct tun_vnet_hash_container *vnet_hash = rcu_dereference(tun->vnet_hash);
+
+		ret = tun_automq_select_queue(tun, skb, vnet_hash);
+	}
 	rcu_read_unlock();
 
 	return ret;
@@ -2120,33 +2156,63 @@  static ssize_t tun_put_user(struct tun_struct *tun,
 	}
 
 	if (vnet_hdr_sz) {
-		struct virtio_net_hdr gso;
+		struct tun_vnet_hash_ext *ext;
+		size_t vnet_hdr_content_sz = sizeof(struct virtio_net_hdr);
+		union {
+			struct virtio_net_hdr hdr;
+			struct virtio_net_hdr_v1_hash hdr_v1_hash;
+		} vnet_hdr;
+		int ret;
 
 		if (iov_iter_count(iter) < vnet_hdr_sz)
 			return -EINVAL;
 
-		if (virtio_net_hdr_from_skb(skb, &gso,
-					    tun_is_little_endian(tun), true,
-					    vlan_hlen)) {
+		ext = vnet_hdr_sz < sizeof(vnet_hdr.hdr_v1_hash) ?
+		      NULL : skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
+
+		if (ext) {
+			struct virtio_net_hash hash = {
+				.value = ext->value,
+				.report = ext->report,
+			};
+
+			vnet_hdr_content_sz = sizeof(vnet_hdr.hdr_v1_hash);
+			ret = virtio_net_hdr_v1_hash_from_skb(skb,
+							      &vnet_hdr.hdr_v1_hash,
+							      true,
+							      vlan_hlen,
+							      &hash);
+		} else {
+			vnet_hdr_content_sz = sizeof(struct virtio_net_hdr);
+			ret = virtio_net_hdr_from_skb(skb,
+						      &vnet_hdr.hdr,
+						      tun_is_little_endian(tun),
+						      true,
+						      vlan_hlen);
+		}
+
+		if (ret) {
 			struct skb_shared_info *sinfo = skb_shinfo(skb);
 
 			if (net_ratelimit()) {
 				netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
-					   sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
-					   tun16_to_cpu(tun, gso.hdr_len));
+					   sinfo->gso_type,
+					   tun16_to_cpu(tun, vnet_hdr.hdr.gso_size),
+					   tun16_to_cpu(tun, vnet_hdr.hdr.hdr_len));
 				print_hex_dump(KERN_ERR, "tun: ",
 					       DUMP_PREFIX_NONE,
 					       16, 1, skb->head,
-					       min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+					       min(tun16_to_cpu(tun, vnet_hdr.hdr.hdr_len), 64),
+					       true);
 			}
 			WARN_ON_ONCE(1);
 			return -EINVAL;
 		}
 
-		if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
+		if (copy_to_iter(&vnet_hdr, vnet_hdr_content_sz, iter) != vnet_hdr_content_sz)
 			return -EFAULT;
 
-		iov_iter_zero(vnet_hdr_sz - sizeof(gso), iter);
+		iov_iter_zero(vnet_hdr_sz - vnet_hdr_content_sz, iter);
 	}
 
 	if (vlan_hlen) {
@@ -3094,6 +3160,8 @@  static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	int le;
 	int ret;
 	bool do_notify = false;
+	struct tun_vnet_hash vnet_hash_common;
+	struct tun_vnet_hash_container *vnet_hash;
 
 	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
 	    (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
@@ -3115,6 +3183,9 @@  static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		return open_related_ns(&net->ns, get_net_ns);
+	} else if (cmd == TUNGETVNETHASHCAP) {
+		return copy_to_user(argp, &tun_vnet_hash_cap, sizeof(tun_vnet_hash_cap)) ?
+		       -EFAULT : 0;
 	}
 
 	rtnl_lock();
@@ -3314,6 +3385,13 @@  static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			break;
 		}
 
+		vnet_hash = rtnl_dereference(tun->vnet_hash);
+		if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT) &&
+		    vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr_v1_hash)) {
+			ret = -EBUSY;
+			break;
+		}
+
 		tun->vnet_hdr_sz = vnet_hdr_sz;
 		break;
 
@@ -3328,10 +3406,18 @@  static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			ret = -EFAULT;
 			break;
 		}
-		if (le)
+		if (le) {
 			tun->flags |= TUN_VNET_LE;
-		else
+		} else {
+			vnet_hash = rtnl_dereference(tun->vnet_hash);
+			if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT) &&
+			    !tun_legacy_is_little_endian(tun)) {
+				ret = -EBUSY;
+				break;
+			}
+
 			tun->flags &= ~TUN_VNET_LE;
+		}
 		break;
 
 	case TUNGETVNETBE:
@@ -3396,6 +3482,30 @@  static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		ret = open_related_ns(&net->ns, get_net_ns);
 		break;
 
+	case TUNSETVNETHASH:
+		if (copy_from_user(&vnet_hash_common, argp, sizeof(vnet_hash_common))) {
+			ret = -EFAULT;
+			break;
+		}
+		argp = (struct tun_vnet_hash __user *)argp + 1;
+
+		if ((vnet_hash_common.flags & TUN_VNET_HASH_REPORT) &&
+		    (tun->vnet_hdr_sz < sizeof(struct virtio_net_hdr_v1_hash) ||
+		     !tun_is_little_endian(tun))) {
+			ret = -EBUSY;
+			break;
+		}
+
+		vnet_hash = kmalloc(sizeof(vnet_hash->common), GFP_KERNEL);
+		if (!vnet_hash) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		vnet_hash->common = vnet_hash_common;
+		kfree_rcu_mightsleep(rcu_replace_pointer_rtnl(tun->vnet_hash, vnet_hash));
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 287cdc81c939..1561e8ce0a0a 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -62,6 +62,30 @@ 
 #define TUNSETCARRIER _IOW('T', 226, int)
 #define TUNGETDEVNETNS _IO('T', 227)
 
+/**
+ * define TUNGETVNETHASHCAP - ioctl to get virtio_net hashing capability.
+ *
+ * The argument is a pointer to &struct tun_vnet_hash which will store the
+ * maximal virtio_net hashing configuration.
+ */
+#define TUNGETVNETHASHCAP _IOR('T', 228, struct tun_vnet_hash)
+
+/**
+ * define TUNSETVNETHASH - ioctl to configure virtio_net hashing
+ *
+ * The argument is a pointer to &struct tun_vnet_hash.
+ *
+ * %TUNSETVNETHDRSZ ioctl must be called with a number greater than or equal to
+ * the size of &struct virtio_net_hdr_v1_hash before calling this ioctl with
+ * %TUN_VNET_HASH_REPORT.
+ *
+ * The virtio_net header must be configured as little-endian before calling this
+ * ioctl with %TUN_VNET_HASH_REPORT.
+ *
+ * This ioctl currently has no effect on XDP packets.
+ */
+#define TUNSETVNETHASH _IOW('T', 229, struct tun_vnet_hash)
+
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
 #define IFF_TAP		0x0002
@@ -115,4 +139,24 @@  struct tun_filter {
 	__u8   addr[][ETH_ALEN];
 };
 
+/**
+ * define TUN_VNET_HASH_REPORT - Request virtio_net hash reporting for vhost
+ */
+#define TUN_VNET_HASH_REPORT	0x0001
+
+/**
+ * struct tun_vnet_hash - virtio_net hashing configuration
+ * @flags:
+ *		Bitmask consisting of %TUN_VNET_HASH_REPORT and %TUN_VNET_HASH_RSS
+ * @pad:
+ *		Should be filled with zero before passing to %TUNSETVNETHASH
+ * @types:
+ *		Bitmask of allowed hash types
+ */
+struct tun_vnet_hash {
+	__u16 flags;
+	__u8 pad[2];
+	__u32 types;
+};
+
 #endif /* _UAPI__IF_TUN_H */