Message ID | 20180906040526.22518-9-jasowang@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Vhost_net TX batching | expand |
On Thu, Sep 06, 2018 at 12:05:23PM +0800, Jason Wang wrote: > This patch introduces to a new tun/tap specific msg_control: > > #define TUN_MSG_UBUF 1 > #define TUN_MSG_PTR 2 > struct tun_msg_ctl { > int type; > void *ptr; > }; > > This allows us to pass different kinds of msg_control through > sendmsg(). The first supported type is ubuf (TUN_MSG_UBUF) which will > be used by the existed vhost_net zerocopy code. The second is XDP > buff, which allows vhost_net to pass XDP buff to TUN. This could be > used to implement accepting an array of XDP buffs from vhost_net in > the following patches. > > Signed-off-by: Jason Wang <jasowang@redhat.com> At this point, do we want to just add a new sock opt for tap's benefit? Seems cleaner than (ab)using sendmsg. > --- > drivers/net/tap.c | 18 ++++++++++++------ > drivers/net/tun.c | 6 +++++- > drivers/vhost/net.c | 7 +++++-- > include/linux/if_tun.h | 7 +++++++ > 4 files changed, 29 insertions(+), 9 deletions(-) > > diff --git a/drivers/net/tap.c b/drivers/net/tap.c > index f0f7cd977667..7996ed7cbf18 100644 > --- a/drivers/net/tap.c > +++ b/drivers/net/tap.c > @@ -619,7 +619,7 @@ static inline struct sk_buff *tap_alloc_skb(struct sock *sk, size_t prepad, > #define TAP_RESERVE HH_DATA_OFF(ETH_HLEN) > > /* Get packet from user space buffer */ > -static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, > +static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, > struct iov_iter *from, int noblock) > { > int good_linear = SKB_MAX_HEAD(TAP_RESERVE); > @@ -663,7 +663,7 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, > if (unlikely(len < ETH_HLEN)) > goto err; > > - if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { > + if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { > struct iov_iter i; > > copylen = vnet_hdr.hdr_len ? 
> @@ -724,11 +724,11 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, > tap = rcu_dereference(q->tap); > /* copy skb_ubuf_info for callback when skb has no error */ > if (zerocopy) { > - skb_shinfo(skb)->destructor_arg = m->msg_control; > + skb_shinfo(skb)->destructor_arg = msg_control; > skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; > skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; > - } else if (m && m->msg_control) { > - struct ubuf_info *uarg = m->msg_control; > + } else if (msg_control) { > + struct ubuf_info *uarg = msg_control; > uarg->callback(uarg, false); > } > > @@ -1150,7 +1150,13 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m, > size_t total_len) > { > struct tap_queue *q = container_of(sock, struct tap_queue, sock); > - return tap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT); > + struct tun_msg_ctl *ctl = m->msg_control; > + > + if (ctl && ctl->type != TUN_MSG_UBUF) > + return -EINVAL; > + > + return tap_get_user(q, ctl ? ctl->ptr : NULL, &m->msg_iter, > + m->msg_flags & MSG_DONTWAIT); > } > > static int tap_recvmsg(struct socket *sock, struct msghdr *m, > diff --git a/drivers/net/tun.c b/drivers/net/tun.c > index ff1cbf3ebd50..c839a4bdcbd9 100644 > --- a/drivers/net/tun.c > +++ b/drivers/net/tun.c > @@ -2429,11 +2429,15 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) > int ret; > struct tun_file *tfile = container_of(sock, struct tun_file, socket); > struct tun_struct *tun = tun_get(tfile); > + struct tun_msg_ctl *ctl = m->msg_control; > > if (!tun) > return -EBADFD; > > - ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, > + if (ctl && ctl->type != TUN_MSG_UBUF) > + return -EINVAL; > + > + ret = tun_get_user(tun, tfile, ctl ? 
ctl->ptr : NULL, &m->msg_iter, > m->msg_flags & MSG_DONTWAIT, > m->msg_flags & MSG_MORE); > tun_put(tun); > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 4e656f89cb22..fb01ce6d981c 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -620,6 +620,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) > .msg_controllen = 0, > .msg_flags = MSG_DONTWAIT, > }; > + struct tun_msg_ctl ctl; > size_t len, total_len = 0; > int err; > struct vhost_net_ubuf_ref *uninitialized_var(ubufs); > @@ -664,8 +665,10 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) > ubuf->ctx = nvq->ubufs; > ubuf->desc = nvq->upend_idx; > refcount_set(&ubuf->refcnt, 1); > - msg.msg_control = ubuf; > - msg.msg_controllen = sizeof(ubuf); > + msg.msg_control = &ctl; > + ctl.type = TUN_MSG_UBUF; > + ctl.ptr = ubuf; > + msg.msg_controllen = sizeof(ctl); > ubufs = nvq->ubufs; > atomic_inc(&ubufs->refcount); > nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; > diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h > index 3d2996dc7d85..ba46dced1f38 100644 > --- a/include/linux/if_tun.h > +++ b/include/linux/if_tun.h > @@ -19,6 +19,13 @@ > > #define TUN_XDP_FLAG 0x1UL > > +#define TUN_MSG_UBUF 1 > +#define TUN_MSG_PTR 2 Looks like TUN_MSG_PTR should be pushed out to a follow-up patch? > +struct tun_msg_ctl { > + int type; > + void *ptr; > +}; > + type actually includes a size. Why not two short fields then? > #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) > struct socket *tun_get_socket(struct file *); > struct ptr_ring *tun_get_tx_ring(struct file *file); > -- > 2.17.1
On 2018年09月07日 00:54, Michael S. Tsirkin wrote: > On Thu, Sep 06, 2018 at 12:05:23PM +0800, Jason Wang wrote: >> This patch introduces to a new tun/tap specific msg_control: >> >> #define TUN_MSG_UBUF 1 >> #define TUN_MSG_PTR 2 >> struct tun_msg_ctl { >> int type; >> void *ptr; >> }; >> >> This allows us to pass different kinds of msg_control through >> sendmsg(). The first supported type is ubuf (TUN_MSG_UBUF) which will >> be used by the existed vhost_net zerocopy code. The second is XDP >> buff, which allows vhost_net to pass XDP buff to TUN. This could be >> used to implement accepting an array of XDP buffs from vhost_net in >> the following patches. >> >> Signed-off-by: Jason Wang <jasowang@redhat.com> > At this point, do we want to just add a new sock opt for tap's > benefit? Seems cleaner than (ab)using sendmsg. I think it won't be much difference, we still need a void pointer. >> --- >> drivers/net/tap.c | 18 ++++++++++++------ >> drivers/net/tun.c | 6 +++++- >> drivers/vhost/net.c | 7 +++++-- >> include/linux/if_tun.h | 7 +++++++ >> 4 files changed, 29 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/net/tap.c b/drivers/net/tap.c >> index f0f7cd977667..7996ed7cbf18 100644 >> --- a/drivers/net/tap.c >> +++ b/drivers/net/tap.c >> @@ -619,7 +619,7 @@ static inline struct sk_buff *tap_alloc_skb(struct sock *sk, size_t prepad, >> #define TAP_RESERVE HH_DATA_OFF(ETH_HLEN) >> >> /* Get packet from user space buffer */ >> -static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, >> +static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, >> struct iov_iter *from, int noblock) >> { >> int good_linear = SKB_MAX_HEAD(TAP_RESERVE); >> @@ -663,7 +663,7 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, >> if (unlikely(len < ETH_HLEN)) >> goto err; >> >> - if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { >> + if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { >> struct iov_iter i; >> >> copylen = 
vnet_hdr.hdr_len ? >> @@ -724,11 +724,11 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, >> tap = rcu_dereference(q->tap); >> /* copy skb_ubuf_info for callback when skb has no error */ >> if (zerocopy) { >> - skb_shinfo(skb)->destructor_arg = m->msg_control; >> + skb_shinfo(skb)->destructor_arg = msg_control; >> skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; >> skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; >> - } else if (m && m->msg_control) { >> - struct ubuf_info *uarg = m->msg_control; >> + } else if (msg_control) { >> + struct ubuf_info *uarg = msg_control; >> uarg->callback(uarg, false); >> } >> >> @@ -1150,7 +1150,13 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m, >> size_t total_len) >> { >> struct tap_queue *q = container_of(sock, struct tap_queue, sock); >> - return tap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT); >> + struct tun_msg_ctl *ctl = m->msg_control; >> + >> + if (ctl && ctl->type != TUN_MSG_UBUF) >> + return -EINVAL; >> + >> + return tap_get_user(q, ctl ? ctl->ptr : NULL, &m->msg_iter, >> + m->msg_flags & MSG_DONTWAIT); >> } >> >> static int tap_recvmsg(struct socket *sock, struct msghdr *m, >> diff --git a/drivers/net/tun.c b/drivers/net/tun.c >> index ff1cbf3ebd50..c839a4bdcbd9 100644 >> --- a/drivers/net/tun.c >> +++ b/drivers/net/tun.c >> @@ -2429,11 +2429,15 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) >> int ret; >> struct tun_file *tfile = container_of(sock, struct tun_file, socket); >> struct tun_struct *tun = tun_get(tfile); >> + struct tun_msg_ctl *ctl = m->msg_control; >> >> if (!tun) >> return -EBADFD; >> >> - ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, >> + if (ctl && ctl->type != TUN_MSG_UBUF) >> + return -EINVAL; >> + >> + ret = tun_get_user(tun, tfile, ctl ? 
ctl->ptr : NULL, &m->msg_iter, >> m->msg_flags & MSG_DONTWAIT, >> m->msg_flags & MSG_MORE); >> tun_put(tun); >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c >> index 4e656f89cb22..fb01ce6d981c 100644 >> --- a/drivers/vhost/net.c >> +++ b/drivers/vhost/net.c >> @@ -620,6 +620,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) >> .msg_controllen = 0, >> .msg_flags = MSG_DONTWAIT, >> }; >> + struct tun_msg_ctl ctl; >> size_t len, total_len = 0; >> int err; >> struct vhost_net_ubuf_ref *uninitialized_var(ubufs); >> @@ -664,8 +665,10 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) >> ubuf->ctx = nvq->ubufs; >> ubuf->desc = nvq->upend_idx; >> refcount_set(&ubuf->refcnt, 1); >> - msg.msg_control = ubuf; >> - msg.msg_controllen = sizeof(ubuf); >> + msg.msg_control = &ctl; >> + ctl.type = TUN_MSG_UBUF; >> + ctl.ptr = ubuf; >> + msg.msg_controllen = sizeof(ctl); >> ubufs = nvq->ubufs; >> atomic_inc(&ubufs->refcount); >> nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; >> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h >> index 3d2996dc7d85..ba46dced1f38 100644 >> --- a/include/linux/if_tun.h >> +++ b/include/linux/if_tun.h >> @@ -19,6 +19,13 @@ >> >> #define TUN_XDP_FLAG 0x1UL >> >> +#define TUN_MSG_UBUF 1 >> +#define TUN_MSG_PTR 2 > Looks like TUN_MSG_PTR should be pushed out to a follow-up patch? Ok. > >> +struct tun_msg_ctl { >> + int type; >> + void *ptr; >> +}; >> + > type actually includes a size. Why not two short fields then? Yes, this sounds better. Thanks > >> #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) >> struct socket *tun_get_socket(struct file *); >> struct ptr_ring *tun_get_tx_ring(struct file *file); >> -- >> 2.17.1
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index f0f7cd977667..7996ed7cbf18 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -619,7 +619,7 @@ static inline struct sk_buff *tap_alloc_skb(struct sock *sk, size_t prepad, #define TAP_RESERVE HH_DATA_OFF(ETH_HLEN) /* Get packet from user space buffer */ -static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, +static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, struct iov_iter *from, int noblock) { int good_linear = SKB_MAX_HEAD(TAP_RESERVE); @@ -663,7 +663,7 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, if (unlikely(len < ETH_HLEN)) goto err; - if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { + if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i; copylen = vnet_hdr.hdr_len ? @@ -724,11 +724,11 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, tap = rcu_dereference(q->tap); /* copy skb_ubuf_info for callback when skb has no error */ if (zerocopy) { - skb_shinfo(skb)->destructor_arg = m->msg_control; + skb_shinfo(skb)->destructor_arg = msg_control; skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; - } else if (m && m->msg_control) { - struct ubuf_info *uarg = m->msg_control; + } else if (msg_control) { + struct ubuf_info *uarg = msg_control; uarg->callback(uarg, false); } @@ -1150,7 +1150,13 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); - return tap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT); + struct tun_msg_ctl *ctl = m->msg_control; + + if (ctl && ctl->type != TUN_MSG_UBUF) + return -EINVAL; + + return tap_get_user(q, ctl ? 
ctl->ptr : NULL, &m->msg_iter, + m->msg_flags & MSG_DONTWAIT); } static int tap_recvmsg(struct socket *sock, struct msghdr *m, diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ff1cbf3ebd50..c839a4bdcbd9 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2429,11 +2429,15 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) int ret; struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_struct *tun = tun_get(tfile); + struct tun_msg_ctl *ctl = m->msg_control; if (!tun) return -EBADFD; - ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, + if (ctl && ctl->type != TUN_MSG_UBUF) + return -EINVAL; + + ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter, m->msg_flags & MSG_DONTWAIT, m->msg_flags & MSG_MORE); tun_put(tun); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4e656f89cb22..fb01ce6d981c 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -620,6 +620,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; + struct tun_msg_ctl ctl; size_t len, total_len = 0; int err; struct vhost_net_ubuf_ref *uninitialized_var(ubufs); @@ -664,8 +665,10 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) ubuf->ctx = nvq->ubufs; ubuf->desc = nvq->upend_idx; refcount_set(&ubuf->refcnt, 1); - msg.msg_control = ubuf; - msg.msg_controllen = sizeof(ubuf); + msg.msg_control = &ctl; + ctl.type = TUN_MSG_UBUF; + ctl.ptr = ubuf; + msg.msg_controllen = sizeof(ctl); ubufs = nvq->ubufs; atomic_inc(&ubufs->refcount); nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 3d2996dc7d85..ba46dced1f38 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -19,6 +19,13 @@ #define TUN_XDP_FLAG 0x1UL +#define TUN_MSG_UBUF 1 +#define TUN_MSG_PTR 2 +struct tun_msg_ctl { + int type; + void *ptr; +}; + #if 
defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file);
This patch introduces a new tun/tap-specific msg_control: #define TUN_MSG_UBUF 1 #define TUN_MSG_PTR 2 struct tun_msg_ctl { int type; void *ptr; }; This allows us to pass different kinds of msg_control through sendmsg(). The first supported type is ubuf (TUN_MSG_UBUF), which will be used by the existing vhost_net zerocopy code. The second is XDP buff, which allows vhost_net to pass an XDP buff to TUN. This could be used to implement accepting an array of XDP buffs from vhost_net in the following patches. Signed-off-by: Jason Wang <jasowang@redhat.com> --- drivers/net/tap.c | 18 ++++++++++++------ drivers/net/tun.c | 6 +++++- drivers/vhost/net.c | 7 +++++-- include/linux/if_tun.h | 7 +++++++ 4 files changed, 29 insertions(+), 9 deletions(-)