| Message ID | 20230718180237.3248179-3-AVKrasnov@sberdevices.ru (mailing list archive) |
|---|---|
| State | Superseded |
| Delegated to: | Netdev Maintainers |
| Headers | show |
| Series | vsock/virtio/vhost: MSG_ZEROCOPY preparations |
On Tue, Jul 18, 2023 at 09:02:35PM +0300, Arseniy Krasnov wrote:
> For non-linear skb use its pages from fragment array as buffers in
> virtio tx queue. These pages are already pinned by 'get_user_pages()'
> during such skb creation.
>
> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>  net/vmw_vsock/virtio_transport.c | 40 +++++++++++++++++++++++++++-----
>  1 file changed, 34 insertions(+), 6 deletions(-)
>
> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> index e95df847176b..6cbb45bb12d2 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>  	vq = vsock->vqs[VSOCK_VQ_TX];
>
>  	for (;;) {
> -		struct scatterlist hdr, buf, *sgs[2];
> +		/* +1 is for packet header. */
> +		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
> +		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
>  		int ret, in_sg = 0, out_sg = 0;
>  		struct sk_buff *skb;
>  		bool reply;
> @@ -111,12 +113,38 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>
>  		virtio_transport_deliver_tap_pkt(skb);
>  		reply = virtio_vsock_skb_reply(skb);
> +		sg_init_one(&bufs[out_sg], virtio_vsock_hdr(skb),
> +			    sizeof(*virtio_vsock_hdr(skb)));
> +		sgs[out_sg] = &bufs[out_sg];
> +		out_sg++;
> +
> +		if (!skb_is_nonlinear(skb)) {
> +			if (skb->len > 0) {
> +				sg_init_one(&bufs[out_sg], skb->data, skb->len);
> +				sgs[out_sg] = &bufs[out_sg];
> +				out_sg++;
> +			}
> +		} else {
> +			struct skb_shared_info *si;
> +			int i;
> +
> +			si = skb_shinfo(skb);
> +
> +			for (i = 0; i < si->nr_frags; i++) {
> +				skb_frag_t *skb_frag = &si->frags[i];
> +				void *va = page_to_virt(skb_frag->bv_page);
>
> -		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
> -		sgs[out_sg++] = &hdr;
> -		if (skb->len > 0) {
> -			sg_init_one(&buf, skb->data, skb->len);
> -			sgs[out_sg++] = &buf;
> +				/* We will use 'page_to_virt()' for userspace page here,

don't put comments after code they refer to, please?

> +				 * because virtio layer will call 'virt_to_phys()' later

it will but not always. sometimes it's the dma mapping layer.

> +				 * to fill buffer descriptor. We don't touch memory at
> +				 * "virtual" address of this page.

you need to stick "the" in a bunch of places above.

> +				 */
> +				sg_init_one(&bufs[out_sg],
> +					    va + skb_frag->bv_offset,
> +					    skb_frag->bv_len);
> +				sgs[out_sg] = &bufs[out_sg];
> +				out_sg++;
> +			}
>  		}
>
>  		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);

There's a problem here: if the vq is small this will fail.
So you really should check free vq s/gs and switch to non-zcopy
if too small.

> --
> 2.25.1
On 18.07.2023 23:27, Michael S. Tsirkin wrote:
> On Tue, Jul 18, 2023 at 09:02:35PM +0300, Arseniy Krasnov wrote:
>> For non-linear skb use its pages from fragment array as buffers in
>> virtio tx queue. These pages are already pinned by 'get_user_pages()'
>> during such skb creation.
>>
[...]
>> +				/* We will use 'page_to_virt()' for userspace page here,
>
> don't put comments after code they refer to, please?
>
>> +				 * because virtio layer will call 'virt_to_phys()' later
>
> it will but not always. sometimes it's the dma mapping layer.
>
>> +				 * to fill buffer descriptor. We don't touch memory at
>> +				 * "virtual" address of this page.
>
> you need to stick "the" in a bunch of places above.

Ok, I'll fix this comment!

[...]
>> 		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
>
> There's a problem here: if the vq is small this will fail.
> So you really should check free vq s/gs and switch to non-zcopy
> if too small.

Ok, so idea is that:

if (out_sg > vq->num_free)
    reorganise current skb for copy mode (e.g. 2 out_sg - header and data)
    and try to add it to vq again.

?

@Stefano, I'll remove net-next tag (guess RFC is not required again, but not net-next
anyway) as this change will require review. R-b I think should be also removed. All
other patches in this set still unchanged.

Thanks, Arseniy
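As a rough illustration of the fallback being discussed (not the actual patch), a check against the TX virtqueue's free descriptor count could look like the sketch below; the helper name and the linearization step are assumptions introduced only for this example.

#include <linux/skbuff.h>
#include <linux/virtio.h>
#include <linux/scatterlist.h>

/* Sketch of the fallback discussed above; not the real patch.
 * If the zerocopy scatterlist does not fit into the TX virtqueue's free
 * descriptors, copy the frags into the linear area and retry with the
 * classic header + data layout. The helper name is hypothetical.
 */
static int vsock_tx_try_zerocopy(struct virtqueue *vq, struct sk_buff *skb,
				 struct scatterlist **sgs, int out_sg)
{
	int ret;

	if (out_sg > vq->num_free) {
		/* Not enough free descriptors for one entry per frag:
		 * fall back to copy mode by linearizing the skb.
		 */
		ret = __skb_linearize(skb);
		if (ret)
			return ret;

		/* The caller is assumed to rebuild sgs[] here as two
		 * entries: the packet header and the now-linear skb->data.
		 */
		out_sg = 2;
	}

	return virtqueue_add_sgs(vq, sgs, out_sg, 0, skb, GFP_KERNEL);
}

Note that vq->num_free only reflects the descriptors free at this moment, which is why the later messages in this thread move toward checking against the total ring size instead.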
On 19.07.2023 07:46, Arseniy Krasnov wrote:
> On 18.07.2023 23:27, Michael S. Tsirkin wrote:
>> On Tue, Jul 18, 2023 at 09:02:35PM +0300, Arseniy Krasnov wrote:
[...]
>>> 		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
>>
>> There's a problem here: if the vq is small this will fail.
>> So you really should check free vq s/gs and switch to non-zcopy
>> if too small.
>
> Ok, so idea is that:
>
> if (out_sg > vq->num_free)
>     reorganise current skb for copy mode (e.g. 2 out_sg - header and data)
>     and try to add it to vq again.
>
> ?

I guess I need to check the number of elements in the sg list against 'vq->num_max',
as this is the maximum for a totally free queue (if even a big sg list cannot be added
to the tx queue at this moment, the skb will be requeued below, waiting for enough
space).

I'll pass the 'vq->num_max' value to 'virtio_transport_common.c' through another
transport callback and check the number of user pages against it: if the user's buffer
is too big, use copy mode, so there will be only 2 elements in the sg list and this
will fit into the vq anyway.

Thanks, Arseniy
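A minimal sketch of the check described above, assuming the transport reports its TX ring size (vq->num_max on virtio) to the common code through a new callback; the function name and parameters are hypothetical.

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/skbuff.h>

/* Sketch only: decide at skb-construction time whether zerocopy is
 * feasible. 'max_ring_descs' stands for the TX ring size the transport
 * would report (vq->num_max on virtio); the name is an assumption.
 */
static bool vsock_zerocopy_fits(u32 max_ring_descs, size_t user_len)
{
	/* Worst-case page count for a page-aligned buffer; a real check
	 * would also account for the starting offset within the first page.
	 */
	size_t nr_pages = DIV_ROUND_UP(user_len, PAGE_SIZE);

	/* One descriptor per page plus one for the virtio-vsock header. */
	if (nr_pages + 1 > max_ring_descs)
		return false;

	/* Also bounded by what one skb can carry in its frag array. */
	return nr_pages <= MAX_SKB_FRAGS;
}

If this returns false, the sender would simply take the existing copy path, which always needs only two out descriptors (header plus linear data) and therefore fits any ring.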
On Wed, Jul 19, 2023 at 07:46:05AM +0300, Arseniy Krasnov wrote:
> On 18.07.2023 23:27, Michael S. Tsirkin wrote:
>> On Tue, Jul 18, 2023 at 09:02:35PM +0300, Arseniy Krasnov wrote:
[...]
>> There's a problem here: if the vq is small this will fail.
>> So you really should check free vq s/gs and switch to non-zcopy
>> if too small.
>
> Ok, so idea is that:
>
> if (out_sg > vq->num_free)
>     reorganise current skb for copy mode (e.g. 2 out_sg - header and data)
>     and try to add it to vq again.
>
> ?
>
> @Stefano, I'll remove net-next tag (guess RFC is not required again, but not net-next
> anyway) as this change will require review. R-b I think should be also removed. All
> other patches in this set still unchanged.

It's still a new feature, so we have the net-next tree as the target, right?

I think we should keep net-next. Even if the patches need to be re-reviewed, net-next
indicates the tree where we want them to be merged, and for new features it is the
right one.

Ack for not putting RFC again and for the R-b removal for this patch.

Thanks,
Stefano
On 19.07.2023 10:36, Stefano Garzarella wrote:
> On Wed, Jul 19, 2023 at 07:46:05AM +0300, Arseniy Krasnov wrote:
>> On 18.07.2023 23:27, Michael S. Tsirkin wrote:
[...]
>> @Stefano, I'll remove net-next tag (guess RFC is not required again, but not net-next
>> anyway) as this change will require review. R-b I think should be also removed. All
>> other patches in this set still unchanged.
>
> It's still a new feature, so we have the net-next tree as the target, right?
>
> I think we should keep net-next. Even if the patches need to be re-reviewed, net-next
> indicates the tree where we want them to be merged, and for new features it is the
> right one.
>
> Ack for not putting RFC again and for the R-b removal for this patch.

Ok,

Thanks, Arseniy

> Thanks,
> Stefano
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index e95df847176b..6cbb45bb12d2 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
 	vq = vsock->vqs[VSOCK_VQ_TX];
 
 	for (;;) {
-		struct scatterlist hdr, buf, *sgs[2];
+		/* +1 is for packet header. */
+		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
+		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
 		int ret, in_sg = 0, out_sg = 0;
 		struct sk_buff *skb;
 		bool reply;
@@ -111,12 +113,38 @@ virtio_transport_send_pkt_work(struct work_struct *work)
 
 		virtio_transport_deliver_tap_pkt(skb);
 		reply = virtio_vsock_skb_reply(skb);
+		sg_init_one(&bufs[out_sg], virtio_vsock_hdr(skb),
+			    sizeof(*virtio_vsock_hdr(skb)));
+		sgs[out_sg] = &bufs[out_sg];
+		out_sg++;
+
+		if (!skb_is_nonlinear(skb)) {
+			if (skb->len > 0) {
+				sg_init_one(&bufs[out_sg], skb->data, skb->len);
+				sgs[out_sg] = &bufs[out_sg];
+				out_sg++;
+			}
+		} else {
+			struct skb_shared_info *si;
+			int i;
+
+			si = skb_shinfo(skb);
+
+			for (i = 0; i < si->nr_frags; i++) {
+				skb_frag_t *skb_frag = &si->frags[i];
+				void *va = page_to_virt(skb_frag->bv_page);
 
-		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
-		sgs[out_sg++] = &hdr;
-		if (skb->len > 0) {
-			sg_init_one(&buf, skb->data, skb->len);
-			sgs[out_sg++] = &buf;
+				/* We will use 'page_to_virt()' for userspace page here,
+				 * because virtio layer will call 'virt_to_phys()' later
+				 * to fill buffer descriptor. We don't touch memory at
+				 * "virtual" address of this page.
+				 */
+				sg_init_one(&bufs[out_sg],
+					    va + skb_frag->bv_offset,
+					    skb_frag->bv_len);
+				sgs[out_sg] = &bufs[out_sg];
+				out_sg++;
+			}
 		}
 
 		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
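For readers less familiar with the skb fragment layout the hunk above relies on, here is a standalone illustration of the same frag walk written with the generic skb_frag_*() accessors. It is not part of the patch; the helper name is invented for the example, and it only shows which page, offset and length each scatterlist entry would describe.

#include <linux/skbuff.h>
#include <linux/scatterlist.h>

/* Illustration only: describe each fragment of a nonlinear skb as one
 * scatterlist entry. The caller is assumed to have prepared 'sg' with
 * sg_init_table() and at least 'max_entries' slots.
 */
static int skb_frags_to_sg(struct sk_buff *skb, struct scatterlist *sg,
			   int max_entries)
{
	struct skb_shared_info *si = skb_shinfo(skb);
	int i;

	if (si->nr_frags > max_entries)
		return -EINVAL;

	for (i = 0; i < si->nr_frags; i++) {
		skb_frag_t *frag = &si->frags[i];

		/* Same page/offset/length triple that the patch derives
		 * via page_to_virt() plus bv_offset/bv_len, expressed with
		 * the standard accessors.
		 */
		sg_set_page(&sg[i], skb_frag_page(frag),
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	return si->nr_frags;
}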