
[net-next,v2,2/4] vsock/virtio: support to send non-linear skb

Message ID 20230718180237.3248179-3-AVKrasnov@sberdevices.ru (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series vsock/virtio/vhost: MSG_ZEROCOPY preparations

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1342 this patch: 1342
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 1365 this patch: 1365
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1365 this patch: 1365
netdev/checkpatch warning WARNING: line length of 86 exceeds 80 columns; WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Arseniy Krasnov July 18, 2023, 6:02 p.m. UTC
For a non-linear skb, use the pages from its fragment array as buffers in
the virtio tx queue. These pages were already pinned by 'get_user_pages()'
when such an skb was created.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
---
 net/vmw_vsock/virtio_transport.c | 40 +++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 6 deletions(-)

Comments

Michael S. Tsirkin July 18, 2023, 8:27 p.m. UTC | #1
On Tue, Jul 18, 2023 at 09:02:35PM +0300, Arseniy Krasnov wrote:
> For non-linear skb use its pages from fragment array as buffers in
> virtio tx queue. These pages are already pinned by 'get_user_pages()'
> during such skb creation.
> 
> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>  net/vmw_vsock/virtio_transport.c | 40 +++++++++++++++++++++++++++-----
>  1 file changed, 34 insertions(+), 6 deletions(-)
> 
> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> index e95df847176b..6cbb45bb12d2 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>  	vq = vsock->vqs[VSOCK_VQ_TX];
>  
>  	for (;;) {
> -		struct scatterlist hdr, buf, *sgs[2];
> +		/* +1 is for packet header. */
> +		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
> +		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
>  		int ret, in_sg = 0, out_sg = 0;
>  		struct sk_buff *skb;
>  		bool reply;
> @@ -111,12 +113,38 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>  
>  		virtio_transport_deliver_tap_pkt(skb);
>  		reply = virtio_vsock_skb_reply(skb);
> +		sg_init_one(&bufs[out_sg], virtio_vsock_hdr(skb),
> +			    sizeof(*virtio_vsock_hdr(skb)));
> +		sgs[out_sg] = &bufs[out_sg];
> +		out_sg++;
> +
> +		if (!skb_is_nonlinear(skb)) {
> +			if (skb->len > 0) {
> +				sg_init_one(&bufs[out_sg], skb->data, skb->len);
> +				sgs[out_sg] = &bufs[out_sg];
> +				out_sg++;
> +			}
> +		} else {
> +			struct skb_shared_info *si;
> +			int i;
> +
> +			si = skb_shinfo(skb);
> +
> +			for (i = 0; i < si->nr_frags; i++) {
> +				skb_frag_t *skb_frag = &si->frags[i];
> +				void *va = page_to_virt(skb_frag->bv_page);
>  
> -		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
> -		sgs[out_sg++] = &hdr;
> -		if (skb->len > 0) {
> -			sg_init_one(&buf, skb->data, skb->len);
> -			sgs[out_sg++] = &buf;
> +				/* We will use 'page_to_virt()' for userspace page here,

Don't put comments after the code they refer to, please.

> +				 * because virtio layer will call 'virt_to_phys()' later

It will, but not always; sometimes it's the DMA mapping layer.


> +				 * to fill buffer descriptor. We don't touch memory at
> +				 * "virtual" address of this page.


You need to stick "the" in a bunch of places above.

> +				 */
> +				sg_init_one(&bufs[out_sg],
> +					    va + skb_frag->bv_offset,
> +					    skb_frag->bv_len);
> +				sgs[out_sg] = &bufs[out_sg];
> +				out_sg++;
> +			}
>  		}
>  
>  		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);


There's a problem here: if the vq is small, this will fail.
So you really should check the number of free vq sg entries and
switch to non-zerocopy if it's too small.

> -- 
> 2.25.1
Arseniy Krasnov July 19, 2023, 4:46 a.m. UTC | #2
On 18.07.2023 23:27, Michael S. Tsirkin wrote:
> On Tue, Jul 18, 2023 at 09:02:35PM +0300, Arseniy Krasnov wrote:
>> For non-linear skb use its pages from fragment array as buffers in
>> virtio tx queue. These pages are already pinned by 'get_user_pages()'
>> during such skb creation.
>>
>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>  net/vmw_vsock/virtio_transport.c | 40 +++++++++++++++++++++++++++-----
>>  1 file changed, 34 insertions(+), 6 deletions(-)
>>
>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>> index e95df847176b..6cbb45bb12d2 100644
>> --- a/net/vmw_vsock/virtio_transport.c
>> +++ b/net/vmw_vsock/virtio_transport.c
>> @@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>  	vq = vsock->vqs[VSOCK_VQ_TX];
>>  
>>  	for (;;) {
>> -		struct scatterlist hdr, buf, *sgs[2];
>> +		/* +1 is for packet header. */
>> +		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
>> +		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
>>  		int ret, in_sg = 0, out_sg = 0;
>>  		struct sk_buff *skb;
>>  		bool reply;
>> @@ -111,12 +113,38 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>  
>>  		virtio_transport_deliver_tap_pkt(skb);
>>  		reply = virtio_vsock_skb_reply(skb);
>> +		sg_init_one(&bufs[out_sg], virtio_vsock_hdr(skb),
>> +			    sizeof(*virtio_vsock_hdr(skb)));
>> +		sgs[out_sg] = &bufs[out_sg];
>> +		out_sg++;
>> +
>> +		if (!skb_is_nonlinear(skb)) {
>> +			if (skb->len > 0) {
>> +				sg_init_one(&bufs[out_sg], skb->data, skb->len);
>> +				sgs[out_sg] = &bufs[out_sg];
>> +				out_sg++;
>> +			}
>> +		} else {
>> +			struct skb_shared_info *si;
>> +			int i;
>> +
>> +			si = skb_shinfo(skb);
>> +
>> +			for (i = 0; i < si->nr_frags; i++) {
>> +				skb_frag_t *skb_frag = &si->frags[i];
>> +				void *va = page_to_virt(skb_frag->bv_page);
>>  
>> -		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>> -		sgs[out_sg++] = &hdr;
>> -		if (skb->len > 0) {
>> -			sg_init_one(&buf, skb->data, skb->len);
>> -			sgs[out_sg++] = &buf;
>> +				/* We will use 'page_to_virt()' for userspace page here,
> 
> don't put comments after code they refer to, please?
> 
>> +				 * because virtio layer will call 'virt_to_phys()' later
> 
> it will but not always. sometimes it's the dma mapping layer.
> 
> 
>> +				 * to fill buffer descriptor. We don't touch memory at
>> +				 * "virtual" address of this page.
> 
> 
> you need to stick "the" in a bunch of places above.

Ok, I'll fix this comment!

> 
>> +				 */
>> +				sg_init_one(&bufs[out_sg],
>> +					    va + skb_frag->bv_offset,
>> +					    skb_frag->bv_len);
>> +				sgs[out_sg] = &bufs[out_sg];
>> +				out_sg++;
>> +			}
>>  		}
>>  
>>  		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
> 
> 
> There's a problem here: if there vq is small this will fail.
> So you really should check free vq s/gs and switch to non-zcopy
> if too small.

Ok, so the idea is:

if (out_sg > vq->num_free)
    reorganise the current skb for copy mode (e.g. 2 out_sg entries: header and data)
    and try to add it to the vq again.

?
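
A minimal sketch of that fallback, assuming it sits right before the
virtqueue_add_sgs() call in virtio_transport_send_pkt_work() and reuses
that function's existing requeue-and-retry pattern; whether
skb_linearize() interacts safely with pages pinned by 'get_user_pages()'
and with the zerocopy completion path is exactly what would need review:

		if (out_sg > vq->num_free && skb_is_nonlinear(skb)) {
			/* Copy all fragment data into the linear area and
			 * drop the frags, so only two out entries (header
			 * and data) are needed.
			 */
			if (skb_linearize(skb)) {
				/* No memory: put the skb back and retry
				 * later, as on virtqueue_add_sgs() failure.
				 */
				virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
				break;
			}

			/* The header sg in bufs[0] is still valid; replace
			 * the fragment entries with one linear data buffer.
			 */
			out_sg = 1;
			if (skb->len > 0) {
				sg_init_one(&bufs[out_sg], skb->data, skb->len);
				sgs[out_sg] = &bufs[out_sg];
				out_sg++;
			}
		}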

@Stefano, I'll remove the net-next tag (I guess RFC is not required again, but it's
not net-next material anyway) as this change will require review. I think the R-b
should also be removed. All other patches in this set stay unchanged.

Thanks, Arseniy

> 
>> -- 
>> 2.25.1
>
Arseniy Krasnov July 19, 2023, 6:13 a.m. UTC | #3
On 19.07.2023 07:46, Arseniy Krasnov wrote:
> 
> 
> On 18.07.2023 23:27, Michael S. Tsirkin wrote:
>> On Tue, Jul 18, 2023 at 09:02:35PM +0300, Arseniy Krasnov wrote:
>>> For non-linear skb use its pages from fragment array as buffers in
>>> virtio tx queue. These pages are already pinned by 'get_user_pages()'
>>> during such skb creation.
>>>
>>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>>> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
>>> ---
>>>  net/vmw_vsock/virtio_transport.c | 40 +++++++++++++++++++++++++++-----
>>>  1 file changed, 34 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>>> index e95df847176b..6cbb45bb12d2 100644
>>> --- a/net/vmw_vsock/virtio_transport.c
>>> +++ b/net/vmw_vsock/virtio_transport.c
>>> @@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>>  	vq = vsock->vqs[VSOCK_VQ_TX];
>>>  
>>>  	for (;;) {
>>> -		struct scatterlist hdr, buf, *sgs[2];
>>> +		/* +1 is for packet header. */
>>> +		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
>>> +		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
>>>  		int ret, in_sg = 0, out_sg = 0;
>>>  		struct sk_buff *skb;
>>>  		bool reply;
>>> @@ -111,12 +113,38 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>>  
>>>  		virtio_transport_deliver_tap_pkt(skb);
>>>  		reply = virtio_vsock_skb_reply(skb);
>>> +		sg_init_one(&bufs[out_sg], virtio_vsock_hdr(skb),
>>> +			    sizeof(*virtio_vsock_hdr(skb)));
>>> +		sgs[out_sg] = &bufs[out_sg];
>>> +		out_sg++;
>>> +
>>> +		if (!skb_is_nonlinear(skb)) {
>>> +			if (skb->len > 0) {
>>> +				sg_init_one(&bufs[out_sg], skb->data, skb->len);
>>> +				sgs[out_sg] = &bufs[out_sg];
>>> +				out_sg++;
>>> +			}
>>> +		} else {
>>> +			struct skb_shared_info *si;
>>> +			int i;
>>> +
>>> +			si = skb_shinfo(skb);
>>> +
>>> +			for (i = 0; i < si->nr_frags; i++) {
>>> +				skb_frag_t *skb_frag = &si->frags[i];
>>> +				void *va = page_to_virt(skb_frag->bv_page);
>>>  
>>> -		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>>> -		sgs[out_sg++] = &hdr;
>>> -		if (skb->len > 0) {
>>> -			sg_init_one(&buf, skb->data, skb->len);
>>> -			sgs[out_sg++] = &buf;
>>> +				/* We will use 'page_to_virt()' for userspace page here,
>>
>> don't put comments after code they refer to, please?
>>
>>> +				 * because virtio layer will call 'virt_to_phys()' later
>>
>> it will but not always. sometimes it's the dma mapping layer.
>>
>>
>>> +				 * to fill buffer descriptor. We don't touch memory at
>>> +				 * "virtual" address of this page.
>>
>>
>> you need to stick "the" in a bunch of places above.
> 
> Ok, I'll fix this comment!
> 
>>
>>> +				 */
>>> +				sg_init_one(&bufs[out_sg],
>>> +					    va + skb_frag->bv_offset,
>>> +					    skb_frag->bv_len);
>>> +				sgs[out_sg] = &bufs[out_sg];
>>> +				out_sg++;
>>> +			}
>>>  		}
>>>  
>>>  		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
>>
>>
>> There's a problem here: if there vq is small this will fail.
>> So you really should check free vq s/gs and switch to non-zcopy
>> if too small.
> 
> Ok, so idea is that:
> 
> if (out_sg > vq->num_free)
>     reorganise current skb for copy mode (e.g. 2 out_sg - header and data)
>     and try to add it to vq again.
> 
> ?

I guess I need to check the number of elements in the sg list against 'vq->num_max',
as this is the maximum for a totally free queue (if even a big sg list cannot be
added to the tx queue at this moment, the skb will be requeued below, waiting for
enough space). I'll pass the 'vq->num_max' value through another transport callback
to 'virtio_transport_common.c' and check the number of user pages against it: if the
user's buffer is too big, then use copy mode, so there will be only 2 elements in
the sg list and that will fit into the vq anyway.
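
A rough sketch of such a callback on the virtio transport side (the
function name and its wiring into the transport ops are hypothetical,
to be settled during review):

static u32 virtio_transport_get_max_vq_bufs(void)
{
	struct virtio_vsock *vsock;
	u32 max_bufs;

	rcu_read_lock();
	vsock = rcu_dereference(the_virtio_vsock);
	/* num_max is the vring size: the most buffers a completely
	 * free tx queue can ever take.
	 */
	max_bufs = vsock ? vsock->vqs[VSOCK_VQ_TX]->num_max : 0;
	rcu_read_unlock();

	return max_bufs;
}

'virtio_transport_common.c' would then compare the number of pinned user
pages plus one header entry against this value and fall back to copy mode
when the packet cannot fit even into an empty queue.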

Thanks, Arseniy

> 
> @Stefano, I'll remove net-next tag (guess RFC is not required again, but not net-next
> anyway) as this change will require review. R-b I think should be also removed. All
> other patches in this set still unchanged.
> 
> Thanks, Arseniy
> 
>>
>>> -- 
>>> 2.25.1
>>
Stefano Garzarella July 19, 2023, 7:36 a.m. UTC | #4
On Wed, Jul 19, 2023 at 07:46:05AM +0300, Arseniy Krasnov wrote:
>
>
>On 18.07.2023 23:27, Michael S. Tsirkin wrote:
>> On Tue, Jul 18, 2023 at 09:02:35PM +0300, Arseniy Krasnov wrote:
>>> For non-linear skb use its pages from fragment array as buffers in
>>> virtio tx queue. These pages are already pinned by 'get_user_pages()'
>>> during such skb creation.
>>>
>>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>>> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
>>> ---
>>>  net/vmw_vsock/virtio_transport.c | 40 +++++++++++++++++++++++++++-----
>>>  1 file changed, 34 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>>> index e95df847176b..6cbb45bb12d2 100644
>>> --- a/net/vmw_vsock/virtio_transport.c
>>> +++ b/net/vmw_vsock/virtio_transport.c
>>> @@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>>  	vq = vsock->vqs[VSOCK_VQ_TX];
>>>
>>>  	for (;;) {
>>> -		struct scatterlist hdr, buf, *sgs[2];
>>> +		/* +1 is for packet header. */
>>> +		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
>>> +		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
>>>  		int ret, in_sg = 0, out_sg = 0;
>>>  		struct sk_buff *skb;
>>>  		bool reply;
>>> @@ -111,12 +113,38 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>>
>>>  		virtio_transport_deliver_tap_pkt(skb);
>>>  		reply = virtio_vsock_skb_reply(skb);
>>> +		sg_init_one(&bufs[out_sg], virtio_vsock_hdr(skb),
>>> +			    sizeof(*virtio_vsock_hdr(skb)));
>>> +		sgs[out_sg] = &bufs[out_sg];
>>> +		out_sg++;
>>> +
>>> +		if (!skb_is_nonlinear(skb)) {
>>> +			if (skb->len > 0) {
>>> +				sg_init_one(&bufs[out_sg], skb->data, skb->len);
>>> +				sgs[out_sg] = &bufs[out_sg];
>>> +				out_sg++;
>>> +			}
>>> +		} else {
>>> +			struct skb_shared_info *si;
>>> +			int i;
>>> +
>>> +			si = skb_shinfo(skb);
>>> +
>>> +			for (i = 0; i < si->nr_frags; i++) {
>>> +				skb_frag_t *skb_frag = &si->frags[i];
>>> +				void *va = page_to_virt(skb_frag->bv_page);
>>>
>>> -		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>>> -		sgs[out_sg++] = &hdr;
>>> -		if (skb->len > 0) {
>>> -			sg_init_one(&buf, skb->data, skb->len);
>>> -			sgs[out_sg++] = &buf;
>>> +				/* We will use 'page_to_virt()' for userspace page here,
>>
>> don't put comments after code they refer to, please?
>>
>>> +				 * because virtio layer will call 'virt_to_phys()' later
>>
>> it will but not always. sometimes it's the dma mapping layer.
>>
>>
>>> +				 * to fill buffer descriptor. We don't touch memory at
>>> +				 * "virtual" address of this page.
>>
>>
>> you need to stick "the" in a bunch of places above.
>
>Ok, I'll fix this comment!
>
>>
>>> +				 */
>>> +				sg_init_one(&bufs[out_sg],
>>> +					    va + skb_frag->bv_offset,
>>> +					    skb_frag->bv_len);
>>> +				sgs[out_sg] = &bufs[out_sg];
>>> +				out_sg++;
>>> +			}
>>>  		}
>>>
>>>  		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
>>
>>
>> There's a problem here: if there vq is small this will fail.
>> So you really should check free vq s/gs and switch to non-zcopy
>> if too small.
>
>Ok, so idea is that:
>
>if (out_sg > vq->num_free)
>    reorganise current skb for copy mode (e.g. 2 out_sg - header and data)
>    and try to add it to vq again.
>
>?
>
>@Stefano, I'll remove net-next tag (guess RFC is not required again, but not net-next
>anyway) as this change will require review. R-b I think should be also removed. All
>other patches in this set still unchanged.

It's still a new feature, so we have the net-next tree as the target, right?

I think we should keep net-next. Even if the patches need to be
re-reviewed, net-next indicates the tree where we want these to be merged,
and for new features it's the right one.

Ack for not putting RFC again and for the R-b removal on this patch.

Thanks,
Stefano
Arseniy Krasnov July 19, 2023, 7:47 a.m. UTC | #5
On 19.07.2023 10:36, Stefano Garzarella wrote:
> On Wed, Jul 19, 2023 at 07:46:05AM +0300, Arseniy Krasnov wrote:
>>
>>
>> On 18.07.2023 23:27, Michael S. Tsirkin wrote:
>>> On Tue, Jul 18, 2023 at 09:02:35PM +0300, Arseniy Krasnov wrote:
>>>> For non-linear skb use its pages from fragment array as buffers in
>>>> virtio tx queue. These pages are already pinned by 'get_user_pages()'
>>>> during such skb creation.
>>>>
>>>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>>>> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
>>>> ---
>>>>  net/vmw_vsock/virtio_transport.c | 40 +++++++++++++++++++++++++++-----
>>>>  1 file changed, 34 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>>>> index e95df847176b..6cbb45bb12d2 100644
>>>> --- a/net/vmw_vsock/virtio_transport.c
>>>> +++ b/net/vmw_vsock/virtio_transport.c
>>>> @@ -100,7 +100,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>>>      vq = vsock->vqs[VSOCK_VQ_TX];
>>>>
>>>>      for (;;) {
>>>> -        struct scatterlist hdr, buf, *sgs[2];
>>>> +        /* +1 is for packet header. */
>>>> +        struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
>>>> +        struct scatterlist bufs[MAX_SKB_FRAGS + 1];
>>>>          int ret, in_sg = 0, out_sg = 0;
>>>>          struct sk_buff *skb;
>>>>          bool reply;
>>>> @@ -111,12 +113,38 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>>>>
>>>>          virtio_transport_deliver_tap_pkt(skb);
>>>>          reply = virtio_vsock_skb_reply(skb);
>>>> +        sg_init_one(&bufs[out_sg], virtio_vsock_hdr(skb),
>>>> +                sizeof(*virtio_vsock_hdr(skb)));
>>>> +        sgs[out_sg] = &bufs[out_sg];
>>>> +        out_sg++;
>>>> +
>>>> +        if (!skb_is_nonlinear(skb)) {
>>>> +            if (skb->len > 0) {
>>>> +                sg_init_one(&bufs[out_sg], skb->data, skb->len);
>>>> +                sgs[out_sg] = &bufs[out_sg];
>>>> +                out_sg++;
>>>> +            }
>>>> +        } else {
>>>> +            struct skb_shared_info *si;
>>>> +            int i;
>>>> +
>>>> +            si = skb_shinfo(skb);
>>>> +
>>>> +            for (i = 0; i < si->nr_frags; i++) {
>>>> +                skb_frag_t *skb_frag = &si->frags[i];
>>>> +                void *va = page_to_virt(skb_frag->bv_page);
>>>>
>>>> -        sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
>>>> -        sgs[out_sg++] = &hdr;
>>>> -        if (skb->len > 0) {
>>>> -            sg_init_one(&buf, skb->data, skb->len);
>>>> -            sgs[out_sg++] = &buf;
>>>> +                /* We will use 'page_to_virt()' for userspace page here,
>>>
>>> don't put comments after code they refer to, please?
>>>
>>>> +                 * because virtio layer will call 'virt_to_phys()' later
>>>
>>> it will but not always. sometimes it's the dma mapping layer.
>>>
>>>
>>>> +                 * to fill buffer descriptor. We don't touch memory at
>>>> +                 * "virtual" address of this page.
>>>
>>>
>>> you need to stick "the" in a bunch of places above.
>>
>> Ok, I'll fix this comment!
>>
>>>
>>>> +                 */
>>>> +                sg_init_one(&bufs[out_sg],
>>>> +                        va + skb_frag->bv_offset,
>>>> +                        skb_frag->bv_len);
>>>> +                sgs[out_sg] = &bufs[out_sg];
>>>> +                out_sg++;
>>>> +            }
>>>>          }
>>>>
>>>>          ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
>>>
>>>
>>> There's a problem here: if there vq is small this will fail.
>>> So you really should check free vq s/gs and switch to non-zcopy
>>> if too small.
>>
>> Ok, so idea is that:
>>
>> if (out_sg > vq->num_free)
>>    reorganise current skb for copy mode (e.g. 2 out_sg - header and data)
>>    and try to add it to vq again.
>>
>> ?
>>
>> @Stefano, I'll remove net-next tag (guess RFC is not required again, but not net-next
>> anyway) as this change will require review. R-b I think should be also removed. All
>> other patches in this set still unchanged.
> 
> It's still a new feature so we have net-next tree as the target, right?
> 
> I think we should keep net-next. Even if patches require to be
> re-reviewed, net-next indicates the tree where we want these to be merge
> and for new features is the right one.
> 
> Ack for not putting RFC again and for R-b removal for this patch.

Ok,

Thanks, Arseniy

> 
> Thanks,
> Stefano
>

Patch

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index e95df847176b..6cbb45bb12d2 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -100,7 +100,9 @@  virtio_transport_send_pkt_work(struct work_struct *work)
 	vq = vsock->vqs[VSOCK_VQ_TX];
 
 	for (;;) {
-		struct scatterlist hdr, buf, *sgs[2];
+		/* +1 is for packet header. */
+		struct scatterlist *sgs[MAX_SKB_FRAGS + 1];
+		struct scatterlist bufs[MAX_SKB_FRAGS + 1];
 		int ret, in_sg = 0, out_sg = 0;
 		struct sk_buff *skb;
 		bool reply;
@@ -111,12 +113,38 @@  virtio_transport_send_pkt_work(struct work_struct *work)
 
 		virtio_transport_deliver_tap_pkt(skb);
 		reply = virtio_vsock_skb_reply(skb);
+		sg_init_one(&bufs[out_sg], virtio_vsock_hdr(skb),
+			    sizeof(*virtio_vsock_hdr(skb)));
+		sgs[out_sg] = &bufs[out_sg];
+		out_sg++;
+
+		if (!skb_is_nonlinear(skb)) {
+			if (skb->len > 0) {
+				sg_init_one(&bufs[out_sg], skb->data, skb->len);
+				sgs[out_sg] = &bufs[out_sg];
+				out_sg++;
+			}
+		} else {
+			struct skb_shared_info *si;
+			int i;
+
+			si = skb_shinfo(skb);
+
+			for (i = 0; i < si->nr_frags; i++) {
+				skb_frag_t *skb_frag = &si->frags[i];
+				void *va = page_to_virt(skb_frag->bv_page);
 
-		sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
-		sgs[out_sg++] = &hdr;
-		if (skb->len > 0) {
-			sg_init_one(&buf, skb->data, skb->len);
-			sgs[out_sg++] = &buf;
+				/* We will use 'page_to_virt()' for userspace page here,
+				 * because virtio layer will call 'virt_to_phys()' later
+				 * to fill buffer descriptor. We don't touch memory at
+				 * "virtual" address of this page.
+				 */
+				sg_init_one(&bufs[out_sg],
+					    va + skb_frag->bv_offset,
+					    skb_frag->bv_len);
+				sgs[out_sg] = &bufs[out_sg];
+				out_sg++;
+			}
 		}
 
 		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);