diff mbox series

[RFC,v2,2/8] vhost/vsock: rework packet allocation logic

Message ID 72ae7f76-ffee-3e64-d445-7a0f4261d891@sberdevices.ru (mailing list archive)
State New, archived
Headers show
Series virtio/vsock: experimental zerocopy receive | expand

Commit Message

Arseniy Krasnov June 3, 2022, 5:33 a.m. UTC
For packets received from virtio RX queue, use buddy
allocator instead of 'kmalloc()' to be able to insert
such pages to user provided vma. Single call to
'copy_from_iter()' replaced with per-page loop.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
---
 drivers/vhost/vsock.c | 81 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 69 insertions(+), 12 deletions(-)

Comments

Stefano Garzarella June 9, 2022, 8:38 a.m. UTC | #1
On Fri, Jun 03, 2022 at 05:33:04AM +0000, Arseniy Krasnov wrote:
>For packets received from virtio RX queue, use buddy
>allocator instead of 'kmalloc()' to be able to insert
>such pages to user provided vma. Single call to
>'copy_from_iter()' replaced with per-page loop.
>
>Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>---
> drivers/vhost/vsock.c | 81 ++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 69 insertions(+), 12 deletions(-)
>
>diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>index e6c9d41db1de..0dc2229f18f7 100644
>--- a/drivers/vhost/vsock.c
>+++ b/drivers/vhost/vsock.c
>@@ -58,6 +58,7 @@ struct vhost_vsock {
>
> 	u32 guest_cid;
> 	bool seqpacket_allow;
>+	bool zerocopy_rx_on;

This is per-device, so a single socket can change the behaviour of all 
the sockets of this device.

Can we do something better?

Maybe we can allocate the header, copy it, find the socket and check if 
zero-copy is enabled or not for that socket.

Of course we should change or extend virtio_transport_recv_pkt() to 
avoid to find the socket again.


> };
>
> static u32 vhost_transport_get_local_cid(void)
>@@ -357,6 +358,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
> 		      unsigned int out, unsigned int in)
> {
> 	struct virtio_vsock_pkt *pkt;
>+	struct vhost_vsock *vsock;
> 	struct iov_iter iov_iter;
> 	size_t nbytes;
> 	size_t len;
>@@ -393,20 +395,75 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
> 		return NULL;
> 	}
>
>-	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>-	if (!pkt->buf) {
>-		kfree(pkt);
>-		return NULL;
>-	}
>-
> 	pkt->buf_len = pkt->len;
>+	vsock = container_of(vq->dev, struct vhost_vsock, dev);
>
>-	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>-	if (nbytes != pkt->len) {
>-		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>-		       pkt->len, nbytes);
>-		virtio_transport_free_pkt(pkt);
>-		return NULL;
>+	if (!vsock->zerocopy_rx_on) {
>+		pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>+
>+		if (!pkt->buf) {
>+			kfree(pkt);
>+			return NULL;
>+		}
>+
>+		pkt->slab_buf = true;
>+		nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>+		if (nbytes != pkt->len) {
>+			vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>+				pkt->len, nbytes);
>+			virtio_transport_free_pkt(pkt);
>+			return NULL;
>+		}
>+	} else {
>+		struct page *buf_page;
>+		ssize_t pkt_len;
>+		int page_idx;
>+
>+		/* This creates memory overrun, as we allocate
>+		 * at least one page for each packet.
>+		 */
>+		buf_page = alloc_pages(GFP_KERNEL, get_order(pkt->len));
>+
>+		if (buf_page == NULL) {
>+			kfree(pkt);
>+			return NULL;
>+		}
>+
>+		pkt->buf = page_to_virt(buf_page);
>+
>+		page_idx = 0;
>+		pkt_len = pkt->len;
>+
>+		/* As allocated pages are not mapped, process
>+		 * pages one by one.
>+		 */
>+		while (pkt_len > 0) {
>+			void *mapped;
>+			size_t to_copy;
>+
>+			mapped = kmap(buf_page + page_idx);
>+
>+			if (mapped == NULL) {
>+				virtio_transport_free_pkt(pkt);
>+				return NULL;
>+			}
>+
>+			to_copy = min(pkt_len, ((ssize_t)PAGE_SIZE));
>+
>+			nbytes = copy_from_iter(mapped, to_copy, &iov_iter);
>+			if (nbytes != to_copy) {
>+				vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
>+				       to_copy, nbytes);
>+				kunmap(mapped);
>+				virtio_transport_free_pkt(pkt);
>+				return NULL;
>+			}
>+
>+			kunmap(mapped);
>+
>+			pkt_len -= to_copy;
>+			page_idx++;
>+		}
> 	}
>
> 	return pkt;
>-- 
>2.25.1
Arseniy Krasnov June 9, 2022, 12:09 p.m. UTC | #2
On 09.06.2022 11:38, Stefano Garzarella wrote:
> On Fri, Jun 03, 2022 at 05:33:04AM +0000, Arseniy Krasnov wrote:
>> For packets received from virtio RX queue, use buddy
>> allocator instead of 'kmalloc()' to be able to insert
>> such pages to user provided vma. Single call to
>> 'copy_from_iter()' replaced with per-page loop.
>>
>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>> ---
>> drivers/vhost/vsock.c | 81 ++++++++++++++++++++++++++++++++++++-------
>> 1 file changed, 69 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index e6c9d41db1de..0dc2229f18f7 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -58,6 +58,7 @@ struct vhost_vsock {
>>
>>     u32 guest_cid;
>>     bool seqpacket_allow;
>> +    bool zerocopy_rx_on;
> 
> This is per-device, so a single socket can change the behaviour of all the sockets of this device.

Sure, my mistake

> 
> Can we do something better?
> 
> Maybe we can allocate the header, copy it, find the socket and check if zero-copy is enabled or not for that socket.
> 
> Of course we should change or extend virtio_transport_recv_pkt() to avoid to find the socket again.

I think yes

> 
> 
>> };
>>
>> static u32 vhost_transport_get_local_cid(void)
>> @@ -357,6 +358,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>>               unsigned int out, unsigned int in)
>> {
>>     struct virtio_vsock_pkt *pkt;
>> +    struct vhost_vsock *vsock;
>>     struct iov_iter iov_iter;
>>     size_t nbytes;
>>     size_t len;
>> @@ -393,20 +395,75 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
>>         return NULL;
>>     }
>>
>> -    pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>> -    if (!pkt->buf) {
>> -        kfree(pkt);
>> -        return NULL;
>> -    }
>> -
>>     pkt->buf_len = pkt->len;
>> +    vsock = container_of(vq->dev, struct vhost_vsock, dev);
>>
>> -    nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>> -    if (nbytes != pkt->len) {
>> -        vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>> -               pkt->len, nbytes);
>> -        virtio_transport_free_pkt(pkt);
>> -        return NULL;
>> +    if (!vsock->zerocopy_rx_on) {
>> +        pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
>> +
>> +        if (!pkt->buf) {
>> +            kfree(pkt);
>> +            return NULL;
>> +        }
>> +
>> +        pkt->slab_buf = true;
>> +        nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
>> +        if (nbytes != pkt->len) {
>> +            vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
>> +                pkt->len, nbytes);
>> +            virtio_transport_free_pkt(pkt);
>> +            return NULL;
>> +        }
>> +    } else {
>> +        struct page *buf_page;
>> +        ssize_t pkt_len;
>> +        int page_idx;
>> +
>> +        /* This creates memory overrun, as we allocate
>> +         * at least one page for each packet.
>> +         */
>> +        buf_page = alloc_pages(GFP_KERNEL, get_order(pkt->len));
>> +
>> +        if (buf_page == NULL) {
>> +            kfree(pkt);
>> +            return NULL;
>> +        }
>> +
>> +        pkt->buf = page_to_virt(buf_page);
>> +
>> +        page_idx = 0;
>> +        pkt_len = pkt->len;
>> +
>> +        /* As allocated pages are not mapped, process
>> +         * pages one by one.
>> +         */
>> +        while (pkt_len > 0) {
>> +            void *mapped;
>> +            size_t to_copy;
>> +
>> +            mapped = kmap(buf_page + page_idx);
>> +
>> +            if (mapped == NULL) {
>> +                virtio_transport_free_pkt(pkt);
>> +                return NULL;
>> +            }
>> +
>> +            to_copy = min(pkt_len, ((ssize_t)PAGE_SIZE));
>> +
>> +            nbytes = copy_from_iter(mapped, to_copy, &iov_iter);
>> +            if (nbytes != to_copy) {
>> +                vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
>> +                       to_copy, nbytes);
>> +                kunmap(mapped);
>> +                virtio_transport_free_pkt(pkt);
>> +                return NULL;
>> +            }
>> +
>> +            kunmap(mapped);
>> +
>> +            pkt_len -= to_copy;
>> +            page_idx++;
>> +        }
>>     }
>>
>>     return pkt;
>> -- 
>> 2.25.1
>
diff mbox series

Patch

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index e6c9d41db1de..0dc2229f18f7 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -58,6 +58,7 @@  struct vhost_vsock {
 
 	u32 guest_cid;
 	bool seqpacket_allow;
+	bool zerocopy_rx_on;
 };
 
 static u32 vhost_transport_get_local_cid(void)
@@ -357,6 +358,7 @@  vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		      unsigned int out, unsigned int in)
 {
 	struct virtio_vsock_pkt *pkt;
+	struct vhost_vsock *vsock;
 	struct iov_iter iov_iter;
 	size_t nbytes;
 	size_t len;
@@ -393,20 +395,75 @@  vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		return NULL;
 	}
 
-	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
-	if (!pkt->buf) {
-		kfree(pkt);
-		return NULL;
-	}
-
 	pkt->buf_len = pkt->len;
+	vsock = container_of(vq->dev, struct vhost_vsock, dev);
 
-	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
-	if (nbytes != pkt->len) {
-		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
-		       pkt->len, nbytes);
-		virtio_transport_free_pkt(pkt);
-		return NULL;
+	if (!vsock->zerocopy_rx_on) {
+		pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
+
+		if (!pkt->buf) {
+			kfree(pkt);
+			return NULL;
+		}
+
+		pkt->slab_buf = true;
+		nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
+		if (nbytes != pkt->len) {
+			vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
+				pkt->len, nbytes);
+			virtio_transport_free_pkt(pkt);
+			return NULL;
+		}
+	} else {
+		struct page *buf_page;
+		ssize_t pkt_len;
+		int page_idx;
+
+		/* This creates memory overrun, as we allocate
+		 * at least one page for each packet.
+		 */
+		buf_page = alloc_pages(GFP_KERNEL, get_order(pkt->len));
+
+		if (buf_page == NULL) {
+			kfree(pkt);
+			return NULL;
+		}
+
+		pkt->buf = page_to_virt(buf_page);
+
+		page_idx = 0;
+		pkt_len = pkt->len;
+
+		/* As allocated pages are not mapped, process
+		 * pages one by one.
+		 */
+		while (pkt_len > 0) {
+			void *mapped;
+			size_t to_copy;
+
+			mapped = kmap(buf_page + page_idx);
+
+			if (mapped == NULL) {
+				virtio_transport_free_pkt(pkt);
+				return NULL;
+			}
+
+			to_copy = min(pkt_len, ((ssize_t)PAGE_SIZE));
+
+			nbytes = copy_from_iter(mapped, to_copy, &iov_iter);
+			if (nbytes != to_copy) {
+				vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
+				       to_copy, nbytes);
+				kunmap(mapped);
+				virtio_transport_free_pkt(pkt);
+				return NULL;
+			}
+
+			kunmap(mapped);
+
+			pkt_len -= to_copy;
+			page_idx++;
+		}
 	}
 
 	return pkt;