From patchwork Tue Apr 6 20:32:53 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Stevens X-Patchwork-Id: 90860 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o36KZVWc008403 for ; Tue, 6 Apr 2010 20:35:31 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756245Ab0DFUf2 (ORCPT ); Tue, 6 Apr 2010 16:35:28 -0400 Received: from e37.co.us.ibm.com ([32.97.110.158]:38830 "EHLO e37.co.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752187Ab0DFUf0 (ORCPT ); Tue, 6 Apr 2010 16:35:26 -0400 Received: from d03relay05.boulder.ibm.com (d03relay05.boulder.ibm.com [9.17.195.107]) by e37.co.us.ibm.com (8.14.3/8.13.1) with ESMTP id o36KXpIW028320; Tue, 6 Apr 2010 14:33:51 -0600 Received: from d03av01.boulder.ibm.com (d03av01.boulder.ibm.com [9.17.195.167]) by d03relay05.boulder.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id o36KZ7wZ058564; Tue, 6 Apr 2010 14:35:09 -0600 Received: from d03av01.boulder.ibm.com (loopback [127.0.0.1]) by d03av01.boulder.ibm.com (8.14.3/8.13.1/NCO v10.0 AVout) with ESMTP id o36KZ6rw026086; Tue, 6 Apr 2010 14:35:07 -0600 Received: from [9.47.18.83] (dyn9047018083.beaverton.ibm.com [9.47.18.83]) by d03av01.boulder.ibm.com (8.14.3/8.13.1/NCO v10.0 AVin) with ESMTP id o36KZ5V8026056; Tue, 6 Apr 2010 14:35:06 -0600 Subject: [PATCH v3] Add Mergeable receive buffer support to vhost_net From: David L Stevens Reply-To: dlstevens@us.ibm.com To: "Michael S. Tsirkin" Cc: kvm@vger.kernel.org, netdev@vger.kernel.org, rusty@rustcorp.com.au, virtualization@lists.osdl.org Organization: IBM Date: Tue, 06 Apr 2010 13:32:53 -0700 Message-ID: <1270585973.28407.3.camel@lab1.dls> Mime-Version: 1.0 X-Mailer: Evolution 2.28.3 (2.28.3-1.fc12) Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Tue, 06 Apr 2010 20:35:32 +0000 (UTC) diff -ruNp net-next-p0/drivers/vhost/net.c net-next-v3/drivers/vhost/net.c --- net-next-p0/drivers/vhost/net.c 2010-03-22 12:04:38.000000000 -0700 +++ net-next-v3/drivers/vhost/net.c 2010-04-06 12:54:56.000000000 -0700 @@ -130,9 +130,8 @@ static void handle_tx(struct vhost_net * hdr_size = vq->hdr_size; for (;;) { - head = vhost_get_vq_desc(&net->dev, vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in, + head = vhost_get_desc(&net->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { @@ -167,8 +166,15 @@ static void handle_tx(struct vhost_net * /* TODO: Check specific error and bomb out unless ENOBUFS? */ err = sock->ops->sendmsg(NULL, sock, &msg, len); if (unlikely(err < 0)) { - vhost_discard_vq_desc(vq); - tx_poll_start(net, sock); + if (err == -EAGAIN) { + vhost_discard_desc(vq, 1); + tx_poll_start(net, sock); + } else { + vq_err(vq, "sendmsg: errno %d\n", -err); + /* drop packet; do not discard/resend */ + vhost_add_used_and_signal(&net->dev, vq, head, + 0); + } break; } if (err != len) @@ -186,12 +192,25 @@ static void handle_tx(struct vhost_net * unuse_mm(net->dev.mm); } +static int vhost_head_len(struct sock *sk) +{ + struct sk_buff *head; + int len = 0; + + lock_sock(sk); + head = skb_peek(&sk->sk_receive_queue); + if (head) + len = head->len; + release_sock(sk); + return len; +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_rx(struct vhost_net *net) { struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; - unsigned head, out, in, log, s; + unsigned in, log, s; struct vhost_log *vq_log; struct msghdr msg = { .msg_name = NULL, @@ -202,13 +221,14 @@ static void handle_rx(struct vhost_net * .msg_flags = MSG_DONTWAIT, }; - struct virtio_net_hdr hdr = { - .flags = 0, - .gso_type = VIRTIO_NET_HDR_GSO_NONE + struct virtio_net_hdr_mrg_rxbuf hdr = { + .hdr.flags = 0, + .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE }; + int retries = 0; size_t len, total_len = 0; - int err; + int err, headcount, datalen; size_t hdr_size; struct socket *sock = rcu_dereference(vq->private_data); if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) @@ -222,31 +242,25 @@ static void handle_rx(struct vhost_net * vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? vq->log : NULL; - for (;;) { - head = vhost_get_vq_desc(&net->dev, vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in, - vq_log, &log); + while ((datalen = vhost_head_len(sock->sk))) { + headcount = vhost_get_desc_n(vq, vq->heads, datalen, &in, + vq_log, &log); /* OK, now we need to know about added descriptors. */ - if (head == vq->num) { - if (unlikely(vhost_enable_notify(vq))) { + if (!headcount) { + if (retries == 0 && unlikely(vhost_enable_notify(vq))) { /* They have slipped one in as we were * doing that: check again. */ vhost_disable_notify(vq); + retries++; continue; } + retries = 0; /* Nothing new? Wait for eventfd to tell us * they refilled. */ break; } /* We don't need to be notified again. */ - if (out) { - vq_err(vq, "Unexpected descriptor format for RX: " - "out %d, int %d\n", - out, in); - break; - } - /* Skip header. TODO: support TSO/mergeable rx buffers. */ + /* Skip header. TODO: support TSO. */ s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); msg.msg_iovlen = in; len = iov_length(vq->iov, in); @@ -261,14 +275,33 @@ static void handle_rx(struct vhost_net * len, MSG_DONTWAIT | MSG_TRUNC); /* TODO: Check specific error and bomb out unless EAGAIN? */ if (err < 0) { - vhost_discard_vq_desc(vq); + vhost_discard_desc(vq, headcount); break; } /* TODO: Should check and handle checksum. */ + if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF)) { + struct virtio_net_hdr_mrg_rxbuf *vhdr = + (struct virtio_net_hdr_mrg_rxbuf *) + vq->iov[0].iov_base; + /* add num_buffers */ + if (vhost_has_feature(&net->dev, + VHOST_NET_F_VIRTIO_NET_HDR)) + hdr.num_buffers = headcount; + else if (vq->iov[0].iov_len < sizeof(*vhdr)) { + vq_err(vq, "tiny buffers < %d unsupported", + vq->iov[0].iov_len); + vhost_discard_desc(vq, headcount); + break; + } else if (put_user(headcount, &vhdr->num_buffers)) { + vq_err(vq, "Failed num_buffers write"); + vhost_discard_desc(vq, headcount); + break; + } + } if (err > len) { pr_err("Discarded truncated rx packet: " " len %d > %zd\n", err, len); - vhost_discard_vq_desc(vq); + vhost_discard_desc(vq, headcount); continue; } len = err; @@ -279,7 +312,7 @@ static void handle_rx(struct vhost_net * break; } len += hdr_size; - vhost_add_used_and_signal(&net->dev, vq, head, len); + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, headcount); if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, len); total_len += len; @@ -560,9 +593,14 @@ done: static int vhost_net_set_features(struct vhost_net *n, u64 features) { - size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ? - sizeof(struct virtio_net_hdr) : 0; + size_t hdr_size = 0; int i; + + if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) { + hdr_size = sizeof(struct virtio_net_hdr); + if (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) + hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } mutex_lock(&n->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && !vhost_log_access_ok(&n->dev)) {