[[PATCH,v1] 07/37] [CIFS] SMBD: Implement receive buffer for handling SMBD response
diff mbox

Message ID 1501704648-20159-8-git-send-email-longli@exchange.microsoft.com
State New
Headers show

Commit Message

Long Li Aug. 2, 2017, 8:10 p.m. UTC
From: Long Li <longli@microsoft.com>

Create recevie buffers to handling SMBD response messages. Each SMBD response message is assocated with a receive buffer, where the message payload is received. The number of receive buffer is determine after negotiating SMBD transport with the server.

Signed-off-by: Long Li <longli@microsoft.com>
---
 fs/cifs/cifsrdma.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/cifsrdma.h |  3 ++
 2 files changed, 92 insertions(+)

Comments

Tom Talpey Aug. 14, 2017, 8:09 p.m. UTC | #1
> -----Original Message-----
> From: linux-cifs-owner@vger.kernel.org [mailto:linux-cifs-
> owner@vger.kernel.org] On Behalf Of Long Li
> Sent: Wednesday, August 2, 2017 4:10 PM
> To: Steve French <sfrench@samba.org>; linux-cifs@vger.kernel.org; samba-
> technical@lists.samba.org; linux-kernel@vger.kernel.org
> Cc: Long Li <longli@microsoft.com>
> Subject: [[PATCH v1] 07/37] [CIFS] SMBD: Implement receive buffer for
> handling SMBD response
> 
> +/*
> + * Receive buffer operations.
> + * For each remote send, we need to post a receive. The receive buffers are
> + * pre-allocated in advance.
> + */

This approach appears to have been derived from the NFS/RDMA one.
The SMB protocol operates very differently! It is not a strict request/
response protocol. Many operations can become asynchronous by the
server choosing to make a STATUS_PENDING reply. A second reply then
comes later. The SMB2_CANCEL operation normally has no reply at all.
And callbacks for oplocks can occur at any time.

Even within a single request, many replies can be received. For example,
an SMB2_READ response which exceeds your negotiated receive size of
8192. These will be fragmented by SMB Direct into a "train" of multiple
messages, which will be logically reassembled by the receiver. Each of
them will consume a credit.

Thanks to SMB Direct crediting, the connection is not failing, but you are
undoubtedly spending a lot of time and ping-ponging to re-post receives
and allow the message trains to flow. And, because it's never one-to-one,
there are also unneeded receives posted before and after such exchanges.

You need to use SMB Direct crediting to post a more traffic-sensitive pool
of receives, and simply manage its depth when posting client requests.
As a start, I'd suggest simply choosing a constant number, approximately
whatever credit value you actually negotiate with the peer. Then, just
replenish (re-post) receive buffers as they are completed by the adapter.
You can get more sophisticated about this strategy later.

Tom.

> +static struct cifs_rdma_response* get_receive_buffer(struct cifs_rdma_info
> *info)
> +{
> +       struct cifs_rdma_response *ret = NULL;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&info->receive_queue_lock, flags);
> +       if (!list_empty(&info->receive_queue)) {
> +               ret = list_first_entry(
> +                       &info->receive_queue,
> +                       struct cifs_rdma_response, list);
> +               list_del(&ret->list);
> +               info->count_receive_buffer--;
> +               info->count_get_receive_buffer++;
> +       }
> +       spin_unlock_irqrestore(&info->receive_queue_lock, flags);
> +
> +       return ret;
> +}
> +
> +static void put_receive_buffer(
> +       struct cifs_rdma_info *info, struct cifs_rdma_response *response)
> +{
> +       unsigned long flags;
> +
> +       ib_dma_unmap_single(info->id->device, response->sge.addr,
> +               response->sge.length, DMA_FROM_DEVICE);
> +
> +       spin_lock_irqsave(&info->receive_queue_lock, flags);
> +       list_add_tail(&response->list, &info->receive_queue);
> +       info->count_receive_buffer++;
> +       info->count_put_receive_buffer++;
> +       spin_unlock_irqrestore(&info->receive_queue_lock, flags);
> +}
> +
> +static int allocate_receive_buffers(struct cifs_rdma_info *info, int num_buf)
> +{
> +       int i;
> +       struct cifs_rdma_response *response;
> +
> +       INIT_LIST_HEAD(&info->receive_queue);
> +       spin_lock_init(&info->receive_queue_lock);
> +
> +       for (i=0; i<num_buf; i++) {
> +               response = mempool_alloc(info->response_mempool, GFP_KERNEL);
> +               if (!response)
> +                       goto allocate_failed;
> +
> +               response->info = info;
> +               list_add_tail(&response->list, &info->receive_queue);
> +               info->count_receive_buffer++;
> +       }
> +
> +       return 0;
> +
> +allocate_failed:
> +       while (!list_empty(&info->receive_queue)) {
> +               response = list_first_entry(
> +                               &info->receive_queue,
> +                               struct cifs_rdma_response, list);
> +               list_del(&response->list);
> +               info->count_receive_buffer--;
> +
> +               mempool_free(response, info->response_mempool);
> +       }
> +       return -ENOMEM;
> +}
> +
> +static void destroy_receive_buffers(struct cifs_rdma_info *info)
> +{
> +       struct cifs_rdma_response *response;
> +       while ((response = get_receive_buffer(info)))
> +               mempool_free(response, info->response_mempool);
> +}
> +

--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Long Li Aug. 19, 2017, 11:41 p.m. UTC | #2
> -----Original Message-----
> From: Tom Talpey
> Sent: Monday, August 14, 2017 1:09 PM
> To: Long Li <longli@microsoft.com>; Steve French <sfrench@samba.org>;
> linux-cifs@vger.kernel.org; samba-technical@lists.samba.org; linux-
> kernel@vger.kernel.org; linux-rdma@vger.kernel.org
> Subject: RE: [[PATCH v1] 07/37] [CIFS] SMBD: Implement receive buffer for
> handling SMBD response
> 
> > -----Original Message-----
> > From: linux-cifs-owner@vger.kernel.org [mailto:linux-cifs-
> > owner@vger.kernel.org] On Behalf Of Long Li
> > Sent: Wednesday, August 2, 2017 4:10 PM
> > To: Steve French <sfrench@samba.org>; linux-cifs@vger.kernel.org;
> > samba- technical@lists.samba.org; linux-kernel@vger.kernel.org
> > Cc: Long Li <longli@microsoft.com>
> > Subject: [[PATCH v1] 07/37] [CIFS] SMBD: Implement receive buffer for
> > handling SMBD response
> >
> > +/*
> > + * Receive buffer operations.
> > + * For each remote send, we need to post a receive. The receive
> > +buffers are
> > + * pre-allocated in advance.
> > + */
> 
> This approach appears to have been derived from the NFS/RDMA one.
> The SMB protocol operates very differently! It is not a strict request/
> response protocol. Many operations can become asynchronous by the
> server choosing to make a STATUS_PENDING reply. A second reply then
> comes later. The SMB2_CANCEL operation normally has no reply at all.
> And callbacks for oplocks can occur at any time.

I think you misunderstood the receiver buffers. They are posted so the remote peer can post a send. The remote peer's receive credit is calculated based on how many receive buffer have been posted.  The code doesn't assume one post_send needs one corresponding post_recv. In practice, receive buffers are posted as soon as possible to extend receive credits to the remote peer.

> 
> Even within a single request, many replies can be received. For example, an
> SMB2_READ response which exceeds your negotiated receive size of 8192.
> These will be fragmented by SMB Direct into a "train" of multiple messages,
> which will be logically reassembled by the receiver. Each of them will
> consume a credit.
> 
> Thanks to SMB Direct crediting, the connection is not failing, but you are
> undoubtedly spending a lot of time and ping-ponging to re-post receives and
> allow the message trains to flow. And, because it's never one-to-one, there
> are also unneeded receives posted before and after such exchanges.
> 
> You need to use SMB Direct crediting to post a more traffic-sensitive pool of
> receives, and simply manage its depth when posting client requests.
> As a start, I'd suggest simply choosing a constant number, approximately
> whatever credit value you actually negotiate with the peer. Then, just
> replenish (re-post) receive buffers as they are completed by the adapter.
> You can get more sophisticated about this strategy later.

The code behaves exactly the same as you described. It uses a constant to decide how many receive buffer to post. It's not very smart and can be improved.

> 
> Tom.
> 
> > +static struct cifs_rdma_response* get_receive_buffer(struct
> > +cifs_rdma_info
> > *info)
> > +{
> > +       struct cifs_rdma_response *ret = NULL;
> > +       unsigned long flags;
> > +
> > +       spin_lock_irqsave(&info->receive_queue_lock, flags);
> > +       if (!list_empty(&info->receive_queue)) {
> > +               ret = list_first_entry(
> > +                       &info->receive_queue,
> > +                       struct cifs_rdma_response, list);
> > +               list_del(&ret->list);
> > +               info->count_receive_buffer--;
> > +               info->count_get_receive_buffer++;
> > +       }
> > +       spin_unlock_irqrestore(&info->receive_queue_lock, flags);
> > +
> > +       return ret;
> > +}
> > +
> > +static void put_receive_buffer(
> > +       struct cifs_rdma_info *info, struct cifs_rdma_response
> > +*response) {
> > +       unsigned long flags;
> > +
> > +       ib_dma_unmap_single(info->id->device, response->sge.addr,
> > +               response->sge.length, DMA_FROM_DEVICE);
> > +
> > +       spin_lock_irqsave(&info->receive_queue_lock, flags);
> > +       list_add_tail(&response->list, &info->receive_queue);
> > +       info->count_receive_buffer++;
> > +       info->count_put_receive_buffer++;
> > +       spin_unlock_irqrestore(&info->receive_queue_lock, flags); }
> > +
> > +static int allocate_receive_buffers(struct cifs_rdma_info *info, int
> > +num_buf) {
> > +       int i;
> > +       struct cifs_rdma_response *response;
> > +
> > +       INIT_LIST_HEAD(&info->receive_queue);
> > +       spin_lock_init(&info->receive_queue_lock);
> > +
> > +       for (i=0; i<num_buf; i++) {
> > +               response = mempool_alloc(info->response_mempool,
> GFP_KERNEL);
> > +               if (!response)
> > +                       goto allocate_failed;
> > +
> > +               response->info = info;
> > +               list_add_tail(&response->list, &info->receive_queue);
> > +               info->count_receive_buffer++;
> > +       }
> > +
> > +       return 0;
> > +
> > +allocate_failed:
> > +       while (!list_empty(&info->receive_queue)) {
> > +               response = list_first_entry(
> > +                               &info->receive_queue,
> > +                               struct cifs_rdma_response, list);
> > +               list_del(&response->list);
> > +               info->count_receive_buffer--;
> > +
> > +               mempool_free(response, info->response_mempool);
> > +       }
> > +       return -ENOMEM;
> > +}
> > +
> > +static void destroy_receive_buffers(struct cifs_rdma_info *info) {
> > +       struct cifs_rdma_response *response;
> > +       while ((response = get_receive_buffer(info)))
> > +               mempool_free(response, info->response_mempool); }
> > +

--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/cifs/cifsrdma.c b/fs/cifs/cifsrdma.c
index 1636304..b3e43b1 100644
--- a/fs/cifs/cifsrdma.c
+++ b/fs/cifs/cifsrdma.c
@@ -54,6 +54,14 @@ 
 
 #include "cifsrdma.h"
 
+static struct cifs_rdma_response* get_receive_buffer(
+		struct cifs_rdma_info *info);
+static void put_receive_buffer(
+		struct cifs_rdma_info *info,
+		struct cifs_rdma_response *response);
+static int allocate_receive_buffers(struct cifs_rdma_info *info, int num_buf);
+static void destroy_receive_buffers(struct cifs_rdma_info *info);
+
 /*
  * Per RDMA transport connection parameters
  * as defined in [MS-SMBD] 3.1.1.1
@@ -280,6 +288,85 @@  static int cifs_rdma_ia_open(
 	return rc;
 }
 
+/*
+ * Receive buffer operations.
+ * For each remote send, we need to post a receive. The receive buffers are
+ * pre-allocated in advance.
+ */
+static struct cifs_rdma_response* get_receive_buffer(struct cifs_rdma_info *info)
+{
+	struct cifs_rdma_response *ret = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->receive_queue_lock, flags);
+	if (!list_empty(&info->receive_queue)) {
+		ret = list_first_entry(
+			&info->receive_queue,
+			struct cifs_rdma_response, list);
+		list_del(&ret->list);
+		info->count_receive_buffer--;
+		info->count_get_receive_buffer++;
+	}
+	spin_unlock_irqrestore(&info->receive_queue_lock, flags);
+
+	return ret;
+}
+
+static void put_receive_buffer(
+	struct cifs_rdma_info *info, struct cifs_rdma_response *response)
+{
+	unsigned long flags;
+
+	ib_dma_unmap_single(info->id->device, response->sge.addr,
+		response->sge.length, DMA_FROM_DEVICE);
+
+	spin_lock_irqsave(&info->receive_queue_lock, flags);
+	list_add_tail(&response->list, &info->receive_queue);
+	info->count_receive_buffer++;
+	info->count_put_receive_buffer++;
+	spin_unlock_irqrestore(&info->receive_queue_lock, flags);
+}
+
+static int allocate_receive_buffers(struct cifs_rdma_info *info, int num_buf)
+{
+	int i;
+	struct cifs_rdma_response *response;
+
+	INIT_LIST_HEAD(&info->receive_queue);
+	spin_lock_init(&info->receive_queue_lock);
+
+	for (i=0; i<num_buf; i++) {
+		response = mempool_alloc(info->response_mempool, GFP_KERNEL);
+		if (!response)
+			goto allocate_failed;
+
+		response->info = info;
+		list_add_tail(&response->list, &info->receive_queue);
+		info->count_receive_buffer++;
+	}
+
+	return 0;
+
+allocate_failed:
+	while (!list_empty(&info->receive_queue)) {
+		response = list_first_entry(
+				&info->receive_queue,
+				struct cifs_rdma_response, list);
+		list_del(&response->list);
+		info->count_receive_buffer--;
+
+		mempool_free(response, info->response_mempool);
+	}
+	return -ENOMEM;
+}
+
+static void destroy_receive_buffers(struct cifs_rdma_info *info)
+{
+	struct cifs_rdma_response *response;
+	while ((response = get_receive_buffer(info)))
+		mempool_free(response, info->response_mempool);
+}
+
 struct cifs_rdma_info* cifs_create_rdma_session(
 	struct TCP_Server_Info *server, struct sockaddr *dstaddr)
 {
@@ -383,6 +470,8 @@  struct cifs_rdma_info* cifs_create_rdma_session(
 	info->response_mempool =
 		mempool_create(info->receive_credit_max, mempool_alloc_slab,
 		       mempool_free_slab, info->response_cache);
+
+	allocate_receive_buffers(info, info->receive_credit_max);
 out2:
 	rdma_destroy_id(info->id);
 
diff --git a/fs/cifs/cifsrdma.h b/fs/cifs/cifsrdma.h
index 41ae61a..78ce2bf 100644
--- a/fs/cifs/cifsrdma.h
+++ b/fs/cifs/cifsrdma.h
@@ -59,6 +59,9 @@  struct cifs_rdma_info {
 	atomic_t receive_credits;
 	atomic_t receive_credit_target;
 
+	struct list_head receive_queue;
+	spinlock_t receive_queue_lock;
+
 	// response pool for RDMA receive
 	struct kmem_cache *response_cache;
 	mempool_t *response_mempool;