diff mbox

[RFC,v2,2/2] nvmet-rdma: support 16K inline data

Message ID e3c4d8a638b989aef554646d235aa45d633d9565.1526505524.git.swise@opengridcomputing.com (mailing list archive)
State RFC
Headers show

Commit Message

Steve Wise May 16, 2018, 7:58 p.m. UTC
Add a new configfs port attribute, called inline_data_size, to
allow configuring the size of inline data for a given port.
The maximum size allowed is still enforced by nvmet-rdma with
NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is increased to max(16KB,
PAGE_SIZE).  And the default size, if not specified via configfs,
is still PAGE_SIZE.  This preserves the existing behavior, but allows
larger inline sizes.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
 drivers/nvme/target/admin-cmd.c |  4 ++--
 drivers/nvme/target/configfs.c  | 34 ++++++++++++++++++++++++++++++++++
 drivers/nvme/target/discovery.c |  2 +-
 drivers/nvme/target/nvmet.h     |  4 +++-
 drivers/nvme/target/rdma.c      | 41 +++++++++++++++++++++++++++++------------
 5 files changed, 69 insertions(+), 16 deletions(-)

Comments

Christoph Hellwig May 17, 2018, 11:52 a.m. UTC | #1
> +static ssize_t nvmet_inline_data_size_show(struct config_item *item,
> +		char *page)
> +{
> +	struct nvmet_port *port = to_nvmet_port(item);
> +
> +	return snprintf(page, PAGE_SIZE, "%u\n",
> +			port->inline_data_size);

Please fit the whole sprintf statement onto a single line.

> +}
> +
> +static ssize_t nvmet_inline_data_size_store(struct config_item *item,
> +		const char *page, size_t count)
> +{
> +	struct nvmet_port *port = to_nvmet_port(item);
> +	unsigned int size;
> +	int ret;
> +
> +	if (port->enabled) {
> +		pr_err("Cannot modify inline_data_size enabled\n");
> +		pr_err("Disable the port before modifying\n");
> +		return -EACCES;
> +	}
> +	ret = kstrtouint((const char *)page, 0, &size);

This cast looks bogus.

Also inline_data_size should be a u32 as that is closest to what
is on the wire, and you thus should use kstrtou32 and pass the
inline_data_size straight to kstrtou32 instead of bouncing it through
a local variable.

> +CONFIGFS_ATTR(nvmet_, inline_data_size);

The characters before the first _ in the name are used as a group
by nvmetcli.  So I think this should get a param_ or so prefix
before the inline_data_size.  Also currently this attribute only
makes sense for rdma, so I think we still need a flag in
nvmet_fabrics_ops that enables/disables this attribute.

Last but not least please also send a nvmetcli patch to support
this new attribute.

> +#define NVMET_DEFAULT_INLINE_DATA_SIZE	-1

0 makes much more sense as the default, and then we don't even need
a name for it.

> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)

So for 64k pages the minimum is bigger than the maximum? :)

> +	int			inline_data_size;

u32
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise May 17, 2018, 2:24 p.m. UTC | #2
On 5/17/2018 6:52 AM, Christoph Hellwig wrote:
>> +static ssize_t nvmet_inline_data_size_show(struct config_item *item,
>> +		char *page)
>> +{
>> +	struct nvmet_port *port = to_nvmet_port(item);
>> +
>> +	return snprintf(page, PAGE_SIZE, "%u\n",
>> +			port->inline_data_size);
> Please fir the whole sprintf statement onto a single line.

sure

>> +}
>> +
>> +static ssize_t nvmet_inline_data_size_store(struct config_item *item,
>> +		const char *page, size_t count)
>> +{
>> +	struct nvmet_port *port = to_nvmet_port(item);
>> +	unsigned int size;
>> +	int ret;
>> +
>> +	if (port->enabled) {
>> +		pr_err("Cannot modify inline_data_size enabled\n");
>> +		pr_err("Disable the port before modifying\n");
>> +		return -EACCES;
>> +	}
>> +	ret = kstrtouint((const char *)page, 0, &size);
> This cast looks bogus.
>
> Also inline_data_size shoul be and u32 as that is closest to what
> is on the wire, and you thus should use kstrtou32 and pass the
> inline_data_size straight to kstrtou32 instead of bouncing it through
> a local variable.

I made it an int so it could be initialized to -1 indicating it is not
set by the config.  This allows the rdma transport to use its default
value if the config does not specify any value.  I did this so the admin
could totally disable inline by specifying 0.   So I needed a value that
indicates "unspecified".


>> +CONFIGFS_ATTR(nvmet_, inline_data_size);
> The characters before the first _ in the name are used as a group
> by nvmetcli.  So I think this should get a param_ or so prefix
> before the inline_data_size.  Also currently this attribute only
> makes sense for rdma, so I think we still need a flag in
> nvmet_fabrics_ops that enables/disables this attribute.

Ah, so setting it in a port that isn't the rdma transport will cause a
failure.  That makes sense.

> Last but not least please also send a nvmetcli patch to support
> this new attribute.

Will do.

>> +#define NVMET_DEFAULT_INLINE_DATA_SIZE	-1
> 0 makes much more sense as the default, and then we don't even need
> a name for it.

I wanted the user to be able to disable inline by setting it to 0.  Is
that not needed?  Maybe adding back the nvmet_fabrics_ops field will
alleviate this issue.  Perhaps a default_inline_size field that rdma
sets to PAGE_SIZE.  Then configfs can default it to that.

>> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
>> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)
> So for 64k pages the minimum is bigger than the maximum? :)

For 64k pages, the default is 64K and the max is 64K.

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 18, 2018, 9:08 a.m. UTC | #3
On Thu, May 17, 2018 at 09:24:57AM -0500, Steve Wise wrote:
> >> +	ret = kstrtouint((const char *)page, 0, &size);
> > This cast looks bogus.
> >
> > Also inline_data_size shoul be and u32 as that is closest to what
> > is on the wire, and you thus should use kstrtou32 and pass the
> > inline_data_size straight to kstrtou32 instead of bouncing it through
> > a local variable.
> 
> I made it an int so it could be initialized to -1 indicating it is not
> set by the config.  This allows the rdma transport to use its default
> value if the config does not specify any value.  I did this so the admin
> could totally disable inline by specifying 0.   So I needed a value that
> indicates "unspecified".

OK, makes sense.  So let's keep that behavior, and let every negative
value mean default so that we don't need another error check here.
The rest of the comments above still stand.

> >> +#define NVMET_DEFAULT_INLINE_DATA_SIZE	-1
> > 0 makes much more sense as the default, and then we don't even need
> > a name for it.
> 
> I wanted the user to be able to disable inline by setting it to 0.  Is
> that not needed?  Maybe by adding back the nvmet_fabrics_ops field will
> alleviate this issue.  Perhaps a default_inline_size field that rdma
> sets to PAGE_SIZE.  Then configfs can default it to that. 

As said above, I think we can keep the "negative means default" behavior,
but I'd still use the plain -1 instead of a define there.

> 
> >> +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
> >> +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)
> > So for 64k pages the minimum is bigger than the maximum? :)
> 
> For 64k pages, the default is 64K and the max is 64K.

Indeed, sorry.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise May 18, 2018, 4:36 p.m. UTC | #4
> 
> On Thu, May 17, 2018 at 09:24:57AM -0500, Steve Wise wrote:
> > >> +	ret = kstrtouint((const char *)page, 0, &size);
> > > This cast looks bogus.
> > >
> > > Also inline_data_size shoul be and u32 as that is closest to what
> > > is on the wire, and you thus should use kstrtou32 and pass the
> > > inline_data_size straight to kstrtou32 instead of bouncing it through
> > > a local variable.
> >
> > I made it an int so it could be initialized to -1 indicating it is not
> > set by the config.  This allows the rdma transport to use its default
> > value if the config does not specify any value.  I did this so the admin
> > could totally disable inline by specifying 0.   So I needed a value that
> > indicates "unspecified".
> 
> Ok, make sense.  So lets keep that behavior, and let every negative
> value mean default so that we don't need another error check here.
> Rest of the comments above still stands.

Agreed.  

Looking at nvmet_rdma_alloc_cmd(), I think I'll also need to fix rdma.c to
support no inline data usage.

Thanks!

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 5e0e9fc..a9e3223 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -247,14 +247,14 @@  static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
 	if (ctrl->ops->has_keyed_sgls)
 		id->sgls |= cpu_to_le32(1 << 2);
-	if (ctrl->ops->sqe_inline_size)
+	if (req->port->inline_data_size)
 		id->sgls |= cpu_to_le32(1 << 20);
 
 	strcpy(id->subnqn, ctrl->subsys->subsysnqn);
 
 	/* Max command capsule size is sqe + single page of in-capsule data */
 	id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
-				  ctrl->ops->sqe_inline_size) / 16);
+				  req->port->inline_data_size) / 16);
 	/* Max response capsule size is cqe */
 	id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
 
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index ad9ff27..968bdcb 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -214,6 +214,38 @@  static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
 
 CONFIGFS_ATTR(nvmet_, addr_trsvcid);
 
+static ssize_t nvmet_inline_data_size_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+
+	return snprintf(page, PAGE_SIZE, "%u\n",
+			port->inline_data_size);
+}
+
+static ssize_t nvmet_inline_data_size_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+	unsigned int size;
+	int ret;
+
+	if (port->enabled) {
+		pr_err("Cannot modify inline_data_size enabled\n");
+		pr_err("Disable the port before modifying\n");
+		return -EACCES;
+	}
+	ret = kstrtouint((const char *)page, 0, &size);
+	if (ret) {
+		pr_err("Invalid value '%s' for inline_data_size\n", page);
+		return -EINVAL;
+	}
+	port->inline_data_size = size;
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_, inline_data_size);
+
 static ssize_t nvmet_addr_trtype_show(struct config_item *item,
 		char *page)
 {
@@ -870,6 +902,7 @@  static void nvmet_port_release(struct config_item *item)
 	&nvmet_attr_addr_traddr,
 	&nvmet_attr_addr_trsvcid,
 	&nvmet_attr_addr_trtype,
+	&nvmet_attr_inline_data_size,
 	NULL,
 };
 
@@ -899,6 +932,7 @@  static struct config_group *nvmet_ports_make(struct config_group *group,
 	INIT_LIST_HEAD(&port->entry);
 	INIT_LIST_HEAD(&port->subsystems);
 	INIT_LIST_HEAD(&port->referrals);
+	port->inline_data_size = NVMET_DEFAULT_INLINE_DATA_SIZE;
 
 	port->disc_addr.portid = cpu_to_le16(portid);
 	config_group_init_type_name(&port->group, name, &nvmet_port_type);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 231e04e..fc2e675 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -171,7 +171,7 @@  static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
 	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
 	if (ctrl->ops->has_keyed_sgls)
 		id->sgls |= cpu_to_le32(1 << 2);
-	if (ctrl->ops->sqe_inline_size)
+	if (req->port->inline_data_size)
 		id->sgls |= cpu_to_le32(1 << 20);
 
 	strcpy(id->subnqn, ctrl->subsys->subsysnqn);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 15fd84a..5be528f 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -79,6 +79,8 @@  struct nvmet_sq {
 	struct completion	confirm_done;
 };
 
+#define NVMET_DEFAULT_INLINE_DATA_SIZE	-1
+
 /**
  * struct nvmet_port -	Common structure to keep port
  *				information for the target.
@@ -98,6 +100,7 @@  struct nvmet_port {
 	struct list_head		referrals;
 	void				*priv;
 	bool				enabled;
+	int				inline_data_size;
 };
 
 static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -202,7 +205,6 @@  struct nvmet_subsys_link {
 struct nvmet_fabrics_ops {
 	struct module *owner;
 	unsigned int type;
-	unsigned int sqe_inline_size;
 	unsigned int msdbd;
 	bool has_keyed_sgls : 1;
 	void (*queue_response)(struct nvmet_req *req);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d..4fe4a2d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -33,9 +33,10 @@ 
 #include "nvmet.h"
 
 /*
- * We allow up to a page of inline data to go with the SQE
+ * We allow at least 1 page, and up to 16KB of inline data to go with the SQE
  */
-#define NVMET_RDMA_INLINE_DATA_SIZE	PAGE_SIZE
+#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
+#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)
 
 struct nvmet_rdma_cmd {
 	struct ib_sge		sge[2];
@@ -116,6 +117,7 @@  struct nvmet_rdma_device {
 	size_t			srq_size;
 	struct kref		ref;
 	struct list_head	entry;
+	int			inline_data_size;
 };
 
 static bool nvmet_rdma_use_srq;
@@ -187,6 +189,8 @@  static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
 static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 			struct nvmet_rdma_cmd *c, bool admin)
 {
+	unsigned int inline_data_size = ndev->inline_data_size;
+
 	/* NVMe command / RDMA RECV */
 	c->nvme_cmd = kmalloc(sizeof(*c->nvme_cmd), GFP_KERNEL);
 	if (!c->nvme_cmd)
@@ -202,15 +206,15 @@  static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 
 	if (!admin) {
 		c->inline_page = alloc_pages(GFP_KERNEL,
-				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
+				get_order(inline_data_size));
 		if (!c->inline_page)
 			goto out_unmap_cmd;
 		c->sge[1].addr = ib_dma_map_page(ndev->device,
-				c->inline_page, 0, NVMET_RDMA_INLINE_DATA_SIZE,
+				c->inline_page, 0, inline_data_size,
 				DMA_FROM_DEVICE);
 		if (ib_dma_mapping_error(ndev->device, c->sge[1].addr))
 			goto out_free_inline_page;
-		c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE;
+		c->sge[1].length = inline_data_size;
 		c->sge[1].lkey = ndev->pd->local_dma_lkey;
 	}
 
@@ -225,7 +229,7 @@  static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 out_free_inline_page:
 	if (!admin) {
 		__free_pages(c->inline_page,
-				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
+				get_order(inline_data_size));
 	}
 out_unmap_cmd:
 	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
@@ -240,11 +244,13 @@  static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
 static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
 		struct nvmet_rdma_cmd *c, bool admin)
 {
+	unsigned int inline_data_size = ndev->inline_data_size;
+
 	if (!admin) {
 		ib_dma_unmap_page(ndev->device, c->sge[1].addr,
-				NVMET_RDMA_INLINE_DATA_SIZE, DMA_FROM_DEVICE);
+				inline_data_size, DMA_FROM_DEVICE);
 		__free_pages(c->inline_page,
-				get_order(NVMET_RDMA_INLINE_DATA_SIZE));
+				get_order(inline_data_size));
 	}
 	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
 				sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
@@ -544,7 +550,7 @@  static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
 	if (!nvme_is_write(rsp->req.cmd))
 		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 
-	if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) {
+	if (off + len > rsp->queue->dev->inline_data_size) {
 		pr_err("invalid inline data offset!\n");
 		return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
 	}
@@ -793,6 +799,7 @@  static void nvmet_rdma_free_dev(struct kref *ref)
 static struct nvmet_rdma_device *
 nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
 {
+	struct nvmet_port *port = cm_id->context;
 	struct nvmet_rdma_device *ndev;
 	int ret;
 
@@ -807,6 +814,7 @@  static void nvmet_rdma_free_dev(struct kref *ref)
 	if (!ndev)
 		goto out_err;
 
+	ndev->inline_data_size = port->inline_data_size;
 	ndev->device = cm_id->device;
 	kref_init(&ndev->ref);
 
@@ -1379,6 +1387,15 @@  static int nvmet_rdma_add_port(struct nvmet_port *port)
 		return -EINVAL;
 	}
 
+	if (port->inline_data_size == NVMET_DEFAULT_INLINE_DATA_SIZE) {
+		port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
+	} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
+		pr_err("invalid inline_data_size %d (max supported is %u)\n",
+			port->inline_data_size,
+			NVMET_RDMA_MAX_INLINE_DATA_SIZE);
+		return -EINVAL;
+	}
+
 	ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr,
 			port->disc_addr.trsvcid, &addr);
 	if (ret) {
@@ -1418,8 +1435,9 @@  static int nvmet_rdma_add_port(struct nvmet_port *port)
 		goto out_destroy_id;
 	}
 
-	pr_info("enabling port %d (%pISpcs)\n",
-		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
+	pr_info("enabling port %d (%pISpcs) inline_data_size %d\n",
+		le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr,
+		port->inline_data_size);
 	port->priv = cm_id;
 	return 0;
 
@@ -1456,7 +1474,6 @@  static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
 static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
 	.owner			= THIS_MODULE,
 	.type			= NVMF_TRTYPE_RDMA,
-	.sqe_inline_size	= NVMET_RDMA_INLINE_DATA_SIZE,
 	.msdbd			= 1,
 	.has_keyed_sgls		= 1,
 	.add_port		= nvmet_rdma_add_port,