
[v9,21/25] block/rnbd: server: functionality for IO submission to file or block dev

Message ID 20200221104721.350-22-jinpuwang@gmail.com (mailing list archive)
State New, archived
Series RTRS (former IBTRS) RDMA Transport Library and RNBD (former IBNBD) RDMA Network Block Device

Commit Message

Jinpu Wang Feb. 21, 2020, 10:47 a.m. UTC
From: Jack Wang <jinpu.wang@cloud.ionos.com>

This provides helper functions for IO submission to a file or a block device.

Signed-off-by: Danil Kipnis <danil.kipnis@cloud.ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
---
 drivers/block/rnbd/rnbd-srv-dev.c | 142 ++++++++++++++++++++++++++++++
 drivers/block/rnbd/rnbd-srv-dev.h | 110 +++++++++++++++++++++++
 2 files changed, 252 insertions(+)
 create mode 100644 drivers/block/rnbd/rnbd-srv-dev.c
 create mode 100644 drivers/block/rnbd/rnbd-srv-dev.h
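
For orientation, a minimal caller-side sketch of the API this patch adds
follows; the device path, callback name, pool and zero priority are
illustrative assumptions, not taken from this series, and the flags value
would come from the client request:

#include <linux/kernel.h>
#include <linux/err.h>
#include "rnbd-srv-dev.h"

/* Completion callback: @priv is whatever was handed to rnbd_dev_submit_io() */
static void demo_io_done(void *priv, int error)
{
	pr_info("rnbd demo: IO completed, err=%d\n", error);
}

/* Open a block device and submit a single IO of @len bytes at sector 0 */
static int demo_submit(struct bio_set *bs, void *buf, size_t len,
		       enum rnbd_io_flags flags)
{
	struct rnbd_dev *dev;
	int err;

	dev = rnbd_dev_open("/dev/nullb0", FMODE_READ | FMODE_WRITE,
			    bs, demo_io_done);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	/* len and bi_size are equal here; prio 0 means default priority */
	err = rnbd_dev_submit_io(dev, 0, buf, len, len, flags, 0, NULL);
	if (err)
		rnbd_dev_close(dev);	/* on success the device stays open */
	return err;
}

Note that @bs has to be initialized with enough front_pad for
struct rnbd_dev_blk_io (see the comment in rnbd-srv-dev.h below).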

Comments

Bart Van Assche March 1, 2020, 3:09 a.m. UTC | #1
On 2020-02-21 02:47, Jack Wang wrote:
> +static struct bio *rnbd_bio_map_kern(struct request_queue *q, void *data,
> +				      struct bio_set *bs,
> +				      unsigned int len, gfp_t gfp_mask)
> +{
> +	unsigned long kaddr = (unsigned long)data;
> +	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
> +	unsigned long start = kaddr >> PAGE_SHIFT;
> +	const int nr_pages = end - start;
> +	int offset, i;
> +	struct bio *bio;
> +
> +	bio = bio_alloc_bioset(gfp_mask, nr_pages, bs);
> +	if (!bio)
> +		return ERR_PTR(-ENOMEM);
> +
> +	offset = offset_in_page(kaddr);
> +	for (i = 0; i < nr_pages; i++) {
> +		unsigned int bytes = PAGE_SIZE - offset;
> +
> +		if (len <= 0)
> +			break;
> +
> +		if (bytes > len)
> +			bytes = len;
> +
> +		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
> +				    offset) < bytes) {
> +			/* we don't support partial mappings */
> +			bio_put(bio);
> +			return ERR_PTR(-EINVAL);
> +		}
> +
> +		data += bytes;
> +		len -= bytes;
> +		offset = 0;
> +	}
> +
> +	bio->bi_end_io = bio_put;
> +	return bio;
> +}

The above function is almost identical to bio_map_kern(). Please find a
way to prevent such code duplication.

> +static inline int rnbd_dev_get_logical_bsize(const struct rnbd_dev *dev)
> +{
> +	return bdev_logical_block_size(dev->bdev);
> +}
> +
> +static inline int rnbd_dev_get_phys_bsize(const struct rnbd_dev *dev)
> +{
> +	return bdev_physical_block_size(dev->bdev);
> +}
> +
> +static inline int
> +rnbd_dev_get_max_write_same_sects(const struct rnbd_dev *dev)
> +{
> +	return bdev_write_same(dev->bdev);
> +}

Are you sure that the above functions are useful? Please do not
introduce inline functions for well known functions, especially if their
function name is longer than their implementation.

Thanks,

Bart.
Danil Kipnis March 2, 2020, 10:06 a.m. UTC | #2
On Sun, Mar 1, 2020 at 4:09 AM Bart Van Assche <bvanassche@acm.org> wrote:
>
> On 2020-02-21 02:47, Jack Wang wrote:
> > +static struct bio *rnbd_bio_map_kern(struct request_queue *q, void *data,
> > +                                   struct bio_set *bs,
> > +                                   unsigned int len, gfp_t gfp_mask)
> > +{
> > +     unsigned long kaddr = (unsigned long)data;
> > +     unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
> > +     unsigned long start = kaddr >> PAGE_SHIFT;
> > +     const int nr_pages = end - start;
> > +     int offset, i;
> > +     struct bio *bio;
> > +
> > +     bio = bio_alloc_bioset(gfp_mask, nr_pages, bs);
> > +     if (!bio)
> > +             return ERR_PTR(-ENOMEM);
> > +
> > +     offset = offset_in_page(kaddr);
> > +     for (i = 0; i < nr_pages; i++) {
> > +             unsigned int bytes = PAGE_SIZE - offset;
> > +
> > +             if (len <= 0)
> > +                     break;
> > +
> > +             if (bytes > len)
> > +                     bytes = len;
> > +
> > +             if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
> > +                                 offset) < bytes) {
> > +                     /* we don't support partial mappings */
> > +                     bio_put(bio);
> > +                     return ERR_PTR(-EINVAL);
> > +             }
> > +
> > +             data += bytes;
> > +             len -= bytes;
> > +             offset = 0;
> > +     }
> > +
> > +     bio->bi_end_io = bio_put;
> > +     return bio;
> > +}
>
> The above function is almost identical to bio_map_kern(). Please find a
> way to prevent such code duplication.

Hi Bart,

We preallocate a bio_set in order to avoid the allocation that
bio_map_kern() does in the IO path (the call to bio_kmalloc()). Instead
we use bio_alloc_bioset() with the preallocated bio_set. We will test
whether the performance advantage is measurable and, if it is not, will
switch to bio_map_kern().
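
Roughly, the preallocation looks like this (the pool-size constant below
is an illustrative assumption); the front_pad argument is what lets
rnbd_dev_submit_io() recover struct rnbd_dev_blk_io via container_of():

#include <linux/bio.h>
#include "rnbd-srv-dev.h"

#define DEMO_BIO_POOL_SIZE 512		/* illustrative pool size */

static struct bio_set srv_bio_set;

static int demo_init_bio_set(void)
{
	/*
	 * Reserve room for struct rnbd_dev_blk_io in front of every bio,
	 * which is why "bio" must stay the last member of that struct.
	 */
	return bioset_init(&srv_bio_set, DEMO_BIO_POOL_SIZE,
			   offsetof(struct rnbd_dev_blk_io, bio),
			   BIOSET_NEED_BVECS);
}
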
>
> > +static inline int rnbd_dev_get_logical_bsize(const struct rnbd_dev *dev)
> > +{
> > +     return bdev_logical_block_size(dev->bdev);
> > +}
> > +
> > +static inline int rnbd_dev_get_phys_bsize(const struct rnbd_dev *dev)
> > +{
> > +     return bdev_physical_block_size(dev->bdev);
> > +}
> > +
> > +static inline int
> > +rnbd_dev_get_max_write_same_sects(const struct rnbd_dev *dev)
> > +{
> > +     return bdev_write_same(dev->bdev);
> > +}
>
> Are you sure that the above functions are useful? Please do not
> introduce inline functions for well known functions, especially if their
> function name is longer than their implementation.

These were initially introduced to abstract file IO and block IO
devices; we will drop them since we only support block IO now.
Thank you!


> Thanks,
>
> Bart.
Jinpu Wang March 3, 2020, 4:20 p.m. UTC | #3
On Mon, Mar 2, 2020 at 11:06 AM Danil Kipnis
<danil.kipnis@cloud.ionos.com> wrote:
>
> On Sun, Mar 1, 2020 at 4:09 AM Bart Van Assche <bvanassche@acm.org> wrote:
> >
> > On 2020-02-21 02:47, Jack Wang wrote:
> > > +static struct bio *rnbd_bio_map_kern(struct request_queue *q, void *data,
> > > +                                   struct bio_set *bs,
> > > +                                   unsigned int len, gfp_t gfp_mask)
> > > +{
> > > +     unsigned long kaddr = (unsigned long)data;
> > > +     unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
> > > +     unsigned long start = kaddr >> PAGE_SHIFT;
> > > +     const int nr_pages = end - start;
> > > +     int offset, i;
> > > +     struct bio *bio;
> > > +
> > > +     bio = bio_alloc_bioset(gfp_mask, nr_pages, bs);
> > > +     if (!bio)
> > > +             return ERR_PTR(-ENOMEM);
> > > +
> > > +     offset = offset_in_page(kaddr);
> > > +     for (i = 0; i < nr_pages; i++) {
> > > +             unsigned int bytes = PAGE_SIZE - offset;
> > > +
> > > +             if (len <= 0)
> > > +                     break;
> > > +
> > > +             if (bytes > len)
> > > +                     bytes = len;
> > > +
> > > +             if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
> > > +                                 offset) < bytes) {
> > > +                     /* we don't support partial mappings */
> > > +                     bio_put(bio);
> > > +                     return ERR_PTR(-EINVAL);
> > > +             }
> > > +
> > > +             data += bytes;
> > > +             len -= bytes;
> > > +             offset = 0;
> > > +     }
> > > +
> > > +     bio->bi_end_io = bio_put;
> > > +     return bio;
> > > +}
> >
> > The above function is almost identical to bio_map_kern(). Please find a
> > way to prevent such code duplication.
>
> Hi Bart,
>
> We preallocate a bio_set in order to avoid the allocation that
> bio_map_kern() does in the IO path (the call to bio_kmalloc()). Instead
> we use bio_alloc_bioset() with the preallocated bio_set. We will test
> whether the performance advantage is measurable and, if it is not, will
> switch to bio_map_kern().
Hi Bart,

We tried the above approach and just noticed that bio_map_kern() is no
longer exported as of the 5.4 kernel.
We checked target_core_iblock.c and nvme's io-cmd-bdev.c; they open-code
a similar function.

I guess re-exporting bio_map_kern() will not be accepted?
Do you have a suggestion for how we should handle this?

Thanks!
Bart Van Assche March 3, 2020, 4:28 p.m. UTC | #4
On 3/3/20 8:20 AM, Jinpu Wang wrote:
> We tried the above approach and just noticed that bio_map_kern() is no
> longer exported as of the 5.4 kernel.
> We checked target_core_iblock.c and nvme's io-cmd-bdev.c; they open-code
> a similar function.
> 
> I guess re-exporting bio_map_kern() will not be accepted?
> Do you have a suggestion for how we should handle this?

Duplicating code is bad.

The code in drivers/nvme/target/io-cmd-bdev.c and target_core_iblock.c 
is different: it calls bio_add_page() instead of bio_add_pc_page().
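
For reference, a minimal mapping loop of that style, built on
bio_add_page() (illustrative only, not the actual nvmet/target code);
unlike bio_add_pc_page(), it does not check the request_queue limits
that passthrough requests need:

#include <linux/bio.h>
#include <linux/mm.h>

static struct bio *demo_map_kern(void *data, unsigned int len,
				 struct bio_set *bs, gfp_t gfp_mask)
{
	unsigned int offset = offset_in_page(data);
	unsigned int nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	struct bio *bio;

	bio = bio_alloc_bioset(gfp_mask, nr_pages, bs);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (len) {
		unsigned int bytes = min_t(unsigned int, len,
					   PAGE_SIZE - offset);

		/* bio_add_page() only appends; no queue limit checks here */
		if (bio_add_page(bio, virt_to_page(data), bytes,
				 offset) != bytes) {
			bio_put(bio);
			return ERR_PTR(-EINVAL);
		}
		data += bytes;
		len -= bytes;
		offset = 0;
	}
	return bio;
}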

Please include a patch in this series that reexports bio_map_kern().

Thanks,

Bart.
Jinpu Wang March 3, 2020, 4:43 p.m. UTC | #5
On Tue, Mar 3, 2020 at 5:28 PM Bart Van Assche <bvanassche@acm.org> wrote:
>
> On 3/3/20 8:20 AM, Jinpu Wang wrote:
> > We tried the above approach and just noticed that bio_map_kern() is no
> > longer exported as of the 5.4 kernel.
> > We checked target_core_iblock.c and nvme's io-cmd-bdev.c; they open-code
> > a similar function.
> >
> > I guess re-exporting bio_map_kern() will not be accepted?
> > Do you have a suggestion for how we should handle this?
>
> Duplicating code is bad.
>
> The code in drivers/nvme/target/io-cmd-bdev.c and target_core_iblock.c
> is different: it calls bio_add_page() instead of bio_add_pc_page().
>
> Please include a patch in this series that reexports bio_map_kern().
Got it, will do it.
>
> Thanks,
>
> Bart.
>
Thanks!
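
The re-export requested above would presumably amount to a one-line
follow-up patch next to the definition of bio_map_kern() in the block
layer, roughly:

/* added right after bio_map_kern()'s definition in the block layer */
EXPORT_SYMBOL(bio_map_kern);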

Patch

diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c
new file mode 100644
index 000000000000..ef8d35411b6d
--- /dev/null
+++ b/drivers/block/rnbd/rnbd-srv-dev.c
@@ -0,0 +1,142 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
+ * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
+ * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
+ */
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
+
+#include "rnbd-srv-dev.h"
+#include "rnbd-log.h"
+
+struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
+				 struct bio_set *bs, rnbd_dev_io_fn io_cb)
+{
+	struct rnbd_dev *dev;
+	int ret;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	dev->blk_open_flags = flags;
+	dev->bdev = blkdev_get_by_path(path, flags, THIS_MODULE);
+	ret = PTR_ERR_OR_ZERO(dev->bdev);
+	if (ret)
+		goto err;
+
+	dev->blk_open_flags	= flags;
+	dev->io_cb		= io_cb;
+	bdevname(dev->bdev, dev->name);
+	dev->ibd_bio_set	= bs;
+
+	return dev;
+
+err:
+	kfree(dev);
+	return ERR_PTR(ret);
+}
+
+void rnbd_dev_close(struct rnbd_dev *dev)
+{
+	blkdev_put(dev->bdev, dev->blk_open_flags);
+	kfree(dev);
+}
+
+static void rnbd_dev_bi_end_io(struct bio *bio)
+{
+	struct rnbd_dev_blk_io *io = bio->bi_private;
+
+	io->dev->io_cb(io->priv, blk_status_to_errno(bio->bi_status));
+	bio_put(bio);
+}
+
+/**
+ *	rnbd_bio_map_kern	-	map kernel address into bio
+ *	@q: the struct request_queue for the bio
+ *	@data: pointer to buffer to map
+ *	@bs: bio_set to use.
+ *	@len: length in bytes
+ *	@gfp_mask: allocation flags for bio allocation
+ *
+ *	Map the kernel address into a bio suitable for io to a block
+ *	device. Returns an error pointer in case of error.
+ */
+static struct bio *rnbd_bio_map_kern(struct request_queue *q, void *data,
+				      struct bio_set *bs,
+				      unsigned int len, gfp_t gfp_mask)
+{
+	unsigned long kaddr = (unsigned long)data;
+	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned long start = kaddr >> PAGE_SHIFT;
+	const int nr_pages = end - start;
+	int offset, i;
+	struct bio *bio;
+
+	bio = bio_alloc_bioset(gfp_mask, nr_pages, bs);
+	if (!bio)
+		return ERR_PTR(-ENOMEM);
+
+	offset = offset_in_page(kaddr);
+	for (i = 0; i < nr_pages; i++) {
+		unsigned int bytes = PAGE_SIZE - offset;
+
+		if (len <= 0)
+			break;
+
+		if (bytes > len)
+			bytes = len;
+
+		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
+				    offset) < bytes) {
+			/* we don't support partial mappings */
+			bio_put(bio);
+			return ERR_PTR(-EINVAL);
+		}
+
+		data += bytes;
+		len -= bytes;
+		offset = 0;
+	}
+
+	bio->bi_end_io = bio_put;
+	return bio;
+}
+
+int rnbd_dev_submit_io(struct rnbd_dev *dev, sector_t sector, void *data,
+			size_t len, u32 bi_size, enum rnbd_io_flags flags,
+			short prio, void *priv)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+	struct rnbd_dev_blk_io *io;
+	struct bio *bio;
+
+	/* check if the buffer is suitable for bdev */
+	if (WARN_ON(!blk_rq_aligned(q, (unsigned long)data, len)))
+		return -EINVAL;
+
+	/* Generate bio with pages pointing to the rdma buffer */
+	bio = rnbd_bio_map_kern(q, data, dev->ibd_bio_set, len, GFP_KERNEL);
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	io = container_of(bio, struct rnbd_dev_blk_io, bio);
+
+	io->dev		= dev;
+	io->priv	= priv;
+
+	bio->bi_end_io		= rnbd_dev_bi_end_io;
+	bio->bi_private		= io;
+	bio->bi_opf		= rnbd_to_bio_flags(flags);
+	bio->bi_iter.bi_sector	= sector;
+	bio->bi_iter.bi_size	= bi_size;
+	bio_set_prio(bio, prio);
+	bio_set_dev(bio, dev->bdev);
+
+	submit_bio(bio);
+
+	return 0;
+}
diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h
new file mode 100644
index 000000000000..4bd28aadb86f
--- /dev/null
+++ b/drivers/block/rnbd/rnbd-srv-dev.h
@@ -0,0 +1,110 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * RDMA Network Block Driver
+ *
+ * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
+ * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
+ * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
+ */
+#ifndef RNBD_SRV_DEV_H
+#define RNBD_SRV_DEV_H
+
+#include <linux/fs.h>
+#include "rnbd-proto.h"
+
+typedef void rnbd_dev_io_fn(void *priv, int error);
+
+struct rnbd_dev {
+	struct block_device	*bdev;
+	struct bio_set		*ibd_bio_set;
+	fmode_t			blk_open_flags;
+	char			name[BDEVNAME_SIZE];
+	rnbd_dev_io_fn		*io_cb;
+};
+
+struct rnbd_dev_blk_io {
+	struct rnbd_dev *dev;
+	void		 *priv;
+	/* have to be last member for front_pad usage of bioset_init */
+	struct bio	bio;
+};
+
+/**
+ * rnbd_dev_open() - Open a device
+ * @flags:	open flags
+ * @bs:		bio_set to use during block IO
+ * @io_cb:	called when the I/O has finished
+ */
+struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
+				 struct bio_set *bs, rnbd_dev_io_fn io_cb);
+
+/**
+ * rnbd_dev_close() - Close a device
+ */
+void rnbd_dev_close(struct rnbd_dev *dev);
+
+static inline int rnbd_dev_get_logical_bsize(const struct rnbd_dev *dev)
+{
+	return bdev_logical_block_size(dev->bdev);
+}
+
+static inline int rnbd_dev_get_phys_bsize(const struct rnbd_dev *dev)
+{
+	return bdev_physical_block_size(dev->bdev);
+}
+
+static inline int rnbd_dev_get_max_segs(const struct rnbd_dev *dev)
+{
+	return queue_max_segments(bdev_get_queue(dev->bdev));
+}
+
+static inline int rnbd_dev_get_max_hw_sects(const struct rnbd_dev *dev)
+{
+	return queue_max_hw_sectors(bdev_get_queue(dev->bdev));
+}
+
+static inline int
+rnbd_dev_get_max_write_same_sects(const struct rnbd_dev *dev)
+{
+	return bdev_write_same(dev->bdev);
+}
+
+static inline int rnbd_dev_get_secure_discard(const struct rnbd_dev *dev)
+{
+	return blk_queue_secure_erase(bdev_get_queue(dev->bdev));
+}
+
+static inline int rnbd_dev_get_max_discard_sects(const struct rnbd_dev *dev)
+{
+	if (!blk_queue_discard(bdev_get_queue(dev->bdev)))
+		return 0;
+
+	return blk_queue_get_max_sectors(bdev_get_queue(dev->bdev),
+					 REQ_OP_DISCARD);
+}
+
+static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
+{
+	return bdev_get_queue(dev->bdev)->limits.discard_granularity;
+}
+
+static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev)
+{
+	return bdev_get_queue(dev->bdev)->limits.discard_alignment;
+}
+
+/**
+ * rnbd_dev_submit_io() - Submit an I/O to the disk
+ * @dev:	device to which the I/O is submitted
+ * @sector:	address to read/write data to
+ * @data:	I/O data to write or buffer to read I/O data into
+ * @len:	length of @data
+ * @bi_size:	amount of data that will be read/written
+ * @prio:	IO priority
+ * @priv:	private data passed to the io_cb completion callback
+ */
+int rnbd_dev_submit_io(struct rnbd_dev *dev, sector_t sector, void *data,
+			size_t len, u32 bi_size, enum rnbd_io_flags flags,
+			short prio, void *priv);
+
+#endif /* RNBD_SRV_DEV_H */