diff mbox series

[1/2] ublk_drv: store device parameters

Message ID 20220723150713.750369-2-ming.lei@redhat.com (mailing list archive)
State New, archived
Headers show
Series ublk_drv: add generic mechanism to get/set parameters | expand

Commit Message

Ming Lei July 23, 2022, 3:07 p.m. UTC
One important goal of ublk is to provide generic framework for
making one block device by userspace.

As one generic block device, there are still lots of parameters,
such as max_sectors, write_cache/fua, discard related limits,
zoned parameters, ...., so this patch starts to store & retrieve
device parameters and prepares for implementing ctrl command of
SET/GET_DEV_PARAMETERS.

Device parameters have to be stored somewhere, one reason is that
disk/queue won't be allocated until START_DEV command is received,
but device parameters have to setup before starting device.

Add two default parameter group for covering default disk setting,
more parameter groups can be added in future, but all should be optional.
Parameter groups will become part of ABI since new commands will be
added to set/get parameters in following patch. Most of parameter
groups should be optional, so store them in xarray.

Not only block device related parameter, feature related parameters can
be set/get with this generic framework too, then ublk can be extended
easily with help of dev_info->flags.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/block/ublk_drv.c      | 262 ++++++++++++++++++++++++++++++++--
 include/uapi/linux/ublk_cmd.h |  43 ++++++
 2 files changed, 293 insertions(+), 12 deletions(-)

Comments

Christoph Hellwig July 25, 2022, 6:42 a.m. UTC | #1
On Sat, Jul 23, 2022 at 11:07:12PM +0800, Ming Lei wrote:
> One important goal of ublk is to provide generic framework for
> making one block device by userspace.
> 
> As one generic block device, there are still lots of parameters,
> such as max_sectors, write_cache/fua, discard related limits,
> zoned parameters, ...., so this patch starts to store & retrieve
> device parameters and prepares for implementing ctrl command of
> SET/GET_DEV_PARAMETERS.
> 
> Device parameters have to be stored somewhere, one reason is that
> disk/queue won't be allocated until START_DEV command is received,
> but device parameters have to setup before starting device.

This seems rather overeingeering and really hard to read.  Any reason
to not simply open code the parameter verification and application
and do away with the xarray and all the boiler plate code?
Ming Lei July 25, 2022, 7:06 a.m. UTC | #2
On Mon, Jul 25, 2022 at 08:42:59AM +0200, Christoph Hellwig wrote:
> On Sat, Jul 23, 2022 at 11:07:12PM +0800, Ming Lei wrote:
> > One important goal of ublk is to provide generic framework for
> > making one block device by userspace.
> > 
> > As one generic block device, there are still lots of parameters,
> > such as max_sectors, write_cache/fua, discard related limits,
> > zoned parameters, ...., so this patch starts to store & retrieve
> > device parameters and prepares for implementing ctrl command of
> > SET/GET_DEV_PARAMETERS.
> > 
> > Device parameters have to be stored somewhere, one reason is that
> > disk/queue won't be allocated until START_DEV command is received,
> > but device parameters have to setup before starting device.
> 
> This seems rather overeingeering and really hard to read.  Any reason
> to not simply open code the parameter verification and application
> and do away with the xarray and all the boiler plate code?
 
There could be more parameters than the two types(), such as segments,
zoned, ..., also in future, feature related parameters can be added
in this way too, and most of them are optional.

So xarray is needed.

Thanks,
Ming
Christoph Hellwig July 26, 2022, 12:32 p.m. UTC | #3
On Mon, Jul 25, 2022 at 03:06:50PM +0800, Ming Lei wrote:
> There could be more parameters than the two types(), such as segments,
> zoned, ..., also in future, feature related parameters can be added
> in this way too, and most of them are optional.

Yes.  But just having a struct that grows is much cleaner and simpler
than those indirections.  e.g something like this patch on top of this
series.  With this new fields can just be added to the end of
struct ublk_params.  Old kernels will ignore them, but due to the copy
back of the parsed structure userspace can detect that if it cares:

 drivers/block/ublk_drv.c      |  320 +++++++++++++++------------------------------------------------------------------
 include/uapi/linux/ublk_cmd.h |   67 ++++++----------
 2 files changed, 85 insertions(+), 302 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 83fd65d8a2051..f9db59af12752 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -137,7 +137,7 @@ struct ublk_device {
 	spinlock_t		mm_lock;
 	struct mm_struct	*mm;
 
-	struct xarray		paras;
+	struct ublk_params	params;
 
 	struct completion	completion;
 	unsigned int		nr_queues_ready;
@@ -151,16 +151,6 @@ struct ublk_device {
 	struct work_struct	stop_work;
 };
 
-typedef int (ublk_para_validate)(const struct ublk_device *,
-		const struct ublk_para_header *);
-typedef void (ublk_para_apply)(struct ublk_device *ub,
-		const struct ublk_para_header *);
-
-struct ublk_para_ops {
-	ublk_para_validate *validate_fn;
-	ublk_para_apply *apply_fn;
-};
-
 static dev_t ublk_chr_devt;
 static struct class *ublk_chr_class;
 
@@ -172,231 +162,69 @@ static DEFINE_MUTEX(ublk_ctl_mutex);
 
 static struct miscdevice ublk_misc;
 
-static int ublk_dev_para_basic_validate(const struct ublk_device *ub,
-		const struct ublk_para_header *header)
+static void ublk_init_default_params(struct ublk_device *ub)
 {
-	const struct ublk_basic_para *p = (struct ublk_basic_para *)header;
+	struct ublk_params *p = &ub->params;
+	struct ublksrv_ctrl_dev_info *info = &ub->dev_info;
 
-	if (p->logical_bs_shift > PAGE_SHIFT)
-		return -EINVAL;
+	p->logical_bs_shift = ilog2(info->block_size);
+	p->physical_bs_shift = ilog2(info->block_size);
+	p->io_opt_shift = ilog2(info->block_size);
+	p->io_min_shift = ilog2(info->block_size);
 
-	if (p->logical_bs_shift > p->physical_bs_shift)
-		return -EINVAL;
+	p->max_sectors = info->rq_max_blocks << (ub->bs_shift - 9);
+	p->dev_sectors = info->dev_blocks << (ub->bs_shift - 9);
 
-	return 0;
+	p->discard_granularity = PAGE_SIZE;
+	p->max_discard_sectors = UINT_MAX >> 9;
+	p->max_write_zeroes_sectors = UINT_MAX >> 9;
+	p->max_discard_segments = 1;
 }
 
-static void ublk_dev_para_basic_apply(struct ublk_device *ub,
-		const struct ublk_para_header *header)
+static int ublk_params_apply(const struct ublk_device *ub,
+		struct ublk_params *p)
 {
 	struct request_queue *q = ub->ub_disk->queue;
-	const struct ublk_basic_para *p = (struct ublk_basic_para *)header;
+
+	if (p->logical_bs_shift > PAGE_SHIFT)
+		return -EINVAL;
+	if (p->logical_bs_shift > p->physical_bs_shift)
+		return -EINVAL;
+
+	/* For now only single segment discards are supported */
+	if (p->max_discard_sectors && p->max_discard_segments != 1)
+		return -EINVAL;
 
 	blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
 	blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
 	blk_queue_io_min(q, 1 << p->io_min_shift);
 	blk_queue_io_opt(q, 1 << p->io_opt_shift);
 
-	blk_queue_write_cache(q, p->write_back_cache, p->fua);
-	if (!p->rotational)
-		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
-	else
+	blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
+			p->attrs & UBLK_ATTR_FUA);
+	if (p->attrs & UBLK_ATTR_ROTATIONAL)
 		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+	else
+		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
 
 	blk_queue_max_hw_sectors(q, p->max_sectors);
 	blk_queue_chunk_sectors(q, p->chunk_sectors);
 	blk_queue_virt_boundary(q, p->virt_boundary_mask);
 
-	if (p->read_only)
+	if (p->attrs & UBLK_ATTR_READ_ONLY)
 		set_disk_ro(ub->ub_disk, true);
 
 	set_capacity(ub->ub_disk, p->dev_sectors);
-}
-
-static int ublk_dev_para_discard_validate(const struct ublk_device *ub,
-		const struct ublk_para_header *header)
-{
-	const struct ublk_discard_para *p = (struct ublk_discard_para *)header;
-
-	/* So far, only support single segment discard */
-	if (p->max_discard_sectors && p->max_discard_segments != 1)
-		return -EINVAL;
-	return 0;
-}
-
-static void ublk_dev_para_discard_apply(struct ublk_device *ub,
-		const struct ublk_para_header *header)
-{
-	struct request_queue *q = ub->ub_disk->queue;
-	const struct ublk_discard_para *p = (struct ublk_discard_para *)
-		header;
 
-	q->limits.discard_alignment = p->discard_alignment;
+	q->limits.discard_alignment = p->discard_alignment_offset;
 	q->limits.discard_granularity = p->discard_granularity;
 	blk_queue_max_discard_sectors(q, p->max_discard_sectors);
 	blk_queue_max_write_zeroes_sectors(q,
 			p->max_write_zeroes_sectors);
 	blk_queue_max_discard_segments(q, p->max_discard_segments);
-}
-
-static const unsigned int para_len[] = {
-	[UBLK_PARA_TYPE_BASIC] = sizeof(struct ublk_basic_para),
-	[UBLK_PARA_TYPE_DISCARD] = sizeof(struct ublk_discard_para),
-};
-
-static const struct ublk_para_ops para_ops[] = {
-	[UBLK_PARA_TYPE_BASIC] = {
-		.validate_fn = ublk_dev_para_basic_validate,
-		.apply_fn = ublk_dev_para_basic_apply,
-	},
-	[UBLK_PARA_TYPE_DISCARD] = {
-		.validate_fn = ublk_dev_para_discard_validate,
-		.apply_fn = ublk_dev_para_discard_apply,
-	},
-};
-
-static void ublk_update_dev_sectors(struct ublk_device *ub,
-		sector_t sectors)
-{
-	struct ublk_basic_para  *p = xa_load(&ub->paras,
-			UBLK_PARA_TYPE_BASIC);
-
-	if (p)
-		p->dev_sectors = (__u64)sectors;
-}
-
-static struct ublk_para_header *ublk_alloc_def_basic_para(
-		struct ublk_device *ub)
-{
-	struct ublk_basic_para  *p = kzalloc(sizeof(*p), GFP_KERNEL);
-	struct ublksrv_ctrl_dev_info *info = &ub->dev_info;
-
-	if (!p)
-		return NULL;
-
-	p->header.type = UBLK_PARA_TYPE_BASIC;
-	p->header.len = sizeof(*p);
-	p->logical_bs_shift = ilog2(info->block_size);
-	p->physical_bs_shift = ilog2(info->block_size);
-	p->io_opt_shift = ilog2(info->block_size);
-	p->io_min_shift = ilog2(info->block_size);
-	p->rotational = 0;
-	p->write_back_cache = 0;
-	p->fua = 0;
-	p->read_only = 0;
-
-	p->max_sectors = info->rq_max_blocks << (ub->bs_shift - 9);
-	p->chunk_sectors = 0;
-	p->virt_boundary_mask = 0;
-	p->dev_sectors = info->dev_blocks << (ub->bs_shift - 9);
-
-	return (struct ublk_para_header *)p;
-}
-
-static struct ublk_para_header *ublk_alloc_def_discard_para(
-		struct ublk_device *ub)
-{
-	struct ublk_discard_para  *p = kzalloc(sizeof(*p), GFP_KERNEL);
-
-	if (!p)
-		return NULL;
-
-	p->header.type = UBLK_PARA_TYPE_DISCARD;
-	p->header.len = sizeof(*p);
-
-	p->discard_alignment =	0;
-	p->discard_granularity = PAGE_SIZE;
-	p->max_discard_sectors = UINT_MAX >> 9;
-	p->max_write_zeroes_sectors = UINT_MAX >> 9;
-	p->max_discard_segments = 1;
-
-	return (struct ublk_para_header *)p;
-}
-
-static int ublk_validate_para_header(const struct ublk_device *ub,
-		const struct ublk_para_header *h)
-{
-	if (h->type >= UBLK_PARA_TYPE_LAST)
-		return -EINVAL;
-
-	if (h->len != para_len[h->type])
-		return -EINVAL;
-
 	return 0;
 }
 
-
-static int ublk_validate_para(const struct ublk_device *ub,
-		const struct ublk_para_header *h)
-{
-	int ret = ublk_validate_para_header(ub, h);
-
-	if (ret)
-		return ret;
-
-	if (para_ops[h->type].validate_fn)
-		return para_ops[h->type].validate_fn(ub, h);
-
-	return 0;
-}
-
-/* Old parameter with same type will be overridden */
-static int ublk_install_para(struct ublk_device *ub,
-		struct ublk_para_header *h)
-{
-	void *old;
-	int ret;
-
-	ret = ublk_validate_para(ub, h);
-	if (ret)
-		return ret;
-
-	old = xa_store(&ub->paras, h->type, h, GFP_KERNEL);
-	if (xa_is_err(old))
-		return xa_err(old);
-	kfree(old);
-	return 0;
-}
-
-static void ublk_apply_para(struct ublk_device *ub,
-		const struct ublk_para_header *h)
-{
-	if (para_ops[h->type].apply_fn)
-		para_ops[h->type].apply_fn(ub, h);
-}
-
-/* default parameters are allocated/installed before disk is allocated */
-static void ublk_install_def_paras(struct ublk_device *ub)
-{
-	struct ublk_para_header *h;
-
-	h = ublk_alloc_def_basic_para(ub);
-	if (h && ublk_install_para(ub, h))
-		kfree(h);
-
-	h = ublk_alloc_def_discard_para(ub);
-	if (h && ublk_install_para(ub, h))
-		kfree(h);
-}
-
-static void ublk_apply_paras(struct ublk_device *ub)
-{
-	struct ublk_para_header *h;
-	unsigned long type;
-
-	xa_for_each(&ub->paras, type, h)
-		ublk_apply_para(ub, h);
-}
-
-static void ublk_uninstall_paras(struct ublk_device *ub)
-{
-	unsigned long type;
-	void *p;
-
-	xa_for_each(&ub->paras, type, p)
-		kfree(p);
-}
-
 static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
 {
 	if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
@@ -1281,10 +1109,8 @@ static void ublk_cdev_rel(struct device *dev)
 
 	blk_mq_free_tag_set(&ub->tag_set);
 	ublk_deinit_queues(ub);
-	ublk_uninstall_paras(ub);
 	ublk_free_dev_number(ub);
 	mutex_destroy(&ub->mutex);
-	xa_destroy(&ub->paras);
 	kfree(ub);
 }
 
@@ -1397,8 +1223,7 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
 	/* We may get disk size updated */
 	if (dev_blocks) {
 		ub->dev_info.dev_blocks = dev_blocks;
-		ublk_update_dev_sectors(ub,
-				dev_blocks << (ub->bs_shift - 9));
+		ub->params.dev_sectors = dev_blocks << (ub->bs_shift - 9);
 	}
 
 	disk = blk_mq_alloc_disk(&ub->tag_set, ub);
@@ -1413,8 +1238,6 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
 	ub->dev_info.ublksrv_pid = ublksrv_pid;
 	ub->ub_disk = disk;
 
-	ublk_apply_paras(ub);
-
 	get_device(&ub->cdev_dev);
 	ret = add_disk(disk);
 	if (ret) {
@@ -1525,7 +1348,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 	spin_lock_init(&ub->mm_lock);
 	INIT_WORK(&ub->stop_work, ublk_stop_work_fn);
 	INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work);
-	xa_init(&ub->paras);
 
 	ret = ublk_alloc_dev_number(ub, header->dev_id);
 	if (ret < 0)
@@ -1552,6 +1374,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 			ub->dev_info.nr_hw_queues, nr_cpu_ids);
 	ublk_align_max_io_size(ub);
 
+	ublk_init_default_params(ub);
+
 	ret = ublk_init_queues(ub);
 	if (ret)
 		goto out_free_dev_number;
@@ -1569,9 +1393,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 	 * ublk_add_chdev() will cleanup everything if it fails.
 	 */
 	ret = ublk_add_chdev(ub);
-	if (!ret)
-		ublk_install_def_paras(ub);
-
 	goto out_unlock;
 
 out_free_tag_set:
@@ -1674,84 +1495,65 @@ static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd)
 	return ret;
 }
 
-static int ublk_ctrl_get_para(struct io_uring_cmd *cmd)
+static int ublk_ctrl_get_params(struct io_uring_cmd *cmd)
 {
 	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	size_t psize = min_t(size_t, sizeof(struct ublk_params), header->len);
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 	struct ublk_device *ub;
-	struct ublk_para_header ph;
-	struct ublk_para_header *para = NULL;
 	int ret = 0;
 
-	if (header->len <= sizeof(ph) || !header->addr)
+	if (!header->addr)
 		return -EINVAL;
 
 	ub = ublk_get_device_from_id(header->dev_id);
 	if (!ub)
 		return -EINVAL;
 
-	ret = -EFAULT;
-	if (copy_from_user(&ph, argp, sizeof(ph)))
-		goto out_put;
-
-	ret = ublk_validate_para_header(ub, &ph);
-	if (ret)
-		goto out_put;
-
 	mutex_lock(&ub->mutex);
-	para = xa_load(&ub->paras, ph.type);
-	mutex_unlock(&ub->mutex);
-	if (!para)
-		ret = -EINVAL;
-	else if (copy_to_user(argp, para, ph.len))
+	if (copy_to_user(argp, &ub->params, psize))
 		ret = -EFAULT;
-out_put:
+	mutex_unlock(&ub->mutex);
 	ublk_put_device(ub);
 	return ret;
 }
 
-static int ublk_ctrl_set_para(struct io_uring_cmd *cmd)
+static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
 {
 	struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+	size_t psize = min_t(size_t, sizeof(struct ublk_params), header->len);
 	void __user *argp = (void __user *)(unsigned long)header->addr;
 	struct ublk_device *ub;
-	struct ublk_para_header ph;
-	struct ublk_para_header *para = NULL;
+	struct ublk_params params = {};
 	int ret = -EFAULT;
 
-	if (header->len <= sizeof(ph) || !header->addr)
+	if (!header->addr)
 		return -EINVAL;
 
 	ub = ublk_get_device_from_id(header->dev_id);
 	if (!ub)
 		return -EINVAL;
 
-	if (copy_from_user(&ph, argp, sizeof(ph)))
+	if (copy_from_user(&params, argp, psize))
 		goto out_put;
 
-	ret = ublk_validate_para_header(ub, &ph);
+	/* parameters can only be changed when device isn't live */
+	mutex_lock(&ub->mutex);
+	if (ub->dev_info.state == UBLK_S_DEV_LIVE) {
+		ret = -EACCES;
+		goto out_unlock;
+	}
+	ret = ublk_params_apply(ub, &params);
 	if (ret)
-		goto out_put;
+		goto out_unlock;
 
-	para = kmalloc(ph.len, GFP_KERNEL);
-	if (!para) {
-		ret = -ENOMEM;
-	} else if (copy_from_user(para, argp, ph.len)) {
+	/* copy back the paramters that were actually applied */
+	if (copy_to_user(argp, &ub->params, psize))
 		ret = -EFAULT;
-	} else {
-		/* parameters can only be changed when device isn't live */
-		mutex_lock(&ub->mutex);
-		if (ub->dev_info.state != UBLK_S_DEV_LIVE)
-			ret = ublk_install_para(ub, para);
-		else
-			ret = -EACCES;
-		mutex_unlock(&ub->mutex);
-	}
+out_unlock:
+	mutex_unlock(&ub->mutex);
 out_put:
 	ublk_put_device(ub);
-	if (ret)
-		kfree(para);
-
 	return ret;
 }
 
@@ -1790,11 +1592,11 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
 	case UBLK_CMD_GET_QUEUE_AFFINITY:
 		ret = ublk_ctrl_get_queue_affinity(cmd);
 		break;
-	case UBLK_CMD_GET_PARA:
-		ret = ublk_ctrl_get_para(cmd);
+	case UBLK_CMD_GET_PARAMS:
+		ret = ublk_ctrl_get_params(cmd);
 		break;
-	case UBLK_CMD_SET_PARA:
-		ret = ublk_ctrl_set_para(cmd);
+	case UBLK_CMD_SET_PARAMS:
+		ret = ublk_ctrl_set_params(cmd);
 		break;
 	default:
 		break;
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 0f7be73987551..ce406bbede082 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -15,8 +15,8 @@
 #define	UBLK_CMD_DEL_DEV		0x05
 #define	UBLK_CMD_START_DEV	0x06
 #define	UBLK_CMD_STOP_DEV	0x07
-#define	UBLK_CMD_SET_PARA	0x08
-#define	UBLK_CMD_GET_PARA	0x09
+#define	UBLK_CMD_SET_PARAMS	0x08
+#define	UBLK_CMD_GET_PARAMS	0x09
 
 /*
  * IO commands, issued by ublk server, and handled by ublk driver.
@@ -160,47 +160,28 @@ struct ublksrv_io_cmd {
 	__u64	addr;
 };
 
-/* ublk device parameter definition */
-enum {
-	UBLK_PARA_TYPE_BASIC,
-	UBLK_PARA_TYPE_DISCARD,
-	UBLK_PARA_TYPE_LAST,
-};
-
-struct ublk_para_header {
-	__u16 type;
-	__u16 len;
-} __attribute__ ((__packed__));
-
-struct ublk_basic_para {
-	struct ublk_para_header  header;
-	__u32   logical_bs_shift:6;
-	__u32   physical_bs_shift:6;
-	__u32	io_opt_shift:6;
-	__u32	io_min_shift:6;
-	__u32	rotational:1;
-	__u32	write_back_cache:1;
-	__u32	fua:1;
-	__u32	read_only:1;
-	__u32	unused:4;
-
-	__u32	max_sectors;
-	__u32	chunk_sectors;
-
-	__u64   dev_sectors;
-	__u64   virt_boundary_mask;
-};
-
-struct ublk_discard_para {
-	struct ublk_para_header  header;
-	__u32	discard_alignment;
-
-	__u32	discard_granularity;
-	__u32	max_discard_sectors;
-
-	__u32	max_write_zeroes_sectors;
-	__u16	max_discard_segments;
-	__u16	reserved0;
+struct ublk_params {
+	__u32		attrs;
+#define UBLK_ATTR_READ_ONLY		(1 << 0)
+#define UBLK_ATTR_ROTATIONAL		(1 << 1)
+#define UBLK_ATTR_VOLATILE_CACHE	(1 << 2)
+#define UBLK_ATTR_FUA			(1 << 3)
+	__u8		logical_bs_shift;
+	__u8		physical_bs_shift;
+	__u8		io_opt_shift;
+	__u8		io_min_shift;
+	__u32		max_sectors;
+	__u32		chunk_sectors;
+	__u64		dev_sectors;
+	__u64		virt_boundary_mask;
+
+	__u32		discard_alignment_offset;
+	__u32		discard_granularity;
+	__u32		max_discard_sectors;
+	__u32		max_write_zeroes_sectors;
+	__u16		max_discard_segments;
+	__u8		reserved0[6];
+	/* keep this 8 byte aligned */
 };
 
 #endif
Ming Lei July 26, 2022, 2:21 p.m. UTC | #4
On Tue, Jul 26, 2022 at 02:32:24PM +0200, Christoph Hellwig wrote:
> On Mon, Jul 25, 2022 at 03:06:50PM +0800, Ming Lei wrote:
> > There could be more parameters than the two types(), such as segments,
> > zoned, ..., also in future, feature related parameters can be added
> > in this way too, and most of them are optional.
> 
> Yes.  But just having a struct that grows is much cleaner and simpler
> than those indirections.  e.g something like this patch on top of this

But most of fields in ublk_params aren't required for one device,
sending them all isn't friendly from both userspace and kernel side.
Especially inside kernel, a big chunk buffer is allocated for
the structure, but only few fields are useful for one device. There can
be lots of ublk device, and total wasted memory can't be ignored.

If we group parameters, it is easier to extend by adding new parameter
type. One ublk device usually just uses several parameter types.
And the xarray implementation is simple enough too, which is just one
array, but really sparsed.

Adding one parameter type just needs to add two callbacks, so pretty easy
to review.

So I really suggest to keep the parameter type and xarray
implementation, and will post V2 after ublksrv side is updated.

> series.  With this new fields can just be added to the end of
> struct ublk_params.  Old kernels will ignore them, but due to the copy
> back of the parsed structure userspace can detect that if it cares:

There has to be one length field in header, otherwise ublk driver can't
know the exact length for copying back.

With parameter type approach, it is natural to handle new/old app and
old/new kernel, since kernel will fail set parameter command simply
if one parameter type isn't supported.


Thanks, 
Ming
Christoph Hellwig July 26, 2022, 5:47 p.m. UTC | #5
On Tue, Jul 26, 2022 at 10:21:04PM +0800, Ming Lei wrote:
> But most of fields in ublk_params aren't required for one device,
> sending them all isn't friendly from both userspace and kernel side.

I think it actually is easier for both.  For the kernel the benefit
is pretty clear seen by this patch, and for userspae there also is
a lot less boilerplate code.

> Especially inside kernel, a big chunk buffer is allocated for
> the structure, but only few fields are useful for one device. There can
> be lots of ublk device, and total wasted memory can't be ignored.

I think even right now the memory usage is less.  If a lot (and I mean
a lot) new paramters are added, it might be slightly higher than the
minimal config with an xarray, but not to the point where it matters.

> If we group parameters, it is easier to extend by adding new parameter
> type. One ublk device usually just uses several parameter types.
> And the xarray implementation is simple enough too, which is just one
> array, but really sparsed.

Well, it isn't exactly simple.  And the apprach of a struct that just
grows additional members has worked perfectly fine all over the kernel.
Ming Lei July 27, 2022, 12:12 a.m. UTC | #6
On Tue, Jul 26, 2022 at 07:47:21PM +0200, Christoph Hellwig wrote:
> On Tue, Jul 26, 2022 at 10:21:04PM +0800, Ming Lei wrote:
> > But most of fields in ublk_params aren't required for one device,
> > sending them all isn't friendly from both userspace and kernel side.
> 
> I think it actually is easier for both.  For the kernel the benefit
> is pretty clear seen by this patch, and for userspae there also is
> a lot less boilerplate code.

If everything is put into single structure:

1) it is fragile for both userspace/driver side making/paring code
- such as, how to parse zeroed fields? With grouping parameters, if one type
parameter isn't sent via set param command, the userspace just meant we do
not care this parameter just like one real hardare, driver feels free to use
the default. But if it is sent as zero, driver may have to parse it as zero
and use zero, sometimes it can be wrong, and parsing parameter with zero for
kernel becomes hard.

With grouping parameter, if one parameter is sent from userspace, ublk
driver just parses & uses it explicitly. There isn't any burden for driver for
making such decision.

2) ublk driver has to parse every fields, and the code could be harder
to organize, since there isn't parameter group. With grouping
parameters, we just need to implement two callbacks when adding new parameter
type. So code is organized pretty well.

Wrt. cleanness, I will send V2 for review, and please feel free to let me
know where isn't clean, and I am happy to make it correct/clean from the
beginning. And userspace change will be updated too, and we can see
if userspace side has much boilerplate code.

> 
> > Especially inside kernel, a big chunk buffer is allocated for
> > the structure, but only few fields are useful for one device. There can
> > be lots of ublk device, and total wasted memory can't be ignored.
> 
> I think even right now the memory usage is less.  If a lot (and I mean

Adding segment limits & zoned limits are in my todo list.

> a lot) new paramters are added, it might be slightly higher than the
> minimal config with an xarray, but not to the point where it matters.

Alibaba said their case needs to run lots of ublk devices in one machine.
It is ABI interface, and I'd rather make it easy extending from the
beginning given there is the requirement for running lots of devices.

> 
> > If we group parameters, it is easier to extend by adding new parameter
> > type. One ublk device usually just uses several parameter types.
> > And the xarray implementation is simple enough too, which is just one
> > array, but really sparsed.
> 
> Well, it isn't exactly simple.  And the apprach of a struct that just
> grows additional members has worked perfectly fine all over the kernel.

In your patch, there isn't length field in ublk_params, not sure ublk
driver can copy back correct parameter. If userspace is old, on new
kernel with bigger structure size userspace buffer could be corrupted
by copying back kernel's structure. So here one length field is really
a must.

With grouping parameters, each type has fixed length which can't be
changed since it is added, we can verify it easily in both set/get param
command, meantime the parameter type can be verified reliably in driver side,
so grouping parameter is more reliable.


Thanks,
Ming
diff mbox series

Patch

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 255b2de46a24..e185bdb165de 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -137,6 +137,8 @@  struct ublk_device {
 	spinlock_t		mm_lock;
 	struct mm_struct	*mm;
 
+	struct xarray		paras;
+
 	struct completion	completion;
 	unsigned int		nr_queues_ready;
 	atomic_t		nr_aborted_queues;
@@ -149,6 +151,16 @@  struct ublk_device {
 	struct work_struct	stop_work;
 };
 
+typedef int (ublk_para_validate)(const struct ublk_device *,
+		const struct ublk_para_header *);
+typedef void (ublk_para_apply)(struct ublk_device *ub,
+		const struct ublk_para_header *);
+
+struct ublk_para_ops {
+	ublk_para_validate *validate_fn;
+	ublk_para_apply *apply_fn;
+};
+
 static dev_t ublk_chr_devt;
 static struct class *ublk_chr_class;
 
@@ -160,6 +172,231 @@  static DEFINE_MUTEX(ublk_ctl_mutex);
 
 static struct miscdevice ublk_misc;
 
+static int ublk_dev_para_basic_validate(const struct ublk_device *ub,
+		const struct ublk_para_header *header)
+{
+	const struct ublk_basic_para *p = (struct ublk_basic_para *)header;
+
+	if (p->logical_bs_shift > PAGE_SHIFT)
+		return -EINVAL;
+
+	if (p->logical_bs_shift > p->physical_bs_shift)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void ublk_dev_para_basic_apply(struct ublk_device *ub,
+		const struct ublk_para_header *header)
+{
+	struct request_queue *q = ub->ub_disk->queue;
+	const struct ublk_basic_para *p = (struct ublk_basic_para *)header;
+
+	blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
+	blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
+	blk_queue_io_min(q, 1 << p->io_min_shift);
+	blk_queue_io_opt(q, 1 << p->io_opt_shift);
+
+	blk_queue_write_cache(q, p->write_back_cache, p->fua);
+	if (!p->rotational)
+		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+
+	blk_queue_max_hw_sectors(q, p->max_sectors);
+	blk_queue_chunk_sectors(q, p->chunk_sectors);
+	blk_queue_virt_boundary(q, p->virt_boundary_mask);
+
+	if (p->read_only)
+		set_disk_ro(ub->ub_disk, true);
+
+	set_capacity(ub->ub_disk, p->dev_sectors);
+}
+
+static int ublk_dev_para_discard_validate(const struct ublk_device *ub,
+		const struct ublk_para_header *header)
+{
+	const struct ublk_discard_para *p = (struct ublk_discard_para *)header;
+
+	/* So far, only support single segment discard */
+	if (p->max_discard_sectors && p->max_discard_segments != 1)
+		return -EINVAL;
+	return 0;
+}
+
+static void ublk_dev_para_discard_apply(struct ublk_device *ub,
+		const struct ublk_para_header *header)
+{
+	struct request_queue *q = ub->ub_disk->queue;
+	const struct ublk_discard_para *p = (struct ublk_discard_para *)
+		header;
+
+	q->limits.discard_alignment = p->discard_alignment;
+	q->limits.discard_granularity = p->discard_granularity;
+	blk_queue_max_discard_sectors(q, p->max_discard_sectors);
+	blk_queue_max_write_zeroes_sectors(q,
+			p->max_write_zeroes_sectors);
+	blk_queue_max_discard_segments(q, p->max_discard_segments);
+}
+
+static const unsigned int para_len[] = {
+	[UBLK_PARA_TYPE_BASIC] = sizeof(struct ublk_basic_para),
+	[UBLK_PARA_TYPE_DISCARD] = sizeof(struct ublk_discard_para),
+};
+
+static const struct ublk_para_ops para_ops[] = {
+	[UBLK_PARA_TYPE_BASIC] = {
+		.validate_fn = ublk_dev_para_basic_validate,
+		.apply_fn = ublk_dev_para_basic_apply,
+	},
+	[UBLK_PARA_TYPE_DISCARD] = {
+		.validate_fn = ublk_dev_para_discard_validate,
+		.apply_fn = ublk_dev_para_discard_apply,
+	},
+};
+
+static void ublk_update_dev_sectors(struct ublk_device *ub,
+		sector_t sectors)
+{
+	struct ublk_basic_para  *p = xa_load(&ub->paras,
+			UBLK_PARA_TYPE_BASIC);
+
+	if (p)
+		p->dev_sectors = (__u64)sectors;
+}
+
+static struct ublk_para_header *ublk_alloc_def_basic_para(
+		struct ublk_device *ub)
+{
+	struct ublk_basic_para  *p = kzalloc(sizeof(*p), GFP_KERNEL);
+	struct ublksrv_ctrl_dev_info *info = &ub->dev_info;
+
+	if (!p)
+		return NULL;
+
+	p->header.type = UBLK_PARA_TYPE_BASIC;
+	p->header.len = sizeof(*p);
+	p->logical_bs_shift = ilog2(info->block_size);
+	p->physical_bs_shift = ilog2(info->block_size);
+	p->io_opt_shift = ilog2(info->block_size);
+	p->io_min_shift = ilog2(info->block_size);
+	p->rotational = 0;
+	p->write_back_cache = 0;
+	p->fua = 0;
+	p->read_only = 0;
+
+	p->max_sectors = info->rq_max_blocks << (ub->bs_shift - 9);
+	p->chunk_sectors = 0;
+	p->virt_boundary_mask = 0;
+	p->dev_sectors = info->dev_blocks << (ub->bs_shift - 9);
+
+	return (struct ublk_para_header *)p;
+}
+
+static struct ublk_para_header *ublk_alloc_def_discard_para(
+		struct ublk_device *ub)
+{
+	struct ublk_discard_para  *p = kzalloc(sizeof(*p), GFP_KERNEL);
+
+	if (!p)
+		return NULL;
+
+	p->header.type = UBLK_PARA_TYPE_DISCARD;
+	p->header.len = sizeof(*p);
+
+	p->discard_alignment =	0;
+	p->discard_granularity = PAGE_SIZE;
+	p->max_discard_sectors = UINT_MAX >> 9;
+	p->max_write_zeroes_sectors = UINT_MAX >> 9;
+	p->max_discard_segments = 1;
+
+	return (struct ublk_para_header *)p;
+}
+
+static int ublk_validate_para_header(const struct ublk_device *ub,
+		const struct ublk_para_header *h)
+{
+	if (h->type >= UBLK_PARA_TYPE_LAST)
+		return -EINVAL;
+
+	if (h->len != para_len[h->type])
+		return -EINVAL;
+
+	return 0;
+}
+
+
+static int ublk_validate_para(const struct ublk_device *ub,
+		const struct ublk_para_header *h)
+{
+	int ret = ublk_validate_para_header(ub, h);
+
+	if (ret)
+		return ret;
+
+	if (para_ops[h->type].validate_fn)
+		return para_ops[h->type].validate_fn(ub, h);
+
+	return 0;
+}
+
+/* Old parameter with same type will be overridden */
+static int ublk_install_para(struct ublk_device *ub,
+		struct ublk_para_header *h)
+{
+	void *old;
+	int ret;
+
+	ret = ublk_validate_para(ub, h);
+	if (ret)
+		return ret;
+
+	old = xa_store(&ub->paras, h->type, h, GFP_KERNEL);
+	if (xa_is_err(old))
+		return xa_err(old);
+	kfree(old);
+	return 0;
+}
+
+static void ublk_apply_para(struct ublk_device *ub,
+		const struct ublk_para_header *h)
+{
+	if (para_ops[h->type].apply_fn)
+		para_ops[h->type].apply_fn(ub, h);
+}
+
+/* default parameters are allocated/installed before disk is allocated */
+static void ublk_install_def_paras(struct ublk_device *ub)
+{
+	struct ublk_para_header *h;
+
+	h = ublk_alloc_def_basic_para(ub);
+	if (h && ublk_install_para(ub, h))
+		kfree(h);
+
+	h = ublk_alloc_def_discard_para(ub);
+	if (h && ublk_install_para(ub, h))
+		kfree(h);
+}
+
+static void ublk_apply_paras(struct ublk_device *ub)
+{
+	struct ublk_para_header *h;
+	unsigned long type;
+
+	xa_for_each(&ub->paras, type, h)
+		ublk_apply_para(ub, h);
+}
+
+static void ublk_uninstall_paras(struct ublk_device *ub)
+{
+	unsigned long type;
+	void *p;
+
+	xa_for_each(&ub->paras, type, p)
+		kfree(p);
+}
+
 static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
 {
 	if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
@@ -1044,8 +1281,10 @@  static void ublk_cdev_rel(struct device *dev)
 
 	blk_mq_free_tag_set(&ub->tag_set);
 	ublk_deinit_queues(ub);
+	ublk_uninstall_paras(ub);
 	ublk_free_dev_number(ub);
 	mutex_destroy(&ub->mutex);
+	xa_destroy(&ub->paras);
 	kfree(ub);
 }
 
@@ -1156,8 +1395,11 @@  static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
 	}
 
 	/* We may get disk size updated */
-	if (dev_blocks)
+	if (dev_blocks) {
 		ub->dev_info.dev_blocks = dev_blocks;
+		ublk_update_dev_sectors(ub,
+				dev_blocks << (ub->bs_shift - 9));
+	}
 
 	disk = blk_mq_alloc_disk(&ub->tag_set, ub);
 	if (IS_ERR(disk)) {
@@ -1168,19 +1410,11 @@  static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
 	disk->fops = &ub_fops;
 	disk->private_data = ub;
 
-	blk_queue_logical_block_size(disk->queue, ub->dev_info.block_size);
-	blk_queue_physical_block_size(disk->queue, ub->dev_info.block_size);
-	blk_queue_io_min(disk->queue, ub->dev_info.block_size);
-	blk_queue_max_hw_sectors(disk->queue,
-		ub->dev_info.rq_max_blocks << (ub->bs_shift - 9));
-	disk->queue->limits.discard_granularity = PAGE_SIZE;
-	blk_queue_max_discard_sectors(disk->queue, UINT_MAX >> 9);
-	blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX >> 9);
-
-	set_capacity(disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9));
-
 	ub->dev_info.ublksrv_pid = ublksrv_pid;
 	ub->ub_disk = disk;
+
+	ublk_apply_paras(ub);
+
 	get_device(&ub->cdev_dev);
 	ret = add_disk(disk);
 	if (ret) {
@@ -1291,6 +1525,7 @@  static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 	spin_lock_init(&ub->mm_lock);
 	INIT_WORK(&ub->stop_work, ublk_stop_work_fn);
 	INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work);
+	xa_init(&ub->paras);
 
 	ret = ublk_alloc_dev_number(ub, header->dev_id);
 	if (ret < 0)
@@ -1334,6 +1569,9 @@  static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 	 * ublk_add_chdev() will cleanup everything if it fails.
 	 */
 	ret = ublk_add_chdev(ub);
+	if (!ret)
+		ublk_install_def_paras(ub);
+
 	goto out_unlock;
 
 out_free_tag_set:
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index ca33092354ab..99f81a1e9a95 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -158,4 +158,47 @@  struct ublksrv_io_cmd {
 	__u64	addr;
 };
 
+/* ublk device parameter definition */
+enum {
+	UBLK_PARA_TYPE_BASIC,
+	UBLK_PARA_TYPE_DISCARD,
+	UBLK_PARA_TYPE_LAST,
+};
+
+struct ublk_para_header {
+	__u16 type;
+	__u16 len;
+} __attribute__ ((__packed__));
+
+struct ublk_basic_para {
+	struct ublk_para_header  header;
+	__u32   logical_bs_shift:6;
+	__u32   physical_bs_shift:6;
+	__u32	io_opt_shift:6;
+	__u32	io_min_shift:6;
+	__u32	rotational:1;
+	__u32	write_back_cache:1;
+	__u32	fua:1;
+	__u32	read_only:1;
+	__u32	unused:4;
+
+	__u32	max_sectors;
+	__u32	chunk_sectors;
+
+	__u64   dev_sectors;
+	__u64   virt_boundary_mask;
+};
+
+struct ublk_discard_para {
+	struct ublk_para_header  header;
+	__u32	discard_alignment;
+
+	__u32	discard_granularity;
+	__u32	max_discard_sectors;
+
+	__u32	max_write_zeroes_sectors;
+	__u16	max_discard_segments;
+	__u16	reserved0;
+};
+
 #endif