diff mbox series

[v5,11/11] loop: Add LOOP_CONFIGURE ioctl

Message ID 20200513133845.244903-12-maco@android.com (mailing list archive)
State New, archived
Headers show
Series Add a new LOOP_CONFIGURE ioctl | expand

Commit Message

Martijn Coenen May 13, 2020, 1:38 p.m. UTC
This allows userspace to completely setup a loop device with a single
ioctl, removing the in-between state where the device can be partially
configured - eg the loop device has a backing file associated with it,
but is reading from the wrong offset.

Besides removing the intermediate state, another big benefit of this
ioctl is that LOOP_SET_STATUS can be slow; the main reason for this
slowness is that LOOP_SET_STATUS(64) calls blk_mq_freeze_queue() to
freeze the associated queue; this requires waiting for RCU
synchronization, which I've measured can take about 15-20ms on this
device on average.

In addition to doing what LOOP_SET_STATUS can do, LOOP_CONFIGURE can
also be used to:
- Set the correct block size immediately by setting
  loop_config.block_size (avoids LOOP_SET_BLOCK_SIZE)
- Explicitly request direct I/O mode by setting LO_FLAGS_DIRECT_IO
  in loop_config.info.lo_flags (avoids LOOP_SET_DIRECT_IO)
- Explicitly request read-only mode by setting LO_FLAGS_READ_ONLY
  in loop_config.info.lo_flags

Here's setting up ~70 regular loop devices with an offset on an x86
Android device, using LOOP_SET_FD and LOOP_SET_STATUS:

vsoc_x86:/system/apex # time for i in `seq 30 100`;
do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done
    0m03.40s real     0m00.02s user     0m00.03s system

Here's configuring ~70 devices in the same way, but using a modified
losetup that uses the new LOOP_CONFIGURE ioctl:

vsoc_x86:/system/apex # time for i in `seq 30 100`;
do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done
    0m01.94s real     0m00.01s user     0m00.01s system

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martijn Coenen <maco@android.com>
---
 drivers/block/loop.c      | 104 ++++++++++++++++++++++++++++----------
 include/uapi/linux/loop.h |  21 ++++++++
 2 files changed, 97 insertions(+), 28 deletions(-)

Comments

Kristian Klausen June 11, 2021, 10:54 p.m. UTC | #1
On 13.05.2020 15.38, Martijn Coenen wrote:
> This allows userspace to completely setup a loop device with a single
> ioctl, removing the in-between state where the device can be partially
> configured - eg the loop device has a backing file associated with it,
> but is reading from the wrong offset.
>
> Besides removing the intermediate state, another big benefit of this
> ioctl is that LOOP_SET_STATUS can be slow; the main reason for this
> slowness is that LOOP_SET_STATUS(64) calls blk_mq_freeze_queue() to
> freeze the associated queue; this requires waiting for RCU
> synchronization, which I've measured can take about 15-20ms on this
> device on average.
>
> In addition to doing what LOOP_SET_STATUS can do, LOOP_CONFIGURE can
> also be used to:
> - Set the correct block size immediately by setting
>    loop_config.block_size (avoids LOOP_SET_BLOCK_SIZE)
> - Explicitly request direct I/O mode by setting LO_FLAGS_DIRECT_IO
>    in loop_config.info.lo_flags (avoids LOOP_SET_DIRECT_IO)
> - Explicitly request read-only mode by setting LO_FLAGS_READ_ONLY
>    in loop_config.info.lo_flags
>
> Here's setting up ~70 regular loop devices with an offset on an x86
> Android device, using LOOP_SET_FD and LOOP_SET_STATUS:
>
> vsoc_x86:/system/apex # time for i in `seq 30 100`;
> do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done
>      0m03.40s real     0m00.02s user     0m00.03s system
>
> Here's configuring ~70 devices in the same way, but using a modified
> losetup that uses the new LOOP_CONFIGURE ioctl:
>
> vsoc_x86:/system/apex # time for i in `seq 30 100`;
> do losetup -r -o 4096 /dev/block/loop$i com.android.adbd.apex; done
>      0m01.94s real     0m00.01s user     0m00.01s system
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Martijn Coenen <maco@android.com>
> ---
>   drivers/block/loop.c      | 104 ++++++++++++++++++++++++++++----------
>   include/uapi/linux/loop.h |  21 ++++++++
>   2 files changed, 97 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 13518ba191f5..a565c5aafa52 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -228,6 +228,19 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
>   		blk_mq_unfreeze_queue(lo->lo_queue);
>   }
>   
> +/**
> + * loop_validate_block_size() - validates the passed in block size
> + * @bsize: size to validate
> + */
> +static int
> +loop_validate_block_size(unsigned short bsize)
> +{
> +	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
>   /**
>    * loop_set_size() - sets device size and notifies userspace
>    * @lo: struct loop_device to set the size for
> @@ -1050,23 +1063,24 @@ loop_set_status_from_info(struct loop_device *lo,
>   	return 0;
>   }
>   
> -static int loop_set_fd(struct loop_device *lo, fmode_t mode,
> -		       struct block_device *bdev, unsigned int arg)
> +static int loop_configure(struct loop_device *lo, fmode_t mode,
> +			  struct block_device *bdev,
> +			  const struct loop_config *config)
>   {
>   	struct file	*file;
>   	struct inode	*inode;
>   	struct address_space *mapping;
>   	struct block_device *claimed_bdev = NULL;
> -	int		lo_flags = 0;
>   	int		error;
>   	loff_t		size;
>   	bool		partscan;
> +	unsigned short  bsize;
>   
>   	/* This is safe, since we have a reference from open(). */
>   	__module_get(THIS_MODULE);
>   
>   	error = -EBADF;
> -	file = fget(arg);
> +	file = fget(config->fd);
>   	if (!file)
>   		goto out;
>   
> @@ -1075,7 +1089,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
>   	 * here to avoid changing device under exclusive owner.
>   	 */
>   	if (!(mode & FMODE_EXCL)) {
> -		claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
> +		claimed_bdev = bd_start_claiming(bdev, loop_configure);
>   		if (IS_ERR(claimed_bdev)) {
>   			error = PTR_ERR(claimed_bdev);
>   			goto out_putf;
> @@ -1097,11 +1111,26 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
>   	mapping = file->f_mapping;
>   	inode = mapping->host;
>   
> +	size = get_loop_size(lo, file);
> +
> +	if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
> +		error = -EINVAL;
> +		goto out_unlock;
> +	}
> +
> +	if (config->block_size) {
> +		error = loop_validate_block_size(config->block_size);
> +		if (error)
> +			goto out_unlock;
> +	}
> +
> +	error = loop_set_status_from_info(lo, &config->info);

loop_set_status_from_info() doesn't call loop_config_discard() nor does 
loop_configure(), I assume it should be called?

I'm asking because I upgraded to util-linux 2.37 which use the new 
LOOP_CONFIGURE ioctl and fstrim is complaining that "the discard 
operation is not supported" and the missing call of 
loop_config_discard() stood out. Sadly I haven't been able to reproduce 
the issue reliably outside CI (yet).

> +	if (error)
> +		goto out_unlock;
> +
>   	if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
>   	    !file->f_op->write_iter)
> -		lo_flags |= LO_FLAGS_READ_ONLY;
> -
> -	size = get_loop_size(lo, file);
> +		lo->lo_flags |= LO_FLAGS_READ_ONLY;
>   
>   	error = loop_prepare_queue(lo);
>   	if (error)
>
diff mbox series

Patch

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 13518ba191f5..a565c5aafa52 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -228,6 +228,19 @@  static void __loop_update_dio(struct loop_device *lo, bool dio)
 		blk_mq_unfreeze_queue(lo->lo_queue);
 }
 
+/**
+ * loop_validate_block_size() - validates the passed in block size
+ * @bsize: size to validate
+ */
+static int
+loop_validate_block_size(unsigned short bsize)
+{
+	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
+		return -EINVAL;
+
+	return 0;
+}
+
 /**
  * loop_set_size() - sets device size and notifies userspace
  * @lo: struct loop_device to set the size for
@@ -1050,23 +1063,24 @@  loop_set_status_from_info(struct loop_device *lo,
 	return 0;
 }
 
-static int loop_set_fd(struct loop_device *lo, fmode_t mode,
-		       struct block_device *bdev, unsigned int arg)
+static int loop_configure(struct loop_device *lo, fmode_t mode,
+			  struct block_device *bdev,
+			  const struct loop_config *config)
 {
 	struct file	*file;
 	struct inode	*inode;
 	struct address_space *mapping;
 	struct block_device *claimed_bdev = NULL;
-	int		lo_flags = 0;
 	int		error;
 	loff_t		size;
 	bool		partscan;
+	unsigned short  bsize;
 
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
 
 	error = -EBADF;
-	file = fget(arg);
+	file = fget(config->fd);
 	if (!file)
 		goto out;
 
@@ -1075,7 +1089,7 @@  static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	 * here to avoid changing device under exclusive owner.
 	 */
 	if (!(mode & FMODE_EXCL)) {
-		claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
+		claimed_bdev = bd_start_claiming(bdev, loop_configure);
 		if (IS_ERR(claimed_bdev)) {
 			error = PTR_ERR(claimed_bdev);
 			goto out_putf;
@@ -1097,11 +1111,26 @@  static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	mapping = file->f_mapping;
 	inode = mapping->host;
 
+	size = get_loop_size(lo, file);
+
+	if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
+		error = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (config->block_size) {
+		error = loop_validate_block_size(config->block_size);
+		if (error)
+			goto out_unlock;
+	}
+
+	error = loop_set_status_from_info(lo, &config->info);
+	if (error)
+		goto out_unlock;
+
 	if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
 	    !file->f_op->write_iter)
-		lo_flags |= LO_FLAGS_READ_ONLY;
-
-	size = get_loop_size(lo, file);
+		lo->lo_flags |= LO_FLAGS_READ_ONLY;
 
 	error = loop_prepare_queue(lo);
 	if (error)
@@ -1109,30 +1138,28 @@  static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 
 	error = 0;
 
-	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
+	set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
-	lo->use_dio = false;
+	lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
 	lo->lo_device = bdev;
-	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
-	lo->transfer = NULL;
-	lo->ioctl = NULL;
-	lo->lo_sizelimit = 0;
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
-	if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
+	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
 		blk_queue_write_cache(lo->lo_queue, true, false);
 
-	if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
+	if (config->block_size)
+		bsize = config->block_size;
+	else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev)
 		/* In case of direct I/O, match underlying block size */
-		unsigned short bsize = bdev_logical_block_size(
-			inode->i_sb->s_bdev);
+		bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
+	else
+		bsize = 512;
 
-		blk_queue_logical_block_size(lo->lo_queue, bsize);
-		blk_queue_physical_block_size(lo->lo_queue, bsize);
-		blk_queue_io_min(lo->lo_queue, bsize);
-	}
+	blk_queue_logical_block_size(lo->lo_queue, bsize);
+	blk_queue_physical_block_size(lo->lo_queue, bsize);
+	blk_queue_io_min(lo->lo_queue, bsize);
 
 	loop_update_rotational(lo);
 	loop_update_dio(lo);
@@ -1155,14 +1182,14 @@  static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	if (partscan)
 		loop_reread_partitions(lo, bdev);
 	if (claimed_bdev)
-		bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
+		bd_abort_claiming(bdev, claimed_bdev, loop_configure);
 	return 0;
 
 out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
 out_bdev:
 	if (claimed_bdev)
-		bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
+		bd_abort_claiming(bdev, claimed_bdev, loop_configure);
 out_putf:
 	fput(file);
 out:
@@ -1582,8 +1609,9 @@  static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
 
-	if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
-		return -EINVAL;
+	err = loop_validate_block_size(arg);
+	if (err)
+		return err;
 
 	if (lo->lo_queue->limits.logical_block_size == arg)
 		return 0;
@@ -1645,8 +1673,27 @@  static int lo_ioctl(struct block_device *bdev, fmode_t mode,
 	int err;
 
 	switch (cmd) {
-	case LOOP_SET_FD:
-		return loop_set_fd(lo, mode, bdev, arg);
+	case LOOP_SET_FD: {
+		/*
+		 * Legacy case - pass in a zeroed out struct loop_config with
+		 * only the file descriptor set , which corresponds with the
+		 * default parameters we'd have used otherwise.
+		 */
+		struct loop_config config;
+
+		memset(&config, 0, sizeof(config));
+		config.fd = arg;
+
+		return loop_configure(lo, mode, bdev, &config);
+	}
+	case LOOP_CONFIGURE: {
+		struct loop_config config;
+
+		if (copy_from_user(&config, argp, sizeof(config)))
+			return -EFAULT;
+
+		return loop_configure(lo, mode, bdev, &config);
+	}
 	case LOOP_CHANGE_FD:
 		return loop_change_fd(lo, bdev, arg);
 	case LOOP_CLR_FD:
@@ -1818,6 +1865,7 @@  static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 	case LOOP_CLR_FD:
 	case LOOP_GET_STATUS64:
 	case LOOP_SET_STATUS64:
+	case LOOP_CONFIGURE:
 		arg = (unsigned long) compat_ptr(arg);
 		/* fall through */
 	case LOOP_SET_FD:
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index 6b32fee80ce0..24a1c45bd1ae 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -31,6 +31,10 @@  enum {
 /* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */
 #define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR)
 
+/* LO_FLAGS that can be set using LOOP_CONFIGURE */
+#define LOOP_CONFIGURE_SETTABLE_FLAGS (LO_FLAGS_READ_ONLY | LO_FLAGS_AUTOCLEAR \
+				       | LO_FLAGS_PARTSCAN | LO_FLAGS_DIRECT_IO)
+
 #include <asm/posix_types.h>	/* for __kernel_old_dev_t */
 #include <linux/types.h>	/* for __u64 */
 
@@ -66,6 +70,22 @@  struct loop_info64 {
 	__u64		   lo_init[2];
 };
 
+/**
+ * struct loop_config - Complete configuration for a loop device.
+ * @fd: fd of the file to be used as a backing file for the loop device.
+ * @block_size: block size to use; ignored if 0.
+ * @info: struct loop_info64 to configure the loop device with.
+ *
+ * This structure is used with the LOOP_CONFIGURE ioctl, and can be used to
+ * atomically setup and configure all loop device parameters at once.
+ */
+struct loop_config {
+	__u32			fd;
+	__u32                   block_size;
+	struct loop_info64	info;
+	__u64			__reserved[8];
+};
+
 /*
  * Loop filter types
  */
@@ -96,6 +116,7 @@  struct loop_info64 {
 #define LOOP_SET_CAPACITY	0x4C07
 #define LOOP_SET_DIRECT_IO	0x4C08
 #define LOOP_SET_BLOCK_SIZE	0x4C09
+#define LOOP_CONFIGURE		0x4C0A
 
 /* /dev/loop-control interface */
 #define LOOP_CTL_ADD		0x4C80