diff mbox

Btrfs-progs: fix race condition between btrfs and udev

Message ID 1402488698-12518-1-git-send-email-wangsl.fnst@cn.fujitsu.com (mailing list archive)
State Not Applicable
Headers show

Commit Message

Wang Shilong June 11, 2014, 12:11 p.m. UTC
Originally this problem was reproduced by the following scripts:

 # dd if=/dev/zero of=data bs=1M count=50
 # losetup /dev/loop1 data
 # i =1
 # while [ 1 ]
   do
   	mkfs.btrfs -fK /dev/loop1 >& /dev/null || exit 1
	((i++))
	echo "loop $i"
   done

Futher, a easy way to trigger this problem is by running the following
c codes repeatedly:

 int main(int argc, char **argv)
 {
 	/* pass a btrfs block device */
 	int fd = open(argv[1], O_RDWR | O_EXCL);
	if (fd < 0) {
		perror("fail to open: %s", strerror(errno));
		exit(1);
	}
	close(fd);
	return 0;
 }

So the problem is RW opening would trigger udev event which will
call btrfs_scan_one_device(). In btrfs_scan_one_device(), it
would open the block device with EXCL flag..meanwhile if another
program try to open that device with O_EXCL, it would fail with
EBUSY.

This happen seldomly in the real world, but if we use loop device
for test, we may hit this annoying problem.

A walkaround way to solve this problem is to wait kernel scanning
finished and then try it again.

Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com>
---
 utils.c   | 36 +++++++++++++++++++++++++++++++++++-
 utils.h   |  1 +
 volumes.c |  3 ++-
 3 files changed, 38 insertions(+), 2 deletions(-)

Comments

Tomasz Torcz June 11, 2014, 12:23 p.m. UTC | #1
On Wed, Jun 11, 2014 at 08:11:38PM +0800, Wang Shilong wrote:
> So the problem is RW opening would trigger udev event which will
> call btrfs_scan_one_device(). In btrfs_scan_one_device(), it
> would open the block device with EXCL flag..meanwhile if another
> program try to open that device with O_EXCL, it would fail with
> EBUSY.
> 
> This happen seldomly in the real world, but if we use loop device
> for test, we may hit this annoying problem.

  Hi,

udev just changed the locking semantics, see description in
http://cgit.freedesktop.org/systemd/systemd/commit/NEWS?id=4196a3ead3cfb823670d225eefcb3e60e34c7d95
Anand Jain June 12, 2014, 6:23 a.m. UTC | #2
Wang,

> Futher, a easy way to trigger this problem is by running the following
> c codes repeatedly:
>
>   int main(int argc, char **argv)
>   {
>   	/* pass a btrfs block device */
>   	int fd = open(argv[1], O_RDWR | O_EXCL);
> 	if (fd < 0) {
> 		perror("fail to open: %s", strerror(errno));
> 		exit(1);
> 	}
> 	close(fd);
> 	return 0;
>   }
>
> So the problem is RW opening would trigger udev event which will
> call btrfs_scan_one_device().


> In btrfs_scan_one_device(), it
> would open the block device with EXCL flag..meanwhile if another
> program try to open that device with O_EXCL, it would fail with
> EBUSY.

  Expected. But do we need O_EXCL in kernel:btrfs_scan_one_device()
  at all ?

> This happen seldomly in the real world, but if we use loop device
> for test, we may hit this annoying problem.

> A walkaround way to solve this problem is to wait kernel scanning
> finished and then try it again.

  I agree this happens seldom in production. But I don't agree on
  something to fix as workaround.

  Just sent out the patch:
      btrfs: looping mkfs.btrfs -f <dev> may fail with EBUSY

  which I believe can be a final fix. Your review / tests appreciated.


Thanks, Anand

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/utils.c b/utils.c
index d29de94..20108c3 100644
--- a/utils.c
+++ b/utils.c
@@ -2058,6 +2058,40 @@  int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile,
 	return 0;
 }
 
+/*
+ * there is a race condition between btrfs and udev, we may fail
+ * because udev call btrfs_scan_device() which will open the block
+ * device with O_EXCL. A walkaround solution is to wait kernel
+ * scanning finished and then try again.
+ */
+int btrfs_open_block_device(char *file, int flag)
+{
+	int ret, fd;
+	struct btrfs_ioctl_vol_args args;
+	int tried = 0;
+
+again:
+	fd = open(file, flag);
+	if (fd < 0) {
+		if (!tried && errno == EBUSY && (flag & O_EXCL)) {
+			tried = 1;
+			fd = open("/dev/btrfs-control", O_RDWR);
+			if (fd < 0) {
+				fprintf(stderr,
+					"unable to open /dev/btrfs-control: %s\n",
+					 strerror(errno));
+				return fd;
+			}
+			strncpy(args.name, file, BTRFS_PATH_NAME_MAX);
+			ret = ioctl(fd, BTRFS_IOC_SCAN_DEV, &args);
+			close(fd);
+			if (!ret)
+				goto again;
+		}
+	}
+	return fd;
+}
+
 /* Check if disk is suitable for btrfs
  * returns:
  *  1: something is wrong, estr provides the error
@@ -2096,7 +2130,7 @@  int test_dev_for_mkfs(char *file, int force_overwrite, char *estr)
 		return 1;
 	}
 	/* check if the device is busy */
-	fd = open(file, O_RDWR|O_EXCL);
+	fd = btrfs_open_block_device(file, O_RDWR | O_EXCL);
 	if (fd < 0) {
 		snprintf(estr, sz, "unable to open %s: %s\n", file,
 			strerror(errno));
diff --git a/utils.h b/utils.h
index 0b03830..f6ca252 100644
--- a/utils.h
+++ b/utils.h
@@ -95,6 +95,7 @@  int open_path_or_dev_mnt(const char *path, DIR **dirstream);
 u64 btrfs_device_size(int fd, struct stat *st);
 /* Helper to always get proper size of the destination string */
 #define strncpy_null(dest, src) __strncpy__null(dest, src, sizeof(dest))
+int btrfs_open_block_device(char *file, int flag);
 int test_dev_for_mkfs(char *file, int force_overwrite, char *estr);
 int scan_for_btrfs(int where, int update_kernel);
 int get_label_mounted(const char *mount_path, char *labelp);
diff --git a/volumes.c b/volumes.c
index a61928c..f40740a 100644
--- a/volumes.c
+++ b/volumes.c
@@ -30,6 +30,7 @@ 
 #include "print-tree.h"
 #include "volumes.h"
 #include "math.h"
+#include "utils.h"
 
 struct stripe {
 	struct btrfs_device *dev;
@@ -207,7 +208,7 @@  int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags)
 			continue;
 		}
 
-		fd = open(device->name, flags);
+		fd = btrfs_open_block_device(device->name, flags);
 		if (fd < 0) {
 			ret = -errno;
 			goto fail;