diff mbox

[1/5] Avoid to consider lvm snapshots when scanning devices.

Message ID 1417718382-6753-2-git-send-email-kreijack@inwind.it (mailing list archive)
State New, archived
Headers show

Commit Message

Goffredo Baroncelli Dec. 4, 2014, 6:39 p.m. UTC
LVM snapshots create a problem to the btrfs devices management.
BTRFS assumes that each device haw an unique 'device UUID'.
A LVM snapshot breaks this assumption.

This patch skips LVM snapshots during the device scan phase.
If you need to consider a LVM snapshot you have to set the
environmental variable BTRFS_SKIP_LVM_SNAPSHOT to "no".

To check if a device is a LVM snapshot, it is checked the
'udev' device property 'DM_UDEV_LOW_PRIORITY_FLAG' .
If it is set to 1, the device has to be skipped.

As conseguence, btrfs now depends by libudev.

Programmatically you can control this behavior with the functions:
- btrfs_scan_set_skip_lvm_snapshot(int new_value)
- int btrfs_scan_get_skip_lvm_snapshot( )

Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
---
 Makefile |   4 +--
 utils.c  | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 utils.h  |   9 +++++-
 3 files changed, 117 insertions(+), 3 deletions(-)

Comments

Qu Wenruo Dec. 8, 2014, 2:02 a.m. UTC | #1
-------- Original Message --------
Subject: [PATCH 1/5] Avoid to consider lvm snapshots when scanning devices.
From: Goffredo Baroncelli <kreijack@gmail.com>
To: <linux-btrfs@vger.kernel.org>
Date: 2014?12?05? 02:39
> LVM snapshots create a problem to the btrfs devices management.
> BTRFS assumes that each device haw an unique 'device UUID'.
> A LVM snapshot breaks this assumption.
>
> This patch skips LVM snapshots during the device scan phase.
> If you need to consider a LVM snapshot you have to set the
> environmental variable BTRFS_SKIP_LVM_SNAPSHOT to "no".
IMHO, it is better only to skip LVM snapshot if and only if the snapshot 
contains a btrfs with
conflicting UUID.

Since LVM is such a flexible block level volume management, it is 
possible that some one
did a snapshot of a btrfs fs, and then reformat the original one to 
other fs.
In that case, the LVM snapshot skip seems overkilled.

Also, personally, I prefer there will be some option like -i to allow 
user to choose which device is
used when conflicting uuid is detected. This seems to be the best case 
and user can have the full control
on device scan. This also makes the environment variant not needed.

LVM snapshot skip (only when conflicting) is better to be the fallback 
behavior if -i is not given.

Thanks,
Qu
>
> To check if a device is a LVM snapshot, it is checked the
> 'udev' device property 'DM_UDEV_LOW_PRIORITY_FLAG' .
> If it is set to 1, the device has to be skipped.
>
> As conseguence, btrfs now depends by libudev.
>
> Programmatically you can control this behavior with the functions:
> - btrfs_scan_set_skip_lvm_snapshot(int new_value)
> - int btrfs_scan_get_skip_lvm_snapshot( )
>
> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
> ---
>   Makefile |   4 +--
>   utils.c  | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   utils.h  |   9 +++++-
>   3 files changed, 117 insertions(+), 3 deletions(-)
>
> diff --git a/Makefile b/Makefile
> index 4cae30c..9464361 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -26,7 +26,7 @@ TESTS = fsck-tests.sh convert-tests.sh
>   INSTALL = install
>   prefix ?= /usr/local
>   bindir = $(prefix)/bin
> -lib_LIBS = -luuid -lblkid -lm -lz -llzo2 -L.
> +lib_LIBS = -luuid -lblkid -lm -lz -ludev -llzo2 -L.
>   libdir ?= $(prefix)/lib
>   incdir = $(prefix)/include/btrfs
>   LIBS = $(lib_LIBS) $(libs_static)
> @@ -99,7 +99,7 @@ lib_links = libbtrfs.so.0 libbtrfs.so
>   headers = $(libbtrfs_headers)
>   
>   # make C=1 to enable sparse
> -check_defs := .cc-defines.h
> +check_defs := .cc-defines.h
>   ifdef C
>   	#
>   	# We're trying to use sparse against glibc headers which go wild
> diff --git a/utils.c b/utils.c
> index 2a92416..9887f8b 100644
> --- a/utils.c
> +++ b/utils.c
> @@ -29,6 +29,7 @@
>   #include <sys/types.h>
>   #include <sys/stat.h>
>   #include <uuid/uuid.h>
> +#include <libudev.h>
>   #include <fcntl.h>
>   #include <unistd.h>
>   #include <mntent.h>
> @@ -52,6 +53,13 @@
>   #define BLKDISCARD	_IO(0x12,119)
>   #endif
>   
> +/*
> + * This variable controls if the lvm snapshot have to be skipped or not.
> + * Access this variable only via the btrfs_scan_[sg]et_skip_lvm_snapshot()
> + * functions
> + */
> +static int __scan_device_skip_lvm_snapshot = -1;
> +
>   static int btrfs_scan_done = 0;
>   
>   static char argv0_buf[ARGV0_BUF_SIZE] = "btrfs";
> @@ -1593,6 +1601,9 @@ int btrfs_scan_block_devices(int run_ioctl)
>   	char fullpath[110];
>   	int scans = 0;
>   	int special;
> +	int skip_snapshot;
> +
> +	skip_snapshot = btrfs_scan_get_skip_lvm_snapshot();
>   
>   scan_again:
>   	proc_partitions = fopen("/proc/partitions","r");
> @@ -1642,6 +1653,9 @@ scan_again:
>   			continue;
>   		}
>   
> +		if (skip_snapshot && is_low_priority_device(fullpath))
> +			continue;
> +
>   		fd = open(fullpath, O_RDONLY);
>   		if (fd < 0) {
>   			if (errno != ENOMEDIUM)
> @@ -2182,6 +2196,30 @@ int test_dev_for_mkfs(char *file, int force_overwrite, char *estr)
>   	return 0;
>   }
>   
> +int btrfs_scan_get_skip_lvm_snapshot( )
> +{
> +	const char *value;
> +
> +	if (__scan_device_skip_lvm_snapshot != -1 )
> +		return __scan_device_skip_lvm_snapshot;
> +
> +	value = getenv(BTRFS_SKIP_LVM_SNAPSHOT_ENV_NAME);
> +	if (value && !strcasecmp(value, "NO"))
> +		__scan_device_skip_lvm_snapshot = 0;
> +	else  if (value && !strcasecmp(value, "YES"))
> +		__scan_device_skip_lvm_snapshot = 1;
> +	else
> +		__scan_device_skip_lvm_snapshot =
> +			BTRFS_SKIP_LVM_SNAPSHOT_DEFAULT;
> +
> +	return __scan_device_skip_lvm_snapshot;
> +}
> +
> +void btrfs_scan_set_skip_lvm_snapshot(int new_value)
> +{
> +	__scan_device_skip_lvm_snapshot = !!new_value;
> +}
> +
>   int btrfs_scan_lblkid()
>   {
>   	int fd = -1;
> @@ -2192,6 +2230,9 @@ int btrfs_scan_lblkid()
>   	blkid_dev dev = NULL;
>   	blkid_cache cache = NULL;
>   	char path[PATH_MAX];
> +	int skip_snapshot;
> +
> +	skip_snapshot = btrfs_scan_get_skip_lvm_snapshot();
>   
>   	if (btrfs_scan_done)
>   		return 0;
> @@ -2210,6 +2251,9 @@ int btrfs_scan_lblkid()
>   		/* if we are here its definitely a btrfs disk*/
>   		strncpy(path, blkid_dev_devname(dev), PATH_MAX);
>   
> +		if (skip_snapshot && is_low_priority_device(path))
> +			continue;
> +
>   		fd = open(path, O_RDONLY);
>   		if (fd < 0) {
>   			printf("ERROR: could not open %s\n", path);
> @@ -2450,3 +2494,66 @@ int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
>   	}
>   	return 1;
>   }
> +
> +/*
> + * This function return 1 if the device (path) is consdered "LOW_PRIORITY" by
> + * LVM2 library. These device are typically the nsapshot.
> + * This function return < 0 in case of error; 0 otherwise.
> + */
> +int is_low_priority_device(const char *path)
> +{
> +
> +	struct udev *udev=NULL;
> +	struct udev_device *dev;
> +	struct udev_enumerate *enumerate=NULL;
> +	struct udev_list_entry *devices;
> +	struct udev_list_entry *node, *list;
> +	int ret=-1;
> +	const char *value, *syspath;
> +	char *rpath=NULL;
> +
> +	rpath = realpath(path, NULL);
> +	if (!rpath) {
> +		fprintf(stderr, "ERROR: not enough memory\n");
> +		ret=-2;
> +		goto exit;
> +	}
> +
> +	/* Create the udev object */
> +	udev = udev_new();
> +	if (!udev) {
> +		fprintf(stderr, "ERROR: Can't create udev\n");
> +		ret=-1;
> +		goto exit;
> +	}
> +
> +	enumerate = udev_enumerate_new(udev);
> +	udev_enumerate_add_match_subsystem(enumerate, "block");
> +	udev_enumerate_add_match_property(enumerate, "DEVNAME", rpath);
> +	udev_enumerate_scan_devices(enumerate);
> +
> +	devices = udev_enumerate_get_list_entry(enumerate);
> +	syspath = udev_list_entry_get_name(devices);
> +
> +	dev = udev_device_new_from_syspath(udev, syspath);
> +
> +	list = udev_device_get_properties_list_entry (dev);
> +
> +	ret = 0;
> +	node = udev_list_entry_get_by_name(list, "DM_UDEV_LOW_PRIORITY_FLAG");
> +	if (node) {
> +		value = udev_list_entry_get_value(node);
> +		if (value && !strcmp(value, "1"))
> +			ret = 1;
> +	}
> +
> +exit:
> +	free(rpath);
> +
> +	/* Free the enumerator object */
> +	udev_enumerate_unref(enumerate);
> +
> +	udev_unref(udev);
> +
> +	return ret;
> +}
> diff --git a/utils.h b/utils.h
> index 289e86b..9855f09 100644
> --- a/utils.h
> +++ b/utils.h
> @@ -128,7 +128,7 @@ int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
>   			   int verify);
>   int ask_user(char *question);
>   int lookup_ino_rootid(int fd, u64 *rootid);
> -int btrfs_scan_lblkid(void);
> +int btrfs_scan_lblkid();
>   int get_btrfs_mount(const char *dev, char *mp, size_t mp_size);
>   int find_mount_root(const char *path, char **mount_root);
>   int get_device_info(int fd, u64 devid,
> @@ -161,4 +161,11 @@ static inline u64 btrfs_min_dev_size(u32 leafsize)
>   
>   int find_next_key(struct btrfs_path *path, struct btrfs_key *key);
>   
> +#define BTRFS_SKIP_LVM_SNAPSHOT_DEFAULT 1
> +#define BTRFS_SKIP_LVM_SNAPSHOT_ENV_NAME "BTRFS_SKIP_LVM_SNAPSHOT"
> +
> +int is_low_priority_device(const char *path);
> +void btrfs_scan_set_skip_lvm_snapshot(int new_value);
> +int btrfs_scan_get_skip_lvm_snapshot( );
> +
>   #endif

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Goffredo Baroncelli Dec. 8, 2014, 2:58 p.m. UTC | #2
On 12/08/2014 03:02 AM, Qu Wenruo wrote:
> 
> -------- Original Message --------
> Subject: [PATCH 1/5] Avoid to consider lvm snapshots when scanning devices.
> From: Goffredo Baroncelli <kreijack@gmail.com>
> To: <linux-btrfs@vger.kernel.org>
> Date: 2014?12?05? 02:39
>> LVM snapshots create a problem to the btrfs devices management.
>> BTRFS assumes that each device haw an unique 'device UUID'.
>> A LVM snapshot breaks this assumption.
>>
>> This patch skips LVM snapshots during the device scan phase.
>> If you need to consider a LVM snapshot you have to set the
>> environmental variable BTRFS_SKIP_LVM_SNAPSHOT to "no".
> IMHO, it is better only to skip LVM snapshot if and only if the snapshot contains a btrfs with
> conflicting UUID.

Hi Qu,

Currently the "scan phase" in btrfs is done 1 device at time.
(udev finds a new device and starts "btrfs dev scan <device>"),
and I haven't changed that. This means that btrfs[-prog] doesn't 
know which devices are already registered, or not. And if even 
it would know this information, you have to consider the case
where the snapshot appears before the real target. So 
btrfs[-prog] is not in position to perform this analysis [see
below my other comment]

> Since LVM is such a flexible block level volume management, it is possible that some one
> did a snapshot of a btrfs fs, and then reformat the original one to other fs.
> In that case, the LVM snapshot skip seems overkilled.
> 
> Also, personally, I prefer there will be some option like -i to allow user to choose which device is
> used when conflicting uuid is detected. This seems to be the best case and user can have the full control
> on device scan. This also makes the environment variant not needed.
> 
> LVM snapshot skip (only when conflicting) is better to be the fallback behavior if -i is not given.

I understood your reasons, but I don't find any solution 
compatible with the current btrfs device registration model
(asynchronously when the device appears).

In another patch set, I proposed a mount.btrfs helper which is
in position to perform these analysis and to pick the "right"
device (even with the user suggestion).

Today the lvm-snapshot and btrfs behave very poor: it is not
predictable which device is pick (the original or the snapshot). 
These patch *avoid* most problems skipping the snapshots, which
to me seems a reasonable default.
For the other case the user is still able to mount any disks
[combination] passing them directly via command line (
mount /dev/sdX -o device=/dev/sdY,device=/dev/sdz...  );

Anyway I think for these kind of setup (btrfs on lvm-snapshot), 
passing the disks explicitly is the only solution; in fact your 
suggestion about the '-i' switch is not very different.

> Thanks,
> Qu

BR
G.Baroncelli
>>
>> To check if a device is a LVM snapshot, it is checked the
>> 'udev' device property 'DM_UDEV_LOW_PRIORITY_FLAG' .
>> If it is set to 1, the device has to be skipped.
>>
>> As conseguence, btrfs now depends by libudev.
>>
>> Programmatically you can control this behavior with the functions:
>> - btrfs_scan_set_skip_lvm_snapshot(int new_value)
>> - int btrfs_scan_get_skip_lvm_snapshot( )
>>
>> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
>> ---
>>   Makefile |   4 +--
>>   utils.c  | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   utils.h  |   9 +++++-
>>   3 files changed, 117 insertions(+), 3 deletions(-)
>>
>> diff --git a/Makefile b/Makefile
>> index 4cae30c..9464361 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -26,7 +26,7 @@ TESTS = fsck-tests.sh convert-tests.sh
>>   INSTALL = install
>>   prefix ?= /usr/local
>>   bindir = $(prefix)/bin
>> -lib_LIBS = -luuid -lblkid -lm -lz -llzo2 -L.
>> +lib_LIBS = -luuid -lblkid -lm -lz -ludev -llzo2 -L.
>>   libdir ?= $(prefix)/lib
>>   incdir = $(prefix)/include/btrfs
>>   LIBS = $(lib_LIBS) $(libs_static)
>> @@ -99,7 +99,7 @@ lib_links = libbtrfs.so.0 libbtrfs.so
>>   headers = $(libbtrfs_headers)
>>     # make C=1 to enable sparse
>> -check_defs := .cc-defines.h
>> +check_defs := .cc-defines.h
>>   ifdef C
>>       #
>>       # We're trying to use sparse against glibc headers which go wild
>> diff --git a/utils.c b/utils.c
>> index 2a92416..9887f8b 100644
>> --- a/utils.c
>> +++ b/utils.c
>> @@ -29,6 +29,7 @@
>>   #include <sys/types.h>
>>   #include <sys/stat.h>
>>   #include <uuid/uuid.h>
>> +#include <libudev.h>
>>   #include <fcntl.h>
>>   #include <unistd.h>
>>   #include <mntent.h>
>> @@ -52,6 +53,13 @@
>>   #define BLKDISCARD    _IO(0x12,119)
>>   #endif
>>   +/*
>> + * This variable controls if the lvm snapshot have to be skipped or not.
>> + * Access this variable only via the btrfs_scan_[sg]et_skip_lvm_snapshot()
>> + * functions
>> + */
>> +static int __scan_device_skip_lvm_snapshot = -1;
>> +
>>   static int btrfs_scan_done = 0;
>>     static char argv0_buf[ARGV0_BUF_SIZE] = "btrfs";
>> @@ -1593,6 +1601,9 @@ int btrfs_scan_block_devices(int run_ioctl)
>>       char fullpath[110];
>>       int scans = 0;
>>       int special;
>> +    int skip_snapshot;
>> +
>> +    skip_snapshot = btrfs_scan_get_skip_lvm_snapshot();
>>     scan_again:
>>       proc_partitions = fopen("/proc/partitions","r");
>> @@ -1642,6 +1653,9 @@ scan_again:
>>               continue;
>>           }
>>   +        if (skip_snapshot && is_low_priority_device(fullpath))
>> +            continue;
>> +
>>           fd = open(fullpath, O_RDONLY);
>>           if (fd < 0) {
>>               if (errno != ENOMEDIUM)
>> @@ -2182,6 +2196,30 @@ int test_dev_for_mkfs(char *file, int force_overwrite, char *estr)
>>       return 0;
>>   }
>>   +int btrfs_scan_get_skip_lvm_snapshot( )
>> +{
>> +    const char *value;
>> +
>> +    if (__scan_device_skip_lvm_snapshot != -1 )
>> +        return __scan_device_skip_lvm_snapshot;
>> +
>> +    value = getenv(BTRFS_SKIP_LVM_SNAPSHOT_ENV_NAME);
>> +    if (value && !strcasecmp(value, "NO"))
>> +        __scan_device_skip_lvm_snapshot = 0;
>> +    else  if (value && !strcasecmp(value, "YES"))
>> +        __scan_device_skip_lvm_snapshot = 1;
>> +    else
>> +        __scan_device_skip_lvm_snapshot =
>> +            BTRFS_SKIP_LVM_SNAPSHOT_DEFAULT;
>> +
>> +    return __scan_device_skip_lvm_snapshot;
>> +}
>> +
>> +void btrfs_scan_set_skip_lvm_snapshot(int new_value)
>> +{
>> +    __scan_device_skip_lvm_snapshot = !!new_value;
>> +}
>> +
>>   int btrfs_scan_lblkid()
>>   {
>>       int fd = -1;
>> @@ -2192,6 +2230,9 @@ int btrfs_scan_lblkid()
>>       blkid_dev dev = NULL;
>>       blkid_cache cache = NULL;
>>       char path[PATH_MAX];
>> +    int skip_snapshot;
>> +
>> +    skip_snapshot = btrfs_scan_get_skip_lvm_snapshot();
>>         if (btrfs_scan_done)
>>           return 0;
>> @@ -2210,6 +2251,9 @@ int btrfs_scan_lblkid()
>>           /* if we are here its definitely a btrfs disk*/
>>           strncpy(path, blkid_dev_devname(dev), PATH_MAX);
>>   +        if (skip_snapshot && is_low_priority_device(path))
>> +            continue;
>> +
>>           fd = open(path, O_RDONLY);
>>           if (fd < 0) {
>>               printf("ERROR: could not open %s\n", path);
>> @@ -2450,3 +2494,66 @@ int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
>>       }
>>       return 1;
>>   }
>> +
>> +/*
>> + * This function return 1 if the device (path) is consdered "LOW_PRIORITY" by
>> + * LVM2 library. These device are typically the nsapshot.
>> + * This function return < 0 in case of error; 0 otherwise.
>> + */
>> +int is_low_priority_device(const char *path)
>> +{
>> +
>> +    struct udev *udev=NULL;
>> +    struct udev_device *dev;
>> +    struct udev_enumerate *enumerate=NULL;
>> +    struct udev_list_entry *devices;
>> +    struct udev_list_entry *node, *list;
>> +    int ret=-1;
>> +    const char *value, *syspath;
>> +    char *rpath=NULL;
>> +
>> +    rpath = realpath(path, NULL);
>> +    if (!rpath) {
>> +        fprintf(stderr, "ERROR: not enough memory\n");
>> +        ret=-2;
>> +        goto exit;
>> +    }
>> +
>> +    /* Create the udev object */
>> +    udev = udev_new();
>> +    if (!udev) {
>> +        fprintf(stderr, "ERROR: Can't create udev\n");
>> +        ret=-1;
>> +        goto exit;
>> +    }
>> +
>> +    enumerate = udev_enumerate_new(udev);
>> +    udev_enumerate_add_match_subsystem(enumerate, "block");
>> +    udev_enumerate_add_match_property(enumerate, "DEVNAME", rpath);
>> +    udev_enumerate_scan_devices(enumerate);
>> +
>> +    devices = udev_enumerate_get_list_entry(enumerate);
>> +    syspath = udev_list_entry_get_name(devices);
>> +
>> +    dev = udev_device_new_from_syspath(udev, syspath);
>> +
>> +    list = udev_device_get_properties_list_entry (dev);
>> +
>> +    ret = 0;
>> +    node = udev_list_entry_get_by_name(list, "DM_UDEV_LOW_PRIORITY_FLAG");
>> +    if (node) {
>> +        value = udev_list_entry_get_value(node);
>> +        if (value && !strcmp(value, "1"))
>> +            ret = 1;
>> +    }
>> +
>> +exit:
>> +    free(rpath);
>> +
>> +    /* Free the enumerator object */
>> +    udev_enumerate_unref(enumerate);
>> +
>> +    udev_unref(udev);
>> +
>> +    return ret;
>> +}
>> diff --git a/utils.h b/utils.h
>> index 289e86b..9855f09 100644
>> --- a/utils.h
>> +++ b/utils.h
>> @@ -128,7 +128,7 @@ int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
>>                  int verify);
>>   int ask_user(char *question);
>>   int lookup_ino_rootid(int fd, u64 *rootid);
>> -int btrfs_scan_lblkid(void);
>> +int btrfs_scan_lblkid();
>>   int get_btrfs_mount(const char *dev, char *mp, size_t mp_size);
>>   int find_mount_root(const char *path, char **mount_root);
>>   int get_device_info(int fd, u64 devid,
>> @@ -161,4 +161,11 @@ static inline u64 btrfs_min_dev_size(u32 leafsize)
>>     int find_next_key(struct btrfs_path *path, struct btrfs_key *key);
>>   +#define BTRFS_SKIP_LVM_SNAPSHOT_DEFAULT 1
>> +#define BTRFS_SKIP_LVM_SNAPSHOT_ENV_NAME "BTRFS_SKIP_LVM_SNAPSHOT"
>> +
>> +int is_low_priority_device(const char *path);
>> +void btrfs_scan_set_skip_lvm_snapshot(int new_value);
>> +int btrfs_scan_get_skip_lvm_snapshot( );
>> +
>>   #endif
> 
>
Qu Wenruo Dec. 9, 2014, 12:32 a.m. UTC | #3
Hi Goffredo

On 12/08/2014 03:02 AM, Qu Wenruo wrote:
>> -------- Original Message --------
>> Subject: [PATCH 1/5] Avoid to consider lvm snapshots when scanning devices.
>> From: Goffredo Baroncelli <kreijack@gmail.com>
>> To: <linux-btrfs@vger.kernel.org>
>> Date: 2014?12?05? 02:39
>>> LVM snapshots create a problem to the btrfs devices management.
>>> BTRFS assumes that each device haw an unique 'device UUID'.
>>> A LVM snapshot breaks this assumption.
>>>
>>> This patch skips LVM snapshots during the device scan phase.
>>> If you need to consider a LVM snapshot you have to set the
>>> environmental variable BTRFS_SKIP_LVM_SNAPSHOT to "no".
>> IMHO, it is better only to skip LVM snapshot if and only if the snapshot contains a btrfs with
>> conflicting UUID.
> Hi Qu,
>
> Currently the "scan phase" in btrfs is done 1 device at time.
> (udev finds a new device and starts "btrfs dev scan <device>"),
> and I haven't changed that. This means that btrfs[-prog] doesn't
> know which devices are already registered, or not. And if even
> it would know this information, you have to consider the case
> where the snapshot appears before the real target. So
> btrfs[-prog] is not in position to perform this analysis [see
> below my other comment]
>
>> Since LVM is such a flexible block level volume management, it is possible that some one
>> did a snapshot of a btrfs fs, and then reformat the original one to other fs.
>> In that case, the LVM snapshot skip seems overkilled.
>>
>> Also, personally, I prefer there will be some option like -i to allow user to choose which device is
>> used when conflicting uuid is detected. This seems to be the best case and user can have the full control
>> on device scan. This also makes the environment variant not needed.
>>
>> LVM snapshot skip (only when conflicting) is better to be the fallback behavior if -i is not given.
> I understood your reasons, but I don't find any solution
> compatible with the current btrfs device registration model
> (asynchronously when the device appears).
>
> In another patch set, I proposed a mount.btrfs helper which is
> in position to perform these analysis and to pick the "right"
> device (even with the user suggestion).
>
> Today the lvm-snapshot and btrfs behave very poor: it is not
> predictable which device is pick (the original or the snapshot).
> These patch *avoid* most problems skipping the snapshots, which
> to me seems a reasonable default.
> For the other case the user is still able to mount any disks
> [combination] passing them directly via command line (
> mount /dev/sdX -o device=/dev/sdY,device=/dev/sdz...  );
>
> Anyway I think for these kind of setup (btrfs on lvm-snapshot),
> passing the disks explicitly is the only solution; in fact your
> suggestion about the '-i' switch is not very different.
>
>> Thanks,
>> Qu
> BR
> G.Baroncelli
Your explains sounds quite reasonable, it's good enough before any 
better solutions.

Thanks,
Qu
>>> To check if a device is a LVM snapshot, it is checked the
>>> 'udev' device property 'DM_UDEV_LOW_PRIORITY_FLAG' .
>>> If it is set to 1, the device has to be skipped.
>>>
>>> As conseguence, btrfs now depends by libudev.
>>>
>>> Programmatically you can control this behavior with the functions:
>>> - btrfs_scan_set_skip_lvm_snapshot(int new_value)
>>> - int btrfs_scan_get_skip_lvm_snapshot( )
>>>
>>> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
>>> ---
>>>    Makefile |   4 +--
>>>    utils.c  | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    utils.h  |   9 +++++-
>>>    3 files changed, 117 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/Makefile b/Makefile
>>> index 4cae30c..9464361 100644
>>> --- a/Makefile
>>> +++ b/Makefile
>>> @@ -26,7 +26,7 @@ TESTS = fsck-tests.sh convert-tests.sh
>>>    INSTALL = install
>>>    prefix ?= /usr/local
>>>    bindir = $(prefix)/bin
>>> -lib_LIBS = -luuid -lblkid -lm -lz -llzo2 -L.
>>> +lib_LIBS = -luuid -lblkid -lm -lz -ludev -llzo2 -L.
>>>    libdir ?= $(prefix)/lib
>>>    incdir = $(prefix)/include/btrfs
>>>    LIBS = $(lib_LIBS) $(libs_static)
>>> @@ -99,7 +99,7 @@ lib_links = libbtrfs.so.0 libbtrfs.so
>>>    headers = $(libbtrfs_headers)
>>>      # make C=1 to enable sparse
>>> -check_defs := .cc-defines.h
>>> +check_defs := .cc-defines.h
>>>    ifdef C
>>>        #
>>>        # We're trying to use sparse against glibc headers which go wild
>>> diff --git a/utils.c b/utils.c
>>> index 2a92416..9887f8b 100644
>>> --- a/utils.c
>>> +++ b/utils.c
>>> @@ -29,6 +29,7 @@
>>>    #include <sys/types.h>
>>>    #include <sys/stat.h>
>>>    #include <uuid/uuid.h>
>>> +#include <libudev.h>
>>>    #include <fcntl.h>
>>>    #include <unistd.h>
>>>    #include <mntent.h>
>>> @@ -52,6 +53,13 @@
>>>    #define BLKDISCARD    _IO(0x12,119)
>>>    #endif
>>>    +/*
>>> + * This variable controls if the lvm snapshot have to be skipped or not.
>>> + * Access this variable only via the btrfs_scan_[sg]et_skip_lvm_snapshot()
>>> + * functions
>>> + */
>>> +static int __scan_device_skip_lvm_snapshot = -1;
>>> +
>>>    static int btrfs_scan_done = 0;
>>>      static char argv0_buf[ARGV0_BUF_SIZE] = "btrfs";
>>> @@ -1593,6 +1601,9 @@ int btrfs_scan_block_devices(int run_ioctl)
>>>        char fullpath[110];
>>>        int scans = 0;
>>>        int special;
>>> +    int skip_snapshot;
>>> +
>>> +    skip_snapshot = btrfs_scan_get_skip_lvm_snapshot();
>>>      scan_again:
>>>        proc_partitions = fopen("/proc/partitions","r");
>>> @@ -1642,6 +1653,9 @@ scan_again:
>>>                continue;
>>>            }
>>>    +        if (skip_snapshot && is_low_priority_device(fullpath))
>>> +            continue;
>>> +
>>>            fd = open(fullpath, O_RDONLY);
>>>            if (fd < 0) {
>>>                if (errno != ENOMEDIUM)
>>> @@ -2182,6 +2196,30 @@ int test_dev_for_mkfs(char *file, int force_overwrite, char *estr)
>>>        return 0;
>>>    }
>>>    +int btrfs_scan_get_skip_lvm_snapshot( )
>>> +{
>>> +    const char *value;
>>> +
>>> +    if (__scan_device_skip_lvm_snapshot != -1 )
>>> +        return __scan_device_skip_lvm_snapshot;
>>> +
>>> +    value = getenv(BTRFS_SKIP_LVM_SNAPSHOT_ENV_NAME);
>>> +    if (value && !strcasecmp(value, "NO"))
>>> +        __scan_device_skip_lvm_snapshot = 0;
>>> +    else  if (value && !strcasecmp(value, "YES"))
>>> +        __scan_device_skip_lvm_snapshot = 1;
>>> +    else
>>> +        __scan_device_skip_lvm_snapshot =
>>> +            BTRFS_SKIP_LVM_SNAPSHOT_DEFAULT;
>>> +
>>> +    return __scan_device_skip_lvm_snapshot;
>>> +}
>>> +
>>> +void btrfs_scan_set_skip_lvm_snapshot(int new_value)
>>> +{
>>> +    __scan_device_skip_lvm_snapshot = !!new_value;
>>> +}
>>> +
>>>    int btrfs_scan_lblkid()
>>>    {
>>>        int fd = -1;
>>> @@ -2192,6 +2230,9 @@ int btrfs_scan_lblkid()
>>>        blkid_dev dev = NULL;
>>>        blkid_cache cache = NULL;
>>>        char path[PATH_MAX];
>>> +    int skip_snapshot;
>>> +
>>> +    skip_snapshot = btrfs_scan_get_skip_lvm_snapshot();
>>>          if (btrfs_scan_done)
>>>            return 0;
>>> @@ -2210,6 +2251,9 @@ int btrfs_scan_lblkid()
>>>            /* if we are here its definitely a btrfs disk*/
>>>            strncpy(path, blkid_dev_devname(dev), PATH_MAX);
>>>    +        if (skip_snapshot && is_low_priority_device(path))
>>> +            continue;
>>> +
>>>            fd = open(path, O_RDONLY);
>>>            if (fd < 0) {
>>>                printf("ERROR: could not open %s\n", path);
>>> @@ -2450,3 +2494,66 @@ int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
>>>        }
>>>        return 1;
>>>    }
>>> +
>>> +/*
>>> + * This function return 1 if the device (path) is consdered "LOW_PRIORITY" by
>>> + * LVM2 library. These device are typically the nsapshot.
>>> + * This function return < 0 in case of error; 0 otherwise.
>>> + */
>>> +int is_low_priority_device(const char *path)
>>> +{
>>> +
>>> +    struct udev *udev=NULL;
>>> +    struct udev_device *dev;
>>> +    struct udev_enumerate *enumerate=NULL;
>>> +    struct udev_list_entry *devices;
>>> +    struct udev_list_entry *node, *list;
>>> +    int ret=-1;
>>> +    const char *value, *syspath;
>>> +    char *rpath=NULL;
>>> +
>>> +    rpath = realpath(path, NULL);
>>> +    if (!rpath) {
>>> +        fprintf(stderr, "ERROR: not enough memory\n");
>>> +        ret=-2;
>>> +        goto exit;
>>> +    }
>>> +
>>> +    /* Create the udev object */
>>> +    udev = udev_new();
>>> +    if (!udev) {
>>> +        fprintf(stderr, "ERROR: Can't create udev\n");
>>> +        ret=-1;
>>> +        goto exit;
>>> +    }
>>> +
>>> +    enumerate = udev_enumerate_new(udev);
>>> +    udev_enumerate_add_match_subsystem(enumerate, "block");
>>> +    udev_enumerate_add_match_property(enumerate, "DEVNAME", rpath);
>>> +    udev_enumerate_scan_devices(enumerate);
>>> +
>>> +    devices = udev_enumerate_get_list_entry(enumerate);
>>> +    syspath = udev_list_entry_get_name(devices);
>>> +
>>> +    dev = udev_device_new_from_syspath(udev, syspath);
>>> +
>>> +    list = udev_device_get_properties_list_entry (dev);
>>> +
>>> +    ret = 0;
>>> +    node = udev_list_entry_get_by_name(list, "DM_UDEV_LOW_PRIORITY_FLAG");
>>> +    if (node) {
>>> +        value = udev_list_entry_get_value(node);
>>> +        if (value && !strcmp(value, "1"))
>>> +            ret = 1;
>>> +    }
>>> +
>>> +exit:
>>> +    free(rpath);
>>> +
>>> +    /* Free the enumerator object */
>>> +    udev_enumerate_unref(enumerate);
>>> +
>>> +    udev_unref(udev);
>>> +
>>> +    return ret;
>>> +}
>>> diff --git a/utils.h b/utils.h
>>> index 289e86b..9855f09 100644
>>> --- a/utils.h
>>> +++ b/utils.h
>>> @@ -128,7 +128,7 @@ int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
>>>                   int verify);
>>>    int ask_user(char *question);
>>>    int lookup_ino_rootid(int fd, u64 *rootid);
>>> -int btrfs_scan_lblkid(void);
>>> +int btrfs_scan_lblkid();
>>>    int get_btrfs_mount(const char *dev, char *mp, size_t mp_size);
>>>    int find_mount_root(const char *path, char **mount_root);
>>>    int get_device_info(int fd, u64 devid,
>>> @@ -161,4 +161,11 @@ static inline u64 btrfs_min_dev_size(u32 leafsize)
>>>      int find_next_key(struct btrfs_path *path, struct btrfs_key *key);
>>>    +#define BTRFS_SKIP_LVM_SNAPSHOT_DEFAULT 1
>>> +#define BTRFS_SKIP_LVM_SNAPSHOT_ENV_NAME "BTRFS_SKIP_LVM_SNAPSHOT"
>>> +
>>> +int is_low_priority_device(const char *path);
>>> +void btrfs_scan_set_skip_lvm_snapshot(int new_value);
>>> +int btrfs_scan_get_skip_lvm_snapshot( );
>>> +
>>>    #endif
>>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Dec. 9, 2014, 10:27 a.m. UTC | #4
On Mon, Dec 08, 2014 at 03:58:09PM +0100, Goffredo Baroncelli wrote:
> In another patch set, I proposed a mount.btrfs helper which is
> in position to perform these analysis and to pick the "right"
> device (even with the user suggestion).
> 
> Today the lvm-snapshot and btrfs behave very poor: it is not
> predictable which device is pick (the original or the snapshot). 
> These patch *avoid* most problems skipping the snapshots, which
> to me seems a reasonable default.
> For the other case the user is still able to mount any disks
> [combination] passing them directly via command line (
> mount /dev/sdX -o device=/dev/sdY,device=/dev/sdz...  );

Beware that passing 'device' does not mean that btrfs will use that
device to assemble the filesystem. It only says to scan the device the
same way any preceding 'btrfs dev scan' would do.

super.c:
 822                 case Opt_device:
 823                         device_name = match_strdup(&args[0]);
 824                         if (!device_name) {
 825                                 error = -ENOMEM;
 826                                 goto out;
 827                         }
 828                         error = btrfs_scan_one_device(device_name,
 829                                         flags, holder, fs_devices);
 830                         kfree(device_name);
 831                         if (error)
 832                                 goto out;
 833                         break;

> Anyway I think for these kind of setup (btrfs on lvm-snapshot), 
> passing the disks explicitly is the only solution; [...]

... for which you'd need another mount option and update the code that
selects the devices accordingly.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Goffredo Baroncelli Dec. 9, 2014, 6:19 p.m. UTC | #5
On 12/09/2014 11:27 AM, David Sterba wrote:
>> > Today the lvm-snapshot and btrfs behave very poor: it is not
>> > predictable which device is pick (the original or the snapshot). 
>> > These patch *avoid* most problems skipping the snapshots, which
>> > to me seems a reasonable default.
>> > For the other case the user is still able to mount any disks
>> > [combination] passing them directly via command line (
>> > mount /dev/sdX -o device=/dev/sdY,device=/dev/sdz...  );

> Beware that passing 'device' does not mean that btrfs will use that
> device to assemble the filesystem. It only says to scan the device the
> same way any preceding 'btrfs dev scan' would do.

I thought a bit about your sentence, but I was unable to understand
the difference. Could you describe a case where it is different ?

I have quite clear that "btrfs scan <dev>" and "mount -o device=<dev>"
do the same thing: these fill a table with the devices information 
grouped by fsid. Then the kernel uses this table as hint to pick 
the devices for a filesystem. So except some strange case 
(like device "hot" removed) this shouldn't make any difference... 
Or not ?

The point is that when a btrfs scan is ran asynchronously,
a snapshot "may" hide the origin volume. Where the word
"may" means that it is not predictable. Passing the device solve only
this point: it becomes predictable which device is used.
diff mbox

Patch

diff --git a/Makefile b/Makefile
index 4cae30c..9464361 100644
--- a/Makefile
+++ b/Makefile
@@ -26,7 +26,7 @@  TESTS = fsck-tests.sh convert-tests.sh
 INSTALL = install
 prefix ?= /usr/local
 bindir = $(prefix)/bin
-lib_LIBS = -luuid -lblkid -lm -lz -llzo2 -L.
+lib_LIBS = -luuid -lblkid -lm -lz -ludev -llzo2 -L.
 libdir ?= $(prefix)/lib
 incdir = $(prefix)/include/btrfs
 LIBS = $(lib_LIBS) $(libs_static)
@@ -99,7 +99,7 @@  lib_links = libbtrfs.so.0 libbtrfs.so
 headers = $(libbtrfs_headers)
 
 # make C=1 to enable sparse
-check_defs := .cc-defines.h 
+check_defs := .cc-defines.h
 ifdef C
 	#
 	# We're trying to use sparse against glibc headers which go wild
diff --git a/utils.c b/utils.c
index 2a92416..9887f8b 100644
--- a/utils.c
+++ b/utils.c
@@ -29,6 +29,7 @@ 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <uuid/uuid.h>
+#include <libudev.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <mntent.h>
@@ -52,6 +53,13 @@ 
 #define BLKDISCARD	_IO(0x12,119)
 #endif
 
+/*
+ * This variable controls if the lvm snapshot have to be skipped or not.
+ * Access this variable only via the btrfs_scan_[sg]et_skip_lvm_snapshot()
+ * functions
+ */
+static int __scan_device_skip_lvm_snapshot = -1;
+
 static int btrfs_scan_done = 0;
 
 static char argv0_buf[ARGV0_BUF_SIZE] = "btrfs";
@@ -1593,6 +1601,9 @@  int btrfs_scan_block_devices(int run_ioctl)
 	char fullpath[110];
 	int scans = 0;
 	int special;
+	int skip_snapshot;
+
+	skip_snapshot = btrfs_scan_get_skip_lvm_snapshot();
 
 scan_again:
 	proc_partitions = fopen("/proc/partitions","r");
@@ -1642,6 +1653,9 @@  scan_again:
 			continue;
 		}
 
+		if (skip_snapshot && is_low_priority_device(fullpath))
+			continue;
+
 		fd = open(fullpath, O_RDONLY);
 		if (fd < 0) {
 			if (errno != ENOMEDIUM)
@@ -2182,6 +2196,30 @@  int test_dev_for_mkfs(char *file, int force_overwrite, char *estr)
 	return 0;
 }
 
+int btrfs_scan_get_skip_lvm_snapshot( )
+{
+	const char *value;
+
+	if (__scan_device_skip_lvm_snapshot != -1 )
+		return __scan_device_skip_lvm_snapshot;
+
+	value = getenv(BTRFS_SKIP_LVM_SNAPSHOT_ENV_NAME);
+	if (value && !strcasecmp(value, "NO"))
+		__scan_device_skip_lvm_snapshot = 0;
+	else  if (value && !strcasecmp(value, "YES"))
+		__scan_device_skip_lvm_snapshot = 1;
+	else
+		__scan_device_skip_lvm_snapshot =
+			BTRFS_SKIP_LVM_SNAPSHOT_DEFAULT;
+
+	return __scan_device_skip_lvm_snapshot;
+}
+
+void btrfs_scan_set_skip_lvm_snapshot(int new_value)
+{
+	__scan_device_skip_lvm_snapshot = !!new_value;
+}
+
 int btrfs_scan_lblkid()
 {
 	int fd = -1;
@@ -2192,6 +2230,9 @@  int btrfs_scan_lblkid()
 	blkid_dev dev = NULL;
 	blkid_cache cache = NULL;
 	char path[PATH_MAX];
+	int skip_snapshot;
+
+	skip_snapshot = btrfs_scan_get_skip_lvm_snapshot();
 
 	if (btrfs_scan_done)
 		return 0;
@@ -2210,6 +2251,9 @@  int btrfs_scan_lblkid()
 		/* if we are here its definitely a btrfs disk*/
 		strncpy(path, blkid_dev_devname(dev), PATH_MAX);
 
+		if (skip_snapshot && is_low_priority_device(path))
+			continue;
+
 		fd = open(path, O_RDONLY);
 		if (fd < 0) {
 			printf("ERROR: could not open %s\n", path);
@@ -2450,3 +2494,66 @@  int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
 	}
 	return 1;
 }
+
+/*
+ * This function return 1 if the device (path) is consdered "LOW_PRIORITY" by
+ * LVM2 library. These device are typically the nsapshot.
+ * This function return < 0 in case of error; 0 otherwise.
+ */
+int is_low_priority_device(const char *path)
+{
+
+	struct udev *udev=NULL;
+	struct udev_device *dev;
+	struct udev_enumerate *enumerate=NULL;
+	struct udev_list_entry *devices;
+	struct udev_list_entry *node, *list;
+	int ret=-1;
+	const char *value, *syspath;
+	char *rpath=NULL;
+
+	rpath = realpath(path, NULL);
+	if (!rpath) {
+		fprintf(stderr, "ERROR: not enough memory\n");
+		ret=-2;
+		goto exit;
+	}
+
+	/* Create the udev object */
+	udev = udev_new();
+	if (!udev) {
+		fprintf(stderr, "ERROR: Can't create udev\n");
+		ret=-1;
+		goto exit;
+	}
+
+	enumerate = udev_enumerate_new(udev);
+	udev_enumerate_add_match_subsystem(enumerate, "block");
+	udev_enumerate_add_match_property(enumerate, "DEVNAME", rpath);
+	udev_enumerate_scan_devices(enumerate);
+
+	devices = udev_enumerate_get_list_entry(enumerate);
+	syspath = udev_list_entry_get_name(devices);
+
+	dev = udev_device_new_from_syspath(udev, syspath);
+
+	list = udev_device_get_properties_list_entry (dev);
+
+	ret = 0;
+	node = udev_list_entry_get_by_name(list, "DM_UDEV_LOW_PRIORITY_FLAG");
+	if (node) {
+		value = udev_list_entry_get_value(node);
+		if (value && !strcmp(value, "1"))
+			ret = 1;
+	}
+
+exit:
+	free(rpath);
+
+	/* Free the enumerator object */
+	udev_enumerate_unref(enumerate);
+
+	udev_unref(udev);
+
+	return ret;
+}
diff --git a/utils.h b/utils.h
index 289e86b..9855f09 100644
--- a/utils.h
+++ b/utils.h
@@ -128,7 +128,7 @@  int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 			   int verify);
 int ask_user(char *question);
 int lookup_ino_rootid(int fd, u64 *rootid);
-int btrfs_scan_lblkid(void);
+int btrfs_scan_lblkid();
 int get_btrfs_mount(const char *dev, char *mp, size_t mp_size);
 int find_mount_root(const char *path, char **mount_root);
 int get_device_info(int fd, u64 devid,
@@ -161,4 +161,11 @@  static inline u64 btrfs_min_dev_size(u32 leafsize)
 
 int find_next_key(struct btrfs_path *path, struct btrfs_key *key);
 
+#define BTRFS_SKIP_LVM_SNAPSHOT_DEFAULT 1
+#define BTRFS_SKIP_LVM_SNAPSHOT_ENV_NAME "BTRFS_SKIP_LVM_SNAPSHOT"
+
+int is_low_priority_device(const char *path);
+void btrfs_scan_set_skip_lvm_snapshot(int new_value);
+int btrfs_scan_get_skip_lvm_snapshot( );
+
 #endif