diff mbox

btrfs: introduce BTRFS_IOC_GET_DEVS

Message ID 1390812770-11720-2-git-send-email-anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Anand Jain Jan. 27, 2014, 8:52 a.m. UTC
The user land progs needs   a simple way to read
the raw list of disks and its parameters as
btrfs kernel understands it. This patch will
introduce BTRFS_IOC_GET_DEVS which dumps
every thing under fs_devices.

As of now btrfs-devlist uses this ioctl.

In the long run this ioctl would help to optimize
some part of btrfs-progs, mainly the current
btrfs filesystem show

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/super.c           |   56 +++++++++++++++++++++++++
 fs/btrfs/volumes.c         |   99 ++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h         |    2 +
 include/uapi/linux/btrfs.h |   45 ++++++++++++++++++++
 4 files changed, 202 insertions(+), 0 deletions(-)

Comments

Hugo Mills Jan. 27, 2014, 8:47 a.m. UTC | #1
On Mon, Jan 27, 2014 at 04:52:50PM +0800, Anand Jain wrote:
> The user land progs needs   a simple way to read
> the raw list of disks and its parameters as
> btrfs kernel understands it. This patch will
> introduce BTRFS_IOC_GET_DEVS which dumps
> every thing under fs_devices.
> 
> As of now btrfs-devlist uses this ioctl.
> 
> In the long run this ioctl would help to optimize
> some part of btrfs-progs, mainly the current
> btrfs filesystem show

   Just thinking out loud here, really, but can we export this
information in /sys instead, rather than adding yet more ioctls?

   Hugo.

> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
>  fs/btrfs/super.c           |   56 +++++++++++++++++++++++++
>  fs/btrfs/volumes.c         |   99 ++++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/volumes.h         |    2 +
>  include/uapi/linux/btrfs.h |   45 ++++++++++++++++++++
>  4 files changed, 202 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 878874b..2fc0e2b 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -1693,6 +1693,59 @@ static struct file_system_type btrfs_fs_type = {
>  };
>  MODULE_ALIAS_FS("btrfs");
>  
> +static int btrfs_ioc_get_devlist(void __user *arg)
> +{
> +	int ret = 0;
> +	u64 sz_devlist_arg;
> +	u64 sz_devlist;
> +	u64 sz_out;
> +
> +	struct btrfs_ioctl_devlist_args *devlist_arg;
> +	struct btrfs_ioctl_devlist_args *tmp_devlist_arg;
> +	struct btrfs_ioctl_devlist *devlist;
> +
> +	u64 cnt = 0, ucnt;
> +
> +	sz_devlist_arg = sizeof(*devlist_arg);
> +	sz_devlist = sizeof(*devlist);
> +
> +	if (copy_from_user(&ucnt,
> +		(struct btrfs_ioctl_devlist_args __user *)(arg +
> +		offsetof(struct btrfs_ioctl_devlist_args, count)),
> +			sizeof(ucnt)))
> +		return -EFAULT;
> +
> +	cnt = btrfs_get_devlist_cnt();
> +
> +	if (cnt > ucnt) {
> +		if (copy_to_user(arg +
> +		offsetof(struct btrfs_ioctl_devlist_args, count),
> +			&cnt, sizeof(cnt)))
> +			return -EFAULT;
> +		return 1;
> +	}
> +
> +	sz_out = sz_devlist_arg + sz_devlist * cnt;
> +
> +	tmp_devlist_arg = devlist_arg = memdup_user(arg, sz_out);
> +	if (IS_ERR(devlist_arg))
> +		return PTR_ERR(devlist_arg);
> +
> +	devlist = (struct btrfs_ioctl_devlist *) (++tmp_devlist_arg);
> +	cnt = btrfs_get_devlist(devlist, cnt);
> +	devlist_arg->count = cnt;
> +
> +	if (copy_to_user(arg, devlist_arg, sz_out)) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +	ret = 0;
> +out:
> +	kfree(devlist_arg);
> +	return ret;
> +
> +}
> +
>  static int btrfs_ioc_get_fslist(void __user *arg)
>  {
>  	int ret = 0;
> @@ -1777,6 +1830,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
>  	case BTRFS_IOC_GET_FSLIST:
>  		ret = btrfs_ioc_get_fslist(argp);
>  		break;
> +	case BTRFS_IOC_GET_DEVS:
> +		ret = btrfs_ioc_get_devlist(argp);
> +		break;
>  	}
>  
>  	return ret;
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 2ca91fc..f00ba27 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6322,3 +6322,102 @@ u64 btrfs_get_fslist(struct btrfs_ioctl_fslist *fslist, u64 ucnt)
>  
>  	return cnt;
>  }
> +
> +int btrfs_get_devlist_cnt(void)
> +{
> +	int cnt = 0;
> +	struct btrfs_device *device;
> +	struct btrfs_fs_devices *fs_devices;
> +
> +	mutex_lock(&uuid_mutex);
> +	list_for_each_entry(fs_devices, &fs_uuids, list)
> +		list_for_each_entry(device, &fs_devices->devices, dev_list)
> +			cnt++;
> +	mutex_unlock(&uuid_mutex);
> +
> +	return cnt;
> +}
> +
> +u64 btrfs_get_devlist(struct btrfs_ioctl_devlist *dev, u64 alloc_cnt)
> +{
> +	u64 cnt = 0;
> +
> +	struct btrfs_device *device;
> +	struct btrfs_fs_devices *fs_devices;
> +
> +	mutex_lock(&uuid_mutex);
> +	/* Todo: there must be better way of doing this */
> +	list_for_each_entry(fs_devices, &fs_uuids, list) {
> +
> +		mutex_lock(&fs_devices->device_list_mutex);
> +
> +		list_for_each_entry(device, &fs_devices->devices, dev_list) {
> +
> +			if (!(cnt < alloc_cnt))
> +				break;
> +
> +			memcpy(dev->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
> +			dev->fs_latest_devid = fs_devices->latest_devid;
> +			dev->fs_latest_trans = fs_devices->latest_trans;
> +			dev->fs_num_devices = fs_devices->num_devices;
> +			dev->fs_open_devices = fs_devices->open_devices;
> +			dev->fs_rw_devices = fs_devices->rw_devices;
> +			dev->fs_missing_devices = fs_devices->missing_devices;
> +			dev->fs_total_rw_bytes = fs_devices->total_rw_bytes;
> +			dev->fs_num_can_discard = fs_devices->num_can_discard;
> +			dev->fs_total_devices = fs_devices->total_devices;
> +
> +			if (fs_devices->opened)
> +				dev->flags = BTRFS_FS_MOUNTED;
> +			if (fs_devices->seeding)
> +				dev->flags |= BTRFS_FS_SEEDING;
> +			if (fs_devices->rotating)
> +				dev->flags |= BTRFS_FS_ROTATING;
> +
> +			if (device->writeable)
> +				dev->flags |= BTRFS_DEV_WRITEABLE;
> +			if (device->in_fs_metadata)
> +				dev->flags |= BTRFS_DEV_IN_FS_MD;
> +			if (device->missing)
> +				dev->flags |= BTRFS_DEV_MISSING;
> +			if (device->can_discard)
> +				dev->flags |= BTRFS_DEV_CAN_DISCARD;
> +			if (device->is_tgtdev_for_dev_replace)
> +				dev->flags |= BTRFS_DEV_SUBSTITUTED;
> +			if (device->running_pending)
> +				dev->flags |= BTRFS_DEV_RUN_PENDING;
> +			if (device->nobarriers)
> +				dev->flags |= BTRFS_DEV_NOBARRIERS;
> +			if (device->dev_stats_valid)
> +				dev->flags |= BTRFS_DEV_STATS_VALID;
> +			if (device->dev_stats_dirty)
> +				dev->flags |= BTRFS_DEV_STATS_DIRTY;
> +
> +			dev->gen = device->generation;
> +			dev->devid = device->devid;
> +			dev->total_bytes = device->total_bytes;
> +			dev->disk_total_bytes = device->disk_total_bytes;
> +			dev->bytes_used = device->bytes_used;
> +			dev->type = device->type;
> +			dev->io_align = device->io_align;
> +			dev->io_width = device->io_width;
> +			dev->sector_size = device->sector_size;
> +			memcpy(dev->uuid, device->uuid, BTRFS_UUID_SIZE);
> +			if (device->name) {
> +				rcu_read_lock();
> +				memcpy(dev->name, rcu_str_deref(device->name),
> +					BTRFS_PATH_NAME_MAX);
> +				rcu_read_unlock();
> +			} else {
> +				memset(dev->name, 0, BTRFS_PATH_NAME_MAX);
> +			}
> +			dev++;
> +			cnt++;
> +		}
> +
> +		mutex_unlock(&fs_devices->device_list_mutex);
> +	}
> +	mutex_unlock(&uuid_mutex);
> +
> +	return cnt;
> +}
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index ebb0d0c..82a459c 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -392,4 +392,6 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
>  }
>  int btrfs_get_fslist_cnt(void);
>  u64 btrfs_get_fslist(struct btrfs_ioctl_fslist *fslist, u64 ucnt);
> +int btrfs_get_devlist_cnt(void);
> +u64 btrfs_get_devlist(struct btrfs_ioctl_devlist *devlist, u64 ucnt);
>  #endif
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index 7d7f776..2983019 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -520,6 +520,8 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
>  
>  /* fs flags */
>  #define BTRFS_FS_MOUNTED	(1LLU << 0)
> +#define BTRFS_FS_SEEDING	(1LLU << 1)
> +#define BTRFS_FS_ROTATING	(1LLU << 2)
>  
>  struct btrfs_ioctl_fslist {
>  	__u64 self_sz;			/* in/out */
> @@ -535,6 +537,47 @@ struct btrfs_ioctl_fslist_args {
>  	__u64 count;		/* out */
>  };
>  
> +#define BTRFS_DEV_WRITEABLE	(1LLU << 8)
> +#define BTRFS_DEV_IN_FS_MD	(1LLU << 9)
> +#define BTRFS_DEV_MISSING	(1LLU << 10)
> +#define BTRFS_DEV_CAN_DISCARD	(1LLU << 11)
> +#define BTRFS_DEV_SUBSTITUTED	(1LLU << 12)
> +#define BTRFS_DEV_RUN_PENDING   (1LLU << 13)
> +#define BTRFS_DEV_NOBARRIERS	(1LLU << 14)
> +#define BTRFS_DEV_STATS_VALID	(1LLU << 15)
> +#define BTRFS_DEV_STATS_DIRTY	(1LLU << 16)
> +
> +struct btrfs_ioctl_devlist {
> +	__u64 sz_self;
> +	__u64 fs_latest_devid;
> +	__u64 fs_latest_trans;
> +	__u64 fs_num_devices;
> +	__u64 fs_open_devices;
> +	__u64 fs_rw_devices;
> +	__u64 fs_missing_devices;
> +	__u64 fs_total_rw_bytes;
> +	__u64 fs_num_can_discard;
> +	__u64 fs_total_devices;
> +	__u64 gen;
> +	__u64 flags;
> +	__u64 devid;
> +	__u64 total_bytes;
> +	__u64 disk_total_bytes;
> +	__u64 bytes_used;
> +	__u64 type;
> +	__u32 io_align;
> +	__u32 io_width;
> +	__u32 sector_size;
> +	__u8 fsid[BTRFS_FSID_SIZE];
> +	__u8 uuid[BTRFS_UUID_SIZE];
> +	__u8 name[BTRFS_PATH_NAME_MAX];
> +}__attribute__ ((__packed__));
> +
> +struct btrfs_ioctl_devlist_args {
> +	__u64 self_sz;		/* in/out */
> +	__u64 count;		/* in/out */
> +};
> +
>  #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
>  				   struct btrfs_ioctl_vol_args)
>  #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
> @@ -638,5 +681,7 @@ struct btrfs_ioctl_fslist_args {
>  				   struct btrfs_ioctl_feature_flags[2])
>  #define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
>  				   struct btrfs_ioctl_feature_flags[3])
> +#define BTRFS_IOC_GET_DEVS _IOWR(BTRFS_IOCTL_MAGIC, 58, \
> +				     struct btrfs_ioctl_devlist_args)
>  
>  #endif /* _UAPI_LINUX_BTRFS_H */
Anand Jain Jan. 27, 2014, 9:08 a.m. UTC | #2
On 01/27/2014 04:47 PM, Hugo Mills wrote:
> On Mon, Jan 27, 2014 at 04:52:50PM +0800, Anand Jain wrote:
>> The user land progs needs   a simple way to read
>> the raw list of disks and its parameters as
>> btrfs kernel understands it. This patch will
>> introduce BTRFS_IOC_GET_DEVS which dumps
>> every thing under fs_devices.
>>
>> As of now btrfs-devlist uses this ioctl.
>>
>> In the long run this ioctl would help to optimize
>> some part of btrfs-progs, mainly the current
>> btrfs filesystem show
>
>     Just thinking out loud here, really, but can we export this
> information in /sys instead, rather than adding yet more ioctls?
>

  I was more inclined towards (live) memory dump. But I need to fix
  btrfs-progs as well so ioctl.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Feb. 6, 2014, 7:49 p.m. UTC | #3
On Mon, Jan 27, 2014 at 08:47:09AM +0000, Hugo Mills wrote:
> On Mon, Jan 27, 2014 at 04:52:50PM +0800, Anand Jain wrote:
> > The user land progs needs   a simple way to read
> > the raw list of disks and its parameters as
> > btrfs kernel understands it. This patch will
> > introduce BTRFS_IOC_GET_DEVS which dumps
> > every thing under fs_devices.
> > 
> > As of now btrfs-devlist uses this ioctl.
> > 
> > In the long run this ioctl would help to optimize
> > some part of btrfs-progs, mainly the current
> > btrfs filesystem show
> 
>    Just thinking out loud here, really, but can we export this
> information in /sys instead, rather than adding yet more ioctls?

I tend to agree that this belongs to sysfs, it's more flexible in case
we'll add more per-device stats, ie. one file that holds all the stats
about the device. This is also easier to use from scripts that gather
system information.

With 3.14 the sysfs interface is available, but the devices under
 /sys/fs/btrfs/<fs-uuid>/devices/...
are symlinks to the sysfs devices, so this btrfs-specific device
information has to be located in a separate directory.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Goffredo Baroncelli Feb. 6, 2014, 8:09 p.m. UTC | #4
On 02/06/2014 08:49 PM, David Sterba wrote:
> On Mon, Jan 27, 2014 at 08:47:09AM +0000, Hugo Mills wrote:
>> On Mon, Jan 27, 2014 at 04:52:50PM +0800, Anand Jain wrote:
>>> The user land progs needs   a simple way to read
>>> the raw list of disks and its parameters as
>>> btrfs kernel understands it. This patch will
>>> introduce BTRFS_IOC_GET_DEVS which dumps
>>> every thing under fs_devices.
>>>
>>> As of now btrfs-devlist uses this ioctl.
>>>
>>> In the long run this ioctl would help to optimize
>>> some part of btrfs-progs, mainly the current
>>> btrfs filesystem show
>>
>>    Just thinking out loud here, really, but can we export this
>> information in /sys instead, rather than adding yet more ioctls?
> 
> I tend to agree that this belongs to sysfs, it's more flexible in case
> we'll add more per-device stats, ie. one file that holds all the stats
> about the device. This is also easier to use from scripts that gather
> system information.
> 
> With 3.14 the sysfs interface is available, but the devices under
>  /sys/fs/btrfs/<fs-uuid>/devices/...
> are symlinks to the sysfs devices, so this btrfs-specific device
> information has to be located in a separate directory.

yes please; it would scale better than the ioctl()s; anyway I suggest 1
file per property, and not "...one file that holds all the stats
 about the device"

> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
David Sterba Feb. 6, 2014, 10:05 p.m. UTC | #5
On Thu, Feb 06, 2014 at 09:09:57PM +0100, Goffredo Baroncelli wrote:
> > With 3.14 the sysfs interface is available, but the devices under
> >  /sys/fs/btrfs/<fs-uuid>/devices/...
> > are symlinks to the sysfs devices, so this btrfs-specific device
> > information has to be located in a separate directory.
> 
> yes please; it would scale better than the ioctl()s; anyway I suggest 1
> file per property, and not "...one file that holds all the stats
>  about the device"

Well, we can do both, I don't have a preference and both make sense.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain Feb. 7, 2014, 10:08 a.m. UTC | #6
Thanks for the comments.

  mainly here sysfs way defeats the purpose - debug as
  mentioned. Sysfs would/should show only mounted disks,
  the ioctl way doesn't have such a limitation (of course
  as I commented memory dump way would have been best choice
  but I got stuck with that approach if anybody wants to give
  a try with that approach you are most welcome).

  IMO no harm to have both sysfs way and ioctl way let user
  or developer use which were is suitable in their context.

Thanks, Anand


On 02/07/2014 06:05 AM, David Sterba wrote:
> On Thu, Feb 06, 2014 at 09:09:57PM +0100, Goffredo Baroncelli wrote:
>>> With 3.14 the sysfs interface is available, but the devices under
>>>   /sys/fs/btrfs/<fs-uuid>/devices/...
>>> are symlinks to the sysfs devices, so this btrfs-specific device
>>> information has to be located in a separate directory.
>>
>> yes please; it would scale better than the ioctl()s; anyway I suggest 1
>> file per property, and not "...one file that holds all the stats
>>   about the device"
>
> Well, we can do both, I don't have a preference and both make sense.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hugo Mills Feb. 7, 2014, 10:20 a.m. UTC | #7
On Fri, Feb 07, 2014 at 06:08:56PM +0800, Anand Jain wrote:
> 
> 
>  Thanks for the comments.
> 
>  mainly here sysfs way defeats the purpose - debug as
>  mentioned. Sysfs would/should show only mounted disks,

   So let's find a way of showing the "known-about" data structure
separately from the "mounted" data structure. Just thinking aloud
here, how about something like this:

For unmounted, but known filesystems:
/sys/fs/btrfs/devmap/<UUID>/<symlink to device>   (for each device known)
/sys/fs/btrfs/devmap/<UUID>/label
/sys/fs/btrfs/devmap/<UUID>/complete      (0 if missing devices, 1 otherwise)
/sys/fs/btrfs/devmap/<UUID>/<other properties>

And then for mounted filesystems, we can populate the <other
properties> with more details as appropriate (and remove them when
unmounted again). We can also add a symlink to the <UUID> directory
for the filesystem to e.g.:

/sys/fs/btrfs/mounted/<symlink to UUID>

   If truly the *only* reason to do this is debug, then simply dump
the info in debugfs and have done with it. However, there are good
reasons to want to have this information in an ordinary running
system, too. So stick with sysfs, IMO.

>  the ioctl way doesn't have such a limitation (of course
>  as I commented memory dump way would have been best choice
>  but I got stuck with that approach if anybody wants to give
>  a try with that approach you are most welcome).
> 
>  IMO no harm to have both sysfs way and ioctl way let user
>  or developer use which were is suitable in their context.

   Extra maintenance overhead; divergence of the APIs. "You can find
out the label of the filesystem using sysfs, but not the ioctl. You
can find out the size of the filesystem using the ioctl, but not using
sysfs." That way lies quite a bit of pain for the user of the
interfaces.

   Hugo.

> Thanks, Anand
> 
> 
> On 02/07/2014 06:05 AM, David Sterba wrote:
> >On Thu, Feb 06, 2014 at 09:09:57PM +0100, Goffredo Baroncelli wrote:
> >>>With 3.14 the sysfs interface is available, but the devices under
> >>>  /sys/fs/btrfs/<fs-uuid>/devices/...
> >>>are symlinks to the sysfs devices, so this btrfs-specific device
> >>>information has to be located in a separate directory.
> >>
> >>yes please; it would scale better than the ioctl()s; anyway I suggest 1
> >>file per property, and not "...one file that holds all the stats
> >>  about the device"
> >
> >Well, we can do both, I don't have a preference and both make sense.
David Sterba Feb. 12, 2014, 4:01 p.m. UTC | #8
On Fri, Feb 07, 2014 at 10:20:10AM +0000, Hugo Mills wrote:
> On Fri, Feb 07, 2014 at 06:08:56PM +0800, Anand Jain wrote:
> >  mainly here sysfs way defeats the purpose - debug as
> >  mentioned. Sysfs would/should show only mounted disks,
> 
>    So let's find a way of showing the "known-about" data structure
> separately from the "mounted" data structure. Just thinking aloud
> here, how about something like this:
> 
> For unmounted, but known filesystems:
> /sys/fs/btrfs/devmap/<UUID>/<symlink to device>   (for each device known)
> /sys/fs/btrfs/devmap/<UUID>/label
> /sys/fs/btrfs/devmap/<UUID>/complete      (0 if missing devices, 1 otherwise)
> /sys/fs/btrfs/devmap/<UUID>/<other properties>

I like that, the device map represents the global state maintained by the
module, does not interfere with the current sysfs structure.

One minor comment, the symlink to device should have probably a fixed
name so it can be easily located.

> And then for mounted filesystems, we can populate the <other
> properties> with more details as appropriate (and remove them when
> unmounted again). We can also add a symlink to the <UUID> directory
> for the filesystem to e.g.:
> 
> /sys/fs/btrfs/mounted/<symlink to UUID>
> 
>    If truly the *only* reason to do this is debug, then simply dump
> the info in debugfs and have done with it. However, there are good
> reasons to want to have this information in an ordinary running
> system, too. So stick with sysfs, IMO.

Agreed.

> >  the ioctl way doesn't have such a limitation (of course
> >  as I commented memory dump way would have been best choice
> >  but I got stuck with that approach if anybody wants to give
> >  a try with that approach you are most welcome).
> > 
> >  IMO no harm to have both sysfs way and ioctl way let user
> >  or developer use which were is suitable in their context.
> 
>    Extra maintenance overhead; divergence of the APIs. "You can find
> out the label of the filesystem using sysfs, but not the ioctl. You
> can find out the size of the filesystem using the ioctl, but not using
> sysfs." That way lies quite a bit of pain for the user of the
> interfaces.

I won't mind having several ways how to get the information, sysfs,
ioctl or the properties. Each way has it's own pros and cons or is
suitable for some type of user (C program, shell scripts).
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hugo Mills Feb. 12, 2014, 4:15 p.m. UTC | #9
On Wed, Feb 12, 2014 at 05:01:23PM +0100, David Sterba wrote:
> On Fri, Feb 07, 2014 at 10:20:10AM +0000, Hugo Mills wrote:
> > On Fri, Feb 07, 2014 at 06:08:56PM +0800, Anand Jain wrote:
> > >  mainly here sysfs way defeats the purpose - debug as
> > >  mentioned. Sysfs would/should show only mounted disks,
> > 
> >    So let's find a way of showing the "known-about" data structure
> > separately from the "mounted" data structure. Just thinking aloud
> > here, how about something like this:
> > 
> > For unmounted, but known filesystems:
> > /sys/fs/btrfs/devmap/<UUID>/<symlink to device>   (for each device known)
> > /sys/fs/btrfs/devmap/<UUID>/label
> > /sys/fs/btrfs/devmap/<UUID>/complete      (0 if missing devices, 1 otherwise)
> > /sys/fs/btrfs/devmap/<UUID>/<other properties>
> 
> I like that, the device map represents the global state maintained by the
> module, does not interfere with the current sysfs structure.
> 
> One minor comment, the symlink to device should have probably a fixed
> name so it can be easily located.

   Two ways spring to mind: Number them according to the internal FS's
device numbers, so you'll have something like:

$ ls -l /sys/fs/btrfs/devmap/3993e50e-a926-48a4-867f-36b53d924c35
total 0
lrwxrwxrwx 1 root root    0 Feb  1 11:22 0 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:0:0/0:0:0:0/block/sda
lrwxrwxrwx 1 root root    0 Feb  1 11:22 1 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:1:0/0:1:0:0/block/sdb
lrwxrwxrwx 1 root root    0 Feb  1 11:22 2 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:2:0/0:2:0:0/block/sdc
lrwxrwxrwx 1 root root    0 Feb  1 11:22 3 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:3:0/0:3:0:0/block/sdd
-r--r--r-- 1 root root 4096 Feb  1 11:22 complete
-r--r--r-- 1 root root 4096 Feb  1 11:22 label

   Then you can find them easily by enumerating the filenames, and
anything which is composed entirely of digits [0-9] is a device.
Alternatively, put them into a subdirectory, and anything at all
within that subdir is a device symlink.

[snip]
> > >  the ioctl way doesn't have such a limitation (of course
> > >  as I commented memory dump way would have been best choice
> > >  but I got stuck with that approach if anybody wants to give
> > >  a try with that approach you are most welcome).
> > > 
> > >  IMO no harm to have both sysfs way and ioctl way let user
> > >  or developer use which were is suitable in their context.
> > 
> >    Extra maintenance overhead; divergence of the APIs. "You can find
> > out the label of the filesystem using sysfs, but not the ioctl. You
> > can find out the size of the filesystem using the ioctl, but not using
> > sysfs." That way lies quite a bit of pain for the user of the
> > interfaces.
> 
> I won't mind having several ways how to get the information, sysfs,
> ioctl or the properties. Each way has it's own pros and cons or is
> suitable for some type of user (C program, shell scripts).

   Fair enough. I do worry a little about the API divergence, though.

   Hugo.
David Sterba Feb. 25, 2014, 5:45 p.m. UTC | #10
On Wed, Feb 12, 2014 at 04:15:55PM +0000, Hugo Mills wrote:
> On Wed, Feb 12, 2014 at 05:01:23PM +0100, David Sterba wrote:
> > On Fri, Feb 07, 2014 at 10:20:10AM +0000, Hugo Mills wrote:
> > > For unmounted, but known filesystems:
> > > /sys/fs/btrfs/devmap/<UUID>/<symlink to device>   (for each device known)
> > > /sys/fs/btrfs/devmap/<UUID>/label
> > > /sys/fs/btrfs/devmap/<UUID>/complete      (0 if missing devices, 1 otherwise)
> > > /sys/fs/btrfs/devmap/<UUID>/<other properties>
> > 
> > I like that, the device map represents the global state maintained by the
> > module, does not interfere with the current sysfs structure.
> > 
> > One minor comment, the symlink to device should have probably a fixed
> > name so it can be easily located.
> 
>    Two ways spring to mind: Number them according to the internal FS's
> device numbers, so you'll have something like:
> 
> $ ls -l /sys/fs/btrfs/devmap/3993e50e-a926-48a4-867f-36b53d924c35
> total 0
> lrwxrwxrwx 1 root root    0 Feb  1 11:22 0 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:0:0/0:0:0:0/block/sda
> lrwxrwxrwx 1 root root    0 Feb  1 11:22 1 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:1:0/0:1:0:0/block/sdb
> lrwxrwxrwx 1 root root    0 Feb  1 11:22 2 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:2:0/0:2:0:0/block/sdc
> lrwxrwxrwx 1 root root    0 Feb  1 11:22 3 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:3:0/0:3:0:0/block/sdd
> -r--r--r-- 1 root root 4096 Feb  1 11:22 complete
> -r--r--r-- 1 root root 4096 Feb  1 11:22 label
> 
>    Then you can find them easily by enumerating the filenames, and
> anything which is composed entirely of digits [0-9] is a device.
> Alternatively, put them into a subdirectory, and anything at all
> within that subdir is a device symlink.

A separate subdirectory looks more user friendly, a simple subdir/*
would get them all, regardless of the names. This is much simpler than
writing a "[0-9] [0-9][0-9] ..." pattern to match digits-only filenames.

> > > >  IMO no harm to have both sysfs way and ioctl way let user
> > > >  or developer use which were is suitable in their context.
> > > 
> > >    Extra maintenance overhead; divergence of the APIs. "You can find
> > > out the label of the filesystem using sysfs, but not the ioctl. You
> > > can find out the size of the filesystem using the ioctl, but not using
> > > sysfs." That way lies quite a bit of pain for the user of the
> > > interfaces.
> > 
> > I won't mind having several ways how to get the information, sysfs,
> > ioctl or the properties. Each way has it's own pros and cons or is
> > suitable for some type of user (C program, shell scripts).
> 
>    Fair enough. I do worry a little about the API divergence, though.

That's a valid point, given the number of data retrieved by the ioctl,
(I'm looking at v2 of Anand's patch https://patchwork.kernel.org/patch/3708901/)
this would need to make it extensible and backward compatible.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain Feb. 26, 2014, 2:34 a.m. UTC | #11
On 26/02/2014 01:45, David Sterba wrote:
> On Wed, Feb 12, 2014 at 04:15:55PM +0000, Hugo Mills wrote:
>> On Wed, Feb 12, 2014 at 05:01:23PM +0100, David Sterba wrote:
>>> On Fri, Feb 07, 2014 at 10:20:10AM +0000, Hugo Mills wrote:
>>>> For unmounted, but known filesystems:
>>>> /sys/fs/btrfs/devmap/<UUID>/<symlink to device>   (for each device known)
>>>> /sys/fs/btrfs/devmap/<UUID>/label
>>>> /sys/fs/btrfs/devmap/<UUID>/complete      (0 if missing devices, 1 otherwise)
>>>> /sys/fs/btrfs/devmap/<UUID>/<other properties>
>>>
>>> I like that, the device map represents the global state maintained by the
>>> module, does not interfere with the current sysfs structure.
>>>
>>> One minor comment, the symlink to device should have probably a fixed
>>> name so it can be easily located.
>>
>>     Two ways spring to mind: Number them according to the internal FS's
>> device numbers, so you'll have something like:
>>
>> $ ls -l /sys/fs/btrfs/devmap/3993e50e-a926-48a4-867f-36b53d924c35
>> total 0
>> lrwxrwxrwx 1 root root    0 Feb  1 11:22 0 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:0:0/0:0:0:0/block/sda
>> lrwxrwxrwx 1 root root    0 Feb  1 11:22 1 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:1:0/0:1:0:0/block/sdb
>> lrwxrwxrwx 1 root root    0 Feb  1 11:22 2 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:2:0/0:2:0:0/block/sdc
>> lrwxrwxrwx 1 root root    0 Feb  1 11:22 3 -> ../../../../devices/pci0000:00/0000:00:04.0/0000:02:00.0/ata1/host0/target0:3:0/0:3:0:0/block/sdd
>> -r--r--r-- 1 root root 4096 Feb  1 11:22 complete
>> -r--r--r-- 1 root root 4096 Feb  1 11:22 label
>>
>>     Then you can find them easily by enumerating the filenames, and
>> anything which is composed entirely of digits [0-9] is a device.
>> Alternatively, put them into a subdirectory, and anything at all
>> within that subdir is a device symlink.
>
> A separate subdirectory looks more user friendly, a simple subdir/*
> would get them all, regardless of the names. This is much simpler than
> writing a "[0-9] [0-9][0-9] ..." pattern to match digits-only filenames.


  Its better to have a btrfs sysfs layout posted (and update)
  at the wiki so that (I)/anybody-keen can attempt it (at a later
  point). Just would like to highlight that sysfs is end user
  visible, having tons-of-info /debug information would clutter.
  It doesn't have to match all the contents of BTRFS_IOC_GET_DEVS
  (For example BTRFS_IOC_GET_DEVS tells if bdev pointer is set,
  I doubt if this has to go into sysfs, knowing this info is
  just good for debugging).


>>>>>   IMO no harm to have both sysfs way and ioctl way let user
>>>>>   or developer use which were is suitable in their context.
>>>>
>>>>     Extra maintenance overhead; divergence of the APIs. "You can find
>>>> out the label of the filesystem using sysfs, but not the ioctl. You
>>>> can find out the size of the filesystem using the ioctl, but not using
>>>> sysfs." That way lies quite a bit of pain for the user of the
>>>> interfaces.
>>>
>>> I won't mind having several ways how to get the information, sysfs,
>>> ioctl or the properties. Each way has it's own pros and cons or is
>>> suitable for some type of user (C program, shell scripts).
>>
>>     Fair enough. I do worry a little about the API divergence, though.
>
> That's a valid point, given the number of data retrieved by the ioctl,
> (I'm looking at v2 of Anand's patch https://patchwork.kernel.org/patch/3708901/)
> this would need to make it extensible and backward compatible.

   BTRFS_IOC_GET_DEVS wasn't in the btrfs-next so backward compatible
   part should be ok to defer as of now .? In any case would make
   a frame-work for backward compatible.

Thanks, Anand
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 878874b..2fc0e2b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1693,6 +1693,59 @@  static struct file_system_type btrfs_fs_type = {
 };
 MODULE_ALIAS_FS("btrfs");
 
+static int btrfs_ioc_get_devlist(void __user *arg)
+{
+	int ret = 0;
+	u64 sz_devlist_arg;
+	u64 sz_devlist;
+	u64 sz_out;
+
+	struct btrfs_ioctl_devlist_args *devlist_arg;
+	struct btrfs_ioctl_devlist_args *tmp_devlist_arg;
+	struct btrfs_ioctl_devlist *devlist;
+
+	u64 cnt = 0, ucnt;
+
+	sz_devlist_arg = sizeof(*devlist_arg);
+	sz_devlist = sizeof(*devlist);
+
+	if (copy_from_user(&ucnt,
+		(struct btrfs_ioctl_devlist_args __user *)(arg +
+		offsetof(struct btrfs_ioctl_devlist_args, count)),
+			sizeof(ucnt)))
+		return -EFAULT;
+
+	cnt = btrfs_get_devlist_cnt();
+
+	if (cnt > ucnt) {
+		if (copy_to_user(arg +
+		offsetof(struct btrfs_ioctl_devlist_args, count),
+			&cnt, sizeof(cnt)))
+			return -EFAULT;
+		return 1;
+	}
+
+	sz_out = sz_devlist_arg + sz_devlist * cnt;
+
+	tmp_devlist_arg = devlist_arg = memdup_user(arg, sz_out);
+	if (IS_ERR(devlist_arg))
+		return PTR_ERR(devlist_arg);
+
+	devlist = (struct btrfs_ioctl_devlist *) (++tmp_devlist_arg);
+	cnt = btrfs_get_devlist(devlist, cnt);
+	devlist_arg->count = cnt;
+
+	if (copy_to_user(arg, devlist_arg, sz_out)) {
+		ret = -EFAULT;
+		goto out;
+	}
+	ret = 0;
+out:
+	kfree(devlist_arg);
+	return ret;
+
+}
+
 static int btrfs_ioc_get_fslist(void __user *arg)
 {
 	int ret = 0;
@@ -1777,6 +1830,9 @@  static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 	case BTRFS_IOC_GET_FSLIST:
 		ret = btrfs_ioc_get_fslist(argp);
 		break;
+	case BTRFS_IOC_GET_DEVS:
+		ret = btrfs_ioc_get_devlist(argp);
+		break;
 	}
 
 	return ret;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2ca91fc..f00ba27 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6322,3 +6322,102 @@  u64 btrfs_get_fslist(struct btrfs_ioctl_fslist *fslist, u64 ucnt)
 
 	return cnt;
 }
+
+int btrfs_get_devlist_cnt(void)
+{
+	int cnt = 0;
+	struct btrfs_device *device;
+	struct btrfs_fs_devices *fs_devices;
+
+	mutex_lock(&uuid_mutex);
+	list_for_each_entry(fs_devices, &fs_uuids, list)
+		list_for_each_entry(device, &fs_devices->devices, dev_list)
+			cnt++;
+	mutex_unlock(&uuid_mutex);
+
+	return cnt;
+}
+
+u64 btrfs_get_devlist(struct btrfs_ioctl_devlist *dev, u64 alloc_cnt)
+{
+	u64 cnt = 0;
+
+	struct btrfs_device *device;
+	struct btrfs_fs_devices *fs_devices;
+
+	mutex_lock(&uuid_mutex);
+	/* Todo: there must be better way of doing this */
+	list_for_each_entry(fs_devices, &fs_uuids, list) {
+
+		mutex_lock(&fs_devices->device_list_mutex);
+
+		list_for_each_entry(device, &fs_devices->devices, dev_list) {
+
+			if (!(cnt < alloc_cnt))
+				break;
+
+			memcpy(dev->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
+			dev->fs_latest_devid = fs_devices->latest_devid;
+			dev->fs_latest_trans = fs_devices->latest_trans;
+			dev->fs_num_devices = fs_devices->num_devices;
+			dev->fs_open_devices = fs_devices->open_devices;
+			dev->fs_rw_devices = fs_devices->rw_devices;
+			dev->fs_missing_devices = fs_devices->missing_devices;
+			dev->fs_total_rw_bytes = fs_devices->total_rw_bytes;
+			dev->fs_num_can_discard = fs_devices->num_can_discard;
+			dev->fs_total_devices = fs_devices->total_devices;
+
+			if (fs_devices->opened)
+				dev->flags = BTRFS_FS_MOUNTED;
+			if (fs_devices->seeding)
+				dev->flags |= BTRFS_FS_SEEDING;
+			if (fs_devices->rotating)
+				dev->flags |= BTRFS_FS_ROTATING;
+
+			if (device->writeable)
+				dev->flags |= BTRFS_DEV_WRITEABLE;
+			if (device->in_fs_metadata)
+				dev->flags |= BTRFS_DEV_IN_FS_MD;
+			if (device->missing)
+				dev->flags |= BTRFS_DEV_MISSING;
+			if (device->can_discard)
+				dev->flags |= BTRFS_DEV_CAN_DISCARD;
+			if (device->is_tgtdev_for_dev_replace)
+				dev->flags |= BTRFS_DEV_SUBSTITUTED;
+			if (device->running_pending)
+				dev->flags |= BTRFS_DEV_RUN_PENDING;
+			if (device->nobarriers)
+				dev->flags |= BTRFS_DEV_NOBARRIERS;
+			if (device->dev_stats_valid)
+				dev->flags |= BTRFS_DEV_STATS_VALID;
+			if (device->dev_stats_dirty)
+				dev->flags |= BTRFS_DEV_STATS_DIRTY;
+
+			dev->gen = device->generation;
+			dev->devid = device->devid;
+			dev->total_bytes = device->total_bytes;
+			dev->disk_total_bytes = device->disk_total_bytes;
+			dev->bytes_used = device->bytes_used;
+			dev->type = device->type;
+			dev->io_align = device->io_align;
+			dev->io_width = device->io_width;
+			dev->sector_size = device->sector_size;
+			memcpy(dev->uuid, device->uuid, BTRFS_UUID_SIZE);
+			if (device->name) {
+				rcu_read_lock();
+				memcpy(dev->name, rcu_str_deref(device->name),
+					BTRFS_PATH_NAME_MAX);
+				rcu_read_unlock();
+			} else {
+				memset(dev->name, 0, BTRFS_PATH_NAME_MAX);
+			}
+			dev++;
+			cnt++;
+		}
+
+		mutex_unlock(&fs_devices->device_list_mutex);
+	}
+	mutex_unlock(&uuid_mutex);
+
+	return cnt;
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ebb0d0c..82a459c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -392,4 +392,6 @@  static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
 }
 int btrfs_get_fslist_cnt(void);
 u64 btrfs_get_fslist(struct btrfs_ioctl_fslist *fslist, u64 ucnt);
+int btrfs_get_devlist_cnt(void);
+u64 btrfs_get_devlist(struct btrfs_ioctl_devlist *devlist, u64 ucnt);
 #endif
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 7d7f776..2983019 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -520,6 +520,8 @@  static inline char *btrfs_err_str(enum btrfs_err_code err_code)
 
 /* fs flags */
 #define BTRFS_FS_MOUNTED	(1LLU << 0)
+#define BTRFS_FS_SEEDING	(1LLU << 1)
+#define BTRFS_FS_ROTATING	(1LLU << 2)
 
 struct btrfs_ioctl_fslist {
 	__u64 self_sz;			/* in/out */
@@ -535,6 +537,47 @@  struct btrfs_ioctl_fslist_args {
 	__u64 count;		/* out */
 };
 
+#define BTRFS_DEV_WRITEABLE	(1LLU << 8)
+#define BTRFS_DEV_IN_FS_MD	(1LLU << 9)
+#define BTRFS_DEV_MISSING	(1LLU << 10)
+#define BTRFS_DEV_CAN_DISCARD	(1LLU << 11)
+#define BTRFS_DEV_SUBSTITUTED	(1LLU << 12)
+#define BTRFS_DEV_RUN_PENDING   (1LLU << 13)
+#define BTRFS_DEV_NOBARRIERS	(1LLU << 14)
+#define BTRFS_DEV_STATS_VALID	(1LLU << 15)
+#define BTRFS_DEV_STATS_DIRTY	(1LLU << 16)
+
+struct btrfs_ioctl_devlist {
+	__u64 sz_self;
+	__u64 fs_latest_devid;
+	__u64 fs_latest_trans;
+	__u64 fs_num_devices;
+	__u64 fs_open_devices;
+	__u64 fs_rw_devices;
+	__u64 fs_missing_devices;
+	__u64 fs_total_rw_bytes;
+	__u64 fs_num_can_discard;
+	__u64 fs_total_devices;
+	__u64 gen;
+	__u64 flags;
+	__u64 devid;
+	__u64 total_bytes;
+	__u64 disk_total_bytes;
+	__u64 bytes_used;
+	__u64 type;
+	__u32 io_align;
+	__u32 io_width;
+	__u32 sector_size;
+	__u8 fsid[BTRFS_FSID_SIZE];
+	__u8 uuid[BTRFS_UUID_SIZE];
+	__u8 name[BTRFS_PATH_NAME_MAX];
+}__attribute__ ((__packed__));
+
+struct btrfs_ioctl_devlist_args {
+	__u64 self_sz;		/* in/out */
+	__u64 count;		/* in/out */
+};
+
 #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
 				   struct btrfs_ioctl_vol_args)
 #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -638,5 +681,7 @@  struct btrfs_ioctl_fslist_args {
 				   struct btrfs_ioctl_feature_flags[2])
 #define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
 				   struct btrfs_ioctl_feature_flags[3])
+#define BTRFS_IOC_GET_DEVS _IOWR(BTRFS_IOCTL_MAGIC, 58, \
+				     struct btrfs_ioctl_devlist_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */