diff mbox

[2/2,v2] btrfs: add framework to read fs info and dev info from the kernel

Message ID 1370876355-16584-3-git-send-email-anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Anand Jain June 10, 2013, 2:59 p.m. UTC
This adds two ioctl BTRFS_IOC_GET_FSIDS and BTRFS_IOC_GET_DEVS
which reads the btrfs_fs_devices and btrfs_device structure
from the kernel respectively.

The information in these structure are useful to report the
device/fs information in line with the kernel operations and
thus immediately addresses the problem that 'btrfs fi show'
command reports the stale information after device device add
remove operation is performed. That is because btrfs fi show
reads the disks directly.

Further the frame-work provided here would help to enhance
the btrfs-progs/library to read the other fs information and
its device information. Also the frame work provided here is
easily extensible to retrieve any other structure as future
needs.

v1->v2:
  .code optimized
  .get the device generation number as well, so that
   btrfs-progs could print using print_one_uuid

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/super.c           | 100 +++++++++++++++++++++++++++++++++++++++++----
 fs/btrfs/volumes.c         |  99 +++++++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/volumes.h         |   2 +
 include/uapi/linux/btrfs.h |  62 ++++++++++++++++++++++++++++
 4 files changed, 255 insertions(+), 8 deletions(-)

Comments

Josef Bacik June 10, 2013, 7:40 p.m. UTC | #1
On Mon, Jun 10, 2013 at 08:59:15AM -0600, Anand Jain wrote:
> This adds two ioctl BTRFS_IOC_GET_FSIDS and BTRFS_IOC_GET_DEVS
> which reads the btrfs_fs_devices and btrfs_device structure
> from the kernel respectively.
> 
> The information in these structure are useful to report the
> device/fs information in line with the kernel operations and
> thus immediately addresses the problem that 'btrfs fi show'
> command reports the stale information after device device add
> remove operation is performed. That is because btrfs fi show
> reads the disks directly.
> 
> Further the frame-work provided here would help to enhance
> the btrfs-progs/library to read the other fs information and
> its device information. Also the frame work provided here is
> easily extensible to retrieve any other structure as future
> needs.
>

Please submit an xfstest along with this to test the new functionality so we
have something to test it with.  Make sure it runs properly if your patches are
not in place since they obviously won't be for most people.  Once you have an
xfstest I can properly test these patches.  Thanks,

Josef 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zach Brown June 10, 2013, 8:30 p.m. UTC | #2
On Mon, Jun 10, 2013 at 10:59:15PM +0800, Anand Jain wrote:
> This adds two ioctl BTRFS_IOC_GET_FSIDS and BTRFS_IOC_GET_DEVS
> which reads the btrfs_fs_devices and btrfs_device structure
> from the kernel respectively.

Why not just use sysfs to export the device lists?

> The information in these structure are useful to report the
> device/fs information in line with the kernel operations and
> thus immediately addresses the problem that 'btrfs fi show'
> command reports the stale information after device device add
> remove operation is performed. That is because btrfs fi show
> reads the disks directly.

Hmm.  Would it be enough to get the block device address_space in sync
with the btrfs stuctures?  Would that get rid of the need for this
interface?

But sure, for the sake of argument and code review, let's say that we
wanted some ioctls to read the btrfs kernel device lists.

> Further the frame-work provided here would help to enhance

Please don't add a layer of generic encoding above these new ioctls.
It's not necessary and it makes more work for the various parts of the
world that want to do deep ioctl inspection (think, for example, of
strace).

> +static size_t get_ioctl_sz(size_t pl_list_sz, u64 mul, u64 alloc_cnt)
> +
> +static int get_ioctl_header_and_payload(unsigned long arg,
> +		size_t unit_sz, int default_len,
> +		struct btrfs_ioctl_header **argp, size_t *sz)

This adds a lot of fiddly math and allocation that can go wrong for no
benefit.  We can restructure the interface so all of this code goes
away.

1) Have a simple single fixed input structure for each ioctl.  Maybe
with some extra padding and a flags argument if you think stuff is going
to be added over time.  No generic header.  No casting.  The ioctl
number defines the input structure.  If you need a different structure
later, use a different ioctl number.

2) Don't have a fixed array of output structs embedded in the input
struct.  Have a pointer to the array of output structs in the input
struct.  Probably with a number of elements found there.

3) Don't copy the giant user output buffer into the kernel, write to it,
then copy it back to user space.  Instead have the kernel copy_to_user()
each output structure to the userspace pointer as it goes.  Have the
ioctl() return a positive count of the number of elements copied.

>  static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
>  				unsigned long arg)
>  {
> -	struct btrfs_ioctl_vol_args *vol;
> +	struct btrfs_ioctl_vol_args *vol = NULL;
>  	struct btrfs_fs_devices *fs_devices;
> -	int ret = -ENOTTY;
> +	struct btrfs_ioctl_header *argp = NULL;
> + 	int ret = -ENOTTY;
> +	size_t sz = 0;
>  
>  	if (!capable(CAP_SYS_ADMIN))
>  		return -EPERM;
>  
> -	vol = memdup_user((void __user *)arg, sizeof(*vol));
> -	if (IS_ERR(vol))
> -		return PTR_ERR(vol);
> -
>  	switch (cmd) {
>  	case BTRFS_IOC_SCAN_DEV:
> +		vol = memdup_user((void __user *)arg, sizeof(*vol));
> +		if (IS_ERR(vol))
> +			return PTR_ERR(vol);

Hmm, having to duplicate that previously shared setup into each ioctl
block is a sign that the layering is wrong.

Don't smush the new ioctls into case bodies of this existing simple
fuction that worked with the _vol_args ioctls.  Rename this
btrfs_ioctl_vol_args, or something, and call it from a tiny new
btrfs_control_ioctl() for its two ioctls.  That new high level btrfs
ioctl dispatch function can then call the two new _get_devs and
_get_fsids functions.

> +	case BTRFS_IOC_GET_FSIDS:
> +		ret = get_ioctl_header_and_payload(arg,
> +				sizeof(struct btrfs_ioctl_fs_list),
> +				BTRFS_FSIDS_LEN, &argp, &sz);
> +		if (ret == 1) {
> +			ret = copy_to_user((void __user *)arg, argp, sz);
> +			kfree(argp);
> +			return -EFAULT;
> +		} else if (ret)
> +			return -EFAULT;
> +		ret = btrfs_get_fsids(argp);
> +		if (ret == 0 && copy_to_user((void __user *)arg, argp, sz))
> +			ret = -EFAULT;
> +		kfree(argp);
> +		break;

All of this can go away if you have trivial input and array-of-output
args that the ioctl helper works with.

	case BTRFS_IOC_GET_FSIDS:
		ret = btrfs_ioctl_get_fsids(arg);
		break;

> +int btrfs_get_fsids(struct btrfs_ioctl_header *argp)
> +{
> +	u64 cnt = 0;
> +	struct btrfs_device *device;
> +	struct btrfs_fs_devices *fs_devices;
> +	struct btrfs_ioctl_fs_list *fslist;
> +	struct btrfs_ioctl_fsinfo *fsinfo;
> +
> +	fslist = (struct btrfs_ioctl_fs_list *)((u8 *)argp
> +						+ sizeof(*argp));

Instead of working with an allocated copy of the input, copy the user
input struct onto the stack here.

	struct btrfs_ioctl_get_fsids_input in;

	if (copy_from_user(&in, argp, sizeof(in)))
		return -EFAULT;

(Or you could get_user() each field.. but that's probably not worth it
for a small input struct.)

> +
> +	list_for_each_entry(fs_devices, &fs_uuids, list) {

Surely this needs to be protected by some lock?  The uuid_mutex, maybe?

> +			fsinfo = &fslist->fsinfo[cnt];
> +			memcpy(fsinfo->fsid, fs_devices->fsid,
> +					BTRFS_FSID_SIZE);

And then copy each device's output struct back to userspace one at a
time instead of building them up in the giant allocated kernel buffer.

This might get a little hairy if you don't want to block under the
uuid_mutex, but that's solvable too (dummy cursor items on the list,
probably).

> +				fsinfo->bytes_used = device->dev_root\
> +						->fs_info->super_copy->bytes_used;

A line continuation in the middle of a pointer deref?  Cache the
super_copy pointer on the stack, maybe.  It's probably better to use a
function that copies a single device's output struct to get all those
indentation levels back.

> +	/* Todo: optimize. there must be better way of doing this*/
> +	list_for_each_entry(fs_devices, &fs_uuids, list) {

(same locking question)

> +				if (device->in_fs_metadata)
> +					dev->flags = dev->flags |
> +						BTRFS_DEV_IN_FS_MD;

	'a = a | b' -> 'a |= b'

> +				memcpy(dev->name, rcu_str_deref(device->name),
> +						BTRFS_PATH_NAME_MAX);

Hmm.  Don't we need to be in rcu_read_lock() to use rcu_str_deref()?

> +struct btrfs_ioctl_fsinfo {
> +	__u64 sz_self;
> +	__u8 fsid[BTRFS_FSID_SIZE]; /* out */
> +	__u64 num_devices;
> +	__u64 total_devices;
> +	__u64 missing_devices;
> +	__u64 total_rw_bytes;
> +	__u64 bytes_used;
> +	__u64 flags;
> +	__u64 default_mount_subvol_id;
> +	char label[BTRFS_LABEL_SIZE];
> +}__attribute__ ((__packed__));

It'd be __packed, but I'd naturally align the structs to u64s and not
use it.

> +struct btrfs_ioctl_fs_list {
> +	__u64 all; /* in */
> +	struct btrfs_ioctl_fsinfo fsinfo[BTRFS_FSIDS_LEN]; /* out */
> +}__attribute__ ((__packed__));

I'd turn this into something like:

struct btrfs_ioctl_get_fsinfo_input {
	__u64 flags;  /* bit for all */
	__u64 nr_fsinfo;
	__u64 fsinfo_ptr; /* u64 ptr to avoid compat */
};

> +struct btrfs_ioctl_devinfo {
> +	__u64 sz_self;
> +	__u64 flags;
> +	__u64 devid;
> +	__u64 total_bytes;
> +	__u64 disk_total_bytes;
> +	__u64 bytes_used;
> +	__u64 type;
> +	__u64 generation;
> +	__u32 io_align;
> +	__u32 io_width;
> +	__u32 sector_size;
 __u32 _padding;
> +	__u8 uuid[BTRFS_UUID_SIZE];
> +	__u8 name[BTRFS_PATH_NAME_MAX + 1];

Pad that nutty 4088 byte string to a multiple of u64s and you can
remove the packed attribute.

- z
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain June 11, 2013, 1:10 p.m. UTC | #3
On 06/11/2013 03:40 AM, Josef Bacik wrote:
> On Mon, Jun 10, 2013 at 08:59:15AM -0600, Anand Jain wrote:
>> This adds two ioctl BTRFS_IOC_GET_FSIDS and BTRFS_IOC_GET_DEVS
>> which reads the btrfs_fs_devices and btrfs_device structure
>> from the kernel respectively.
>>
>> The information in these structure are useful to report the
>> device/fs information in line with the kernel operations and
>> thus immediately addresses the problem that 'btrfs fi show'
>> command reports the stale information after device device add
>> remove operation is performed. That is because btrfs fi show
>> reads the disks directly.
>>
>> Further the frame-work provided here would help to enhance
>> the btrfs-progs/library to read the other fs information and
>> its device information. Also the frame work provided here is
>> easily extensible to retrieve any other structure as future
>> needs.
>>
>
> Please submit an xfstest along with this to test the new functionality so we
> have something to test it with.  Make sure it runs properly if your patches are
> not in place since they obviously won't be for most people.  Once you have an
> xfstest I can properly test these patches.  Thanks,

  This kernel patch supports a new cli option --kernel
  under the existing command 'btrfs filesystem show'.
  xfstest first of all would need testcase to test the
  btrfs filesystem show which isn't there yet. Doing it
  here deviate too much from the point here. But let
  me try to quick get that.

Thanks, Anand
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Josef Bacik June 11, 2013, 1:15 p.m. UTC | #4
On Tue, Jun 11, 2013 at 07:10:12AM -0600, anand jain wrote:
> 
> 
> On 06/11/2013 03:40 AM, Josef Bacik wrote:
> > On Mon, Jun 10, 2013 at 08:59:15AM -0600, Anand Jain wrote:
> >> This adds two ioctl BTRFS_IOC_GET_FSIDS and BTRFS_IOC_GET_DEVS
> >> which reads the btrfs_fs_devices and btrfs_device structure
> >> from the kernel respectively.
> >>
> >> The information in these structure are useful to report the
> >> device/fs information in line with the kernel operations and
> >> thus immediately addresses the problem that 'btrfs fi show'
> >> command reports the stale information after device device add
> >> remove operation is performed. That is because btrfs fi show
> >> reads the disks directly.
> >>
> >> Further the frame-work provided here would help to enhance
> >> the btrfs-progs/library to read the other fs information and
> >> its device information. Also the frame work provided here is
> >> easily extensible to retrieve any other structure as future
> >> needs.
> >>
> >
> > Please submit an xfstest along with this to test the new functionality so we
> > have something to test it with.  Make sure it runs properly if your patches are
> > not in place since they obviously won't be for most people.  Once you have an
> > xfstest I can properly test these patches.  Thanks,
> 
>   This kernel patch supports a new cli option --kernel
>   under the existing command 'btrfs filesystem show'.
>   xfstest first of all would need testcase to test the
>   btrfs filesystem show which isn't there yet. Doing it
>   here deviate too much from the point here. But let
>   me try to quick get that.
> 

No you just need a testcase to make sure --kernel is working properly, so make
it use SCRATCH_DEV_POOL and do things like make a file system, remove a device,
make sure its no longer there, add it back and make sure it pops up again, that
sort of thing.  We are trying to make btrfs more stable and I have no interest
in taking in new features/ioctls without a way to test them properly, so it is
not beside the point.  Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain June 11, 2013, 2:05 p.m. UTC | #5
On 06/11/2013 04:30 AM, Zach Brown wrote:
> On Mon, Jun 10, 2013 at 10:59:15PM +0800, Anand Jain wrote:
>> This adds two ioctl BTRFS_IOC_GET_FSIDS and BTRFS_IOC_GET_DEVS
>> which reads the btrfs_fs_devices and btrfs_device structure
>> from the kernel respectively.
>
> Why not just use sysfs to export the device lists?

  Hmm. I see sysfs being used but isn't ioctl more easy
  to get stuff out from the kernel (except for dumping
  address_space) ? Providing a complete sysfs interface
  for btrfs can be a RFC as well. For now ioctl will help
  to fix the bugs.

>> The information in these structure are useful to report the
>> device/fs information in line with the kernel operations and
>> thus immediately addresses the problem that 'btrfs fi show'
>> command reports the stale information after device device add
>> remove operation is performed. That is because btrfs fi show
>> reads the disks directly.
>
> Hmm.  Would it be enough to get the block device address_space in sync
> with the btrfs stuctures?  Would that get rid of the need for this
> interface?

  Absolutely ! I have that to experiment in linux for
  a long time. More to come on that as time permits from
  the bug fixes which is kind of urgent need as of now.

> But sure, for the sake of argument and code review, let's say that we
> wanted some ioctls to read the btrfs kernel device lists.

  thanks.

>> Further the frame-work provided here would help to enhance
>
> Please don't add a layer of generic encoding above these new ioctls.
> It's not necessary and it makes more work for the various parts of the
> world that want to do deep ioctl inspection (think, for example, of
> strace).

   Agreed. Debugging has to be easier.

>> +static size_t get_ioctl_sz(size_t pl_list_sz, u64 mul, u64 alloc_cnt)
>> +
>> +static int get_ioctl_header_and_payload(unsigned long arg,
>> +		size_t unit_sz, int default_len,
>> +		struct btrfs_ioctl_header **argp, size_t *sz)
>
> This adds a lot of fiddly math and allocation that can go wrong for no
> benefit.  We can restructure the interface so all of this code goes
> away.
>
> 1) Have a simple single fixed input structure for each ioctl.  Maybe
> with some extra padding and a flags argument if you think stuff is going
> to be added over time.  No generic header.  No casting.  The ioctl
> number defines the input structure.  If you need a different structure
> later, use a different ioctl number.

  -No generic header and No casting-  Why not ? anything that
  might not work with strace ?

  (Header-payload model are typically favorites of IO protocol drivers.
  they are easily extensible, checking for compatibility is easier, and
  code re-useability is great).

  Thanks for the review comments below. I will look into that.

Anand
-------
> 2) Don't have a fixed array of output structs embedded in the input
> struct.  Have a pointer to the array of output structs in the input
> struct.  Probably with a number of elements found there.



> 3) Don't copy the giant user output buffer into the kernel, write to it,
> then copy it back to user space.  Instead have the kernel copy_to_user()
> each output structure to the userspace pointer as it goes.  Have the
> ioctl() return a positive count of the number of elements copied.



>>   static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
>>   				unsigned long arg)
>>   {
>> -	struct btrfs_ioctl_vol_args *vol;
>> +	struct btrfs_ioctl_vol_args *vol = NULL;
>>   	struct btrfs_fs_devices *fs_devices;
>> -	int ret = -ENOTTY;
>> +	struct btrfs_ioctl_header *argp = NULL;
>> + 	int ret = -ENOTTY;
>> +	size_t sz = 0;
>>
>>   	if (!capable(CAP_SYS_ADMIN))
>>   		return -EPERM;
>>
>> -	vol = memdup_user((void __user *)arg, sizeof(*vol));
>> -	if (IS_ERR(vol))
>> -		return PTR_ERR(vol);
>> -
>>   	switch (cmd) {
>>   	case BTRFS_IOC_SCAN_DEV:
>> +		vol = memdup_user((void __user *)arg, sizeof(*vol));
>> +		if (IS_ERR(vol))
>> +			return PTR_ERR(vol);
>
> Hmm, having to duplicate that previously shared setup into each ioctl
> block is a sign that the layering is wrong.
>
> Don't smush the new ioctls into case bodies of this existing simple
> fuction that worked with the _vol_args ioctls.  Rename this
> btrfs_ioctl_vol_args, or something, and call it from a tiny new
> btrfs_control_ioctl() for its two ioctls.  That new high level btrfs
> ioctl dispatch function can then call the two new _get_devs and
> _get_fsids functions.



>> +	case BTRFS_IOC_GET_FSIDS:
>> +		ret = get_ioctl_header_and_payload(arg,
>> +				sizeof(struct btrfs_ioctl_fs_list),
>> +				BTRFS_FSIDS_LEN, &argp, &sz);
>> +		if (ret == 1) {
>> +			ret = copy_to_user((void __user *)arg, argp, sz);
>> +			kfree(argp);
>> +			return -EFAULT;
>> +		} else if (ret)
>> +			return -EFAULT;
>> +		ret = btrfs_get_fsids(argp);
>> +		if (ret == 0 && copy_to_user((void __user *)arg, argp, sz))
>> +			ret = -EFAULT;
>> +		kfree(argp);
>> +		break;
>
> All of this can go away if you have trivial input and array-of-output
> args that the ioctl helper works with.
>
> 	case BTRFS_IOC_GET_FSIDS:
> 		ret = btrfs_ioctl_get_fsids(arg);
> 		break;


>> +int btrfs_get_fsids(struct btrfs_ioctl_header *argp)
>> +{
>> +	u64 cnt = 0;
>> +	struct btrfs_device *device;
>> +	struct btrfs_fs_devices *fs_devices;
>> +	struct btrfs_ioctl_fs_list *fslist;
>> +	struct btrfs_ioctl_fsinfo *fsinfo;
>> +
>> +	fslist = (struct btrfs_ioctl_fs_list *)((u8 *)argp
>> +						+ sizeof(*argp));
>
> Instead of working with an allocated copy of the input, copy the user
> input struct onto the stack here.
>
> 	struct btrfs_ioctl_get_fsids_input in;
>
> 	if (copy_from_user(&in, argp, sizeof(in)))
> 		return -EFAULT;
>
> (Or you could get_user() each field.. but that's probably not worth it
> for a small input struct.)



>> +
>> +	list_for_each_entry(fs_devices, &fs_uuids, list) {
>
> Surely this needs to be protected by some lock?  The uuid_mutex, maybe?



>> +			fsinfo = &fslist->fsinfo[cnt];
>> +			memcpy(fsinfo->fsid, fs_devices->fsid,
>> +					BTRFS_FSID_SIZE);
>
> And then copy each device's output struct back to userspace one at a
> time instead of building them up in the giant allocated kernel buffer.
>
> This might get a little hairy if you don't want to block under the
> uuid_mutex, but that's solvable too (dummy cursor items on the list,
> probably).



>> +				fsinfo->bytes_used = device->dev_root\
>> +						->fs_info->super_copy->bytes_used;
>
> A line continuation in the middle of a pointer deref?  Cache the
> super_copy pointer on the stack, maybe.  It's probably better to use a
> function that copies a single device's output struct to get all those
> indentation levels back.
>
>> +	/* Todo: optimize. there must be better way of doing this*/
>> +	list_for_each_entry(fs_devices, &fs_uuids, list) {
>
> (same locking question)



>> +				if (device->in_fs_metadata)
>> +					dev->flags = dev->flags |
>> +						BTRFS_DEV_IN_FS_MD;
>
> 	'a = a | b' -> 'a |= b'



>> +				memcpy(dev->name, rcu_str_deref(device->name),
>> +						BTRFS_PATH_NAME_MAX);
>
> Hmm.  Don't we need to be in rcu_read_lock() to use rcu_str_deref()?



>> +struct btrfs_ioctl_fsinfo {
>> +	__u64 sz_self;
>> +	__u8 fsid[BTRFS_FSID_SIZE]; /* out */
>> +	__u64 num_devices;
>> +	__u64 total_devices;
>> +	__u64 missing_devices;
>> +	__u64 total_rw_bytes;
>> +	__u64 bytes_used;
>> +	__u64 flags;
>> +	__u64 default_mount_subvol_id;
>> +	char label[BTRFS_LABEL_SIZE];
>> +}__attribute__ ((__packed__));
>
> It'd be __packed, but I'd naturally align the structs to u64s and not
> use it.



>> +struct btrfs_ioctl_fs_list {
>> +	__u64 all; /* in */
>> +	struct btrfs_ioctl_fsinfo fsinfo[BTRFS_FSIDS_LEN]; /* out */
>> +}__attribute__ ((__packed__));
>
> I'd turn this into something like:
>
> struct btrfs_ioctl_get_fsinfo_input {
> 	__u64 flags;  /* bit for all */
> 	__u64 nr_fsinfo;
> 	__u64 fsinfo_ptr; /* u64 ptr to avoid compat */
> };



>> +struct btrfs_ioctl_devinfo {
>> +	__u64 sz_self;
>> +	__u64 flags;
>> +	__u64 devid;
>> +	__u64 total_bytes;
>> +	__u64 disk_total_bytes;
>> +	__u64 bytes_used;
>> +	__u64 type;
>> +	__u64 generation;
>> +	__u32 io_align;
>> +	__u32 io_width;
>> +	__u32 sector_size;
>   __u32 _padding;
>> +	__u8 uuid[BTRFS_UUID_SIZE];
>> +	__u8 name[BTRFS_PATH_NAME_MAX + 1];
>
> Pad that nutty 4088 byte string to a multiple of u64s and you can
> remove the packed attribute.




> - z
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Josef Bacik June 11, 2013, 2:24 p.m. UTC | #6
On Mon, Jun 10, 2013 at 08:59:15AM -0600, Anand Jain wrote:
> This adds two ioctl BTRFS_IOC_GET_FSIDS and BTRFS_IOC_GET_DEVS
> which reads the btrfs_fs_devices and btrfs_device structure
> from the kernel respectively.
> 
> The information in these structure are useful to report the
> device/fs information in line with the kernel operations and
> thus immediately addresses the problem that 'btrfs fi show'
> command reports the stale information after device device add
> remove operation is performed. That is because btrfs fi show
> reads the disks directly.
> 
> Further the frame-work provided here would help to enhance
> the btrfs-progs/library to read the other fs information and
> its device information. Also the frame work provided here is
> easily extensible to retrieve any other structure as future
> needs.
> 
> v1->v2:
>   .code optimized
>   .get the device generation number as well, so that
>    btrfs-progs could print using print_one_uuid
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>

In fact NACK altogether on this patch, you can get the same info out with the
TREE_SEARCH ioctl, just do that in btrfs-progs and don't add yet another ioctl.
Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zach Brown June 11, 2013, 5:50 p.m. UTC | #7
> >1) Have a simple single fixed input structure for each ioctl.  Maybe
> >with some extra padding and a flags argument if you think stuff is going
> >to be added over time.  No generic header.  No casting.  The ioctl
> >number defines the input structure.  If you need a different structure
> >later, use a different ioctl number.
> 
>  -No generic header and No casting-  Why not ?

Because it brings no benefits.  All the supposed benefits can be achived
through careful use of the existing interfaces, ioctl or otherwise.  And
it has costs: code complexity, run-time cpu/memory, etc.  Its
benefit/cost ratio is not flattering :).

- z
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anand Jain June 21, 2013, 7:02 a.m. UTC | #8
TREE_SEARCH ioctl doesn't help in this case
  the dev item don't provide all the info
  that btrfs-progs might need in the long run
  (and even the current needs).

Thanks, Anand

On 06/11/2013 10:24 PM, Josef Bacik wrote:
> On Mon, Jun 10, 2013 at 08:59:15AM -0600, Anand Jain wrote:
>> This adds two ioctl BTRFS_IOC_GET_FSIDS and BTRFS_IOC_GET_DEVS
>> which reads the btrfs_fs_devices and btrfs_device structure
>> from the kernel respectively.
>>
>> The information in these structure are useful to report the
>> device/fs information in line with the kernel operations and
>> thus immediately addresses the problem that 'btrfs fi show'
>> command reports the stale information after device device add
>> remove operation is performed. That is because btrfs fi show
>> reads the disks directly.
>>
>> Further the frame-work provided here would help to enhance
>> the btrfs-progs/library to read the other fs information and
>> its device information. Also the frame work provided here is
>> easily extensible to retrieve any other structure as future
>> needs.
>>
>> v1->v2:
>>    .code optimized
>>    .get the device generation number as well, so that
>>     btrfs-progs could print using print_one_uuid
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>
> In fact NACK altogether on this patch, you can get the same info out with the
> TREE_SEARCH ioctl, just do that in btrfs-progs and don't add yet another ioctl.
> Thanks,
>
> Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f0857e0..8b39a08 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1554,38 +1554,124 @@  static struct file_system_type btrfs_fs_type = {
 };
 MODULE_ALIAS_FS("btrfs");
 
+static size_t get_ioctl_sz(size_t pl_list_sz, u64 mul, u64 alloc_cnt)
+{
+	if (alloc_cnt)
+		return (sizeof(struct btrfs_ioctl_header)
+					+ pl_list_sz * alloc_cnt/mul);
+
+	return (sizeof(struct btrfs_ioctl_header) + pl_list_sz);
+}
+
+static int get_ioctl_header_and_payload(unsigned long arg,
+		size_t unit_sz, int default_len,
+		struct btrfs_ioctl_header **argp, size_t *sz)
+{
+	u64 cnt = 0;
+	struct btrfs_ioctl_header *ioctl_head;
+	size_t sz_head = sizeof(struct btrfs_ioctl_header);
+
+	*sz = get_ioctl_sz(unit_sz, default_len, cnt);
+	ioctl_head = *argp = (struct btrfs_ioctl_header *)
+				memdup_user((void __user *)arg, *sz);
+
+	if (IS_ERR(ioctl_head))
+		return PTR_ERR(ioctl_head);
+
+	cnt = ioctl_head->count;
+	if (cnt > default_len) {
+		kfree(ioctl_head);
+		/* Check for any mal-formed ioctl */
+		if (cnt % default_len)
+			return -EFAULT;
+		/* user has allocated more, so re-read
+		 * by using the recomupted size
+		*/
+		*sz = get_ioctl_sz(unit_sz, default_len, cnt);
+		ioctl_head = *argp = (struct btrfs_ioctl_header *)
+				memdup_user((void __user *)arg, *sz);
+		if (IS_ERR(ioctl_head))
+			return PTR_ERR(ioctl_head);
+	}
+
+	ioctl_head->sz_self = sz_head;
+	ioctl_head->sz = unit_sz;
+	ioctl_head->count = default_len;
+
+	if (ioctl_head->sz_self != sz_head || ioctl_head->sz != unit_sz
+		|| ioctl_head->count != default_len)
+		return 1;
+
+	return 0;
+}
+
 /*
  * used by btrfsctl to scan devices when no FS is mounted
  */
 static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 				unsigned long arg)
 {
-	struct btrfs_ioctl_vol_args *vol;
+	struct btrfs_ioctl_vol_args *vol = NULL;
 	struct btrfs_fs_devices *fs_devices;
-	int ret = -ENOTTY;
+	struct btrfs_ioctl_header *argp = NULL;
+ 	int ret = -ENOTTY;
+	size_t sz = 0;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	vol = memdup_user((void __user *)arg, sizeof(*vol));
-	if (IS_ERR(vol))
-		return PTR_ERR(vol);
-
 	switch (cmd) {
 	case BTRFS_IOC_SCAN_DEV:
+		vol = memdup_user((void __user *)arg, sizeof(*vol));
+		if (IS_ERR(vol))
+			return PTR_ERR(vol);
 		ret = btrfs_scan_one_device(vol->name, FMODE_READ,
 					    &btrfs_fs_type, &fs_devices);
+		kfree(vol);
 		break;
 	case BTRFS_IOC_DEVICES_READY:
+		vol = memdup_user((void __user *)arg, sizeof(*vol));
+		if (IS_ERR(vol))
+			return PTR_ERR(vol);
 		ret = btrfs_scan_one_device(vol->name, FMODE_READ,
 					    &btrfs_fs_type, &fs_devices);
+		kfree(vol);
 		if (ret)
 			break;
 		ret = !(fs_devices->num_devices == fs_devices->total_devices);
 		break;
+	case BTRFS_IOC_GET_FSIDS:
+		ret = get_ioctl_header_and_payload(arg,
+				sizeof(struct btrfs_ioctl_fs_list),
+				BTRFS_FSIDS_LEN, &argp, &sz);
+		if (ret == 1) {
+			ret = copy_to_user((void __user *)arg, argp, sz);
+			kfree(argp);
+			return -EFAULT;
+		} else if (ret)
+			return -EFAULT;
+		ret = btrfs_get_fsids(argp);
+		if (ret == 0 && copy_to_user((void __user *)arg, argp, sz))
+			ret = -EFAULT;
+		kfree(argp);
+		break;
+	case BTRFS_IOC_GET_DEVS:
+		ret = get_ioctl_header_and_payload(arg,
+				sizeof(struct btrfs_ioctl_dev_list),
+				BTRFS_DEVS_LEN, &argp, &sz);
+		if (ret == 1) {
+			ret = copy_to_user((void __user *)arg, argp, sz);
+			kfree(argp);
+			return -EFAULT;
+		} else if (ret)
+			return -EFAULT;
+		ret = btrfs_get_devs(argp);
+		if (ret == 0 && copy_to_user((void __user *)arg, argp, sz))
+			ret = -EFAULT;
+		kfree(argp);
+		break;
 	}
 
-	kfree(vol);
 	return ret;
 }
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 024fc11..de8399b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1819,7 +1819,7 @@  static int btrfs_prepare_sprout(struct btrfs_root *root)
 }
 
 /*
- * strore the expected generation for seed devices in device items.
+ * store the expected generation for seed devices in device items.
  */
 static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root)
@@ -5988,3 +5988,100 @@  int btrfs_scratch_superblock(struct btrfs_device *device)
 
 	return 0;
 }
+
+int btrfs_get_fsids(struct btrfs_ioctl_header *argp)
+{
+	u64 cnt = 0;
+	struct btrfs_device *device;
+	struct btrfs_fs_devices *fs_devices;
+	struct btrfs_ioctl_fs_list *fslist;
+	struct btrfs_ioctl_fsinfo *fsinfo;
+
+	fslist = (struct btrfs_ioctl_fs_list *)((u8 *)argp
+						+ sizeof(*argp));
+
+	list_for_each_entry(fs_devices, &fs_uuids, list) {
+		if (!fslist->all && !fs_devices->opened)
+			continue;
+		if (cnt < argp->count) {
+			fsinfo = &fslist->fsinfo[cnt];
+			memcpy(fsinfo->fsid, fs_devices->fsid,
+					BTRFS_FSID_SIZE);
+			fsinfo->num_devices = fs_devices->num_devices;
+			fsinfo->missing_devices = fs_devices->missing_devices;
+			fsinfo->total_devices = fs_devices->total_devices;
+
+			if (fs_devices->opened) {
+				fsinfo->flags = BTRFS_FS_MOUNTED;
+				/* just get a devs sb */
+				device = list_entry(fs_devices->devices.next,
+						struct btrfs_device, dev_list);
+
+				fsinfo->bytes_used = device->dev_root\
+						->fs_info->super_copy->bytes_used;
+				memcpy(fsinfo->label, device->dev_root\
+						->fs_info->super_copy->label,
+						BTRFS_LABEL_SIZE);
+			}
+		}
+		cnt++;
+	}
+	argp->count = cnt;
+	return 0;
+}
+
+int btrfs_get_devs(struct btrfs_ioctl_header *argp)
+{
+	u64 cnt = 0;
+	u64 alloc_cnt = argp->count;
+	struct btrfs_device *device;
+	struct btrfs_fs_devices *fs_devices;
+	struct btrfs_ioctl_dev_list *devlist;
+	struct btrfs_ioctl_devinfo *dev;
+
+	devlist = (struct btrfs_ioctl_dev_list *)
+					((u8 *)argp + sizeof(*argp));
+
+	/* Todo: optimize. there must be better way of doing this*/
+	list_for_each_entry(fs_devices, &fs_uuids, list) {
+		if (memcmp(devlist->fsid, fs_devices->fsid, BTRFS_FSID_SIZE))
+			continue;
+
+		list_for_each_entry(device, &fs_devices->devices, dev_list) {
+			if (cnt < alloc_cnt) {
+				dev = &devlist->dev[cnt];
+				if (device->writeable)
+					dev->flags = BTRFS_DEV_WRITEABLE;
+				if (device->in_fs_metadata)
+					dev->flags = dev->flags |
+						BTRFS_DEV_IN_FS_MD;
+				if (device->missing)
+					dev->flags = dev->flags |
+						BTRFS_DEV_MISSING;
+				if (device->can_discard)
+					dev->flags = dev->flags |
+							BTRFS_DEV_CAN_DISCARD;
+				if (device->is_tgtdev_for_dev_replace)
+					dev->flags = dev->flags |
+						BTRFS_DEV_SUBSTITUTED;
+
+				dev->devid = device->devid;
+				dev->disk_total_bytes = device->disk_total_bytes;
+				dev->bytes_used = device->bytes_used;
+
+				dev->type = device->type;
+				dev->io_align = device->io_align;
+				dev->io_width = device->io_width;
+				dev->sector_size = device->sector_size;
+				memcpy(dev->uuid, device->uuid, BTRFS_UUID_SIZE);
+				memcpy(dev->name, rcu_str_deref(device->name),
+						BTRFS_PATH_NAME_MAX);
+				dev->generation = device->generation;
+			}
+			cnt++;
+		}
+		break;
+	}
+	argp->count = cnt;
+	return 0;
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index f6247e2..48ebd3e 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -371,4 +371,6 @@  static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
 {
 	btrfs_dev_stat_set(dev, index, 0);
 }
+int btrfs_get_fsids(struct btrfs_ioctl_header *fsid);
+int btrfs_get_devs(struct btrfs_ioctl_header *argp);
 #endif
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index de3ec34..9d552ef 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -487,6 +487,64 @@  static inline char *btrfs_err_str(enum btrfs_err_code err_code)
 	}
 }
 
+/* ioctl header */
+struct btrfs_ioctl_header {
+	__u64 sz_self;	/* in/out */
+	__u64 sz;	/* in/out */
+	__u64 count;	/* in/out */
+}__attribute__ ((__packed__));
+
+/* ioctl payloads */
+#define BTRFS_FSIDS_LEN	16
+#define BTRFS_FS_MOUNTED	(1LLU << 0)
+#define BTRFS_LABEL_SIZE	256
+
+struct btrfs_ioctl_fsinfo {
+	__u64 sz_self;
+	__u8 fsid[BTRFS_FSID_SIZE]; /* out */
+	__u64 num_devices;
+	__u64 total_devices;
+	__u64 missing_devices;
+	__u64 total_rw_bytes;
+	__u64 bytes_used;
+	__u64 flags;
+	__u64 default_mount_subvol_id;
+	char label[BTRFS_LABEL_SIZE];
+}__attribute__ ((__packed__));
+
+struct btrfs_ioctl_fs_list {
+	__u64 all; /* in */
+	struct btrfs_ioctl_fsinfo fsinfo[BTRFS_FSIDS_LEN]; /* out */
+}__attribute__ ((__packed__));
+
+#define BTRFS_DEVS_LEN	16
+#define BTRFS_DEV_WRITEABLE	(1LLU << 0)
+#define BTRFS_DEV_IN_FS_MD	(1LLU << 1)
+#define BTRFS_DEV_MISSING	(1LLU << 2)
+#define BTRFS_DEV_CAN_DISCARD	(1LLU << 3)
+#define BTRFS_DEV_SUBSTITUTED	(1LLU << 4)
+
+struct btrfs_ioctl_devinfo {
+	__u64 sz_self;
+	__u64 flags;
+	__u64 devid;
+	__u64 total_bytes;
+	__u64 disk_total_bytes;
+	__u64 bytes_used;
+	__u64 type;
+	__u64 generation;
+	__u32 io_align;
+	__u32 io_width;
+	__u32 sector_size;
+	__u8 uuid[BTRFS_UUID_SIZE];
+	__u8 name[BTRFS_PATH_NAME_MAX + 1];
+}__attribute__ ((__packed__));
+
+struct btrfs_ioctl_dev_list {
+	__u8 fsid[BTRFS_FSID_SIZE]; /* in */
+	struct btrfs_ioctl_devinfo dev[BTRFS_DEVS_LEN]; /* out */
+}__attribute__ ((__packed__));
+
 #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
 				   struct btrfs_ioctl_vol_args)
 #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -578,4 +636,8 @@  static inline char *btrfs_err_str(enum btrfs_err_code err_code)
 				      struct btrfs_ioctl_get_dev_stats)
 #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
 				    struct btrfs_ioctl_dev_replace_args)
+#define BTRFS_IOC_GET_FSIDS _IOWR(BTRFS_IOCTL_MAGIC, 54, \
+				struct btrfs_ioctl_header)
+#define BTRFS_IOC_GET_DEVS _IOWR(BTRFS_IOCTL_MAGIC, 55, \
+				struct btrfs_ioctl_header)
 #endif /* _UAPI_LINUX_BTRFS_H */