[rfc,v3] New ioctl BTRFS_IOC_GET_CHUNK_INFO.
diff mbox series

Message ID 20200319203913.3103-2-kreijack@libero.it
State New
Headers show
Series
  • [rfc,v3] New ioctl BTRFS_IOC_GET_CHUNK_INFO.
Related show

Commit Message

Goffredo Baroncelli March 19, 2020, 8:39 p.m. UTC
From: Goffredo Baroncelli <kreijack@inwind.it>

Add a new ioctl to get information about chunks without requiring root
privileges. This allows a non-root user to see how the space of the
filesystem is allocated.

Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
---
 fs/btrfs/ioctl.c           | 211 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/btrfs.h |  38 +++++++
 2 files changed, 249 insertions(+)

Comments

Josef Bacik March 19, 2020, 8:59 p.m. UTC | #1
On 3/19/20 4:39 PM, Goffredo Baroncelli wrote:
> From: Goffredo Baroncelli <kreijack@inwind.it>
> 
> Add a new ioctl to get info about chunk without requiring the root
> privileges. This allow to a non root user to know how the space of the
> filesystem is allocated.
> 
> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
> ---
>   fs/btrfs/ioctl.c           | 211 +++++++++++++++++++++++++++++++++++++
>   include/uapi/linux/btrfs.h |  38 +++++++
>   2 files changed, 249 insertions(+)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 40b729dce91c..b3296a479bf6 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -2234,6 +2234,215 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
>   	return ret;
>   }
>   
> +/*
> + * Return:
> + *	1		-> copied all data, possible further data
> + *	0		-> copied all data, no further data
> + *	-EAGAIN		-> not enough space, restart it
> + *	-EFAULT		-> the user passed an invalid address/size pair
> + */
> +static noinline int copy_chunk_info(struct btrfs_path *path,
> +			       char __user *ubuf,
> +			       size_t buf_size,
> +			       u64 *used_buf,
> +			       int *num_found,
> +			       u64 *offset)
> +{
> +	struct extent_buffer *leaf;
> +	unsigned long item_off;
> +	unsigned long item_len;
> +	int nritems;
> +	int i;
> +	int slot;
> +	struct btrfs_key key;
> +
> +	leaf = path->nodes[0];
> +	slot = path->slots[0];
> +	nritems = btrfs_header_nritems(leaf);
> +
> +	for (i = slot; i < nritems; i++) {
> +		u64 destsize;
> +		struct btrfs_chunk_info ci;
> +		struct btrfs_chunk chunk;
> +		int j, chunk_size;
> +
> +		item_off = btrfs_item_ptr_offset(leaf, i);
> +		item_len = btrfs_item_size_nr(leaf, i);
> +
> +		btrfs_item_key_to_cpu(leaf, &key, i);
> +		/*
> +		 * we are not interested in other items type
> +		 */
> +		if (key.type != BTRFS_CHUNK_ITEM_KEY)
> +			return 0;
> +
> +		/*
> +		 * In any case, the next search must start from here
> +		 */
> +		*offset = key.offset;
> +		read_extent_buffer(leaf, &chunk, item_off, sizeof(chunk));
> +
> +		/*
> +		 * chunk.num_stripes-1 is correct, because btrfs_chunk includes
> +		 * already a stripe
> +		 */
> +		destsize = sizeof(struct btrfs_chunk_info) +
> +			(chunk.num_stripes - 1) * sizeof(struct btrfs_stripe);
> +
> +		if (destsize > item_len) {
> +			ASSERT(0);
> +			return -EINVAL;
> +		}
> +
> +		if (buf_size < destsize + *used_buf) {
> +			if (*num_found)
> +				/* try onother time */
> +				return -EAGAIN;
> +			else
> +				/* in any case the buffer is too small */
> +				return -EOVERFLOW;
> +		}
> +
> +		/* copy chunk */
> +		chunk_size = offsetof(struct btrfs_chunk_info, stripes);
> +		memset(&ci, 0, chunk_size);
> +		ci.length = btrfs_stack_chunk_length(&chunk);
> +		ci.stripe_len = btrfs_stack_chunk_stripe_len(&chunk);
> +		ci.type = btrfs_stack_chunk_type(&chunk);
> +		ci.num_stripes = btrfs_stack_chunk_num_stripes(&chunk);
> +		ci.sub_stripes = btrfs_stack_chunk_sub_stripes(&chunk);
> +		ci.offset = key.offset;
> +
> +		if (copy_to_user(ubuf + *used_buf, &ci, chunk_size))
> +			return -EFAULT;
> +
> +		*used_buf += chunk_size;
> +
> +		/* copy stripes */
> +		for (j = 0 ; j < chunk.num_stripes ; j++) {
> +			struct btrfs_stripe chunk_stripe;
> +			struct btrfs_chunk_info_stripe csi;
> +
> +			/*
> +			 * j-1 is correct, because btrfs_chunk includes already
> +			 * a stripe
> +			 */
> +			read_extent_buffer(leaf, &chunk_stripe,
> +					item_off + sizeof(struct btrfs_chunk) +
> +						sizeof(struct btrfs_stripe) *
> +						(j - 1), sizeof(chunk_stripe));
> +
> +			memset(&csi, 0, sizeof(csi));
> +
> +			csi.devid = btrfs_stack_stripe_devid(&chunk_stripe);
> +			csi.offset = btrfs_stack_stripe_offset(&chunk_stripe);
> +			memcpy(csi.dev_uuid, chunk_stripe.dev_uuid,
> +				sizeof(chunk_stripe.dev_uuid));
> +			if (copy_to_user(ubuf + *used_buf, &csi, sizeof(csi)))
> +				return -EFAULT;
> +
> +			*used_buf += sizeof(csi);
> +		}
> +
> +		++(*num_found);
> +	}
> +
> +	if (*offset < (u64)-1)
> +		++(*offset);
> +
> +	return 1;
> +}
> +
> +static noinline int search_chunk_info(struct inode *inode, u64 *offset,
> +				      int *items_count,
> +				      char __user *ubuf, u64 buf_size)
> +{
> +	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
> +	struct btrfs_root *root;
> +	struct btrfs_key key;
> +	struct btrfs_path *path;
> +	int ret = -EAGAIN;
> +	u64 used_buf = 0;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	/* search for BTRFS_CHUNK_TREE_OBJECTID tree */
> +	key.objectid = BTRFS_CHUNK_TREE_OBJECTID;
> +	key.type = BTRFS_ROOT_ITEM_KEY;
> +	key.offset = (u64)-1;
> +	root = btrfs_get_fs_root(info, &key, true);
> +	if (IS_ERR(root)) {
> +		btrfs_err(info, "could not find root\n");
> +		btrfs_free_path(path);
> +		return -ENOENT;
> +	}
> +
> +
> +	while (used_buf < buf_size) {
> +		key.objectid = 0x0100;
> +		key.type = BTRFS_CHUNK_ITEM_KEY;
> +		key.offset = *offset;
> +
> +		ret = btrfs_search_forward(root, &key, path, 0);
> +		if (ret != 0) {
> +			if (ret > 0)
> +				ret = 0;
> +			goto ret;
> +		}
> +
> +		ret = copy_chunk_info(path, ubuf, buf_size,
> +				      &used_buf, items_count, offset);
> +
> +		btrfs_release_path(path);
> +
> +		if (ret < 1)
> +			break;
> +	}
> +
> +ret:
> +	btrfs_free_path(path);
> +	return ret;
> +}
> +
> +static noinline int btrfs_ioctl_get_chunk_info(struct file *file,
> +					       void __user *argp)
> +{
> +	struct btrfs_ioctl_chunk_info arg;
> +	struct inode *inode;
> +	int ret;
> +	size_t buf_size;
> +	u64 data_offset;
> +	const size_t buf_limit = SZ_16M;
> +
> +
> +	data_offset = sizeof(struct btrfs_ioctl_chunk_info);

I think I'm missing something, but since we have a single 
btrfs_chunk_info_stripe at the end, this will point to the next slot, so we're 
just copying in starting at slot 1, not slot 0, because you pass in argp + 
data_offset below.  This looks wonky to me, thanks,

Josef
Goffredo Baroncelli March 19, 2020, 9:09 p.m. UTC | #2
On 3/19/20 9:59 PM, Josef Bacik wrote:
> On 3/19/20 4:39 PM, Goffredo Baroncelli wrote:
>> From: Goffredo Baroncelli <kreijack@inwind.it>
>>
>> Add a new ioctl to get info about chunk without requiring the root
>> privileges. This allow to a non root user to know how the space of the
>> filesystem is allocated.
>>
>> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
>> ---
>>   fs/btrfs/ioctl.c           | 211 +++++++++++++++++++++++++++++++++++++
>>   include/uapi/linux/btrfs.h |  38 +++++++
>>   2 files changed, 249 insertions(+)
>>
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index 40b729dce91c..b3296a479bf6 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -2234,6 +2234,215 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
>>       return ret;
>>   }
>> +/*
>> + * Return:
>> + *    1        -> copied all data, possible further data
>> + *    0        -> copied all data, no further data
>> + *    -EAGAIN        -> not enough space, restart it
>> + *    -EFAULT        -> the user passed an invalid address/size pair
>> + */
>> +static noinline int copy_chunk_info(struct btrfs_path *path,
>> +                   char __user *ubuf,
>> +                   size_t buf_size,
>> +                   u64 *used_buf,
>> +                   int *num_found,
>> +                   u64 *offset)
>> +{
>> +    struct extent_buffer *leaf;
[...]
>> +
>> +static noinline int btrfs_ioctl_get_chunk_info(struct file *file,
>> +                           void __user *argp)
>> +{
>> +    struct btrfs_ioctl_chunk_info arg;
>> +    struct inode *inode;
>> +    int ret;
>> +    size_t buf_size;
>> +    u64 data_offset;
>> +    const size_t buf_limit = SZ_16M;
>> +
>> +
>> +    data_offset = sizeof(struct btrfs_ioctl_chunk_info);
> 
> I think I'm missing something, but since we have a single btrfs_chunk_info_stripe at the end, this will point to the next slot, so we're just copying in starting at slot 1, not slot 0, because you pass in argp + data_offset below.  This looks wonky to me, thanks,

I think that you are confusing "struct btrfs_ioctl_chunk_info" with "struct btrfs_chunk_info". Only the second one has the single "struct btrfs_chunk_info_stripe" at the end. Maybe?



> 
> Josef
David Sterba May 25, 2020, 5:14 p.m. UTC | #3
I'll start with the data structures

On Thu, Mar 19, 2020 at 09:39:13PM +0100, Goffredo Baroncelli wrote:
> From: Goffredo Baroncelli <kreijack@inwind.it>
> +struct btrfs_chunk_info_stripe {
> +	__u64 devid;
> +	__u64 offset;
> +	__u8 dev_uuid[BTRFS_UUID_SIZE];
> +};
> +
> +struct btrfs_chunk_info {
> +	/* logical start of this chunk */
> +	__u64 offset;
> +	/* size of this chunk in bytes */
> +	__u64 length;
> +
> +	__u64 stripe_len;
> +	__u64 type;
> +
> +	/* 2^16 stripes is quite a lot, a second limit is the size of a single
> +	 * item in the btree
> +	 */
> +	__u16 num_stripes;
> +
> +	/* sub stripes only matter for raid10 */
> +	__u16 sub_stripes;
> +
> +	struct btrfs_chunk_info_stripe stripes[1];
> +	/* additional stripes go here */
> +};

This looks like a copy of btrfs_chunk and stripe, only removing items
not needed for the chunk information. Rather than copying the
unnecessary fields like dev_uuid in stripe, this should be designed for
data exchange with the usecase in mind.

The format does not need to follow the exact layout that kernel uses, ie.
chunk info with one embedded stripe and then followed by variable length
array of further stripes. This is convenient in one way but not in
another one. Alternatively each chunk can be emitted as a single entry,
duplicating part of the common fields and adding the stripe-specific
ones. This is for consideration.

I've looked at my old code doing the chunk dump based on the search
ioctl and found that it also allows to read the chunk usage, with one
extra search to the block group item where the usage is stored. As this
can be slow, it should be optional. Ie. the main ioctl structure
needs flags where this can be requested.
Goffredo Baroncelli May 26, 2020, 8:19 p.m. UTC | #4
On 5/25/20 7:14 PM, David Sterba wrote:
> I'll start with the data structures
> 
> On Thu, Mar 19, 2020 at 09:39:13PM +0100, Goffredo Baroncelli wrote:
>> From: Goffredo Baroncelli <kreijack@inwind.it>
>> +struct btrfs_chunk_info_stripe {
>> +	__u64 devid;
>> +	__u64 offset;
>> +	__u8 dev_uuid[BTRFS_UUID_SIZE];
>> +};
>> +
>> +struct btrfs_chunk_info {
>> +	/* logical start of this chunk */
>> +	__u64 offset;
>> +	/* size of this chunk in bytes */
>> +	__u64 length;
>> +
>> +	__u64 stripe_len;
>> +	__u64 type;
>> +
>> +	/* 2^16 stripes is quite a lot, a second limit is the size of a single
>> +	 * item in the btree
>> +	 */
>> +	__u16 num_stripes;
>> +
>> +	/* sub stripes only matter for raid10 */
>> +	__u16 sub_stripes;
>> +
>> +	struct btrfs_chunk_info_stripe stripes[1];
>> +	/* additional stripes go here */
>> +};
> 
> This looks like a copy of btrfs_chunk and stripe, only removing items
> not needed for the chunk information. Rather than copying the
> unnecessary fileds like dev_uuid in stripe, this should be designed for
> data exchange with the usecase in mind.

There are two clients for this api:
- btrfs fi us
- btrfs dev us

We can get rid of:
	- "offset" fields (2x)
	- "uuid" fields

However the "offset" fields can be used to understand where a logical map
is on the physical disks. I am thinking about a graphical tool to show this
mapping, which doesn't exist yet :-).
The offset field may be used as key to get further information (like the chunk
usage, see below)

Regarding the UUID field, I agree it can be removed because it is redundant (there
is already the devid)


> 
> The format does not need follow the exact layout that kernel uses, ie.
> chunk info with one embedded stripe and then followed by variable length
> array of further stripes. This is convenient in one way but not in
> another one. Alternatively each chunk can be emitted as a single entry,
> duplicating part of the common fields and adding the stripe-specific
> ones. This is for consideration.
> 
> I've looked at my old code doing the chunk dump based on the search
> ioctl and found that it also allows to read the chunk usage, with one
> extra search to the block group item where the usage is stored. As this
> is can be slow, it should be optional. Ie. the main ioctl structure
> needs flags where this can be requested.

This info could be very useful. I am thinking of something like a balance of
chunks which are nearly full (or nearly empty). The question is whether we
should have a different ioctl.
>
Hans van Kranenburg May 28, 2020, 9:03 p.m. UTC | #5
Hi,

On 5/26/20 10:19 PM, Goffredo Baroncelli wrote:
> On 5/25/20 7:14 PM, David Sterba wrote:
>> I'll start with the data structures
>>
>> On Thu, Mar 19, 2020 at 09:39:13PM +0100, Goffredo Baroncelli wrote:
>>> From: Goffredo Baroncelli <kreijack@inwind.it>
>>> +struct btrfs_chunk_info_stripe {
>>> +	__u64 devid;
>>> +	__u64 offset;
>>> +	__u8 dev_uuid[BTRFS_UUID_SIZE];
>>> +};
>>> +
>>> +struct btrfs_chunk_info {
>>> +	/* logical start of this chunk */
>>> +	__u64 offset;
>>> +	/* size of this chunk in bytes */
>>> +	__u64 length;
>>> +
>>> +	__u64 stripe_len;
>>> +	__u64 type;
>>> +
>>> +	/* 2^16 stripes is quite a lot, a second limit is the size of a single
>>> +	 * item in the btree
>>> +	 */
>>> +	__u16 num_stripes;
>>> +
>>> +	/* sub stripes only matter for raid10 */
>>> +	__u16 sub_stripes;
>>> +
>>> +	struct btrfs_chunk_info_stripe stripes[1];
>>> +	/* additional stripes go here */
>>> +};
>>
>> This looks like a copy of btrfs_chunk and stripe, only removing items
>> not needed for the chunk information. Rather than copying the
>> unnecessary fileds like dev_uuid in stripe, this should be designed for
>> data exchange with the usecase in mind.
> 
> There are two clients for this api:
> - btrfs fi us
> - btrfs dev us
> 
> We can get rid of:
> 	- "offset" fields (2x)
> 	- "uuid" fields
> 
> However the "offset" fields can be used to understand where a logical map
> is on the physical disks. I am thinking about a graphical tool to show this
> mapping, which doesn't exits yet -).
> The offset field may be used as key to get further information (like the chunk
> usage, see below)
> 
> Regarding the UUID field, I agree it can be removed because it is redundant (there
> is already the devid)
> 
> 
>>
>> The format does not need follow the exact layout that kernel uses, ie.
>> chunk info with one embedded stripe and then followed by variable length
>> array of further stripes. This is convenient in one way but not in
>> another one. Alternatively each chunk can be emitted as a single entry,
>> duplicating part of the common fields and adding the stripe-specific
>> ones. This is for consideration.
>>
>> I've looked at my old code doing the chunk dump based on the search
>> ioctl and found that it also allows to read the chunk usage, with one
>> extra search to the block group item where the usage is stored. As this
>> is can be slow, it should be optional. Ie. the main ioctl structure
>> needs flags where this can be requested.
> 
> This info could be very useful. I think to something like a balance of
> chunks which are near filled (or near empty). The question is if we
> should have a different ioctl.

Do you mean that you want to allow a non-root user to run btrfs balance?

Otherwise, no. IMO convenience functions for quickly retrieving a
specific subset of data should be created as reusable library functions
in the calling code, not as a redundant extra IOCTL that has to be
maintained.

Hans

Patch
diff mbox series

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 40b729dce91c..b3296a479bf6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2234,6 +2234,215 @@  static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
 	return ret;
 }
 
+/*
+ * Copy the chunk items stored in the leaf at path->nodes[0], starting from
+ * slot path->slots[0], into the user buffer.
+ *
+ * Every chunk is written as the leading part of struct btrfs_chunk_info (all
+ * the bytes that precede 'stripes') immediately followed by one
+ * struct btrfs_chunk_info_stripe per stripe.
+ *
+ * @path:	leaf and first slot to examine
+ * @ubuf:	start of the user buffer
+ * @buf_size:	size of @ubuf in bytes
+ * @used_buf:	in/out, bytes of @ubuf already filled
+ * @num_found:	in/out, number of chunks copied so far
+ * @offset:	out, key offset the next search must start from
+ *
+ * Return:
+ *	1		-> copied all data, possible further data
+ *	0		-> copied all data, no further data
+ *	-EAGAIN		-> not enough space, restart it
+ *	-EOVERFLOW	-> the buffer cannot hold even the first chunk
+ *	-EINVAL		-> the item is too short for the stripes it declares
+ *	-EFAULT		-> the user passed an invalid address/size pair
+ */
+static noinline int copy_chunk_info(struct btrfs_path *path,
+			       char __user *ubuf,
+			       size_t buf_size,
+			       u64 *used_buf,
+			       int *num_found,
+			       u64 *offset)
+{
+	/*
+	 * struct btrfs_chunk already embeds its first stripe, so the on-disk
+	 * stripe array starts one stripe before the end of the structure.
+	 */
+	const unsigned long stripes_off = sizeof(struct btrfs_chunk) -
+					  sizeof(struct btrfs_stripe);
+	/* only the bytes before 'stripes' are copied as the chunk header */
+	const int chunk_size = offsetof(struct btrfs_chunk_info, stripes);
+	struct extent_buffer *leaf = path->nodes[0];
+	int nritems = btrfs_header_nritems(leaf);
+	struct btrfs_key key;
+	int i;
+
+	for (i = path->slots[0]; i < nritems; i++) {
+		struct btrfs_chunk_info ci;
+		struct btrfs_chunk chunk;
+		unsigned long item_off = btrfs_item_ptr_offset(leaf, i);
+		unsigned long item_len = btrfs_item_size_nr(leaf, i);
+		u64 destsize;
+		u16 num_stripes;
+		int j;
+
+		btrfs_item_key_to_cpu(leaf, &key, i);
+		/*
+		 * we are not interested in other item types
+		 */
+		if (key.type != BTRFS_CHUNK_ITEM_KEY)
+			return 0;
+
+		/*
+		 * In any case, the next search must start from here
+		 */
+		*offset = key.offset;
+		read_extent_buffer(leaf, &chunk, item_off, sizeof(chunk));
+
+		/*
+		 * The raw field is in on-disk byte order: go through the
+		 * accessor, as done for every other chunk field below.
+		 */
+		num_stripes = btrfs_stack_chunk_num_stripes(&chunk);
+
+		/* the item must really contain every stripe it declares */
+		if (stripes_off + num_stripes * sizeof(struct btrfs_stripe) >
+		    item_len) {
+			ASSERT(0);
+			return -EINVAL;
+		}
+
+		/* bytes this chunk is going to take in the user buffer */
+		destsize = chunk_size +
+			num_stripes * sizeof(struct btrfs_chunk_info_stripe);
+
+		if (buf_size < destsize + *used_buf) {
+			/* something was already copied: try another time */
+			if (*num_found)
+				return -EAGAIN;
+			/* in any case the buffer is too small */
+			return -EOVERFLOW;
+		}
+
+		/* copy chunk */
+		memset(&ci, 0, chunk_size);
+		ci.length = btrfs_stack_chunk_length(&chunk);
+		ci.stripe_len = btrfs_stack_chunk_stripe_len(&chunk);
+		ci.type = btrfs_stack_chunk_type(&chunk);
+		ci.num_stripes = num_stripes;
+		ci.sub_stripes = btrfs_stack_chunk_sub_stripes(&chunk);
+		ci.offset = key.offset;
+
+		if (copy_to_user(ubuf + *used_buf, &ci, chunk_size))
+			return -EFAULT;
+
+		*used_buf += chunk_size;
+
+		/* copy stripes */
+		for (j = 0; j < num_stripes; j++) {
+			struct btrfs_stripe chunk_stripe;
+			struct btrfs_chunk_info_stripe csi;
+
+			read_extent_buffer(leaf, &chunk_stripe,
+					item_off + stripes_off +
+						sizeof(struct btrfs_stripe) * j,
+					sizeof(chunk_stripe));
+
+			memset(&csi, 0, sizeof(csi));
+
+			csi.devid = btrfs_stack_stripe_devid(&chunk_stripe);
+			csi.offset = btrfs_stack_stripe_offset(&chunk_stripe);
+			memcpy(csi.dev_uuid, chunk_stripe.dev_uuid,
+				sizeof(chunk_stripe.dev_uuid));
+			if (copy_to_user(ubuf + *used_buf, &csi, sizeof(csi)))
+				return -EFAULT;
+
+			*used_buf += sizeof(csi);
+		}
+
+		++(*num_found);
+	}
+
+	/* whole leaf consumed: resume right after the last key seen */
+	if (*offset < (u64)-1)
+		++(*offset);
+
+	return 1;
+}
+
+/*
+ * Walk the chunk tree starting from key offset *offset and copy the chunk
+ * items found into the user buffer, one leaf at a time.
+ *
+ * @inode:	 inode used to reach the filesystem (via its super block)
+ * @offset:	 in/out, key offset to start from; updated by copy_chunk_info()
+ * @items_count: in/out, incremented for every chunk copied
+ * @ubuf:	 user buffer receiving the records
+ * @buf_size:	 size of @ubuf in bytes
+ *
+ * Return: 0 when no further chunk item is found; 1 when the loop stops
+ * because the buffer is full right after a whole leaf was copied; -ENOMEM,
+ * -ENOENT (chunk root not available), a negative error from
+ * btrfs_search_forward(), or the negative value of copy_chunk_info().
+ * -EAGAIN is also returned when @buf_size is 0.
+ */
+static noinline int search_chunk_info(struct inode *inode, u64 *offset,
+				      int *items_count,
+				      char __user *ubuf, u64 buf_size)
+{
+	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	int ret = -EAGAIN;
+	u64 used_buf = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* search for BTRFS_CHUNK_TREE_OBJECTID tree */
+	key.objectid = BTRFS_CHUNK_TREE_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	root = btrfs_get_fs_root(info, &key, true);
+	if (IS_ERR(root)) {
+		/* the btrfs message helpers terminate the line themselves */
+		btrfs_err(info, "could not find root");
+		ret = -ENOENT;
+		goto out_free_path;
+	}
+
+	while (used_buf < buf_size) {
+		key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+		key.type = BTRFS_CHUNK_ITEM_KEY;
+		key.offset = *offset;
+
+		ret = btrfs_search_forward(root, &key, path, 0);
+		if (ret != 0) {
+			/* > 0 means nothing more to return: not an error */
+			if (ret > 0)
+				ret = 0;
+			break;
+		}
+
+		ret = copy_chunk_info(path, ubuf, buf_size,
+				      &used_buf, items_count, offset);
+
+		btrfs_release_path(path);
+
+		if (ret < 1)
+			break;
+	}
+
+	/* drop the reference taken by btrfs_get_fs_root() */
+	btrfs_put_root(root);
+out_free_path:
+	/* also releases whatever the path still holds */
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * Handler of BTRFS_IOC_GET_CHUNK_INFO.
+ *
+ * The user buffer starts with a struct btrfs_ioctl_chunk_info header and the
+ * chunk records are stored right after it. The header is copied back even
+ * when the search fails, so the caller gets the updated offset and count.
+ *
+ * Return: the value of search_chunk_info(), -EFAULT if the header cannot be
+ * read or written, -EOVERFLOW if the buffer cannot hold the header plus one
+ * struct btrfs_chunk_info.
+ */
+static noinline int btrfs_ioctl_get_chunk_info(struct file *file,
+					       void __user *argp)
+{
+	const size_t hdr_size = sizeof(struct btrfs_ioctl_chunk_info);
+	const size_t max_size = SZ_16M;
+	struct btrfs_ioctl_chunk_info arg;
+	size_t avail;
+	int err;
+
+	if (copy_from_user(&arg, argp, sizeof(arg)))
+		return -EFAULT;
+
+	avail = arg.buf_size;
+	arg.items_count = 0;
+
+	if (avail < hdr_size + sizeof(struct btrfs_chunk_info))
+		return -EOVERFLOW;
+
+	/* limit result size to 16MB */
+	if (avail > max_size)
+		avail = max_size;
+
+	/* the records go after the header, in what is left of the buffer */
+	err = search_chunk_info(file_inode(file), &arg.offset,
+				&arg.items_count, argp + hdr_size,
+				avail - hdr_size);
+
+	return copy_to_user(argp, &arg, hdr_size) ? -EFAULT : err;
+}
+
 /*
  * Search INODE_REFs to identify path name of 'dirid' directory
  * in a 'tree_id' tree. and sets path name to 'name'.
@@ -4907,6 +5116,8 @@  long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_get_subvol_rootref(file, argp);
 	case BTRFS_IOC_INO_LOOKUP_USER:
 		return btrfs_ioctl_ino_lookup_user(file, argp);
+	case BTRFS_IOC_GET_CHUNK_INFO:
+		return btrfs_ioctl_get_chunk_info(file, argp);
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 8134924cfc17..b28f7886dcbd 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -734,6 +734,42 @@  struct btrfs_ioctl_received_subvol_args {
 	__u64	reserved[16];		/* in */
 };
 
+struct btrfs_chunk_info_stripe {	/* one stripe of a reported chunk */
+	__u64 devid;			/* devid of the on-disk stripe */
+	__u64 offset;			/* offset of the on-disk stripe */
+	__u8 dev_uuid[BTRFS_UUID_SIZE];	/* dev_uuid of the on-disk stripe */
+};
+
+struct btrfs_chunk_info {	/* one chunk record of the ioctl result */
+	/* logical start of this chunk (offset of the chunk item key) */
+	__u64 offset;
+	/* size of this chunk in bytes */
+	__u64 length;
+
+	__u64 stripe_len;	/* stripe_len of the on-disk chunk item */
+	__u64 type;		/* type of the on-disk chunk item */
+
+	/* 2^16 stripes is quite a lot, a second limit is the size of a single
+	 * item in the btree
+	 */
+	__u16 num_stripes;
+
+	/* sub stripes only matter for raid10 */
+	__u16 sub_stripes;
+
+	struct btrfs_chunk_info_stripe stripes[1];
+	/* additional stripes go here: a record carries num_stripes entries.
+	 * NOTE(review): the members before 'stripes' add up to 36 bytes, so
+	 * where __u64 is 8-byte aligned there are 4 bytes of implicit padding
+	 * in front of 'stripes'; ABIs aligning __u64 to 4 bytes would lay the
+	 * record out differently. Consider an explicit reserved field - TODO
+	 * confirm.
+	 */
+};
+
+
+/*
+ * Argument of BTRFS_IOC_GET_CHUNK_INFO. It sits at the beginning of the user
+ * buffer; the chunk records are written right after it. Exported headers
+ * must use the __u64/__u32 types, the bare u64/u32 exist only in the kernel.
+ */
+struct btrfs_ioctl_chunk_info {
+	__u64			offset;		/* in: offset to start the
+						 * search, out: offset for the
+						 * next call
+						 */
+	__u32			buf_size;	/* in: size of the buffer,
+						 * including
+						 * btrfs_ioctl_chunk_info
+						 */
+	__u32			items_count;	/* out: number of items
+						 * returned
+						 */
+};
+
 /*
  * Caller doesn't want file data in the send stream, even if the
  * search of clone sources doesn't find an extent. UPDATE_EXTENT
@@ -972,5 +1008,7 @@  enum btrfs_err_code {
 				struct btrfs_ioctl_ino_lookup_user_args)
 #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
 				struct btrfs_ioctl_vol_args_v2)
+/* the argument is read by the kernel and written back: both directions */
+#define BTRFS_IOC_GET_CHUNK_INFO _IOWR(BTRFS_IOCTL_MAGIC, 64, \
+				struct btrfs_ioctl_chunk_info)
 
 #endif /* _UAPI_LINUX_BTRFS_H */