diff mbox series

[1/5] btrfs: add flags to give an hint to the chunk allocator

Message ID b15072e61eac46aa9f61317c59219713a01ff897.1646589622.git.kreijack@inwind.it (mailing list archive)
State New, archived
Headers show
Series btrfs: allocation_hint | expand

Commit Message

Goffredo Baroncelli March 6, 2022, 6:14 p.m. UTC
From: Goffredo Baroncelli <kreijack@inwind.it>

Add the following flags to give an hint about which chunk should be
allocated in which disk:

- BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
  preferred for data chunk, but metadata chunk allowed
- BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
  preferred for metadata chunk, but data chunk allowed
- BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
  only metadata chunk allowed
- BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
  only data chunk allowed

Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
---
 include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

Comments

Josef Bacik March 22, 2022, 5:51 p.m. UTC | #1
On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
> From: Goffredo Baroncelli <kreijack@inwind.it>
> 
> Add the following flags to give an hint about which chunk should be
> allocated in which disk:
> 
> - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
>   preferred for data chunk, but metadata chunk allowed
> - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
>   preferred for metadata chunk, but data chunk allowed
> - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
>   only metadata chunk allowed
> - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
>   only data chunk allowed
> 
> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
> ---
>  include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> index b069752a8ecf..e0d842c2e616 100644
> --- a/include/uapi/linux/btrfs_tree.h
> +++ b/include/uapi/linux/btrfs_tree.h
> @@ -389,6 +389,22 @@ struct btrfs_key {
>  	__u64 offset;
>  } __attribute__ ((__packed__));
>  
> +/* dev_item.type */
> +
> +/* btrfs chunk allocation hint */
> +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
> +/* btrfs chunk allocation hint mask */
> +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
> +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
> +/* preferred data chunk, but metadata chunk allowed */
> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
> +/* preferred metadata chunk, but data chunk allowed */
> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED	(1ULL)
> +/* only metadata chunk are allowed */
> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY		(2ULL)
> +/* only data chunk allowed */
> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY		(3ULL)
> +

Just a style thing here, format it to match everything else, and have the bit
stuff later, as well as newlines between everything since we're commenting.
Also we don't need the () around the values, generally it's best to follow the
style of the surrounding code when in doubt.

I know Dave probably has stronger opinions on this than I do, but I think
something like this would be better.

/*
 * btrfs_dev_item.type values.
 *
 * The _PREFERRED options indicate we prefer to allocate the respective type on
 * this device, but will fall back to allocating any chunk type if we run out of
 * space.  The _ONLY options indicate we will only allocate the given type from
 * this device and no other types.
 */
#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	0ULL
#define BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED	1ULL
....

#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
#define BTRFS_DEV_ALLOCATION_HINT_MASK		\
	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)

This will make it a bit clearer and easier to read.  Thanks,

Josef
Josef Bacik March 22, 2022, 5:56 p.m. UTC | #2
On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
> From: Goffredo Baroncelli <kreijack@inwind.it>
> 
> Add the following flags to give an hint about which chunk should be
> allocated in which disk:
> 
> - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
>   preferred for data chunk, but metadata chunk allowed
> - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
>   preferred for metadata chunk, but data chunk allowed
> - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
>   only metadata chunk allowed
> - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
>   only data chunk allowed
> 
> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
> ---
>  include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> index b069752a8ecf..e0d842c2e616 100644
> --- a/include/uapi/linux/btrfs_tree.h
> +++ b/include/uapi/linux/btrfs_tree.h
> @@ -389,6 +389,22 @@ struct btrfs_key {
>  	__u64 offset;
>  } __attribute__ ((__packed__));
>  
> +/* dev_item.type */
> +
> +/* btrfs chunk allocation hint */
> +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
> +/* btrfs chunk allocation hint mask */
> +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
> +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
> +/* preferred data chunk, but metadata chunk allowed */
> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)

Actually don't we have 0 set on type already?  So this will make all existing
file systems have DATA_PREFERRED, when in reality they may not want that
behavior.  So should we start at 1?  Thanks,

Josef
Goffredo Baroncelli March 22, 2022, 6:49 p.m. UTC | #3
On 22/03/2022 18.56, Josef Bacik wrote:
> On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
>> From: Goffredo Baroncelli <kreijack@inwind.it>
>>
>> Add the following flags to give an hint about which chunk should be
>> allocated in which disk:
>>
>> - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
>>    preferred for data chunk, but metadata chunk allowed
>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
>>    preferred for metadata chunk, but data chunk allowed
>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
>>    only metadata chunk allowed
>> - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
>>    only data chunk allowed
>>
>> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
>> ---
>>   include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
>>   1 file changed, 16 insertions(+)
>>
>> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
>> index b069752a8ecf..e0d842c2e616 100644
>> --- a/include/uapi/linux/btrfs_tree.h
>> +++ b/include/uapi/linux/btrfs_tree.h
>> @@ -389,6 +389,22 @@ struct btrfs_key {
>>   	__u64 offset;
>>   } __attribute__ ((__packed__));
>>   
>> +/* dev_item.type */
>> +
>> +/* btrfs chunk allocation hint */
>> +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
>> +/* btrfs chunk allocation hint mask */
>> +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
>> +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
>> +/* preferred data chunk, but metadata chunk allowed */
>> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
> 
> Actually don't we have 0 set on type already?  So this will make all existing
> file systems have DATA_PREFERRED, when in reality they may not want that
> behavior.  So should we start at 1?  Thanks,

Yes, the default is 0 (now DATA_PREFERRED).
If we have all the disks set to DATA_PREFERRED (or METADATA_PREFERRED), all the disks
have the same priority so the chunks allocator works as usual.

If we have DATA_PREFERRED=1, it is not clear to me which would be the expected behavior when the allocator has to choice one of the followings disks:
- TYPE=0
- DATA_PREFERRED
- DATA_ONLY

It should ignore TYPE=0 ? or it should block the 'allocation_hint' policy ? Or TYPE=0 should have the lowest (or highest) priority ?

DATA_PREFERRED to me seems a safe default, because at worst we have only missing performance penalty (i.e. it is a faster disk where we should put METADATA)

BR

> 
> Josef
Josef Bacik March 22, 2022, 7:52 p.m. UTC | #4
On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
> From: Goffredo Baroncelli <kreijack@inwind.it>
> 
> Add the following flags to give an hint about which chunk should be
> allocated in which disk:
> 
> - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
>   preferred for data chunk, but metadata chunk allowed
> - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
>   preferred for metadata chunk, but data chunk allowed
> - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
>   only metadata chunk allowed
> - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
>   only data chunk allowed
> 
> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
> ---
>  include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> index b069752a8ecf..e0d842c2e616 100644
> --- a/include/uapi/linux/btrfs_tree.h
> +++ b/include/uapi/linux/btrfs_tree.h
> @@ -389,6 +389,22 @@ struct btrfs_key {
>  	__u64 offset;
>  } __attribute__ ((__packed__));
>  
> +/* dev_item.type */
> +
> +/* btrfs chunk allocation hint */
> +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
> +/* btrfs chunk allocation hint mask */
> +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
> +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
> +/* preferred data chunk, but metadata chunk allowed */
> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
> +/* preferred metadata chunk, but data chunk allowed */
> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED	(1ULL)
> +/* only metadata chunk are allowed */
> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY		(2ULL)
> +/* only data chunk allowed */
> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY		(3ULL)
> +

I also just realized you're using these as flags, so they need to be

(1ULL << 0)
(1ULL << 1)
(1ULL << 2)
(1ULL << 3)

Thanks,

Josef
Goffredo Baroncelli March 22, 2022, 8:25 p.m. UTC | #5
On 22/03/2022 20.52, Josef Bacik wrote:
> On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
>> From: Goffredo Baroncelli <kreijack@inwind.it>
>>
>> Add the following flags to give an hint about which chunk should be
>> allocated in which disk:
>>
>> - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
>>    preferred for data chunk, but metadata chunk allowed
>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
>>    preferred for metadata chunk, but data chunk allowed
>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
>>    only metadata chunk allowed
>> - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
>>    only data chunk allowed
>>
>> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
>> ---
>>   include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
>>   1 file changed, 16 insertions(+)
>>
>> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
>> index b069752a8ecf..e0d842c2e616 100644
>> --- a/include/uapi/linux/btrfs_tree.h
>> +++ b/include/uapi/linux/btrfs_tree.h
>> @@ -389,6 +389,22 @@ struct btrfs_key {
>>   	__u64 offset;
>>   } __attribute__ ((__packed__));
>>   
>> +/* dev_item.type */
>> +
>> +/* btrfs chunk allocation hint */
>> +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
>> +/* btrfs chunk allocation hint mask */
>> +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
>> +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
>> +/* preferred data chunk, but metadata chunk allowed */
>> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
>> +/* preferred metadata chunk, but data chunk allowed */
>> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED	(1ULL)
>> +/* only metadata chunk are allowed */
>> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY		(2ULL)
>> +/* only data chunk allowed */
>> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY		(3ULL)
>> +
> 
> I also just realized you're using these as flags, so they need to be
> 
> (1ULL << 0)
> (1ULL << 1)
> (1ULL << 2)
> (1ULL << 3)
> 

Could you elaborate a bit ? These are mutual exclusive values...

> Thanks,
> 
> Josef
Zygo Blaxell March 23, 2022, 2:56 a.m. UTC | #6
On Tue, Mar 22, 2022 at 09:25:45PM +0100, Goffredo Baroncelli wrote:
> On 22/03/2022 20.52, Josef Bacik wrote:
> > On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
> > > From: Goffredo Baroncelli <kreijack@inwind.it>
> > > 
> > > Add the following flags to give an hint about which chunk should be
> > > allocated in which disk:
> > > 
> > > - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
> > >    preferred for data chunk, but metadata chunk allowed
> > > - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
> > >    preferred for metadata chunk, but data chunk allowed
> > > - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
> > >    only metadata chunk allowed
> > > - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
> > >    only data chunk allowed
> > > 
> > > Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
> > > ---
> > >   include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
> > >   1 file changed, 16 insertions(+)
> > > 
> > > diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> > > index b069752a8ecf..e0d842c2e616 100644
> > > --- a/include/uapi/linux/btrfs_tree.h
> > > +++ b/include/uapi/linux/btrfs_tree.h
> > > @@ -389,6 +389,22 @@ struct btrfs_key {
> > >   	__u64 offset;
> > >   } __attribute__ ((__packed__));
> > > +/* dev_item.type */
> > > +
> > > +/* btrfs chunk allocation hint */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
> > > +/* btrfs chunk allocation hint mask */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
> > > +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
> > > +/* preferred data chunk, but metadata chunk allowed */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
> > > +/* preferred metadata chunk, but data chunk allowed */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED	(1ULL)
> > > +/* only metadata chunk are allowed */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY		(2ULL)
> > > +/* only data chunk allowed */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY		(3ULL)
> > > +
> > 
> > I also just realized you're using these as flags, so they need to be
> > 
> > (1ULL << 0)
> > (1ULL << 1)
> > (1ULL << 2)
> > (1ULL << 3)
> > 
> 
> Could you elaborate a bit ? These are mutual exclusive values...

One of the comments I had on earlier versions was that these should
be bit values.  Bit 0 is data (0) or metadata (1), bit 1 is preferred
(0) or only (2).  Thus "metadata only" is 3, "data preferred" is 0,
"data only" is 2, and "metadata preferred" is 1.  This maintained on-disk
compatibility with the earliest versions that only had the two "preferred"
options encoded as 0 and 1.

At some point this got lost.  Between one of the patch versions and
another I had to change the type numbers on all of my devices.

> > Thanks,
> > 
> > Josef
> 
> 
> -- 
> gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
> Key fingerprint BBF5 1610 0B64 DAC6 5F7D  17B2 0EDA 9B37 8B82 E0B5
>
Zygo Blaxell March 23, 2022, 3:26 a.m. UTC | #7
On Tue, Mar 22, 2022 at 01:56:14PM -0400, Josef Bacik wrote:
> On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
> > From: Goffredo Baroncelli <kreijack@inwind.it>
> > 
> > Add the following flags to give an hint about which chunk should be
> > allocated in which disk:
> > 
> > - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
> >   preferred for data chunk, but metadata chunk allowed
> > - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
> >   preferred for metadata chunk, but data chunk allowed
> > - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
> >   only metadata chunk allowed
> > - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
> >   only data chunk allowed
> > 
> > Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
> > ---
> >  include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> > 
> > diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> > index b069752a8ecf..e0d842c2e616 100644
> > --- a/include/uapi/linux/btrfs_tree.h
> > +++ b/include/uapi/linux/btrfs_tree.h
> > @@ -389,6 +389,22 @@ struct btrfs_key {
> >  	__u64 offset;
> >  } __attribute__ ((__packed__));
> >  
> > +/* dev_item.type */
> > +
> > +/* btrfs chunk allocation hint */
> > +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
> > +/* btrfs chunk allocation hint mask */
> > +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
> > +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
> > +/* preferred data chunk, but metadata chunk allowed */
> > +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
> 
> Actually don't we have 0 set on type already?  So this will make all existing
> file systems have DATA_PREFERRED, when in reality they may not want that
> behavior.  So should we start at 1?  Thanks,

If all the disks in the filesystem have the same value, and it's one
of the two _PREFERRED values, then there is no change in behavior from
before:  all devices will be filled with no (difference in) preferences.
So if the default is either DATA_PREFERRED or METADATA_PREFERRED, then
nothing changes for existing filesystems until at least one device is
set to some other type.

Another problem is, what happens if we add a new device to a filesystem
where some devices have _ONLY set?  In that case, the new device has
type 0, and might get either data or metadata allocated on it that we
don't want.

Four possible solutions to choose from:

1.  Leave things as they are, sysadmins will have to set the type on
new devices immediately after add, and possibly balance to fix bad
allocations if they lose the race.

2.  Make device type a parameter to the add-device ioctl, so that
drives can be added with a non-default type already set.

3.  Define "0" as "get the preference value from a mount option, a
sysfs variable, or some on-disk item?" which is another way to do
#2 that doesn't mean having to change an existing ioctl's parameters
or roll a new one (I haven't checked to see if we have a spare u64
on device add).

4.  Define "0" as meaning "last resort", a new preference level where
the device is not preferred for any allocation unless there are no other
devices with a preference set.  Ecch I don't like this one because there's
a possible race where we're converting e.g. a 4-disk raid1 array into 2
data-only 2 metadata-only, and there's a point where we have 2x data-only
1x metadata-only and one type 0 device and it's possible to ENOSPC on
metadata at that moment if type 0 doesn't allow metadata allocation.
This plan would prevent the "data on the wrong drive" failure mode, so
it's in the list, but don't use it unless you can solve the new problem.


> Josef
>
Goffredo Baroncelli March 24, 2022, 7:05 p.m. UTC | #8
On 23/03/2022 03.56, Zygo Blaxell wrote:
> On Tue, Mar 22, 2022 at 09:25:45PM +0100, Goffredo Baroncelli wrote:
>> On 22/03/2022 20.52, Josef Bacik wrote:
>>> On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
>>>> From: Goffredo Baroncelli <kreijack@inwind.it>
>>>>
>>>> Add the following flags to give an hint about which chunk should be
>>>> allocated in which disk:
>>>>
>>>> - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
>>>>     preferred for data chunk, but metadata chunk allowed
>>>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
>>>>     preferred for metadata chunk, but data chunk allowed
>>>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
>>>>     only metadata chunk allowed
>>>> - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
>>>>     only data chunk allowed
>>>>
>>>> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
>>>> ---
>>>>    include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
>>>>    1 file changed, 16 insertions(+)
>>>>
>>>> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
>>>> index b069752a8ecf..e0d842c2e616 100644
>>>> --- a/include/uapi/linux/btrfs_tree.h
>>>> +++ b/include/uapi/linux/btrfs_tree.h
>>>> @@ -389,6 +389,22 @@ struct btrfs_key {
>>>>    	__u64 offset;
>>>>    } __attribute__ ((__packed__));
>>>> +/* dev_item.type */
>>>> +
>>>> +/* btrfs chunk allocation hint */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
>>>> +/* btrfs chunk allocation hint mask */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
>>>> +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
>>>> +/* preferred data chunk, but metadata chunk allowed */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
>>>> +/* preferred metadata chunk, but data chunk allowed */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED	(1ULL)
>>>> +/* only metadata chunk are allowed */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY		(2ULL)
>>>> +/* only data chunk allowed */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY		(3ULL)
>>>> +
>>>
>>> I also just realized you're using these as flags, so they need to be
>>>
>>> (1ULL << 0)
>>> (1ULL << 1)
>>> (1ULL << 2)
>>> (1ULL << 3)
>>>
>>
>> Could you elaborate a bit ? These are mutual exclusive values...
> 
> One of the comments I had on earlier versions was that these should
> be bit values.  Bit 0 is data (0) or metadata (1), bit 1 is preferred
> (0) or only (2).  Thus "metadata only" is 3, "data preferred" is 0,
> "data only" is 2, and "metadata preferred" is 1.  This maintained on-disk
> compatibility with the earliest versions that only had the two "preferred"
> options encoded as 0 and 1.

At this point I would prefer to not change this part.
  
> At some point this got lost.  Between one of the patch versions and
> another I had to change the type numbers on all of my devices.

I remember that in the first iterations I used 3 bits, when now I use
only two bits. So it is possible that I changed the values.

> 
>>> Thanks,
>>>
>>> Josef
>>
>>
>> -- 
>> gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
>> Key fingerprint BBF5 1610 0B64 DAC6 5F7D  17B2 0EDA 9B37 8B82 E0B5
>>
Josef Bacik March 25, 2022, 2:59 p.m. UTC | #9
On Tue, Mar 22, 2022 at 09:25:45PM +0100, Goffredo Baroncelli wrote:
> On 22/03/2022 20.52, Josef Bacik wrote:
> > On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
> > > From: Goffredo Baroncelli <kreijack@inwind.it>
> > > 
> > > Add the following flags to give an hint about which chunk should be
> > > allocated in which disk:
> > > 
> > > - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
> > >    preferred for data chunk, but metadata chunk allowed
> > > - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
> > >    preferred for metadata chunk, but data chunk allowed
> > > - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
> > >    only metadata chunk allowed
> > > - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
> > >    only data chunk allowed
> > > 
> > > Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
> > > ---
> > >   include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
> > >   1 file changed, 16 insertions(+)
> > > 
> > > diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> > > index b069752a8ecf..e0d842c2e616 100644
> > > --- a/include/uapi/linux/btrfs_tree.h
> > > +++ b/include/uapi/linux/btrfs_tree.h
> > > @@ -389,6 +389,22 @@ struct btrfs_key {
> > >   	__u64 offset;
> > >   } __attribute__ ((__packed__));
> > > +/* dev_item.type */
> > > +
> > > +/* btrfs chunk allocation hint */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
> > > +/* btrfs chunk allocation hint mask */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
> > > +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
> > > +/* preferred data chunk, but metadata chunk allowed */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
> > > +/* preferred metadata chunk, but data chunk allowed */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED	(1ULL)
> > > +/* only metadata chunk are allowed */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY		(2ULL)
> > > +/* only data chunk allowed */
> > > +#define BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY		(3ULL)
> > > +
> > 
> > I also just realized you're using these as flags, so they need to be
> > 
> > (1ULL << 0)
> > (1ULL << 1)
> > (1ULL << 2)
> > (1ULL << 3)
> > 
> 
> Could you elaborate a bit ? These are mutual exclusive values...

Your set patch is doing

type = (type & ~MASK) | value;

So if you have METADATA_PREFERRED set already, and you do DATA_PREFERRED it'll
be

type = (1) | 0

which is METADATA_PREFERRED.

If you were doing simply

type = <value>

then these values would make sense.  So either we need to treat them as bit
flags, which should be <<, or you need to stop using the | operator.  It sounds
like you want them to be exclusive, so you should just change the setting code?
Thanks,

Josef
Goffredo Baroncelli March 25, 2022, 6:55 p.m. UTC | #10
On 25/03/2022 15.59, Josef Bacik wrote:
> On Tue, Mar 22, 2022 at 09:25:45PM +0100, Goffredo Baroncelli wrote:
>> On 22/03/2022 20.52, Josef Bacik wrote:
>>> On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
>>>> From: Goffredo Baroncelli <kreijack@inwind.it>
>>>>
>>>> Add the following flags to give an hint about which chunk should be
>>>> allocated in which disk:
>>>>
>>>> - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
>>>>     preferred for data chunk, but metadata chunk allowed
>>>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
>>>>     preferred for metadata chunk, but data chunk allowed
>>>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
>>>>     only metadata chunk allowed
>>>> - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
>>>>     only data chunk allowed
>>>>
>>>> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
>>>> ---
>>>>    include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
>>>>    1 file changed, 16 insertions(+)
>>>>
>>>> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
>>>> index b069752a8ecf..e0d842c2e616 100644
>>>> --- a/include/uapi/linux/btrfs_tree.h
>>>> +++ b/include/uapi/linux/btrfs_tree.h
>>>> @@ -389,6 +389,22 @@ struct btrfs_key {
>>>>    	__u64 offset;
>>>>    } __attribute__ ((__packed__));
>>>> +/* dev_item.type */
>>>> +
>>>> +/* btrfs chunk allocation hint */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
>>>> +/* btrfs chunk allocation hint mask */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
>>>> +	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
>>>> +/* preferred data chunk, but metadata chunk allowed */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
>>>> +/* preferred metadata chunk, but data chunk allowed */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED	(1ULL)
>>>> +/* only metadata chunk are allowed */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY		(2ULL)
>>>> +/* only data chunk allowed */
>>>> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY		(3ULL)
>>>> +
>>>
>>> I also just realized you're using these as flags, so they need to be
>>>
>>> (1ULL << 0)
>>> (1ULL << 1)
>>> (1ULL << 2)
>>> (1ULL << 3)
>>>
>>
>> Could you elaborate a bit ? These are mutual exclusive values...
> 
> Your set patch is doing
> 
> type = (type & ~MASK) | value;



> 
> So if you have METADATA_PREFERRED set already, and you do DATA_PREFERRED it'll
> be
> 
> type = (1) | 0
> 
> which is METADATA_PREFERRED.

May be that I am missing something, however

MASK = 3
~MASK = ~3 = 0xfffffffffffffffc; /* see below */

so

type = (type & ~MASK) | 0 =
        (1 & 0xfffffffffffffffc) | 0 = 0 -> DATA_PREFERRED


May be that you missed '~' ?

In any case, my MASK definition is wrong, it should contain (u64)1

#define BTRFS_DEV_ALLOCATION_HINT_MASK       \
    (((u64)1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)

otherwise on a 32 bit arch, BTRFS_DEV_ALLOCATION_HINT_MASK = 0xfffffffc


> 
> If you were doing simply
> 
> type = <value>
> 
> then these values would make sense.  So either we need to treat them as bit
> flags, which should be <<, or you need to stop using the | operator.  It sounds
> like you want them to be exclusive, so you should just change the setting code?
> Thanks,
> 
> Josef
Goffredo Baroncelli April 6, 2022, 7:32 p.m. UTC | #11
Gentle ping

On 22/03/2022 19.49, Goffredo Baroncelli wrote:
> On 22/03/2022 18.56, Josef Bacik wrote:
>> On Sun, Mar 06, 2022 at 07:14:39PM +0100, Goffredo Baroncelli wrote:
>>> From: Goffredo Baroncelli <kreijack@inwind.it>
>>>
>>> Add the following flags to give an hint about which chunk should be
>>> allocated in which disk:
>>>
>>> - BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED
>>>    preferred for data chunk, but metadata chunk allowed
>>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED
>>>    preferred for metadata chunk, but data chunk allowed
>>> - BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY
>>>    only metadata chunk allowed
>>> - BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY
>>>    only data chunk allowed
>>>
>>> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
>>> ---
>>>   include/uapi/linux/btrfs_tree.h | 16 ++++++++++++++++
>>>   1 file changed, 16 insertions(+)
>>>
>>> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
>>> index b069752a8ecf..e0d842c2e616 100644
>>> --- a/include/uapi/linux/btrfs_tree.h
>>> +++ b/include/uapi/linux/btrfs_tree.h
>>> @@ -389,6 +389,22 @@ struct btrfs_key {
>>>       __u64 offset;
>>>   } __attribute__ ((__packed__));
>>> +/* dev_item.type */
>>> +
>>> +/* btrfs chunk allocation hint */
>>> +#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT    2
>>> +/* btrfs chunk allocation hint mask */
>>> +#define BTRFS_DEV_ALLOCATION_HINT_MASK    \
>>> +    ((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
>>> +/* preferred data chunk, but metadata chunk allowed */
>>> +#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED    (0ULL)
>>
>> Actually don't we have 0 set on type already?  So this will make all existing
>> file systems have DATA_PREFERRED, when in reality they may not want that
>> behavior.  So should we start at 1?  Thanks,
> 
> Yes, the default is 0 (now DATA_PREFERRED).
> If we have all the disks set to DATA_PREFERRED (or METADATA_PREFERRED), all the disks
> have the same priority so the chunks allocator works as usual.
> 
> If we have DATA_PREFERRED=1, it is not clear to me which would be the expected behavior when the allocator has to choice one of the followings disks:
> - TYPE=0
> - DATA_PREFERRED
> - DATA_ONLY
> 
> It should ignore TYPE=0 ? or it should block the 'allocation_hint' policy ? Or TYPE=0 should have the lowest (or highest) priority ?
> 
> DATA_PREFERRED to me seems a safe default, because at worst we have only missing performance penalty (i.e. it is a faster disk where we should put METADATA)
> 
> BR
> 
>>
>> Josef
> 
>
diff mbox series

Patch

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index b069752a8ecf..e0d842c2e616 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -389,6 +389,22 @@  struct btrfs_key {
 	__u64 offset;
 } __attribute__ ((__packed__));
 
+/* dev_item.type */
+
+/* btrfs chunk allocation hint */
+#define BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT	2
+/* btrfs chunk allocation hint mask */
+#define BTRFS_DEV_ALLOCATION_HINT_MASK	\
+	((1 << BTRFS_DEV_ALLOCATION_HINT_BIT_COUNT) - 1)
+/* preferred data chunk, but metadata chunk allowed */
+#define BTRFS_DEV_ALLOCATION_HINT_DATA_PREFERRED	(0ULL)
+/* preferred metadata chunk, but data chunk allowed */
+#define BTRFS_DEV_ALLOCATION_HINT_METADATA_PREFERRED	(1ULL)
+/* only metadata chunk are allowed */
+#define BTRFS_DEV_ALLOCATION_HINT_METADATA_ONLY		(2ULL)
+/* only data chunk allowed */
+#define BTRFS_DEV_ALLOCATION_HINT_DATA_ONLY		(3ULL)
+
 struct btrfs_dev_item {
 	/* the internal btrfs device id */
 	__le64 devid;