diff mbox

[1/2] btrfs: extend balance filter limit to take minimum and maximum

Message ID 35832c0abf72cfd1e118d484f6a9ebdd689f8757.1445013000.git.dsterba@suse.com (mailing list archive)
State Superseded
Headers show

Commit Message

David Sterba Oct. 16, 2015, 4:50 p.m. UTC
The 'limit' filter is underdesigned, it should have been a range for
[min,max], with some relaxed semantics when one of the bounds is
missing. Besides that, using a full u64 for a single value is a waste of
bytes.

Let's fix both by extending the use of the u64 bytes for the [min,max]
range. This can be done in a backward compatible way, the range will be
interpreted only if the appropriate flag is set
(BTRFS_BALANCE_ARGS_LIMITS).

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h           | 14 ++++++++++++--
 fs/btrfs/volumes.c         | 42 ++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h         |  1 +
 include/uapi/linux/btrfs.h | 13 ++++++++++++-
 4 files changed, 67 insertions(+), 3 deletions(-)

Comments

Zygo Blaxell Oct. 16, 2015, 11:08 p.m. UTC | #1
On Fri, Oct 16, 2015 at 06:50:08PM +0200, David Sterba wrote:
> The 'limit' filter is underdesigned, it should have been a range for
> [min,max], with some relaxed semantics when one of the bounds is
> missing. Besides that, using a full u64 for a single value is a waste of
> bytes.

What is min for?

If we have more than 'max' matching chunks, we'd process 'max' and stop.
If we have fewer than 'min' chunks, we'd process what we have and run out.

If we have more than 'min' chunks but fewer than 'max' chunks, why would
we stop before we reach 'max' chunks?

> Let's fix both by extending the use of the u64 bytes for the [min,max]
> range. This can be done in a backward compatible way, the range will be
> interpreted only if the appropriate flag is set
> (BTRFS_BALANCE_ARGS_LIMITS).
> 
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>  fs/btrfs/ctree.h           | 14 ++++++++++++--
>  fs/btrfs/volumes.c         | 42 ++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/volumes.h         |  1 +
>  include/uapi/linux/btrfs.h | 13 ++++++++++++-
>  4 files changed, 67 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 938efe33be80..7d2e1b6d0ac1 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -846,8 +846,18 @@ struct btrfs_disk_balance_args {
>  	/* BTRFS_BALANCE_ARGS_* */
>  	__le64 flags;
>  
> -	/* BTRFS_BALANCE_ARGS_LIMIT value */
> -	__le64 limit;
> +	/*
> +	 * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
> +	 * BTRFS_BALANCE_ARGS_LIMITS - the extend version can use minimum and
> +	 * maximum
> +	 */
> +	union {
> +		__le64 limit;
> +		struct {
> +			__le32 limit_min;
> +			__le32 limit_max;
> +		};
> +	};
>  
>  	__le64 unused[7];
>  } __attribute__ ((__packed__));
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 6fc735869c18..0fbe6855ea05 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -3250,6 +3250,16 @@ static int should_balance_chunk(struct btrfs_root *root,
>  			return 0;
>  		else
>  			bargs->limit--;
> +	} else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMITS)) {
> +		/*
> +		 * Same logic as the 'limit' filter; the minimum cannot be
> +		 * determined here because we do not have the global informatoin
> +		 * about the count of all chunks that satisfy the filters.
> +		 */
> +		if (bargs->limit_max == 0)
> +			return 0;
> +		else
> +			bargs->limit_max--;
>  	}
>  
>  	return 1;
> @@ -3264,6 +3274,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
>  	struct btrfs_device *device;
>  	u64 old_size;
>  	u64 size_to_free;
> +	u64 chunk_type;
>  	struct btrfs_chunk *chunk;
>  	struct btrfs_path *path;
>  	struct btrfs_key key;
> @@ -3274,9 +3285,13 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
>  	int ret;
>  	int enospc_errors = 0;
>  	bool counting = true;
> +	/* The single value limit and min/max limits use the same bytes in the */
>  	u64 limit_data = bctl->data.limit;
>  	u64 limit_meta = bctl->meta.limit;
>  	u64 limit_sys = bctl->sys.limit;
> +	u32 count_data = 0;
> +	u32 count_meta = 0;
> +	u32 count_sys = 0;
>  
>  	/* step one make some room on all the devices */
>  	devices = &fs_info->fs_devices->devices;
> @@ -3317,6 +3332,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
>  	spin_unlock(&fs_info->balance_lock);
>  again:
>  	if (!counting) {
> +		/*
> +		 * The single value limit and min/max limits use the same bytes
> +		 * in the
> +		 */
>  		bctl->data.limit = limit_data;
>  		bctl->meta.limit = limit_meta;
>  		bctl->sys.limit = limit_sys;
> @@ -3364,6 +3383,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
>  		}
>  
>  		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
> +		chunk_type = btrfs_chunk_type(leaf, chunk);
>  
>  		if (!counting) {
>  			spin_lock(&fs_info->balance_lock);
> @@ -3384,6 +3404,28 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
>  			spin_lock(&fs_info->balance_lock);
>  			bctl->stat.expected++;
>  			spin_unlock(&fs_info->balance_lock);
> +
> +			if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
> +				count_data++;
> +			else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
> +				count_sys++;
> +			else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
> +				count_meta++;
> +
> +			goto loop;
> +		}
> +
> +		/*
> +		 * Apply limit_min filter, no need to check if the LIMITS
> +		 * filter is used, limit_min is 0 by default
> +		 */
> +		if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
> +					count_data < bctl->data.limit_min)
> +				|| ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
> +					count_meta < bctl->meta.limit_min)
> +				|| ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
> +					count_sys < bctl->sys.limit_min)) {
> +			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
>  			goto loop;
>  		}
>  
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 2ca784a14e84..1c9d8edd7d57 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -375,6 +375,7 @@ struct map_lookup {
>  #define BTRFS_BALANCE_ARGS_DRANGE	(1ULL << 3)
>  #define BTRFS_BALANCE_ARGS_VRANGE	(1ULL << 4)
>  #define BTRFS_BALANCE_ARGS_LIMIT	(1ULL << 5)
> +#define BTRFS_BALANCE_ARGS_LIMITS	(1ULL << 6)
>  
>  /*
>   * Profile changing flags.  When SOFT is set we won't relocate chunk if
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index b6dec05c7196..264ecea5ecfc 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -217,7 +217,18 @@ struct btrfs_balance_args {
>  
>  	__u64 flags;
>  
> -	__u64 limit;		/* limit number of processed chunks */
> +	/*
> +	 * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
> +	 * BTRFS_BALANCE_ARGS_LIMITS - the extend version can use minimum and
> +	 * maximum
> +	 */
> +	union {
> +		__u64 limit;		/* limit number of processed chunks */
> +		struct {
> +			__u32 limit_min;
> +			__u32 limit_max;
> +		};
> +	};
>  	__u64 unused[7];
>  } __attribute__ ((__packed__));
>  
> -- 
> 2.6.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Oct. 19, 2015, 9:14 a.m. UTC | #2
On Fri, Oct 16, 2015 at 07:08:37PM -0400, Zygo Blaxell wrote:
> On Fri, Oct 16, 2015 at 06:50:08PM +0200, David Sterba wrote:
> > The 'limit' filter is underdesigned, it should have been a range for
> > [min,max], with some relaxed semantics when one of the bounds is
> > missing. Besides that, using a full u64 for a single value is a waste of
> > bytes.
> 
> What is min for?
> 
> If we have more than 'max' matching chunks, we'd process 'max' and stop.

Right.

> If we have fewer than 'min' chunks, we'd process what we have and run out.

If we have fewer than min, we'll do nothing.

> If we have more than 'min' chunks but fewer than 'max' chunks, why would
> we stop before we reach 'max' chunks?

I must be missing something here. If there are less than 'max' chunks
(with all filters applied), how are we supposed to reach 'max'?

The 'limit' filter is applied after all the other filters. It can be
used standalone, or with eg. usage. The usecase where I find it useful:

* we want to make metadata chunks more compact
* let's set usage=30
* avoid unnecessary work (which is a bigger performance hit in case
  of metadata chunks), ie. if there's a single chunk with usage < 30%,
  let's skip it

The command:

 $ btrfs balance start -musage=30,limit=2.. /path

So in case there's only one such chunk, balance would just move it
without any gain. Avoiding the unnecessary work here is IMO a win,
though it might seem limited. But I'm going to utilize this in the
btrfsmaintenance package that among others does periodic balancing
on the background, I believe the effects will be noticeable.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zygo Blaxell Oct. 19, 2015, 3:31 p.m. UTC | #3
On Mon, Oct 19, 2015 at 11:14:16AM +0200, David Sterba wrote:
> On Fri, Oct 16, 2015 at 07:08:37PM -0400, Zygo Blaxell wrote:
> > On Fri, Oct 16, 2015 at 06:50:08PM +0200, David Sterba wrote:
> > > The 'limit' filter is underdesigned, it should have been a range for
> > > [min,max], with some relaxed semantics when one of the bounds is
> > > missing. Besides that, using a full u64 for a single value is a waste of
> > > bytes.
> > 
> > What is min for?
> > 
> > If we have more than 'max' matching chunks, we'd process 'max' and stop.
> 
> Right.
> 
> > If we have fewer than 'min' chunks, we'd process what we have and run out.
> 
> If we have fewer than min, we'll do nothing.

Aha.  My mental model of balance was missing the step where chunks are
enumerated and filtered before any real work starts.

> > If we have more than 'min' chunks but fewer than 'max' chunks, why would
> > we stop before we reach 'max' chunks?
> 
> I must be missing something here. If there are less than 'max' chunks
> (with all filters applied), how are we supposed to reach 'max'?
> 
> The 'limit' filter is applied after all the other filters. It can be
> used standalone, or with eg. usage. The usecase where I find it useful:
> 
> * we want to make metadata chunks more compact
> * let's set usage=30
> * avoid unnecessary work (which is a bigger performance hit in case
>   of metadata chunks), ie. if there's a single chunk with usage < 30%,
>   let's skip it
> 
> The command:
> 
>  $ btrfs balance start -musage=30,limit=2.. /path
> 
> So in case there's only one such chunk, balance would just move it
> without any gain. 

...and if there's two chunks with <50% space used, then we can guarantee
that the data from one fits in the unused space of the other.  With a
bit of math we can figure out what limit-min has to be for any particular
value of usage to guarantee one chunk is always deallocated.  That makes
sense.

> Avoiding the unnecessary work here is IMO a win,
> though it might seem limited. But I'm going to utilize this in the
> btrfsmaintenance package that among others does periodic balancing
> on the background, I believe the effects will be noticeable.

...and this use case did not occur to me because I try to make
sure I never do anything remotely similar to this, even accidentally.

I've learned the hard way not to balance metadata except when reshaping
a filesystem array.  Apart from being egregiously slow and hard to
interrupt, balancing metadata deallocates space that was previously
allocated to metadata.  If you keep doing it on a nearly-full filesystem,
the risk that you'll run out of metadata space in the future increases
to certainty.

The penalty for running out of metadata space is currently much, much
worse than the cost of unused metadata space could ever be.  :-/

"-dusage=49,limit=2.." is something I might use, though, especially for
small or mostly static filesystems where there might be a shortage of
work for balance (a large filesystem with a moderate rate of modification
doesn't need a min-limit).

> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 938efe33be80..7d2e1b6d0ac1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -846,8 +846,18 @@  struct btrfs_disk_balance_args {
 	/* BTRFS_BALANCE_ARGS_* */
 	__le64 flags;
 
-	/* BTRFS_BALANCE_ARGS_LIMIT value */
-	__le64 limit;
+	/*
+	 * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
+	 * BTRFS_BALANCE_ARGS_LIMITS - the extend version can use minimum and
+	 * maximum
+	 */
+	union {
+		__le64 limit;
+		struct {
+			__le32 limit_min;
+			__le32 limit_max;
+		};
+	};
 
 	__le64 unused[7];
 } __attribute__ ((__packed__));
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6fc735869c18..0fbe6855ea05 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3250,6 +3250,16 @@  static int should_balance_chunk(struct btrfs_root *root,
 			return 0;
 		else
 			bargs->limit--;
+	} else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMITS)) {
+		/*
+		 * Same logic as the 'limit' filter; the minimum cannot be
+		 * determined here because we do not have the global informatoin
+		 * about the count of all chunks that satisfy the filters.
+		 */
+		if (bargs->limit_max == 0)
+			return 0;
+		else
+			bargs->limit_max--;
 	}
 
 	return 1;
@@ -3264,6 +3274,7 @@  static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 	struct btrfs_device *device;
 	u64 old_size;
 	u64 size_to_free;
+	u64 chunk_type;
 	struct btrfs_chunk *chunk;
 	struct btrfs_path *path;
 	struct btrfs_key key;
@@ -3274,9 +3285,13 @@  static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 	int ret;
 	int enospc_errors = 0;
 	bool counting = true;
+	/* The single value limit and min/max limits use the same bytes in the */
 	u64 limit_data = bctl->data.limit;
 	u64 limit_meta = bctl->meta.limit;
 	u64 limit_sys = bctl->sys.limit;
+	u32 count_data = 0;
+	u32 count_meta = 0;
+	u32 count_sys = 0;
 
 	/* step one make some room on all the devices */
 	devices = &fs_info->fs_devices->devices;
@@ -3317,6 +3332,10 @@  static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 	spin_unlock(&fs_info->balance_lock);
 again:
 	if (!counting) {
+		/*
+		 * The single value limit and min/max limits use the same bytes
+		 * in the
+		 */
 		bctl->data.limit = limit_data;
 		bctl->meta.limit = limit_meta;
 		bctl->sys.limit = limit_sys;
@@ -3364,6 +3383,7 @@  static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 		}
 
 		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+		chunk_type = btrfs_chunk_type(leaf, chunk);
 
 		if (!counting) {
 			spin_lock(&fs_info->balance_lock);
@@ -3384,6 +3404,28 @@  static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 			spin_lock(&fs_info->balance_lock);
 			bctl->stat.expected++;
 			spin_unlock(&fs_info->balance_lock);
+
+			if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
+				count_data++;
+			else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
+				count_sys++;
+			else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
+				count_meta++;
+
+			goto loop;
+		}
+
+		/*
+		 * Apply limit_min filter, no need to check if the LIMITS
+		 * filter is used, limit_min is 0 by default
+		 */
+		if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
+					count_data < bctl->data.limit_min)
+				|| ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
+					count_meta < bctl->meta.limit_min)
+				|| ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+					count_sys < bctl->sys.limit_min)) {
+			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
 			goto loop;
 		}
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2ca784a14e84..1c9d8edd7d57 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -375,6 +375,7 @@  struct map_lookup {
 #define BTRFS_BALANCE_ARGS_DRANGE	(1ULL << 3)
 #define BTRFS_BALANCE_ARGS_VRANGE	(1ULL << 4)
 #define BTRFS_BALANCE_ARGS_LIMIT	(1ULL << 5)
+#define BTRFS_BALANCE_ARGS_LIMITS	(1ULL << 6)
 
 /*
  * Profile changing flags.  When SOFT is set we won't relocate chunk if
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index b6dec05c7196..264ecea5ecfc 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -217,7 +217,18 @@  struct btrfs_balance_args {
 
 	__u64 flags;
 
-	__u64 limit;		/* limit number of processed chunks */
+	/*
+	 * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
+	 * BTRFS_BALANCE_ARGS_LIMITS - the extend version can use minimum and
+	 * maximum
+	 */
+	union {
+		__u64 limit;		/* limit number of processed chunks */
+		struct {
+			__u32 limit_min;
+			__u32 limit_max;
+		};
+	};
 	__u64 unused[7];
 } __attribute__ ((__packed__));