diff mbox series

btrfs: Optimise space flushing machinery

Message ID 20200310090035.16676-1-nborisov@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: Optimise space flushing machinery | expand

Commit Message

Nikolay Borisov March 10, 2020, 9 a.m. UTC
Instead of iterating all pending tickets on the normal/priority list to
sum their total size the cost can be amortized across ticket addition/
removal. This turns O(n) + O(m) (where n is the size of the normal list
and m of the priority list) into O(1). This will mostly have effect in workloads
that experience heavy flushing.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/space-info.c | 13 ++++++++-----
 fs/btrfs/space-info.h |  4 ++++
 2 files changed, 12 insertions(+), 5 deletions(-)

--
2.17.1

Comments

David Sterba March 11, 2020, 1:14 a.m. UTC | #1
On Tue, Mar 10, 2020 at 11:00:35AM +0200, Nikolay Borisov wrote:
> Instead of iterating all pending tickets on the normal/priority list to
> sum their total size the cost can be amortized across ticket addition/
> removal. This turns O(n) + O(m) (where n is the size of the normal list
> and m of the priority list) into O(1). This will mostly have effect in workloads
> that experience heavy flushing.
> 
> Signed-off-by: Nikolay Borisov <nborisov@suse.com>

Added to misc-next, thanks.

> ---
>  fs/btrfs/space-info.c | 13 ++++++++-----
>  fs/btrfs/space-info.h |  4 ++++
>  2 files changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> index 9cb511d8cd9d..316a724dc990 100644
> --- a/fs/btrfs/space-info.c
> +++ b/fs/btrfs/space-info.c
> @@ -389,6 +389,8 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
>  							      space_info,
>  							      ticket->bytes);
>  			list_del_init(&ticket->list);
> +			ASSERT(space_info->reclaim_size >= ticket->bytes);
> +			space_info->reclaim_size -= ticket->bytes;
>  			ticket->bytes = 0;
>  			space_info->tickets_id++;
>  			wake_up(&ticket->wait);
> @@ -784,16 +786,15 @@ static inline u64
>  btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
>  				 struct btrfs_space_info *space_info)
>  {
> -	struct reserve_ticket *ticket;
>  	u64 used;
>  	u64 avail;
>  	u64 expected;
>  	u64 to_reclaim = 0;
> 
> -	list_for_each_entry(ticket, &space_info->tickets, list)
> -		to_reclaim += ticket->bytes;
> -	list_for_each_entry(ticket, &space_info->priority_tickets, list)
> -		to_reclaim += ticket->bytes;
> +	lockdep_assert_held(&space_info->lock);

This can potentially save cachelines when the list traversal is avoided
as we're interested in just the sum of some data and the tickets are not
touched anywhere in this function callgraph.

> +
> +	if (space_info->reclaim_size)
> +		return space_info->reclaim_size;
> 
>  	avail = calc_available_free_space(fs_info, space_info,
>  					  BTRFS_RESERVE_FLUSH_ALL);
> @@ -1192,8 +1193,10 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
>  	 * the list and we will do our own flushing further down.
>  	 */
>  	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
> +		ASSERT(space_info->reclaim_size >= 0);
>  		ticket.bytes = orig_bytes;
>  		ticket.error = 0;
> +		space_info->reclaim_size += ticket.bytes;
>  		init_waitqueue_head(&ticket.wait);
>  		if (flush == BTRFS_RESERVE_FLUSH_ALL) {
>  			list_add_tail(&ticket.list, &space_info->tickets);
> diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
> index 24514cd2c6c1..0e68a6dd2a79 100644
> --- a/fs/btrfs/space-info.h
> +++ b/fs/btrfs/space-info.h
> @@ -54,6 +54,10 @@ struct btrfs_space_info {
>  	struct list_head ro_bgs;
>  	struct list_head priority_tickets;
>  	struct list_head tickets;
> +
> +	u64 reclaim_size; /* Size of space that needs to be reclaimed in order
> +			     to satisfy pending tickets */

Please use comment-before-member format.
Josef Bacik March 11, 2020, 5:52 p.m. UTC | #2
On 3/10/20 5:00 AM, Nikolay Borisov wrote:
> Instead of iterating all pending tickets on the normal/priority list to
> sum their total size the cost can be amortized across ticket addition/
> removal. This turns O(n) + O(m) (where n is the size of the normal list
> and m of the priority list) into O(1). This will mostly have effect in workloads
> that experience heavy flushing.
> 
> Signed-off-by: Nikolay Borisov <nborisov@suse.com>
> ---
>   fs/btrfs/space-info.c | 13 ++++++++-----
>   fs/btrfs/space-info.h |  4 ++++
>   2 files changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> index 9cb511d8cd9d..316a724dc990 100644
> --- a/fs/btrfs/space-info.c
> +++ b/fs/btrfs/space-info.c
> @@ -389,6 +389,8 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
>   							      space_info,
>   							      ticket->bytes);
>   			list_del_init(&ticket->list);
> +			ASSERT(space_info->reclaim_size >= ticket->bytes);
> +			space_info->reclaim_size -= ticket->bytes;
>   			ticket->bytes = 0;
>   			space_info->tickets_id++;
>   			wake_up(&ticket->wait);
> @@ -784,16 +786,15 @@ static inline u64
>   btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
>   				 struct btrfs_space_info *space_info)
>   {
> -	struct reserve_ticket *ticket;
>   	u64 used;
>   	u64 avail;
>   	u64 expected;
>   	u64 to_reclaim = 0;
> 
> -	list_for_each_entry(ticket, &space_info->tickets, list)
> -		to_reclaim += ticket->bytes;
> -	list_for_each_entry(ticket, &space_info->priority_tickets, list)
> -		to_reclaim += ticket->bytes;
> +	lockdep_assert_held(&space_info->lock);
> +
> +	if (space_info->reclaim_size)
> +		return space_info->reclaim_size;

This undoes the fix that I put up making sure we include any space we can no 
longer overcommit.  Thanks,

Josef
Nikolay Borisov March 11, 2020, 5:57 p.m. UTC | #3
On 11.03.20 г. 19:52 ч., Josef Bacik wrote:
> On 3/10/20 5:00 AM, Nikolay Borisov wrote:
>> Instead of iterating all pending tickets on the normal/priority list to
>> sum their total size the cost can be amortized across ticket addition/
>> removal. This turns O(n) + O(m) (where n is the size of the normal list
>> and m of the priority list) into O(1). This will mostly have effect in
>> workloads
>> that experience heavy flushing.
>>
>> Signed-off-by: Nikolay Borisov <nborisov@suse.com>
>> ---
>>   fs/btrfs/space-info.c | 13 ++++++++-----
>>   fs/btrfs/space-info.h |  4 ++++
>>   2 files changed, 12 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
>> index 9cb511d8cd9d..316a724dc990 100644
>> --- a/fs/btrfs/space-info.c
>> +++ b/fs/btrfs/space-info.c
>> @@ -389,6 +389,8 @@ void btrfs_try_granting_tickets(struct
>> btrfs_fs_info *fs_info,
>>                                     space_info,
>>                                     ticket->bytes);
>>               list_del_init(&ticket->list);
>> +            ASSERT(space_info->reclaim_size >= ticket->bytes);
>> +            space_info->reclaim_size -= ticket->bytes;
>>               ticket->bytes = 0;
>>               space_info->tickets_id++;
>>               wake_up(&ticket->wait);
>> @@ -784,16 +786,15 @@ static inline u64
>>   btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
>>                    struct btrfs_space_info *space_info)
>>   {
>> -    struct reserve_ticket *ticket;
>>       u64 used;
>>       u64 avail;
>>       u64 expected;
>>       u64 to_reclaim = 0;
>>
>> -    list_for_each_entry(ticket, &space_info->tickets, list)
>> -        to_reclaim += ticket->bytes;
>> -    list_for_each_entry(ticket, &space_info->priority_tickets, list)
>> -        to_reclaim += ticket->bytes;
>> +    lockdep_assert_held(&space_info->lock);
>> +
>> +    if (space_info->reclaim_size)
>> +        return space_info->reclaim_size;
> 
> This undoes the fix that I put up making sure we include any space we
> can no longer overcommit.  Thanks,

Which fix is that?

> 
> Josef
Josef Bacik March 11, 2020, 5:59 p.m. UTC | #4
On 3/11/20 1:57 PM, Nikolay Borisov wrote:
> 
> 
> On 11.03.20 г. 19:52 ч., Josef Bacik wrote:
>> On 3/10/20 5:00 AM, Nikolay Borisov wrote:
>>> Instead of iterating all pending tickets on the normal/priority list to
>>> sum their total size the cost can be amortized across ticket addition/
>>> removal. This turns O(n) + O(m) (where n is the size of the normal list
>>> and m of the priority list) into O(1). This will mostly have effect in
>>> workloads
>>> that experience heavy flushing.
>>>
>>> Signed-off-by: Nikolay Borisov <nborisov@suse.com>
>>> ---
>>>    fs/btrfs/space-info.c | 13 ++++++++-----
>>>    fs/btrfs/space-info.h |  4 ++++
>>>    2 files changed, 12 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
>>> index 9cb511d8cd9d..316a724dc990 100644
>>> --- a/fs/btrfs/space-info.c
>>> +++ b/fs/btrfs/space-info.c
>>> @@ -389,6 +389,8 @@ void btrfs_try_granting_tickets(struct
>>> btrfs_fs_info *fs_info,
>>>                                      space_info,
>>>                                      ticket->bytes);
>>>                list_del_init(&ticket->list);
>>> +            ASSERT(space_info->reclaim_size >= ticket->bytes);
>>> +            space_info->reclaim_size -= ticket->bytes;
>>>                ticket->bytes = 0;
>>>                space_info->tickets_id++;
>>>                wake_up(&ticket->wait);
>>> @@ -784,16 +786,15 @@ static inline u64
>>>    btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
>>>                     struct btrfs_space_info *space_info)
>>>    {
>>> -    struct reserve_ticket *ticket;
>>>        u64 used;
>>>        u64 avail;
>>>        u64 expected;
>>>        u64 to_reclaim = 0;
>>>
>>> -    list_for_each_entry(ticket, &space_info->tickets, list)
>>> -        to_reclaim += ticket->bytes;
>>> -    list_for_each_entry(ticket, &space_info->priority_tickets, list)
>>> -        to_reclaim += ticket->bytes;
>>> +    lockdep_assert_held(&space_info->lock);
>>> +
>>> +    if (space_info->reclaim_size)
>>> +        return space_info->reclaim_size;
>>
>> This undoes the fix that I put up making sure we include any space we
>> can no longer overcommit.  Thanks,
> 
> Which fix is that?

https://github.com/kdave/btrfs-devel/commit/593212a6137ff3c5674609b4233f8ecec459dc45

Thanks,

Josef
David Sterba March 12, 2020, 8:36 p.m. UTC | #5
On Wed, Mar 11, 2020 at 01:59:14PM -0400, Josef Bacik wrote:
> >>>        u64 to_reclaim = 0;
> >>>
> >>> -    list_for_each_entry(ticket, &space_info->tickets, list)
> >>> -        to_reclaim += ticket->bytes;
> >>> -    list_for_each_entry(ticket, &space_info->priority_tickets, list)
> >>> -        to_reclaim += ticket->bytes;
> >>> +    lockdep_assert_held(&space_info->lock);
> >>> +
> >>> +    if (space_info->reclaim_size)
> >>> +        return space_info->reclaim_size;
> >>
> >> This undoes the fix that I put up making sure we include any space we
> >> can no longer overcommit.  Thanks,
> > 
> > Which fix is that?
> 
> https://github.com/kdave/btrfs-devel/commit/593212a6137ff3c5674609b4233f8ecec459dc45

Nik sent me a fixup, now folded to the patch.

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 9c36397b733c..0884dac883b5 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -792,13 +792,10 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
        u64 used;
        u64 avail;
        u64 expected;
-       u64 to_reclaim = 0;
+       u64 to_reclaim = space_info->reclaim_size;
 
        lockdep_assert_held(&space_info->lock);
 
-       if (space_info->reclaim_size)
-               return space_info->reclaim_size;
-
        avail = calc_available_free_space(fs_info, space_info,
                                          BTRFS_RESERVE_FLUSH_
diff mbox series

Patch

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 9cb511d8cd9d..316a724dc990 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -389,6 +389,8 @@  void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
 							      space_info,
 							      ticket->bytes);
 			list_del_init(&ticket->list);
+			ASSERT(space_info->reclaim_size >= ticket->bytes);
+			space_info->reclaim_size -= ticket->bytes;
 			ticket->bytes = 0;
 			space_info->tickets_id++;
 			wake_up(&ticket->wait);
@@ -784,16 +786,15 @@  static inline u64
 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
 				 struct btrfs_space_info *space_info)
 {
-	struct reserve_ticket *ticket;
 	u64 used;
 	u64 avail;
 	u64 expected;
 	u64 to_reclaim = 0;

-	list_for_each_entry(ticket, &space_info->tickets, list)
-		to_reclaim += ticket->bytes;
-	list_for_each_entry(ticket, &space_info->priority_tickets, list)
-		to_reclaim += ticket->bytes;
+	lockdep_assert_held(&space_info->lock);
+
+	if (space_info->reclaim_size)
+		return space_info->reclaim_size;

 	avail = calc_available_free_space(fs_info, space_info,
 					  BTRFS_RESERVE_FLUSH_ALL);
@@ -1192,8 +1193,10 @@  static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
 	 * the list and we will do our own flushing further down.
 	 */
 	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
+		ASSERT(space_info->reclaim_size >= 0);
 		ticket.bytes = orig_bytes;
 		ticket.error = 0;
+		space_info->reclaim_size += ticket.bytes;
 		init_waitqueue_head(&ticket.wait);
 		if (flush == BTRFS_RESERVE_FLUSH_ALL) {
 			list_add_tail(&ticket.list, &space_info->tickets);
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 24514cd2c6c1..0e68a6dd2a79 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -54,6 +54,10 @@  struct btrfs_space_info {
 	struct list_head ro_bgs;
 	struct list_head priority_tickets;
 	struct list_head tickets;
+
+	u64 reclaim_size; /* Size of space that needs to be reclaimed in order
+			     to satisfy pending tickets */
+
 	/*
 	 * tickets_id just indicates the next ticket will be handled, so note
 	 * it's not stored per ticket.