diff mbox

[v3,06/21] btrfs: delayed_ref: Add new function to record reserved space into delayed ref

Message ID 1444702827-18169-7-git-send-email-quwenruo@cn.fujitsu.com (mailing list archive)
State Accepted
Headers show

Commit Message

Qu Wenruo Oct. 13, 2015, 2:20 a.m. UTC
Add a new function, btrfs_add_delayed_qgroup_reserve(), to record
how much space is reserved for that extent.

As btrfs only accounts qgroups at run_delayed_refs() time, a newly
allocated extent should keep its reserved space until then.

So add the needed function and related members to do it.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
v2:
  None
v3:
  None
---
 fs/btrfs/delayed-ref.c | 29 +++++++++++++++++++++++++++++
 fs/btrfs/delayed-ref.h | 14 ++++++++++++++
 2 files changed, 43 insertions(+)

Comments

Filipe Manana Oct. 25, 2015, 2:39 p.m. UTC | #1
On Tue, Oct 13, 2015 at 3:20 AM, Qu Wenruo <quwenruo@cn.fujitsu.com> wrote:
> Add new function btrfs_add_delayed_qgroup_reserve() function to record
> how much space is reserved for that extent.
>
> As btrfs only accounts qgroup at run_delayed_refs() time, so newly
> allocated extent should keep the reserved space until then.
>
> So add needed function with related members to do it.
>
> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
> ---
> v2:
>   None
> v3:
>   None
> ---
>  fs/btrfs/delayed-ref.c | 29 +++++++++++++++++++++++++++++
>  fs/btrfs/delayed-ref.h | 14 ++++++++++++++
>  2 files changed, 43 insertions(+)
>
> diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
> index ac3e81d..bd9b63b 100644
> --- a/fs/btrfs/delayed-ref.c
> +++ b/fs/btrfs/delayed-ref.c
> @@ -476,6 +476,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
>         INIT_LIST_HEAD(&head_ref->ref_list);
>         head_ref->processing = 0;
>         head_ref->total_ref_mod = count_mod;
> +       head_ref->qgroup_reserved = 0;
> +       head_ref->qgroup_ref_root = 0;
>
>         /* Record qgroup extent info if provided */
>         if (qrecord) {
> @@ -746,6 +748,33 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
>         return 0;
>  }
>
> +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
> +                                    struct btrfs_trans_handle *trans,
> +                                    u64 ref_root, u64 bytenr, u64 num_bytes)
> +{
> +       struct btrfs_delayed_ref_root *delayed_refs;
> +       struct btrfs_delayed_ref_head *ref_head;
> +       int ret = 0;
> +
> +       if (!fs_info->quota_enabled || !is_fstree(ref_root))
> +               return 0;
> +
> +       delayed_refs = &trans->transaction->delayed_refs;
> +
> +       spin_lock(&delayed_refs->lock);
> +       ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
> +       if (!ref_head) {
> +               ret = -ENOENT;
> +               goto out;
> +       }

Hi Qu,

While running btrfs/063 with qgroups enabled (I modified the test
to enable qgroups), I ran into this twice:

[169125.246506] BTRFS info (device sdc): disk space caching is enabled
[169125.363164] ------------[ cut here ]------------
[169125.365236] WARNING: CPU: 10 PID: 2827 at fs/btrfs/inode.c:2929
btrfs_finish_ordered_io+0x347/0x4eb [btrfs]()
[169125.367702] BTRFS: Transaction aborted (error -2)
[169125.368830] Modules linked in: btrfs dm_flakey dm_mod
crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs
lockd grace fscache sunrpc loop fuse parport_pc parport i2c_piix4
psmouse acpi_cpufreq microcode pcspkr processor evdev i2c_core
serio_raw button ext4 crc16 jbd2 mbcache sd_mod sg sr_mod cdrom
ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring
scsi_mod e1000 virtio [last unloaded: btrfs]
[169125.376755] CPU: 10 PID: 2827 Comm: kworker/u32:14 Tainted: G
  W       4.3.0-rc5-btrfs-next-17+ #1
[169125.378522] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org
04/01/2014
[169125.380916] Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs]
[169125.382167]  0000000000000000 ffff88007ef2bc28 ffffffff812566f4
ffff88007ef2bc70
[169125.383643]  ffff88007ef2bc60 ffffffff8104d0a6 ffffffffa03cac33
ffff8801f5ca6db0
[169125.385197]  ffff8802c6c7ee98 ffff880122bc1000 00000000fffffffe
ffff88007ef2bcc8
[169125.386691] Call Trace:
[169125.387194]  [<ffffffff812566f4>] dump_stack+0x4e/0x79
[169125.388205]  [<ffffffff8104d0a6>] warn_slowpath_common+0x9f/0xb8
[169125.389386]  [<ffffffffa03cac33>] ?
btrfs_finish_ordered_io+0x347/0x4eb [btrfs]
[169125.390837]  [<ffffffff8104d107>] warn_slowpath_fmt+0x48/0x50
[169125.391839]  [<ffffffffa03d67bb>] ? unpin_extent_cache+0xbe/0xcc [btrfs]
[169125.392973]  [<ffffffffa03cac33>]
btrfs_finish_ordered_io+0x347/0x4eb [btrfs]
[169125.395714]  [<ffffffff8147c612>] ? _raw_spin_unlock_irqrestore+0x38/0x60
[169125.396888]  [<ffffffff81087d0b>] ? trace_hardirqs_off_caller+0x1f/0xb9
[169125.397986]  [<ffffffffa03cadec>] finish_ordered_fn+0x15/0x17 [btrfs]
[169125.399122]  [<ffffffffa03ec706>] normal_work_helper+0x14c/0x32a [btrfs]
[169125.400300]  [<ffffffffa03ec9e6>] btrfs_endio_write_helper+0x12/0x14 [btrfs]
[169125.401450]  [<ffffffff81063b23>] process_one_work+0x24a/0x4ac
[169125.402631]  [<ffffffff81064285>] worker_thread+0x206/0x2c2
[169125.403622]  [<ffffffff8106407f>] ? rescuer_thread+0x2cb/0x2cb
[169125.404693]  [<ffffffff8106904d>] kthread+0xef/0xf7
[169125.405727]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
[169125.406808]  [<ffffffff8147d10f>] ret_from_fork+0x3f/0x70
[169125.407834]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
[169125.408840] ---[ end trace 6ee4342a5722b119 ]---
[169125.409654] BTRFS: error (device sdc) in
btrfs_finish_ordered_io:2929: errno=-2 No such entry

So what you have here is racy:

btrfs_finish_ordered_io()
   joins existing transaction (or starts a new one)
   insert_reserved_file_extent()
      btrfs_alloc_reserved_file_extent() --> creates delayed ref

      ******* delayed refs are run, someone called
btrfs_async_run_delayed_refs() from btrfs_end_transaction(), ref head
is removed ******

      btrfs_add_delayed_qgroup_reserve() --> does not find delayed ref
head, returns -ENOENT and finish_ordered_io aborts current
transaction...

A very tiny race, but...

thanks


> +       WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
> +       ref_head->qgroup_ref_root = ref_root;
> +       ref_head->qgroup_reserved = num_bytes;
> +out:
> +       spin_unlock(&delayed_refs->lock);
> +       return ret;
> +}
> +
>  int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
>                                 struct btrfs_trans_handle *trans,
>                                 u64 bytenr, u64 num_bytes,
> diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
> index 13fb5e6..d4c41e2 100644
> --- a/fs/btrfs/delayed-ref.h
> +++ b/fs/btrfs/delayed-ref.h
> @@ -113,6 +113,17 @@ struct btrfs_delayed_ref_head {
>         int total_ref_mod;
>
>         /*
> +        * For qgroup reserved space freeing.
> +        *
> +        * ref_root and reserved will be recorded after
> +        * BTRFS_ADD_DELAYED_EXTENT is called.
> +        * And will be used to free reserved qgroup space at
> +        * run_delayed_refs() time.
> +        */
> +       u64 qgroup_ref_root;
> +       u64 qgroup_reserved;
> +
> +       /*
>          * when a new extent is allocated, it is just reserved in memory
>          * The actual extent isn't inserted into the extent allocation tree
>          * until the delayed ref is processed.  must_insert_reserved is
> @@ -242,6 +253,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
>                                u64 owner, u64 offset, int action,
>                                struct btrfs_delayed_extent_op *extent_op,
>                                int no_quota);
> +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
> +                                    struct btrfs_trans_handle *trans,
> +                                    u64 ref_root, u64 bytenr, u64 num_bytes);
>  int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
>                                 struct btrfs_trans_handle *trans,
>                                 u64 bytenr, u64 num_bytes,
> --
> 2.6.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Oct. 26, 2015, 1:27 a.m. UTC | #2
Filipe Manana wrote on 2015/10/25 14:39 +0000:
> On Tue, Oct 13, 2015 at 3:20 AM, Qu Wenruo <quwenruo@cn.fujitsu.com> wrote:
>> Add new function btrfs_add_delayed_qgroup_reserve() function to record
>> how much space is reserved for that extent.
>>
>> As btrfs only accounts qgroup at run_delayed_refs() time, so newly
>> allocated extent should keep the reserved space until then.
>>
>> So add needed function with related members to do it.
>>
>> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
>> ---
>> v2:
>>    None
>> v3:
>>    None
>> ---
>>   fs/btrfs/delayed-ref.c | 29 +++++++++++++++++++++++++++++
>>   fs/btrfs/delayed-ref.h | 14 ++++++++++++++
>>   2 files changed, 43 insertions(+)
>>
>> diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
>> index ac3e81d..bd9b63b 100644
>> --- a/fs/btrfs/delayed-ref.c
>> +++ b/fs/btrfs/delayed-ref.c
>> @@ -476,6 +476,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
>>          INIT_LIST_HEAD(&head_ref->ref_list);
>>          head_ref->processing = 0;
>>          head_ref->total_ref_mod = count_mod;
>> +       head_ref->qgroup_reserved = 0;
>> +       head_ref->qgroup_ref_root = 0;
>>
>>          /* Record qgroup extent info if provided */
>>          if (qrecord) {
>> @@ -746,6 +748,33 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
>>          return 0;
>>   }
>>
>> +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
>> +                                    struct btrfs_trans_handle *trans,
>> +                                    u64 ref_root, u64 bytenr, u64 num_bytes)
>> +{
>> +       struct btrfs_delayed_ref_root *delayed_refs;
>> +       struct btrfs_delayed_ref_head *ref_head;
>> +       int ret = 0;
>> +
>> +       if (!fs_info->quota_enabled || !is_fstree(ref_root))
>> +               return 0;
>> +
>> +       delayed_refs = &trans->transaction->delayed_refs;
>> +
>> +       spin_lock(&delayed_refs->lock);
>> +       ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
>> +       if (!ref_head) {
>> +               ret = -ENOENT;
>> +               goto out;
>> +       }
>
> Hi Qu,
>
> So while running btrfs/063, with qgroups enabled (I modified the test
> to enable qgroups), ran into this 2 times:

Thanks for the test.

I also want a way to enable quota for all the other btrfs/generic tests,
but have no good idea other than modifying the test cases themselves.

Any good ideas?
>
> [169125.246506] BTRFS info (device sdc): disk space caching is enabled
> [169125.363164] ------------[ cut here ]------------
> [169125.365236] WARNING: CPU: 10 PID: 2827 at fs/btrfs/inode.c:2929
> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]()
> [169125.367702] BTRFS: Transaction aborted (error -2)
> [169125.368830] Modules linked in: btrfs dm_flakey dm_mod
> crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs
> lockd grace fscache sunrpc loop fuse parport_pc parport i2c_piix4
> psmouse acpi_cpufreq microcode pcspkr processor evdev i2c_core
> serio_raw button ext4 crc16 jbd2 mbcache sd_mod sg sr_mod cdrom
> ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring
> scsi_mod e1000 virtio [last unloaded: btrfs]
> [169125.376755] CPU: 10 PID: 2827 Comm: kworker/u32:14 Tainted: G
>    W       4.3.0-rc5-btrfs-next-17+ #1
> [169125.378522] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org
> 04/01/2014
> [169125.380916] Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs]
> [169125.382167]  0000000000000000 ffff88007ef2bc28 ffffffff812566f4
> ffff88007ef2bc70
> [169125.383643]  ffff88007ef2bc60 ffffffff8104d0a6 ffffffffa03cac33
> ffff8801f5ca6db0
> [169125.385197]  ffff8802c6c7ee98 ffff880122bc1000 00000000fffffffe
> ffff88007ef2bcc8
> [169125.386691] Call Trace:
> [169125.387194]  [<ffffffff812566f4>] dump_stack+0x4e/0x79
> [169125.388205]  [<ffffffff8104d0a6>] warn_slowpath_common+0x9f/0xb8
> [169125.389386]  [<ffffffffa03cac33>] ?
> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]
> [169125.390837]  [<ffffffff8104d107>] warn_slowpath_fmt+0x48/0x50
> [169125.391839]  [<ffffffffa03d67bb>] ? unpin_extent_cache+0xbe/0xcc [btrfs]
> [169125.392973]  [<ffffffffa03cac33>]
> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]
> [169125.395714]  [<ffffffff8147c612>] ? _raw_spin_unlock_irqrestore+0x38/0x60
> [169125.396888]  [<ffffffff81087d0b>] ? trace_hardirqs_off_caller+0x1f/0xb9
> [169125.397986]  [<ffffffffa03cadec>] finish_ordered_fn+0x15/0x17 [btrfs]
> [169125.399122]  [<ffffffffa03ec706>] normal_work_helper+0x14c/0x32a [btrfs]
> [169125.400300]  [<ffffffffa03ec9e6>] btrfs_endio_write_helper+0x12/0x14 [btrfs]
> [169125.401450]  [<ffffffff81063b23>] process_one_work+0x24a/0x4ac
> [169125.402631]  [<ffffffff81064285>] worker_thread+0x206/0x2c2
> [169125.403622]  [<ffffffff8106407f>] ? rescuer_thread+0x2cb/0x2cb
> [169125.404693]  [<ffffffff8106904d>] kthread+0xef/0xf7
> [169125.405727]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
> [169125.406808]  [<ffffffff8147d10f>] ret_from_fork+0x3f/0x70
> [169125.407834]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
> [169125.408840] ---[ end trace 6ee4342a5722b119 ]---
> [169125.409654] BTRFS: error (device sdc) in
> btrfs_finish_ordered_io:2929: errno=-2 No such entry
>
> So what you have here is racy:
>
> btrfs_finish_ordered_io()
>     joins existing transaction (or starts a new one)
>     insert_reserved_file_extent()
>        btrfs_alloc_reserved_file_extent() --> creates delayed ref
>
>        ******* delayed refs are run, someone called
> btrfs_async_run_delayed_refs() from btrfs_end_transaction(), ref head
> is removed ******
>
>        btrfs_add_delayed_qgroup_reserve() --> does not find delayed ref
> head, returns -ENOENT and finish_ordered_io aborts current
> transaction...
>
> A very tiny race, but...

Oh, an aborted transaction; that is quite a big problem.

The original idea behind introducing btrfs_add_delayed_qgroup_reserve() was
to put all related qgroup code into qgroup.c, but that turned out to be
too idealistic.

I'll add a new patch that modifies btrfs_add_delayed_data_ref(): it will
remove the last parameter, no_quota, and add a new reserved parameter, so
that the reserved space can be recorded when the delayed ref is inserted.

Thanks
Qu

>
> thanks
>
>
>> +       WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
>> +       ref_head->qgroup_ref_root = ref_root;
>> +       ref_head->qgroup_reserved = num_bytes;
>> +out:
>> +       spin_unlock(&delayed_refs->lock);
>> +       return ret;
>> +}
>> +
>>   int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
>>                                  struct btrfs_trans_handle *trans,
>>                                  u64 bytenr, u64 num_bytes,
>> diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
>> index 13fb5e6..d4c41e2 100644
>> --- a/fs/btrfs/delayed-ref.h
>> +++ b/fs/btrfs/delayed-ref.h
>> @@ -113,6 +113,17 @@ struct btrfs_delayed_ref_head {
>>          int total_ref_mod;
>>
>>          /*
>> +        * For qgroup reserved space freeing.
>> +        *
>> +        * ref_root and reserved will be recorded after
>> +        * BTRFS_ADD_DELAYED_EXTENT is called.
>> +        * And will be used to free reserved qgroup space at
>> +        * run_delayed_refs() time.
>> +        */
>> +       u64 qgroup_ref_root;
>> +       u64 qgroup_reserved;
>> +
>> +       /*
>>           * when a new extent is allocated, it is just reserved in memory
>>           * The actual extent isn't inserted into the extent allocation tree
>>           * until the delayed ref is processed.  must_insert_reserved is
>> @@ -242,6 +253,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
>>                                 u64 owner, u64 offset, int action,
>>                                 struct btrfs_delayed_extent_op *extent_op,
>>                                 int no_quota);
>> +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
>> +                                    struct btrfs_trans_handle *trans,
>> +                                    u64 ref_root, u64 bytenr, u64 num_bytes);
>>   int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
>>                                  struct btrfs_trans_handle *trans,
>>                                  u64 bytenr, u64 num_bytes,
>> --
>> 2.6.1
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Oct. 27, 2015, 4:13 a.m. UTC | #3
Filipe Manana wrote on 2015/10/25 14:39 +0000:
> On Tue, Oct 13, 2015 at 3:20 AM, Qu Wenruo <quwenruo@cn.fujitsu.com> wrote:
>> Add new function btrfs_add_delayed_qgroup_reserve() function to record
>> how much space is reserved for that extent.
>>
>> As btrfs only accounts qgroup at run_delayed_refs() time, so newly
>> allocated extent should keep the reserved space until then.
>>
>> So add needed function with related members to do it.
>>
>> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
>> ---
>> v2:
>>    None
>> v3:
>>    None
>> ---
>>   fs/btrfs/delayed-ref.c | 29 +++++++++++++++++++++++++++++
>>   fs/btrfs/delayed-ref.h | 14 ++++++++++++++
>>   2 files changed, 43 insertions(+)
>>
>> diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
>> index ac3e81d..bd9b63b 100644
>> --- a/fs/btrfs/delayed-ref.c
>> +++ b/fs/btrfs/delayed-ref.c
>> @@ -476,6 +476,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
>>          INIT_LIST_HEAD(&head_ref->ref_list);
>>          head_ref->processing = 0;
>>          head_ref->total_ref_mod = count_mod;
>> +       head_ref->qgroup_reserved = 0;
>> +       head_ref->qgroup_ref_root = 0;
>>
>>          /* Record qgroup extent info if provided */
>>          if (qrecord) {
>> @@ -746,6 +748,33 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
>>          return 0;
>>   }
>>
>> +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
>> +                                    struct btrfs_trans_handle *trans,
>> +                                    u64 ref_root, u64 bytenr, u64 num_bytes)
>> +{
>> +       struct btrfs_delayed_ref_root *delayed_refs;
>> +       struct btrfs_delayed_ref_head *ref_head;
>> +       int ret = 0;
>> +
>> +       if (!fs_info->quota_enabled || !is_fstree(ref_root))
>> +               return 0;
>> +
>> +       delayed_refs = &trans->transaction->delayed_refs;
>> +
>> +       spin_lock(&delayed_refs->lock);
>> +       ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
>> +       if (!ref_head) {
>> +               ret = -ENOENT;
>> +               goto out;
>> +       }
>
> Hi Qu,
>
> So while running btrfs/063, with qgroups enabled (I modified the test
> to enable qgroups), ran into this 2 times:
>
> [169125.246506] BTRFS info (device sdc): disk space caching is enabled
> [169125.363164] ------------[ cut here ]------------
> [169125.365236] WARNING: CPU: 10 PID: 2827 at fs/btrfs/inode.c:2929
> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]()
> [169125.367702] BTRFS: Transaction aborted (error -2)
> [169125.368830] Modules linked in: btrfs dm_flakey dm_mod
> crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs
> lockd grace fscache sunrpc loop fuse parport_pc parport i2c_piix4
> psmouse acpi_cpufreq microcode pcspkr processor evdev i2c_core
> serio_raw button ext4 crc16 jbd2 mbcache sd_mod sg sr_mod cdrom
> ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring
> scsi_mod e1000 virtio [last unloaded: btrfs]
> [169125.376755] CPU: 10 PID: 2827 Comm: kworker/u32:14 Tainted: G
>    W       4.3.0-rc5-btrfs-next-17+ #1

Hi Filipe,

Although not related to the bug report, I'm a little interested in your 
testing kernel.

Are you testing integration-4.4 from Chris's repo?
Or 4.3-rc from the mainline repo with my qgroup reserve patchset applied?

Although integration-4.4 has already merged the qgroup reserve patchset,
it's causing some strange bugs, like over-decreasing the data
sinfo->bytes_may_use, mainly in the generic/127 test case.

But if the qgroup reserve patchset is rebased onto integration-4.3 (I did all
my old tests based on that), there is no generic/127 problem at all.

Thanks,
Qu

> [169125.378522] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org
> 04/01/2014
> [169125.380916] Workqueue: btrfs-endio-write btrfs_endio_write_helper [btrfs]
> [169125.382167]  0000000000000000 ffff88007ef2bc28 ffffffff812566f4
> ffff88007ef2bc70
> [169125.383643]  ffff88007ef2bc60 ffffffff8104d0a6 ffffffffa03cac33
> ffff8801f5ca6db0
> [169125.385197]  ffff8802c6c7ee98 ffff880122bc1000 00000000fffffffe
> ffff88007ef2bcc8
> [169125.386691] Call Trace:
> [169125.387194]  [<ffffffff812566f4>] dump_stack+0x4e/0x79
> [169125.388205]  [<ffffffff8104d0a6>] warn_slowpath_common+0x9f/0xb8
> [169125.389386]  [<ffffffffa03cac33>] ?
> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]
> [169125.390837]  [<ffffffff8104d107>] warn_slowpath_fmt+0x48/0x50
> [169125.391839]  [<ffffffffa03d67bb>] ? unpin_extent_cache+0xbe/0xcc [btrfs]
> [169125.392973]  [<ffffffffa03cac33>]
> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]
> [169125.395714]  [<ffffffff8147c612>] ? _raw_spin_unlock_irqrestore+0x38/0x60
> [169125.396888]  [<ffffffff81087d0b>] ? trace_hardirqs_off_caller+0x1f/0xb9
> [169125.397986]  [<ffffffffa03cadec>] finish_ordered_fn+0x15/0x17 [btrfs]
> [169125.399122]  [<ffffffffa03ec706>] normal_work_helper+0x14c/0x32a [btrfs]
> [169125.400300]  [<ffffffffa03ec9e6>] btrfs_endio_write_helper+0x12/0x14 [btrfs]
> [169125.401450]  [<ffffffff81063b23>] process_one_work+0x24a/0x4ac
> [169125.402631]  [<ffffffff81064285>] worker_thread+0x206/0x2c2
> [169125.403622]  [<ffffffff8106407f>] ? rescuer_thread+0x2cb/0x2cb
> [169125.404693]  [<ffffffff8106904d>] kthread+0xef/0xf7
> [169125.405727]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
> [169125.406808]  [<ffffffff8147d10f>] ret_from_fork+0x3f/0x70
> [169125.407834]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
> [169125.408840] ---[ end trace 6ee4342a5722b119 ]---
> [169125.409654] BTRFS: error (device sdc) in
> btrfs_finish_ordered_io:2929: errno=-2 No such entry
>
> So what you have here is racy:
>
> btrfs_finish_ordered_io()
>     joins existing transaction (or starts a new one)
>     insert_reserved_file_extent()
>        btrfs_alloc_reserved_file_extent() --> creates delayed ref
>
>        ******* delayed refs are run, someone called
> btrfs_async_run_delayed_refs() from btrfs_end_transaction(), ref head
> is removed ******
>
>        btrfs_add_delayed_qgroup_reserve() --> does not find delayed ref
> head, returns -ENOENT and finish_ordered_io aborts current
> transaction...
>
> A very tiny race, but...
>
> thanks
>
>
>> +       WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
>> +       ref_head->qgroup_ref_root = ref_root;
>> +       ref_head->qgroup_reserved = num_bytes;
>> +out:
>> +       spin_unlock(&delayed_refs->lock);
>> +       return ret;
>> +}
>> +
>>   int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
>>                                  struct btrfs_trans_handle *trans,
>>                                  u64 bytenr, u64 num_bytes,
>> diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
>> index 13fb5e6..d4c41e2 100644
>> --- a/fs/btrfs/delayed-ref.h
>> +++ b/fs/btrfs/delayed-ref.h
>> @@ -113,6 +113,17 @@ struct btrfs_delayed_ref_head {
>>          int total_ref_mod;
>>
>>          /*
>> +        * For qgroup reserved space freeing.
>> +        *
>> +        * ref_root and reserved will be recorded after
>> +        * BTRFS_ADD_DELAYED_EXTENT is called.
>> +        * And will be used to free reserved qgroup space at
>> +        * run_delayed_refs() time.
>> +        */
>> +       u64 qgroup_ref_root;
>> +       u64 qgroup_reserved;
>> +
>> +       /*
>>           * when a new extent is allocated, it is just reserved in memory
>>           * The actual extent isn't inserted into the extent allocation tree
>>           * until the delayed ref is processed.  must_insert_reserved is
>> @@ -242,6 +253,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
>>                                 u64 owner, u64 offset, int action,
>>                                 struct btrfs_delayed_extent_op *extent_op,
>>                                 int no_quota);
>> +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
>> +                                    struct btrfs_trans_handle *trans,
>> +                                    u64 ref_root, u64 bytenr, u64 num_bytes);
>>   int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
>>                                  struct btrfs_trans_handle *trans,
>>                                  u64 bytenr, u64 num_bytes,
>> --
>> 2.6.1
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Mason Oct. 27, 2015, 5:14 a.m. UTC | #4
On Tue, Oct 27, 2015 at 12:13:11PM +0800, Qu Wenruo wrote:
> 
> 
> Filipe Manana wrote on 2015/10/25 14:39 +0000:
> >On Tue, Oct 13, 2015 at 3:20 AM, Qu Wenruo <quwenruo@cn.fujitsu.com> wrote:
> >>Add new function btrfs_add_delayed_qgroup_reserve() function to record
> >>how much space is reserved for that extent.
> >>
> >>As btrfs only accounts qgroup at run_delayed_refs() time, so newly
> >>allocated extent should keep the reserved space until then.
> >>
> >>So add needed function with related members to do it.
> >>
> >>Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
> >>---
> >>v2:
> >>   None
> >>v3:
> >>   None
> >>---
> >>  fs/btrfs/delayed-ref.c | 29 +++++++++++++++++++++++++++++
> >>  fs/btrfs/delayed-ref.h | 14 ++++++++++++++
> >>  2 files changed, 43 insertions(+)
> >>
> >>diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
> >>index ac3e81d..bd9b63b 100644
> >>--- a/fs/btrfs/delayed-ref.c
> >>+++ b/fs/btrfs/delayed-ref.c
> >>@@ -476,6 +476,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
> >>         INIT_LIST_HEAD(&head_ref->ref_list);
> >>         head_ref->processing = 0;
> >>         head_ref->total_ref_mod = count_mod;
> >>+       head_ref->qgroup_reserved = 0;
> >>+       head_ref->qgroup_ref_root = 0;
> >>
> >>         /* Record qgroup extent info if provided */
> >>         if (qrecord) {
> >>@@ -746,6 +748,33 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
> >>         return 0;
> >>  }
> >>
> >>+int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
> >>+                                    struct btrfs_trans_handle *trans,
> >>+                                    u64 ref_root, u64 bytenr, u64 num_bytes)
> >>+{
> >>+       struct btrfs_delayed_ref_root *delayed_refs;
> >>+       struct btrfs_delayed_ref_head *ref_head;
> >>+       int ret = 0;
> >>+
> >>+       if (!fs_info->quota_enabled || !is_fstree(ref_root))
> >>+               return 0;
> >>+
> >>+       delayed_refs = &trans->transaction->delayed_refs;
> >>+
> >>+       spin_lock(&delayed_refs->lock);
> >>+       ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
> >>+       if (!ref_head) {
> >>+               ret = -ENOENT;
> >>+               goto out;
> >>+       }
> >
> >Hi Qu,
> >
> >So while running btrfs/063, with qgroups enabled (I modified the test
> >to enable qgroups), ran into this 2 times:
> >
> >[169125.246506] BTRFS info (device sdc): disk space caching is enabled
> >[169125.363164] ------------[ cut here ]------------
> >[169125.365236] WARNING: CPU: 10 PID: 2827 at fs/btrfs/inode.c:2929
> >btrfs_finish_ordered_io+0x347/0x4eb [btrfs]()
> >[169125.367702] BTRFS: Transaction aborted (error -2)
> >[169125.368830] Modules linked in: btrfs dm_flakey dm_mod
> >crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs
> >lockd grace fscache sunrpc loop fuse parport_pc parport i2c_piix4
> >psmouse acpi_cpufreq microcode pcspkr processor evdev i2c_core
> >serio_raw button ext4 crc16 jbd2 mbcache sd_mod sg sr_mod cdrom
> >ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring
> >scsi_mod e1000 virtio [last unloaded: btrfs]
> >[169125.376755] CPU: 10 PID: 2827 Comm: kworker/u32:14 Tainted: G
> >   W       4.3.0-rc5-btrfs-next-17+ #1
> 
> Hi Filipe,
> 
> Although not related to the bug report, I'm a little interested in your
> testing kernel.
> 
> Are you testing integration-4.4 from Chris repo?
> Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
> 
> Although integration-4.4 already merged qgroup reserve patchset, but it's
> causing some strange bug like over decrease data sinfo->bytes_may_use,
> mainly in generic/127 testcase.
> 
> But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
> old tests based on that), no generic/127 problem at all.

Did I mismerge things?

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Oct. 27, 2015, 5:48 a.m. UTC | #5
Chris Mason wrote on 2015/10/27 01:14 -0400:
> On Tue, Oct 27, 2015 at 12:13:11PM +0800, Qu Wenruo wrote:
>>
>>
>> Filipe Manana wrote on 2015/10/25 14:39 +0000:
>>> On Tue, Oct 13, 2015 at 3:20 AM, Qu Wenruo <quwenruo@cn.fujitsu.com> wrote:
>>>> Add new function btrfs_add_delayed_qgroup_reserve() function to record
>>>> how much space is reserved for that extent.
>>>>
>>>> As btrfs only accounts qgroup at run_delayed_refs() time, so newly
>>>> allocated extent should keep the reserved space until then.
>>>>
>>>> So add needed function with related members to do it.
>>>>
>>>> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
>>>> ---
>>>> v2:
>>>>    None
>>>> v3:
>>>>    None
>>>> ---
>>>>   fs/btrfs/delayed-ref.c | 29 +++++++++++++++++++++++++++++
>>>>   fs/btrfs/delayed-ref.h | 14 ++++++++++++++
>>>>   2 files changed, 43 insertions(+)
>>>>
>>>> diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
>>>> index ac3e81d..bd9b63b 100644
>>>> --- a/fs/btrfs/delayed-ref.c
>>>> +++ b/fs/btrfs/delayed-ref.c
>>>> @@ -476,6 +476,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
>>>>          INIT_LIST_HEAD(&head_ref->ref_list);
>>>>          head_ref->processing = 0;
>>>>          head_ref->total_ref_mod = count_mod;
>>>> +       head_ref->qgroup_reserved = 0;
>>>> +       head_ref->qgroup_ref_root = 0;
>>>>
>>>>          /* Record qgroup extent info if provided */
>>>>          if (qrecord) {
>>>> @@ -746,6 +748,33 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
>>>>          return 0;
>>>>   }
>>>>
>>>> +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
>>>> +                                    struct btrfs_trans_handle *trans,
>>>> +                                    u64 ref_root, u64 bytenr, u64 num_bytes)
>>>> +{
>>>> +       struct btrfs_delayed_ref_root *delayed_refs;
>>>> +       struct btrfs_delayed_ref_head *ref_head;
>>>> +       int ret = 0;
>>>> +
>>>> +       if (!fs_info->quota_enabled || !is_fstree(ref_root))
>>>> +               return 0;
>>>> +
>>>> +       delayed_refs = &trans->transaction->delayed_refs;
>>>> +
>>>> +       spin_lock(&delayed_refs->lock);
>>>> +       ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
>>>> +       if (!ref_head) {
>>>> +               ret = -ENOENT;
>>>> +               goto out;
>>>> +       }
>>>
>>> Hi Qu,
>>>
>>> So while running btrfs/063, with qgroups enabled (I modified the test
>>> to enable qgroups), ran into this 2 times:
>>>
>>> [169125.246506] BTRFS info (device sdc): disk space caching is enabled
>>> [169125.363164] ------------[ cut here ]------------
>>> [169125.365236] WARNING: CPU: 10 PID: 2827 at fs/btrfs/inode.c:2929
>>> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]()
>>> [169125.367702] BTRFS: Transaction aborted (error -2)
>>> [169125.368830] Modules linked in: btrfs dm_flakey dm_mod
>>> crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs
>>> lockd grace fscache sunrpc loop fuse parport_pc parport i2c_piix4
>>> psmouse acpi_cpufreq microcode pcspkr processor evdev i2c_core
>>> serio_raw button ext4 crc16 jbd2 mbcache sd_mod sg sr_mod cdrom
>>> ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring
>>> scsi_mod e1000 virtio [last unloaded: btrfs]
>>> [169125.376755] CPU: 10 PID: 2827 Comm: kworker/u32:14 Tainted: G
>>>    W       4.3.0-rc5-btrfs-next-17+ #1
>>
>> Hi Filipe,
>>
>> Although not related to the bug report, I'm a little interested in your
>> testing kernel.
>>
>> Are you testing integration-4.4 from Chris repo?
>> Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
>>
>> Although integration-4.4 already merged qgroup reserve patchset, but it's
>> causing some strange bug like over decrease data sinfo->bytes_may_use,
>> mainly in generic/127 testcase.
>>
>> But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
>> old tests based on that), no generic/127 problem at all.
>
> Did I mismerge things?
>
> -chris
>
Not sure yet.

But at least some patches in 4.3 are not in integration-4.4, like the
following patch:
btrfs: Avoid truncate tailing page if fallocate range doesn't exceed 
inode size

I'll continue testing and bisecting to see what triggers the strange 
WARN_ON() in integration-4.4.
------
Oct 27 11:05:00 vmware kernel: WARNING: CPU: 4 PID: 13711 at 
fs/btrfs//extent-tree.c:4171 
btrfs_free_reserved_data_space_noquota+0x175/0x190 [btrfs]()
Oct 27 11:05:00 vmware kernel: Modules linked in: btrfs(OE) fuse vfat 
msdos fat xfs binfmt_misc bridge stp llc dm_snapshot dm_bufio dm_flakey 
loop iptable_nat nf_conntrack_ipv4 nf
_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_raw 
iptable_filter ip_tables dm_mirror dm_region_hash dm_log xor dm_mod 
crc32c_intel vmw_balloon raid6_pq nfsd
vmw_vmci i2c_piix4 shpchp auth_rpcgss acpi_cpufreq nfs_acl lockd grace 
sunrpc ext4 mbcache jbd2 sd_mod vmwgfx drm_kms_helper syscopyarea 
sysfillrect sysimgblt fb_sys_fops ttm drm
ata_piix vmxnet3 libata vmw_pvscsi floppy [last unloaded: btrfs]
Oct 27 11:05:00 vmware kernel: CPU: 4 PID: 13711 Comm: fsx Tainted: G 
      W  OE   4.3.0-rc5+ #5
Oct 27 11:05:00 vmware kernel: Hardware name: VMware, Inc. VMware 
Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 08/16/2013
Oct 27 11:05:00 vmware kernel: 0000000000000000 000000002caf2373 
ffff88021f63b760 ffffffff81302e73
Oct 27 11:05:00 vmware kernel: 0000000000000000 ffff88021f63b798 
ffffffff810600f6 ffff88021c6e9000
Oct 27 11:05:00 vmware kernel: ffff88022a4abc00 0000000000006000 
ffff88021f63b8ac ffff8800827a0820
Oct 27 11:05:00 vmware kernel: Call Trace:
Oct 27 11:05:00 vmware kernel: [<ffffffff81302e73>] dump_stack+0x4b/0x68
Oct 27 11:05:00 vmware kernel: [<ffffffff810600f6>] 
warn_slowpath_common+0x86/0xc0
Oct 27 11:05:00 vmware kernel: [<ffffffff8106023a>] 
warn_slowpath_null+0x1a/0x20
Oct 27 11:05:00 vmware kernel: [<ffffffffa04d6b25>] 
btrfs_free_reserved_data_space_noquota+0x175/0x190 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa04f6b8d>] 
btrfs_clear_bit_hook+0x2ed/0x360 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa05113ad>] 
clear_state_bit+0x5d/0x1d0 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa051174a>] 
__clear_extent_bit+0x22a/0x3d0 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa051283a>] 
extent_clear_unlock_delalloc+0x7a/0x2c0 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffff8161a547>] ? 
_raw_spin_unlock+0x27/0x40
Oct 27 11:05:00 vmware kernel: [<ffffffffa050d665>] ? 
__btrfs_add_ordered_extent+0x245/0x3b0 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa04f934b>] 
cow_file_range+0x27b/0x430 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa04fa112>] 
run_delalloc_range+0x102/0x400 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa0513152>] 
writepage_delalloc.isra.35+0x112/0x170 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa0514235>] 
__extent_writepage+0xf5/0x370 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa0514817>] 
extent_write_cache_pages.isra.32.constprop.47+0x367/0x420 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa051662c>] 
extent_writepages+0x5c/0x90 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa04f73b0>] ? 
btrfs_real_readdir+0x570/0x570 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa04f4a38>] 
btrfs_writepages+0x28/0x30 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffff81191501>] do_writepages+0x21/0x40
Oct 27 11:05:00 vmware kernel: [<ffffffff81185ad0>] 
__filemap_fdatawrite_range+0x80/0xb0
Oct 27 11:05:00 vmware kernel: [<ffffffff81185bc3>] 
filemap_fdatawrite_range+0x13/0x20
Oct 27 11:05:00 vmware kernel: [<ffffffffa05095c0>] 
btrfs_fdatawrite_range+0x20/0x50 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa0509609>] 
start_ordered_ops+0x19/0x30 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffffa05096a3>] 
btrfs_sync_file+0x83/0x420 [btrfs]
Oct 27 11:05:00 vmware kernel: [<ffffffff811bd9e0>] ? SyS_msync+0x90/0x1f0
Oct 27 11:05:00 vmware kernel: [<ffffffff8122f7cd>] 
vfs_fsync_range+0x3d/0xb0
Oct 27 11:05:00 vmware kernel: [<ffffffff811bdac1>] SyS_msync+0x171/0x1f0
Oct 27 11:05:00 vmware kernel: [<ffffffff8161af17>] 
entry_SYSCALL_64_fastpath+0x12/0x6f
------

At least this won't cause any harm, as I enhanced the existing
WARN_ON() in the old btrfs_free_reserved_data_space() to handle the
underflow case quite well.
But it still needs investigating, as it seems to be a regression.

Maybe there are some other hidden bugs in my qgroup patchset... :(

Thanks,
Qu
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Mason Oct. 27, 2015, 6:12 a.m. UTC | #6
On Tue, Oct 27, 2015 at 01:48:34PM +0800, Qu Wenruo wrote:
> >>Are you testing integration-4.4 from Chris repo?
> >>Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
> >>
> >>Although integration-4.4 already merged qgroup reserve patchset, but it's
> >>causing some strange bug like over decrease data sinfo->bytes_may_use,
> >>mainly in generic/127 testcase.
> >>
> >>But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
> >>old tests based on that), no generic/127 problem at all.
> >
> >Did I mismerge things?
> >
> >-chris
> >
> Not sure yet.
> 
> But at least some patches in 4.3 is not in integration-4.4, like the
> following patch:
> btrfs: Avoid truncate tailing page if fallocate range doesn't exceed inode
> size

Have you tried testing integration-4.4 merged with current Linus git?

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Oct. 27, 2015, 7:26 a.m. UTC | #7
Chris Mason wrote on 2015/10/27 02:12 -0400:
> On Tue, Oct 27, 2015 at 01:48:34PM +0800, Qu Wenruo wrote:
>>>> Are you testing integration-4.4 from Chris repo?
>>>> Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
>>>>
>>>> Although integration-4.4 already merged qgroup reserve patchset, but it's
>>>> causing some strange bug like over decrease data sinfo->bytes_may_use,
>>>> mainly in generic/127 testcase.
>>>>
>>>> But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
>>>> old tests based on that), no generic/127 problem at all.
>>>
>>> Did I mismerge things?
>>>
>>> -chris
>>>
>> Not sure yet.
>>
>> But at least some patches in 4.3 is not in integration-4.4, like the
>> following patch:
>> btrfs: Avoid truncate tailing page if fallocate range doesn't exceed inode
>> size
>
> Have you tried testing integration-4.4 merged with current Linus git?
>
> -chris
>
Nice advice, compiling now.

But even after rebasing my old local branch (based on 7d35199e15b82a4d1a200,
with 21 patches) onto 4.3-rc7, the result still got screwed up...

If things get crazy, I'm afraid we would need to revert my whole qgroup
patchset from integration-4.4. :(

Thanks,
Qu
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Oct. 27, 2015, 9:05 a.m. UTC | #8
Chris Mason wrote on 2015/10/27 02:12 -0400:
> On Tue, Oct 27, 2015 at 01:48:34PM +0800, Qu Wenruo wrote:
>>>> Are you testing integration-4.4 from Chris repo?
>>>> Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
>>>>
>>>> Although integration-4.4 already merged qgroup reserve patchset, but it's
>>>> causing some strange bug like over decrease data sinfo->bytes_may_use,
>>>> mainly in generic/127 testcase.
>>>>
>>>> But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
>>>> old tests based on that), no generic/127 problem at all.
>>>
>>> Did I mismerge things?
>>>
>>> -chris
>>>
>> Not sure yet.
>>
>> But at least some patches in 4.3 is not in integration-4.4, like the
>> following patch:
>> btrfs: Avoid truncate tailing page if fallocate range doesn't exceed inode
>> size
>
> Have you tried testing integration-4.4 merged with current Linus git?
>
> -chris
>
Integration-4.4 merged with Linus' master also fails. :(

Current known working branches are all based on 4.3-integration(4.2-rc5):
https://github.com/adam900710/linux/tree/qgroup_reserve_good

I tried 4.3-rc5 and 4.3-rc7; both fail with a kernel warning in generic/137.

And due to the huge difference, I'm afraid it will take quite some time to
find the root cause...

Thanks,
Qu
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Filipe Manana Oct. 27, 2015, 9:22 a.m. UTC | #9
On Tue, Oct 27, 2015 at 4:13 AM, Qu Wenruo <quwenruo@cn.fujitsu.com> wrote:
>
>
> Filipe Manana wrote on 2015/10/25 14:39 +0000:
>>
>> On Tue, Oct 13, 2015 at 3:20 AM, Qu Wenruo <quwenruo@cn.fujitsu.com>
>> wrote:
>>>
>>> Add new function btrfs_add_delayed_qgroup_reserve() function to record
>>> how much space is reserved for that extent.
>>>
>>> As btrfs only accounts qgroup at run_delayed_refs() time, so newly
>>> allocated extent should keep the reserved space until then.
>>>
>>> So add needed function with related members to do it.
>>>
>>> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
>>> ---
>>> v2:
>>>    None
>>> v3:
>>>    None
>>> ---
>>>   fs/btrfs/delayed-ref.c | 29 +++++++++++++++++++++++++++++
>>>   fs/btrfs/delayed-ref.h | 14 ++++++++++++++
>>>   2 files changed, 43 insertions(+)
>>>
>>> diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
>>> index ac3e81d..bd9b63b 100644
>>> --- a/fs/btrfs/delayed-ref.c
>>> +++ b/fs/btrfs/delayed-ref.c
>>> @@ -476,6 +476,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
>>>          INIT_LIST_HEAD(&head_ref->ref_list);
>>>          head_ref->processing = 0;
>>>          head_ref->total_ref_mod = count_mod;
>>> +       head_ref->qgroup_reserved = 0;
>>> +       head_ref->qgroup_ref_root = 0;
>>>
>>>          /* Record qgroup extent info if provided */
>>>          if (qrecord) {
>>> @@ -746,6 +748,33 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info
>>> *fs_info,
>>>          return 0;
>>>   }
>>>
>>> +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
>>> +                                    struct btrfs_trans_handle *trans,
>>> +                                    u64 ref_root, u64 bytenr, u64
>>> num_bytes)
>>> +{
>>> +       struct btrfs_delayed_ref_root *delayed_refs;
>>> +       struct btrfs_delayed_ref_head *ref_head;
>>> +       int ret = 0;
>>> +
>>> +       if (!fs_info->quota_enabled || !is_fstree(ref_root))
>>> +               return 0;
>>> +
>>> +       delayed_refs = &trans->transaction->delayed_refs;
>>> +
>>> +       spin_lock(&delayed_refs->lock);
>>> +       ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
>>> +       if (!ref_head) {
>>> +               ret = -ENOENT;
>>> +               goto out;
>>> +       }
>>
>>
>> Hi Qu,
>>
>> So while running btrfs/063, with qgroups enabled (I modified the test
>> to enable qgroups), ran into this 2 times:
>>
>> [169125.246506] BTRFS info (device sdc): disk space caching is enabled
>> [169125.363164] ------------[ cut here ]------------
>> [169125.365236] WARNING: CPU: 10 PID: 2827 at fs/btrfs/inode.c:2929
>> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]()
>> [169125.367702] BTRFS: Transaction aborted (error -2)
>> [169125.368830] Modules linked in: btrfs dm_flakey dm_mod
>> crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs
>> lockd grace fscache sunrpc loop fuse parport_pc parport i2c_piix4
>> psmouse acpi_cpufreq microcode pcspkr processor evdev i2c_core
>> serio_raw button ext4 crc16 jbd2 mbcache sd_mod sg sr_mod cdrom
>> ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring
>> scsi_mod e1000 virtio [last unloaded: btrfs]
>> [169125.376755] CPU: 10 PID: 2827 Comm: kworker/u32:14 Tainted: G
>>    W       4.3.0-rc5-btrfs-next-17+ #1
>
>
> Hi Filipe,

Hi Qu,

>
> Although not related to the bug report, I'm a little interested in your
> testing kernel.
>
> Are you testing integration-4.4 from Chris repo?

Yes, I got that from Chris' integration-4.4 branch.

> Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
>
> Although integration-4.4 already merged qgroup reserve patchset, but it's
> causing some strange bug like over decrease data sinfo->bytes_may_use,
> mainly in generic/127 testcase.

Haven't hit that one yet.

>
> But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
> old tests based on that), no generic/127 problem at all.
>
> Thanks,
> Qu
>
>
>> [169125.378522] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
>> BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org
>> 04/01/2014
>> [169125.380916] Workqueue: btrfs-endio-write btrfs_endio_write_helper
>> [btrfs]
>> [169125.382167]  0000000000000000 ffff88007ef2bc28 ffffffff812566f4
>> ffff88007ef2bc70
>> [169125.383643]  ffff88007ef2bc60 ffffffff8104d0a6 ffffffffa03cac33
>> ffff8801f5ca6db0
>> [169125.385197]  ffff8802c6c7ee98 ffff880122bc1000 00000000fffffffe
>> ffff88007ef2bcc8
>> [169125.386691] Call Trace:
>> [169125.387194]  [<ffffffff812566f4>] dump_stack+0x4e/0x79
>> [169125.388205]  [<ffffffff8104d0a6>] warn_slowpath_common+0x9f/0xb8
>> [169125.389386]  [<ffffffffa03cac33>] ?
>> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]
>> [169125.390837]  [<ffffffff8104d107>] warn_slowpath_fmt+0x48/0x50
>> [169125.391839]  [<ffffffffa03d67bb>] ? unpin_extent_cache+0xbe/0xcc
>> [btrfs]
>> [169125.392973]  [<ffffffffa03cac33>]
>> btrfs_finish_ordered_io+0x347/0x4eb [btrfs]
>> [169125.395714]  [<ffffffff8147c612>] ?
>> _raw_spin_unlock_irqrestore+0x38/0x60
>> [169125.396888]  [<ffffffff81087d0b>] ?
>> trace_hardirqs_off_caller+0x1f/0xb9
>> [169125.397986]  [<ffffffffa03cadec>] finish_ordered_fn+0x15/0x17 [btrfs]
>> [169125.399122]  [<ffffffffa03ec706>] normal_work_helper+0x14c/0x32a
>> [btrfs]
>> [169125.400300]  [<ffffffffa03ec9e6>] btrfs_endio_write_helper+0x12/0x14
>> [btrfs]
>> [169125.401450]  [<ffffffff81063b23>] process_one_work+0x24a/0x4ac
>> [169125.402631]  [<ffffffff81064285>] worker_thread+0x206/0x2c2
>> [169125.403622]  [<ffffffff8106407f>] ? rescuer_thread+0x2cb/0x2cb
>> [169125.404693]  [<ffffffff8106904d>] kthread+0xef/0xf7
>> [169125.405727]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
>> [169125.406808]  [<ffffffff8147d10f>] ret_from_fork+0x3f/0x70
>> [169125.407834]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
>> [169125.408840] ---[ end trace 6ee4342a5722b119 ]---
>> [169125.409654] BTRFS: error (device sdc) in
>> btrfs_finish_ordered_io:2929: errno=-2 No such entry
>>
>> So what you have here is racy:
>>
>> btrfs_finish_ordered_io()
>>     joins existing transaction (or starts a new one)
>>     insert_reserved_file_extent()
>>        btrfs_alloc_reserved_file_extent() --> creates delayed ref
>>
>>        ******* delayed refs are run, someone called
>> btrfs_async_run_delayed_refs() from btrfs_end_transaction(), ref head
>> is removed ******
>>
>>        btrfs_add_delayed_qgroup_reserve() --> does not find delayed ref
>> head, returns -ENOENT and finish_ordered_io aborts current
>> transaction...
>>
>> A very tiny race, but...
>>
>> thanks
>>
>>
>>> +       WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
>>> +       ref_head->qgroup_ref_root = ref_root;
>>> +       ref_head->qgroup_reserved = num_bytes;
>>> +out:
>>> +       spin_unlock(&delayed_refs->lock);
>>> +       return ret;
>>> +}
>>> +
>>>   int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
>>>                                  struct btrfs_trans_handle *trans,
>>>                                  u64 bytenr, u64 num_bytes,
>>> diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
>>> index 13fb5e6..d4c41e2 100644
>>> --- a/fs/btrfs/delayed-ref.h
>>> +++ b/fs/btrfs/delayed-ref.h
>>> @@ -113,6 +113,17 @@ struct btrfs_delayed_ref_head {
>>>          int total_ref_mod;
>>>
>>>          /*
>>> +        * For qgroup reserved space freeing.
>>> +        *
>>> +        * ref_root and reserved will be recorded after
>>> +        * BTRFS_ADD_DELAYED_EXTENT is called.
>>> +        * And will be used to free reserved qgroup space at
>>> +        * run_delayed_refs() time.
>>> +        */
>>> +       u64 qgroup_ref_root;
>>> +       u64 qgroup_reserved;
>>> +
>>> +       /*
>>>           * when a new extent is allocated, it is just reserved in memory
>>>           * The actual extent isn't inserted into the extent allocation
>>> tree
>>>           * until the delayed ref is processed.  must_insert_reserved is
>>> @@ -242,6 +253,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info
>>> *fs_info,
>>>                                 u64 owner, u64 offset, int action,
>>>                                 struct btrfs_delayed_extent_op
>>> *extent_op,
>>>                                 int no_quota);
>>> +int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
>>> +                                    struct btrfs_trans_handle *trans,
>>> +                                    u64 ref_root, u64 bytenr, u64
>>> num_bytes);
>>>   int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
>>>                                  struct btrfs_trans_handle *trans,
>>>                                  u64 bytenr, u64 num_bytes,
>>> --
>>> 2.6.1
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>
>>
>>
>
Chris Mason Oct. 27, 2015, 11:34 a.m. UTC | #10
On Tue, Oct 27, 2015 at 05:05:56PM +0800, Qu Wenruo wrote:
> 
> 
> Chris Mason wrote on 2015/10/27 02:12 -0400:
> >On Tue, Oct 27, 2015 at 01:48:34PM +0800, Qu Wenruo wrote:
> >>>>Are you testing integration-4.4 from Chris repo?
> >>>>Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
> >>>>
> >>>>Although integration-4.4 already merged qgroup reserve patchset, but it's
> >>>>causing some strange bug like over decrease data sinfo->bytes_may_use,
> >>>>mainly in generic/127 testcase.
> >>>>
> >>>>But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
> >>>>old tests based on that), no generic/127 problem at all.
> >>>
> >>>Did I mismerge things?
> >>>
> >>>-chris
> >>>
> >>Not sure yet.
> >>
> >>But at least some patches in 4.3 is not in integration-4.4, like the
> >>following patch:
> >>btrfs: Avoid truncate tailing page if fallocate range doesn't exceed inode
> >>size
> >
> >Have you tried testing integration-4.4 merged with current Linus git?
> >
> >-chris
> >
> Integration-4.4 merged with Linus' master also fails. :(
> 
> Current known working branches are all based on 4.3-integration(4.2-rc5):
> https://github.com/adam900710/linux/tree/qgroup_reserve_good
> 
> Tried 4.3-rc5 and 4.3-rc7, all fails with kernel warning in generic/137.
> 
> And due to the huge difference, I'm afraid it won't take a short time to
> find the root cause...

Ok, this is the top merge commit in integration:

commit a9e6d153563d2ed69c6cd7fb4fa5ce4ca7c712eb
Merge: 56fa9d0 0584f71
Author: Chris Mason <clm@fb.com>
Date:   Wed Oct 21 19:00:38 2015 -0700

    Merge branch 'allocator-fixes' into for-linus-4.4

Please try commit 56fa9d0, which doesn't have Josef's allocator fixes.
It's possible there is a conflict with your changes in there.

-chris

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Qu Wenruo Oct. 28, 2015, 12:25 a.m. UTC | #11
Chris Mason wrote on 2015/10/27 07:34 -0400:
> On Tue, Oct 27, 2015 at 05:05:56PM +0800, Qu Wenruo wrote:
>>
>>
>> Chris Mason wrote on 2015/10/27 02:12 -0400:
>>> On Tue, Oct 27, 2015 at 01:48:34PM +0800, Qu Wenruo wrote:
>>>>>> Are you testing integration-4.4 from Chris repo?
>>>>>> Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
>>>>>>
>>>>>> Although integration-4.4 already merged qgroup reserve patchset, but it's
>>>>>> causing some strange bug like over decrease data sinfo->bytes_may_use,
>>>>>> mainly in generic/127 testcase.
>>>>>>
>>>>>> But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
>>>>>> old tests based on that), no generic/127 problem at all.
>>>>>
>>>>> Did I mismerge things?
>>>>>
>>>>> -chris
>>>>>
>>>> Not sure yet.
>>>>
>>>> But at least some patches in 4.3 is not in integration-4.4, like the
>>>> following patch:
>>>> btrfs: Avoid truncate tailing page if fallocate range doesn't exceed inode
>>>> size
>>>
>>> Have you tried testing integration-4.4 merged with current Linus git?
>>>
>>> -chris
>>>
>> Integration-4.4 merged with Linus' master also fails. :(
>>
>> Current known working branches are all based on 4.3-integration(4.2-rc5):
>> https://github.com/adam900710/linux/tree/qgroup_reserve_good
>>
>> Tried 4.3-rc5 and 4.3-rc7, all fails with kernel warning in generic/137.
>>
>> And due to the huge difference, I'm afraid it won't take a short time to
>> find the root cause...
>
> Ok, this is the top merge commit in integration:
>
> commit a9e6d153563d2ed69c6cd7fb4fa5ce4ca7c712eb
> Merge: 56fa9d0 0584f71
> Author: Chris Mason <clm@fb.com>
> Date:   Wed Oct 21 19:00:38 2015 -0700
>
>      Merge branch 'allocator-fixes' into for-linus-4.4
>
> Please try commit 56fa9d0, which doesn't have Josef's allocator fixes.
> It's possible there is a conflict with your changes in there.
>
> -chris
>
Tried, still warning.

I'd better investigate it directly now, and stop wondering why it's OK in
4.3-integration...

Thanks,
Qu
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Holger Hoffstätte Oct. 28, 2015, 1:36 p.m. UTC | #12
On Tue, Oct 27, 2015 at 12:34 PM, Chris Mason <clm@fb.com> wrote:
> On Tue, Oct 27, 2015 at 05:05:56PM +0800, Qu Wenruo wrote:
>>
>>
>> Chris Mason wrote on 2015/10/27 02:12 -0400:
>> >On Tue, Oct 27, 2015 at 01:48:34PM +0800, Qu Wenruo wrote:
>> >>>>Are you testing integration-4.4 from Chris repo?
>> >>>>Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
>> >>>>
>> >>>>Although integration-4.4 already merged qgroup reserve patchset, but it's
>> >>>>causing some strange bug like over decrease data sinfo->bytes_may_use,
>> >>>>mainly in generic/127 testcase.
>> >>>>
>> >>>>But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
>> >>>>old tests based on that), no generic/127 problem at all.
>> >>>
>> >>>Did I mismerge things?
>> >>>
>> >>>-chris
>> >>>
>> >>Not sure yet.
>> >>
>> >>But at least some patches in 4.3 is not in integration-4.4, like the
>> >>following patch:
>> >>btrfs: Avoid truncate tailing page if fallocate range doesn't exceed inode
>> >>size
>> >
>> >Have you tried testing integration-4.4 merged with current Linus git?

Chris, something definitely went wrong with the 4.4-integration
branch, and it's not the point where you merged from Josef. Mainline
has: 0f6925fa2907df58496cabc33fa4677c635e2223 ("btrfs: Avoid truncate
tailing page if fallocate range doesn't exceed inode size"), and that
commit just doesn't exist in 4.4-integration any more. Nor did any
merges touch file.c, so it seems this just got lost for some reason
(rebase? forced push?).
It's difficult to say what else might have gone missing.

-h
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Mason Oct. 29, 2015, 6:29 a.m. UTC | #13
On Wed, Oct 28, 2015 at 02:36:42PM +0100, Holger Hoffstätte wrote:
> On Tue, Oct 27, 2015 at 12:34 PM, Chris Mason <clm@fb.com> wrote:
> > On Tue, Oct 27, 2015 at 05:05:56PM +0800, Qu Wenruo wrote:
> >>
> >>
> >> Chris Mason wrote on 2015/10/27 02:12 -0400:
> >> >On Tue, Oct 27, 2015 at 01:48:34PM +0800, Qu Wenruo wrote:
> >> >>>>Are you testing integration-4.4 from Chris repo?
> >> >>>>Or 4.3-rc from mainline repo with my qgroup reserve patchset applied?
> >> >>>>
> >> >>>>Although integration-4.4 already merged qgroup reserve patchset, but it's
> >> >>>>causing some strange bug like over decrease data sinfo->bytes_may_use,
> >> >>>>mainly in generic/127 testcase.
> >> >>>>
> >> >>>>But if qgroup reserve patchset is rebased to integration-4.3 (I did all my
> >> >>>>old tests based on that), no generic/127 problem at all.
> >> >>>
> >> >>>Did I mismerge things?
> >> >>>
> >> >>>-chris
> >> >>>
> >> >>Not sure yet.
> >> >>
> >> >>But at least some patches in 4.3 is not in integration-4.4, like the
> >> >>following patch:
> >> >>btrfs: Avoid truncate tailing page if fallocate range doesn't exceed inode
> >> >>size
> >> >
> >> >Have you tried testing integration-4.4 merged with current Linus git?
> 
> Chris, something went definitely wrong with the 4.4-integration
> branch, and it's not the point where you merged from Josef. Mainline
> has: 0f6925fa2907df58496cabc33fa4677c635e2223 ("btrfs: Avoid truncate
> tailing page if fallocate range doesn't exceed inode size"), and that
> commit just doesn't exist in 4.4-integration any more. Neither did any
> merges touch file.c, so it
> seems this just got lost for some reason (rebase? forced push?).
> It's difficult to say what else might have gone missing.

Hi Holger,

integration-4.4 is based on 4.3-rc5, and it doesn't include any of the
btrfs commits that went in after rc5.  So if you want the latest commits
from 4.3, you just need to merge integration-4.4 with a more recent
Linus rc.

This isn't completely intuitive ;)  I could merge in 4.3-rc7, but for the
trees that I send to Linus, he prefers I not add extra merges unless it
solves some dependency (like a new API, or highly critical bug).

So when I test integration, I test it merged into Linus' latest rc, but
I apply patches on top of the older base.  It makes the resulting graph
of merges look much nicer when Linus pulls from me, and if you scroll
through the commits with git log or gitweb, it's clearer where the
new commits are.

-chris

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index ac3e81d..bd9b63b 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -476,6 +476,8 @@  add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 	INIT_LIST_HEAD(&head_ref->ref_list);
 	head_ref->processing = 0;
 	head_ref->total_ref_mod = count_mod;
+	head_ref->qgroup_reserved = 0;
+	head_ref->qgroup_ref_root = 0;
 
 	/* Record qgroup extent info if provided */
 	if (qrecord) {
@@ -746,6 +748,33 @@  int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
+				     struct btrfs_trans_handle *trans,
+				     u64 ref_root, u64 bytenr, u64 num_bytes)
+{
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_head *ref_head;
+	int ret = 0;
+
+	if (!fs_info->quota_enabled || !is_fstree(ref_root))
+		return 0;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+
+	spin_lock(&delayed_refs->lock);
+	ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
+	if (!ref_head) {
+		ret = -ENOENT;
+		goto out;
+	}
+	WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
+	ref_head->qgroup_ref_root = ref_root;
+	ref_head->qgroup_reserved = num_bytes;
+out:
+	spin_unlock(&delayed_refs->lock);
+	return ret;
+}
+
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				struct btrfs_trans_handle *trans,
 				u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 13fb5e6..d4c41e2 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -113,6 +113,17 @@  struct btrfs_delayed_ref_head {
 	int total_ref_mod;
 
 	/*
+	 * For qgroup reserved space freeing.
+	 *
+	 * ref_root and reserved will be recorded after
+	 * BTRFS_ADD_DELAYED_EXTENT is called.
+	 * And will be used to free reserved qgroup space at
+	 * run_delayed_refs() time.
+	 */
+	u64 qgroup_ref_root;
+	u64 qgroup_reserved;
+
+	/*
 	 * when a new extent is allocated, it is just reserved in memory
 	 * The actual extent isn't inserted into the extent allocation tree
 	 * until the delayed ref is processed.  must_insert_reserved is
@@ -242,6 +253,9 @@  int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 			       u64 owner, u64 offset, int action,
 			       struct btrfs_delayed_extent_op *extent_op,
 			       int no_quota);
+int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
+				     struct btrfs_trans_handle *trans,
+				     u64 ref_root, u64 bytenr, u64 num_bytes);
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				struct btrfs_trans_handle *trans,
 				u64 bytenr, u64 num_bytes,