diff mbox series

[4/4,v2] persistent-data: reduce lock contention while walking the btree

Message ID alpine.LRH.2.02.2210111505160.18298@file01.intranet.prod.int.rdu2.redhat.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Mikulas Patocka Oct. 11, 2022, 7:10 p.m. UTC
Hi

Here I'm sending updated patch 4 that fixes hang on discard. We must not 
do the optimization in dm_btree_lookup_next.

Mikulas


From: Mikulas Patocka <mpatocka@redhat.com>

This patch reduces lock contention in btree walks. We modify the
functions init_ro_wpin, exit_ro_spine and ro_step so that they use
dm_bufio_lock_read/dm_bufio_get_unlocked/dm_bufio_unlock_read. If
dm_bm_fast_get_block fails, we fallback to normal locking.

When doing tests on pmem and fully provisioned thin volume, it improves
thoughput from 286MiB/s to 442MiB/s.

fio --ioengine=psync --iodepth=1 --rw=randrw --bs=4k --direct=1 --numjobs=12 --time_based --runtime=10 --group_reporting --name=/dev/vg/thin
before:
READ: bw=286MiB/s
WRITE: bw=286MiB/s
after:
READ: bw=442MiB/s
WRITE: bw=442MiB/s

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/persistent-data/dm-block-manager.c       |   32 +++++++++++++++
 drivers/md/persistent-data/dm-block-manager.h       |    6 ++
 drivers/md/persistent-data/dm-btree-internal.h      |    4 +
 drivers/md/persistent-data/dm-btree-spine.c         |   41 ++++++++++++++++++--
 drivers/md/persistent-data/dm-btree.c               |    6 +-
 drivers/md/persistent-data/dm-transaction-manager.c |   17 ++++++++
 drivers/md/persistent-data/dm-transaction-manager.h |    6 ++
 7 files changed, 104 insertions(+), 8 deletions(-)

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel

Comments

Joe Thornber Oct. 12, 2022, 6:31 a.m. UTC | #1
Thanks Mikulas,

I'll test this morning.

- Joe


On Tue, Oct 11, 2022 at 8:10 PM Mikulas Patocka <mpatocka@redhat.com> wrote:

> Hi
>
> Here I'm sending updated patch 4 that fixes hang on discard. We must not
> do the optimization in dm_btree_lookup_next.
>
> Mikulas
>
>
> From: Mikulas Patocka <mpatocka@redhat.com>
>
> This patch reduces lock contention in btree walks. We modify the
> functions init_ro_wpin, exit_ro_spine and ro_step so that they use
> dm_bufio_lock_read/dm_bufio_get_unlocked/dm_bufio_unlock_read. If
> dm_bm_fast_get_block fails, we fallback to normal locking.
>
> When doing tests on pmem and fully provisioned thin volume, it improves
> thoughput from 286MiB/s to 442MiB/s.
>
> fio --ioengine=psync --iodepth=1 --rw=randrw --bs=4k --direct=1
> --numjobs=12 --time_based --runtime=10 --group_reporting --name=/dev/vg/thin
> before:
> READ: bw=286MiB/s
> WRITE: bw=286MiB/s
> after:
> READ: bw=442MiB/s
> WRITE: bw=442MiB/s
>
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
>
> ---
>  drivers/md/persistent-data/dm-block-manager.c       |   32 +++++++++++++++
>  drivers/md/persistent-data/dm-block-manager.h       |    6 ++
>  drivers/md/persistent-data/dm-btree-internal.h      |    4 +
>  drivers/md/persistent-data/dm-btree-spine.c         |   41
> ++++++++++++++++++--
>  drivers/md/persistent-data/dm-btree.c               |    6 +-
>  drivers/md/persistent-data/dm-transaction-manager.c |   17 ++++++++
>  drivers/md/persistent-data/dm-transaction-manager.h |    6 ++
>  7 files changed, 104 insertions(+), 8 deletions(-)
>
> Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.c
> +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.c
> @@ -601,6 +601,38 @@ void dm_bm_unlock(struct dm_block *b)
>  }
>  EXPORT_SYMBOL_GPL(dm_bm_unlock);
>
> +void dm_bm_fast_lock(struct dm_block_manager *bm)
> +{
> +       dm_bufio_lock_read(bm->bufio);
> +}
> +
> +void dm_bm_fast_unlock(struct dm_block_manager *bm)
> +{
> +       dm_bufio_unlock_read(bm->bufio);
> +}
> +
> +int dm_bm_fast_get_block(struct dm_block_manager *bm,
> +                        dm_block_t b, struct dm_block_validator *v,
> +                        struct dm_block **result)
> +{
> +       int r;
> +       struct buffer_aux *aux;
> +       void *p;
> +
> +       p = dm_bufio_get_unlocked(bm->bufio, b, (struct dm_buffer **)
> result);
> +       if (IS_ERR(p))
> +               return PTR_ERR(p);
> +       if (unlikely(!p))
> +               return -EWOULDBLOCK;
> +
> +       aux = dm_bufio_get_aux_data(to_buffer(*result));
> +       r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
> +       if (unlikely(r))
> +               return r;
> +
> +       return 0;
> +}
> +
>  int dm_bm_flush(struct dm_block_manager *bm)
>  {
>         if (dm_bm_is_read_only(bm))
> Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.h
> ===================================================================
> --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.h
> +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.h
> @@ -96,6 +96,12 @@ int dm_bm_write_lock_zero(struct dm_bloc
>
>  void dm_bm_unlock(struct dm_block *b);
>
> +void dm_bm_fast_lock(struct dm_block_manager *bm);
> +void dm_bm_fast_unlock(struct dm_block_manager *bm);
> +int dm_bm_fast_get_block(struct dm_block_manager *bm,
> +                        dm_block_t b, struct dm_block_validator *v,
> +                        struct dm_block **result);
> +
>  /*
>   * It's a common idiom to have a superblock that should be committed last.
>   *
> Index: linux-2.6/drivers/md/persistent-data/dm-btree-internal.h
> ===================================================================
> --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-internal.h
> +++ linux-2.6/drivers/md/persistent-data/dm-btree-internal.h
> @@ -64,9 +64,11 @@ struct ro_spine {
>         struct dm_btree_info *info;
>
>         struct dm_block *node;
> +       bool fast_locked;
> +       bool fast_lock_failed;
>  };
>
> -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
> +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool
> disable_fast_access);
>  void exit_ro_spine(struct ro_spine *s);
>  int ro_step(struct ro_spine *s, dm_block_t new_child);
>  struct btree_node *ro_node(struct ro_spine *s);
> Index: linux-2.6/drivers/md/persistent-data/dm-btree-spine.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-spine.c
> +++ linux-2.6/drivers/md/persistent-data/dm-btree-spine.c
> @@ -118,27 +118,60 @@ void unlock_block(struct dm_btree_info *
>         dm_tm_unlock(info->tm, b);
>  }
>
> +static void bn_fast_lock(struct dm_btree_info *info)
> +{
> +       dm_tm_fast_lock(info->tm);
> +}
> +
> +static void bn_fast_unlock(struct dm_btree_info *info)
> +{
> +       dm_tm_fast_unlock(info->tm);
> +}
> +
> +static int bn_fast_get_block(struct dm_btree_info *info, dm_block_t b,
> +                     struct dm_block **result)
> +{
> +       return dm_tm_fast_get_block(info->tm, b, &btree_node_validator,
> result);
> +}
> +
>  /*----------------------------------------------------------------*/
>
> -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info)
> +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool
> disable_fast_access)
>  {
>         s->info = info;
>         s->node = NULL;
> +       s->fast_locked = false;
> +       s->fast_lock_failed = disable_fast_access;
>  }
>
>  void exit_ro_spine(struct ro_spine *s)
>  {
> -       if (s->node)
> +       if (s->fast_locked)
> +               bn_fast_unlock(s->info);
> +       else if (s->node)
>                 unlock_block(s->info, s->node);
>  }
>
>  int ro_step(struct ro_spine *s, dm_block_t new_child)
>  {
>         if (s->node) {
> -               unlock_block(s->info, s->node);
> +               if (unlikely(!s->fast_locked))
> +                       unlock_block(s->info, s->node);
>                 s->node = NULL;
>         }
> -
> +       if (likely(!s->fast_lock_failed)) {
> +               int r;
> +               if (!s->fast_locked) {
> +                       bn_fast_lock(s->info);
> +                       s->fast_locked = true;
> +               }
> +               r = bn_fast_get_block(s->info, new_child, &s->node);
> +               if (likely(!r))
> +                       return 0;
> +               s->fast_lock_failed = true;
> +               s->fast_locked = false;
> +               bn_fast_unlock(s->info);
> +       }
>         return bn_read_lock(s->info, new_child, &s->node);
>  }
>
> Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.c
> +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c
> @@ -348,6 +348,23 @@ void dm_tm_unlock(struct dm_transaction_
>  }
>  EXPORT_SYMBOL_GPL(dm_tm_unlock);
>
> +void dm_tm_fast_lock(struct dm_transaction_manager *tm)
> +{
> +       dm_bm_fast_lock(tm->is_clone ? tm->real->bm : tm->bm);
> +}
> +
> +void dm_tm_fast_unlock(struct dm_transaction_manager *tm)
> +{
> +       dm_bm_fast_unlock(tm->is_clone ? tm->real->bm : tm->bm);
> +}
> +
> +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b,
> +                        struct dm_block_validator *v,
> +                        struct dm_block **blk)
> +{
> +       return dm_bm_fast_get_block(tm->is_clone ? tm->real->bm : tm->bm,
> b, v, blk);
> +}
> +
>  void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
>  {
>         /*
> Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h
> ===================================================================
> --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.h
> +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h
> @@ -96,6 +96,12 @@ int dm_tm_read_lock(struct dm_transactio
>
>  void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);
>
> +void dm_tm_fast_lock(struct dm_transaction_manager *tm);
> +void dm_tm_fast_unlock(struct dm_transaction_manager *tm);
> +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b,
> +                        struct dm_block_validator *v,
> +                        struct dm_block **blk);
> +
>  /*
>   * Functions for altering the reference count of a block directly.
>   */
> Index: linux-2.6/drivers/md/persistent-data/dm-btree.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/persistent-data/dm-btree.c
> +++ linux-2.6/drivers/md/persistent-data/dm-btree.c
> @@ -377,7 +377,7 @@ int dm_btree_lookup(struct dm_btree_info
>         __le64 internal_value_le;
>         struct ro_spine spine;
>
> -       init_ro_spine(&spine, info);
> +       init_ro_spine(&spine, info, false);
>         for (level = 0; level < info->levels; level++) {
>                 size_t size;
>                 void *value_p;
> @@ -472,7 +472,7 @@ int dm_btree_lookup_next(struct dm_btree
>         __le64 internal_value_le;
>         struct ro_spine spine;
>
> -       init_ro_spine(&spine, info);
> +       init_ro_spine(&spine, info, true);
>         for (level = 0; level < info->levels - 1u; level++) {
>                 r = btree_lookup_raw(&spine, root, keys[level],
>                                      lower_bound, rkey,
> @@ -1369,7 +1369,7 @@ static int dm_btree_find_key(struct dm_b
>         int r = 0, count = 0, level;
>         struct ro_spine spine;
>
> -       init_ro_spine(&spine, info);
> +       init_ro_spine(&spine, info, false);
>         for (level = 0; level < info->levels; level++) {
>                 r = find_key(&spine, root, find_highest, result_keys +
> level,
>                              level == info->levels - 1 ? NULL : &root);
>
>
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Joe Thornber Oct. 12, 2022, 8:45 a.m. UTC | #2
fio results (many jobs, large queue depth, 64 core machine, preallocated
thin):

upstream:
    READ: bw=303MiB/s (318MB/s), 15.7MiB/s-24.9MiB/s (16.5MB/s-26.1MB/s),
io=9093MiB (9535MB), run=30004-30007msec
    WRITE: bw=304MiB/s (318MB/s), 15.7MiB/s-24.8MiB/s (16.5MB/s-26.0MB/s),
io=9108MiB (9550MB), run=30004-30007msec

Mikulas' patches:
    READ: bw=524MiB/s (549MB/s), 32.4MiB/s-32.9MiB/s (34.0MB/s-34.5MB/s),
io=15.3GiB (16.5GB), run=30001-30002msec
    WRITE: bw=524MiB/s (550MB/s), 32.5MiB/s-32.9MiB/s (34.1MB/s-34.5MB/s),
io=15.4GiB (16.5GB), run=30001-30002msec

My patches:
    READ: bw=538MiB/s (564MB/s), 33.4MiB/s-33.7MiB/s (35.1MB/s-35.4MB/s),
io=15.8GiB (16.9GB), run=30001-30002msec
    WRITE: bw=539MiB/s (565MB/s), 33.4MiB/s-33.8MiB/s (35.0MB/s-35.4MB/s),
io=15.8GiB (16.9GB), run=30001-30002msec

Both our approaches give similar results.  Mine is slightly faster because
the changes are all internal to dm-bufio, so everything
benefits from the added concurrency not just the btrees (eg, space maps).

As mentioned on irc I don't like the new interface to dm-bufio.  Having
different methods of getting a buffer is ugly, you have to fall back to the
other method if it fails, and remember how you eventually got it for
unlocking.  Plus we need to change any code to take advantage of it.
Currently dm-bufio is used by dm-thin, dm-cache, dm-era, dm-verity,
dm-ebs-target, dm-integrity, and dm-snap-persistent.  We're not going to
audit all these and see which can benefit.

On the other hand I don't like the size of my patch (~1200 line diff).
I'll post it when it's complete and we can continue the discussion then.

- Joe


On Wed, Oct 12, 2022 at 7:31 AM Joe Thornber <thornber@redhat.com> wrote:

> Thanks Mikulas,
>
> I'll test this morning.
>
> - Joe
>
>
> On Tue, Oct 11, 2022 at 8:10 PM Mikulas Patocka <mpatocka@redhat.com>
> wrote:
>
>> Hi
>>
>> Here I'm sending updated patch 4 that fixes hang on discard. We must not
>> do the optimization in dm_btree_lookup_next.
>>
>> Mikulas
>>
>>
>> From: Mikulas Patocka <mpatocka@redhat.com>
>>
>> This patch reduces lock contention in btree walks. We modify the
>> functions init_ro_wpin, exit_ro_spine and ro_step so that they use
>> dm_bufio_lock_read/dm_bufio_get_unlocked/dm_bufio_unlock_read. If
>> dm_bm_fast_get_block fails, we fallback to normal locking.
>>
>> When doing tests on pmem and fully provisioned thin volume, it improves
>> thoughput from 286MiB/s to 442MiB/s.
>>
>> fio --ioengine=psync --iodepth=1 --rw=randrw --bs=4k --direct=1
>> --numjobs=12 --time_based --runtime=10 --group_reporting --name=/dev/vg/thin
>> before:
>> READ: bw=286MiB/s
>> WRITE: bw=286MiB/s
>> after:
>> READ: bw=442MiB/s
>> WRITE: bw=442MiB/s
>>
>> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
>>
>> ---
>>  drivers/md/persistent-data/dm-block-manager.c       |   32
>> +++++++++++++++
>>  drivers/md/persistent-data/dm-block-manager.h       |    6 ++
>>  drivers/md/persistent-data/dm-btree-internal.h      |    4 +
>>  drivers/md/persistent-data/dm-btree-spine.c         |   41
>> ++++++++++++++++++--
>>  drivers/md/persistent-data/dm-btree.c               |    6 +-
>>  drivers/md/persistent-data/dm-transaction-manager.c |   17 ++++++++
>>  drivers/md/persistent-data/dm-transaction-manager.h |    6 ++
>>  7 files changed, 104 insertions(+), 8 deletions(-)
>>
>> Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.c
>> +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.c
>> @@ -601,6 +601,38 @@ void dm_bm_unlock(struct dm_block *b)
>>  }
>>  EXPORT_SYMBOL_GPL(dm_bm_unlock);
>>
>> +void dm_bm_fast_lock(struct dm_block_manager *bm)
>> +{
>> +       dm_bufio_lock_read(bm->bufio);
>> +}
>> +
>> +void dm_bm_fast_unlock(struct dm_block_manager *bm)
>> +{
>> +       dm_bufio_unlock_read(bm->bufio);
>> +}
>> +
>> +int dm_bm_fast_get_block(struct dm_block_manager *bm,
>> +                        dm_block_t b, struct dm_block_validator *v,
>> +                        struct dm_block **result)
>> +{
>> +       int r;
>> +       struct buffer_aux *aux;
>> +       void *p;
>> +
>> +       p = dm_bufio_get_unlocked(bm->bufio, b, (struct dm_buffer **)
>> result);
>> +       if (IS_ERR(p))
>> +               return PTR_ERR(p);
>> +       if (unlikely(!p))
>> +               return -EWOULDBLOCK;
>> +
>> +       aux = dm_bufio_get_aux_data(to_buffer(*result));
>> +       r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
>> +       if (unlikely(r))
>> +               return r;
>> +
>> +       return 0;
>> +}
>> +
>>  int dm_bm_flush(struct dm_block_manager *bm)
>>  {
>>         if (dm_bm_is_read_only(bm))
>> Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.h
>> ===================================================================
>> --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.h
>> +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.h
>> @@ -96,6 +96,12 @@ int dm_bm_write_lock_zero(struct dm_bloc
>>
>>  void dm_bm_unlock(struct dm_block *b);
>>
>> +void dm_bm_fast_lock(struct dm_block_manager *bm);
>> +void dm_bm_fast_unlock(struct dm_block_manager *bm);
>> +int dm_bm_fast_get_block(struct dm_block_manager *bm,
>> +                        dm_block_t b, struct dm_block_validator *v,
>> +                        struct dm_block **result);
>> +
>>  /*
>>   * It's a common idiom to have a superblock that should be committed
>> last.
>>   *
>> Index: linux-2.6/drivers/md/persistent-data/dm-btree-internal.h
>> ===================================================================
>> --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-internal.h
>> +++ linux-2.6/drivers/md/persistent-data/dm-btree-internal.h
>> @@ -64,9 +64,11 @@ struct ro_spine {
>>         struct dm_btree_info *info;
>>
>>         struct dm_block *node;
>> +       bool fast_locked;
>> +       bool fast_lock_failed;
>>  };
>>
>> -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
>> +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool
>> disable_fast_access);
>>  void exit_ro_spine(struct ro_spine *s);
>>  int ro_step(struct ro_spine *s, dm_block_t new_child);
>>  struct btree_node *ro_node(struct ro_spine *s);
>> Index: linux-2.6/drivers/md/persistent-data/dm-btree-spine.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-spine.c
>> +++ linux-2.6/drivers/md/persistent-data/dm-btree-spine.c
>> @@ -118,27 +118,60 @@ void unlock_block(struct dm_btree_info *
>>         dm_tm_unlock(info->tm, b);
>>  }
>>
>> +static void bn_fast_lock(struct dm_btree_info *info)
>> +{
>> +       dm_tm_fast_lock(info->tm);
>> +}
>> +
>> +static void bn_fast_unlock(struct dm_btree_info *info)
>> +{
>> +       dm_tm_fast_unlock(info->tm);
>> +}
>> +
>> +static int bn_fast_get_block(struct dm_btree_info *info, dm_block_t b,
>> +                     struct dm_block **result)
>> +{
>> +       return dm_tm_fast_get_block(info->tm, b, &btree_node_validator,
>> result);
>> +}
>> +
>>  /*----------------------------------------------------------------*/
>>
>> -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info)
>> +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool
>> disable_fast_access)
>>  {
>>         s->info = info;
>>         s->node = NULL;
>> +       s->fast_locked = false;
>> +       s->fast_lock_failed = disable_fast_access;
>>  }
>>
>>  void exit_ro_spine(struct ro_spine *s)
>>  {
>> -       if (s->node)
>> +       if (s->fast_locked)
>> +               bn_fast_unlock(s->info);
>> +       else if (s->node)
>>                 unlock_block(s->info, s->node);
>>  }
>>
>>  int ro_step(struct ro_spine *s, dm_block_t new_child)
>>  {
>>         if (s->node) {
>> -               unlock_block(s->info, s->node);
>> +               if (unlikely(!s->fast_locked))
>> +                       unlock_block(s->info, s->node);
>>                 s->node = NULL;
>>         }
>> -
>> +       if (likely(!s->fast_lock_failed)) {
>> +               int r;
>> +               if (!s->fast_locked) {
>> +                       bn_fast_lock(s->info);
>> +                       s->fast_locked = true;
>> +               }
>> +               r = bn_fast_get_block(s->info, new_child, &s->node);
>> +               if (likely(!r))
>> +                       return 0;
>> +               s->fast_lock_failed = true;
>> +               s->fast_locked = false;
>> +               bn_fast_unlock(s->info);
>> +       }
>>         return bn_read_lock(s->info, new_child, &s->node);
>>  }
>>
>> Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.c
>> +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c
>> @@ -348,6 +348,23 @@ void dm_tm_unlock(struct dm_transaction_
>>  }
>>  EXPORT_SYMBOL_GPL(dm_tm_unlock);
>>
>> +void dm_tm_fast_lock(struct dm_transaction_manager *tm)
>> +{
>> +       dm_bm_fast_lock(tm->is_clone ? tm->real->bm : tm->bm);
>> +}
>> +
>> +void dm_tm_fast_unlock(struct dm_transaction_manager *tm)
>> +{
>> +       dm_bm_fast_unlock(tm->is_clone ? tm->real->bm : tm->bm);
>> +}
>> +
>> +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b,
>> +                        struct dm_block_validator *v,
>> +                        struct dm_block **blk)
>> +{
>> +       return dm_bm_fast_get_block(tm->is_clone ? tm->real->bm : tm->bm,
>> b, v, blk);
>> +}
>> +
>>  void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
>>  {
>>         /*
>> Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h
>> ===================================================================
>> --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.h
>> +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h
>> @@ -96,6 +96,12 @@ int dm_tm_read_lock(struct dm_transactio
>>
>>  void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);
>>
>> +void dm_tm_fast_lock(struct dm_transaction_manager *tm);
>> +void dm_tm_fast_unlock(struct dm_transaction_manager *tm);
>> +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b,
>> +                        struct dm_block_validator *v,
>> +                        struct dm_block **blk);
>> +
>>  /*
>>   * Functions for altering the reference count of a block directly.
>>   */
>> Index: linux-2.6/drivers/md/persistent-data/dm-btree.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/md/persistent-data/dm-btree.c
>> +++ linux-2.6/drivers/md/persistent-data/dm-btree.c
>> @@ -377,7 +377,7 @@ int dm_btree_lookup(struct dm_btree_info
>>         __le64 internal_value_le;
>>         struct ro_spine spine;
>>
>> -       init_ro_spine(&spine, info);
>> +       init_ro_spine(&spine, info, false);
>>         for (level = 0; level < info->levels; level++) {
>>                 size_t size;
>>                 void *value_p;
>> @@ -472,7 +472,7 @@ int dm_btree_lookup_next(struct dm_btree
>>         __le64 internal_value_le;
>>         struct ro_spine spine;
>>
>> -       init_ro_spine(&spine, info);
>> +       init_ro_spine(&spine, info, true);
>>         for (level = 0; level < info->levels - 1u; level++) {
>>                 r = btree_lookup_raw(&spine, root, keys[level],
>>                                      lower_bound, rkey,
>> @@ -1369,7 +1369,7 @@ static int dm_btree_find_key(struct dm_b
>>         int r = 0, count = 0, level;
>>         struct ro_spine spine;
>>
>> -       init_ro_spine(&spine, info);
>> +       init_ro_spine(&spine, info, false);
>>         for (level = 0; level < info->levels; level++) {
>>                 r = find_key(&spine, root, find_highest, result_keys +
>> level,
>>                              level == info->levels - 1 ? NULL : &root);
>>
>>
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.c
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.c
+++ linux-2.6/drivers/md/persistent-data/dm-block-manager.c
@@ -601,6 +601,38 @@  void dm_bm_unlock(struct dm_block *b)
 }
 EXPORT_SYMBOL_GPL(dm_bm_unlock);
 
+void dm_bm_fast_lock(struct dm_block_manager *bm)
+{
+	dm_bufio_lock_read(bm->bufio);
+}
+
+void dm_bm_fast_unlock(struct dm_block_manager *bm)
+{
+	dm_bufio_unlock_read(bm->bufio);
+}
+
+int dm_bm_fast_get_block(struct dm_block_manager *bm,
+			 dm_block_t b, struct dm_block_validator *v,
+			 struct dm_block **result)
+{
+	int r;
+	struct buffer_aux *aux;
+	void *p;
+
+	p = dm_bufio_get_unlocked(bm->bufio, b, (struct dm_buffer **) result);
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+	if (unlikely(!p))
+		return -EWOULDBLOCK;
+
+	aux = dm_bufio_get_aux_data(to_buffer(*result));
+	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
+	if (unlikely(r))
+		return r;
+
+	return 0;
+}
+
 int dm_bm_flush(struct dm_block_manager *bm)
 {
 	if (dm_bm_is_read_only(bm))
Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.h
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.h
+++ linux-2.6/drivers/md/persistent-data/dm-block-manager.h
@@ -96,6 +96,12 @@  int dm_bm_write_lock_zero(struct dm_bloc
 
 void dm_bm_unlock(struct dm_block *b);
 
+void dm_bm_fast_lock(struct dm_block_manager *bm);
+void dm_bm_fast_unlock(struct dm_block_manager *bm);
+int dm_bm_fast_get_block(struct dm_block_manager *bm,
+			 dm_block_t b, struct dm_block_validator *v,
+			 struct dm_block **result);
+
 /*
  * It's a common idiom to have a superblock that should be committed last.
  *
Index: linux-2.6/drivers/md/persistent-data/dm-btree-internal.h
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-btree-internal.h
+++ linux-2.6/drivers/md/persistent-data/dm-btree-internal.h
@@ -64,9 +64,11 @@  struct ro_spine {
 	struct dm_btree_info *info;
 
 	struct dm_block *node;
+	bool fast_locked;
+	bool fast_lock_failed;
 };
 
-void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
+void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool disable_fast_access);
 void exit_ro_spine(struct ro_spine *s);
 int ro_step(struct ro_spine *s, dm_block_t new_child);
 struct btree_node *ro_node(struct ro_spine *s);
Index: linux-2.6/drivers/md/persistent-data/dm-btree-spine.c
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-btree-spine.c
+++ linux-2.6/drivers/md/persistent-data/dm-btree-spine.c
@@ -118,27 +118,60 @@  void unlock_block(struct dm_btree_info *
 	dm_tm_unlock(info->tm, b);
 }
 
+static void bn_fast_lock(struct dm_btree_info *info)
+{
+	dm_tm_fast_lock(info->tm);
+}
+
+static void bn_fast_unlock(struct dm_btree_info *info)
+{
+	dm_tm_fast_unlock(info->tm);
+}
+
+static int bn_fast_get_block(struct dm_btree_info *info, dm_block_t b,
+		      struct dm_block **result)
+{
+	return dm_tm_fast_get_block(info->tm, b, &btree_node_validator, result);
+}
+
 /*----------------------------------------------------------------*/
 
-void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info)
+void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool disable_fast_access)
 {
 	s->info = info;
 	s->node = NULL;
+	s->fast_locked = false;
+	s->fast_lock_failed = disable_fast_access;
 }
 
 void exit_ro_spine(struct ro_spine *s)
 {
-	if (s->node)
+	if (s->fast_locked)
+		bn_fast_unlock(s->info);
+	else if (s->node)
 		unlock_block(s->info, s->node);
 }
 
 int ro_step(struct ro_spine *s, dm_block_t new_child)
 {
 	if (s->node) {
-		unlock_block(s->info, s->node);
+		if (unlikely(!s->fast_locked))
+			unlock_block(s->info, s->node);
 		s->node = NULL;
 	}
-
+	if (likely(!s->fast_lock_failed)) {
+		int r;
+		if (!s->fast_locked) {
+			bn_fast_lock(s->info);
+			s->fast_locked = true;
+		}
+		r = bn_fast_get_block(s->info, new_child, &s->node);
+		if (likely(!r))
+			return 0;
+		s->fast_lock_failed = true;
+		s->fast_locked = false;
+		bn_fast_unlock(s->info);
+	}
 	return bn_read_lock(s->info, new_child, &s->node);
 }
 
Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.c
+++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c
@@ -348,6 +348,23 @@  void dm_tm_unlock(struct dm_transaction_
 }
 EXPORT_SYMBOL_GPL(dm_tm_unlock);
 
+void dm_tm_fast_lock(struct dm_transaction_manager *tm)
+{
+	dm_bm_fast_lock(tm->is_clone ? tm->real->bm : tm->bm);
+}
+
+void dm_tm_fast_unlock(struct dm_transaction_manager *tm)
+{
+	dm_bm_fast_unlock(tm->is_clone ? tm->real->bm : tm->bm);
+}
+
+int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b,
+			 struct dm_block_validator *v,
+			 struct dm_block **blk)
+{
+	return dm_bm_fast_get_block(tm->is_clone ? tm->real->bm : tm->bm, b, v, blk);
+}
+
 void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
 {
 	/*
Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.h
+++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h
@@ -96,6 +96,12 @@  int dm_tm_read_lock(struct dm_transactio
 
 void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);
 
+void dm_tm_fast_lock(struct dm_transaction_manager *tm);
+void dm_tm_fast_unlock(struct dm_transaction_manager *tm);
+int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b,
+			 struct dm_block_validator *v,
+			 struct dm_block **blk);
+
 /*
  * Functions for altering the reference count of a block directly.
  */
Index: linux-2.6/drivers/md/persistent-data/dm-btree.c
===================================================================
--- linux-2.6.orig/drivers/md/persistent-data/dm-btree.c
+++ linux-2.6/drivers/md/persistent-data/dm-btree.c
@@ -377,7 +377,7 @@  int dm_btree_lookup(struct dm_btree_info
 	__le64 internal_value_le;
 	struct ro_spine spine;
 
-	init_ro_spine(&spine, info);
+	init_ro_spine(&spine, info, false);
 	for (level = 0; level < info->levels; level++) {
 		size_t size;
 		void *value_p;
@@ -472,7 +472,7 @@  int dm_btree_lookup_next(struct dm_btree
 	__le64 internal_value_le;
 	struct ro_spine spine;
 
-	init_ro_spine(&spine, info);
+	init_ro_spine(&spine, info, true);
 	for (level = 0; level < info->levels - 1u; level++) {
 		r = btree_lookup_raw(&spine, root, keys[level],
 				     lower_bound, rkey,
@@ -1369,7 +1369,7 @@  static int dm_btree_find_key(struct dm_b
 	int r = 0, count = 0, level;
 	struct ro_spine spine;
 
-	init_ro_spine(&spine, info);
+	init_ro_spine(&spine, info, false);
 	for (level = 0; level < info->levels; level++) {
 		r = find_key(&spine, root, find_highest, result_keys + level,
 			     level == info->levels - 1 ? NULL : &root);