Message ID | alpine.LRH.2.02.2210111505160.18298@file01.intranet.prod.int.rdu2.redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | None | expand |
Thanks Mikulas, I'll test this morning. - Joe On Tue, Oct 11, 2022 at 8:10 PM Mikulas Patocka <mpatocka@redhat.com> wrote: > Hi > > Here I'm sending updated patch 4 that fixes hang on discard. We must not > do the optimization in dm_btree_lookup_next. > > Mikulas > > > From: Mikulas Patocka <mpatocka@redhat.com> > > This patch reduces lock contention in btree walks. We modify the > functions init_ro_wpin, exit_ro_spine and ro_step so that they use > dm_bufio_lock_read/dm_bufio_get_unlocked/dm_bufio_unlock_read. If > dm_bm_fast_get_block fails, we fallback to normal locking. > > When doing tests on pmem and fully provisioned thin volume, it improves > thoughput from 286MiB/s to 442MiB/s. > > fio --ioengine=psync --iodepth=1 --rw=randrw --bs=4k --direct=1 > --numjobs=12 --time_based --runtime=10 --group_reporting --name=/dev/vg/thin > before: > READ: bw=286MiB/s > WRITE: bw=286MiB/s > after: > READ: bw=442MiB/s > WRITE: bw=442MiB/s > > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> > > --- > drivers/md/persistent-data/dm-block-manager.c | 32 +++++++++++++++ > drivers/md/persistent-data/dm-block-manager.h | 6 ++ > drivers/md/persistent-data/dm-btree-internal.h | 4 + > drivers/md/persistent-data/dm-btree-spine.c | 41 > ++++++++++++++++++-- > drivers/md/persistent-data/dm-btree.c | 6 +- > drivers/md/persistent-data/dm-transaction-manager.c | 17 ++++++++ > drivers/md/persistent-data/dm-transaction-manager.h | 6 ++ > 7 files changed, 104 insertions(+), 8 deletions(-) > > Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.c > =================================================================== > --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.c > +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.c > @@ -601,6 +601,38 @@ void dm_bm_unlock(struct dm_block *b) > } > EXPORT_SYMBOL_GPL(dm_bm_unlock); > > +void dm_bm_fast_lock(struct dm_block_manager *bm) > +{ > + dm_bufio_lock_read(bm->bufio); > +} > + > +void dm_bm_fast_unlock(struct dm_block_manager *bm) > +{ > + dm_bufio_unlock_read(bm->bufio); > +} > + > +int dm_bm_fast_get_block(struct dm_block_manager *bm, > + dm_block_t b, struct dm_block_validator *v, > + struct dm_block **result) > +{ > + int r; > + struct buffer_aux *aux; > + void *p; > + > + p = dm_bufio_get_unlocked(bm->bufio, b, (struct dm_buffer **) > result); > + if (IS_ERR(p)) > + return PTR_ERR(p); > + if (unlikely(!p)) > + return -EWOULDBLOCK; > + > + aux = dm_bufio_get_aux_data(to_buffer(*result)); > + r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); > + if (unlikely(r)) > + return r; > + > + return 0; > +} > + > int dm_bm_flush(struct dm_block_manager *bm) > { > if (dm_bm_is_read_only(bm)) > Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.h > =================================================================== > --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.h > +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.h > @@ -96,6 +96,12 @@ int dm_bm_write_lock_zero(struct dm_bloc > > void dm_bm_unlock(struct dm_block *b); > > +void dm_bm_fast_lock(struct dm_block_manager *bm); > +void dm_bm_fast_unlock(struct dm_block_manager *bm); > +int dm_bm_fast_get_block(struct dm_block_manager *bm, > + dm_block_t b, struct dm_block_validator *v, > + struct dm_block **result); > + > /* > * It's a common idiom to have a superblock that should be committed last. > * > Index: linux-2.6/drivers/md/persistent-data/dm-btree-internal.h > =================================================================== > --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-internal.h > +++ linux-2.6/drivers/md/persistent-data/dm-btree-internal.h > @@ -64,9 +64,11 @@ struct ro_spine { > struct dm_btree_info *info; > > struct dm_block *node; > + bool fast_locked; > + bool fast_lock_failed; > }; > > -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); > +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool > disable_fast_access); > void exit_ro_spine(struct ro_spine *s); > int ro_step(struct ro_spine *s, dm_block_t new_child); > struct btree_node *ro_node(struct ro_spine *s); > Index: linux-2.6/drivers/md/persistent-data/dm-btree-spine.c > =================================================================== > --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-spine.c > +++ linux-2.6/drivers/md/persistent-data/dm-btree-spine.c > @@ -118,27 +118,60 @@ void unlock_block(struct dm_btree_info * > dm_tm_unlock(info->tm, b); > } > > +static void bn_fast_lock(struct dm_btree_info *info) > +{ > + dm_tm_fast_lock(info->tm); > +} > + > +static void bn_fast_unlock(struct dm_btree_info *info) > +{ > + dm_tm_fast_unlock(info->tm); > +} > + > +static int bn_fast_get_block(struct dm_btree_info *info, dm_block_t b, > + struct dm_block **result) > +{ > + return dm_tm_fast_get_block(info->tm, b, &btree_node_validator, > result); > +} > + > /*----------------------------------------------------------------*/ > > -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info) > +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool > disable_fast_access) > { > s->info = info; > s->node = NULL; > + s->fast_locked = false; > + s->fast_lock_failed = disable_fast_access; > } > > void exit_ro_spine(struct ro_spine *s) > { > - if (s->node) > + if (s->fast_locked) > + bn_fast_unlock(s->info); > + else if (s->node) > unlock_block(s->info, s->node); > } > > int ro_step(struct ro_spine *s, dm_block_t new_child) > { > if (s->node) { > - unlock_block(s->info, s->node); > + if (unlikely(!s->fast_locked)) > + unlock_block(s->info, s->node); > s->node = NULL; > } > - > + if (likely(!s->fast_lock_failed)) { > + int r; > + if (!s->fast_locked) { > + bn_fast_lock(s->info); > + s->fast_locked = true; > + } > + r = bn_fast_get_block(s->info, new_child, &s->node); > + if (likely(!r)) > + return 0; > + s->fast_lock_failed = true; > + s->fast_locked = false; > + bn_fast_unlock(s->info); > + } > return bn_read_lock(s->info, new_child, &s->node); > } > > Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c > =================================================================== > --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.c > +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c > @@ -348,6 +348,23 @@ void dm_tm_unlock(struct dm_transaction_ > } > EXPORT_SYMBOL_GPL(dm_tm_unlock); > > +void dm_tm_fast_lock(struct dm_transaction_manager *tm) > +{ > + dm_bm_fast_lock(tm->is_clone ? tm->real->bm : tm->bm); > +} > + > +void dm_tm_fast_unlock(struct dm_transaction_manager *tm) > +{ > + dm_bm_fast_unlock(tm->is_clone ? tm->real->bm : tm->bm); > +} > + > +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b, > + struct dm_block_validator *v, > + struct dm_block **blk) > +{ > + return dm_bm_fast_get_block(tm->is_clone ? tm->real->bm : tm->bm, > b, v, blk); > +} > + > void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b) > { > /* > Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h > =================================================================== > --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.h > +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h > @@ -96,6 +96,12 @@ int dm_tm_read_lock(struct dm_transactio > > void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b); > > +void dm_tm_fast_lock(struct dm_transaction_manager *tm); > +void dm_tm_fast_unlock(struct dm_transaction_manager *tm); > +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b, > + struct dm_block_validator *v, > + struct dm_block **blk); > + > /* > * Functions for altering the reference count of a block directly. > */ > Index: linux-2.6/drivers/md/persistent-data/dm-btree.c > =================================================================== > --- linux-2.6.orig/drivers/md/persistent-data/dm-btree.c > +++ linux-2.6/drivers/md/persistent-data/dm-btree.c > @@ -377,7 +377,7 @@ int dm_btree_lookup(struct dm_btree_info > __le64 internal_value_le; > struct ro_spine spine; > > - init_ro_spine(&spine, info); > + init_ro_spine(&spine, info, false); > for (level = 0; level < info->levels; level++) { > size_t size; > void *value_p; > @@ -472,7 +472,7 @@ int dm_btree_lookup_next(struct dm_btree > __le64 internal_value_le; > struct ro_spine spine; > > - init_ro_spine(&spine, info); > + init_ro_spine(&spine, info, true); > for (level = 0; level < info->levels - 1u; level++) { > r = btree_lookup_raw(&spine, root, keys[level], > lower_bound, rkey, > @@ -1369,7 +1369,7 @@ static int dm_btree_find_key(struct dm_b > int r = 0, count = 0, level; > struct ro_spine spine; > > - init_ro_spine(&spine, info); > + init_ro_spine(&spine, info, false); > for (level = 0; level < info->levels; level++) { > r = find_key(&spine, root, find_highest, result_keys + > level, > level == info->levels - 1 ? NULL : &root); > > -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
fio results (many jobs, large queue depth, 64 core machine, preallocated thin): upstream: READ: bw=303MiB/s (318MB/s), 15.7MiB/s-24.9MiB/s (16.5MB/s-26.1MB/s), io=9093MiB (9535MB), run=30004-30007msec WRITE: bw=304MiB/s (318MB/s), 15.7MiB/s-24.8MiB/s (16.5MB/s-26.0MB/s), io=9108MiB (9550MB), run=30004-30007msec Mikulas' patches: READ: bw=524MiB/s (549MB/s), 32.4MiB/s-32.9MiB/s (34.0MB/s-34.5MB/s), io=15.3GiB (16.5GB), run=30001-30002msec WRITE: bw=524MiB/s (550MB/s), 32.5MiB/s-32.9MiB/s (34.1MB/s-34.5MB/s), io=15.4GiB (16.5GB), run=30001-30002msec My patches: READ: bw=538MiB/s (564MB/s), 33.4MiB/s-33.7MiB/s (35.1MB/s-35.4MB/s), io=15.8GiB (16.9GB), run=30001-30002msec WRITE: bw=539MiB/s (565MB/s), 33.4MiB/s-33.8MiB/s (35.0MB/s-35.4MB/s), io=15.8GiB (16.9GB), run=30001-30002msec Both our approaches give similar results. Mine is slightly faster because the changes are all internal to dm-bufio, so everything benefits from the added concurrency not just the btrees (eg, space maps). As mentioned on irc I don't like the new interface to dm-bufio. Having different methods of getting a buffer is ugly, you have to fall back to the other method if it fails, and remember how you eventually got it for unlocking. Plus we need to change any code to take advantage of it. Currently dm-bufio is used by dm-thin, dm-cache, dm-era, dm-verity, dm-ebs-target, dm-integrity, and dm-snap-persistent. We're not going to audit all these and see which can benefit. On the other hand I don't like the size of my patch (~1200 line diff). I'll post it when it's complete and we can continue the discussion then. - Joe On Wed, Oct 12, 2022 at 7:31 AM Joe Thornber <thornber@redhat.com> wrote: > Thanks Mikulas, > > I'll test this morning. > > - Joe > > > On Tue, Oct 11, 2022 at 8:10 PM Mikulas Patocka <mpatocka@redhat.com> > wrote: > >> Hi >> >> Here I'm sending updated patch 4 that fixes hang on discard. We must not >> do the optimization in dm_btree_lookup_next. >> >> Mikulas >> >> >> From: Mikulas Patocka <mpatocka@redhat.com> >> >> This patch reduces lock contention in btree walks. We modify the >> functions init_ro_wpin, exit_ro_spine and ro_step so that they use >> dm_bufio_lock_read/dm_bufio_get_unlocked/dm_bufio_unlock_read. If >> dm_bm_fast_get_block fails, we fallback to normal locking. >> >> When doing tests on pmem and fully provisioned thin volume, it improves >> thoughput from 286MiB/s to 442MiB/s. >> >> fio --ioengine=psync --iodepth=1 --rw=randrw --bs=4k --direct=1 >> --numjobs=12 --time_based --runtime=10 --group_reporting --name=/dev/vg/thin >> before: >> READ: bw=286MiB/s >> WRITE: bw=286MiB/s >> after: >> READ: bw=442MiB/s >> WRITE: bw=442MiB/s >> >> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> >> >> --- >> drivers/md/persistent-data/dm-block-manager.c | 32 >> +++++++++++++++ >> drivers/md/persistent-data/dm-block-manager.h | 6 ++ >> drivers/md/persistent-data/dm-btree-internal.h | 4 + >> drivers/md/persistent-data/dm-btree-spine.c | 41 >> ++++++++++++++++++-- >> drivers/md/persistent-data/dm-btree.c | 6 +- >> drivers/md/persistent-data/dm-transaction-manager.c | 17 ++++++++ >> drivers/md/persistent-data/dm-transaction-manager.h | 6 ++ >> 7 files changed, 104 insertions(+), 8 deletions(-) >> >> Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.c >> =================================================================== >> --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.c >> +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.c >> @@ -601,6 +601,38 @@ void dm_bm_unlock(struct dm_block *b) >> } >> EXPORT_SYMBOL_GPL(dm_bm_unlock); >> >> +void dm_bm_fast_lock(struct dm_block_manager *bm) >> +{ >> + dm_bufio_lock_read(bm->bufio); >> +} >> + >> +void dm_bm_fast_unlock(struct dm_block_manager *bm) >> +{ >> + dm_bufio_unlock_read(bm->bufio); >> +} >> + >> +int dm_bm_fast_get_block(struct dm_block_manager *bm, >> + dm_block_t b, struct dm_block_validator *v, >> + struct dm_block **result) >> +{ >> + int r; >> + struct buffer_aux *aux; >> + void *p; >> + >> + p = dm_bufio_get_unlocked(bm->bufio, b, (struct dm_buffer **) >> result); >> + if (IS_ERR(p)) >> + return PTR_ERR(p); >> + if (unlikely(!p)) >> + return -EWOULDBLOCK; >> + >> + aux = dm_bufio_get_aux_data(to_buffer(*result)); >> + r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); >> + if (unlikely(r)) >> + return r; >> + >> + return 0; >> +} >> + >> int dm_bm_flush(struct dm_block_manager *bm) >> { >> if (dm_bm_is_read_only(bm)) >> Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.h >> =================================================================== >> --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.h >> +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.h >> @@ -96,6 +96,12 @@ int dm_bm_write_lock_zero(struct dm_bloc >> >> void dm_bm_unlock(struct dm_block *b); >> >> +void dm_bm_fast_lock(struct dm_block_manager *bm); >> +void dm_bm_fast_unlock(struct dm_block_manager *bm); >> +int dm_bm_fast_get_block(struct dm_block_manager *bm, >> + dm_block_t b, struct dm_block_validator *v, >> + struct dm_block **result); >> + >> /* >> * It's a common idiom to have a superblock that should be committed >> last. >> * >> Index: linux-2.6/drivers/md/persistent-data/dm-btree-internal.h >> =================================================================== >> --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-internal.h >> +++ linux-2.6/drivers/md/persistent-data/dm-btree-internal.h >> @@ -64,9 +64,11 @@ struct ro_spine { >> struct dm_btree_info *info; >> >> struct dm_block *node; >> + bool fast_locked; >> + bool fast_lock_failed; >> }; >> >> -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); >> +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool >> disable_fast_access); >> void exit_ro_spine(struct ro_spine *s); >> int ro_step(struct ro_spine *s, dm_block_t new_child); >> struct btree_node *ro_node(struct ro_spine *s); >> Index: linux-2.6/drivers/md/persistent-data/dm-btree-spine.c >> =================================================================== >> --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-spine.c >> +++ linux-2.6/drivers/md/persistent-data/dm-btree-spine.c >> @@ -118,27 +118,60 @@ void unlock_block(struct dm_btree_info * >> dm_tm_unlock(info->tm, b); >> } >> >> +static void bn_fast_lock(struct dm_btree_info *info) >> +{ >> + dm_tm_fast_lock(info->tm); >> +} >> + >> +static void bn_fast_unlock(struct dm_btree_info *info) >> +{ >> + dm_tm_fast_unlock(info->tm); >> +} >> + >> +static int bn_fast_get_block(struct dm_btree_info *info, dm_block_t b, >> + struct dm_block **result) >> +{ >> + return dm_tm_fast_get_block(info->tm, b, &btree_node_validator, >> result); >> +} >> + >> /*----------------------------------------------------------------*/ >> >> -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info) >> +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool >> disable_fast_access) >> { >> s->info = info; >> s->node = NULL; >> + s->fast_locked = false; >> + s->fast_lock_failed = disable_fast_access; >> } >> >> void exit_ro_spine(struct ro_spine *s) >> { >> - if (s->node) >> + if (s->fast_locked) >> + bn_fast_unlock(s->info); >> + else if (s->node) >> unlock_block(s->info, s->node); >> } >> >> int ro_step(struct ro_spine *s, dm_block_t new_child) >> { >> if (s->node) { >> - unlock_block(s->info, s->node); >> + if (unlikely(!s->fast_locked)) >> + unlock_block(s->info, s->node); >> s->node = NULL; >> } >> - >> + if (likely(!s->fast_lock_failed)) { >> + int r; >> + if (!s->fast_locked) { >> + bn_fast_lock(s->info); >> + s->fast_locked = true; >> + } >> + r = bn_fast_get_block(s->info, new_child, &s->node); >> + if (likely(!r)) >> + return 0; >> + s->fast_lock_failed = true; >> + s->fast_locked = false; >> + bn_fast_unlock(s->info); >> + } >> return bn_read_lock(s->info, new_child, &s->node); >> } >> >> Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c >> =================================================================== >> --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.c >> +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c >> @@ -348,6 +348,23 @@ void dm_tm_unlock(struct dm_transaction_ >> } >> EXPORT_SYMBOL_GPL(dm_tm_unlock); >> >> +void dm_tm_fast_lock(struct dm_transaction_manager *tm) >> +{ >> + dm_bm_fast_lock(tm->is_clone ? tm->real->bm : tm->bm); >> +} >> + >> +void dm_tm_fast_unlock(struct dm_transaction_manager *tm) >> +{ >> + dm_bm_fast_unlock(tm->is_clone ? tm->real->bm : tm->bm); >> +} >> + >> +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b, >> + struct dm_block_validator *v, >> + struct dm_block **blk) >> +{ >> + return dm_bm_fast_get_block(tm->is_clone ? tm->real->bm : tm->bm, >> b, v, blk); >> +} >> + >> void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b) >> { >> /* >> Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h >> =================================================================== >> --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.h >> +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h >> @@ -96,6 +96,12 @@ int dm_tm_read_lock(struct dm_transactio >> >> void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b); >> >> +void dm_tm_fast_lock(struct dm_transaction_manager *tm); >> +void dm_tm_fast_unlock(struct dm_transaction_manager *tm); >> +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b, >> + struct dm_block_validator *v, >> + struct dm_block **blk); >> + >> /* >> * Functions for altering the reference count of a block directly. >> */ >> Index: linux-2.6/drivers/md/persistent-data/dm-btree.c >> =================================================================== >> --- linux-2.6.orig/drivers/md/persistent-data/dm-btree.c >> +++ linux-2.6/drivers/md/persistent-data/dm-btree.c >> @@ -377,7 +377,7 @@ int dm_btree_lookup(struct dm_btree_info >> __le64 internal_value_le; >> struct ro_spine spine; >> >> - init_ro_spine(&spine, info); >> + init_ro_spine(&spine, info, false); >> for (level = 0; level < info->levels; level++) { >> size_t size; >> void *value_p; >> @@ -472,7 +472,7 @@ int dm_btree_lookup_next(struct dm_btree >> __le64 internal_value_le; >> struct ro_spine spine; >> >> - init_ro_spine(&spine, info); >> + init_ro_spine(&spine, info, true); >> for (level = 0; level < info->levels - 1u; level++) { >> r = btree_lookup_raw(&spine, root, keys[level], >> lower_bound, rkey, >> @@ -1369,7 +1369,7 @@ static int dm_btree_find_key(struct dm_b >> int r = 0, count = 0, level; >> struct ro_spine spine; >> >> - init_ro_spine(&spine, info); >> + init_ro_spine(&spine, info, false); >> for (level = 0; level < info->levels; level++) { >> r = find_key(&spine, root, find_highest, result_keys + >> level, >> level == info->levels - 1 ? NULL : &root); >> >> -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.c =================================================================== --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.c +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.c @@ -601,6 +601,38 @@ void dm_bm_unlock(struct dm_block *b) } EXPORT_SYMBOL_GPL(dm_bm_unlock); +void dm_bm_fast_lock(struct dm_block_manager *bm) +{ + dm_bufio_lock_read(bm->bufio); +} + +void dm_bm_fast_unlock(struct dm_block_manager *bm) +{ + dm_bufio_unlock_read(bm->bufio); +} + +int dm_bm_fast_get_block(struct dm_block_manager *bm, + dm_block_t b, struct dm_block_validator *v, + struct dm_block **result) +{ + int r; + struct buffer_aux *aux; + void *p; + + p = dm_bufio_get_unlocked(bm->bufio, b, (struct dm_buffer **) result); + if (IS_ERR(p)) + return PTR_ERR(p); + if (unlikely(!p)) + return -EWOULDBLOCK; + + aux = dm_bufio_get_aux_data(to_buffer(*result)); + r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); + if (unlikely(r)) + return r; + + return 0; +} + int dm_bm_flush(struct dm_block_manager *bm) { if (dm_bm_is_read_only(bm)) Index: linux-2.6/drivers/md/persistent-data/dm-block-manager.h =================================================================== --- linux-2.6.orig/drivers/md/persistent-data/dm-block-manager.h +++ linux-2.6/drivers/md/persistent-data/dm-block-manager.h @@ -96,6 +96,12 @@ int dm_bm_write_lock_zero(struct dm_bloc void dm_bm_unlock(struct dm_block *b); +void dm_bm_fast_lock(struct dm_block_manager *bm); +void dm_bm_fast_unlock(struct dm_block_manager *bm); +int dm_bm_fast_get_block(struct dm_block_manager *bm, + dm_block_t b, struct dm_block_validator *v, + struct dm_block **result); + /* * It's a common idiom to have a superblock that should be committed last. * Index: linux-2.6/drivers/md/persistent-data/dm-btree-internal.h =================================================================== --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-internal.h +++ linux-2.6/drivers/md/persistent-data/dm-btree-internal.h @@ -64,9 +64,11 @@ struct ro_spine { struct dm_btree_info *info; struct dm_block *node; + bool fast_locked; + bool fast_lock_failed; }; -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool disable_fast_access); void exit_ro_spine(struct ro_spine *s); int ro_step(struct ro_spine *s, dm_block_t new_child); struct btree_node *ro_node(struct ro_spine *s); Index: linux-2.6/drivers/md/persistent-data/dm-btree-spine.c =================================================================== --- linux-2.6.orig/drivers/md/persistent-data/dm-btree-spine.c +++ linux-2.6/drivers/md/persistent-data/dm-btree-spine.c @@ -118,27 +118,60 @@ void unlock_block(struct dm_btree_info * dm_tm_unlock(info->tm, b); } +static void bn_fast_lock(struct dm_btree_info *info) +{ + dm_tm_fast_lock(info->tm); +} + +static void bn_fast_unlock(struct dm_btree_info *info) +{ + dm_tm_fast_unlock(info->tm); +} + +static int bn_fast_get_block(struct dm_btree_info *info, dm_block_t b, + struct dm_block **result) +{ + return dm_tm_fast_get_block(info->tm, b, &btree_node_validator, result); +} + /*----------------------------------------------------------------*/ -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info) +void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info, bool disable_fast_access) { s->info = info; s->node = NULL; + s->fast_locked = false; + s->fast_lock_failed = disable_fast_access; } void exit_ro_spine(struct ro_spine *s) { - if (s->node) + if (s->fast_locked) + bn_fast_unlock(s->info); + else if (s->node) unlock_block(s->info, s->node); } int ro_step(struct ro_spine *s, dm_block_t new_child) { if (s->node) { - unlock_block(s->info, s->node); + if (unlikely(!s->fast_locked)) + unlock_block(s->info, s->node); s->node = NULL; } - + if (likely(!s->fast_lock_failed)) { + int r; + if (!s->fast_locked) { + bn_fast_lock(s->info); + s->fast_locked = true; + } + r = bn_fast_get_block(s->info, new_child, &s->node); + if (likely(!r)) + return 0; + s->fast_lock_failed = true; + s->fast_locked = false; + bn_fast_unlock(s->info); + } return bn_read_lock(s->info, new_child, &s->node); } Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c =================================================================== --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.c +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.c @@ -348,6 +348,23 @@ void dm_tm_unlock(struct dm_transaction_ } EXPORT_SYMBOL_GPL(dm_tm_unlock); +void dm_tm_fast_lock(struct dm_transaction_manager *tm) +{ + dm_bm_fast_lock(tm->is_clone ? tm->real->bm : tm->bm); +} + +void dm_tm_fast_unlock(struct dm_transaction_manager *tm) +{ + dm_bm_fast_unlock(tm->is_clone ? tm->real->bm : tm->bm); +} + +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b, + struct dm_block_validator *v, + struct dm_block **blk) +{ + return dm_bm_fast_get_block(tm->is_clone ? tm->real->bm : tm->bm, b, v, blk); +} + void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b) { /* Index: linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h =================================================================== --- linux-2.6.orig/drivers/md/persistent-data/dm-transaction-manager.h +++ linux-2.6/drivers/md/persistent-data/dm-transaction-manager.h @@ -96,6 +96,12 @@ int dm_tm_read_lock(struct dm_transactio void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b); +void dm_tm_fast_lock(struct dm_transaction_manager *tm); +void dm_tm_fast_unlock(struct dm_transaction_manager *tm); +int dm_tm_fast_get_block(struct dm_transaction_manager *tm, dm_block_t b, + struct dm_block_validator *v, + struct dm_block **blk); + /* * Functions for altering the reference count of a block directly. */ Index: linux-2.6/drivers/md/persistent-data/dm-btree.c =================================================================== --- linux-2.6.orig/drivers/md/persistent-data/dm-btree.c +++ linux-2.6/drivers/md/persistent-data/dm-btree.c @@ -377,7 +377,7 @@ int dm_btree_lookup(struct dm_btree_info __le64 internal_value_le; struct ro_spine spine; - init_ro_spine(&spine, info); + init_ro_spine(&spine, info, false); for (level = 0; level < info->levels; level++) { size_t size; void *value_p; @@ -472,7 +472,7 @@ int dm_btree_lookup_next(struct dm_btree __le64 internal_value_le; struct ro_spine spine; - init_ro_spine(&spine, info); + init_ro_spine(&spine, info, true); for (level = 0; level < info->levels - 1u; level++) { r = btree_lookup_raw(&spine, root, keys[level], lower_bound, rkey, @@ -1369,7 +1369,7 @@ static int dm_btree_find_key(struct dm_b int r = 0, count = 0, level; struct ro_spine spine; - init_ro_spine(&spine, info); + init_ro_spine(&spine, info, false); for (level = 0; level < info->levels; level++) { r = find_key(&spine, root, find_highest, result_keys + level, level == info->levels - 1 ? NULL : &root);