===================================================================
@@ -24,6 +24,10 @@ struct dm_kobject_holder {
struct completion completion;
};
+struct dm_percpu {	/* per-CPU bookkeeping for a mapped_device; allocated with alloc_percpu() in alloc_dev() */
+ unsigned inflight[2];	/* in-flight I/O counts indexed by READ/WRITE; incremented/decremented with this_cpu_inc()/this_cpu_dec() and summed over all CPUs by md_in_flight() */
+};
+
/*
* DM core internal structure that used directly by dm.c and dm-rq.c
* DM targets must _not_ deference a mapped_device to directly access its members!
@@ -63,6 +67,7 @@ struct mapped_device {
/*
* A list of ios that arrived while we were suspended.
*/
+ struct dm_percpu __percpu *counters;
struct work_struct work;
wait_queue_head_t wait;
spinlock_t deferred_lock;
===================================================================
@@ -597,19 +597,33 @@ static void free_tio(struct dm_target_io
int md_in_flight(struct mapped_device *md)
{
- return atomic_read(&dm_disk(md)->part0.in_flight[READ]) +
- atomic_read(&dm_disk(md)->part0.in_flight[WRITE]);
+ int cpu;	/* md_in_flight now totals md->counters across CPUs instead of reading part0.in_flight */
+ unsigned sum = 0;	/* NOTE(review): presumably unsigned so that a slot decremented on a different CPU than it was incremented on still cancels out modulo 2^N - confirm */
+ for_each_possible_cpu(cpu) {	/* visit every possible CPU's dm_percpu instance */
+ struct dm_percpu *p = per_cpu_ptr(md->counters, cpu);
+ sum += p->inflight[READ] + p->inflight[WRITE];	/* plain (non-atomic) reads of both directions */
+ }
+ return (int)sum;	/* converted back to the function's int return type */
}
static void start_io_acct(struct dm_io *io)
{
struct mapped_device *md = io->md;
struct bio *bio = io->orig_bio;
+ struct hd_struct *part;
+ int sgrp, cpu;
io->start_time = jiffies;
- generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
- &dm_disk(md)->part0);
+ part = &dm_disk(md)->part0;
+ sgrp = op_stat_group(bio_op(bio));
+ cpu = part_stat_lock();
+ part_round_stats(md->queue, cpu, part);
+ part_stat_inc(cpu, part, ios[sgrp]);
+ part_stat_add(cpu, part, sectors[sgrp], bio_sectors(bio));
+ part_stat_unlock();
+
+ this_cpu_inc(md->counters->inflight[bio_data_dir(bio)]);
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
@@ -622,25 +636,25 @@ static void end_io_acct(struct dm_io *io
struct mapped_device *md = io->md;
struct bio *bio = io->orig_bio;
unsigned long duration = jiffies - io->start_time;
+ struct hd_struct *part;
+ int sgrp, cpu;
- /*
- * make sure that atomic_dec in generic_end_io_acct is not reordered
- * with previous writes
- */
- smp_mb__before_atomic();
- generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
- io->start_time);
- /*
- * generic_end_io_acct does atomic_dec, this barrier makes sure that
- * atomic_dec is not reordered with waitqueue_active
- */
- smp_mb__after_atomic();
+ part = &dm_disk(md)->part0;
+ sgrp = op_stat_group(bio_op(bio));
+ cpu = part_stat_lock();
+ part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
+ part_round_stats(md->queue, cpu, part);
+ part_stat_unlock();
+
+ smp_wmb();
+ this_cpu_dec(md->counters->inflight[bio_data_dir(bio)]);
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio),
true, duration, &io->stats_aux);
+ smp_mb();
/* nudge anyone waiting on suspend queue */
if (unlikely(waitqueue_active(&md->wait))) {
if (!md_in_flight(md))
@@ -1828,6 +1842,8 @@ static void cleanup_mapped_device(struct
if (md->queue)
blk_cleanup_queue(md->queue);
+ free_percpu(md->counters);
+
cleanup_srcu_struct(&md->io_barrier);
if (md->bdev) {
@@ -1899,6 +1915,10 @@ static struct mapped_device *alloc_dev(i
if (!md->disk)
goto bad;
+ md->counters = alloc_percpu(struct dm_percpu);
+ if (!md->counters)
+ goto bad;
+
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq);
===================================================================
@@ -172,6 +172,12 @@ static void rq_end_stats(struct mapped_d
}
}
+static unsigned rq_md_in_flight(struct mapped_device *md)	/* in-flight count used by rq_completed() and dm_old_request_fn() in place of md_in_flight() */
+{
+ return atomic_read(&dm_disk(md)->part0.in_flight[READ]) +	/* sum of the disk's atomic part0 READ and WRITE in_flight counters */
+ atomic_read(&dm_disk(md)->part0.in_flight[WRITE]);
+}
+
/*
* Don't touch any member of the md after calling this function because
* the md may be freed in dm_put() at the end of this function.
@@ -185,7 +191,7 @@ static void rq_completed(struct mapped_d
atomic_dec(&dm_disk(md)->part0.in_flight[rw]);
/* nudge anyone waiting on suspend queue */
- if (!md_in_flight(md))
+ if (!rq_md_in_flight(md))
wake_up(&md->wait);
/*
@@ -674,7 +680,7 @@ static void dm_old_request_fn(struct req
pos = blk_rq_pos(rq);
if ((dm_old_request_peeked_before_merge_deadline(md) &&
- md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
+ rq_md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
(ti->type->busy && ti->type->busy(ti))) {
blk_delay_queue(q, 10);
Use percpu inflight counters to avoid cache line bouncing and improve
performance.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm-core.h |    5 ++++
 drivers/md/dm-rq.c   |   10 +++++++--
 drivers/md/dm.c      |   52 +++++++++++++++++++++++++++++++++++----------------
 3 files changed, 49 insertions(+), 18 deletions(-)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel