From patchwork Mon Oct 5 19:00:28 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mike Snitzer X-Patchwork-Id: 51770 Received: from hormel.redhat.com (hormel1.redhat.com [209.132.177.33]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n95J0kp4015830 for ; Mon, 5 Oct 2009 19:00:46 GMT Received: from listman.util.phx.redhat.com (listman.util.phx.redhat.com [10.8.4.110]) by hormel.redhat.com (Postfix) with ESMTP id 8B41161AE2B; Mon, 5 Oct 2009 15:00:46 -0400 (EDT) Received: from int-mx02.intmail.prod.int.phx2.redhat.com (nat-pool.util.phx.redhat.com [10.8.5.200]) by listman.util.phx.redhat.com (8.13.1/8.13.1) with ESMTP id n95J0hnD009705 for ; Mon, 5 Oct 2009 15:00:43 -0400 Received: from localhost (dhcp-100-18-171.bos.redhat.com [10.16.18.171]) by int-mx02.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id n95J0fju011352; Mon, 5 Oct 2009 15:00:42 -0400 From: Mike Snitzer To: dm-devel@redhat.com Date: Mon, 5 Oct 2009 15:00:28 -0400 Message-Id: <1254769231-12053-2-git-send-email-snitzer@redhat.com> In-Reply-To: <1254769231-12053-1-git-send-email-snitzer@redhat.com> References: <1254769231-12053-1-git-send-email-snitzer@redhat.com> X-Scanned-By: MIMEDefang 2.67 on 10.5.11.12 X-loop: dm-devel@redhat.com Cc: Mikulas Patocka Subject: [dm-devel] [PATCH 1/4] dm-snapshot-rework-origin-write X-BeenThere: dm-devel@redhat.com X-Mailman-Version: 2.1.5 Precedence: junk Reply-To: device-mapper development List-Id: device-mapper development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: dm-devel-bounces@redhat.com Errors-To: dm-devel-bounces@redhat.com diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 668457f..b23decb 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -125,28 +125,6 @@ struct dm_snap_pending_exception { struct bio_list origin_bios; struct bio_list snapshot_bios; - /* - * Short-term queue of pending exceptions prior to submission. - */ - struct list_head list; - - /* - * The primary pending_exception is the one that holds - * the ref_count and the list of origin_bios for a - * group of pending_exceptions. It is always last to get freed. - * These fields get set up when writing to the origin. - */ - struct dm_snap_pending_exception *primary_pe; - - /* - * Number of pending_exceptions processing this chunk. - * When this drops to zero we must complete the origin bios. - * If incrementing or decrementing this, hold pe->snap->lock for - * the sibling concerned and not pe->primary_pe->snap->lock unless - * they are the same. - */ - atomic_t ref_count; - /* Pointer back to snapshot context */ struct dm_snapshot *snap; @@ -809,6 +787,28 @@ static void flush_queued_bios(struct work_struct *work) flush_bios(queued_bios); } +static int do_origin(struct dm_dev *origin, struct bio *bio); + +/* + * Flush a list of buffers. + */ +static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) +{ + struct bio *n; + int r; + + while (bio) { + n = bio->bi_next; + bio->bi_next = NULL; + r = do_origin(s->origin, bio); + if (r == DM_MAPIO_REMAPPED) + generic_make_request(bio); + else + BUG_ON(r != DM_MAPIO_SUBMITTED); + bio = n; + } +} + /* * Error a list of buffers. */ @@ -842,39 +842,6 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) dm_table_event(s->store->ti->table); } -static void get_pending_exception(struct dm_snap_pending_exception *pe) -{ - atomic_inc(&pe->ref_count); -} - -static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe) -{ - struct dm_snap_pending_exception *primary_pe; - struct bio *origin_bios = NULL; - - primary_pe = pe->primary_pe; - - /* - * If this pe is involved in a write to the origin and - * it is the last sibling to complete then release - * the bios for the original write to the origin. - */ - if (primary_pe && - atomic_dec_and_test(&primary_pe->ref_count)) { - origin_bios = bio_list_get(&primary_pe->origin_bios); - free_pending_exception(primary_pe); - } - - /* - * Free the pe if it's not linked to an origin write or if - * it's not itself a primary pe. - */ - if (!primary_pe || primary_pe != pe) - free_pending_exception(pe); - - return origin_bios; -} - static void pending_complete(struct dm_snap_pending_exception *pe, int success) { struct dm_snap_exception *e; @@ -923,7 +890,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) out: remove_exception(&pe->e); snapshot_bios = bio_list_get(&pe->snapshot_bios); - origin_bios = put_pending_exception(pe); + origin_bios = bio_list_get(&pe->origin_bios); + free_pending_exception(pe); up_write(&s->lock); @@ -933,7 +901,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) else flush_bios(snapshot_bios); - flush_bios(origin_bios); + retry_origin_bios(s, origin_bios); } static void commit_callback(void *context, int success) @@ -1020,8 +988,6 @@ __find_pending_exception(struct dm_snapshot *s, pe->e.old_chunk = chunk; bio_list_init(&pe->origin_bios); bio_list_init(&pe->snapshot_bios); - pe->primary_pe = NULL; - atomic_set(&pe->ref_count, 0); pe->started = 0; if (s->store->type->prepare_exception(s->store, &pe->e)) { @@ -1029,7 +995,6 @@ __find_pending_exception(struct dm_snapshot *s, return NULL; } - get_pending_exception(pe); insert_exception(&s->pending, &pe->e); return pe; @@ -1212,14 +1177,21 @@ static int snapshot_iterate_devices(struct dm_target *ti, /*----------------------------------------------------------------- * Origin methods *---------------------------------------------------------------*/ -static int __origin_write(struct list_head *snapshots, struct bio *bio) + +/* + * Returns: + * DM_MAPIO_REMAPPED: bio may be submitted to origin device + * DM_MAPIO_SUBMITTED: bio was queued on queue on one of exceptions + */ + +static int __origin_write(struct list_head *snapshots, + sector_t sector, struct bio *bio) { - int r = DM_MAPIO_REMAPPED, first = 0; + int r = DM_MAPIO_REMAPPED; struct dm_snapshot *snap; struct dm_snap_exception *e; - struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL; + struct dm_snap_pending_exception *pe, *pe_to_start = NULL; chunk_t chunk; - LIST_HEAD(pe_queue); /* Do all the snapshots on this origin */ list_for_each_entry (snap, snapshots, list) { @@ -1231,22 +1203,19 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) goto next_snapshot; /* Nothing to do if writing beyond end of snapshot */ - if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table)) + if (sector >= dm_table_get_size(snap->store->ti->table)) goto next_snapshot; /* * Remember, different snapshots can have * different chunk sizes. */ - chunk = sector_to_chunk(snap->store, bio->bi_sector); + chunk = sector_to_chunk(snap->store, sector); /* * Check exception table to see if block * is already remapped in this snapshot * and trigger an exception if not. - * - * ref_count is initialised to 1 so pending_complete() - * won't destroy the primary_pe while we're inside this loop. */ e = lookup_exception(&snap->complete, chunk); if (e) @@ -1276,59 +1245,39 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) } } - if (!primary_pe) { - /* - * Either every pe here has same - * primary_pe or none has one yet. - */ - if (pe->primary_pe) - primary_pe = pe->primary_pe; - else { - primary_pe = pe; - first = 1; - } - - bio_list_add(&primary_pe->origin_bios, bio); + r = DM_MAPIO_SUBMITTED; - r = DM_MAPIO_SUBMITTED; - } + if (bio) { + bio_list_add(&pe->origin_bios, bio); + bio = NULL; - if (!pe->primary_pe) { - pe->primary_pe = primary_pe; - get_pending_exception(primary_pe); + if (!pe->started) { + pe->started = 1; + pe_to_start = pe; + } } if (!pe->started) { pe->started = 1; - list_add_tail(&pe->list, &pe_queue); + start_copy(pe); } next_snapshot: up_write(&snap->lock); } - if (!primary_pe) - return r; - /* - * If this is the first time we're processing this chunk and - * ref_count is now 1 it means all the pending exceptions - * got completed while we were in the loop above, so it falls to - * us here to remove the primary_pe and submit any origin_bios. + * pe_to_start is a small performance improvement: + * To avoid calling __origin_write N times for N snapshots, we start + * the snapshot where we queued the bio as the last one. + * + * If we start it as the last one, it finishes most likely as the last + * one and exceptions in other snapshots will be already finished when + * the bio will be retried. */ - if (first && atomic_dec_and_test(&primary_pe->ref_count)) { - flush_bios(bio_list_get(&primary_pe->origin_bios)); - free_pending_exception(primary_pe); - /* If we got here, pe_queue is necessarily empty. */ - return r; - } - - /* - * Now that we have a complete pe list we can start the copying. - */ - list_for_each_entry_safe(pe, next_pe, &pe_queue, list) - start_copy(pe); + if (pe_to_start) + start_copy(pe_to_start); return r; } @@ -1344,7 +1293,7 @@ static int do_origin(struct dm_dev *origin, struct bio *bio) down_read(&_origins_lock); o = __lookup_origin(origin->bdev); if (o) - r = __origin_write(&o->snapshots, bio); + r = __origin_write(&o->snapshots, bio->bi_sector, bio); up_read(&_origins_lock); return r;