From patchwork Tue Mar 17 14:06:51 2009
From: Jonathan Brassow
Date: Tue, 17 Mar 2009 09:06:51 -0500
Subject: [dm-devel] [PATCH 28 of 29] dm-snap-snapshare-1.patch
To: dm-devel@redhat.com
Message-Id: <200903171406.n2HE6pCx017552@hydrogen.msp.redhat.com>

Index: linux-2.6/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-snap.c
+++ linux-2.6/drivers/md/dm-snap.c
@@ -51,6 +51,17 @@
 #define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
 					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
 
+/*
+ * Exception table hash sizes for pending exceptions
+ * The snapshot pending exception table holds pending exceptions
+ * that affect all snapshots in the share group (due to origin write).
+ * The snapshare pending exception table holds pending exceptions
+ * that affect just one snapshot in the share group (due to a
+ * write to one of the snapshots).
+ */
+#define DM_SNAPSHARE_HASH_SIZE 16
+#define DM_SNAPSHOT_HASH_SIZE 64
+
 struct dm_snapshot {
 	struct rw_semaphore lock;
 
@@ -67,19 +78,17 @@ struct dm_snapshot {
 
 	mempool_t *pending_pool;
 
-	atomic_t pending_exceptions_count;
-	struct dm_exception_table *pending;
+	uint64_t shared_uuid;
+	struct list_head shared_list;
+
 	/*
	 * pe_lock protects all pending_exception operations and access
	 * as well as the snapshot_bios list.
	 */
 	spinlock_t pe_lock;
 
-	/* The on disk metadata handler */
-	struct dm_exception_store *store;
-
 	struct dm_kcopyd_client *kcopyd_client;
 
 	/* Queue of snapshot writes for ksnapd to flush */
@@ -98,6 +107,7 @@ struct dm_snapshare {
 	struct list_head shared_list;
 
 	atomic_t pending_exceptions_count;
+	struct dm_exception_table *pending;
 
 	/* The on disk metadata handler */
 	struct dm_exception_store *store;
@@ -106,6 +116,19 @@ struct dm_snapshare {
 static struct workqueue_struct *ksnapd;
 static void flush_queued_bios(struct work_struct *work);
 
+static struct dm_exception_store *get_first_store(struct dm_snapshot *s)
+{
+	struct dm_snapshare *ss;
+
+	list_for_each_entry(ss, &s->shared_list, shared_list)
+		return ss->store;
+
+	DMERR("No snapshares in snapshot");
+	BUG();
+
+	return NULL;
+}
+
 static sector_t chunk_to_sector(struct dm_exception_store *store,
 				chunk_t chunk)
 {
@@ -153,8 +176,12 @@ struct dm_snap_pending_exception {
 	 */
 	atomic_t ref_count;
 
-	/* Pointer back to snapshot context */
+	/*
+	 * Pointer back to snapshot or snapshare context
+	 * Only one of 'ss' or 'snap' may be populated.
+	 */
 	struct dm_snapshot *snap;
+	struct dm_snapshare *ss;
 
 	/*
 	 * 1 indicates the exception has already been sent to
@@ -296,13 +323,21 @@ static void __insert_origin(struct origi
 }
 
 /*
+ * register_snapshare
+ * @ss: snapshare - initialized and populated with 's'
+ *
  * Make a note of the snapshot and its origin so we can look it
  * up when the origin has a write on it.
+ *
+ * Returns: 0 on success, -Exxx on failure
  */
-static int register_snapshot(struct dm_snapshot *snap)
+static void dealloc_snapshot(struct dm_snapshot *s);
+static int register_snapshare(struct dm_snapshare *ss)
 {
+	int found = 0;
 	struct origin *o, *new_o;
-	struct block_device *bdev = snap->origin->bdev;
+	struct dm_snapshot *s;
+	struct block_device *bdev = ss->snap->origin->bdev;
 
 	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
 	if (!new_o)
@@ -324,20 +359,61 @@ static int register_snapshot(struct dm_s
 		__insert_origin(o);
 	}
 
-	list_add_tail(&snap->list, &o->snapshots);
+	if (!ss->snap->shared_uuid)
+		goto new_snapshot;
+
+	list_for_each_entry(s, &o->snapshots, list) {
+		down_write(&s->lock);
+		if (s->shared_uuid == ss->snap->shared_uuid) {
+			DMERR("Putting origin because it is shared");
+			dm_put_device(ss->store->ti, ss->snap->origin);
+
+			DMERR("Adding share to existing snapshot");
+			list_add(&ss->shared_list, &s->shared_list);
+
+			DMERR("Deallocating duplicate snapshot");
+			dealloc_snapshot(ss->snap);
+
+			ss->snap = s;
+
+			up_write(&s->lock);
+			found = 1;
+			break;
+		}
+		up_write(&s->lock);
+	}
+
+new_snapshot:
+	if (!found)
+		list_add_tail(&ss->snap->list, &o->snapshots);
 
 	up_write(&_origins_lock);
 	return 0;
 }
 
-static void unregister_snapshot(struct dm_snapshot *s)
+static void unregister_snapshare(struct dm_snapshare *ss)
 {
 	struct origin *o;
 
+	/*
+	 * Always origin lock, then snapshot lock
+	 */
 	down_write(&_origins_lock);
-	o = __lookup_origin(s->origin->bdev);
+	o = __lookup_origin(ss->snap->origin->bdev);
+
+	down_write(&ss->snap->lock);
+
+	/*
+	 * Remove the snapshare, then if there are no
+	 * more snapshares left, remove the snapshot
+	 * from the origin's list
+	 */
+	list_del(&ss->shared_list);
+
+	if (list_empty(&ss->snap->shared_list))
+		list_del(&ss->snap->list);
+	up_write(&ss->snap->lock);
 
-	list_del(&s->list);
 	if (list_empty(&o->snapshots)) {
 		list_del(&o->hash_list);
 		kfree(o);
@@ -349,11 +425,14 @@ static void unregister_snapshot(struct d
 static struct dm_exception *alloc_pending_exception(void *context)
 {
	struct dm_snapshot *s = context;
+	struct dm_snapshare *ss;
 	struct dm_snap_pending_exception *pe =
 		mempool_alloc(s->pending_pool, GFP_NOIO);
 
-	atomic_inc(&s->pending_exceptions_count);
+	list_for_each_entry(ss, &s->shared_list, shared_list)
+		atomic_inc(&ss->pending_exceptions_count);
 	pe->snap = s;
+	pe->ss = NULL;
 
 	return &pe->e;
 }
 
@@ -362,25 +441,43 @@ static void free_pending_exception(struc
 {
 	struct dm_snap_pending_exception *pe;
 	struct dm_snapshot *s;
+	struct dm_snapshare *ss;
 
 	pe = container_of(e, struct dm_snap_pending_exception, e);
 	s = pe->snap;
 
 	mempool_free(pe, s->pending_pool);
 	smp_mb__before_atomic_dec();
-	atomic_dec(&s->pending_exceptions_count);
+
+	list_for_each_entry(ss, &s->shared_list, shared_list)
+		atomic_dec(&ss->pending_exceptions_count);
 }
 
-/*
- * Hard coded magic.
- */
-static int calc_max_buckets(void)
+static struct dm_exception *alloc_snapshare_pending_exception(void *context)
 {
-	/* use a fixed size of 2MB */
-	unsigned long mem = 2 * 1024 * 1024;
-	mem /= sizeof(struct list_head);
+	struct dm_snapshare *ss = context;
+	struct dm_snap_pending_exception *pe;
+
+	pe = mempool_alloc(ss->snap->pending_pool, GFP_NOIO);
+
+	atomic_inc(&ss->pending_exceptions_count);
+	pe->ss = ss;
+	pe->snap = NULL;
+
+	return &pe->e;
+}
+
+static void free_snapshare_pending_exception(struct dm_exception *e, void *unused)
+{
+	struct dm_snap_pending_exception *pe;
+	struct dm_snapshare *ss;
+
+	pe = container_of(e, struct dm_snap_pending_exception, e);
+	ss = pe->ss;
 
-	return mem;
+	mempool_free(pe, ss->snap->pending_pool);
+	smp_mb__before_atomic_dec();
+	atomic_dec(&ss->pending_exceptions_count);
 }
 
 /*
@@ -442,7 +539,7 @@ static int create_exception_store(struct
 				     argv + 2, store);
 }
 
-static struct dm_snapshot *alloc_snapshot(sector_t hash_size)
+static struct dm_snapshot *alloc_snapshot(void)
 {
 	int r, i;
 	struct dm_snapshot *s;
@@ -453,14 +550,14 @@ static struct dm_snapshot *alloc_snapsho
 		return NULL;
 	}
 
+	INIT_LIST_HEAD(&s->shared_list);
 	s->valid = 1;
 	s->active = 0;
-	atomic_set(&s->pending_exceptions_count, 0);
 	init_rwsem(&s->lock);
 	spin_lock_init(&s->pe_lock);
 
 	/* Allocate hash table for pending COW data */
-	s->pending = dm_exception_table_create(hash_size, 0,
+	s->pending = dm_exception_table_create(DM_SNAPSHOT_HASH_SIZE, 0,
 					       alloc_pending_exception, s,
 					       free_pending_exception, NULL);
 	if (!s->pending) {
@@ -539,11 +636,9 @@ static void dealloc_snapshot(struct dm_s
  */
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
-	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
 	struct dm_dev *origin;
 	struct dm_snapshare *ss;
 	struct dm_snapshot *s;
-	int i;
 	int r = -EINVAL;
 	char *origin_path;
 	struct dm_exception_store *store;
@@ -566,6 +661,14 @@ static int snapshot_ctr(struct dm_target
 	INIT_LIST_HEAD(&ss->shared_list);
 	atomic_set(&ss->pending_exceptions_count, 0);
 
+	ss->pending = dm_exception_table_create(DM_SNAPSHARE_HASH_SIZE, 0,
+					alloc_snapshare_pending_exception, ss,
+					free_snapshare_pending_exception, NULL);
+	if (!ss->pending) {
+		ti->error = "Failed to allocate exception hash table";
+		goto bad_hash_table;
+	}
+
 	r = create_exception_store(ti, argc, argv, &args_used, &store);
 	if (r) {
 		ti->error = "Failed to create snapshot exception store";
@@ -583,25 +686,9 @@ static int snapshot_ctr(struct dm_target
 	}
 
 	/*
-	 * Calculate based on the size of the original volume or
-	 * the COW volume...
-	 */
-	cow_dev_size = get_dev_size(store->cow->bdev);
-	origin_dev_size = get_dev_size(origin->bdev);
-	max_buckets = calc_max_buckets();
-
-	hash_size = min(origin_dev_size, cow_dev_size) >> store->chunk_shift;
-	hash_size = min(hash_size, max_buckets);
-
-	hash_size = rounddown_pow_of_two(hash_size);
-	hash_size >>= 3;
-	if (hash_size < 64)
-		hash_size = 64;
-
-	/*
 	 * Allocate the snapshot
 	 */
-	s = alloc_snapshot(hash_size);
+	s = alloc_snapshot();
 	if (!s) {
 		r = -ENOMEM;
 		ti->error = "Failed to create snapshot structure";
@@ -609,11 +696,12 @@ static int snapshot_ctr(struct dm_target
 	}
 	ss->snap = s;
 	s->origin = origin;
-	s->store = ss->store;
+	s->shared_uuid = store->shared_uuid;
+	list_add(&ss->shared_list, &s->shared_list);
 
 	/* Add snapshot to the list of snapshots for this origin */
 	/* Exceptions aren't triggered till snapshot_resume() is called */
-	if (register_snapshot(s)) {
+	if (register_snapshare(ss)) {
 		r = -EINVAL;
 		ti->error = "Cannot register snapshot with origin";
 		goto bad_load_and_register;
@@ -634,6 +722,9 @@ bad_origin:
 	dm_exception_store_destroy(store);
 
 bad_exception_store:
+	dm_exception_table_destroy(ss->pending);
+
+bad_hash_table:
 	kfree(ss);
 
 	return r;
@@ -651,9 +742,9 @@ static void snapshot_dtr(struct dm_targe
 
 	/* Prevent further origin writes from using this snapshot. */
 	/* After this returns there can be no new kcopyd jobs. */
-	unregister_snapshot(s);
+	unregister_snapshare(ss);
 
-	while (atomic_read(&s->pending_exceptions_count))
+	while (atomic_read(&ss->pending_exceptions_count))
 		msleep(1);
 	/*
 	 * Ensure instructions in mempool_destroy aren't reordered
@@ -672,6 +763,8 @@ static void snapshot_dtr(struct dm_targe
 
 	dm_exception_store_destroy(ss->store);
 
+	dm_exception_table_destroy(ss->pending);
+
 	kfree(ss);
 }
 
@@ -721,6 +814,7 @@ static void error_bios(struct bio *bio)
 
 static void __invalidate_snapshot(struct dm_snapshot *s, int err)
 {
+	struct dm_snapshare *ss;
 	char *tmp_str = "ES_INVALIDATE";
 
 	if (!s->valid)
@@ -731,12 +825,15 @@ static void __invalidate_snapshot(struct
 	else if (err == -ENOMEM)
 		DMERR("Invalidating snapshot: Unable to allocate exception.");
 
-	if (s->store->type->message)
-		s->store->type->message(s->store, 1, &tmp_str);
+
+	/* Invalidating the snapshot will invalidate all snapshares. */
+	list_for_each_entry(ss, &s->shared_list, shared_list)
+		if (ss->store->type->message)
+			ss->store->type->message(ss->store, 1, &tmp_str);
 
 	s->valid = 0;
 
-	dm_table_event(s->store->ti->table);
+	dm_table_event(ss->store->ti->table);
 }
 
static void get_pending_exception(struct dm_snap_pending_exception *pe)
@@ -747,7 +844,6 @@ static void get_pending_exception(struct
 static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
 {
 	struct dm_snap_pending_exception *primary_pe;
-	struct dm_snapshot *s = pe->snap;
 	struct bio *origin_bios = NULL;
 
 	primary_pe = pe->primary_pe;
@@ -760,7 +856,9 @@ static struct bio *put_pending_exception
 	if (primary_pe &&
 	    atomic_dec_and_test(&primary_pe->ref_count)) {
 		origin_bios = bio_list_get(&primary_pe->origin_bios);
-		dm_free_exception(s->pending, &primary_pe->e);
+		dm_free_exception(primary_pe->ss ? primary_pe->ss->pending :
+				  primary_pe->snap->pending,
+				  &primary_pe->e);
 	}
 
 	/*
@@ -768,14 +866,15 @@ static struct bio *put_pending_exception
 	 * it's not itself a primary pe.
 	 */
 	if (!primary_pe || primary_pe != pe)
-		dm_free_exception(s->pending, &pe->e);
+		dm_free_exception(pe->ss ? pe->ss->pending : pe->snap->pending,
+				  &pe->e);
 
 	return origin_bios;
 }
 
 static void pending_complete(struct dm_snap_pending_exception *pe, int success)
 {
-	struct dm_snapshot *s = pe->snap;
+	struct dm_snapshot *s = pe->snap ? pe->snap : pe->ss->snap;
 	struct bio *origin_bios = NULL;
 	struct bio *snapshot_bios = NULL;
 	int error = 0;
@@ -825,15 +924,17 @@ static void commit_callback(void *contex
 static void copy_callback(int read_err, unsigned long write_err, void *context)
 {
 	struct dm_snap_pending_exception *pe = context;
-	struct dm_snapshot *s = pe->snap;
+	struct dm_exception_store *store;
+
+	store = pe->ss ? pe->ss->store : get_first_store(pe->snap);
 
 	if (read_err || write_err)
 		pending_complete(pe, 0);
 	else
 		/* Update the metadata if we are persistent */
-		s->store->type->commit_exception(s->store, &pe->e,
-						 commit_callback, pe);
+		store->type->commit_exception(store, &pe->e,
+					      commit_callback, pe);
 }
 
 /*
@@ -841,19 +942,24 @@ static void copy_callback(int read_err,
 */
 static void start_copy(struct dm_snap_pending_exception *pe)
 {
-	struct dm_snapshot *s = pe->snap;
+	struct dm_exception_store *store;
+	struct dm_snapshot *s;
 	struct dm_io_region src, dest;
-	struct block_device *bdev = s->origin->bdev;
+	struct block_device *bdev;
 	sector_t dev_size;
 
+	store = (pe->ss) ? pe->ss->store : get_first_store(pe->snap);
+	s = pe->snap ? pe->snap : pe->ss->snap;
+	bdev = s->origin->bdev;
+
 	dev_size = get_dev_size(bdev);
 
 	src.bdev = bdev;
-	src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
-	src.count = min(s->store->chunk_size, dev_size - src.sector);
+	src.sector = chunk_to_sector(store, pe->e.old_chunk);
+	src.count = min(store->chunk_size, dev_size - src.sector);
 
-	dest.bdev = s->store->cow->bdev;
-	dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
+	dest.bdev = store->cow->bdev;
+	dest.sector = chunk_to_sector(store, pe->e.new_chunk);
 	dest.count = src.count;
 
 	/* Hand over to kcopyd */
@@ -873,14 +979,17 @@ static struct dm_snap_pending_exception
 __find_pending_exception(struct dm_snapshot *s, struct bio *bio,
 			 struct dm_snapshare *ss)
 {
+	int r;
 	struct dm_exception *e, *tmp_e;
 	struct dm_snap_pending_exception *pe;
-	chunk_t chunk = sector_to_chunk(s->store, bio->bi_sector);
+	struct dm_exception_store *store = ss ? ss->store : get_first_store(s);
+	struct dm_exception_table *table = ss ? ss->pending : s->pending;
+	chunk_t chunk = sector_to_chunk(store, bio->bi_sector);
 
 	/*
 	 * Is there a pending exception for this already ?
 	 */
-	e = dm_lookup_exception(s->pending, chunk);
+	e = dm_lookup_exception(table, chunk);
 	if (e) {
 		/* cast the exception to a pending exception */
 		pe = container_of(e, struct dm_snap_pending_exception, e);
@@ -892,18 +1001,18 @@ __find_pending_exception(struct dm_snaps
 	 * to hold the lock while we do this.
 	 */
 	up_write(&s->lock);
-	tmp_e = dm_alloc_exception(s->pending);
+	tmp_e = dm_alloc_exception(table);
 	pe = container_of(tmp_e, struct dm_snap_pending_exception, e);
 	down_write(&s->lock);
 
 	if (!s->valid) {
-		dm_free_exception(s->pending, &pe->e);
+		dm_free_exception(table, &pe->e);
 		return NULL;
 	}
 
-	e = dm_lookup_exception(s->pending, chunk);
+	e = dm_lookup_exception(table, chunk);
 	if (e) {
-		dm_free_exception(s->pending, &pe->e);
+		dm_free_exception(table, &pe->e);
 		pe = container_of(e, struct dm_snap_pending_exception, e);
 		goto out;
 	}
@@ -915,23 +1024,24 @@ __find_pending_exception(struct dm_snaps
 	atomic_set(&pe->ref_count, 0);
 	pe->started = 0;
 
-	if (s->store->type->prepare_exception(s->store, &pe->e, ss ? 0 : 1)) {
-		dm_free_exception(s->pending, &pe->e);
+	r = store->type->prepare_exception(store, &pe->e, ss ? 0 : 1);
+	if (r) {
+		dm_free_exception(table, &pe->e);
 		return NULL;
 	}
 
 	get_pending_exception(pe);
-	dm_insert_exception(s->pending, &pe->e);
+	dm_insert_exception(table, &pe->e);
 
 out:
 	return pe;
 }
 
-static void remap_exception(struct dm_snapshot *s, struct bio *bio, chunk_t chunk)
+static void remap_exception(struct dm_snapshare *ss, struct bio *bio, chunk_t chunk)
 {
-	bio->bi_bdev = s->store->cow->bdev;
-	bio->bi_sector = chunk_to_sector(s->store, dm_chunk_number(chunk)) +
-			 (bio->bi_sector & s->store->chunk_mask);
+	bio->bi_bdev = ss->store->cow->bdev;
+	bio->bi_sector = chunk_to_sector(ss->store, dm_chunk_number(chunk)) +
+			 (bio->bi_sector & ss->store->chunk_mask);
 }
 
 static int snapshot_map(struct dm_target *ti, struct bio *bio,
@@ -962,7 +1072,7 @@ static int snapshot_map(struct dm_target
 
 	/* If the block is already remapped - use that, else remap it */
 	rtn = ss->store->type->lookup_exception(ss->store, chunk, &new_chunk, 0);
 	if (!rtn) {
-		remap_exception(s, bio, new_chunk);
+		remap_exception(ss, bio, new_chunk);
 		goto out_unlock;
 	}
 
@@ -986,7 +1096,7 @@ static int snapshot_map(struct dm_target
 			goto out_unlock;
 		}
 
-		remap_exception(s, bio, pe->e.new_chunk);
+		remap_exception(ss, bio, pe->e.new_chunk);
 		bio_list_add(&pe->snapshot_bios, bio);
 
 		r = DM_MAPIO_SUBMITTED;
@@ -1112,13 +1222,38 @@ static int snapshot_message(struct dm_ta
 	return r;
 }
 
+static int is_completely_remapped(struct dm_snapshot *s, chunk_t chunk)
+{
+	int r;
+	struct dm_snapshare *ss;
+
+	list_for_each_entry(ss, &s->shared_list, shared_list) {
+		r = ss->store->type->lookup_exception(ss->store, chunk,
+						      NULL, 0);
+		switch (r) {
+		case 0:
+			continue;
+		case -ENOENT:
+			return 0;
+		case -EWOULDBLOCK:
+			DMERR("Unable to handle blocking exception stores");
+			BUG();
+		default:
+			DMERR("Invalid return from exception store lookup");
+			BUG();
+		}
+	}
+	return 1;
+}
+
 /*-----------------------------------------------------------------
 * Origin methods
 *---------------------------------------------------------------*/
 static int __origin_write(struct list_head *snapshots, struct bio *bio)
 {
-	int rtn, r = DM_MAPIO_REMAPPED, first = 0;
+	int r = DM_MAPIO_REMAPPED, first = 0;
 	struct dm_snapshot *snap;
+	struct dm_exception_store *store;
 	struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
 	chunk_t chunk;
 	LIST_HEAD(pe_queue);
@@ -1132,36 +1267,28 @@ static int __origin_write(struct list_he
 		if (!snap->valid || !snap->active)
 			goto next_snapshot;
 
+		store = get_first_store(snap);
+
 		/* Nothing to do if writing beyond end of snapshot */
-		if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
+		if (bio->bi_sector >= dm_table_get_size(store->ti->table))
 			goto next_snapshot;
 
 		/*
		 * Remember, different snapshots can have
		 * different chunk sizes.
		 */
-		chunk = sector_to_chunk(snap->store, bio->bi_sector);
+		chunk = sector_to_chunk(store, bio->bi_sector);
 
 		/*
-		 * Check exception table to see if block
-		 * is already remapped in this snapshot
-		 * and trigger an exception if not.
+		 * Check exception table to see if block is already
+		 * remapped in this snapshot and trigger an exception if not.
		 *
		 * ref_count is initialised to 1 so pending_complete()
		 * won't destroy the primary_pe while we're inside this loop.
		 */
-		rtn = snap->store->type->lookup_exception(snap->store, chunk,
-							  NULL, 0);
-		if (!rtn)
+		if (is_completely_remapped(snap, chunk))
 			goto next_snapshot;
 
-		/*
-		 * Could be -EWOULDBLOCK, but we don't handle that yet
-		 * and there are currently no exception store
-		 * implementations that would require us to.
-		 */
-		BUG_ON(rtn != -ENOENT);
-
 		pe = __find_pending_exception(snap, bio, NULL);
 		if (!pe) {
 			__invalidate_snapshot(snap, -ENOMEM);
@@ -1299,15 +1426,18 @@ static void origin_resume(struct dm_targ
 {
 	struct dm_dev *dev = ti->private;
 	struct dm_snapshot *snap;
+	struct dm_exception_store *store;
 	struct origin *o;
 	chunk_t chunk_size = 0;
 
 	down_read(&_origins_lock);
 	o = __lookup_origin(dev->bdev);
 	if (o)
-		list_for_each_entry (snap, &o->snapshots, list)
+		list_for_each_entry (snap, &o->snapshots, list) {
+			store = get_first_store(snap);
 			chunk_size = min_not_zero(chunk_size,
-						  snap->store->chunk_size);
+						  store->chunk_size);
+		}
 
 	up_read(&_origins_lock);
 
 	ti->split_io = chunk_size;
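
[Editor's note: the sketch below is not part of the patch. It is a minimal
userspace rendering of the share-group rule that register_snapshare() above
implements: a shared_uuid of 0 means "not shared", and a snapshare whose
snapshot carries a matching non-zero shared_uuid joins the existing snapshot
instead of adding a new one to the origin's list. The struct and list types
are simplified stand-ins for the kernel's struct dm_snapshot /
struct dm_snapshare and list_for_each_entry(); locking is omitted.]

/* share_group.c - illustrative only */
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

struct snapshot {
	uint64_t shared_uuid;	/* 0 => snapshot is not shared */
	struct snapshot *next;	/* origin's list of snapshots */
};

/*
 * Mirror of the lookup in register_snapshare(): return the existing
 * snapshot this share should join, or NULL if the caller must add its
 * own snapshot to the origin's list (the 'new_snapshot' path).
 */
static struct snapshot *find_share_group(struct snapshot *origin_snaps,
					 uint64_t shared_uuid)
{
	struct snapshot *s;

	if (!shared_uuid)	/* unshared snapshots never merge */
		return NULL;

	for (s = origin_snaps; s; s = s->next)
		if (s->shared_uuid == shared_uuid)
			return s;

	return NULL;
}

int main(void)
{
	struct snapshot b = { 0x2a, NULL };
	struct snapshot a = { 0, &b };

	printf("uuid 0x2a joins existing: %s\n",
	       find_share_group(&a, 0x2a) == &b ? "yes" : "no");
	printf("uuid 0 gets a new snapshot: %s\n",
	       find_share_group(&a, 0) == NULL ? "yes" : "no");
	return 0;
}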
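
[Editor's note: likewise not part of the patch - a small sketch of the
origin-write test that is_completely_remapped() adds. The copy-out for an
origin write may be skipped only when every snapshare in the group already
has the chunk remapped; a single -ENOENT from any share's exception store
means the origin data must still be preserved. The 'lookup' callback is a
hypothetical stand-in for ss->store->type->lookup_exception().]

/* remap_check.c - illustrative only */
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct share {
	struct share *next;	/* the snapshot's list of shares */
	/* 0 if 'chunk' is remapped in this share, -ENOENT if not */
	int (*lookup)(const struct share *ss, uint64_t chunk);
};

static int group_completely_remapped(const struct share *shares,
				     uint64_t chunk)
{
	const struct share *ss;

	for (ss = shares; ss; ss = ss->next)
		if (ss->lookup(ss, chunk) == -ENOENT)
			return 0;	/* one missing mapping => must copy */

	return 1;			/* all shares covered => skip copy */
}

static int found(const struct share *ss, uint64_t c)
{
	(void)ss; (void)c;
	return 0;
}

static int missing(const struct share *ss, uint64_t c)
{
	(void)ss; (void)c;
	return -ENOENT;
}

int main(void)
{
	struct share s2 = { NULL, missing };
	struct share s1 = { &s2, found };

	printf("skip copy-out? %s\n",
	       group_completely_remapped(&s1, 7) ? "yes" : "no"); /* no */
	s2.lookup = found;
	printf("skip copy-out? %s\n",
	       group_completely_remapped(&s1, 7) ? "yes" : "no"); /* yes */
	return 0;
}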