From patchwork Fri Dec 3 19:55:42 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jonthan Brassow X-Patchwork-Id: 378991 Received: from mx3-phx2.redhat.com (mx3-phx2.redhat.com [209.132.183.24]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id oB3JvTcc008350 for ; Fri, 3 Dec 2010 19:57:49 GMT Received: from lists01.pubmisc.prod.ext.phx2.redhat.com (lists01.pubmisc.prod.ext.phx2.redhat.com [10.5.19.33]) by mx3-phx2.redhat.com (8.13.8/8.13.8) with ESMTP id oB3Jtntw009802; Fri, 3 Dec 2010 14:55:49 -0500 Received: from int-mx12.intmail.prod.int.phx2.redhat.com (int-mx12.intmail.prod.int.phx2.redhat.com [10.5.11.25]) by lists01.pubmisc.prod.ext.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id oB3JtmHr001217 for ; Fri, 3 Dec 2010 14:55:48 -0500 Received: from hydrogen.msp.redhat.com (hydrogen.msp.redhat.com [10.15.80.1]) by int-mx12.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id oB3JtgTq010060 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO) for ; Fri, 3 Dec 2010 14:55:43 -0500 Received: from hydrogen.msp.redhat.com ([127.0.0.1]) by hydrogen.msp.redhat.com (8.14.1/8.14.1) with ESMTP id oB3JtgT5003637; Fri, 3 Dec 2010 13:55:42 -0600 Received: (from jbrassow@localhost) by hydrogen.msp.redhat.com (8.14.1/8.14.1/Submit) id oB3JtgiV003636; Fri, 3 Dec 2010 13:55:42 -0600 Date: Fri, 3 Dec 2010 13:55:42 -0600 From: Jonathan Brassow Message-Id: <201012031955.oB3JtgiV003636@hydrogen.msp.redhat.com> To: dm-devel@redhat.com X-Scanned-By: MIMEDefang 2.68 on 10.5.11.25 X-loop: dm-devel@redhat.com Subject: [dm-devel] [PATCH 12 of 15] md separate meta and data devs X-BeenThere: dm-devel@redhat.com X-Mailman-Version: 2.1.12 Precedence: junk Reply-To: device-mapper development List-Id: device-mapper development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: dm-devel-bounces@redhat.com Errors-To: dm-devel-bounces@redhat.com 
X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Fri, 03 Dec 2010 19:57:49 +0000 (UTC) Index: linux-2.6/drivers/md/bitmap.c =================================================================== --- linux-2.6.orig/drivers/md/bitmap.c +++ linux-2.6/drivers/md/bitmap.c @@ -263,14 +263,18 @@ static mdk_rdev_t *next_active_rdev(mdk_ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) { mdk_rdev_t *rdev = NULL; + struct block_device *bdev; mddev_t *mddev = bitmap->mddev; while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { int size = PAGE_SIZE; loff_t offset = mddev->bitmap_info.offset; + + bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; + if (page->index == bitmap->file_pages-1) size = roundup(bitmap->last_page_size, - bdev_logical_block_size(rdev->bdev)); + bdev_logical_block_size(bdev)); /* Just make sure we aren't corrupting data or * metadata */ Index: linux-2.6/drivers/md/md.c =================================================================== --- linux-2.6.orig/drivers/md/md.c +++ linux-2.6/drivers/md/md.c @@ -707,6 +707,20 @@ static struct mdk_personality *find_pers static inline sector_t calc_dev_sboffset(mdk_rdev_t *rdev) { sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512; + + if (rdev->meta_bdev) + return 0; + + return MD_NEW_SIZE_SECTORS(num_sectors); +} + +static inline sector_t calc_dev_sectors(mdk_rdev_t *rdev) +{ + sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512; + + if (rdev->meta_bdev) + return num_sectors; + return MD_NEW_SIZE_SECTORS(num_sectors); } @@ -764,7 +778,7 @@ void md_super_write(mddev_t *mddev, mdk_ */ struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); - bio->bi_bdev = rdev->bdev; + bio->bi_bdev = (rdev->meta_bdev) ? 
rdev->meta_bdev : rdev->bdev; bio->bi_sector = sector; bio_add_page(bio, page, size, 0); bio->bi_private = rdev; @@ -802,7 +816,8 @@ int sync_page_io(mdk_rdev_t *rdev, secto rw |= REQ_SYNC | REQ_UNPLUG; - bio->bi_bdev = rdev->bdev; + bio->bi_bdev = (metadata_op && rdev->meta_bdev) ? + rdev->meta_bdev : rdev->bdev; bio->bi_sector = sector; bio_add_page(bio, page, size, 0); init_completion(&event); @@ -820,6 +835,7 @@ EXPORT_SYMBOL_GPL(sync_page_io); static int read_disk_sb(mdk_rdev_t * rdev, int size) { char b[BDEVNAME_SIZE]; + if (!rdev->sb_page) { MD_BUG(); return -EINVAL; @@ -1678,7 +1694,7 @@ super_1_rdev_size_change(mdk_rdev_t *rde sector_t max_sectors; if (num_sectors && num_sectors < rdev->mddev->dev_sectors) return 0; /* component must fit device */ - if (rdev->sb_start < rdev->data_offset) { + if (rdev->meta_bdev || rdev->sb_start < rdev->data_offset) { /* minor versions 1 and 2; superblock before data */ max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9; max_sectors -= rdev->data_offset; @@ -1769,6 +1785,7 @@ int md_integrity_register(mddev_t *mddev * If at least one rdev is not integrity capable, we can not * enable data integrity for the md device. */ + /* FIXME (brassow): check both [meta_]bdev ? */ if (!bdev_get_integrity(rdev->bdev)) return -EINVAL; if (!reference) { @@ -1935,6 +1952,8 @@ static int lock_rdev(mdk_rdev_t *rdev, d struct block_device *bdev; char b[BDEVNAME_SIZE]; + /* FIXME (brassow): [un]lock all both [meta_]bdev ? */ + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); if (IS_ERR(bdev)) { printk(KERN_ERR "md: could not open %s.\n", @@ -1957,6 +1976,8 @@ static int lock_rdev(mdk_rdev_t *rdev, d static void unlock_rdev(mdk_rdev_t *rdev) { struct block_device *bdev = rdev->bdev; + + /* FIXME brassow: end here on 'bdev' search */ rdev->bdev = NULL; if (!bdev) MD_BUG(); @@ -4434,7 +4455,18 @@ int md_run(mddev_t *mddev) * We don't want the data to overlap the metadata, * Internal Bitmap issues have been handled elsewhere. 
*/ - if (rdev->data_offset < rdev->sb_start) { + if (rdev->meta_bdev) { + /* Metadata is on a separate device */ + if (rdev->data_offset) { + printk(KERN_ERR "md: data_offset should be 0\n"); + return -EINVAL; + } + + if (rdev->sb_start) { + printk(KERN_ERR "md: sb_start should be 0\n"); + return -EINVAL; + } + } else if (rdev->data_offset < rdev->sb_start) { if (mddev->dev_sectors && rdev->data_offset + mddev->dev_sectors > rdev->sb_start) { @@ -5240,7 +5272,7 @@ static int add_new_disk(mddev_t * mddev, rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; } else rdev->sb_start = calc_dev_sboffset(rdev); - rdev->sectors = rdev->sb_start; + rdev->sectors = calc_dev_sectors(rdev); err = bind_rdev_to_array(rdev, mddev); if (err) { @@ -5310,7 +5342,7 @@ static int hot_add_disk(mddev_t * mddev, else rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; - rdev->sectors = rdev->sb_start; + rdev->sectors = calc_dev_sectors(rdev); if (test_bit(Faulty, &rdev->flags)) { printk(KERN_WARNING @@ -5519,7 +5551,6 @@ static int update_size(mddev_t *mddev, s * sb_start or, if that is <data_offset, it must fit before the size * of each device. If num_sectors is zero, we find the largest size * that fits. - */ if (mddev->sync_thread) return -EBUSY; Index: linux-2.6/drivers/md/md.h =================================================================== --- linux-2.6.orig/drivers/md/md.h +++ linux-2.6/drivers/md/md.h @@ -60,6 +60,12 @@ struct mdk_rdev_s mddev_t *mddev; /* RAID array if running */ int last_events; /* IO event timestamp */ + /* + * If meta_bdev is non-NULL, it means that a separate device is + * being used to store the metadata (superblock/bitmap) which + * would otherwise be contained on the same device as the data (bdev). + */ + struct block_device *meta_bdev; struct block_device *bdev; /* block device handle */ struct page *sb_page;