===================================================================
@@ -1723,6 +1723,171 @@ super_1_rdev_size_change(mdk_rdev_t *rde
return num_sectors;
}
+/*
+ * This structure is never used by userspace. It is only ever
+ * used in these particular super block accessing functions.
+ * Therefore, we don't put it in any .h file.
+ *
+ * It makes sense to define a new magic number here. This way,
+ * no userspace application will confuse the device as a device
+ * that is accessible through MD operations. Devices with this
+ * superblock should only ever be accessed via device-mapper.
+ *
+ * Layout notes:
+ * - Every field is stored little-endian (__le32 / __le64).
+ * - The named fields occupy 32 bytes and the reserved area 480 bytes,
+ *   for a total of 512 bytes; super_2_load() rejects any size that is
+ *   not a power of two.
+ * - MD_DM_SB_MAGIC is written by super_2_sync() and checked by
+ *   super_2_load() to decide whether a superblock is present.
+ */
+#define MD_DM_SB_MAGIC 0x426E6F4A
+struct mdp_superblock_2 {
+ __le32 magic; /* MD_DM_SB_MAGIC when the superblock is valid */
+ __le32 flags; /* not set by the accessors below; zeroed by super_2_sync() */
+
+ __le64 events; /* copy of mddev->events; higher value wins in super_2_load() */
+ __le64 reshape_position; /* not referenced by the accessors below */
+
+ __le32 num_devices; /* Number of devs in RAID, Max = 32 */
+ __le32 failed_devices; /* bitmap of failed devs: bit N set => raid_disk N failed */
+
+ __le32 reserved[120]; /* Round out the struct to 512 bytes */
+};
+
+/*
+ * super_2_sync
+ *
+ * Rebuild the superblock image held in rdev->sb_page from the current
+ * state of mddev.  Failure bits already present in the image are kept,
+ * and a bit is added for every array member that is flagged Faulty and
+ * occupies a valid slot.  Only the in-memory page is modified here.
+ *
+ * @mddev: array whose event count, device count and members are recorded
+ * @rdev:  device whose sb_page receives the refreshed image
+ */
+static void super_2_sync(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+	mdk_rdev_t *r, *t;
+	uint32_t failed_devices;
+	struct mdp_superblock_2 *sb;
+
+	sb = (struct mdp_superblock_2 *)page_address(rdev->sb_page);
+
+	/* Fetch the old failure bitmap before the image is wiped below */
+	failed_devices = le32_to_cpu(sb->failed_devices);
+
+	rdev_for_each(r, t, mddev) {
+		if (!test_bit(Faulty, &r->flags))
+			continue;
+
+		/*
+		 * Only slots 0..31 fit in the 32-bit bitmap.  Shifting by a
+		 * negative amount or by the full width is undefined, so such
+		 * slots are skipped instead of being recorded.
+		 */
+		if (r->raid_disk < 0 ||
+		    r->raid_disk >= (int)(8 * sizeof(failed_devices)))
+			continue;
+
+		printk(KERN_INFO " Dev #%d is faulty\n", r->raid_disk);
+
+		/* Unsigned constant: 1 << 31 would overflow a signed int */
+		failed_devices |= (1U << r->raid_disk);
+	}
+
+	memset(sb, 0, sizeof(*sb));
+
+	sb->magic = cpu_to_le32(MD_DM_SB_MAGIC);
+	sb->events = cpu_to_le64(mddev->events);
+	sb->num_devices = cpu_to_le32(mddev->raid_disks);
+	sb->failed_devices = cpu_to_le32(failed_devices);
+}
+
+/*
+ * super_2_load
+ *
+ * Read the superblock of rdev (located at sector offset 0, sized
+ * sizeof(struct mdp_superblock_2)) and indicate the more appropriate
+ * device whose superblock should be used - if given two.
+ *
+ * If the magic number does not match, a new superblock image is built
+ * in rdev->sb_page and MD_CHANGE_DEVS is set on the array so that it
+ * is written out later.
+ *
+ * @rdev:          device whose superblock is loaded
+ * @refdev:        device to compare against, or NULL if there is none
+ * @minor_version: unused by this superblock format
+ *
+ * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise
+ */
+static int super_2_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
+{
+	int r;
+	uint64_t ev1, ev2;
+	struct mdp_superblock_2 *sb;
+	struct mdp_superblock_2 *refsb;
+
+	/* The on-disk image must have a power-of-two size */
+	if (sizeof(*sb) & (sizeof(*sb) - 1)) {
+		/* sizeof yields size_t, which needs %zu rather than %lu */
+		printk(KERN_ERR "Programmer error: Bad sized superblock (%zu)\n",
+		       sizeof(*sb));
+		return -EIO;
+	}
+
+	rdev->sb_start = 0;
+	rdev->sb_size = sizeof(*sb);
+	r = read_disk_sb(rdev, rdev->sb_size);
+	if (r)
+		return r;
+
+	sb = (struct mdp_superblock_2 *)page_address(rdev->sb_page);
+	if (sb->magic != cpu_to_le32(MD_DM_SB_MAGIC)) {
+		printk(KERN_INFO " Superblock not found: creating new\n");
+
+		/*
+		 * super_2_sync() carries sb->failed_devices over from the
+		 * existing image.  What was just read is not a superblock,
+		 * so clear it first to keep stale data out of the bitmap.
+		 */
+		memset(sb, 0, sizeof(*sb));
+		super_2_sync(rdev->mddev, rdev);
+
+		/* Force new superblocks to disk */
+		set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
+
+		/* Any superblock is better than none, choose that if given */
+		return refdev ? 0 : 1;
+	}
+
+	ev1 = le64_to_cpu(sb->events);
+	if (!refdev) {
+		if (le32_to_cpu(sb->num_devices) != rdev->mddev->raid_disks) {
+			/*
+			 * User should clear device of old superblocks before
+			 * attempting to create something different.
+			 */
+
+			printk(KERN_ERR "Configuration incompatible with on-disk information\n");
+			return -EINVAL;
+		}
+		return 1;
+	}
+
+	refsb = (struct mdp_superblock_2 *)page_address(refdev->sb_page);
+	ev2 = le64_to_cpu(refsb->events);
+
+	/* Casts keep %llu correct wherever uint64_t is not unsigned long long */
+	if (ev1 != ev2)
+		printk(KERN_INFO "Comparing event counts [%llu %llu], choosing dev #%d\n",
+		       (unsigned long long)ev1, (unsigned long long)ev2,
+		       (ev1 > ev2) ? rdev->raid_disk : refdev->raid_disk);
+
+	/* On a tie the reference device is kept */
+	return (ev1 > ev2) ? 1 : 0;
+}
+
+/*
+ * super_2_validate
+ *
+ * Apply the superblock image in rdev->sb_page to the array state.
+ *
+ * While mddev->events is still zero, the event count is adopted from
+ * this superblock, every member named in its failed_devices bitmap is
+ * flagged Faulty, and the bitmap is cleared in each loaded superblock
+ * image.  A device found Faulty afterwards has Faulty and In_sync
+ * cleared and its recovery_offset reset to 0.
+ *
+ * @mddev: array being assembled
+ * @rdev:  device whose superblock is validated
+ *
+ * Return: always 0
+ */
+static int super_2_validate(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+	uint64_t ev1;
+	uint32_t failed_devices;
+	struct mdp_superblock_2 *sb;
+
+	sb = (struct mdp_superblock_2 *)page_address(rdev->sb_page);
+	ev1 = le64_to_cpu(sb->events);
+	failed_devices = le32_to_cpu(sb->failed_devices);
+
+	if (!mddev->events) {
+		mdk_rdev_t *r, *t;
+		struct mdp_superblock_2 *sb2;
+
+		mddev->events = ev1;
+		rdev_for_each(r, t, mddev) {
+			if (!r->sb_page)
+				continue;
+			sb2 = (struct mdp_superblock_2 *)
+				page_address(r->sb_page);
+			sb2->failed_devices = 0;
+
+			/*
+			 * Only slots 0..31 exist in the bitmap.  The upper
+			 * bound and the unsigned constant keep the shift
+			 * well defined (a signed 1 << 31 overflows).
+			 */
+			if ((r->raid_disk >= 0) &&
+			    (r->raid_disk < (int)(8 * sizeof(failed_devices))) &&
+			    (failed_devices & (1U << r->raid_disk)))
+				set_bit(Faulty, &r->flags);
+		}
+	}
+
+	/*
+	 * NOTE(review): these go through rdev->mddev rather than the mddev
+	 * argument; presumably both name the same array - confirm.
+	 */
+	rdev->mddev->bitmap_info.offset = 0; /* disable bitmap creation */
+	rdev->mddev->bitmap_info.default_offset = 1024 >> 9;
+
+	/*
+	 * If the device was marked as failed when the array
+	 * was previously active, we must mark the device as
+	 * not In_sync
+	 */
+	if (test_bit(Faulty, &rdev->flags)) {
+		printk(KERN_INFO " Dev #%d marked as failed, clearing In_sync\n",
+		       rdev->raid_disk);
+		clear_bit(Faulty, &rdev->flags);
+		clear_bit(In_sync, &rdev->flags);
+		rdev->recovery_offset = 0;
+	}
+
+	/* FIXME: Pull these debug statements */
+	if (test_bit(In_sync, &rdev->flags))
+		printk(KERN_INFO " In_sync flag set\n");
+
+	return 0;
+}
+
static struct super_type super_types[] = {
[0] = {
.name = "0.90.0",
@@ -1740,6 +1905,14 @@ static struct super_type super_types[] =
.sync_super = super_1_sync,
.rdev_size_change = super_1_rdev_size_change,
},
+ [2] = { /* handlers for the "dm" format backed by struct mdp_superblock_2 */
+ .name = "dm",
+ .owner = THIS_MODULE,
+ .load_super = super_2_load,
+ .validate_super = super_2_validate,
+ .sync_super = super_2_sync,
+ .rdev_size_change = super_1_rdev_size_change, /* NOTE(review): same handler as entry [1]; confirm it suits sb_start == 0 as set by super_2_load() */
+ },
};
static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
@@ -4408,6 +4581,20 @@ static void md_safemode_timeout(unsigned
md_wakeup_thread(mddev->thread);
}
+/*
+ * should_read_super
+ *
+ * Tell md_run() whether the on-disk superblocks have to be analyzed.
+ *
+ * Return: 1 if mddev->raid_disks is zero or if any member device has
+ * meta_bdev set, 0 otherwise
+ */
+static int should_read_super(mddev_t *mddev)
+{
+	mdk_rdev_t *cur, *next;
+
+	if (mddev->raid_disks) {
+		/* Device count already known: look for a metadata device */
+		rdev_for_each(cur, next, mddev) {
+			if (cur->meta_bdev)
+				return 1;
+		}
+
+		return 0;
+	}
+
+	/* No device count configured yet */
+	return 1;
+}
+
static int start_dirty_degraded;
int md_run(mddev_t *mddev)
@@ -4429,7 +4616,7 @@ int md_run(mddev_t *mddev)
/*
* Analyze all RAID superblock(s)
*/
- if (!mddev->raid_disks) {
+ if (should_read_super(mddev)) {
if (!mddev->persistent)
return -EINVAL;
analyze_sbs(mddev);