@@ -337,6 +337,8 @@ struct ocfs2_super
unsigned int node_num;
int slot_num;
int preferred_slot;
+ u16 mmp_update_interval;
+ struct task_struct *mmp_task;
int s_sectsize_bits;
int s_clustersize;
int s_clustersize_bits;
@@ -87,7 +87,8 @@
| OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
| OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \
| OCFS2_FEATURE_INCOMPAT_CLUSTERINFO \
- | OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
+ | OCFS2_FEATURE_INCOMPAT_APPEND_DIO \
+ | OCFS2_FEATURE_INCOMPAT_MMP)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
| OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -167,6 +168,11 @@
*/
#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO 0x8000
+/*
+ * Multiple mount protection
+ */
+#define OCFS2_FEATURE_INCOMPAT_MMP 0x10000
+
/*
* backup superblock flag is used to indicate that this volume
* has backup superblocks.
@@ -535,8 +541,7 @@ struct ocfs2_slot_map {
};
struct ocfs2_extended_slot {
-/*00*/ __u8 es_valid;
- __u8 es_reserved1[3];
+/*00*/ __le32 es_valid; /* zero: slot unused; non-zero: slot in use, value is kept as the slot's valid word */
__le32 es_node_num;
/*08*/
};
@@ -611,7 +616,7 @@ struct ocfs2_super_block {
INCOMPAT flag set. */
/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
for this fs*/
- __le16 s_reserved0;
+ __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */
__le32 s_dx_seed[3]; /* seed[0-2] for dx dir hash.
* s_uuid_hash serves as seed[3]. */
/*C0*/ __le64 s_reserved2[15]; /* Fill out superblock */
@@ -8,6 +8,8 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
+#include <linux/random.h>
+#include <linux/kthread.h>
#include <cluster/masklog.h>
@@ -24,9 +26,48 @@
#include "buffer_head_io.h"
+/*
+ * This structure will be used for multiple mount protection. It will be
+ * written into the '//slot_map' field in the system dir.
+ * Programs that check MMP should assume that if SEQ_FSCK (or any unknown
+ * code above SEQ_MAX) is present then it is NOT safe to use the filesystem.
+ */
+#define OCFS2_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
+#define OCFS2_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */
+#define OCFS2_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */
+#define OCFS2_MMP_SEQ_INIT 0x0 /* mmp_seq init value */
+#define OCFS2_VALID_CLUSTER 0xE24D4D55U /* value for clustered mount
+ under MMP disabled */
+#define OCFS2_VALID_NOCLUSTER 0xE24D4D5AU /* value for noclustered mount
+ under MMP disabled */
+
+#define OCFS2_SLOT_INFO_OLD_VALID 1 /* use for old slot info */
+
+/*
+ * Check interval multiplier
+ * The MMP block is written every update interval and initially checked every
+ * update interval x the multiplier (the value is then adapted based on the
+ * write latency). The reason is that writes can be delayed under load and we
+ * don't want readers to incorrectly assume that the filesystem is no longer
+ * in use.
+ */
+#define OCFS2_MMP_CHECK_MULT 2UL
+
+/*
+ * Minimum interval for MMP checking in seconds.
+ */
+#define OCFS2_MMP_MIN_CHECK_INTERVAL 5UL
+
+/*
+ * Maximum interval for MMP checking in seconds.
+ */
+#define OCFS2_MMP_MAX_CHECK_INTERVAL 300UL
struct ocfs2_slot {
- int sl_valid;
+ /*
+  * Both names alias the same word. Zero marks the slot as free; any
+  * other value marks it as taken. The slot-map helpers access it as
+  * sl_valid, the mount-protection code as mmp_seq.
+  */
+ union {
+ unsigned int sl_valid;
+ unsigned int mmp_seq;
+ };
unsigned int sl_node_num;
};
@@ -52,11 +93,11 @@ static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
}
+/*
+ * Record in the in-memory slot table that @slot_num belongs to @node_num
+ * and store @valid as that slot's sl_valid word.
+ * BUGs when @slot_num is outside [0, si->si_num_slots).
+ */
static void ocfs2_set_slot(struct ocfs2_slot_info *si,
- int slot_num, unsigned int node_num)
+ int slot_num, unsigned int node_num, unsigned int valid)
{
BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
- si->si_slots[slot_num].sl_valid = 1;
+ si->si_slots[slot_num].sl_valid = valid;
si->si_slots[slot_num].sl_node_num = node_num;
}
@@ -75,7 +116,8 @@ static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
i++, slotno++) {
if (se->se_slots[i].es_valid)
ocfs2_set_slot(si, slotno,
- le32_to_cpu(se->se_slots[i].es_node_num));
+ le32_to_cpu(se->se_slots[i].es_node_num),
+ le32_to_cpu(se->se_slots[i].es_valid));
else
ocfs2_invalidate_slot(si, slotno);
}
@@ -97,7 +139,8 @@ static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
ocfs2_invalidate_slot(si, i);
else
- ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
+ ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]),
+ OCFS2_SLOT_INFO_OLD_VALID);
}
}
@@ -252,16 +295,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
int i, ret = -ENOSPC;
if ((preferred >= 0) && (preferred < si->si_num_slots)) {
- if (!si->si_slots[preferred].sl_valid ||
- !si->si_slots[preferred].sl_node_num) {
+ if (!si->si_slots[preferred].sl_valid) {
ret = preferred;
goto out;
}
}
for(i = 0; i < si->si_num_slots; i++) {
- if (!si->si_slots[i].sl_valid ||
- !si->si_slots[i].sl_node_num) {
+ if (!si->si_slots[i].sl_valid) {
ret = i;
break;
}
@@ -270,6 +311,43 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
return ret;
}
+/*
+ * Return the index of the first slot that is in use, or -ENOENT when every
+ * slot is free.
+ *
+ * A slot is free only when its ->sl_valid is zero or OCFS2_MMP_SEQ_CLEAN.
+ * Every other value counts as in use: a live MMP sequence number,
+ * OCFS2_MMP_SEQ_FSCK, OCFS2_VALID_CLUSTER, OCFS2_VALID_NOCLUSTER, and also
+ * any unrecognised code above OCFS2_MMP_SEQ_MAX, which must never be
+ * mistaken for a clean volume.
+ */
+static int __ocfs2_find_used_slot(struct ocfs2_slot_info *si)
+{
+	unsigned int valid;	/* same type as ->sl_valid; the codes exceed INT_MAX */
+	int i;
+
+	for (i = 0; i < si->si_num_slots; i++) {
+		valid = si->si_slots[i].sl_valid;
+		if (valid != 0 && valid != OCFS2_MMP_SEQ_CLEAN)
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Report whether any slot currently holds @expected in ->sl_valid.
+ * Returns 1 when at least one slot matches, 0 otherwise.
+ */
+static int __ocfs2_find_expected_slot(struct ocfs2_slot_info *si,
+				      unsigned int expected)
+{
+	int idx = 0;
+
+	while (idx < si->si_num_slots) {
+		if (si->si_slots[idx].sl_valid == expected)
+			return 1;
+		idx++;
+	}
+
+	return 0;
+}
+
int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
{
int slot;
@@ -445,21 +523,357 @@ void ocfs2_free_slot_info(struct ocfs2_super *osb)
__ocfs2_free_slot_info(si);
}
+/*
+ * Pick a random sequence number in the range [1, OCFS2_MMP_SEQ_MAX].
+ * Draws above OCFS2_MMP_SEQ_MAX are thrown away and retried; a draw of
+ * zero is replaced by 1.
+ */
+static unsigned int mmp_new_seq(void)
+{
+	u32 candidate = prandom_u32();
+
+	while (candidate > OCFS2_MMP_SEQ_MAX)
+		candidate = prandom_u32();
+
+	return candidate ? candidate : 1;
+}
+
+/*
+ * kmmpd - periodically refresh this mount's MMP sequence number.
+ * @data: the struct ocfs2_super of the mounted volume
+ *
+ * Every mmp_update_interval seconds the thread bumps the sequence number
+ * kept in its slot and pushes it out through ocfs2_update_disk_slot().
+ * When one cycle took longer than mmp_check_interval seconds the slot is
+ * read back and compared with the value just written; a mismatch is reported
+ * as a multiple mount and ends the update loop with -EBUSY.
+ *
+ * When the loop ends because the thread was asked to stop or the superblock
+ * became read-only, the slot is set to OCFS2_MMP_SEQ_CLEAN and written out.
+ *
+ * The thread refuses to run for non-local mounts. On every path it parks
+ * until kthread_stop() is called and then returns the status of the last
+ * slot-map operation it performed.
+ */
+static int kmmpd(void *data)
+{
+	struct ocfs2_super *osb = data;
+	struct super_block *sb = osb->sb;
+	struct ocfs2_slot_info *si = osb->slot_info;
+	int slot = osb->slot_num;
+	u32 seq, mmp_seq;
+	unsigned long failed_writes = 0;
+	u16 mmp_update_interval = osb->mmp_update_interval;
+	unsigned int mmp_check_interval;
+	unsigned long last_update_time;
+	unsigned long diff;
+	int retval = 0;
+
+	if (!ocfs2_mount_local(osb)) {
+		mlog(ML_ERROR, "kmmpd thread only works for local mount mode.\n");
+		goto wait_to_exit;
+	}
+
+	/*
+	 * A zero interval would turn the loop below into back-to-back disk
+	 * writes with no sleep in between, so fall back to the minimum.
+	 */
+	if (!mmp_update_interval)
+		mmp_update_interval = OCFS2_MMP_MIN_CHECK_INTERVAL;
+
+	/*
+	 * A failed refresh is not fatal here: the cached slot still holds
+	 * the sequence written at mount time, so log it and carry on.
+	 */
+	retval = ocfs2_refresh_slot_info(osb);
+	if (retval)
+		mlog_errno(retval);
+	seq = si->si_slots[slot].mmp_seq;
+
+	/*
+	 * Start with the higher mmp_check_interval and reduce it if
+	 * the MMP block is being updated on time.
+	 */
+	mmp_check_interval = max(OCFS2_MMP_CHECK_MULT * mmp_update_interval,
+				 OCFS2_MMP_MIN_CHECK_INTERVAL);
+
+	while (!kthread_should_stop() && !sb_rdonly(sb)) {
+		if (!OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_MMP)) {
+			mlog(ML_WARNING, "kmmpd being stopped since MMP feature"
+			     " has been disabled.\n");
+			goto wait_to_exit;
+		}
+		/* Sequence numbers wrap back to 1; 0 means "slot unused". */
+		if (++seq > OCFS2_MMP_SEQ_MAX)
+			seq = 1;
+
+		spin_lock(&osb->osb_lock);
+		si->si_slots[slot].mmp_seq = mmp_seq = seq;
+		spin_unlock(&osb->osb_lock);
+
+		last_update_time = jiffies;
+		retval = ocfs2_update_disk_slot(osb, si, slot);
+
+		/*
+		 * Don't spew too many error messages. Print one every
+		 * (s_mmp_update_interval * 60) seconds.
+		 */
+		if (retval) {
+			if ((failed_writes % 60) == 0) {
+				ocfs2_error(sb, "Error writing to MMP block");
+			}
+			failed_writes++;
+		}
+
+		/* Sleep for whatever is left of the update interval. */
+		diff = jiffies - last_update_time;
+		if (diff < mmp_update_interval * HZ)
+			schedule_timeout_interruptible(mmp_update_interval *
+						       HZ - diff);
+
+		/*
+		 * We need to make sure that more than mmp_check_interval
+		 * seconds have not passed since writing. If that has happened
+		 * we need to check if the MMP block is as we left it.
+		 */
+		diff = jiffies - last_update_time;
+		if (diff > mmp_check_interval * HZ) {
+			retval = ocfs2_refresh_slot_info(osb);
+			if (retval) {
+				ocfs2_error(sb, "error reading MMP data: %d", retval);
+				goto wait_to_exit;
+			}
+
+			if (si->si_slots[slot].mmp_seq != mmp_seq) {
+				ocfs2_error(sb, "Error while updating MMP info. "
+					    "The filesystem seems to have been"
+					    " multiply mounted.");
+				retval = -EBUSY;
+				goto wait_to_exit;
+			}
+		}
+
+		/*
+		 * Adjust the mmp_check_interval depending on how much time
+		 * it took for the MMP block to be written.
+		 */
+		mmp_check_interval = max(min(OCFS2_MMP_CHECK_MULT * diff / HZ,
+					     OCFS2_MMP_MAX_CHECK_INTERVAL),
+					 OCFS2_MMP_MIN_CHECK_INTERVAL);
+	}
+
+	/*
+	 * Unmount seems to be clean.
+	 */
+	spin_lock(&osb->osb_lock);
+	si->si_slots[slot].mmp_seq = OCFS2_MMP_SEQ_CLEAN;
+	spin_unlock(&osb->osb_lock);
+
+	/* Write out the same slot that was just marked clean. */
+	retval = ocfs2_update_disk_slot(osb, si, slot);
+
+wait_to_exit:
+	/* Stay alive until kthread_stop() so the stopper never races our exit. */
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!kthread_should_stop())
+			schedule();
+	}
+	set_current_state(TASK_RUNNING);
+	return retval;
+}
+
+/*
+ * Stop the kmmpd thread recorded in @osb, if there is one, and clear the
+ * stored task pointer.
+ */
+void ocfs2_stop_mmpd(struct ocfs2_super *osb)
+{
+	struct task_struct *task = osb->mmp_task;
+
+	if (!task)
+		return;
+
+	kthread_stop(task);
+	osb->mmp_task = NULL;
+}
+
+/*
+ * Protect the filesystem from being mounted more than once.
+ *
+ * This function was inspired by the ext4 MMP feature. Because the HA stack
+ * already manages node join/leave for clustered mounts, the periodic update
+ * thread (kmmpd) is only started for nocluster mounts.
+ * Note that ocfs2 only uses slot 0 in nocluster mode.
+ *
+ * Sequence of checks:
+ *  - if any slot is in use, fail with -EBUSY when a slot carries
+ *    OCFS2_MMP_SEQ_FSCK; otherwise wait and fail with -EPERM if our slot's
+ *    sequence changed meanwhile (someone else is updating it);
+ *  - write a fresh random sequence into our slot, wait again, and fail with
+ *    -EPERM if it was overwritten;
+ *  - clustered mount: mark the slot OCFS2_VALID_CLUSTER and write it out;
+ *    nocluster mount: start kmmpd to keep the sequence moving.
+ *
+ * parameters:
+ *  osb: the struct ocfs2_super; osb->slot_num selects the slot to claim
+ *  noclustered: non-zero for a nocluster mount
+ *
+ * Returns 0 on success; -EINVAL when the extended slot map is not in use,
+ * -EBUSY or -EPERM for the conflicts described above, or the status of a
+ * failed slot-map read/write.
+ */
+int ocfs2_multi_mount_protect(struct ocfs2_super *osb, int noclustered)
+{
+	struct buffer_head *bh = NULL;
+	u32 seq;
+	struct ocfs2_slot_info *si = osb->slot_info;
+	unsigned int mmp_check_interval = osb->mmp_update_interval;
+	unsigned int wait_time = 0;
+	int retval = 0;
+	int slot = osb->slot_num;
+
+	if (!ocfs2_uses_extended_slot_map(osb)) {
+		mlog(ML_WARNING, "MMP only works on extended slot map.\n");
+		retval = -EINVAL;
+		goto bail;
+	}
+
+	retval = ocfs2_refresh_slot_info(osb);
+	if (retval)
+		goto bail;
+
+	if (mmp_check_interval < OCFS2_MMP_MIN_CHECK_INTERVAL)
+		mmp_check_interval = OCFS2_MMP_MIN_CHECK_INTERVAL;
+
+	spin_lock(&osb->osb_lock);
+	seq = si->si_slots[slot].mmp_seq;
+
+	/* Nothing in use: no need to wait, jump ahead with the lock held. */
+	if (__ocfs2_find_used_slot(si) == -ENOENT)
+		goto skip;
+
+	/* TODO ocfs2-tools need to support this flag */
+	if (__ocfs2_find_expected_slot(si, OCFS2_MMP_SEQ_FSCK)) {
+		mlog(ML_NOTICE, "fsck is running on the filesystem\n");
+		spin_unlock(&osb->osb_lock);
+		retval = -EBUSY;
+		goto bail;
+	}
+	spin_unlock(&osb->osb_lock);
+
+	wait_time = min(mmp_check_interval * 2 + 1, mmp_check_interval + 60);
+
+	/* Print MMP interval if more than 20 secs. */
+	if (wait_time > OCFS2_MMP_MIN_CHECK_INTERVAL * 4)
+		mlog(ML_WARNING, "MMP interval %u higher than expected, please"
+		     " wait.\n", wait_time * 2);
+
+	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
+		mlog(ML_WARNING, "MMP startup interrupted, failing mount.\n");
+		retval = -EPERM;
+		goto bail;
+	}
+
+	retval = ocfs2_refresh_slot_info(osb);
+	if (retval)
+		goto bail;
+	if (seq != si->si_slots[slot].mmp_seq) {
+		mlog(ML_ERROR, "Device is already active on another node.\n");
+		retval = -EPERM;
+		goto bail;
+	}
+
+	spin_lock(&osb->osb_lock);
+skip:
+	/*
+	 * write a new random sequence number.
+	 */
+	seq = mmp_new_seq();
+	ocfs2_set_slot(si, slot, osb->node_num, seq);
+	spin_unlock(&osb->osb_lock);
+
+	ocfs2_update_disk_slot_extended(si, slot, &bh);
+	retval = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
+	if (retval < 0) {
+		mlog_errno(retval);
+		goto bail;
+	}
+
+	/*
+	 * wait for MMP interval and check mmp_seq.
+	 * (wait_time is still 0 when no slot was in use, so this returns
+	 * immediately in that case.)
+	 */
+	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
+		mlog(ML_WARNING, "MMP startup interrupted, failing mount.\n");
+		retval = -EPERM;
+		goto bail;
+	}
+
+	retval = ocfs2_refresh_slot_info(osb);
+	if (retval)
+		goto bail;
+
+	if (seq != si->si_slots[slot].mmp_seq) {
+		mlog(ML_ERROR, "Update seq failed, device is already active on another node.\n");
+		retval = -EPERM;
+		goto bail;
+	}
+
+	/*
+	 * There are two reasons we don't create kmmpd on clustered mount:
+	 * - ocfs2 needs to grab osb->osb_lock to modify/access osb->si.
+	 * - In a cluster with a huge number of nodes, every node updating
+	 *   the same sector of '//slot_map' would cause IO performance
+	 *   issues.
+	 *
+	 * That raises another question:
+	 *   On a clustered mount the MMP seq is never updated, so how does
+	 *   MMP handle a noclustered mount when a clustered mount already
+	 *   exists?
+	 *   The answer is the rule mentioned in ocfs2_find_slot().
+	 */
+	if (!noclustered) {
+		spin_lock(&osb->osb_lock);
+		ocfs2_set_slot(si, slot, osb->node_num, OCFS2_VALID_CLUSTER);
+		spin_unlock(&osb->osb_lock);
+
+		ocfs2_update_disk_slot_extended(si, slot, &bh);
+		retval = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
+		goto bail;
+	}
+
+	/*
+	 * Start a kernel thread to update the MMP block periodically.
+	 */
+	osb->mmp_task = kthread_run(kmmpd, osb, "kmmpd-%s", osb->sb->s_id);
+	if (IS_ERR(osb->mmp_task)) {
+		/* Never leave an ERR_PTR behind for ocfs2_stop_mmpd(). */
+		osb->mmp_task = NULL;
+		mlog(ML_WARNING, "Unable to create kmmpd thread for %s.\n",
+		     osb->sb->s_id);
+		retval = -EPERM;
+		goto bail;
+	}
+
+bail:
+	return retval;
+}
+
+/*
+ * Log why a mount was refused because the slot map still carries state
+ * from the other mount type.
+ *
+ * @clustered: non-zero when the mount being attempted is clustered (the
+ *             messages then name the leftover "non-clustered" state);
+ *             zero when it is non-clustered (messages name "clustered").
+ */
+static void show_conflict_mnt_msg(int clustered)
+{
+	const char *exist = clustered ? "non-clustered" : "clustered";
+
+	mlog(ML_ERROR, "Found %s mount info!\n", exist);
+	mlog(ML_ERROR, "Please clean %s slotmap info for mounting.\n", exist);
+	mlog(ML_ERROR, "eg. remount then unmount with %s mode\n", exist);
+}
+
+/*
+ * Slot info is written to disk even when mounting read-only.
+ * This is intentional: if a read-only mount did not record its slot,
+ * a later mount from another node in the cluster could reuse the same
+ * slot and cause a deadlock.
+ */
int ocfs2_find_slot(struct ocfs2_super *osb)
{
- int status;
+ int status = -EPERM;
int slot;
+ int noclustered = 0;
struct ocfs2_slot_info *si;
si = osb->slot_info;
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
+ slot = __ocfs2_find_used_slot(si);
+ if (slot == 0 &&
+ ((si->si_slots[0].sl_valid == OCFS2_VALID_NOCLUSTER) ||
+ (si->si_slots[0].sl_valid < OCFS2_MMP_SEQ_MAX)))
+ noclustered = 1;
- if (ocfs2_mount_local(osb))
- /* use slot 0 directly in local mode */
- slot = 0;
- else {
+ /*
+ * We set a rule:
+ * If last mount didn't do unmount, (eg: crash), the next mount
+ * MUST be same mount type.
+ */
+ if (ocfs2_mount_local(osb)) {
+ /* empty slotmap, or device didn't unmount from last time */
+ if ((slot == -ENOENT) || noclustered) {
+ /* use slot 0 directly in local mode */
+ slot = 0;
+ noclustered = 1;
+ } else {
+ spin_unlock(&osb->osb_lock);
+ show_conflict_mnt_msg(0);
+ status = -EINVAL;
+ goto bail;
+ }
+ } else {
+ if (noclustered) {
+ spin_unlock(&osb->osb_lock);
+ show_conflict_mnt_msg(1);
+ status = -EINVAL;
+ goto bail;
+ }
/* search for ourselves first and take the slot if it already
* exists. Perhaps we need to mark this in a variable for our
* own journal recovery? Possibly not, though we certainly
@@ -481,7 +895,21 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
slot, osb->dev_str);
}
- ocfs2_set_slot(si, slot, osb->node_num);
+ if (OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, OCFS2_FEATURE_INCOMPAT_MMP)) {
+ osb->slot_num = slot;
+ spin_unlock(&osb->osb_lock);
+ status = ocfs2_multi_mount_protect(osb, noclustered);
+ if (status < 0) {
+ mlog(ML_ERROR, "MMP failed to start.\n");
+ goto mmp_fail;
+ }
+
+ trace_ocfs2_find_slot(osb->slot_num);
+ return status;
+ }
+
+ ocfs2_set_slot(si, slot, osb->node_num, noclustered ?
+ OCFS2_VALID_NOCLUSTER : OCFS2_VALID_CLUSTER);
osb->slot_num = slot;
spin_unlock(&osb->osb_lock);
@@ -490,6 +918,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
if (status < 0) {
mlog_errno(status);
+mmp_fail:
/*
* if write block failed, invalidate slot to avoid overwrite
* slot during dismount in case another node rightly has mounted
@@ -25,4 +25,7 @@ int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num);
+int ocfs2_multi_mount_protect(struct ocfs2_super *osb, int noclustered);
+void ocfs2_stop_mmpd(struct ocfs2_super *osb);
+
#endif
@@ -609,6 +609,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
struct mount_options parsed_options;
struct ocfs2_super *osb = OCFS2_SB(sb);
u32 tmp;
+ int noclustered;
sync_filesystem(sb);
@@ -619,7 +620,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
}
tmp = OCFS2_MOUNT_NOCLUSTER;
- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+ noclustered = osb->s_mount_opt & tmp;
+ if (noclustered != (parsed_options.mount_opt & tmp)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
goto out;
@@ -686,10 +688,20 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
}
sb->s_flags &= ~SB_RDONLY;
osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;
+ if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_MMP)) {
+ spin_unlock(&osb->osb_lock);
+ if (ocfs2_multi_mount_protect(osb, noclustered)) {
+ mlog(ML_ERROR, "started MMP failed.\n");
+ ocfs2_stop_mmpd(osb);
+ ret = -EROFS;
+ goto unlocked_osb;
+ }
+ }
}
trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags);
unlock_osb:
spin_unlock(&osb->osb_lock);
+unlocked_osb:
/* Enable quota accounting after remounting RW */
if (!ret && !(*flags & SB_RDONLY)) {
if (sb_any_quota_suspended(sb))
@@ -722,6 +734,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ?
SB_POSIXACL : 0);
+ if (sb_rdonly(osb->sb))
+ ocfs2_stop_mmpd(osb);
}
out:
return ret;
@@ -1833,7 +1847,7 @@ static int ocfs2_mount_volume(struct super_block *sb)
status = ocfs2_init_local_system_inodes(osb);
if (status < 0) {
mlog_errno(status);
- goto out_super_lock;
+ goto out_find_slot;
}
status = ocfs2_check_volume(osb);
@@ -1858,6 +1872,8 @@ static int ocfs2_mount_volume(struct super_block *sb)
/* before journal shutdown, we should release slot_info */
ocfs2_free_slot_info(osb);
ocfs2_journal_shutdown(osb);
+out_find_slot:
+ ocfs2_stop_mmpd(osb);
out_super_lock:
ocfs2_super_unlock(osb, 1);
out_dlm:
@@ -1878,6 +1894,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb);
BUG_ON(!osb);
+ ocfs2_stop_mmpd(osb);
+
/* Remove file check sysfs related directores/files,
* and wait for the pending file check operations */
ocfs2_filecheck_remove_sysfs(osb);
@@ -2086,6 +2104,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
+ osb->mmp_update_interval = le16_to_cpu(di->id2.i_super.s_mmp_update_interval);
osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) {
mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
MMP (multiple mount protection) gives a filesystem the ability to prevent itself from being mounted multiple times.

To avoid data corruption when non-clustered and/or clustered mounts happen at the same time, this commit introduces the MMP feature. The MMP idea comes from the ext4 MMP code (fs/ext4/mmp.c). Because ocfs2 is a clustered fs, and to stay compatible with the existing slotmap feature, I made some optimizations and modifications when porting from ext4 to ocfs2.

Optimization: MMP has a kthread, kmmpd-<dev>, which is only created in non-clustered mode.

We set a rule: if the last mount was not unmounted (e.g. after a crash), the next mount MUST be of the same mount type.

Finally, this commit also fixes the issue mentioned in commit c80af0c250c8 ("Revert "ocfs2: mount shared volume without ha stack"").

Signed-off-by: Heming Zhao <heming.zhao@suse.com>
---
 fs/ocfs2/ocfs2.h    |   2 +
 fs/ocfs2/ocfs2_fs.h |  13 +-
 fs/ocfs2/slot_map.c | 459 ++++++++++++++++++++++++++++++++++++++++++--
 fs/ocfs2/slot_map.h |   3 +
 fs/ocfs2/super.c    |  23 ++-
 5 files changed, 479 insertions(+), 21 deletions(-)