diff mbox series

[2/2] md: add feature flag MD_FEATURE_RAID0_LAYOUT

Message ID 87lfuyarcb.fsf@notabene.neil.brown.name (mailing list archive)
State New, archived
Headers show
Series md/raid0: avoid RAID0 data corruption due to layout confusion. | expand

Commit Message

NeilBrown Sept. 9, 2019, 6:58 a.m. UTC
Due to a bug introduced in Linux 3.14 we cannot determine the
correct layout for a multi-zone RAID0 array - there are two
possibilities.

It is possible to tell the kernel which to choose using a module
parameter, but this can be clumsy to use.  It would be best if
the choice were recorded in the metadata.
So add a feature flag for this purpose.
If it is set, then the 'layout' field of the superblock is used
to determine which layout to use.

If this flag is not set, then mddev->layout gets set to -1,
which causes the module parameter to be required.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/md.c                | 13 +++++++++++++
 drivers/md/raid0.c             |  2 ++
 include/uapi/linux/raid/md_p.h |  2 ++
 3 files changed, 17 insertions(+)

Comments

Guoqing Jiang Sept. 9, 2019, 3:33 p.m. UTC | #1
Hi Neil,

On 9/9/19 8:58 AM, NeilBrown wrote:
> 
> Due to a bug introduced in Linux 3.14 we cannot determine the
> correctly layout for a multi-zone RAID0 array - there are two
> possibiities.

possibilities.

> 
> It is possible to tell the kernel which to chose using a module
> parameter, but this can be clumsy to use.  It would be best if
> the choice were recorded in the metadata.
> So add a feature flag for this purpose.
> If it is set, then the 'layout' field of the superblock is used
> to determine which layout to use.
> 
> If this flag is not set, then mddev->layout gets set to -1,
> which causes the module parameter to be required.

Could you point where the flag is set? Thanks.

> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>   drivers/md/md.c                | 13 +++++++++++++
>   drivers/md/raid0.c             |  2 ++
>   include/uapi/linux/raid/md_p.h |  2 ++
>   3 files changed, 17 insertions(+)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 1f70ec595282..a41fce7f8b4c 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -1232,6 +1232,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
>   			mddev->new_layout = mddev->layout;
>   			mddev->new_chunk_sectors = mddev->chunk_sectors;
>   		}
> +		if (mddev->level == 0)
> +			mddev->layout = -1;
>   
>   		if (sb->state & (1<<MD_SB_CLEAN))
>   			mddev->recovery_cp = MaxSector;
> @@ -1647,6 +1649,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
>   		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
>   	}
>   
> +	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
> +	    sb->level != 0)
> +		return -EINVAL;
> +
>   	if (!refdev) {
>   		ret = 1;
>   	} else {
> @@ -1757,6 +1763,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
>   			mddev->new_chunk_sectors = mddev->chunk_sectors;
>   		}
>   
> +		if (mddev->level == 0 &&
> +		    !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
> +			mddev->layout = -1;
> +
>   		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
>   			set_bit(MD_HAS_JOURNAL, &mddev->flags);
>   
> @@ -6852,6 +6862,9 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
>   	mddev->external	     = 0;
>   
>   	mddev->layout        = info->layout;
> +	if (mddev->level == 0)
> +		/* Cannot trust RAID0 layout info here */
> +		mddev->layout = -1;
>   	mddev->chunk_sectors = info->chunk_size >> 9;
>   
>   	if (mddev->persistent) {
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index a8888c12308a..6f095b0b6f5c 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -145,6 +145,8 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
>   
>   	if (conf->nr_strip_zones == 1) {
>   		conf->layout = RAID0_ORIG_LAYOUT;
> +	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
> +		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {

Maybe "conf->layout = mddev->layout" here? Otherwise it seems conf->layout is not set accordingly, just
my 2 cents.

>   	} else if (default_layout == RAID0_ORIG_LAYOUT ||
>   		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
>   		conf->layout = default_layout;
> diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
> index b0d15c73f6d7..1f2d8c81f0e0 100644
> --- a/include/uapi/linux/raid/md_p.h
> +++ b/include/uapi/linux/raid/md_p.h
> @@ -329,6 +329,7 @@ struct mdp_superblock_1 {
>   #define	MD_FEATURE_JOURNAL		512 /* support write cache */
>   #define	MD_FEATURE_PPL			1024 /* support PPL */
>   #define	MD_FEATURE_MULTIPLE_PPLS	2048 /* support for multiple PPLs */
> +#define	MD_FEATURE_RAID0_LAYOUT		4096 /* layout is meaningful for RAID0 */
>   #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
>   					|MD_FEATURE_RECOVERY_OFFSET	\
>   					|MD_FEATURE_RESHAPE_ACTIVE	\
> @@ -341,6 +342,7 @@ struct mdp_superblock_1 {
>   					|MD_FEATURE_JOURNAL		\
>   					|MD_FEATURE_PPL			\
>   					|MD_FEATURE_MULTIPLE_PPLS	\
> +					|MD_FEATURE_RAID0_LAYOUT	\
>   					)
>   
>   struct r5l_payload_header {
> 

Thanks,
Guoqing
NeilBrown Sept. 9, 2019, 11:26 p.m. UTC | #2
On Mon, Sep 09 2019, Guoqing Jiang wrote:

> Hi Neil,
>
> On 9/9/19 8:58 AM, NeilBrown wrote:
>> 
>> Due to a bug introduced in Linux 3.14 we cannot determine the
>> correctly layout for a multi-zone RAID0 array - there are two
>> possibiities.
>
> possibilities.

Thanks.

>
>> 
>> It is possible to tell the kernel which to chose using a module
>> parameter, but this can be clumsy to use.  It would be best if
>> the choice were recorded in the metadata.
>> So add a feature flag for this purpose.
>> If it is set, then the 'layout' field of the superblock is used
>> to determine which layout to use.
>> 
>> If this flag is not set, then mddev->layout gets set to -1,
>> which causes the module parameter to be required.
>
> Could you point where the flag is set? Thanks.

It isn't set by the kernel - the kernel doesn't know when to set it.

We would need to change mdadm to set the flag, either when creating an
array, or when asked to via --update.

Actually.... that would be a problem if someone used the new mdadm on an
old kernel.  The old kernel would refuse to assemble the array with the
flag set.
Maybe that is what we want anyway.  We *want* people to never use
multi-zone RAID0 on old kernels, because the result could be data
corruption.

So - mdadm needs to add the flag, and maybe warn if the kernel is too
old.


>
>> 
>> Signed-off-by: NeilBrown <neilb@suse.de>
>> ---
>>   drivers/md/md.c                | 13 +++++++++++++
>>   drivers/md/raid0.c             |  2 ++
>>   include/uapi/linux/raid/md_p.h |  2 ++
>>   3 files changed, 17 insertions(+)
>> 
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 1f70ec595282..a41fce7f8b4c 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -1232,6 +1232,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
>>   			mddev->new_layout = mddev->layout;
>>   			mddev->new_chunk_sectors = mddev->chunk_sectors;
>>   		}
>> +		if (mddev->level == 0)
>> +			mddev->layout = -1;
>>   
>>   		if (sb->state & (1<<MD_SB_CLEAN))
>>   			mddev->recovery_cp = MaxSector;
>> @@ -1647,6 +1649,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
>>   		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
>>   	}
>>   
>> +	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
>> +	    sb->level != 0)
>> +		return -EINVAL;
>> +
>>   	if (!refdev) {
>>   		ret = 1;
>>   	} else {
>> @@ -1757,6 +1763,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
>>   			mddev->new_chunk_sectors = mddev->chunk_sectors;
>>   		}
>>   
>> +		if (mddev->level == 0 &&
>> +		    !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
>> +			mddev->layout = -1;
>> +
>>   		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
>>   			set_bit(MD_HAS_JOURNAL, &mddev->flags);
>>   
>> @@ -6852,6 +6862,9 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
>>   	mddev->external	     = 0;
>>   
>>   	mddev->layout        = info->layout;
>> +	if (mddev->level == 0)
>> +		/* Cannot trust RAID0 layout info here */
>> +		mddev->layout = -1;
>>   	mddev->chunk_sectors = info->chunk_size >> 9;
>>   
>>   	if (mddev->persistent) {
>> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
>> index a8888c12308a..6f095b0b6f5c 100644
>> --- a/drivers/md/raid0.c
>> +++ b/drivers/md/raid0.c
>> @@ -145,6 +145,8 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
>>   
>>   	if (conf->nr_strip_zones == 1) {
>>   		conf->layout = RAID0_ORIG_LAYOUT;
>> +	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
>> +		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
>
> Maybe "conf->layout = mddev->layout" here? Otherwise seems conf->layout is not set accordingly, just 
> my 2 cents.
>

Yes, of course.  thanks.

Thanks for your review,
NeilBrown


>>   	} else if (default_layout == RAID0_ORIG_LAYOUT ||
>>   		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
>>   		conf->layout = default_layout;
>> diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
>> index b0d15c73f6d7..1f2d8c81f0e0 100644
>> --- a/include/uapi/linux/raid/md_p.h
>> +++ b/include/uapi/linux/raid/md_p.h
>> @@ -329,6 +329,7 @@ struct mdp_superblock_1 {
>>   #define	MD_FEATURE_JOURNAL		512 /* support write cache */
>>   #define	MD_FEATURE_PPL			1024 /* support PPL */
>>   #define	MD_FEATURE_MULTIPLE_PPLS	2048 /* support for multiple PPLs */
>> +#define	MD_FEATURE_RAID0_LAYOUT		4096 /* layout is meaningful for RAID0 */
>>   #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
>>   					|MD_FEATURE_RECOVERY_OFFSET	\
>>   					|MD_FEATURE_RESHAPE_ACTIVE	\
>> @@ -341,6 +342,7 @@ struct mdp_superblock_1 {
>>   					|MD_FEATURE_JOURNAL		\
>>   					|MD_FEATURE_PPL			\
>>   					|MD_FEATURE_MULTIPLE_PPLS	\
>> +					|MD_FEATURE_RAID0_LAYOUT	\
>>   					)
>>   
>>   struct r5l_payload_header {
>> 
>
> Thanks,
> Guoqing
diff mbox series

Patch

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1f70ec595282..a41fce7f8b4c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1232,6 +1232,8 @@  static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
 			mddev->new_layout = mddev->layout;
 			mddev->new_chunk_sectors = mddev->chunk_sectors;
 		}
+		if (mddev->level == 0)
+			mddev->layout = -1;
 
 		if (sb->state & (1<<MD_SB_CLEAN))
 			mddev->recovery_cp = MaxSector;
@@ -1647,6 +1649,10 @@  static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
 	}
 
+	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
+	    sb->level != 0)
+		return -EINVAL;
+
 	if (!refdev) {
 		ret = 1;
 	} else {
@@ -1757,6 +1763,10 @@  static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 			mddev->new_chunk_sectors = mddev->chunk_sectors;
 		}
 
+		if (mddev->level == 0 &&
+		    !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
+			mddev->layout = -1;
+
 		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
 			set_bit(MD_HAS_JOURNAL, &mddev->flags);
 
@@ -6852,6 +6862,9 @@  static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
 	mddev->external	     = 0;
 
 	mddev->layout        = info->layout;
+	if (mddev->level == 0)
+		/* Cannot trust RAID0 layout info here */
+		mddev->layout = -1;
 	mddev->chunk_sectors = info->chunk_size >> 9;
 
 	if (mddev->persistent) {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a8888c12308a..6f095b0b6f5c 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -145,6 +145,8 @@  static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 
 	if (conf->nr_strip_zones == 1) {
 		conf->layout = RAID0_ORIG_LAYOUT;
+	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
 	} else if (default_layout == RAID0_ORIG_LAYOUT ||
 		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
 		conf->layout = default_layout;
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index b0d15c73f6d7..1f2d8c81f0e0 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -329,6 +329,7 @@  struct mdp_superblock_1 {
 #define	MD_FEATURE_JOURNAL		512 /* support write cache */
 #define	MD_FEATURE_PPL			1024 /* support PPL */
 #define	MD_FEATURE_MULTIPLE_PPLS	2048 /* support for multiple PPLs */
+#define	MD_FEATURE_RAID0_LAYOUT		4096 /* layout is meaningful for RAID0 */
 #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
 					|MD_FEATURE_RECOVERY_OFFSET	\
 					|MD_FEATURE_RESHAPE_ACTIVE	\
@@ -341,6 +342,7 @@  struct mdp_superblock_1 {
 					|MD_FEATURE_JOURNAL		\
 					|MD_FEATURE_PPL			\
 					|MD_FEATURE_MULTIPLE_PPLS	\
+					|MD_FEATURE_RAID0_LAYOUT	\
 					)
 
 struct r5l_payload_header {