Message ID | 20230506012315.3370489-4-linan666@huaweicloud.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | md: bugfix of writing raid sysfs | expand |
Hi, 在 2023/05/06 9:23, linan666@huaweicloud.com 写道: > From: Li Nan <linan122@huawei.com> > > max_corr_read_errors should not be negative number. Change it to > unsigned int where use it. > Looks good, feel free to add: Reviewed-by: Yu Kuai <yukuai3@huawei.com> > Fixes: 1e50915fe0bb ("raid: improve MD/raid10 handling of correctable read errors.") > Signed-off-by: Li Nan <linan122@huawei.com> > --- > drivers/md/md.c | 2 +- > drivers/md/raid10.c | 5 +++-- > 2 files changed, 4 insertions(+), 3 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index fd5c3babcd6d..4a1e566d6bdc 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -4486,7 +4486,7 @@ __ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_stor > > static ssize_t > max_corrected_read_errors_show(struct mddev *mddev, char *page) { > - return sprintf(page, "%d\n", > + return sprintf(page, "%u\n", > atomic_read(&mddev->max_corr_read_errors)); > } > > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > index 4fcfcb350d2b..4d615fcc6a50 100644 > --- a/drivers/md/raid10.c > +++ b/drivers/md/raid10.c > @@ -2727,7 +2727,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 > int sect = 0; /* Offset from r10_bio->sector */ > int sectors = r10_bio->sectors; > struct md_rdev *rdev; > - int max_read_errors = atomic_read(&mddev->max_corr_read_errors); > + unsigned int max_read_errors = > + atomic_read(&mddev->max_corr_read_errors); > int d = r10_bio->devs[r10_bio->read_slot].devnum; > > /* still own a reference to this rdev, so it cannot > @@ -2743,7 +2744,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 > check_decay_read_errors(mddev, rdev); > atomic_inc(&rdev->read_errors); > if (atomic_read(&rdev->read_errors) > max_read_errors) { > - pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n", > + pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %u:max %u]\n", > mdname(mddev), rdev->bdev, > atomic_read(&rdev->read_errors), max_read_errors); > pr_notice("md/raid10:%s: %pg: Failing raid device\n", >
On Fri, May 5, 2023 at 7:02 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > Hi, > > 在 2023/05/06 9:23, linan666@huaweicloud.com 写道: > > From: Li Nan <linan122@huawei.com> > > > > max_corr_read_errors should not be negative number. Change it to > > unsigned int where use it. > > > > Looks good, feel free to add: > > Reviewed-by: Yu Kuai <yukuai3@huawei.com> > > > Fixes: 1e50915fe0bb ("raid: improve MD/raid10 handling of correctable read errors.") > > Signed-off-by: Li Nan <linan122@huawei.com> Hmm.. Does the current code break in any cases? Thanks, Song > > --- > > drivers/md/md.c | 2 +- > > drivers/md/raid10.c | 5 +++-- > > 2 files changed, 4 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/md/md.c b/drivers/md/md.c > > index fd5c3babcd6d..4a1e566d6bdc 100644 > > --- a/drivers/md/md.c > > +++ b/drivers/md/md.c > > @@ -4486,7 +4486,7 @@ __ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_stor > > > > static ssize_t > > max_corrected_read_errors_show(struct mddev *mddev, char *page) { > > - return sprintf(page, "%d\n", > > + return sprintf(page, "%u\n", > > atomic_read(&mddev->max_corr_read_errors)); > > } > > > > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > > index 4fcfcb350d2b..4d615fcc6a50 100644 > > --- a/drivers/md/raid10.c > > +++ b/drivers/md/raid10.c > > @@ -2727,7 +2727,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 > > int sect = 0; /* Offset from r10_bio->sector */ > > int sectors = r10_bio->sectors; > > struct md_rdev *rdev; > > - int max_read_errors = atomic_read(&mddev->max_corr_read_errors); > > + unsigned int max_read_errors = > > + atomic_read(&mddev->max_corr_read_errors); > > int d = r10_bio->devs[r10_bio->read_slot].devnum; > > > > /* still own a reference to this rdev, so it cannot > > @@ -2743,7 +2744,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 > > check_decay_read_errors(mddev, rdev); > > atomic_inc(&rdev->read_errors); > > if (atomic_read(&rdev->read_errors) > max_read_errors) { > > - pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n", > > + pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %u:max %u]\n", > > mdname(mddev), rdev->bdev, > > atomic_read(&rdev->read_errors), max_read_errors); > > pr_notice("md/raid10:%s: %pg: Failing raid device\n", > > >
Hi, 在 2023/05/13 9:08, Song Liu 写道: > On Fri, May 5, 2023 at 7:02 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: >> >> Hi, >> >> 在 2023/05/06 9:23, linan666@huaweicloud.com 写道: >>> From: Li Nan <linan122@huawei.com> >>> >>> max_corr_read_errors should not be negative number. Change it to >>> unsigned int where use it. >>> >> >> Looks good, feel free to add: >> >> Reviewed-by: Yu Kuai <yukuai3@huawei.com> >> >>> Fixes: 1e50915fe0bb ("raid: improve MD/raid10 handling of correctable read errors.") >>> Signed-off-by: Li Nan <linan122@huawei.com> > > Hmm.. Does the current code break in any cases? The problem is that somewhere use unsigned value, and somewhere use signed value, and I thinsk the only functional change is in fix_read_error(), if max_read_errors is negative, the judgement will always pass before this patch: if (atomic_read(&rdev->read_errors) > max_read_errors) Thanks, Kuai > > Thanks, > Song > >>> --- >>> drivers/md/md.c | 2 +- >>> drivers/md/raid10.c | 5 +++-- >>> 2 files changed, 4 insertions(+), 3 deletions(-) >>> >>> diff --git a/drivers/md/md.c b/drivers/md/md.c >>> index fd5c3babcd6d..4a1e566d6bdc 100644 >>> --- a/drivers/md/md.c >>> +++ b/drivers/md/md.c >>> @@ -4486,7 +4486,7 @@ __ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_stor >>> >>> static ssize_t >>> max_corrected_read_errors_show(struct mddev *mddev, char *page) { >>> - return sprintf(page, "%d\n", >>> + return sprintf(page, "%u\n", >>> atomic_read(&mddev->max_corr_read_errors)); >>> } >>> >>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c >>> index 4fcfcb350d2b..4d615fcc6a50 100644 >>> --- a/drivers/md/raid10.c >>> +++ b/drivers/md/raid10.c >>> @@ -2727,7 +2727,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 >>> int sect = 0; /* Offset from r10_bio->sector */ >>> int sectors = r10_bio->sectors; >>> struct md_rdev *rdev; >>> - int max_read_errors = atomic_read(&mddev->max_corr_read_errors); >>> + unsigned int max_read_errors = >>> + atomic_read(&mddev->max_corr_read_errors); >>> int d = r10_bio->devs[r10_bio->read_slot].devnum; >>> >>> /* still own a reference to this rdev, so it cannot >>> @@ -2743,7 +2744,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 >>> check_decay_read_errors(mddev, rdev); >>> atomic_inc(&rdev->read_errors); >>> if (atomic_read(&rdev->read_errors) > max_read_errors) { >>> - pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n", >>> + pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %u:max %u]\n", >>> mdname(mddev), rdev->bdev, >>> atomic_read(&rdev->read_errors), max_read_errors); >>> pr_notice("md/raid10:%s: %pg: Failing raid device\n", >>> >> > . >
在 2023/5/13 10:21, Yu Kuai 写道: > Hi, > > 在 2023/05/13 9:08, Song Liu 写道: >> On Fri, May 5, 2023 at 7:02 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: >>> >>> Hi, >>> >>> 在 2023/05/06 9:23, linan666@huaweicloud.com 写道: >>>> From: Li Nan <linan122@huawei.com> >>>> >>>> max_corr_read_errors should not be negative number. Change it to >>>> unsigned int where use it. >>>> >>> >>> Looks good, feel free to add: >>> >>> Reviewed-by: Yu Kuai <yukuai3@huawei.com> >>> >>>> Fixes: 1e50915fe0bb ("raid: improve MD/raid10 handling of >>>> correctable read errors.") >>>> Signed-off-by: Li Nan <linan122@huawei.com> >> >> Hmm.. Does the current code break in any cases? > > The problem is that somewhere use unsigned value, and somewhere use > signed value, and I thinsk the only functional change is in > fix_read_error(), if max_read_errors is negative, the judgement will > always pass before this patch: > > if (atomic_read(&rdev->read_errors) > max_read_errors) > In addition, it is confusing for users after setting a huge number to it. # echo 4294967295 > /sys/block/$disk/md/max_read_errors # cat /sys/block/$disk/md/max_read_errors -1
diff --git a/drivers/md/md.c b/drivers/md/md.c index fd5c3babcd6d..4a1e566d6bdc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4486,7 +4486,7 @@ __ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_stor static ssize_t max_corrected_read_errors_show(struct mddev *mddev, char *page) { - return sprintf(page, "%d\n", + return sprintf(page, "%u\n", atomic_read(&mddev->max_corr_read_errors)); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 4fcfcb350d2b..4d615fcc6a50 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2727,7 +2727,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 int sect = 0; /* Offset from r10_bio->sector */ int sectors = r10_bio->sectors; struct md_rdev *rdev; - int max_read_errors = atomic_read(&mddev->max_corr_read_errors); + unsigned int max_read_errors = + atomic_read(&mddev->max_corr_read_errors); int d = r10_bio->devs[r10_bio->read_slot].devnum; /* still own a reference to this rdev, so it cannot @@ -2743,7 +2744,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 check_decay_read_errors(mddev, rdev); atomic_inc(&rdev->read_errors); if (atomic_read(&rdev->read_errors) > max_read_errors) { - pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n", + pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %u:max %u]\n", mdname(mddev), rdev->bdev, atomic_read(&rdev->read_errors), max_read_errors); pr_notice("md/raid10:%s: %pg: Failing raid device\n",