diff mbox series

[V3] brd: check and limit max_part par

Message ID c8236e55-f64f-ef40-b394-8b7e86ce50df@huawei.com (mailing list archive)
State New, archived
Headers show
Series [V3] brd: check and limit max_part par | expand

Commit Message

Zhiqiang Liu Jan. 14, 2020, 11:56 a.m. UTC
In brd_init(), rd_nr brd_device structures are first allocated and
added to the brd_devices list; the list is then traversed and each
brd_device is registered by calling add_disk(). When a brd_device is
allocated, disk->first_minor is set to i * max_part. If rd_nr * max_part
is larger than MINORMASK, two different brd_devices may end up with the
same devt, and only one of them can be added successfully.
Removing the module (rmmod brd.ko) then causes an oops in brd_exit().

Steps to reproduce:
  # modprobe brd rd_nr=3 rd_size=102400 max_part=1048576
  # rmmod brd
After the rmmod, the oops appears.

Oops log:
[  726.613722] Call trace:
[  726.614175]  kernfs_find_ns+0x24/0x130
[  726.614852]  kernfs_find_and_get_ns+0x44/0x68
[  726.615749]  sysfs_remove_group+0x38/0xb0
[  726.616520]  blk_trace_remove_sysfs+0x1c/0x28
[  726.617320]  blk_unregister_queue+0x98/0x100
[  726.618105]  del_gendisk+0x144/0x2b8
[  726.618759]  brd_exit+0x68/0x560 [brd]
[  726.619501]  __arm64_sys_delete_module+0x19c/0x2a0
[  726.620384]  el0_svc_common+0x78/0x130
[  726.621057]  el0_svc_handler+0x38/0x78
[  726.621738]  el0_svc+0x8/0xc
[  726.622259] Code: aa0203f6 aa0103f7 aa1e03e0 d503201f (7940e260)

Here, we add the brd_check_and_reset_par() function to check and limit the
max_part parameter.

--
V2->V3: (suggested by Ming Lei)
 - clear .minors when running out of consecutive minor space in brd_alloc
 - remove limit of rd_nr

V1->V2: add more checks in brd_check_par_valid as suggested by Ming Lei.

Signed-off-by: Zhiqiang Liu <liuzhiqiang26@huawei.com>
---
 drivers/block/brd.c | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

Comments

Ming Lei Jan. 15, 2020, 2:27 a.m. UTC | #1
On Tue, Jan 14, 2020 at 07:56:07PM +0800, Zhiqiang Liu wrote:
> 
> In brd_init func, rd_nr num of brd_device are firstly allocated
> and add in brd_devices, then brd_devices are traversed to add each
> brd_device by calling add_disk func. When allocating brd_device,
> the disk->first_minor is set to i * max_part, if rd_nr * max_part
> is larger than MINORMASK, two different brd_device may have the same
> devt, then only one of them can be successfully added.
> when rmmod brd.ko, it will cause oops when calling brd_exit.
> 
> Follow those steps:
>   # modprobe brd rd_nr=3 rd_size=102400 max_part=1048576
>   # rmmod brd
> then, the oops will appear.
> 
> Oops log:
> [  726.613722] Call trace:
> [  726.614175]  kernfs_find_ns+0x24/0x130
> [  726.614852]  kernfs_find_and_get_ns+0x44/0x68
> [  726.615749]  sysfs_remove_group+0x38/0xb0
> [  726.616520]  blk_trace_remove_sysfs+0x1c/0x28
> [  726.617320]  blk_unregister_queue+0x98/0x100
> [  726.618105]  del_gendisk+0x144/0x2b8
> [  726.618759]  brd_exit+0x68/0x560 [brd]
> [  726.619501]  __arm64_sys_delete_module+0x19c/0x2a0
> [  726.620384]  el0_svc_common+0x78/0x130
> [  726.621057]  el0_svc_handler+0x38/0x78
> [  726.621738]  el0_svc+0x8/0xc
> [  726.622259] Code: aa0203f6 aa0103f7 aa1e03e0 d503201f (7940e260)
> 
> Here, we add brd_check_and_reset_par func to check and limit max_part par.
> 
> --
> V2->V3: (suggested by Ming Lei)
>  - clear .minors when running out of consecutive minor space in brd_alloc
>  - remove limit of rd_nr
> 
> V1->V2: add more checks in brd_check_par_valid as suggested by Ming Lei.
> 
> Signed-off-by: Zhiqiang Liu <liuzhiqiang26@huawei.com>
> ---
>  drivers/block/brd.c | 35 ++++++++++++++++++++++++++++-------
>  1 file changed, 28 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index df8103dd40ac..2295a0bafb5e 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -330,16 +330,16 @@ static const struct block_device_operations brd_fops = {
>  /*
>   * And now the modules code and kernel interface.
>   */
> -static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
> -module_param(rd_nr, int, 0444);
> +static unsigned int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
> +module_param(rd_nr, uint, 0444);

The above change isn't needed.

>  MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
> 
>  unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
>  module_param(rd_size, ulong, 0444);
>  MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
> 
> -static int max_part = 1;
> -module_param(max_part, int, 0444);
> +static unsigned int max_part = 1;
> +module_param(max_part, uint, 0444);

The above change isn't needed.

>  MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
> 
>  MODULE_LICENSE("GPL");
> @@ -393,7 +393,14 @@ static struct brd_device *brd_alloc(int i)
>  	if (!disk)
>  		goto out_free_queue;
>  	disk->major		= RAMDISK_MAJOR;
> -	disk->first_minor	= i * max_part;
> +	/*
> +	 * Clear .minors when running out of consecutive minor space since
> +	 * GENHD_FL_EXT_DEVT is set, and we can allocate from extended devt.
> +	 */
> +	if ((i * disk->minors) & ~MINORMASK)
> +		disk->minors = 0;
> +	else
> +		disk->first_minor = i * disk->minors;

The above looks a bit ugly; a nicer way would be to make this change in
brd_alloc():

	disk = brd->brd_disk = alloc_disk(((i * max_part) & ~MINORMASK) ?
		0 : max_part);

>  	disk->fops		= &brd_fops;
>  	disk->private_data	= brd;
>  	disk->queue		= brd->brd_queue;
> @@ -468,6 +475,21 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
>  	return kobj;
>  }
> 
> +static inline void brd_check_and_reset_par(void)
> +{
> +	if (unlikely(!rd_nr))
> +		rd_nr = 1;

A zero rd_nr should work as expected, given that the user can create the dev
file via mknod, and brd_probe() will be called to populate the brd disk/queue
when the disk file is opened.

> +static inline void brd_check_and_reset_par(void)
> +{
> +       if (unlikely(!rd_nr))
> +               rd_nr = 1;
> +
> +       if (unlikely(!max_part))
> +               max_part = 1;

Another constraint is that 'max_part' needs to divide (1U << MINORBITS)
exactly, i.e. it should be a power of two, something like:

	max_part = 1UL << fls(max_part);


Thanks, 
Ming
Zhiqiang Liu Jan. 20, 2020, 1:14 p.m. UTC | #2
On 2020/1/15 10:27, Ming Lei wrote:

> 
>>  MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
>>
>>  unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
>>  module_param(rd_size, ulong, 0444);
>>  MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
>>
>> -static int max_part = 1;
>> -module_param(max_part, int, 0444);
>> +static unsigned int max_part = 1;
>> +module_param(max_part, uint, 0444);
> 
> The above change isn't needed.
Thanks for your suggestion.
I will remove that in v4 patch.
> 
>>  MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
>>
>>  MODULE_LICENSE("GPL");
>> @@ -393,7 +393,14 @@ static struct brd_device *brd_alloc(int i)
>>  	if (!disk)
>>  		goto out_free_queue;
>>  	disk->major		= RAMDISK_MAJOR;
>> -	disk->first_minor	= i * max_part;
>> +	/*
>> +	 * Clear .minors when running out of consecutive minor space since
>> +	 * GENHD_FL_EXT_DEVT is set, and we can allocate from extended devt.
>> +	 */
>> +	if ((i * disk->minors) & ~MINORMASK)
>> +		disk->minors = 0;
>> +	else
>> +		disk->first_minor = i * disk->minors;
> 
> The above looks a bit ugly, one nice way could be to change in
> brd_alloc():
> 
> 	disk = brd->brd_disk = alloc_disk(((i * max_part) & ~MINORMASK) ?
> 		0 : max_part);

I will change it as your suggestion.

> 
>>  	disk->fops		= &brd_fops;
>>  	disk->private_data	= brd;
>>  	disk->queue		= brd->brd_queue;
>> @@ -468,6 +475,21 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
>>  	return kobj;
>>  }
>>
>> +static inline void brd_check_and_reset_par(void)
>> +{
>> +	if (unlikely(!rd_nr))
>> +		rd_nr = 1;
> 
> zero rd_nr should work as expected, given user can create dev file via
> mknod, and brd_probe() will be called for populate brd disk/queue when
> the disk file is opened.
> 
>> +static inline void brd_check_and_reset_par(void)
>> +{
>> +       if (unlikely(!rd_nr))
>> +               rd_nr = 1;
>> +
>> +       if (unlikely(!max_part))
>> +               max_part = 1;
> 
> Another limit is that 'max_part' needs to be divided exactly by (1U <<
> MINORBITS), something like:
> 
> 	max_part = 1UL << fls(max_part)

Do we really have to require that 'max_part' be divided exactly by
(1U << MINORBITS)? Following your suggestion, when i * max_part is larger
than MINORMASK, we can allocate from the extended devt.

Thanks,
Zhiqiang Liu
Ming Lei Jan. 20, 2020, 10:58 p.m. UTC | #3
On Mon, Jan 20, 2020 at 09:14:50PM +0800, Zhiqiang Liu wrote:
> 
> 
> On 2020/1/15 10:27, Ming Lei wrote:
> 
> > 
> >>  MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
> >>
> >>  unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
> >>  module_param(rd_size, ulong, 0444);
> >>  MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
> >>
> >> -static int max_part = 1;
> >> -module_param(max_part, int, 0444);
> >> +static unsigned int max_part = 1;
> >> +module_param(max_part, uint, 0444);
> > 
> > The above change isn't needed.
> Thanks for your suggestion.
> I will remove that in v4 patch.
> > 
> >>  MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
> >>
> >>  MODULE_LICENSE("GPL");
> >> @@ -393,7 +393,14 @@ static struct brd_device *brd_alloc(int i)
> >>  	if (!disk)
> >>  		goto out_free_queue;
> >>  	disk->major		= RAMDISK_MAJOR;
> >> -	disk->first_minor	= i * max_part;
> >> +	/*
> >> +	 * Clear .minors when running out of consecutive minor space since
> >> +	 * GENHD_FL_EXT_DEVT is set, and we can allocate from extended devt.
> >> +	 */
> >> +	if ((i * disk->minors) & ~MINORMASK)
> >> +		disk->minors = 0;
> >> +	else
> >> +		disk->first_minor = i * disk->minors;
> > 
> > The above looks a bit ugly, one nice way could be to change in
> > brd_alloc():
> > 
> > 	disk = brd->brd_disk = alloc_disk(((i * max_part) & ~MINORMASK) ?
> > 		0 : max_part);
> 
> I will change it as your suggestion.
> 
> > 
> >>  	disk->fops		= &brd_fops;
> >>  	disk->private_data	= brd;
> >>  	disk->queue		= brd->brd_queue;
> >> @@ -468,6 +475,21 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
> >>  	return kobj;
> >>  }
> >>
> >> +static inline void brd_check_and_reset_par(void)
> >> +{
> >> +	if (unlikely(!rd_nr))
> >> +		rd_nr = 1;
> > 
> > zero rd_nr should work as expected, given user can create dev file via
> > mknod, and brd_probe() will be called for populate brd disk/queue when
> > the disk file is opened.
> > 
> >> +static inline void brd_check_and_reset_par(void)
> >> +{
> >> +       if (unlikely(!rd_nr))
> >> +               rd_nr = 1;
> >> +
> >> +       if (unlikely(!max_part))
> >> +               max_part = 1;
> > 
> > Another limit is that 'max_part' needs to be divided exactly by (1U <<
> > MINORBITS), something like:
> > 
> > 	max_part = 1UL << fls(max_part)
> 
> Do we have to limit that 'max_part' needs to be divided exactly by (1U <<
> > MINORBITS)? As your suggestion, the i * max_part is larger than MINORMASK,
> we can allocate from extended devt.

Exact division is needed to reserve the same number of minors for every disk
with RAMDISK_MAJOR; otherwise there is still a chance of getting the same
dev_t when adding partitions.

Extended devt is for covering more disks and is not related to 'max_part'.


Thanks,
Ming
Zhiqiang Liu Jan. 21, 2020, 1:44 a.m. UTC | #4
On 2020/1/21 6:58, Ming Lei wrote:
> On Mon, Jan 20, 2020 at 09:14:50PM +0800, Zhiqiang Liu wrote:
>>>> +static inline void brd_check_and_reset_par(void)
>>>> +{
>>>> +       if (unlikely(!rd_nr))
>>>> +               rd_nr = 1;
>>>> +
>>>> +       if (unlikely(!max_part))
>>>> +               max_part = 1;
>>>
>>> Another limit is that 'max_part' needs to be divided exactly by (1U <<
>>> MINORBITS), something like:
>>>
>>> 	max_part = 1UL << fls(max_part)
>>
>> Do we have to limit that 'max_part' needs to be divided exactly by (1U <<
>>> MINORBITS)? As your suggestion, the i * max_part is larger than MINORMASK,
>> we can allocate from extended devt.
> 
> Exact dividing is for reserving same minors for all disks with
> RAMDISK_MAJOR, otherwise there is still chance to get same dev_t when
> adding partitions.
> 
> Extended devt is for covering more disks, not related with 'max_part'.
> 

Thank you very much.
I will change that as you said.
diff mbox series

Patch

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index df8103dd40ac..2295a0bafb5e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -330,16 +330,16 @@  static const struct block_device_operations brd_fops = {
 /*
  * And now the modules code and kernel interface.
  */
-static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
-module_param(rd_nr, int, 0444);
+static unsigned int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
+module_param(rd_nr, uint, 0444);
 MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");

 unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
 module_param(rd_size, ulong, 0444);
 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");

-static int max_part = 1;
-module_param(max_part, int, 0444);
+static unsigned int max_part = 1;
+module_param(max_part, uint, 0444);
 MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");

 MODULE_LICENSE("GPL");
@@ -393,7 +393,14 @@  static struct brd_device *brd_alloc(int i)
 	if (!disk)
 		goto out_free_queue;
 	disk->major		= RAMDISK_MAJOR;
-	disk->first_minor	= i * max_part;
+	/*
+	 * Clear .minors when running out of consecutive minor space since
+	 * GENHD_FL_EXT_DEVT is set, and we can allocate from extended devt.
+	 */
+	if ((i * disk->minors) & ~MINORMASK)
+		disk->minors = 0;
+	else
+		disk->first_minor = i * disk->minors;
 	disk->fops		= &brd_fops;
 	disk->private_data	= brd;
 	disk->queue		= brd->brd_queue;
@@ -468,6 +475,21 @@  static struct kobject *brd_probe(dev_t dev, int *part, void *data)
 	return kobj;
 }

+static inline void brd_check_and_reset_par(void)
+{
+	if (unlikely(!rd_nr))
+		rd_nr = 1;
+
+	if (unlikely(!max_part))
+		max_part = 1;
+
+	if (max_part > DISK_MAX_PARTS) {
+		pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
+			DISK_MAX_PARTS, DISK_MAX_PARTS);
+		max_part = DISK_MAX_PARTS;
+	}
+}
+
 static int __init brd_init(void)
 {
 	struct brd_device *brd, *next;
@@ -491,8 +513,7 @@  static int __init brd_init(void)
 	if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
 		return -EIO;

-	if (unlikely(!max_part))
-		max_part = 1;
+	brd_check_and_reset_par();

 	for (i = 0; i < rd_nr; i++) {
 		brd = brd_alloc(i);