diff mbox series

[4/7] brd: make sector size configurable

Message ID 20230614114637.89759-5-hare@suse.de (mailing list archive)
State New, archived
Headers show
Series RFC: high-order folio support for I/O | expand

Commit Message

Hannes Reinecke June 14, 2023, 11:46 a.m. UTC
Add a module option 'rd_blksize' to allow the user to change
the sector size of the RAM disks.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/block/brd.c | 50 +++++++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 15 deletions(-)

Comments

Matthew Wilcox June 14, 2023, 12:55 p.m. UTC | #1
On Wed, Jun 14, 2023 at 01:46:34PM +0200, Hannes Reinecke wrote:
> @@ -43,9 +43,11 @@ struct brd_device {
>  	 */
>  	struct xarray	        brd_folios;
>  	u64			brd_nr_folios;
> +	unsigned int		brd_sector_shift;
> +	unsigned int		brd_sector_size;
>  };
>  
> -#define BRD_SECTOR_SHIFT(b) (PAGE_SHIFT - SECTOR_SHIFT)
> +#define BRD_SECTOR_SHIFT(b) ((b)->brd_sector_shift - SECTOR_SHIFT)
>  
>  static pgoff_t brd_sector_index(struct brd_device *brd, sector_t sector)
>  {
> @@ -85,7 +87,7 @@ static int brd_insert_folio(struct brd_device *brd, sector_t sector, gfp_t gfp)
>  {
>  	pgoff_t idx;
>  	struct folio *folio, *cur;
> -	unsigned int rd_sector_order = get_order(PAGE_SIZE);
> +	unsigned int rd_sector_order = get_order(brd->brd_sector_size);

Surely max(0, brd->brd_sector_shift - PAGE_SHIFT) ?

> @@ -346,6 +353,25 @@ static int brd_alloc(int i)
>  		return -ENOMEM;
>  	brd->brd_number		= i;
>  	list_add_tail(&brd->brd_list, &brd_devices);
> +	brd->brd_sector_shift = ilog2(rd_blksize);
> +	if ((1ULL << brd->brd_sector_shift) != rd_blksize) {
> +		pr_err("rd_blksize %d is not supported\n", rd_blksize);

Are you trying to require power-of-two here?  We have is_power_of_2()
for that purpose.
Hannes Reinecke June 14, 2023, 1:02 p.m. UTC | #2
On 6/14/23 14:55, Matthew Wilcox wrote:
> On Wed, Jun 14, 2023 at 01:46:34PM +0200, Hannes Reinecke wrote:
>> @@ -43,9 +43,11 @@ struct brd_device {
>>   	 */
>>   	struct xarray	        brd_folios;
>>   	u64			brd_nr_folios;
>> +	unsigned int		brd_sector_shift;
>> +	unsigned int		brd_sector_size;
>>   };
>>   
>> -#define BRD_SECTOR_SHIFT(b) (PAGE_SHIFT - SECTOR_SHIFT)
>> +#define BRD_SECTOR_SHIFT(b) ((b)->brd_sector_shift - SECTOR_SHIFT)
>>   
>>   static pgoff_t brd_sector_index(struct brd_device *brd, sector_t sector)
>>   {
>> @@ -85,7 +87,7 @@ static int brd_insert_folio(struct brd_device *brd, sector_t sector, gfp_t gfp)
>>   {
>>   	pgoff_t idx;
>>   	struct folio *folio, *cur;
>> -	unsigned int rd_sector_order = get_order(PAGE_SIZE);
>> +	unsigned int rd_sector_order = get_order(brd->brd_sector_size);
> 
> Surely max(0, brd->brd_sector_shift - PAGE_SHIFT) ?
> 
Errm. Possibly.

>> @@ -346,6 +353,25 @@ static int brd_alloc(int i)
>>   		return -ENOMEM;
>>   	brd->brd_number		= i;
>>   	list_add_tail(&brd->brd_list, &brd_devices);
>> +	brd->brd_sector_shift = ilog2(rd_blksize);
>> +	if ((1ULL << brd->brd_sector_shift) != rd_blksize) {
>> +		pr_err("rd_blksize %d is not supported\n", rd_blksize);
> 
> Are you trying to require power-of-two here?  We have is_power_of_2()
> for that purpose.
> 
Ah. So let's use that, then :-)

Cheers,

Hannes
Dave Chinner June 15, 2023, 2:17 a.m. UTC | #3
On Wed, Jun 14, 2023 at 01:46:34PM +0200, Hannes Reinecke wrote:
> @@ -310,6 +312,10 @@ static int max_part = 1;
>  module_param(max_part, int, 0444);
>  MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
>  
> +static unsigned int rd_blksize = PAGE_SIZE;
> +module_param(rd_blksize, uint, 0444);
> +MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");

This needs CONFIG_BLK_DEV_RAM_BLOCK_SIZE to set the default size
for those of us who don't use modular kernels....

Cheers,

Dave.
Christoph Hellwig June 15, 2023, 5:55 a.m. UTC | #4
On Thu, Jun 15, 2023 at 12:17:12PM +1000, Dave Chinner wrote:
> On Wed, Jun 14, 2023 at 01:46:34PM +0200, Hannes Reinecke wrote:
> > @@ -310,6 +312,10 @@ static int max_part = 1;
> >  module_param(max_part, int, 0444);
> >  MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
> >  
> > +static unsigned int rd_blksize = PAGE_SIZE;
> > +module_param(rd_blksize, uint, 0444);
> > +MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
> 
> This needs CONFIG_BLK_DEV_RAM_BLOCK_SIZE to set the default size
> for those of us who don't use modular kernels....

You can set module parameters on the command line for built-in code,
like brd.rd_blksize=8192

While we're at it, why that weird rd_ prefix for the parameter?
Hannes Reinecke June 15, 2023, 6:23 a.m. UTC | #5
On 6/15/23 04:17, Dave Chinner wrote:
> On Wed, Jun 14, 2023 at 01:46:34PM +0200, Hannes Reinecke wrote:
>> @@ -310,6 +312,10 @@ static int max_part = 1;
>>   module_param(max_part, int, 0444);
>>   MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
>>   
>> +static unsigned int rd_blksize = PAGE_SIZE;
>> +module_param(rd_blksize, uint, 0444);
>> +MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
> 
> This needs CONFIG_BLK_DEV_RAM_BLOCK_SIZE to set the default size
> for those of us who don't use modular kernels....
> 
Ok, will do.

Cheers,

Hannes
Hannes Reinecke June 15, 2023, 6:33 a.m. UTC | #6
On 6/15/23 07:55, Christoph Hellwig wrote:
> On Thu, Jun 15, 2023 at 12:17:12PM +1000, Dave Chinner wrote:
>> On Wed, Jun 14, 2023 at 01:46:34PM +0200, Hannes Reinecke wrote:
>>> @@ -310,6 +312,10 @@ static int max_part = 1;
>>>   module_param(max_part, int, 0444);
>>>   MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
>>>   
>>> +static unsigned int rd_blksize = PAGE_SIZE;
>>> +module_param(rd_blksize, uint, 0444);
>>> +MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
>>
>> This needs CONFIG_BLK_DEV_RAM_BLOCK_SIZE to set the default size
>> for those of us who don't use modular kernels....
> 
> You can set module parameters on the command line for built-in code,
> like brd.rd_blksize=8192
> 
> While we're at it, why that weird rd_ prefix for the parameter?
> 
Because that's what's used for all the existing parameters, too.

We could remove it, but then we would either have inconsistent naming
(some parameters with 'rd_', others without) or break existing setups.

Cheers,

Hannes
diff mbox series

Patch

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 71d3d8af8b0d..2ebb5532a204 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -30,7 +30,7 @@ 
 /*
  * Each block ramdisk device has a xarray of folios that stores the folios
  * containing the block device's contents. A brd folio's ->index is its offset
- * in PAGE_SIZE units. This is similar to, but in no way connected with,
+ * in brd_sector_size units. This is similar to, but in no way connected with,
  * the kernel's pagecache or buffer cache (which sit above our block device).
  */
 struct brd_device {
@@ -43,9 +43,11 @@  struct brd_device {
 	 */
 	struct xarray	        brd_folios;
 	u64			brd_nr_folios;
+	unsigned int		brd_sector_shift;
+	unsigned int		brd_sector_size;
 };
 
-#define BRD_SECTOR_SHIFT(b) (PAGE_SHIFT - SECTOR_SHIFT)
+#define BRD_SECTOR_SHIFT(b) ((b)->brd_sector_shift - SECTOR_SHIFT)
 
 static pgoff_t brd_sector_index(struct brd_device *brd, sector_t sector)
 {
@@ -85,7 +87,7 @@  static int brd_insert_folio(struct brd_device *brd, sector_t sector, gfp_t gfp)
 {
 	pgoff_t idx;
 	struct folio *folio, *cur;
-	unsigned int rd_sector_order = get_order(PAGE_SIZE);
+	unsigned int rd_sector_order = get_order(brd->brd_sector_size);
 	int ret = 0;
 
 	folio = brd_lookup_folio(brd, sector);
@@ -140,7 +142,7 @@  static void brd_free_folios(struct brd_device *brd)
 static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
 			     gfp_t gfp)
 {
-	unsigned int rd_sector_size = PAGE_SIZE;
+	unsigned int rd_sector_size = brd->brd_sector_size;
 	unsigned int offset = brd_sector_offset(brd, sector);
 	size_t copy;
 	int ret;
@@ -164,7 +166,7 @@  static void copy_to_brd(struct brd_device *brd, const void *src,
 {
 	struct folio *folio;
 	void *dst;
-	unsigned int rd_sector_size = PAGE_SIZE;
+	unsigned int rd_sector_size = brd->brd_sector_size;
 	unsigned int offset = brd_sector_offset(brd, sector);
 	size_t copy;
 
@@ -197,7 +199,7 @@  static void copy_from_brd(void *dst, struct brd_device *brd,
 {
 	struct folio *folio;
 	void *src;
-	unsigned int rd_sector_size = PAGE_SIZE;
+	unsigned int rd_sector_size = brd->brd_sector_size;
 	unsigned int offset = brd_sector_offset(brd, sector);
 	size_t copy;
 
@@ -310,6 +312,10 @@  static int max_part = 1;
 module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
 
+static unsigned int rd_blksize = PAGE_SIZE;
+module_param(rd_blksize, uint, 0444);
+MODULE_PARM_DESC(rd_blksize, "Blocksize of each RAM disk in bytes.");
+
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
 MODULE_ALIAS("rd");
@@ -336,6 +342,7 @@  static int brd_alloc(int i)
 	struct brd_device *brd;
 	struct gendisk *disk;
 	char buf[DISK_NAME_LEN];
+	unsigned int rd_max_sectors;
 	int err = -ENOMEM;
 
 	list_for_each_entry(brd, &brd_devices, brd_list)
@@ -346,6 +353,25 @@  static int brd_alloc(int i)
 		return -ENOMEM;
 	brd->brd_number		= i;
 	list_add_tail(&brd->brd_list, &brd_devices);
+	brd->brd_sector_shift = ilog2(rd_blksize);
+	if ((1ULL << brd->brd_sector_shift) != rd_blksize) {
+		pr_err("rd_blksize %d is not supported\n", rd_blksize);
+		err = -EINVAL;
+		goto out_free_dev;
+	}
+	if (rd_blksize < SECTOR_SIZE) {
+		pr_err("rd_blksize must be at least 512 bytes\n");
+		err = -EINVAL;
+		goto out_free_dev;
+	}
+	/* We can't allocate more than MAX_ORDER pages */
+	rd_max_sectors = (1ULL << MAX_ORDER) << BRD_SECTOR_SHIFT(brd);
+	if (rd_blksize > rd_max_sectors) {
+		pr_err("rd_blocksize too large\n");
+		err = -EINVAL;
+		goto out_free_dev;
+	}
+	brd->brd_sector_size = rd_blksize;
 
 	xa_init(&brd->brd_folios);
 
@@ -365,15 +391,9 @@  static int brd_alloc(int i)
 	disk->private_data	= brd;
 	strscpy(disk->disk_name, buf, DISK_NAME_LEN);
 	set_capacity(disk, rd_size * 2);
-	
-	/*
-	 * This is so fdisk will align partitions on 4k, because of
-	 * direct_access API needing 4k alignment, returning a PFN
-	 * (This is only a problem on very small devices <= 4M,
-	 *  otherwise fdisk will align on 1M. Regardless this call
-	 *  is harmless)
-	 */
-	blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
+
+	blk_queue_physical_block_size(disk->queue, rd_blksize);
+	blk_queue_max_hw_sectors(disk->queue, 1ULL << (MAX_ORDER + PAGE_SECTORS_SHIFT));
 
 	/* Tell the block layer that this is not a rotational device */
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);