diff mbox

brd: detect zero writes for saving ram

Message ID 1423321424-9657-1-git-send-email-akinobu.mita@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Akinobu Mita Feb. 7, 2015, 3:03 p.m. UTC
This introduces a module parameter that detects all-zero writes and
skips allocating memory for them.  Read requests for an unallocated
(unwritten) region return zeros.  This saves the memory that zero-filled
pages would otherwise consume, at the cost of extra overhead for the
detection.

This feature is useful for testing filesystems and user programs
against huge files without requiring huge real storage.  For that
reason, this change also raises the upper limit on the size of the
RAM disk.

The following commands demonstrate creating a zero-filled file of about
16TB on ext4 without a huge amount of memory installed.

	# modprobe brd zero_detect=1 rd_size=$((16*1024*1024*1024))
	# mkfs.ext4 /dev/ram0
	# mount /dev/ram0 /mnt/
	# dd if=/dev/zero of=/mnt/huge-zero-file bs=1M oflag=direct

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org
---
 drivers/block/brd.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

Comments

Akinobu Mita Feb. 7, 2015, 3:18 p.m. UTC | #1
2015-02-08 0:03 GMT+09:00 Akinobu Mita <akinobu.mita@gmail.com>:
> This introduces a module parameter to detect zero writes and not to
> allocate memory.  Read requests for unallocated (unwritten) region
> end up by reading zero.  So this can save zeroed memory consumption
> with extra overhead for the detection.
>
> This feature is useful for testing filesystems and user programs to
> huge files without huge real storage.  So this change also extends
> the upper limit on the size of the RAM disk.

I have just noticed zram, and I'll check whether I can use it instead.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Akinobu Mita Feb. 8, 2015, 4:03 a.m. UTC | #2
2015-02-08 0:18 GMT+09:00 Akinobu Mita <akinobu.mita@gmail.com>:
> 2015-02-08 0:03 GMT+09:00 Akinobu Mita <akinobu.mita@gmail.com>:
>> This introduces a module parameter to detect zero writes and not to
>> allocate memory.  Read requests for unallocated (unwritten) region
>> end up by reading zero.  So this can save zeroed memory consumption
>> with extra overhead for the detection.
>>
>> This feature is useful for testing filesystems and user programs to
>> huge files without huge real storage.  So this change also extends
>> the upper limit on the size of the RAM disk.
>
> I have just noticed about zram and I'll check I can use it instead.

zram currently needs to allocate memory proportional to the size of
the disk ((disksize >> PAGE_SHIFT) * sizeof(struct zram_table_entry)).
If disksize is in the TB or PB class, that requires a huge amount of
memory.  But brd with this patch can create a 1PB filesystem, so there
is a reason for this new brd feature to exist.

        # modprobe brd zero_detect=1 rd_size=$((1024*1024*1024*1024))
        # mkfs.xfs /dev/ram0
        # mount /dev/ram0 /mnt
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 3598110..82e4328 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -293,6 +293,42 @@  static void copy_from_brd(void *dst, struct brd_device *brd,
 	}
 }
 
+static bool page_is_zero(struct page *page, unsigned int len, unsigned int off)
+{
+	bool is_zero;
+	void *mem;
+
+	mem = kmap_atomic(page);
+	is_zero = !memchr_inv(mem + off, 0, len);
+	kunmap_atomic(mem);
+
+	return is_zero;
+}
+
+static bool zero_detect;
+
+static bool brd_zero_detect(struct brd_device *brd, struct page *page,
+			    unsigned int len, unsigned int off, sector_t sector)
+{
+	sector_t end_sector;
+
+	if (!zero_detect)
+		return false;
+	if (!page_is_zero(page, len, off))
+		return false;
+
+	if (brd_lookup_page(brd, sector))
+		return false;
+
+	end_sector = sector + (len >> SECTOR_SHIFT) - 1;
+	if (sector >> PAGE_SECTORS_SHIFT != end_sector >> PAGE_SECTORS_SHIFT) {
+		if (brd_lookup_page(brd, end_sector))
+			return false;
+	}
+
+	return true;
+}
+
 /*
  * Process a single bvec of a bio.
  */
@@ -304,6 +340,9 @@  static int brd_do_bvec(struct brd_device *brd, struct page *page,
 	int err = 0;
 
 	if (rw != READ) {
+		if (brd_zero_detect(brd, page, len, off, sector))
+			return 0;
+
 		err = copy_to_brd_setup(brd, sector, len);
 		if (err)
 			goto out;
@@ -439,18 +478,20 @@  static const struct block_device_operations brd_fops = {
  * And now the modules code and kernel interface.
  */
 static int rd_nr;
-int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
+unsigned long long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
 static int max_part;
 static int part_shift;
 static int part_show = 0;
 module_param(rd_nr, int, S_IRUGO);
 MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
-module_param(rd_size, int, S_IRUGO);
+module_param(rd_size, ullong, S_IRUGO);
 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
 module_param(max_part, int, S_IRUGO);
 MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk");
 module_param(part_show, int, S_IRUGO);
 MODULE_PARM_DESC(part_show, "Control RAM disk visibility in /proc/partitions");
+module_param(zero_detect, bool, 0644);
+MODULE_PARM_DESC(zero_detect, "Detect zero writes for saving ram");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
 MODULE_ALIAS("rd");