diff mbox series

[v2] block: add filemap_invalidate_lock_killable()

Message ID ff8f59e5-7699-0ccd-4da3-a34aa934a16b@I-love.SAKURA.ne.jp (mailing list archive)
State New
Headers show
Series [v2] block: add filemap_invalidate_lock_killable() | expand

Commit Message

Tetsuo Handa April 17, 2022, 7:46 a.m. UTC
syzbot is reporting hung task at blkdev_fallocate() [1], for it can take
minutes with mapping->invalidate_lock held. Since fallocate() has to accept
64bits size, we can't predict how long it will take. Thus, mitigate this
problem by using killable wait where possible.

  ----------
  #define _GNU_SOURCE
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>
  #include <unistd.h>

  int main(int argc, char *argv[])
  {
    fork();
    fallocate(open("/dev/nullb0", O_RDWR), 0x11, 0, ~0UL >> 1);
    return 0;
  }
  ----------

Note that, even after this patch, e.g. "cat /dev/nullb0" can be reported
as hung task at filemap_invalidate_lock_shared() when this reproducer is
running. We will need to also make fault-acceptable reads killable.

  __schedule+0x9a0/0xb20
  schedule+0xc1/0x120
  rwsem_down_read_slowpath+0x3b5/0x670
  __down_read_common+0x56/0x1f0
  page_cache_ra_unbounded+0x12d/0x400
  filemap_read+0x4bb/0x1280
  blkdev_read_iter+0x1d5/0x260
  vfs_read+0x5f8/0x690
  ksys_read+0xee/0x190
  do_syscall_64+0x3d/0x90
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Link: https://syzkaller.appspot.com/bug?extid=39b75c02b8be0a061bfc [1]
Reported-by: syzbot <syzbot+39b75c02b8be0a061bfc@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
Changes in v2:
  Converted all users in block/ directory and fs/open.c file.
  I didn't convert remaining users because remaining users should be
  carefully converted by each filesystem's developers.

 block/blk-zoned.c  | 3 ++-
 block/fops.c       | 3 ++-
 block/ioctl.c      | 6 ++++--
 fs/open.c          | 3 ++-
 include/linux/fs.h | 5 +++++
 5 files changed, 15 insertions(+), 5 deletions(-)

Comments

Christoph Hellwig April 18, 2022, 5:14 a.m. UTC | #1
I'm starting to sound like a broken record as I'm stating it the third
time now, but: 

no, this does not in any way fix the problem of holding the invalidate
lock over long running I/O.  It just does ease the symptoms in the least
important but apparently visible to you caller.

What we need to do is to get the zeroing I/O out from under the
invalidate lock, just like how we don't do direct I/O under it.
diff mbox series

Patch

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 38cd840d8838..07a8841f4724 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -422,7 +422,8 @@  int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 		op = REQ_OP_ZONE_RESET;
 
 		/* Invalidate the page cache, including dirty pages. */
-		filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+		if (filemap_invalidate_lock_killable(bdev->bd_inode->i_mapping))
+			return -EINTR;
 		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
 		if (ret)
 			goto fail;
diff --git a/block/fops.c b/block/fops.c
index ba5e7d5ff9a5..418fb1d789ff 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -656,7 +656,8 @@  static long blkdev_fallocate(struct file *file, int mode, loff_t start,
 	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
 		return -EINVAL;
 
-	filemap_invalidate_lock(inode->i_mapping);
+	if (filemap_invalidate_lock_killable(inode->i_mapping))
+		return -EINTR;
 
 	/* Invalidate the page cache, including dirty pages. */
 	error = truncate_bdev_range(bdev, file->f_mode, start, end);
diff --git a/block/ioctl.c b/block/ioctl.c
index 4a86340133e4..13f863f79f68 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -111,7 +111,8 @@  static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
 	if (start + len > bdev_nr_bytes(bdev))
 		return -EINVAL;
 
-	filemap_invalidate_lock(inode->i_mapping);
+	if (filemap_invalidate_lock_killable(inode->i_mapping))
+		return -EINTR;
 	err = truncate_bdev_range(bdev, mode, start, start + len - 1);
 	if (err)
 		goto fail;
@@ -152,7 +153,8 @@  static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
 		return -EINVAL;
 
 	/* Invalidate the page cache, including dirty pages */
-	filemap_invalidate_lock(inode->i_mapping);
+	if (filemap_invalidate_lock_killable(inode->i_mapping))
+		return -EINTR;
 	err = truncate_bdev_range(bdev, mode, start, end);
 	if (err)
 		goto fail;
diff --git a/fs/open.c b/fs/open.c
index 7b50d7a2f51d..adf62e1c186b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -859,7 +859,8 @@  static int do_dentry_open(struct file *f,
 		if (filemap_nr_thps(inode->i_mapping)) {
 			struct address_space *mapping = inode->i_mapping;
 
-			filemap_invalidate_lock(inode->i_mapping);
+			if (filemap_invalidate_lock_killable(inode->i_mapping))
+				return -EINTR;
 			/*
 			 * unmap_mapping_range just need to be called once
 			 * here, because the private pages is not need to be
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 03f8d95bd4ef..e0134c372b6d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -797,6 +797,11 @@  static inline void filemap_invalidate_lock(struct address_space *mapping)
 	down_write(&mapping->invalidate_lock);
 }
 
+static inline int filemap_invalidate_lock_killable(struct address_space *mapping)
+{
+	return down_write_killable(&mapping->invalidate_lock);
+}
+
 static inline void filemap_invalidate_unlock(struct address_space *mapping)
 {
 	up_write(&mapping->invalidate_lock);