diff mbox series

[5/5] aio: add support for file based polled IO

Message ID 20181117235317.7366-6-axboe@kernel.dk (mailing list archive)
State New, archived
Headers show
Series Support for polled aio | expand

Commit Message

Jens Axboe Nov. 17, 2018, 11:53 p.m. UTC
Needs further work, but this should work fine on normal setups
with a file system on a pollable block device.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/aio.c       | 2 ++
 fs/direct-io.c | 4 +++-
 fs/iomap.c     | 7 +++++--
 3 files changed, 10 insertions(+), 3 deletions(-)

Comments

Dave Chinner Nov. 19, 2018, 1:57 a.m. UTC | #1
On Sat, Nov 17, 2018 at 04:53:17PM -0700, Jens Axboe wrote:
> Needs further work, but this should work fine on normal setups
> with a file system on a pollable block device.

What should work fine? I've got no idea what this patch is actually
providing....

> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  fs/aio.c       | 2 ++
>  fs/direct-io.c | 4 +++-
>  fs/iomap.c     | 7 +++++--
>  3 files changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 500da3ffc376..e02085fe10d7 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1310,6 +1310,8 @@ static struct block_device *aio_bdev_host(struct kiocb *req)
>  
>  	if (S_ISBLK(inode->i_mode))
>  		return I_BDEV(inode);
> +	else if (inode->i_sb && inode->i_sb->s_bdev)
> +		return inode->i_sb->s_bdev;

XFS might be doing AIO to files on real-time device, not
inode->i_sb->s_bdev. So this may well be the wrong block device
for the IO being submitted.

>  
>  	return NULL;
>  }
> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index a5a4e5a1423e..34de494e9061 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -477,8 +477,10 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
>  	if (sdio->submit_io) {
>  		sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio);
>  		dio->bio_cookie = BLK_QC_T_NONE;
> -	} else
> +	} else {
>  		dio->bio_cookie = submit_bio(bio);
> +		WRITE_ONCE(dio->iocb->ki_blk_qc, dio->bio_cookie);
> +	}
>  
>  	sdio->bio = NULL;
>  	sdio->boundary = 0;
> diff --git a/fs/iomap.c b/fs/iomap.c
> index 74c1f37f0fd6..4cf412b6230a 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -1555,6 +1555,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
>  	struct page *page = ZERO_PAGE(0);
>  	int flags = REQ_SYNC | REQ_IDLE;
>  	struct bio *bio;
> +	blk_qc_t qc;
>  
>  	bio = bio_alloc(GFP_KERNEL, 1);
>  	bio_set_dev(bio, iomap->bdev);
> @@ -1570,7 +1571,9 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
>  	bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
>  
>  	atomic_inc(&dio->ref);
> -	return submit_bio(bio);
> +	qc = submit_bio(bio);
> +	WRITE_ONCE(dio->iocb->ki_blk_qc, qc);
> +	return qc;
>  }

Why is this added to sub-block zeroing IO calls? It gets overwritten
by the data IO submission, so this value is going to change as the
IO progresses. What does making these partial IOs visible provide,
especially as they then get overwritten by the next submissions?
Indeed, how does one wait on all IOs in the DIO to complete if we
are only tracking one of many?

Cheers,

Dave.
Jens Axboe Nov. 19, 2018, 2:58 a.m. UTC | #2
On 11/18/18 6:57 PM, Dave Chinner wrote:
> On Sat, Nov 17, 2018 at 04:53:17PM -0700, Jens Axboe wrote:
>> Needs further work, but this should work fine on normal setups
>> with a file system on a pollable block device.
> 
> What should work fine? I've got no idea what this patch is actually
> providing....

Sorry, should have made it more clear that this one is just a test
patch to enable polled aio for files, it's not supposed to be
considered complete.

>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  fs/aio.c       | 2 ++
>>  fs/direct-io.c | 4 +++-
>>  fs/iomap.c     | 7 +++++--
>>  3 files changed, 10 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/aio.c b/fs/aio.c
>> index 500da3ffc376..e02085fe10d7 100644
>> --- a/fs/aio.c
>> +++ b/fs/aio.c
>> @@ -1310,6 +1310,8 @@ static struct block_device *aio_bdev_host(struct kiocb *req)
>>  
>>  	if (S_ISBLK(inode->i_mode))
>>  		return I_BDEV(inode);
>> +	else if (inode->i_sb && inode->i_sb->s_bdev)
>> +		return inode->i_sb->s_bdev;
> 
> XFS might be doing AIO to files on real-time device, not
> inode->i_sb->s_bdev. So this may well be the wrong block device
> for the IO being submitted.

See patch description, XFS doing AIO on a real-time device is not a
"normal setup".

It doesn't work for btrfs either, for instance. As far as I can tell, there
are a few routes that we can go here:

1) Have an fs ops that returns the bdev.
2) Set it after submit time, like we do in other spots. This won't work
   if we straddle devices.

I don't feel that strongly about it, and frankly I'm fine with just
dropping non-bdev support for now. The patch is provided for testing
for the 99% of use cases, which is a file system on top of a single
device.

>> diff --git a/fs/iomap.c b/fs/iomap.c
>> index 74c1f37f0fd6..4cf412b6230a 100644
>> --- a/fs/iomap.c
>> +++ b/fs/iomap.c
>> @@ -1555,6 +1555,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
>>  	struct page *page = ZERO_PAGE(0);
>>  	int flags = REQ_SYNC | REQ_IDLE;
>>  	struct bio *bio;
>> +	blk_qc_t qc;
>>  
>>  	bio = bio_alloc(GFP_KERNEL, 1);
>>  	bio_set_dev(bio, iomap->bdev);
>> @@ -1570,7 +1571,9 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
>>  	bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
>>  
>>  	atomic_inc(&dio->ref);
>> -	return submit_bio(bio);
>> +	qc = submit_bio(bio);
>> +	WRITE_ONCE(dio->iocb->ki_blk_qc, qc);
>> +	return qc;
>>  }
> 
> Why is this added to sub-block zeroing IO calls? It gets overwritten
> by the data IO submission, so this value is going to change as the
> IO progresses. What does making these partial IOs visible provide,
> especially as they then get overwritten by the next submissions?
> Indeed, how does one wait on all IOs in the DIO to complete if we
> are only tracking one of many?

That does look like a mistake, the zeroing should just be ignored.
We only care about the actual IO.
Christoph Hellwig Nov. 19, 2018, 8:12 a.m. UTC | #3
On Sat, Nov 17, 2018 at 04:53:17PM -0700, Jens Axboe wrote:
> Needs further work, but this should work fine on normal setups
> with a file system on a pollable block device.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  fs/aio.c       | 2 ++
>  fs/direct-io.c | 4 +++-
>  fs/iomap.c     | 7 +++++--
>  3 files changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 500da3ffc376..e02085fe10d7 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1310,6 +1310,8 @@ static struct block_device *aio_bdev_host(struct kiocb *req)
>  
>  	if (S_ISBLK(inode->i_mode))
>  		return I_BDEV(inode);
> +	else if (inode->i_sb && inode->i_sb->s_bdev)
> +		return inode->i_sb->s_bdev;

This is broken for multi-device filesystems, e.g. XFS with the RT
device, or btrfs.  See my patch in reply to your last one for an idea
how to solve this.
diff mbox series

Patch

diff --git a/fs/aio.c b/fs/aio.c
index 500da3ffc376..e02085fe10d7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1310,6 +1310,8 @@  static struct block_device *aio_bdev_host(struct kiocb *req)
 
 	if (S_ISBLK(inode->i_mode))
 		return I_BDEV(inode);
+	else if (inode->i_sb && inode->i_sb->s_bdev)
+		return inode->i_sb->s_bdev;
 
 	return NULL;
 }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a5a4e5a1423e..34de494e9061 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -477,8 +477,10 @@  static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 	if (sdio->submit_io) {
 		sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio);
 		dio->bio_cookie = BLK_QC_T_NONE;
-	} else
+	} else {
 		dio->bio_cookie = submit_bio(bio);
+		WRITE_ONCE(dio->iocb->ki_blk_qc, dio->bio_cookie);
+	}
 
 	sdio->bio = NULL;
 	sdio->boundary = 0;
diff --git a/fs/iomap.c b/fs/iomap.c
index 74c1f37f0fd6..4cf412b6230a 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1555,6 +1555,7 @@  iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
 	struct page *page = ZERO_PAGE(0);
 	int flags = REQ_SYNC | REQ_IDLE;
 	struct bio *bio;
+	blk_qc_t qc;
 
 	bio = bio_alloc(GFP_KERNEL, 1);
 	bio_set_dev(bio, iomap->bdev);
@@ -1570,7 +1571,9 @@  iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
 	bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
 
 	atomic_inc(&dio->ref);
-	return submit_bio(bio);
+	qc = submit_bio(bio);
+	WRITE_ONCE(dio->iocb->ki_blk_qc, qc);
+	return qc;
 }
 
 static loff_t
@@ -1680,7 +1683,7 @@  iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 		atomic_inc(&dio->ref);
 
 		dio->submit.last_queue = bdev_get_queue(iomap->bdev);
-		dio->submit.cookie = submit_bio(bio);
+		dio->iocb->ki_blk_qc = dio->submit.cookie = submit_bio(bio);
 	} while (nr_pages);
 
 	if (need_zeroout) {