diff mbox

[4/7] Add stream ID support for buffered mpage/__block_write_full_page()

Message ID 1427296070-8472-5-git-send-email-axboe@fb.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jens Axboe March 25, 2015, 3:07 p.m. UTC
Pass on the inode stream ID to the bio allocation.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/buffer.c | 4 ++--
 fs/mpage.c  | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

Comments

Ming Lin-SSI March 25, 2015, 10:42 p.m. UTC | #1
> -----Original Message-----
> From: Jens Axboe [mailto:axboe@fb.com]
> Sent: Wednesday, March 25, 2015 8:08 AM
> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org
> Cc: Ming Lin-SSI; david@fromorbit.com; Jens Axboe
> Subject: [PATCH 4/7] Add stream ID support for buffered
> mpage/__block_write_full_page()
> 
> Pass on the inode stream ID to the bio allocation.
> 
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  fs/buffer.c | 4 ++--
>  fs/mpage.c  | 1 +
>  2 files changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 20805db2c987..0220925ff26d 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1774,7 +1774,7 @@ static int __block_write_full_page(struct inode
> *inode, struct page *page,
>  	do {
>  		struct buffer_head *next = bh->b_this_page;
>  		if (buffer_async_write(bh)) {
> -			submit_bh(write_op, bh);
> +			_submit_bh(write_op, bh,
> streamid_to_flags(inode_streamid(inode)));
>  			nr_underway++;
>  		}
>  		bh = next;
> @@ -1828,7 +1828,7 @@ recover:
>  		struct buffer_head *next = bh->b_this_page;
>  		if (buffer_async_write(bh)) {
>  			clear_buffer_dirty(bh);
> -			submit_bh(write_op, bh);
> +			_submit_bh(write_op, bh,
> streamid_to_flags(inode_streamid(inode)));
>  			nr_underway++;
>  		}
>  		bh = next;
> diff --git a/fs/mpage.c b/fs/mpage.c
> index 3e79220babac..fba13f4b981d 100644
> --- a/fs/mpage.c
> +++ b/fs/mpage.c
> @@ -605,6 +605,7 @@ alloc_new:
>  				bio_get_nr_vecs(bdev),
> GFP_NOFS|__GFP_HIGH);
>  		if (bio == NULL)
>  			goto confused;
> +		bio_set_streamid(bio, inode_streamid(inode));

This will not work when multiple processes write to the same raw disk.
Let's say two processes concurrently pwrite to /dev/nvme0n1 with different stream IDs.

Process 1:
fd = open("/dev/nvme0n1", ...);
posix_fadvise(fd, stream_id_1, 0, POSIX_FADV_STREAMID);
pwrite( fd, buf1, count1, offset1);

Process 2:
fd = open("/dev/nvme0n1", ...);
posix_fadvise(fd, stream_id_2, 0, POSIX_FADV_STREAMID);
pwrite(fd, buf2, count2, offset2);

One stream_id will overwrite the other because the "inode" is the same.

Thanks,
Ming
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jens Axboe March 25, 2015, 11:08 p.m. UTC | #2
On 03/25/2015 04:42 PM, Ming Lin-SSI wrote:
>> -----Original Message-----
>> From: Jens Axboe [mailto:axboe@fb.com]
>> Sent: Wednesday, March 25, 2015 8:08 AM
>> To: linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org
>> Cc: Ming Lin-SSI; david@fromorbit.com; Jens Axboe
>> Subject: [PATCH 4/7] Add stream ID support for buffered
>> mpage/__block_write_full_page()
>>
>> Pass on the inode stream ID to the bio allocation.
>>
>> Signed-off-by: Jens Axboe <axboe@fb.com>
>> ---
>>   fs/buffer.c | 4 ++--
>>   fs/mpage.c  | 1 +
>>   2 files changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/buffer.c b/fs/buffer.c
>> index 20805db2c987..0220925ff26d 100644
>> --- a/fs/buffer.c
>> +++ b/fs/buffer.c
>> @@ -1774,7 +1774,7 @@ static int __block_write_full_page(struct inode
>> *inode, struct page *page,
>>   	do {
>>   		struct buffer_head *next = bh->b_this_page;
>>   		if (buffer_async_write(bh)) {
>> -			submit_bh(write_op, bh);
>> +			_submit_bh(write_op, bh,
>> streamid_to_flags(inode_streamid(inode)));
>>   			nr_underway++;
>>   		}
>>   		bh = next;
>> @@ -1828,7 +1828,7 @@ recover:
>>   		struct buffer_head *next = bh->b_this_page;
>>   		if (buffer_async_write(bh)) {
>>   			clear_buffer_dirty(bh);
>> -			submit_bh(write_op, bh);
>> +			_submit_bh(write_op, bh,
>> streamid_to_flags(inode_streamid(inode)));
>>   			nr_underway++;
>>   		}
>>   		bh = next;
>> diff --git a/fs/mpage.c b/fs/mpage.c
>> index 3e79220babac..fba13f4b981d 100644
>> --- a/fs/mpage.c
>> +++ b/fs/mpage.c
>> @@ -605,6 +605,7 @@ alloc_new:
>>   				bio_get_nr_vecs(bdev),
>> GFP_NOFS|__GFP_HIGH);
>>   		if (bio == NULL)
>>   			goto confused;
>> +		bio_set_streamid(bio, inode_streamid(inode));
>
> This will not work when multiple processes write to the same raw disk.
> Let's say 2 process concurrently pwrite to /dev/nvme0n1 with different stream_id.
>
> Process 1:
> fd = open("/dev/nvme0n1", ...);
> posix_fadvise(fd, stream_id_1, 0, POSIX_FADV_STREAMID);
> pwrite( fd, buf1, count1, offset1);
>
> Process 2:
> fd = open("/dev/nvme0n1", ...);
> posix_fadvise(fd, stream_id_2, 0, POSIX_FADV_STREAMID);
> pwrite(fd, buf2, count2, offset2);
>
> One stream_id will overwrite the other one because "inode" is same.

Well, that's how buffered writeback works... There's no file available
at that point in time; in fact, it could be long gone. So the only
reliable part we have here is the inode.

If you want the above scenario to work, you have to use O_DIRECT. Then 
it will work.
diff mbox

Patch

diff --git a/fs/buffer.c b/fs/buffer.c
index 20805db2c987..0220925ff26d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1774,7 +1774,7 @@  static int __block_write_full_page(struct inode *inode, struct page *page,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh(write_op, bh);
+			_submit_bh(write_op, bh, streamid_to_flags(inode_streamid(inode)));
 			nr_underway++;
 		}
 		bh = next;
@@ -1828,7 +1828,7 @@  recover:
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh(write_op, bh);
+			_submit_bh(write_op, bh, streamid_to_flags(inode_streamid(inode)));
 			nr_underway++;
 		}
 		bh = next;
diff --git a/fs/mpage.c b/fs/mpage.c
index 3e79220babac..fba13f4b981d 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -605,6 +605,7 @@  alloc_new:
 				bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
 		if (bio == NULL)
 			goto confused;
+		bio_set_streamid(bio, inode_streamid(inode));
 	}
 
 	/*