[5/5] iomap: support RWF_UNCACHED for buffered writes
diff mbox series

Message ID 20191212190133.18473-6-axboe@kernel.dk
State New
Headers show
Series
  • Support for RWF_UNCACHED
Related show

Commit Message

Jens Axboe Dec. 12, 2019, 7:01 p.m. UTC
This adds support for RWF_UNCACHED for file systems using iomap to
perform buffered writes. We use the generic infrastructure for this,
by tracking pages we created and calling write_drop_cached_pages()
to issue writeback and prune those pages.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/iomap/apply.c       | 24 ++++++++++++++++++++++++
 fs/iomap/buffered-io.c | 23 +++++++++++++++++++----
 include/linux/iomap.h  |  5 +++++
 3 files changed, 48 insertions(+), 4 deletions(-)

Comments

Darrick J. Wong Dec. 13, 2019, 2:26 a.m. UTC | #1
On Thu, Dec 12, 2019 at 12:01:33PM -0700, Jens Axboe wrote:
> This adds support for RWF_UNCACHED for file systems using iomap to
> perform buffered writes. We use the generic infrastructure for this,
> by tracking pages we created and calling write_drop_cached_pages()
> to issue writeback and prune those pages.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  fs/iomap/apply.c       | 24 ++++++++++++++++++++++++
>  fs/iomap/buffered-io.c | 23 +++++++++++++++++++----
>  include/linux/iomap.h  |  5 +++++
>  3 files changed, 48 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
> index e76148db03b8..11b6812f7b37 100644
> --- a/fs/iomap/apply.c
> +++ b/fs/iomap/apply.c
> @@ -92,5 +92,29 @@ iomap_apply(struct iomap_data *data, const struct iomap_ops *ops,
>  				     data->flags, &iomap);
>  	}
>  
> +	if (written && (data->flags & IOMAP_UNCACHED)) {

Hmmm... why is a chunk of buffered write(?) code landing in the iomap
apply function?

The #define for IOMAP_UNCACHED doesn't have a comment, so I don't know
what this is supposed to mean.  Judging from the one place it gets set
in the buffered write function, I gather that this is how you implement
the "write through page cache and immediately unmap the page if it
wasn't there before" behavior?

So based on that, I think you want ...

if IOMAP_WRITE && _UNCACHED && !_DIRECT && written > 0:
	flush and invalidate

Since direct writes are never going to create page cache, right?

And in that case, why not put this at the end of iomap_write_actor?

(Sorry if this came up in the earlier discussions, I've been busy this
week and still have a long way to go for catching up...)

> +		struct address_space *mapping = data->inode->i_mapping;
> +
> +		end = data->pos + written;
> +		ret = filemap_write_and_wait_range(mapping, data->pos, end);
> +		if (ret)
> +			goto out;
> +
> +		/*
> +		 * No pages were created for this range, we're done
> +		 */
> +		if (!(iomap.flags & IOMAP_F_PAGE_CREATE))
> +			goto out;
> +
> +		/*
> +		 * Try to invalidate cache pages for the range we just wrote.
> +		 * We don't care if invalidation fails as the write has still
> +		 * worked and leaving clean uptodate pages in the page cache
> +		 * isn't a corruption vector for uncached IO.
> +		 */
> +		invalidate_inode_pages2_range(mapping,
> +				data->pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
> +	}
> +out:
>  	return written ? written : ret;
>  }
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 0a1a195ed1cc..df9d6002858e 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -659,6 +659,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	const struct iomap_page_ops *page_ops = iomap->page_ops;
> +	unsigned aop_flags;
>  	struct page *page;
>  	int status = 0;
>  
> @@ -675,8 +676,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  			return status;
>  	}
>  
> +	aop_flags = AOP_FLAG_NOFS;
> +	if (flags & IOMAP_UNCACHED)
> +		aop_flags |= AOP_FLAG_UNCACHED;
>  	page = grab_cache_page_write_begin(inode->i_mapping, pos >> PAGE_SHIFT,
> -			AOP_FLAG_NOFS);
> +						aop_flags);
>  	if (!page) {
>  		status = -ENOMEM;
>  		goto out_no_page;
> @@ -818,6 +822,7 @@ iomap_write_actor(const struct iomap_data *data, struct iomap *iomap,
>  {
>  	struct inode *inode = data->inode;
>  	struct iov_iter *i = data->priv;
> +	unsigned flags = data->flags;
>  	loff_t length = data->len;
>  	loff_t pos = data->pos;
>  	long status = 0;
> @@ -851,10 +856,17 @@ iomap_write_actor(const struct iomap_data *data, struct iomap *iomap,
>  			break;
>  		}
>  
> -		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
> -				srcmap);
> -		if (unlikely(status))
> +retry:
> +		status = iomap_write_begin(inode, pos, bytes, flags,
> +						&page, iomap, srcmap);
> +		if (unlikely(status)) {
> +			if (status == -ENOMEM && (flags & IOMAP_UNCACHED)) {
> +				iomap->flags |= IOMAP_F_PAGE_CREATE;
> +				flags &= ~IOMAP_UNCACHED;
> +				goto retry;
> +			}
>  			break;
> +		}
>  
>  		if (mapping_writably_mapped(inode->i_mapping))
>  			flush_dcache_page(page);
> @@ -907,6 +919,9 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
>  	};
>  	loff_t ret = 0, written = 0;
>  
> +	if (iocb->ki_flags & IOCB_UNCACHED)
> +		data.flags |= IOMAP_UNCACHED;
> +
>  	while (iov_iter_count(iter)) {
>  		data.len = iov_iter_count(iter);
>  		ret = iomap_apply(&data, ops, iomap_write_actor);
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 30f40145a9e9..30bb248e1d0d 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -48,12 +48,16 @@ struct vm_fault;
>   *
>   * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
>   * buffer heads for this mapping.
> + *
> + * IOMAP_F_PAGE_CREATE indicates that pages had to be allocated to satisfy
> + * this operation.
>   */
>  #define IOMAP_F_NEW		0x01
>  #define IOMAP_F_DIRTY		0x02
>  #define IOMAP_F_SHARED		0x04
>  #define IOMAP_F_MERGED		0x08
>  #define IOMAP_F_BUFFER_HEAD	0x10
> +#define IOMAP_F_PAGE_CREATE	0x20

I think these new flags need an update to the _STRINGS arrays in
fs/iomap/trace.h.
>  
>  /*
>   * Flags set by the core iomap code during operations:
> @@ -121,6 +125,7 @@ struct iomap_page_ops {
>  #define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
>  #define IOMAP_DIRECT		(1 << 4) /* direct I/O */
>  #define IOMAP_NOWAIT		(1 << 5) /* do not block */
> +#define IOMAP_UNCACHED		(1 << 6)

No comment?

--D

>  
>  struct iomap_ops {
>  	/*
> -- 
> 2.24.1
>
Jens Axboe Dec. 13, 2019, 2:38 a.m. UTC | #2
On 12/12/19 7:26 PM, Darrick J. Wong wrote:
> On Thu, Dec 12, 2019 at 12:01:33PM -0700, Jens Axboe wrote:
>> This adds support for RWF_UNCACHED for file systems using iomap to
>> perform buffered writes. We use the generic infrastructure for this,
>> by tracking pages we created and calling write_drop_cached_pages()
>> to issue writeback and prune those pages.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  fs/iomap/apply.c       | 24 ++++++++++++++++++++++++
>>  fs/iomap/buffered-io.c | 23 +++++++++++++++++++----
>>  include/linux/iomap.h  |  5 +++++
>>  3 files changed, 48 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
>> index e76148db03b8..11b6812f7b37 100644
>> --- a/fs/iomap/apply.c
>> +++ b/fs/iomap/apply.c
>> @@ -92,5 +92,29 @@ iomap_apply(struct iomap_data *data, const struct iomap_ops *ops,
>>  				     data->flags, &iomap);
>>  	}
>>  
>> +	if (written && (data->flags & IOMAP_UNCACHED)) {
> 
> Hmmm... why is a chunk of buffered write(?) code landing in the iomap
> apply function?

I'm going to say that Dave suggested it ;-)

> The #define for IOMAP_UNCACHED doesn't have a comment, so I don't know
> what this is supposed to mean.  Judging from the one place it gets set
> in the buffered write function I gather that this is how you implement
> the "write through page cache and immediately unmap the page if it
> wasn't there before" behavior?
> 
> So based on that, I think you want ...
> 
> if IOMAP_WRITE && _UNCACHED && !_DIRECT && written > 0:
> 	flush and invalidate

Looking at the comments, I did think it was just for writes, but it
looks generic. I'll take the blame for that; we should only call into
that sync-and-invalidate code for buffered writes. I'll make that
change.

> Since direct writes are never going to create page cache, right?

If they do, it's handled separately.

> And in that case, why not put this at the end of iomap_write_actor?
> 
> (Sorry if this came up in the earlier discussions, I've been busy this
> week and still have a long way to go for catching up...)

It did come up; the idea is to do it for the full range, not per chunk.
Does that help?

>> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
>> index 30f40145a9e9..30bb248e1d0d 100644
>> --- a/include/linux/iomap.h
>> +++ b/include/linux/iomap.h
>> @@ -48,12 +48,16 @@ struct vm_fault;
>>   *
>>   * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
>>   * buffer heads for this mapping.
>> + *
>> + * IOMAP_F_PAGE_CREATE indicates that pages had to be allocated to satisfy
>> + * this operation.
>>   */
>>  #define IOMAP_F_NEW		0x01
>>  #define IOMAP_F_DIRTY		0x02
>>  #define IOMAP_F_SHARED		0x04
>>  #define IOMAP_F_MERGED		0x08
>>  #define IOMAP_F_BUFFER_HEAD	0x10
>> +#define IOMAP_F_PAGE_CREATE	0x20
> 
> I think these new flags need an update to the _STRINGS arrays in
> fs/iomap/trace.h.

I'll add that.

>>  /*
>>   * Flags set by the core iomap code during operations:
>> @@ -121,6 +125,7 @@ struct iomap_page_ops {
>>  #define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
>>  #define IOMAP_DIRECT		(1 << 4) /* direct I/O */
>>  #define IOMAP_NOWAIT		(1 << 5) /* do not block */
>> +#define IOMAP_UNCACHED		(1 << 6)
> 
> No comment?

Definitely, I'll add a comment.

Thanks for taking a look! I'll incorporate your suggestions.

Patch
diff mbox series

diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
index e76148db03b8..11b6812f7b37 100644
--- a/fs/iomap/apply.c
+++ b/fs/iomap/apply.c
@@ -92,5 +92,29 @@  iomap_apply(struct iomap_data *data, const struct iomap_ops *ops,
 				     data->flags, &iomap);
 	}
 
+	if (written && (data->flags & IOMAP_UNCACHED)) {
+		struct address_space *mapping = data->inode->i_mapping;
+
+		end = data->pos + written;
+		ret = filemap_write_and_wait_range(mapping, data->pos, end);
+		if (ret)
+			goto out;
+
+		/*
+		 * No pages were created for this range, we're done
+		 */
+		if (!(iomap.flags & IOMAP_F_PAGE_CREATE))
+			goto out;
+
+		/*
+		 * Try to invalidate cache pages for the range we just wrote.
+		 * We don't care if invalidation fails as the write has still
+		 * worked and leaving clean uptodate pages in the page cache
+		 * isn't a corruption vector for uncached IO.
+		 */
+		invalidate_inode_pages2_range(mapping,
+				data->pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+	}
+out:
 	return written ? written : ret;
 }
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0a1a195ed1cc..df9d6002858e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -659,6 +659,7 @@  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
 {
 	const struct iomap_page_ops *page_ops = iomap->page_ops;
+	unsigned aop_flags;
 	struct page *page;
 	int status = 0;
 
@@ -675,8 +676,11 @@  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 			return status;
 	}
 
+	aop_flags = AOP_FLAG_NOFS;
+	if (flags & IOMAP_UNCACHED)
+		aop_flags |= AOP_FLAG_UNCACHED;
 	page = grab_cache_page_write_begin(inode->i_mapping, pos >> PAGE_SHIFT,
-			AOP_FLAG_NOFS);
+						aop_flags);
 	if (!page) {
 		status = -ENOMEM;
 		goto out_no_page;
@@ -818,6 +822,7 @@  iomap_write_actor(const struct iomap_data *data, struct iomap *iomap,
 {
 	struct inode *inode = data->inode;
 	struct iov_iter *i = data->priv;
+	unsigned flags = data->flags;
 	loff_t length = data->len;
 	loff_t pos = data->pos;
 	long status = 0;
@@ -851,10 +856,17 @@  iomap_write_actor(const struct iomap_data *data, struct iomap *iomap,
 			break;
 		}
 
-		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
-				srcmap);
-		if (unlikely(status))
+retry:
+		status = iomap_write_begin(inode, pos, bytes, flags,
+						&page, iomap, srcmap);
+		if (unlikely(status)) {
+			if (status == -ENOMEM && (flags & IOMAP_UNCACHED)) {
+				iomap->flags |= IOMAP_F_PAGE_CREATE;
+				flags &= ~IOMAP_UNCACHED;
+				goto retry;
+			}
 			break;
+		}
 
 		if (mapping_writably_mapped(inode->i_mapping))
 			flush_dcache_page(page);
@@ -907,6 +919,9 @@  iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter,
 	};
 	loff_t ret = 0, written = 0;
 
+	if (iocb->ki_flags & IOCB_UNCACHED)
+		data.flags |= IOMAP_UNCACHED;
+
 	while (iov_iter_count(iter)) {
 		data.len = iov_iter_count(iter);
 		ret = iomap_apply(&data, ops, iomap_write_actor);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 30f40145a9e9..30bb248e1d0d 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -48,12 +48,16 @@  struct vm_fault;
  *
  * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
  * buffer heads for this mapping.
+ *
+ * IOMAP_F_PAGE_CREATE indicates that pages had to be allocated to satisfy
+ * this operation.
  */
 #define IOMAP_F_NEW		0x01
 #define IOMAP_F_DIRTY		0x02
 #define IOMAP_F_SHARED		0x04
 #define IOMAP_F_MERGED		0x08
 #define IOMAP_F_BUFFER_HEAD	0x10
+#define IOMAP_F_PAGE_CREATE	0x20
 
 /*
  * Flags set by the core iomap code during operations:
@@ -121,6 +125,7 @@  struct iomap_page_ops {
 #define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
 #define IOMAP_DIRECT		(1 << 4) /* direct I/O */
 #define IOMAP_NOWAIT		(1 << 5) /* do not block */
+#define IOMAP_UNCACHED		(1 << 6)
 
 struct iomap_ops {
 	/*