diff mbox series

[14/20] xfs: use shmem_get_folio in xfile_obj_store

Message ID 20240129143502.189370-15-hch@lst.de (mailing list archive)
State New
Headers show
Series [01/20] mm: move mapping_set_update out of <linux/swap.h> | expand

Commit Message

Christoph Hellwig Jan. 29, 2024, 2:34 p.m. UTC
Switch to using shmem_get_folio and manually dirtying the page instead
of abusing aops->write_begin and aops->write_end in xfile_get_page.

This simplifies the code by not doing indirect calls of not actually
exported interfaces that don't really fit the use case very well, and
happens to get us large folio support for free.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/scrub/xfile.c | 73 ++++++++++++++++----------------------------
 1 file changed, 27 insertions(+), 46 deletions(-)

Comments

Hugh Dickins Feb. 16, 2024, 7:40 a.m. UTC | #1
On Mon, 29 Jan 2024, Christoph Hellwig wrote:

> Switch to using shmem_get_folio and manually dirtying the page instead
> of abusing aops->write_begin and aops->write_end in xfile_get_page.
> 
> This simplifies the code by not doing indirect calls of not actually
> exported interfaces that don't really fit the use case very well, and
> happens to get us large folio support for free.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/scrub/xfile.c | 73 ++++++++++++++++----------------------------
>  1 file changed, 27 insertions(+), 46 deletions(-)
> 
> diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
> index a669ebbbc02d1d..2b4819902b4cc3 100644
> --- a/fs/xfs/scrub/xfile.c
> +++ b/fs/xfs/scrub/xfile.c
> @@ -183,11 +183,7 @@ xfile_store(
>  	loff_t			pos)
>  {
>  	struct inode		*inode = file_inode(xf->file);
> -	struct address_space	*mapping = inode->i_mapping;
> -	const struct address_space_operations *aops = mapping->a_ops;
> -	struct page		*page = NULL;
>  	unsigned int		pflags;
> -	int			error = 0;
>  
>  	if (count > MAX_RW_COUNT)
>  		return -ENOMEM;
> @@ -196,60 +192,45 @@ xfile_store(
>  
>  	trace_xfile_store(xf, pos, count);
>  
> +	/*
> +	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
> +	 * actually allocates a folio instead of erroring out.
> +	 */
> +	if (pos + count > i_size_read(inode))
> +		i_size_write(inode, pos + count);
> +
>  	pflags = memalloc_nofs_save();
>  	while (count > 0) {
> -		void		*fsdata = NULL;
> -		void		*p, *kaddr;
> +		struct folio	*folio;
>  		unsigned int	len;
> -		int		ret;
> +		unsigned int	offset;
>  
> -		len = min_t(ssize_t, count, PAGE_SIZE - offset_in_page(pos));
> -
> -		/*
> -		 * We call write_begin directly here to avoid all the freezer
> -		 * protection lock-taking that happens in the normal path.
> -		 * shmem doesn't support fs freeze, but lockdep doesn't know
> -		 * that and will trip over that.
> -		 */
> -		error = aops->write_begin(NULL, mapping, pos, len, &page,
> -				&fsdata);
> -		if (error) {
> -			error = -ENOMEM;
> +		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
> +				SGP_CACHE) < 0)

SGP_CACHE is the safest choice, yes.  It will tend to do an unnecessary
clear_highpage() which you immediately overwrite with the actual data;
but saves calculating exactly what needs to be zeroed above and below
the data - not worth bothering with, unless shaving off CPU cycles.

>  			break;
> -		}
> -
> -		/*
> -		 * xfile pages must never be mapped into userspace, so we skip
> -		 * the dcache flush.  If the page is not uptodate, zero it
> -		 * before writing data.
> -		 */
> -		kaddr = page_address(page);
> -		if (!PageUptodate(page)) {
> -			memset(kaddr, 0, PAGE_SIZE);
> -			SetPageUptodate(page);
> -		}
> -		p = kaddr + offset_in_page(pos);
> -		memcpy(p, buf, len);
> -
> -		ret = aops->write_end(NULL, mapping, pos, len, len, page,
> -				fsdata);
> -		if (ret < 0) {
> -			error = -ENOMEM;
> +		if (filemap_check_wb_err(inode->i_mapping, 0)) {

Hah.  I was sceptical what that could ever achieve on shmem (the "wb"
is misleading); but it's an ingenious suggestion from Matthew, to avoid
our current horrid folio+page HWPoison handling.  I followed it up a bit,
and it does look as if this filemap_check_wb_err() technique should work
for that; but it's not something I've tried at all.

And that's all I've got to say on the series (I read on, but certainly
did not delve into the folio sorting stuff): looks good, but the
VM_NORESERVE question probably needs attention (and that docbook
comment to mention "locked").

XFS tree still seems to me the right home for it all.

Hugh

> +			folio_unlock(folio);
> +			folio_put(folio);
>  			break;
>  		}
>  
> -		if (ret != len) {
> -			error = -ENOMEM;
> -			break;
> -		}
> +		offset = offset_in_folio(folio, pos);
> +		len = min_t(ssize_t, count, folio_size(folio) - offset);
> +		memcpy(folio_address(folio) + offset, buf, len);
> +
> +		folio_mark_dirty(folio);
> +		folio_unlock(folio);
> +		folio_put(folio);
>  
> -		count -= ret;
> -		pos += ret;
> -		buf += ret;
> +		count -= len;
> +		pos += len;
> +		buf += len;
>  	}
>  	memalloc_nofs_restore(pflags);
>  
> -	return error;
> +	if (count)
> +		return -ENOMEM;
> +	return 0;
>  }
>  
>  /* Find the next written area in the xfile data for a given offset. */
> -- 
> 2.39.2
> 
>
diff mbox series

Patch

diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
index a669ebbbc02d1d..2b4819902b4cc3 100644
--- a/fs/xfs/scrub/xfile.c
+++ b/fs/xfs/scrub/xfile.c
@@ -183,11 +183,7 @@  xfile_store(
 	loff_t			pos)
 {
 	struct inode		*inode = file_inode(xf->file);
-	struct address_space	*mapping = inode->i_mapping;
-	const struct address_space_operations *aops = mapping->a_ops;
-	struct page		*page = NULL;
 	unsigned int		pflags;
-	int			error = 0;
 
 	if (count > MAX_RW_COUNT)
 		return -ENOMEM;
@@ -196,60 +192,45 @@  xfile_store(
 
 	trace_xfile_store(xf, pos, count);
 
+	/*
+	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
+	 * actually allocates a folio instead of erroring out.
+	 */
+	if (pos + count > i_size_read(inode))
+		i_size_write(inode, pos + count);
+
 	pflags = memalloc_nofs_save();
 	while (count > 0) {
-		void		*fsdata = NULL;
-		void		*p, *kaddr;
+		struct folio	*folio;
 		unsigned int	len;
-		int		ret;
+		unsigned int	offset;
 
-		len = min_t(ssize_t, count, PAGE_SIZE - offset_in_page(pos));
-
-		/*
-		 * We call write_begin directly here to avoid all the freezer
-		 * protection lock-taking that happens in the normal path.
-		 * shmem doesn't support fs freeze, but lockdep doesn't know
-		 * that and will trip over that.
-		 */
-		error = aops->write_begin(NULL, mapping, pos, len, &page,
-				&fsdata);
-		if (error) {
-			error = -ENOMEM;
+		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio,
+				SGP_CACHE) < 0)
 			break;
-		}
-
-		/*
-		 * xfile pages must never be mapped into userspace, so we skip
-		 * the dcache flush.  If the page is not uptodate, zero it
-		 * before writing data.
-		 */
-		kaddr = page_address(page);
-		if (!PageUptodate(page)) {
-			memset(kaddr, 0, PAGE_SIZE);
-			SetPageUptodate(page);
-		}
-		p = kaddr + offset_in_page(pos);
-		memcpy(p, buf, len);
-
-		ret = aops->write_end(NULL, mapping, pos, len, len, page,
-				fsdata);
-		if (ret < 0) {
-			error = -ENOMEM;
+		if (filemap_check_wb_err(inode->i_mapping, 0)) {
+			folio_unlock(folio);
+			folio_put(folio);
 			break;
 		}
 
-		if (ret != len) {
-			error = -ENOMEM;
-			break;
-		}
+		offset = offset_in_folio(folio, pos);
+		len = min_t(ssize_t, count, folio_size(folio) - offset);
+		memcpy(folio_address(folio) + offset, buf, len);
+
+		folio_mark_dirty(folio);
+		folio_unlock(folio);
+		folio_put(folio);
 
-		count -= ret;
-		pos += ret;
-		buf += ret;
+		count -= len;
+		pos += len;
+		buf += len;
 	}
 	memalloc_nofs_restore(pflags);
 
-	return error;
+	if (count)
+		return -ENOMEM;
+	return 0;
 }
 
 /* Find the next written area in the xfile data for a given offset. */