diff mbox series

[08/20] iomap: use a srcmap for a read-modify-write I/O

Message ID 20191008071527.29304-9-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [01/20] iomap: better document the IOMAP_F_* flags | expand

Commit Message

Christoph Hellwig Oct. 8, 2019, 7:15 a.m. UTC
From: Goldwyn Rodrigues <rgoldwyn@suse.com>

The srcmap is used to identify where the read is to be performed from.
It is passed to ->iomap_begin, which can fill it in if we need to read
data for partially written blocks from a different location than the
write target.  The srcmap is only supported for buffered writes so far.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
[hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as
      srcmap if not set, adjust length down to srcmap end as well]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/dax.c               |  9 ++++--
 fs/ext2/inode.c        |  2 +-
 fs/ext4/inode.c        |  2 +-
 fs/gfs2/bmap.c         |  3 +-
 fs/iomap/apply.c       | 25 ++++++++++++----
 fs/iomap/buffered-io.c | 65 +++++++++++++++++++++++-------------------
 fs/iomap/direct-io.c   |  2 +-
 fs/iomap/fiemap.c      |  4 +--
 fs/iomap/seek.c        |  4 +--
 fs/iomap/swapfile.c    |  3 +-
 fs/xfs/xfs_iomap.c     |  9 ++++--
 include/linux/iomap.h  |  5 ++--
 12 files changed, 80 insertions(+), 53 deletions(-)

Comments

Darrick J. Wong Oct. 8, 2019, 3 p.m. UTC | #1
On Tue, Oct 08, 2019 at 09:15:15AM +0200, Christoph Hellwig wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> The srcmap is used to identify where the read is to be performed from.
> It is passed to ->iomap_begin, which can fill it in if we need to read
> data for partially written blocks from a different location than the
> write target.  The srcmap is only supported for buffered writes so far.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> [hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as
>       srcmap if not set, adjust length down to srcmap end as well]
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/dax.c               |  9 ++++--
>  fs/ext2/inode.c        |  2 +-
>  fs/ext4/inode.c        |  2 +-
>  fs/gfs2/bmap.c         |  3 +-
>  fs/iomap/apply.c       | 25 ++++++++++++----
>  fs/iomap/buffered-io.c | 65 +++++++++++++++++++++++-------------------
>  fs/iomap/direct-io.c   |  2 +-
>  fs/iomap/fiemap.c      |  4 +--
>  fs/iomap/seek.c        |  4 +--
>  fs/iomap/swapfile.c    |  3 +-
>  fs/xfs/xfs_iomap.c     |  9 ++++--
>  include/linux/iomap.h  |  5 ++--
>  12 files changed, 80 insertions(+), 53 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 6bf81f931de3..920105457c2c 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
>  
>  static loff_t
>  dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct block_device *bdev = iomap->bdev;
>  	struct dax_device *dax_dev = iomap->dax_dev;
> @@ -1248,6 +1248,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	unsigned long vaddr = vmf->address;
>  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
>  	struct iomap iomap = { 0 };

Does this definition ^^^^^ need to be converted too?  You convert the
one in iomap_apply()...

> +	struct iomap srcmap = { .type = IOMAP_HOLE };

...and at the same time I wonder if we ought to have:

	/*
	 * The @iomap and @srcmap parameters should be set to a hole
	 * prior to calling ->iomap_begin.
	 */
	#define IOMAP_EMPTY_RECORD	{ .type = IOMAP_HOLE }

...and later...

	struct iomap srcmap = IOMAP_EMPTY_RECORD;

..but meh, I'm not sure that adds much.

>  	unsigned flags = IOMAP_FAULT;
>  	int error, major = 0;
>  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> @@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	 * the file system block size to be equal the page size, which means
>  	 * that we never have to deal with more than a single extent here.
>  	 */
> -	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
> +	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);

->iomap_begin callers are never supposed to touch srcmap, right?
Maybe we ought to check that srcmap.io_type == HOLE, at least until
someone fixes this code to dax-copy the data from srcmap to iomap?

(I don't like this open-coded iomap_apply here, but fixing that is for
another day because I once tried to extract the iteration pieces and
yeurghck...)

The rest of the patch looks ok.

--D

>  	if (iomap_errp)
>  		*iomap_errp = error;
>  	if (error) {
> @@ -1472,6 +1473,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	struct inode *inode = mapping->host;
>  	vm_fault_t result = VM_FAULT_FALLBACK;
>  	struct iomap iomap = { 0 };
> +	struct iomap srcmap = { .type = IOMAP_HOLE };
>  	pgoff_t max_pgoff;
>  	void *entry;
>  	loff_t pos;
> @@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	 * to look up our filesystem block.
>  	 */
>  	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
> -	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
> +	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
> +			&srcmap);
>  	if (error)
>  		goto unlock_entry;
>  
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index 7004ce581a32..467c13ff6b40 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
>  
>  #ifdef CONFIG_FS_DAX
>  static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> -		unsigned flags, struct iomap *iomap)
> +		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	unsigned int blkbits = inode->i_blkbits;
>  	unsigned long first_block = offset >> blkbits;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 516faa280ced..abaaf7d96ca4 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3407,7 +3407,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
>  }
>  
>  static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> -			    unsigned flags, struct iomap *iomap)
> +		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
>  	unsigned int blkbits = inode->i_blkbits;
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index f63df54a08c6..516103248272 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -1149,7 +1149,8 @@ static inline bool gfs2_iomap_need_write_lock(unsigned flags)
>  }
>  
>  static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
> -			    unsigned flags, struct iomap *iomap)
> +			    unsigned flags, struct iomap *iomap,
> +			    struct iomap *srcmap)
>  {
>  	struct gfs2_inode *ip = GFS2_I(inode);
>  	struct metapath mp = { .mp_aheight = 1, };
> diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
> index 54c02aecf3cd..484dd8eda861 100644
> --- a/fs/iomap/apply.c
> +++ b/fs/iomap/apply.c
> @@ -23,8 +23,10 @@ loff_t
>  iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
>  		const struct iomap_ops *ops, void *data, iomap_actor_t actor)
>  {
> -	struct iomap iomap = { 0 };
> +	struct iomap iomap = { .type = IOMAP_HOLE };
> +	struct iomap srcmap = { .type = IOMAP_HOLE };
>  	loff_t written = 0, ret;
> +	u64 end;
>  
>  	/*
>  	 * Need to map a range from start position for length bytes. This can
> @@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
>  	 * expose transient stale data. If the reserve fails, we can safely
>  	 * back out at this point as there is nothing to undo.
>  	 */
> -	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
> +	ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
>  	if (ret)
>  		return ret;
>  	if (WARN_ON(iomap.offset > pos))
> @@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
>  	 * Cut down the length to the one actually provided by the filesystem,
>  	 * as it might not be able to give us the whole size that we requested.
>  	 */
> -	if (iomap.offset + iomap.length < pos + length)
> -		length = iomap.offset + iomap.length - pos;
> +	end = iomap.offset + iomap.length;
> +	if (srcmap.type != IOMAP_HOLE)
> +		end = min(end, srcmap.offset + srcmap.length);
> +	if (pos + length > end)
> +		length = end - pos;
>  
>  	/*
> -	 * Now that we have guaranteed that the space allocation will succeed.
> +	 * Now that we have guaranteed that the space allocation will succeed,
>  	 * we can do the copy-in page by page without having to worry about
>  	 * failures exposing transient data.
> +	 *
> +	 * To support COW operations, we read in data for partially written
> +	 * blocks from the srcmap if the file system filled it in.  In that case
> +	 * the length must be limited to the earlier of the ends of the iomaps.
> +	 * If the file system did not provide a srcmap we pass in the normal
> +	 * iomap into the actors so that they don't need to have special
> +	 * handling for the two cases.
>  	 */
> -	written = actor(inode, pos, length, data, &iomap);
> +	written = actor(inode, pos, length, data, &iomap,
> +			srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
>  
>  	/*
>  	 * Now the data has been copied, commit the range we've copied.  This
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index ac1bbed71a9b..eb2c6d73a837 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -234,7 +234,7 @@ static inline bool iomap_block_needs_zeroing(struct inode *inode,
>  
>  static loff_t
>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct iomap_readpage_ctx *ctx = data;
>  	struct page *page = ctx->cur_page;
> @@ -382,7 +382,7 @@ iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
>  
>  static loff_t
>  iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct iomap_readpage_ctx *ctx = data;
>  	loff_t done, ret;
> @@ -402,7 +402,7 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
>  			ctx->cur_page_in_bio = false;
>  		}
>  		ret = iomap_readpage_actor(inode, pos + done, length - done,
> -				ctx, iomap);
> +				ctx, iomap, srcmap);
>  	}
>  
>  	return done;
> @@ -582,7 +582,7 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
>  
>  static int
>  __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> -		struct page *page, struct iomap *iomap)
> +		struct page *page, struct iomap *srcmap)
>  {
>  	struct iomap_page *iop = iomap_page_create(inode, page);
>  	loff_t block_size = i_blocksize(inode);
> @@ -605,7 +605,7 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  		    (to <= poff || to >= poff + plen))
>  			continue;
>  
> -		if (iomap_block_needs_zeroing(inode, iomap, block_start)) {
> +		if (iomap_block_needs_zeroing(inode, srcmap, block_start)) {
>  			if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
>  				return -EIO;
>  			zero_user_segments(page, poff, from, to, poff + plen);
> @@ -614,7 +614,7 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  		}
>  
>  		status = iomap_read_page_sync(block_start, page, poff, plen,
> -				iomap);
> +				srcmap);
>  		if (status)
>  			return status;
>  	} while ((block_start += plen) < block_end);
> @@ -624,13 +624,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  
>  static int
>  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> -		struct page **pagep, struct iomap *iomap)
> +		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	const struct iomap_page_ops *page_ops = iomap->page_ops;
>  	struct page *page;
>  	int status = 0;
>  
>  	BUG_ON(pos + len > iomap->offset + iomap->length);
> +	if (srcmap != iomap)
> +		BUG_ON(pos + len > srcmap->offset + srcmap->length);

This should be a WARN_ON(...) followed by return -EIO, right?

>  
>  	if (fatal_signal_pending(current))
>  		return -EINTR;
> @@ -648,13 +650,13 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		goto out_no_page;
>  	}
>  
> -	if (iomap->type == IOMAP_INLINE)
> -		iomap_read_inline_data(inode, page, iomap);
> +	if (srcmap->type == IOMAP_INLINE)
> +		iomap_read_inline_data(inode, page, srcmap);
>  	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
> -		status = __block_write_begin_int(page, pos, len, NULL, iomap);
> +		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
>  	else
>  		status = __iomap_write_begin(inode, pos, len, flags, page,
> -				iomap);
> +				srcmap);
>  
>  	if (unlikely(status))
>  		goto out_unlock;
> @@ -740,16 +742,16 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
>  }
>  
>  static int
> -iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
> -		unsigned copied, struct page *page, struct iomap *iomap)
> +iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
> +		struct page *page, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	const struct iomap_page_ops *page_ops = iomap->page_ops;
>  	loff_t old_size = inode->i_size;
>  	int ret;
>  
> -	if (iomap->type == IOMAP_INLINE) {
> +	if (srcmap->type == IOMAP_INLINE) {
>  		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
> -	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
> +	} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
>  		ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
>  				page, NULL);
>  	} else {
> @@ -780,7 +782,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
>  
>  static loff_t
>  iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct iov_iter *i = data;
>  	long status = 0;
> @@ -814,7 +816,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  			break;
>  		}
>  
> -		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
> +		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
> +				srcmap);
>  		if (unlikely(status))
>  			break;
>  
> @@ -825,8 +828,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  
>  		flush_dcache_page(page);
>  
> -		status = iomap_write_end(inode, pos, bytes, copied, page,
> -				iomap);
> +		status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
> +				srcmap);
>  		if (unlikely(status < 0))
>  			break;
>  		copied = status;
> @@ -879,7 +882,7 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
>  
>  static loff_t
>  iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	long status = 0;
>  	ssize_t written = 0;
> @@ -888,7 +891,7 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  	if (!(iomap->flags & IOMAP_F_SHARED))
>  		return length;
>  	/* don't bother with holes or unwritten extents */
> -	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> +	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
>  		return length;
>  
>  	do {
> @@ -897,11 +900,12 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		struct page *page;
>  
>  		status = iomap_write_begin(inode, pos, bytes,
> -				IOMAP_WRITE_F_UNSHARE, &page, iomap);
> +				IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
>  		if (unlikely(status))
>  			return status;
>  
> -		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
> +		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
> +				srcmap);
>  		if (unlikely(status <= 0)) {
>  			if (WARN_ON_ONCE(status == 0))
>  				return -EIO;
> @@ -940,19 +944,19 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
>  EXPORT_SYMBOL_GPL(iomap_file_unshare);
>  
>  static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
> -		unsigned bytes, struct iomap *iomap)
> +		unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct page *page;
>  	int status;
>  
> -	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
> +	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
>  	if (status)
>  		return status;
>  
>  	zero_user(page, offset, bytes);
>  	mark_page_accessed(page);
>  
> -	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
> +	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
>  }
>  
>  static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
> @@ -964,14 +968,14 @@ static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
>  
>  static loff_t
>  iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	bool *did_zero = data;
>  	loff_t written = 0;
>  	int status;
>  
>  	/* already zeroed?  we're done. */
> -	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> +	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
>  		return count;
>  
>  	do {
> @@ -983,7 +987,8 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
>  		if (IS_DAX(inode))
>  			status = iomap_dax_zero(pos, offset, bytes, iomap);
>  		else
> -			status = iomap_zero(inode, pos, offset, bytes, iomap);
> +			status = iomap_zero(inode, pos, offset, bytes, iomap,
> +					srcmap);
>  		if (status < 0)
>  			return status;
>  
> @@ -1033,7 +1038,7 @@ EXPORT_SYMBOL_GPL(iomap_truncate_page);
>  
>  static loff_t
>  iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct page *page = data;
>  	int ret;
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 1fc28c2da279..e3ccbf7daaae 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -358,7 +358,7 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
>  
>  static loff_t
>  iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct iomap_dio *dio = data;
>  
> diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c
> index f26fdd36e383..690ef2d7c6c8 100644
> --- a/fs/iomap/fiemap.c
> +++ b/fs/iomap/fiemap.c
> @@ -44,7 +44,7 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
>  
>  static loff_t
>  iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct fiemap_ctx *ctx = data;
>  	loff_t ret = length;
> @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(iomap_fiemap);
>  
>  static loff_t
>  iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	sector_t *bno = data, addr;
>  
> diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c
> index c04bad4b2b43..89f61d93c0bc 100644
> --- a/fs/iomap/seek.c
> +++ b/fs/iomap/seek.c
> @@ -119,7 +119,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
>  
>  static loff_t
>  iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
> -		      void *data, struct iomap *iomap)
> +		      void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	switch (iomap->type) {
>  	case IOMAP_UNWRITTEN:
> @@ -165,7 +165,7 @@ EXPORT_SYMBOL_GPL(iomap_seek_hole);
>  
>  static loff_t
>  iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
> -		      void *data, struct iomap *iomap)
> +		      void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	switch (iomap->type) {
>  	case IOMAP_HOLE:
> diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
> index 152a230f668d..a648dbf6991e 100644
> --- a/fs/iomap/swapfile.c
> +++ b/fs/iomap/swapfile.c
> @@ -76,7 +76,8 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
>   * distinction between written and unwritten extents.
>   */
>  static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
> -		loff_t count, void *data, struct iomap *iomap)
> +		loff_t count, void *data, struct iomap *iomap,
> +		struct iomap *srcmap)
>  {
>  	struct iomap_swapfile_info *isi = data;
>  	int error;
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index c0a492353826..016adcd7dd66 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -928,7 +928,8 @@ xfs_file_iomap_begin(
>  	loff_t			offset,
>  	loff_t			length,
>  	unsigned		flags,
> -	struct iomap		*iomap)
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
>  {
>  	struct xfs_inode	*ip = XFS_I(inode);
>  	struct xfs_mount	*mp = ip->i_mount;
> @@ -1154,7 +1155,8 @@ xfs_seek_iomap_begin(
>  	loff_t			offset,
>  	loff_t			length,
>  	unsigned		flags,
> -	struct iomap		*iomap)
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
>  {
>  	struct xfs_inode	*ip = XFS_I(inode);
>  	struct xfs_mount	*mp = ip->i_mount;
> @@ -1240,7 +1242,8 @@ xfs_xattr_iomap_begin(
>  	loff_t			offset,
>  	loff_t			length,
>  	unsigned		flags,
> -	struct iomap		*iomap)
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
>  {
>  	struct xfs_inode	*ip = XFS_I(inode);
>  	struct xfs_mount	*mp = ip->i_mount;
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 24c784e44274..37af5f9dc722 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -127,7 +127,8 @@ struct iomap_ops {
>  	 * The actual length is returned in iomap->length.
>  	 */
>  	int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length,
> -			unsigned flags, struct iomap *iomap);
> +			unsigned flags, struct iomap *iomap,
> +			struct iomap *srcmap);
>  
>  	/*
>  	 * Commit and/or unreserve space previous allocated using iomap_begin.
> @@ -143,7 +144,7 @@ struct iomap_ops {
>   * Main iomap iterator function.
>   */
>  typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> -		void *data, struct iomap *iomap);
> +		void *data, struct iomap *iomap, struct iomap *srcmap);
>  
>  loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
>  		unsigned flags, const struct iomap_ops *ops, void *data,
> -- 
> 2.20.1
>
Christoph Hellwig Oct. 9, 2019, 6:28 a.m. UTC | #2
On Tue, Oct 08, 2019 at 08:00:44AM -0700, Darrick J. Wong wrote:
> >  	unsigned long vaddr = vmf->address;
> >  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> >  	struct iomap iomap = { 0 };
> 
> Does this definition ^^^^^ need to be converted too?  You convert the
> one in iomap_apply()...

Doesn't strictly need to, but it sure would look nicer and fit the theme.

> 	/*
> 	 * The @iomap and @srcmap parameters should be set to a hole
> 	 * prior to calling ->iomap_begin.
> 	 */
> 	#define IOMAP_EMPTY_RECORD	{ .type = IOMAP_HOLE }
> 
> ...and later...
> 
> 	struct iomap srcmap = IOMAP_EMPTY_RECORD;
> 
> ..but meh, I'm not sure that adds much.

I don't really see the point.

> >  	unsigned flags = IOMAP_FAULT;
> >  	int error, major = 0;
> >  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> > @@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> >  	 * the file system block size to be equal the page size, which means
> >  	 * that we never have to deal with more than a single extent here.
> >  	 */
> > -	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
> > +	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
> 
> ->iomap_begin callers are never supposed to touch srcmap, right?
> Maybe we ought to check that srcmap.io_type == HOLE, at least until
> someone fixes this code to dax-copy the data from srcmap to iomap?

What do you mean with touch?  ->iomap_begin fills it out and then the
caller looks at it, at least for places that can deal with
read-modify-write operations (DAX currently can't).
Darrick J. Wong Oct. 9, 2019, 5:16 p.m. UTC | #3
On Wed, Oct 09, 2019 at 08:28:24AM +0200, Christoph Hellwig wrote:
> On Tue, Oct 08, 2019 at 08:00:44AM -0700, Darrick J. Wong wrote:
> > >  	unsigned long vaddr = vmf->address;
> > >  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> > >  	struct iomap iomap = { 0 };
> > 
> > Does this definition ^^^^^ need to be converted too?  You convert the
> > one in iomap_apply()...
> 
> Doesn't strictly need to, but it sure would look nicer and fit the theme.
> 
> > 	/*
> > 	 * The @iomap and @srcmap parameters should be set to a hole
> > 	 * prior to calling ->iomap_begin.
> > 	 */
> > 	#define IOMAP_EMPTY_RECORD	{ .type = IOMAP_HOLE }
> > 
> > ...and later...
> > 
> > 	struct iomap srcmap = IOMAP_EMPTY_RECORD;
> > 
> > ..but meh, I'm not sure that adds much.
> 
> I don't really see the point.

Yeah.  Agreed.

> > >  	unsigned flags = IOMAP_FAULT;
> > >  	int error, major = 0;
> > >  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> > > @@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> > >  	 * the file system block size to be equal the page size, which means
> > >  	 * that we never have to deal with more than a single extent here.
> > >  	 */
> > > -	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
> > > +	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
> > 
> > ->iomap_begin callers are never supposed to touch srcmap, right?
> > Maybe we ought to check that srcmap.io_type == HOLE, at least until
> > someone fixes this code to dax-copy the data from srcmap to iomap?
> 
> What do you mean with touch?  ->iomap_begin fills it out and then the
> caller looks at it, at least for places that can deal with
> read-modify-write operations (DAX currently can't).

Yes, I grok that the DAX code should never get fed a shared mapping, but
maybe we ought to have a WARN_ON_ONCE just in case some filesystem AI
programmer decides to backport a fs patch that results in sending a
non-hole srcmap back to the dax iomap callers.  /We/ know that you
should never do this, but does the AI know? <grumble>

(Yeah, pure paranoia on my part :P)

--D
Darrick J. Wong Oct. 14, 2019, 11:27 p.m. UTC | #4
On Tue, Oct 08, 2019 at 09:15:15AM +0200, Christoph Hellwig wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> The srcmap is used to identify where the read is to be performed from.
> It is passed to ->iomap_begin, which can fill it in if we need to read
> data for partially written blocks from a different location than the
> write target.  The srcmap is only supported for buffered writes so far.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>

Goldwyn,

Since we've reworked your original patch quite extensively, could you
please have a look at (and if you approve, add an Acked-by) this new(er)
version so we can get this series moving for 5.5?

--D

> [hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as
>       srcmap if not set, adjust length down to srcmap end as well]
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/dax.c               |  9 ++++--
>  fs/ext2/inode.c        |  2 +-
>  fs/ext4/inode.c        |  2 +-
>  fs/gfs2/bmap.c         |  3 +-
>  fs/iomap/apply.c       | 25 ++++++++++++----
>  fs/iomap/buffered-io.c | 65 +++++++++++++++++++++++-------------------
>  fs/iomap/direct-io.c   |  2 +-
>  fs/iomap/fiemap.c      |  4 +--
>  fs/iomap/seek.c        |  4 +--
>  fs/iomap/swapfile.c    |  3 +-
>  fs/xfs/xfs_iomap.c     |  9 ++++--
>  include/linux/iomap.h  |  5 ++--
>  12 files changed, 80 insertions(+), 53 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 6bf81f931de3..920105457c2c 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
>  
>  static loff_t
>  dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct block_device *bdev = iomap->bdev;
>  	struct dax_device *dax_dev = iomap->dax_dev;
> @@ -1248,6 +1248,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	unsigned long vaddr = vmf->address;
>  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
>  	struct iomap iomap = { 0 };
> +	struct iomap srcmap = { .type = IOMAP_HOLE };
>  	unsigned flags = IOMAP_FAULT;
>  	int error, major = 0;
>  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> @@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	 * the file system block size to be equal the page size, which means
>  	 * that we never have to deal with more than a single extent here.
>  	 */
> -	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
> +	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
>  	if (iomap_errp)
>  		*iomap_errp = error;
>  	if (error) {
> @@ -1472,6 +1473,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	struct inode *inode = mapping->host;
>  	vm_fault_t result = VM_FAULT_FALLBACK;
>  	struct iomap iomap = { 0 };
> +	struct iomap srcmap = { .type = IOMAP_HOLE };
>  	pgoff_t max_pgoff;
>  	void *entry;
>  	loff_t pos;
> @@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	 * to look up our filesystem block.
>  	 */
>  	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
> -	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
> +	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
> +			&srcmap);
>  	if (error)
>  		goto unlock_entry;
>  
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index 7004ce581a32..467c13ff6b40 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
>  
>  #ifdef CONFIG_FS_DAX
>  static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> -		unsigned flags, struct iomap *iomap)
> +		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	unsigned int blkbits = inode->i_blkbits;
>  	unsigned long first_block = offset >> blkbits;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 516faa280ced..abaaf7d96ca4 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3407,7 +3407,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
>  }
>  
>  static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> -			    unsigned flags, struct iomap *iomap)
> +		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
>  	unsigned int blkbits = inode->i_blkbits;
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index f63df54a08c6..516103248272 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -1149,7 +1149,8 @@ static inline bool gfs2_iomap_need_write_lock(unsigned flags)
>  }
>  
>  static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
> -			    unsigned flags, struct iomap *iomap)
> +			    unsigned flags, struct iomap *iomap,
> +			    struct iomap *srcmap)
>  {
>  	struct gfs2_inode *ip = GFS2_I(inode);
>  	struct metapath mp = { .mp_aheight = 1, };
> diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
> index 54c02aecf3cd..484dd8eda861 100644
> --- a/fs/iomap/apply.c
> +++ b/fs/iomap/apply.c
> @@ -23,8 +23,10 @@ loff_t
>  iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
>  		const struct iomap_ops *ops, void *data, iomap_actor_t actor)
>  {
> -	struct iomap iomap = { 0 };
> +	struct iomap iomap = { .type = IOMAP_HOLE };
> +	struct iomap srcmap = { .type = IOMAP_HOLE };
>  	loff_t written = 0, ret;
> +	u64 end;
>  
>  	/*
>  	 * Need to map a range from start position for length bytes. This can
> @@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
>  	 * expose transient stale data. If the reserve fails, we can safely
>  	 * back out at this point as there is nothing to undo.
>  	 */
> -	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
> +	ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
>  	if (ret)
>  		return ret;
>  	if (WARN_ON(iomap.offset > pos))
> @@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
>  	 * Cut down the length to the one actually provided by the filesystem,
>  	 * as it might not be able to give us the whole size that we requested.
>  	 */
> -	if (iomap.offset + iomap.length < pos + length)
> -		length = iomap.offset + iomap.length - pos;
> +	end = iomap.offset + iomap.length;
> +	if (srcmap.type != IOMAP_HOLE)
> +		end = min(end, srcmap.offset + srcmap.length);
> +	if (pos + length > end)
> +		length = end - pos;
>  
>  	/*
> -	 * Now that we have guaranteed that the space allocation will succeed.
> +	 * Now that we have guaranteed that the space allocation will succeed,
>  	 * we can do the copy-in page by page without having to worry about
>  	 * failures exposing transient data.
> +	 *
> +	 * To support COW operations, we read in data for partially written
> +	 * blocks from the srcmap if the file system filled it in.  In that case
> +	 * the length must be limited to the earlier of the ends of the iomaps.
> +	 * If the file system did not provide a srcmap we pass in the normal
> +	 * iomap into the actors so that they don't need to have special
> +	 * handling for the two cases.
>  	 */
> -	written = actor(inode, pos, length, data, &iomap);
> +	written = actor(inode, pos, length, data, &iomap,
> +			srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
>  
>  	/*
>  	 * Now the data has been copied, commit the range we've copied.  This
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index ac1bbed71a9b..eb2c6d73a837 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -234,7 +234,7 @@ static inline bool iomap_block_needs_zeroing(struct inode *inode,
>  
>  static loff_t
>  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct iomap_readpage_ctx *ctx = data;
>  	struct page *page = ctx->cur_page;
> @@ -382,7 +382,7 @@ iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
>  
>  static loff_t
>  iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct iomap_readpage_ctx *ctx = data;
>  	loff_t done, ret;
> @@ -402,7 +402,7 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
>  			ctx->cur_page_in_bio = false;
>  		}
>  		ret = iomap_readpage_actor(inode, pos + done, length - done,
> -				ctx, iomap);
> +				ctx, iomap, srcmap);
>  	}
>  
>  	return done;
> @@ -582,7 +582,7 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
>  
>  static int
>  __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> -		struct page *page, struct iomap *iomap)
> +		struct page *page, struct iomap *srcmap)
>  {
>  	struct iomap_page *iop = iomap_page_create(inode, page);
>  	loff_t block_size = i_blocksize(inode);
> @@ -605,7 +605,7 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  		    (to <= poff || to >= poff + plen))
>  			continue;
>  
> -		if (iomap_block_needs_zeroing(inode, iomap, block_start)) {
> +		if (iomap_block_needs_zeroing(inode, srcmap, block_start)) {
>  			if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
>  				return -EIO;
>  			zero_user_segments(page, poff, from, to, poff + plen);
> @@ -614,7 +614,7 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  		}
>  
>  		status = iomap_read_page_sync(block_start, page, poff, plen,
> -				iomap);
> +				srcmap);
>  		if (status)
>  			return status;
>  	} while ((block_start += plen) < block_end);
> @@ -624,13 +624,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  
>  static int
>  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> -		struct page **pagep, struct iomap *iomap)
> +		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	const struct iomap_page_ops *page_ops = iomap->page_ops;
>  	struct page *page;
>  	int status = 0;
>  
>  	BUG_ON(pos + len > iomap->offset + iomap->length);
> +	if (srcmap != iomap)
> +		BUG_ON(pos + len > srcmap->offset + srcmap->length);
>  
>  	if (fatal_signal_pending(current))
>  		return -EINTR;
> @@ -648,13 +650,13 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		goto out_no_page;
>  	}
>  
> -	if (iomap->type == IOMAP_INLINE)
> -		iomap_read_inline_data(inode, page, iomap);
> +	if (srcmap->type == IOMAP_INLINE)
> +		iomap_read_inline_data(inode, page, srcmap);
>  	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
> -		status = __block_write_begin_int(page, pos, len, NULL, iomap);
> +		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
>  	else
>  		status = __iomap_write_begin(inode, pos, len, flags, page,
> -				iomap);
> +				srcmap);
>  
>  	if (unlikely(status))
>  		goto out_unlock;
> @@ -740,16 +742,16 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
>  }
>  
>  static int
> -iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
> -		unsigned copied, struct page *page, struct iomap *iomap)
> +iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
> +		struct page *page, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	const struct iomap_page_ops *page_ops = iomap->page_ops;
>  	loff_t old_size = inode->i_size;
>  	int ret;
>  
> -	if (iomap->type == IOMAP_INLINE) {
> +	if (srcmap->type == IOMAP_INLINE) {
>  		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
> -	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
> +	} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
>  		ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
>  				page, NULL);
>  	} else {
> @@ -780,7 +782,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
>  
>  static loff_t
>  iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct iov_iter *i = data;
>  	long status = 0;
> @@ -814,7 +816,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  			break;
>  		}
>  
> -		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
> +		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
> +				srcmap);
>  		if (unlikely(status))
>  			break;
>  
> @@ -825,8 +828,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  
>  		flush_dcache_page(page);
>  
> -		status = iomap_write_end(inode, pos, bytes, copied, page,
> -				iomap);
> +		status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
> +				srcmap);
>  		if (unlikely(status < 0))
>  			break;
>  		copied = status;
> @@ -879,7 +882,7 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
>  
>  static loff_t
>  iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	long status = 0;
>  	ssize_t written = 0;
> @@ -888,7 +891,7 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  	if (!(iomap->flags & IOMAP_F_SHARED))
>  		return length;
>  	/* don't bother with holes or unwritten extents */
> -	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> +	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
>  		return length;
>  
>  	do {
> @@ -897,11 +900,12 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		struct page *page;
>  
>  		status = iomap_write_begin(inode, pos, bytes,
> -				IOMAP_WRITE_F_UNSHARE, &page, iomap);
> +				IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
>  		if (unlikely(status))
>  			return status;
>  
> -		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
> +		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
> +				srcmap);
>  		if (unlikely(status <= 0)) {
>  			if (WARN_ON_ONCE(status == 0))
>  				return -EIO;
> @@ -940,19 +944,19 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
>  EXPORT_SYMBOL_GPL(iomap_file_unshare);
>  
>  static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
> -		unsigned bytes, struct iomap *iomap)
> +		unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct page *page;
>  	int status;
>  
> -	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
> +	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
>  	if (status)
>  		return status;
>  
>  	zero_user(page, offset, bytes);
>  	mark_page_accessed(page);
>  
> -	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
> +	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
>  }
>  
>  static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
> @@ -964,14 +968,14 @@ static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
>  
>  static loff_t
>  iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	bool *did_zero = data;
>  	loff_t written = 0;
>  	int status;
>  
>  	/* already zeroed?  we're done. */
> -	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> +	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
>  		return count;
>  
>  	do {
> @@ -983,7 +987,8 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
>  		if (IS_DAX(inode))
>  			status = iomap_dax_zero(pos, offset, bytes, iomap);
>  		else
> -			status = iomap_zero(inode, pos, offset, bytes, iomap);
> +			status = iomap_zero(inode, pos, offset, bytes, iomap,
> +					srcmap);
>  		if (status < 0)
>  			return status;
>  
> @@ -1033,7 +1038,7 @@ EXPORT_SYMBOL_GPL(iomap_truncate_page);
>  
>  static loff_t
>  iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct page *page = data;
>  	int ret;
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 1fc28c2da279..e3ccbf7daaae 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -358,7 +358,7 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
>  
>  static loff_t
>  iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct iomap_dio *dio = data;
>  
> diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c
> index f26fdd36e383..690ef2d7c6c8 100644
> --- a/fs/iomap/fiemap.c
> +++ b/fs/iomap/fiemap.c
> @@ -44,7 +44,7 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
>  
>  static loff_t
>  iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct fiemap_ctx *ctx = data;
>  	loff_t ret = length;
> @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(iomap_fiemap);
>  
>  static loff_t
>  iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap)
> +		void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	sector_t *bno = data, addr;
>  
> diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c
> index c04bad4b2b43..89f61d93c0bc 100644
> --- a/fs/iomap/seek.c
> +++ b/fs/iomap/seek.c
> @@ -119,7 +119,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
>  
>  static loff_t
>  iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
> -		      void *data, struct iomap *iomap)
> +		      void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	switch (iomap->type) {
>  	case IOMAP_UNWRITTEN:
> @@ -165,7 +165,7 @@ EXPORT_SYMBOL_GPL(iomap_seek_hole);
>  
>  static loff_t
>  iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
> -		      void *data, struct iomap *iomap)
> +		      void *data, struct iomap *iomap, struct iomap *srcmap)
>  {
>  	switch (iomap->type) {
>  	case IOMAP_HOLE:
> diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
> index 152a230f668d..a648dbf6991e 100644
> --- a/fs/iomap/swapfile.c
> +++ b/fs/iomap/swapfile.c
> @@ -76,7 +76,8 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
>   * distinction between written and unwritten extents.
>   */
>  static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
> -		loff_t count, void *data, struct iomap *iomap)
> +		loff_t count, void *data, struct iomap *iomap,
> +		struct iomap *srcmap)
>  {
>  	struct iomap_swapfile_info *isi = data;
>  	int error;
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index c0a492353826..016adcd7dd66 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -928,7 +928,8 @@ xfs_file_iomap_begin(
>  	loff_t			offset,
>  	loff_t			length,
>  	unsigned		flags,
> -	struct iomap		*iomap)
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
>  {
>  	struct xfs_inode	*ip = XFS_I(inode);
>  	struct xfs_mount	*mp = ip->i_mount;
> @@ -1154,7 +1155,8 @@ xfs_seek_iomap_begin(
>  	loff_t			offset,
>  	loff_t			length,
>  	unsigned		flags,
> -	struct iomap		*iomap)
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
>  {
>  	struct xfs_inode	*ip = XFS_I(inode);
>  	struct xfs_mount	*mp = ip->i_mount;
> @@ -1240,7 +1242,8 @@ xfs_xattr_iomap_begin(
>  	loff_t			offset,
>  	loff_t			length,
>  	unsigned		flags,
> -	struct iomap		*iomap)
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
>  {
>  	struct xfs_inode	*ip = XFS_I(inode);
>  	struct xfs_mount	*mp = ip->i_mount;
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 24c784e44274..37af5f9dc722 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -127,7 +127,8 @@ struct iomap_ops {
>  	 * The actual length is returned in iomap->length.
>  	 */
>  	int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length,
> -			unsigned flags, struct iomap *iomap);
> +			unsigned flags, struct iomap *iomap,
> +			struct iomap *srcmap);
>  
>  	/*
>  	 * Commit and/or unreserve space previous allocated using iomap_begin.
> @@ -143,7 +144,7 @@ struct iomap_ops {
>   * Main iomap iterator function.
>   */
>  typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> -		void *data, struct iomap *iomap);
> +		void *data, struct iomap *iomap, struct iomap *srcmap);
>  
>  loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
>  		unsigned flags, const struct iomap_ops *ops, void *data,
> -- 
> 2.20.1
>
Goldwyn Rodrigues Oct. 15, 2019, 1 p.m. UTC | #5
On 16:27 14/10,  Darrick J. Wong  wrote:
> On Tue, Oct 08, 2019 at 09:15:15AM +0200, Christoph Hellwig wrote:
> > From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > 
> > The srcmap is used to identify where the read is to be performed from.
> > It is passed to ->iomap_begin, which can fill it in if we need to read
> > data for partially written blocks from a different location than the
> > write target.  The srcmap is only supported for buffered writes so far.
> > 
> > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> Goldwyn,
> 
> Since we've reworked your original patch quite extensively, could you
> please have a look at (and if you approve, add an Acked-by) this new(er)
> version so we can get this series moving for 5.5?
> 
> 

Sorry, was on vacation until yesterday.
Yes, I have used this version in my series and I can confirm it works
correctly with btrfs. Thanks!

> > [hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as
> >       srcmap if not set, adjust length down to srcmap end as well]

Acked-by: Goldwyn Rodrigues <rgoldwyn@suse.com>

> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  fs/dax.c               |  9 ++++--
> >  fs/ext2/inode.c        |  2 +-
> >  fs/ext4/inode.c        |  2 +-
> >  fs/gfs2/bmap.c         |  3 +-
> >  fs/iomap/apply.c       | 25 ++++++++++++----
> >  fs/iomap/buffered-io.c | 65 +++++++++++++++++++++++-------------------
> >  fs/iomap/direct-io.c   |  2 +-
> >  fs/iomap/fiemap.c      |  4 +--
> >  fs/iomap/seek.c        |  4 +--
> >  fs/iomap/swapfile.c    |  3 +-
> >  fs/xfs/xfs_iomap.c     |  9 ++++--
> >  include/linux/iomap.h  |  5 ++--
> >  12 files changed, 80 insertions(+), 53 deletions(-)
> > 
> > diff --git a/fs/dax.c b/fs/dax.c
> > index 6bf81f931de3..920105457c2c 100644
> > --- a/fs/dax.c
> > +++ b/fs/dax.c
> > @@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
> >  
> >  static loff_t
> >  dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> > -		struct iomap *iomap)
> > +		struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct block_device *bdev = iomap->bdev;
> >  	struct dax_device *dax_dev = iomap->dax_dev;
> > @@ -1248,6 +1248,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> >  	unsigned long vaddr = vmf->address;
> >  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> >  	struct iomap iomap = { 0 };
> > +	struct iomap srcmap = { .type = IOMAP_HOLE };
> >  	unsigned flags = IOMAP_FAULT;
> >  	int error, major = 0;
> >  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> > @@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> >  	 * the file system block size to be equal the page size, which means
> >  	 * that we never have to deal with more than a single extent here.
> >  	 */
> > -	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
> > +	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
> >  	if (iomap_errp)
> >  		*iomap_errp = error;
> >  	if (error) {
> > @@ -1472,6 +1473,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
> >  	struct inode *inode = mapping->host;
> >  	vm_fault_t result = VM_FAULT_FALLBACK;
> >  	struct iomap iomap = { 0 };
> > +	struct iomap srcmap = { .type = IOMAP_HOLE };
> >  	pgoff_t max_pgoff;
> >  	void *entry;
> >  	loff_t pos;
> > @@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
> >  	 * to look up our filesystem block.
> >  	 */
> >  	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
> > -	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
> > +	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
> > +			&srcmap);
> >  	if (error)
> >  		goto unlock_entry;
> >  
> > diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> > index 7004ce581a32..467c13ff6b40 100644
> > --- a/fs/ext2/inode.c
> > +++ b/fs/ext2/inode.c
> > @@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
> >  
> >  #ifdef CONFIG_FS_DAX
> >  static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > -		unsigned flags, struct iomap *iomap)
> > +		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	unsigned int blkbits = inode->i_blkbits;
> >  	unsigned long first_block = offset >> blkbits;
> > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> > index 516faa280ced..abaaf7d96ca4 100644
> > --- a/fs/ext4/inode.c
> > +++ b/fs/ext4/inode.c
> > @@ -3407,7 +3407,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
> >  }
> >  
> >  static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > -			    unsigned flags, struct iomap *iomap)
> > +		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> >  	unsigned int blkbits = inode->i_blkbits;
> > diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> > index f63df54a08c6..516103248272 100644
> > --- a/fs/gfs2/bmap.c
> > +++ b/fs/gfs2/bmap.c
> > @@ -1149,7 +1149,8 @@ static inline bool gfs2_iomap_need_write_lock(unsigned flags)
> >  }
> >  
> >  static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
> > -			    unsigned flags, struct iomap *iomap)
> > +			    unsigned flags, struct iomap *iomap,
> > +			    struct iomap *srcmap)
> >  {
> >  	struct gfs2_inode *ip = GFS2_I(inode);
> >  	struct metapath mp = { .mp_aheight = 1, };
> > diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
> > index 54c02aecf3cd..484dd8eda861 100644
> > --- a/fs/iomap/apply.c
> > +++ b/fs/iomap/apply.c
> > @@ -23,8 +23,10 @@ loff_t
> >  iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
> >  		const struct iomap_ops *ops, void *data, iomap_actor_t actor)
> >  {
> > -	struct iomap iomap = { 0 };
> > +	struct iomap iomap = { .type = IOMAP_HOLE };
> > +	struct iomap srcmap = { .type = IOMAP_HOLE };
> >  	loff_t written = 0, ret;
> > +	u64 end;
> >  
> >  	/*
> >  	 * Need to map a range from start position for length bytes. This can
> > @@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
> >  	 * expose transient stale data. If the reserve fails, we can safely
> >  	 * back out at this point as there is nothing to undo.
> >  	 */
> > -	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
> > +	ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
> >  	if (ret)
> >  		return ret;
> >  	if (WARN_ON(iomap.offset > pos))
> > @@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
> >  	 * Cut down the length to the one actually provided by the filesystem,
> >  	 * as it might not be able to give us the whole size that we requested.
> >  	 */
> > -	if (iomap.offset + iomap.length < pos + length)
> > -		length = iomap.offset + iomap.length - pos;
> > +	end = iomap.offset + iomap.length;
> > +	if (srcmap.type != IOMAP_HOLE)
> > +		end = min(end, srcmap.offset + srcmap.length);
> > +	if (pos + length > end)
> > +		length = end - pos;
> >  
> >  	/*
> > -	 * Now that we have guaranteed that the space allocation will succeed.
> > +	 * Now that we have guaranteed that the space allocation will succeed,
> >  	 * we can do the copy-in page by page without having to worry about
> >  	 * failures exposing transient data.
> > +	 *
> > +	 * To support COW operations, we read in data for partially written
> > +	 * blocks from the srcmap if the file system filled it in.  In that case
> > +	 * the length must be limited to the earlier of the ends of the iomaps.
> > +	 * If the file system did not provide a srcmap we pass in the normal
> > +	 * iomap into the actors so that they don't need to have special
> > +	 * handling for the two cases.
> >  	 */
> > -	written = actor(inode, pos, length, data, &iomap);
> > +	written = actor(inode, pos, length, data, &iomap,
> > +			srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
> >  
> >  	/*
> >  	 * Now the data has been copied, commit the range we've copied.  This
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index ac1bbed71a9b..eb2c6d73a837 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -234,7 +234,7 @@ static inline bool iomap_block_needs_zeroing(struct inode *inode,
> >  
> >  static loff_t
> >  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> > -		struct iomap *iomap)
> > +		struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct iomap_readpage_ctx *ctx = data;
> >  	struct page *page = ctx->cur_page;
> > @@ -382,7 +382,7 @@ iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
> >  
> >  static loff_t
> >  iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
> > -		void *data, struct iomap *iomap)
> > +		void *data, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct iomap_readpage_ctx *ctx = data;
> >  	loff_t done, ret;
> > @@ -402,7 +402,7 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
> >  			ctx->cur_page_in_bio = false;
> >  		}
> >  		ret = iomap_readpage_actor(inode, pos + done, length - done,
> > -				ctx, iomap);
> > +				ctx, iomap, srcmap);
> >  	}
> >  
> >  	return done;
> > @@ -582,7 +582,7 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
> >  
> >  static int
> >  __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> > -		struct page *page, struct iomap *iomap)
> > +		struct page *page, struct iomap *srcmap)
> >  {
> >  	struct iomap_page *iop = iomap_page_create(inode, page);
> >  	loff_t block_size = i_blocksize(inode);
> > @@ -605,7 +605,7 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> >  		    (to <= poff || to >= poff + plen))
> >  			continue;
> >  
> > -		if (iomap_block_needs_zeroing(inode, iomap, block_start)) {
> > +		if (iomap_block_needs_zeroing(inode, srcmap, block_start)) {
> >  			if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
> >  				return -EIO;
> >  			zero_user_segments(page, poff, from, to, poff + plen);
> > @@ -614,7 +614,7 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> >  		}
> >  
> >  		status = iomap_read_page_sync(block_start, page, poff, plen,
> > -				iomap);
> > +				srcmap);
> >  		if (status)
> >  			return status;
> >  	} while ((block_start += plen) < block_end);
> > @@ -624,13 +624,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> >  
> >  static int
> >  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> > -		struct page **pagep, struct iomap *iomap)
> > +		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	const struct iomap_page_ops *page_ops = iomap->page_ops;
> >  	struct page *page;
> >  	int status = 0;
> >  
> >  	BUG_ON(pos + len > iomap->offset + iomap->length);
> > +	if (srcmap != iomap)
> > +		BUG_ON(pos + len > srcmap->offset + srcmap->length);
> >  
> >  	if (fatal_signal_pending(current))
> >  		return -EINTR;
> > @@ -648,13 +650,13 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> >  		goto out_no_page;
> >  	}
> >  
> > -	if (iomap->type == IOMAP_INLINE)
> > -		iomap_read_inline_data(inode, page, iomap);
> > +	if (srcmap->type == IOMAP_INLINE)
> > +		iomap_read_inline_data(inode, page, srcmap);
> >  	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
> > -		status = __block_write_begin_int(page, pos, len, NULL, iomap);
> > +		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
> >  	else
> >  		status = __iomap_write_begin(inode, pos, len, flags, page,
> > -				iomap);
> > +				srcmap);
> >  
> >  	if (unlikely(status))
> >  		goto out_unlock;
> > @@ -740,16 +742,16 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
> >  }
> >  
> >  static int
> > -iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
> > -		unsigned copied, struct page *page, struct iomap *iomap)
> > +iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
> > +		struct page *page, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	const struct iomap_page_ops *page_ops = iomap->page_ops;
> >  	loff_t old_size = inode->i_size;
> >  	int ret;
> >  
> > -	if (iomap->type == IOMAP_INLINE) {
> > +	if (srcmap->type == IOMAP_INLINE) {
> >  		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
> > -	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
> > +	} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
> >  		ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
> >  				page, NULL);
> >  	} else {
> > @@ -780,7 +782,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
> >  
> >  static loff_t
> >  iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> > -		struct iomap *iomap)
> > +		struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct iov_iter *i = data;
> >  	long status = 0;
> > @@ -814,7 +816,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >  			break;
> >  		}
> >  
> > -		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
> > +		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
> > +				srcmap);
> >  		if (unlikely(status))
> >  			break;
> >  
> > @@ -825,8 +828,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >  
> >  		flush_dcache_page(page);
> >  
> > -		status = iomap_write_end(inode, pos, bytes, copied, page,
> > -				iomap);
> > +		status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
> > +				srcmap);
> >  		if (unlikely(status < 0))
> >  			break;
> >  		copied = status;
> > @@ -879,7 +882,7 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
> >  
> >  static loff_t
> >  iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> > -		struct iomap *iomap)
> > +		struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	long status = 0;
> >  	ssize_t written = 0;
> > @@ -888,7 +891,7 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >  	if (!(iomap->flags & IOMAP_F_SHARED))
> >  		return length;
> >  	/* don't bother with holes or unwritten extents */
> > -	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> > +	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
> >  		return length;
> >  
> >  	do {
> > @@ -897,11 +900,12 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >  		struct page *page;
> >  
> >  		status = iomap_write_begin(inode, pos, bytes,
> > -				IOMAP_WRITE_F_UNSHARE, &page, iomap);
> > +				IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
> >  		if (unlikely(status))
> >  			return status;
> >  
> > -		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
> > +		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
> > +				srcmap);
> >  		if (unlikely(status <= 0)) {
> >  			if (WARN_ON_ONCE(status == 0))
> >  				return -EIO;
> > @@ -940,19 +944,19 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
> >  EXPORT_SYMBOL_GPL(iomap_file_unshare);
> >  
> >  static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
> > -		unsigned bytes, struct iomap *iomap)
> > +		unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct page *page;
> >  	int status;
> >  
> > -	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
> > +	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
> >  	if (status)
> >  		return status;
> >  
> >  	zero_user(page, offset, bytes);
> >  	mark_page_accessed(page);
> >  
> > -	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
> > +	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
> >  }
> >  
> >  static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
> > @@ -964,14 +968,14 @@ static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
> >  
> >  static loff_t
> >  iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
> > -		void *data, struct iomap *iomap)
> > +		void *data, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	bool *did_zero = data;
> >  	loff_t written = 0;
> >  	int status;
> >  
> >  	/* already zeroed?  we're done. */
> > -	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
> > +	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
> >  		return count;
> >  
> >  	do {
> > @@ -983,7 +987,8 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
> >  		if (IS_DAX(inode))
> >  			status = iomap_dax_zero(pos, offset, bytes, iomap);
> >  		else
> > -			status = iomap_zero(inode, pos, offset, bytes, iomap);
> > +			status = iomap_zero(inode, pos, offset, bytes, iomap,
> > +					srcmap);
> >  		if (status < 0)
> >  			return status;
> >  
> > @@ -1033,7 +1038,7 @@ EXPORT_SYMBOL_GPL(iomap_truncate_page);
> >  
> >  static loff_t
> >  iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
> > -		void *data, struct iomap *iomap)
> > +		void *data, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct page *page = data;
> >  	int ret;
> > diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> > index 1fc28c2da279..e3ccbf7daaae 100644
> > --- a/fs/iomap/direct-io.c
> > +++ b/fs/iomap/direct-io.c
> > @@ -358,7 +358,7 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
> >  
> >  static loff_t
> >  iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
> > -		void *data, struct iomap *iomap)
> > +		void *data, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct iomap_dio *dio = data;
> >  
> > diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c
> > index f26fdd36e383..690ef2d7c6c8 100644
> > --- a/fs/iomap/fiemap.c
> > +++ b/fs/iomap/fiemap.c
> > @@ -44,7 +44,7 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
> >  
> >  static loff_t
> >  iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> > -		struct iomap *iomap)
> > +		struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	struct fiemap_ctx *ctx = data;
> >  	loff_t ret = length;
> > @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(iomap_fiemap);
> >  
> >  static loff_t
> >  iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
> > -		void *data, struct iomap *iomap)
> > +		void *data, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	sector_t *bno = data, addr;
> >  
> > diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c
> > index c04bad4b2b43..89f61d93c0bc 100644
> > --- a/fs/iomap/seek.c
> > +++ b/fs/iomap/seek.c
> > @@ -119,7 +119,7 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
> >  
> >  static loff_t
> >  iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
> > -		      void *data, struct iomap *iomap)
> > +		      void *data, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	switch (iomap->type) {
> >  	case IOMAP_UNWRITTEN:
> > @@ -165,7 +165,7 @@ EXPORT_SYMBOL_GPL(iomap_seek_hole);
> >  
> >  static loff_t
> >  iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
> > -		      void *data, struct iomap *iomap)
> > +		      void *data, struct iomap *iomap, struct iomap *srcmap)
> >  {
> >  	switch (iomap->type) {
> >  	case IOMAP_HOLE:
> > diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
> > index 152a230f668d..a648dbf6991e 100644
> > --- a/fs/iomap/swapfile.c
> > +++ b/fs/iomap/swapfile.c
> > @@ -76,7 +76,8 @@ static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
> >   * distinction between written and unwritten extents.
> >   */
> >  static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
> > -		loff_t count, void *data, struct iomap *iomap)
> > +		loff_t count, void *data, struct iomap *iomap,
> > +		struct iomap *srcmap)
> >  {
> >  	struct iomap_swapfile_info *isi = data;
> >  	int error;
> > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> > index c0a492353826..016adcd7dd66 100644
> > --- a/fs/xfs/xfs_iomap.c
> > +++ b/fs/xfs/xfs_iomap.c
> > @@ -928,7 +928,8 @@ xfs_file_iomap_begin(
> >  	loff_t			offset,
> >  	loff_t			length,
> >  	unsigned		flags,
> > -	struct iomap		*iomap)
> > +	struct iomap		*iomap,
> > +	struct iomap		*srcmap)
> >  {
> >  	struct xfs_inode	*ip = XFS_I(inode);
> >  	struct xfs_mount	*mp = ip->i_mount;
> > @@ -1154,7 +1155,8 @@ xfs_seek_iomap_begin(
> >  	loff_t			offset,
> >  	loff_t			length,
> >  	unsigned		flags,
> > -	struct iomap		*iomap)
> > +	struct iomap		*iomap,
> > +	struct iomap		*srcmap)
> >  {
> >  	struct xfs_inode	*ip = XFS_I(inode);
> >  	struct xfs_mount	*mp = ip->i_mount;
> > @@ -1240,7 +1242,8 @@ xfs_xattr_iomap_begin(
> >  	loff_t			offset,
> >  	loff_t			length,
> >  	unsigned		flags,
> > -	struct iomap		*iomap)
> > +	struct iomap		*iomap,
> > +	struct iomap		*srcmap)
> >  {
> >  	struct xfs_inode	*ip = XFS_I(inode);
> >  	struct xfs_mount	*mp = ip->i_mount;
> > diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> > index 24c784e44274..37af5f9dc722 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -127,7 +127,8 @@ struct iomap_ops {
> >  	 * The actual length is returned in iomap->length.
> >  	 */
> >  	int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length,
> > -			unsigned flags, struct iomap *iomap);
> > +			unsigned flags, struct iomap *iomap,
> > +			struct iomap *srcmap);
> >  
> >  	/*
> >  	 * Commit and/or unreserve space previous allocated using iomap_begin.
> > @@ -143,7 +144,7 @@ struct iomap_ops {
> >   * Main iomap iterator function.
> >   */
> >  typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
> > -		void *data, struct iomap *iomap);
> > +		void *data, struct iomap *iomap, struct iomap *srcmap);
> >  
> >  loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
> >  		unsigned flags, const struct iomap_ops *ops, void *data,
> > -- 
> > 2.20.1
> > 
>
diff mbox series

Patch

diff --git a/fs/dax.c b/fs/dax.c
index 6bf81f931de3..920105457c2c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1090,7 +1090,7 @@  EXPORT_SYMBOL_GPL(__dax_zero_page_range);
 
 static loff_t
 dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct block_device *bdev = iomap->bdev;
 	struct dax_device *dax_dev = iomap->dax_dev;
@@ -1248,6 +1248,7 @@  static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	unsigned long vaddr = vmf->address;
 	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
 	struct iomap iomap = { 0 };
+	struct iomap srcmap = { .type = IOMAP_HOLE };
 	unsigned flags = IOMAP_FAULT;
 	int error, major = 0;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -1292,7 +1293,7 @@  static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * the file system block size to be equal the page size, which means
 	 * that we never have to deal with more than a single extent here.
 	 */
-	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
 	if (iomap_errp)
 		*iomap_errp = error;
 	if (error) {
@@ -1472,6 +1473,7 @@  static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	struct inode *inode = mapping->host;
 	vm_fault_t result = VM_FAULT_FALLBACK;
 	struct iomap iomap = { 0 };
+	struct iomap srcmap = { .type = IOMAP_HOLE };
 	pgoff_t max_pgoff;
 	void *entry;
 	loff_t pos;
@@ -1546,7 +1548,8 @@  static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * to look up our filesystem block.
 	 */
 	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
-	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
+	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
+			&srcmap);
 	if (error)
 		goto unlock_entry;
 
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 7004ce581a32..467c13ff6b40 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -801,7 +801,7 @@  int ext2_get_block(struct inode *inode, sector_t iblock,
 
 #ifdef CONFIG_FS_DAX
 static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
-		unsigned flags, struct iomap *iomap)
+		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
 {
 	unsigned int blkbits = inode->i_blkbits;
 	unsigned long first_block = offset >> blkbits;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 516faa280ced..abaaf7d96ca4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3407,7 +3407,7 @@  static bool ext4_inode_datasync_dirty(struct inode *inode)
 }
 
 static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
-			    unsigned flags, struct iomap *iomap)
+		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned int blkbits = inode->i_blkbits;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index f63df54a08c6..516103248272 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1149,7 +1149,8 @@  static inline bool gfs2_iomap_need_write_lock(unsigned flags)
 }
 
 static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
-			    unsigned flags, struct iomap *iomap)
+			    unsigned flags, struct iomap *iomap,
+			    struct iomap *srcmap)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct metapath mp = { .mp_aheight = 1, };
diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
index 54c02aecf3cd..484dd8eda861 100644
--- a/fs/iomap/apply.c
+++ b/fs/iomap/apply.c
@@ -23,8 +23,10 @@  loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 		const struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
-	struct iomap iomap = { 0 };
+	struct iomap iomap = { .type = IOMAP_HOLE };
+	struct iomap srcmap = { .type = IOMAP_HOLE };
 	loff_t written = 0, ret;
+	u64 end;
 
 	/*
 	 * Need to map a range from start position for length bytes. This can
@@ -38,7 +40,7 @@  iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 	 * expose transient stale data. If the reserve fails, we can safely
 	 * back out at this point as there is nothing to undo.
 	 */
-	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
+	ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
 	if (ret)
 		return ret;
 	if (WARN_ON(iomap.offset > pos))
@@ -50,15 +52,26 @@  iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 	 * Cut down the length to the one actually provided by the filesystem,
 	 * as it might not be able to give us the whole size that we requested.
 	 */
-	if (iomap.offset + iomap.length < pos + length)
-		length = iomap.offset + iomap.length - pos;
+	end = iomap.offset + iomap.length;
+	if (srcmap.type != IOMAP_HOLE)
+		end = min(end, srcmap.offset + srcmap.length);
+	if (pos + length > end)
+		length = end - pos;
 
 	/*
-	 * Now that we have guaranteed that the space allocation will succeed.
+	 * Now that we have guaranteed that the space allocation will succeed,
 	 * we can do the copy-in page by page without having to worry about
 	 * failures exposing transient data.
+	 *
+	 * To support COW operations, we read in data for partially written
+	 * blocks from the srcmap if the file system filled it in.  In that case
+	 * the length must be limited to the earlier of the ends of the iomaps.
+	 * If the file system did not provide a srcmap we pass in the normal
+	 * iomap into the actors so that they don't need to have special
+	 * handling for the two cases.
 	 */
-	written = actor(inode, pos, length, data, &iomap);
+	written = actor(inode, pos, length, data, &iomap,
+			srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
 
 	/*
 	 * Now the data has been copied, commit the range we've copied.  This
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index ac1bbed71a9b..eb2c6d73a837 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -234,7 +234,7 @@  static inline bool iomap_block_needs_zeroing(struct inode *inode,
 
 static loff_t
 iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct iomap_readpage_ctx *ctx = data;
 	struct page *page = ctx->cur_page;
@@ -382,7 +382,7 @@  iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
 
 static loff_t
 iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct iomap_readpage_ctx *ctx = data;
 	loff_t done, ret;
@@ -402,7 +402,7 @@  iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
 			ctx->cur_page_in_bio = false;
 		}
 		ret = iomap_readpage_actor(inode, pos + done, length - done,
-				ctx, iomap);
+				ctx, iomap, srcmap);
 	}
 
 	return done;
@@ -582,7 +582,7 @@  iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
 
 static int
 __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
-		struct page *page, struct iomap *iomap)
+		struct page *page, struct iomap *srcmap)
 {
 	struct iomap_page *iop = iomap_page_create(inode, page);
 	loff_t block_size = i_blocksize(inode);
@@ -605,7 +605,7 @@  __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 		    (to <= poff || to >= poff + plen))
 			continue;
 
-		if (iomap_block_needs_zeroing(inode, iomap, block_start)) {
+		if (iomap_block_needs_zeroing(inode, srcmap, block_start)) {
 			if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
 				return -EIO;
 			zero_user_segments(page, poff, from, to, poff + plen);
@@ -614,7 +614,7 @@  __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 		}
 
 		status = iomap_read_page_sync(block_start, page, poff, plen,
-				iomap);
+				srcmap);
 		if (status)
 			return status;
 	} while ((block_start += plen) < block_end);
@@ -624,13 +624,15 @@  __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
-		struct page **pagep, struct iomap *iomap)
+		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
 {
 	const struct iomap_page_ops *page_ops = iomap->page_ops;
 	struct page *page;
 	int status = 0;
 
 	BUG_ON(pos + len > iomap->offset + iomap->length);
+	if (srcmap != iomap)
+		BUG_ON(pos + len > srcmap->offset + srcmap->length);
 
 	if (fatal_signal_pending(current))
 		return -EINTR;
@@ -648,13 +650,13 @@  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		goto out_no_page;
 	}
 
-	if (iomap->type == IOMAP_INLINE)
-		iomap_read_inline_data(inode, page, iomap);
+	if (srcmap->type == IOMAP_INLINE)
+		iomap_read_inline_data(inode, page, srcmap);
 	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
-		status = __block_write_begin_int(page, pos, len, NULL, iomap);
+		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
 	else
 		status = __iomap_write_begin(inode, pos, len, flags, page,
-				iomap);
+				srcmap);
 
 	if (unlikely(status))
 		goto out_unlock;
@@ -740,16 +742,16 @@  iomap_write_end_inline(struct inode *inode, struct page *page,
 }
 
 static int
-iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-		unsigned copied, struct page *page, struct iomap *iomap)
+iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
+		struct page *page, struct iomap *iomap, struct iomap *srcmap)
 {
 	const struct iomap_page_ops *page_ops = iomap->page_ops;
 	loff_t old_size = inode->i_size;
 	int ret;
 
-	if (iomap->type == IOMAP_INLINE) {
+	if (srcmap->type == IOMAP_INLINE) {
 		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
-	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+	} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
 		ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
 				page, NULL);
 	} else {
@@ -780,7 +782,7 @@  iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 
 static loff_t
 iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct iov_iter *i = data;
 	long status = 0;
@@ -814,7 +816,8 @@  iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 			break;
 		}
 
-		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
+		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
+				srcmap);
 		if (unlikely(status))
 			break;
 
@@ -825,8 +828,8 @@  iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		flush_dcache_page(page);
 
-		status = iomap_write_end(inode, pos, bytes, copied, page,
-				iomap);
+		status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
+				srcmap);
 		if (unlikely(status < 0))
 			break;
 		copied = status;
@@ -879,7 +882,7 @@  EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
 
 static loff_t
 iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	long status = 0;
 	ssize_t written = 0;
@@ -888,7 +891,7 @@  iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	if (!(iomap->flags & IOMAP_F_SHARED))
 		return length;
 	/* don't bother with holes or unwritten extents */
-	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
 		return length;
 
 	do {
@@ -897,11 +900,12 @@  iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		struct page *page;
 
 		status = iomap_write_begin(inode, pos, bytes,
-				IOMAP_WRITE_F_UNSHARE, &page, iomap);
+				IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
 		if (unlikely(status))
 			return status;
 
-		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
+		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
+				srcmap);
 		if (unlikely(status <= 0)) {
 			if (WARN_ON_ONCE(status == 0))
 				return -EIO;
@@ -940,19 +944,19 @@  iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 EXPORT_SYMBOL_GPL(iomap_file_unshare);
 
 static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
-		unsigned bytes, struct iomap *iomap)
+		unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct page *page;
 	int status;
 
-	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
+	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
 	if (status)
 		return status;
 
 	zero_user(page, offset, bytes);
 	mark_page_accessed(page);
 
-	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
+	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
 }
 
 static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
@@ -964,14 +968,14 @@  static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
 
 static loff_t
 iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	bool *did_zero = data;
 	loff_t written = 0;
 	int status;
 
 	/* already zeroed?  we're done. */
-	if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
 		return count;
 
 	do {
@@ -983,7 +987,8 @@  iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
 		if (IS_DAX(inode))
 			status = iomap_dax_zero(pos, offset, bytes, iomap);
 		else
-			status = iomap_zero(inode, pos, offset, bytes, iomap);
+			status = iomap_zero(inode, pos, offset, bytes, iomap,
+					srcmap);
 		if (status < 0)
 			return status;
 
@@ -1033,7 +1038,7 @@  EXPORT_SYMBOL_GPL(iomap_truncate_page);
 
 static loff_t
 iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct page *page = data;
 	int ret;
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 1fc28c2da279..e3ccbf7daaae 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -358,7 +358,7 @@  iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
 
 static loff_t
 iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	struct iomap_dio *dio = data;
 
diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c
index f26fdd36e383..690ef2d7c6c8 100644
--- a/fs/iomap/fiemap.c
+++ b/fs/iomap/fiemap.c
@@ -44,7 +44,7 @@  static int iomap_to_fiemap(struct fiemap_extent_info *fi,
 
 static loff_t
 iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
-		struct iomap *iomap)
+		struct iomap *iomap, struct iomap *srcmap)
 {
 	struct fiemap_ctx *ctx = data;
 	loff_t ret = length;
@@ -111,7 +111,7 @@  EXPORT_SYMBOL_GPL(iomap_fiemap);
 
 static loff_t
 iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+		void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	sector_t *bno = data, addr;
 
diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c
index c04bad4b2b43..89f61d93c0bc 100644
--- a/fs/iomap/seek.c
+++ b/fs/iomap/seek.c
@@ -119,7 +119,7 @@  page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
 
 static loff_t
 iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
-		      void *data, struct iomap *iomap)
+		      void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	switch (iomap->type) {
 	case IOMAP_UNWRITTEN:
@@ -165,7 +165,7 @@  EXPORT_SYMBOL_GPL(iomap_seek_hole);
 
 static loff_t
 iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
-		      void *data, struct iomap *iomap)
+		      void *data, struct iomap *iomap, struct iomap *srcmap)
 {
 	switch (iomap->type) {
 	case IOMAP_HOLE:
diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
index 152a230f668d..a648dbf6991e 100644
--- a/fs/iomap/swapfile.c
+++ b/fs/iomap/swapfile.c
@@ -76,7 +76,8 @@  static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
  * distinction between written and unwritten extents.
  */
 static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
-		loff_t count, void *data, struct iomap *iomap)
+		loff_t count, void *data, struct iomap *iomap,
+		struct iomap *srcmap)
 {
 	struct iomap_swapfile_info *isi = data;
 	int error;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index c0a492353826..016adcd7dd66 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -928,7 +928,8 @@  xfs_file_iomap_begin(
 	loff_t			offset,
 	loff_t			length,
 	unsigned		flags,
-	struct iomap		*iomap)
+	struct iomap		*iomap,
+	struct iomap		*srcmap)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1154,7 +1155,8 @@  xfs_seek_iomap_begin(
 	loff_t			offset,
 	loff_t			length,
 	unsigned		flags,
-	struct iomap		*iomap)
+	struct iomap		*iomap,
+	struct iomap		*srcmap)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1240,7 +1242,8 @@  xfs_xattr_iomap_begin(
 	loff_t			offset,
 	loff_t			length,
 	unsigned		flags,
-	struct iomap		*iomap)
+	struct iomap		*iomap,
+	struct iomap		*srcmap)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 24c784e44274..37af5f9dc722 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -127,7 +127,8 @@  struct iomap_ops {
 	 * The actual length is returned in iomap->length.
 	 */
 	int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length,
-			unsigned flags, struct iomap *iomap);
+			unsigned flags, struct iomap *iomap,
+			struct iomap *srcmap);
 
 	/*
 	 * Commit and/or unreserve space previous allocated using iomap_begin.
@@ -143,7 +144,7 @@  struct iomap_ops {
  * Main iomap iterator function.
  */
 typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-		void *data, struct iomap *iomap);
+		void *data, struct iomap *iomap, struct iomap *srcmap);
 
 loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
 		unsigned flags, const struct iomap_ops *ops, void *data,