diff mbox series

[8/9] Use FIEMAP for FIBMAP calls

Message ID 20190731141245.7230-9-cmaiolino@redhat.com (mailing list archive)
State New, archived
Headers show
Series New ->fiemap infrastructure and ->bmap removal | expand

Commit Message

Carlos Maiolino July 31, 2019, 2:12 p.m. UTC
Enables the usage of FIEMAP ioctl infrastructure to handle FIBMAP calls.
From now on, ->bmap() methods can start to be removed from filesystems
which already provides ->fiemap().

This adds a new helper - bmap_fiemap() - which is used to fill in the
fiemap request, call into the underlying filesystem and check the flags
set in the extent requested.

Add a new fiemap fill extent callback to handle the in-kernel only
fiemap_extent structure used for FIBMAP.

The new FIEMAP_KERNEL_FIBMAP flag, is used to tell the filesystem
->fiemap interface, that the call is coming from ioctl_fibmap. The
addition of this new flag, requires an update to fiemap_check_flags(),
so it doesn't treat FIBMAP requests as invalid.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
---

Changelog:

V4:
	- Fix if conditional in bmap()
	- Add filesystem-specific modifications
V3:
	- Add FIEMAP_EXTENT_SHARED to the list of invalid extents in
	  bmap_fiemap()
	- Rename fi_extents_start to fi_cb_data
	- Use if conditional instead of ternary operator
	- Make fiemap_fill_* callbacks static (which required the
	  removal of some macros
	- Set FIEMAP_FLAG_SYNC when calling in ->fiemap method from
	  fibmap
	- Add FIEMAP_KERNEL_FIBMAP flag, to identify the usage of fiemap
	  infrastructure for fibmap calls, defined in fs.h so it's not
	  exported to userspace.
	- Update fiemap_check_flags() to understand FIEMAP_KERNEL_FIBMAP
	- Update filesystems supporting both FIBMAP and FIEMAP, which
	  need extra checks on FIBMAP calls

V2:
	- Now based on the updated fiemap_extent_info,
	- move the fiemap call itself to a new helper

 fs/ext4/extents.c     |  7 +++-
 fs/f2fs/data.c        | 10 +++++-
 fs/gfs2/inode.c       |  6 +++-
 fs/inode.c            | 81 +++++++++++++++++++++++++++++++++++++++++--
 fs/ioctl.c            | 40 ++++++++++++++-------
 fs/iomap.c            |  2 +-
 fs/ocfs2/extent_map.c |  8 ++++-
 fs/xfs/xfs_iops.c     |  5 +++
 include/linux/fs.h    |  4 +++
 9 files changed, 144 insertions(+), 19 deletions(-)

Comments

Darrick J. Wong July 31, 2019, 11:22 p.m. UTC | #1
On Wed, Jul 31, 2019 at 04:12:44PM +0200, Carlos Maiolino wrote:
> Enables the usage of FIEMAP ioctl infrastructure to handle FIBMAP calls.
> From now on, ->bmap() methods can start to be removed from filesystems
> which already provides ->fiemap().
> 
> This adds a new helper - bmap_fiemap() - which is used to fill in the
> fiemap request, call into the underlying filesystem and check the flags
> set in the extent requested.
> 
> Add a new fiemap fill extent callback to handle the in-kernel only
> fiemap_extent structure used for FIBMAP.
> 
> The new FIEMAP_KERNEL_FIBMAP flag, is used to tell the filesystem
> ->fiemap interface, that the call is coming from ioctl_fibmap. The
> addition of this new flag, requires an update to fiemap_check_flags(),
> so it doesn't treat FIBMAP requests as invalid.
> 
> Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> ---
> 
> Changelog:
> 
> V4:
> 	- Fix if conditional in bmap()
> 	- Add filesystem-specific modifications
> V3:
> 	- Add FIEMAP_EXTENT_SHARED to the list of invalid extents in
> 	  bmap_fiemap()
> 	- Rename fi_extents_start to fi_cb_data
> 	- Use if conditional instead of ternary operator
> 	- Make fiemap_fill_* callbacks static (which required the
> 	  removal of some macros
> 	- Set FIEMAP_FLAG_SYNC when calling in ->fiemap method from
> 	  fibmap
> 	- Add FIEMAP_KERNEL_FIBMAP flag, to identify the usage of fiemap
> 	  infrastructure for fibmap calls, defined in fs.h so it's not
> 	  exported to userspace.
> 	- Update fiemap_check_flags() to understand FIEMAP_KERNEL_FIBMAP
> 	- Update filesystems supporting both FIBMAP and FIEMAP, which
> 	  need extra checks on FIBMAP calls
> 
> V2:
> 	- Now based on the updated fiemap_extent_info,
> 	- move the fiemap call itself to a new helper
> 
>  fs/ext4/extents.c     |  7 +++-
>  fs/f2fs/data.c        | 10 +++++-
>  fs/gfs2/inode.c       |  6 +++-
>  fs/inode.c            | 81 +++++++++++++++++++++++++++++++++++++++++--
>  fs/ioctl.c            | 40 ++++++++++++++-------
>  fs/iomap.c            |  2 +-
>  fs/ocfs2/extent_map.c |  8 ++++-
>  fs/xfs/xfs_iops.c     |  5 +++
>  include/linux/fs.h    |  4 +++
>  9 files changed, 144 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 436e564ebdd6..093b6a07067f 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -5001,7 +5001,9 @@ static int ext4_find_delayed_extent(struct inode *inode,
>  	return next_del;
>  }
>  /* fiemap flags we can handle specified here */
> -#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
> +#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC | \
> +				 FIEMAP_FLAG_XATTR| \
> +				 FIEMAP_KERNEL_FIBMAP)
>  
>  static int ext4_xattr_fiemap(struct inode *inode,
>  				struct fiemap_extent_info *fieinfo)
> @@ -5048,6 +5050,9 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
>  	if (ext4_has_inline_data(inode)) {
>  		int has_inline = 1;
>  
> +		if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> +			return -EINVAL;

Wouldn't the inline data case be caught by fiemap_bmap and turned into
-EINVAL?

> +
>  		error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline,
>  						start, len);
>  
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 2979ca40d192..29b6c48fb6cc 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1409,6 +1409,9 @@ static int f2fs_xattr_fiemap(struct inode *inode,
>  	return (err < 0 ? err : 0);
>  }
>  
> +#define F2FS_FIEMAP_COMPAT	(FIEMAP_FLAG_SYNC | \
> +				 FIEMAP_FLAG_XATTR| \
> +				 FIEMAP_KERNEL_FIBMAP)
>  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
>  {
>  	u64 start = fieinfo->fi_start;
> @@ -1426,7 +1429,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
>  			return ret;
>  	}
>  
> -	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
> +	ret = fiemap_check_flags(fieinfo, F2FS_FIEMAP_COMPAT);
>  	if (ret)
>  		return ret;
>  
> @@ -1438,6 +1441,11 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
>  	}
>  
>  	if (f2fs_has_inline_data(inode)) {
> +
> +		ret = -EINVAL;
> +		if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> +			goto out;
> +
>  		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
>  		if (ret != -EAGAIN)
>  			goto out;
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index df31bd8ecf6f..30554b4f49c3 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -2016,7 +2016,11 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
>  	if (ret)
>  		goto out;
>  
> -	ret = iomap_fiemap(inode, fieinfo, &gfs2_iomap_ops);
> +	if (gfs2_is_stuffed(ip) &&
> +	    (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP))
> +		ret = -EINVAL;
> +	else
> +		ret = iomap_fiemap(inode, fieinfo, &gfs2_iomap_ops);
>  
>  	gfs2_glock_dq_uninit(&gh);
>  
> diff --git a/fs/inode.c b/fs/inode.c
> index 824fa54d393d..02552b09e77f 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -1575,6 +1575,78 @@ void iput(struct inode *inode)
>  }
>  EXPORT_SYMBOL(iput);
>  
> +static int fiemap_fill_kernel_extent(struct fiemap_extent_info *fieinfo,
> +			u64 logical, u64 phys, u64 len, u32 flags)
> +{
> +	struct fiemap_extent *extent = fieinfo->fi_cb_data;
> +
> +	/* only count the extents */
> +	if (fieinfo->fi_cb_data == 0) {
> +		fieinfo->fi_extents_mapped++;
> +		goto out;
> +	}
> +
> +	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
> +		return 1;
> +
> +	if (flags & FIEMAP_EXTENT_DELALLOC)
> +		flags |= FIEMAP_EXTENT_UNKNOWN;
> +	if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
> +		flags |= FIEMAP_EXTENT_ENCODED;
> +	if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
> +		flags |= FIEMAP_EXTENT_NOT_ALIGNED;
> +
> +	extent->fe_logical = logical;
> +	extent->fe_physical = phys;
> +	extent->fe_length = len;
> +	extent->fe_flags = flags;
> +
> +	fieinfo->fi_extents_mapped++;
> +
> +	if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
> +		return 1;
> +
> +out:
> +	if (flags & FIEMAP_EXTENT_LAST)
> +		return 1;
> +	return 0;
> +}
> +
> +static int bmap_fiemap(struct inode *inode, sector_t *block)
> +{
> +	struct fiemap_extent_info fieinfo = { 0, };
> +	struct fiemap_extent fextent;
> +	u64 start = *block << inode->i_blkbits;
> +	int error = -EINVAL;
> +
> +	fextent.fe_logical = 0;
> +	fextent.fe_physical = 0;
> +	fieinfo.fi_extents_max = 1;
> +	fieinfo.fi_extents_mapped = 0;
> +	fieinfo.fi_cb_data = &fextent;
> +	fieinfo.fi_start = start;
> +	fieinfo.fi_len = 1 << inode->i_blkbits;
> +	fieinfo.fi_cb = fiemap_fill_kernel_extent;
> +	fieinfo.fi_flags = (FIEMAP_KERNEL_FIBMAP | FIEMAP_FLAG_SYNC);
> +
> +	error = inode->i_op->fiemap(inode, &fieinfo);
> +
> +	if (error)
> +		return error;
> +
> +	if (fieinfo.fi_flags & (FIEMAP_EXTENT_UNKNOWN |
> +				FIEMAP_EXTENT_ENCODED |
> +				FIEMAP_EXTENT_DATA_INLINE |
> +				FIEMAP_EXTENT_UNWRITTEN |
> +				FIEMAP_EXTENT_SHARED))
> +		return -EINVAL;
> +
> +	*block = (fextent.fe_physical +
> +		  (start - fextent.fe_logical)) >> inode->i_blkbits;
> +
> +	return error;
> +}
> +
>  /**
>   *	bmap	- find a block number in a file
>   *	@inode:  inode owning the block number being requested
> @@ -1591,10 +1663,15 @@ EXPORT_SYMBOL(iput);
>   */
>  int bmap(struct inode *inode, sector_t *block)
>  {
> -	if (!inode->i_mapping->a_ops->bmap)
> +	if (inode->i_op->fiemap)
> +		return bmap_fiemap(inode, block);
> +
> +	if (inode->i_mapping->a_ops->bmap)
> +		*block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
> +						       *block);
> +	else
>  		return -EINVAL;
>  
> -	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
>  	return 0;
>  }
>  EXPORT_SYMBOL(bmap);
> diff --git a/fs/ioctl.c b/fs/ioctl.c
> index d72696c222de..0759ac6e4c7e 100644
> --- a/fs/ioctl.c
> +++ b/fs/ioctl.c
> @@ -77,11 +77,8 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
>  	return error;
>  }
>  
> -#define SET_UNKNOWN_FLAGS	(FIEMAP_EXTENT_DELALLOC)
> -#define SET_NO_UNMOUNTED_IO_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED)
> -#define SET_NOT_ALIGNED_FLAGS	(FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
> -int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
> -			    u64 phys, u64 len, u32 flags)
> +static int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo,
> +			u64 logical, u64 phys, u64 len, u32 flags)
>  {
>  	struct fiemap_extent extent;
>  	struct fiemap_extent __user *dest = fieinfo->fi_cb_data;
> @@ -89,17 +86,17 @@ int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
>  	/* only count the extents */
>  	if (fieinfo->fi_extents_max == 0) {
>  		fieinfo->fi_extents_mapped++;
> -		return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
> +		goto out;
>  	}
>  
>  	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
>  		return 1;
>  
> -	if (flags & SET_UNKNOWN_FLAGS)
> +	if (flags & FIEMAP_EXTENT_DELALLOC)
>  		flags |= FIEMAP_EXTENT_UNKNOWN;
> -	if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
> +	if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
>  		flags |= FIEMAP_EXTENT_ENCODED;
> -	if (flags & SET_NOT_ALIGNED_FLAGS)

It's too bad that we lose the "not aligned" semantic meaning here.

> +	if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
>  		flags |= FIEMAP_EXTENT_NOT_ALIGNED;

Why doesn't this function just call fiemap_fill_kernel_extent to fill
out the onstack @extent structure?  We've now implemented "fill out out
a struct fiemap_extent" twice.

>  
>  	memset(&extent, 0, sizeof(extent));
> @@ -115,7 +112,11 @@ int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
>  	fieinfo->fi_extents_mapped++;
>  	if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
>  		return 1;
> -	return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
> +
> +out:
> +	if (flags & FIEMAP_EXTENT_LAST)
> +		return 1;
> +	return 0;
>  }
>  
>  /**
> @@ -151,13 +152,23 @@ EXPORT_SYMBOL(fiemap_fill_next_extent);
>   * flags, the invalid values will be written into the fieinfo structure, and
>   * -EBADR is returned, which tells ioctl_fiemap() to return those values to
>   * userspace. For this reason, a return code of -EBADR should be preserved.
> + * In case ->fiemap is being used for FIBMAP calls, and the filesystem does not
> + * support it, return -EINVAL.
>   *
> - * Returns 0 on success, -EBADR on bad flags.
> + * Returns 0 on success, -EBADR on bad flags, -EINVAL for an unsupported FIBMAP
> + * request.
>   */
>  int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags)
>  {
>  	u32 incompat_flags;
>  
> +	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP) {
> +		if (fs_flags & FIEMAP_KERNEL_FIBMAP)
> +			return 0;
> +
> +		return -EINVAL;
> +	}
> +
>  	incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags);
>  	if (incompat_flags) {
>  		fieinfo->fi_flags = incompat_flags;
> @@ -208,6 +219,10 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
>  	if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
>  		return -EINVAL;
>  
> +	/* Userspace has no access to this flag */
> +	if (fiemap.fm_flags & FIEMAP_KERNEL_FIBMAP)
> +		return -EINVAL;
> +
>  	error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
>  				    &len);
>  	if (error)
> @@ -318,7 +333,8 @@ int __generic_block_fiemap(struct inode *inode,
>  	bool past_eof = false, whole_file = false;
>  	int ret = 0;
>  
> -	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
> +	ret = fiemap_check_flags(fieinfo,
> +				 FIEMAP_FLAG_SYNC | FIEMAP_KERNEL_FIBMAP);
>  	if (ret)
>  		return ret;
>  
> diff --git a/fs/iomap.c b/fs/iomap.c
> index b1e88722e10b..2b182abd18e8 100644
> --- a/fs/iomap.c
> +++ b/fs/iomap.c
> @@ -1195,7 +1195,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
>  	ctx.fi = fi;
>  	ctx.prev.type = IOMAP_HOLE;
>  
> -	ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC);
> +	ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC | FIEMAP_KERNEL_FIBMAP);
>  	if (ret)
>  		return ret;
>  
> diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
> index e01fd38ea935..2884395f3972 100644
> --- a/fs/ocfs2/extent_map.c
> +++ b/fs/ocfs2/extent_map.c
> @@ -747,7 +747,7 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
>  	return 0;
>  }
>  
> -#define OCFS2_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC)
> +#define OCFS2_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC | FIEMAP_KERNEL_FIBMAP)
>  
>  int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
>  {
> @@ -756,6 +756,7 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
>  	unsigned int hole_size;
>  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>  	u64 len_bytes, phys_bytes, virt_bytes;
> +
>  	struct buffer_head *di_bh = NULL;
>  	struct ocfs2_extent_rec rec;
>  	u64 map_start = fieinfo->fi_start;
> @@ -765,6 +766,11 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
>  	if (ret)
>  		return ret;
>  
> +	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP) {
> +		if (ocfs2_is_refcount_inode(inode))
> +			return -EINVAL;
> +	}
> +
>  	ret = ocfs2_inode_lock(inode, &di_bh, 0);
>  	if (ret) {
>  		mlog_errno(ret);
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index b485190b7ecd..18a798e9076b 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -1113,6 +1113,11 @@ xfs_vn_fiemap(
>  	struct fiemap_extent_info *fieinfo)
>  {
>  	int	error;
> +	struct	xfs_inode	*ip = XFS_I(inode);

Would you mind fixing the indentation to match usual xfs style?

> +
> +	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> +		if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
> +			return -EINVAL;

The xfs part looks ok to me.

--D

>  
>  	xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
>  	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index a8bd3c4f6d86..233e12ccb6d3 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1709,6 +1709,10 @@ extern bool may_open_dev(const struct path *path);
>  typedef int (*fiemap_fill_cb)(struct fiemap_extent_info *fieinfo, u64 logical,
>  			      u64 phys, u64 len, u32 flags);
>  
> +#define FIEMAP_KERNEL_FIBMAP 0x10000000		/* FIBMAP call through FIEMAP
> +						   interface. This is a kernel
> +						   only flag */
> +
>  struct fiemap_extent_info {
>  	unsigned int	fi_flags;		/* Flags as passed from user */
>  	u64		fi_start;
> -- 
> 2.20.1
>
Darrick J. Wong July 31, 2019, 11:31 p.m. UTC | #2
On Wed, Jul 31, 2019 at 04:22:54PM -0700, Darrick J. Wong wrote:
> On Wed, Jul 31, 2019 at 04:12:44PM +0200, Carlos Maiolino wrote:
> > Enables the usage of FIEMAP ioctl infrastructure to handle FIBMAP calls.
> > From now on, ->bmap() methods can start to be removed from filesystems
> > which already provides ->fiemap().
> > 
> > This adds a new helper - bmap_fiemap() - which is used to fill in the
> > fiemap request, call into the underlying filesystem and check the flags
> > set in the extent requested.
> > 
> > Add a new fiemap fill extent callback to handle the in-kernel only
> > fiemap_extent structure used for FIBMAP.
> > 
> > The new FIEMAP_KERNEL_FIBMAP flag, is used to tell the filesystem
> > ->fiemap interface, that the call is coming from ioctl_fibmap. The
> > addition of this new flag, requires an update to fiemap_check_flags(),
> > so it doesn't treat FIBMAP requests as invalid.
> > 
> > Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> > ---
> > 
> > Changelog:
> > 
> > V4:
> > 	- Fix if conditional in bmap()
> > 	- Add filesystem-specific modifications
> > V3:
> > 	- Add FIEMAP_EXTENT_SHARED to the list of invalid extents in
> > 	  bmap_fiemap()
> > 	- Rename fi_extents_start to fi_cb_data
> > 	- Use if conditional instead of ternary operator
> > 	- Make fiemap_fill_* callbacks static (which required the
> > 	  removal of some macros
> > 	- Set FIEMAP_FLAG_SYNC when calling in ->fiemap method from
> > 	  fibmap
> > 	- Add FIEMAP_KERNEL_FIBMAP flag, to identify the usage of fiemap
> > 	  infrastructure for fibmap calls, defined in fs.h so it's not
> > 	  exported to userspace.
> > 	- Update fiemap_check_flags() to understand FIEMAP_KERNEL_FIBMAP
> > 	- Update filesystems supporting both FIBMAP and FIEMAP, which
> > 	  need extra checks on FIBMAP calls
> > 
> > V2:
> > 	- Now based on the updated fiemap_extent_info,
> > 	- move the fiemap call itself to a new helper
> > 
> >  fs/ext4/extents.c     |  7 +++-
> >  fs/f2fs/data.c        | 10 +++++-
> >  fs/gfs2/inode.c       |  6 +++-
> >  fs/inode.c            | 81 +++++++++++++++++++++++++++++++++++++++++--
> >  fs/ioctl.c            | 40 ++++++++++++++-------
> >  fs/iomap.c            |  2 +-
> >  fs/ocfs2/extent_map.c |  8 ++++-
> >  fs/xfs/xfs_iops.c     |  5 +++
> >  include/linux/fs.h    |  4 +++
> >  9 files changed, 144 insertions(+), 19 deletions(-)
> > 
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index 436e564ebdd6..093b6a07067f 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -5001,7 +5001,9 @@ static int ext4_find_delayed_extent(struct inode *inode,
> >  	return next_del;
> >  }
> >  /* fiemap flags we can handle specified here */
> > -#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
> > +#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC | \
> > +				 FIEMAP_FLAG_XATTR| \
> > +				 FIEMAP_KERNEL_FIBMAP)
> >  
> >  static int ext4_xattr_fiemap(struct inode *inode,
> >  				struct fiemap_extent_info *fieinfo)
> > @@ -5048,6 +5050,9 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> >  	if (ext4_has_inline_data(inode)) {
> >  		int has_inline = 1;
> >  
> > +		if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> > +			return -EINVAL;
> 
> Wouldn't the inline data case be caught by fiemap_bmap and turned into
> -EINVAL?
> 
> > +
> >  		error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline,
> >  						start, len);
> >  
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 2979ca40d192..29b6c48fb6cc 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1409,6 +1409,9 @@ static int f2fs_xattr_fiemap(struct inode *inode,
> >  	return (err < 0 ? err : 0);
> >  }
> >  
> > +#define F2FS_FIEMAP_COMPAT	(FIEMAP_FLAG_SYNC | \
> > +				 FIEMAP_FLAG_XATTR| \
> > +				 FIEMAP_KERNEL_FIBMAP)
> >  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> >  {
> >  	u64 start = fieinfo->fi_start;
> > @@ -1426,7 +1429,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> >  			return ret;
> >  	}
> >  
> > -	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
> > +	ret = fiemap_check_flags(fieinfo, F2FS_FIEMAP_COMPAT);
> >  	if (ret)
> >  		return ret;
> >  
> > @@ -1438,6 +1441,11 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> >  	}
> >  
> >  	if (f2fs_has_inline_data(inode)) {
> > +
> > +		ret = -EINVAL;
> > +		if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> > +			goto out;
> > +
> >  		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
> >  		if (ret != -EAGAIN)
> >  			goto out;
> > diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> > index df31bd8ecf6f..30554b4f49c3 100644
> > --- a/fs/gfs2/inode.c
> > +++ b/fs/gfs2/inode.c
> > @@ -2016,7 +2016,11 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> >  	if (ret)
> >  		goto out;
> >  
> > -	ret = iomap_fiemap(inode, fieinfo, &gfs2_iomap_ops);
> > +	if (gfs2_is_stuffed(ip) &&
> > +	    (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP))
> > +		ret = -EINVAL;
> > +	else
> > +		ret = iomap_fiemap(inode, fieinfo, &gfs2_iomap_ops);
> >  
> >  	gfs2_glock_dq_uninit(&gh);
> >  
> > diff --git a/fs/inode.c b/fs/inode.c
> > index 824fa54d393d..02552b09e77f 100644
> > --- a/fs/inode.c
> > +++ b/fs/inode.c
> > @@ -1575,6 +1575,78 @@ void iput(struct inode *inode)
> >  }
> >  EXPORT_SYMBOL(iput);
> >  
> > +static int fiemap_fill_kernel_extent(struct fiemap_extent_info *fieinfo,
> > +			u64 logical, u64 phys, u64 len, u32 flags)
> > +{
> > +	struct fiemap_extent *extent = fieinfo->fi_cb_data;
> > +
> > +	/* only count the extents */
> > +	if (fieinfo->fi_cb_data == 0) {
> > +		fieinfo->fi_extents_mapped++;
> > +		goto out;
> > +	}
> > +
> > +	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
> > +		return 1;
> > +
> > +	if (flags & FIEMAP_EXTENT_DELALLOC)
> > +		flags |= FIEMAP_EXTENT_UNKNOWN;
> > +	if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
> > +		flags |= FIEMAP_EXTENT_ENCODED;
> > +	if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
> > +		flags |= FIEMAP_EXTENT_NOT_ALIGNED;
> > +
> > +	extent->fe_logical = logical;
> > +	extent->fe_physical = phys;
> > +	extent->fe_length = len;
> > +	extent->fe_flags = flags;
> > +
> > +	fieinfo->fi_extents_mapped++;
> > +
> > +	if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
> > +		return 1;
> > +
> > +out:
> > +	if (flags & FIEMAP_EXTENT_LAST)
> > +		return 1;
> > +	return 0;
> > +}
> > +
> > +static int bmap_fiemap(struct inode *inode, sector_t *block)
> > +{
> > +	struct fiemap_extent_info fieinfo = { 0, };
> > +	struct fiemap_extent fextent;
> > +	u64 start = *block << inode->i_blkbits;
> > +	int error = -EINVAL;
> > +
> > +	fextent.fe_logical = 0;
> > +	fextent.fe_physical = 0;
> > +	fieinfo.fi_extents_max = 1;
> > +	fieinfo.fi_extents_mapped = 0;
> > +	fieinfo.fi_cb_data = &fextent;
> > +	fieinfo.fi_start = start;
> > +	fieinfo.fi_len = 1 << inode->i_blkbits;
> > +	fieinfo.fi_cb = fiemap_fill_kernel_extent;
> > +	fieinfo.fi_flags = (FIEMAP_KERNEL_FIBMAP | FIEMAP_FLAG_SYNC);
> > +
> > +	error = inode->i_op->fiemap(inode, &fieinfo);
> > +
> > +	if (error)
> > +		return error;
> > +
> > +	if (fieinfo.fi_flags & (FIEMAP_EXTENT_UNKNOWN |
> > +				FIEMAP_EXTENT_ENCODED |
> > +				FIEMAP_EXTENT_DATA_INLINE |
> > +				FIEMAP_EXTENT_UNWRITTEN |
> > +				FIEMAP_EXTENT_SHARED))
> > +		return -EINVAL;
> > +
> > +	*block = (fextent.fe_physical +
> > +		  (start - fextent.fe_logical)) >> inode->i_blkbits;
> > +
> > +	return error;
> > +}
> > +
> >  /**
> >   *	bmap	- find a block number in a file
> >   *	@inode:  inode owning the block number being requested
> > @@ -1591,10 +1663,15 @@ EXPORT_SYMBOL(iput);
> >   */
> >  int bmap(struct inode *inode, sector_t *block)
> >  {
> > -	if (!inode->i_mapping->a_ops->bmap)
> > +	if (inode->i_op->fiemap)
> > +		return bmap_fiemap(inode, block);
> > +
> > +	if (inode->i_mapping->a_ops->bmap)
> > +		*block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
> > +						       *block);
> > +	else
> >  		return -EINVAL;
> >  
> > -	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
> >  	return 0;
> >  }
> >  EXPORT_SYMBOL(bmap);
> > diff --git a/fs/ioctl.c b/fs/ioctl.c
> > index d72696c222de..0759ac6e4c7e 100644
> > --- a/fs/ioctl.c
> > +++ b/fs/ioctl.c
> > @@ -77,11 +77,8 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
> >  	return error;
> >  }
> >  
> > -#define SET_UNKNOWN_FLAGS	(FIEMAP_EXTENT_DELALLOC)
> > -#define SET_NO_UNMOUNTED_IO_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED)
> > -#define SET_NOT_ALIGNED_FLAGS	(FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
> > -int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
> > -			    u64 phys, u64 len, u32 flags)
> > +static int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo,
> > +			u64 logical, u64 phys, u64 len, u32 flags)
> >  {
> >  	struct fiemap_extent extent;
> >  	struct fiemap_extent __user *dest = fieinfo->fi_cb_data;
> > @@ -89,17 +86,17 @@ int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
> >  	/* only count the extents */
> >  	if (fieinfo->fi_extents_max == 0) {
> >  		fieinfo->fi_extents_mapped++;
> > -		return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
> > +		goto out;
> >  	}
> >  
> >  	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
> >  		return 1;
> >  
> > -	if (flags & SET_UNKNOWN_FLAGS)
> > +	if (flags & FIEMAP_EXTENT_DELALLOC)
> >  		flags |= FIEMAP_EXTENT_UNKNOWN;
> > -	if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
> > +	if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
> >  		flags |= FIEMAP_EXTENT_ENCODED;
> > -	if (flags & SET_NOT_ALIGNED_FLAGS)
> 
> It's too bad that we lose the "not aligned" semantic meaning here.
> 
> > +	if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
> >  		flags |= FIEMAP_EXTENT_NOT_ALIGNED;
> 
> Why doesn't this function just call fiemap_fill_kernel_extent to fill
> out the onstack @extent structure?  We've now implemented "fill out out
> a struct fiemap_extent" twice.
> 
> >  
> >  	memset(&extent, 0, sizeof(extent));
> > @@ -115,7 +112,11 @@ int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
> >  	fieinfo->fi_extents_mapped++;
> >  	if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
> >  		return 1;
> > -	return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
> > +
> > +out:
> > +	if (flags & FIEMAP_EXTENT_LAST)
> > +		return 1;
> > +	return 0;
> >  }
> >  
> >  /**
> > @@ -151,13 +152,23 @@ EXPORT_SYMBOL(fiemap_fill_next_extent);
> >   * flags, the invalid values will be written into the fieinfo structure, and
> >   * -EBADR is returned, which tells ioctl_fiemap() to return those values to
> >   * userspace. For this reason, a return code of -EBADR should be preserved.
> > + * In case ->fiemap is being used for FIBMAP calls, and the filesystem does not
> > + * support it, return -EINVAL.
> >   *
> > - * Returns 0 on success, -EBADR on bad flags.
> > + * Returns 0 on success, -EBADR on bad flags, -EINVAL for an unsupported FIBMAP
> > + * request.
> >   */
> >  int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags)
> >  {
> >  	u32 incompat_flags;
> >  
> > +	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP) {
> > +		if (fs_flags & FIEMAP_KERNEL_FIBMAP)
> > +			return 0;
> > +
> > +		return -EINVAL;
> > +	}
> > +
> >  	incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags);
> >  	if (incompat_flags) {
> >  		fieinfo->fi_flags = incompat_flags;
> > @@ -208,6 +219,10 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
> >  	if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
> >  		return -EINVAL;
> >  
> > +	/* Userspace has no access to this flag */
> > +	if (fiemap.fm_flags & FIEMAP_KERNEL_FIBMAP)
> > +		return -EINVAL;
> > +
> >  	error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
> >  				    &len);
> >  	if (error)
> > @@ -318,7 +333,8 @@ int __generic_block_fiemap(struct inode *inode,
> >  	bool past_eof = false, whole_file = false;
> >  	int ret = 0;
> >  
> > -	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
> > +	ret = fiemap_check_flags(fieinfo,
> > +				 FIEMAP_FLAG_SYNC | FIEMAP_KERNEL_FIBMAP);
> >  	if (ret)
> >  		return ret;
> >  
> > diff --git a/fs/iomap.c b/fs/iomap.c
> > index b1e88722e10b..2b182abd18e8 100644
> > --- a/fs/iomap.c
> > +++ b/fs/iomap.c
> > @@ -1195,7 +1195,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
> >  	ctx.fi = fi;
> >  	ctx.prev.type = IOMAP_HOLE;
> >  
> > -	ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC);
> > +	ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC | FIEMAP_KERNEL_FIBMAP);
> >  	if (ret)
> >  		return ret;
> >  
> > diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
> > index e01fd38ea935..2884395f3972 100644
> > --- a/fs/ocfs2/extent_map.c
> > +++ b/fs/ocfs2/extent_map.c
> > @@ -747,7 +747,7 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
> >  	return 0;
> >  }
> >  
> > -#define OCFS2_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC)
> > +#define OCFS2_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC | FIEMAP_KERNEL_FIBMAP)
> >  
> >  int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> >  {
> > @@ -756,6 +756,7 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> >  	unsigned int hole_size;
> >  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
> >  	u64 len_bytes, phys_bytes, virt_bytes;
> > +
> >  	struct buffer_head *di_bh = NULL;
> >  	struct ocfs2_extent_rec rec;
> >  	u64 map_start = fieinfo->fi_start;
> > @@ -765,6 +766,11 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> >  	if (ret)
> >  		return ret;
> >  
> > +	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP) {
> > +		if (ocfs2_is_refcount_inode(inode))
> > +			return -EINVAL;
> > +	}
> > +
> >  	ret = ocfs2_inode_lock(inode, &di_bh, 0);
> >  	if (ret) {
> >  		mlog_errno(ret);
> > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> > index b485190b7ecd..18a798e9076b 100644
> > --- a/fs/xfs/xfs_iops.c
> > +++ b/fs/xfs/xfs_iops.c
> > @@ -1113,6 +1113,11 @@ xfs_vn_fiemap(
> >  	struct fiemap_extent_info *fieinfo)
> >  {
> >  	int	error;
> > +	struct	xfs_inode	*ip = XFS_I(inode);
> 
> Would you mind fixing the indentation to match usual xfs style?
> 
> > +
> > +	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> > +		if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
> > +			return -EINVAL;
> 
> The xfs part looks ok to me.

No it doesn't.  This check ought to be xfs_is_cow_inode() to match the
code being removed in the last patch:

if ((fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP) &&
    (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip)))
	return -EINVAL;

> --D
> 
> >  
> >  	xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
> >  	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index a8bd3c4f6d86..233e12ccb6d3 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1709,6 +1709,10 @@ extern bool may_open_dev(const struct path *path);
> >  typedef int (*fiemap_fill_cb)(struct fiemap_extent_info *fieinfo, u64 logical,
> >  			      u64 phys, u64 len, u32 flags);
> >  
> > +#define FIEMAP_KERNEL_FIBMAP 0x10000000		/* FIBMAP call through FIEMAP
> > +						   interface. This is a kernel
> > +						   only flag */
> > +
> >  struct fiemap_extent_info {
> >  	unsigned int	fi_flags;		/* Flags as passed from user */
> >  	u64		fi_start;
> > -- 
> > 2.20.1
> >
Carlos Maiolino Aug. 2, 2019, 1:48 p.m. UTC | #3
> > -#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
> > +#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC | \
> > +				 FIEMAP_FLAG_XATTR| \
> > +				 FIEMAP_KERNEL_FIBMAP)
> >  
> >  static int ext4_xattr_fiemap(struct inode *inode,
> >  				struct fiemap_extent_info *fieinfo)
> > @@ -5048,6 +5050,9 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> >  	if (ext4_has_inline_data(inode)) {
> >  		int has_inline = 1;
> >  
> > +		if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> > +			return -EINVAL;
> 
> Wouldn't the inline data case be caught by fiemap_bmap and turned into
> -EINVAL?

Yes, it does, but until ext4_fiemap() returns the extent with the INLINE flag,
it does need to go through the whole fiemap mapping mechanism when we already
know the result... So, instead of letting the ext4_fiemap() map the extent, just
take the shortcut and return -EINVAL directly.

The check in fiemap_bmap() is a 'safe measure' (if it does have other name I
don't know :), but if the filesystem already knows it's gonna fall into an
inline inode, taking the shortcut is better, isn't it?

> > +		return 1;
> > +	return 0;
> > +}
> > +
> > +static int bmap_fiemap(struct inode *inode, sector_t *block)
> > +{
> > +	struct fiemap_extent_info fieinfo = { 0, };
> > +	struct fiemap_extent fextent;
> > +	u64 start = *block << inode->i_blkbits;
> > +	int error = -EINVAL;
> > +
> > +	fextent.fe_logical = 0;
> > +	fextent.fe_physical = 0;
> > +	fieinfo.fi_extents_max = 1;
> > +	fieinfo.fi_extents_mapped = 0;
> > +	fieinfo.fi_cb_data = &fextent;
> > +	fieinfo.fi_start = start;
> > +	fieinfo.fi_len = 1 << inode->i_blkbits;
> > +	fieinfo.fi_cb = fiemap_fill_kernel_extent;
> > +	fieinfo.fi_flags = (FIEMAP_KERNEL_FIBMAP | FIEMAP_FLAG_SYNC);
> > +
> > +	error = inode->i_op->fiemap(inode, &fieinfo);
> > +
> > +	if (error)
> > +		return error;
> > +
> > +	if (fieinfo.fi_flags & (FIEMAP_EXTENT_UNKNOWN |
> > +				FIEMAP_EXTENT_ENCODED |
> > +				FIEMAP_EXTENT_DATA_INLINE |
> > +				FIEMAP_EXTENT_UNWRITTEN |
> > +				FIEMAP_EXTENT_SHARED))
> > +		return -EINVAL;
> > +
> > +	*block = (fextent.fe_physical +
> > +		  (start - fextent.fe_logical)) >> inode->i_blkbits;
> > +
> > +	return error;
> > +}
> > +
> >  /**
> >   *	bmap	- find a block number in a file
> >   *	@inode:  inode owning the block number being requested
> > @@ -1591,10 +1663,15 @@ EXPORT_SYMBOL(iput);
> >   */
> >  int bmap(struct inode *inode, sector_t *block)
> >  {
> > -	if (!inode->i_mapping->a_ops->bmap)
> > +	if (inode->i_op->fiemap)
> > +		return bmap_fiemap(inode, block);
> > +
> > +	if (inode->i_mapping->a_ops->bmap)
> > +		*block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
> > +						       *block);
> > +	else
> >  		return -EINVAL;
> >  
> > -	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
> >  	return 0;
> >  }
> >  EXPORT_SYMBOL(bmap);
> > diff --git a/fs/ioctl.c b/fs/ioctl.c
> > index d72696c222de..0759ac6e4c7e 100644
> > --- a/fs/ioctl.c
> > +++ b/fs/ioctl.c
> > @@ -77,11 +77,8 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
> >  	return error;
> >  }
> >  
> > -#define SET_UNKNOWN_FLAGS	(FIEMAP_EXTENT_DELALLOC)
> > -#define SET_NO_UNMOUNTED_IO_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED)
> > -#define SET_NOT_ALIGNED_FLAGS	(FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
> > -int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
> > -			    u64 phys, u64 len, u32 flags)
> > +static int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo,
> > +			u64 logical, u64 phys, u64 len, u32 flags)
> >  {
> >  	struct fiemap_extent extent;
> >  	struct fiemap_extent __user *dest = fieinfo->fi_cb_data;
> > @@ -89,17 +86,17 @@ int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
> >  	/* only count the extents */
> >  	if (fieinfo->fi_extents_max == 0) {
> >  		fieinfo->fi_extents_mapped++;
> > -		return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
> > +		goto out;
> >  	}
> >  
> >  	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
> >  		return 1;
> >  
> > -	if (flags & SET_UNKNOWN_FLAGS)
> > +	if (flags & FIEMAP_EXTENT_DELALLOC)
> >  		flags |= FIEMAP_EXTENT_UNKNOWN;
> > -	if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
> > +	if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
> >  		flags |= FIEMAP_EXTENT_ENCODED;
> > -	if (flags & SET_NOT_ALIGNED_FLAGS)
> 
> It's too bad that we lose the "not aligned" semantic meaning here.

May you explain a bit better what you mean? We don't lose it, just the define
goes away, the reason I dropped these defines is because the same flags are used
in both functions, fiemap_fill_{user,kernel}_extent(), and I didn't think
defining them on both places (or in fs.h) has any benefit here, so I opted to
remove them.

> 
> > +	if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
> >  		flags |= FIEMAP_EXTENT_NOT_ALIGNED;
> 
> Why doesn't this function just call fiemap_fill_kernel_extent to fill
> out the onstack @extent structure?  We've now implemented "fill out out
> a struct fiemap_extent" twice.

fiemap_fill_{user, kernel}_extent() have different purposes, and the big
difference is one handles a userspace pointer memory and the other don't. IIRC
the original proposal was some sort of sharing a single function, but then
Christoph suggested a new design, using different functions as callbacks.

> > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> > index b485190b7ecd..18a798e9076b 100644
> > --- a/fs/xfs/xfs_iops.c
> > +++ b/fs/xfs/xfs_iops.c
> > @@ -1113,6 +1113,11 @@ xfs_vn_fiemap(
> >  	struct fiemap_extent_info *fieinfo)
> >  {
> >  	int	error;
> > +	struct	xfs_inode	*ip = XFS_I(inode);
> 
> Would you mind fixing the indentation to match usual xfs style?

Sure, will fix it


> 
> > +
> > +	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> > +		if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
> > +			return -EINVAL;
> 
> The xfs part looks ok to me.
> 
> --D
>
Carlos Maiolino Aug. 2, 2019, 1:52 p.m. UTC | #4
> > > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> > > index b485190b7ecd..18a798e9076b 100644
> > > --- a/fs/xfs/xfs_iops.c
> > > +++ b/fs/xfs/xfs_iops.c
> > > @@ -1113,6 +1113,11 @@ xfs_vn_fiemap(
> > >  	struct fiemap_extent_info *fieinfo)
> > >  {
> > >  	int	error;
> > > +	struct	xfs_inode	*ip = XFS_I(inode);
> > 
> > Would you mind fixing the indentation to match usual xfs style?
> > 
> > > +
> > > +	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> > > +		if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
> > > +			return -EINVAL;
> > 
> > The xfs part looks ok to me.
> 
> No it doesn't.  This check ought to be xfs_is_cow_inode() to match the
> code being removed in the last patch:
> 
> if ((fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP) &&
>     (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip)))
> 	return -EINVAL;
> 

Agreed, will fix

> > --D
> >
Darrick J. Wong Aug. 2, 2019, 3:29 p.m. UTC | #5
On Fri, Aug 02, 2019 at 03:48:17PM +0200, Carlos Maiolino wrote:
> > > -#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
> > > +#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC | \
> > > +				 FIEMAP_FLAG_XATTR| \
> > > +				 FIEMAP_KERNEL_FIBMAP)
> > >  
> > >  static int ext4_xattr_fiemap(struct inode *inode,
> > >  				struct fiemap_extent_info *fieinfo)
> > > @@ -5048,6 +5050,9 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
> > >  	if (ext4_has_inline_data(inode)) {
> > >  		int has_inline = 1;
> > >  
> > > +		if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> > > +			return -EINVAL;
> > 
> > Wouldn't the inline data case be caught by fiemap_bmap and turned into
> > -EINVAL?
> 
> Yes, it does, but until ext4_fiemap() returns the extent with the INLINE flag,
> it does need to go through the whole fiemap mapping mechanism when we already
> know the result... So, instead of letting the ext4_fiemap() map the extent, just
> take the shortcut and return -EINVAL directly.
> 
> The check in fiemap_bmap() is a 'safe measure' (if it does have other name I
> don't know :), but if the filesystem already knows it's gonna fall into an
> inline inode, taking the shortcut is better, isn't it?

I suppose so.  Just wondering, that was all... :)

> 
> > > +		return 1;
> > > +	return 0;
> > > +}
> > > +
> > > +static int bmap_fiemap(struct inode *inode, sector_t *block)
> > > +{
> > > +	struct fiemap_extent_info fieinfo = { 0, };
> > > +	struct fiemap_extent fextent;
> > > +	u64 start = *block << inode->i_blkbits;
> > > +	int error = -EINVAL;
> > > +
> > > +	fextent.fe_logical = 0;
> > > +	fextent.fe_physical = 0;
> > > +	fieinfo.fi_extents_max = 1;
> > > +	fieinfo.fi_extents_mapped = 0;
> > > +	fieinfo.fi_cb_data = &fextent;
> > > +	fieinfo.fi_start = start;
> > > +	fieinfo.fi_len = 1 << inode->i_blkbits;
> > > +	fieinfo.fi_cb = fiemap_fill_kernel_extent;
> > > +	fieinfo.fi_flags = (FIEMAP_KERNEL_FIBMAP | FIEMAP_FLAG_SYNC);
> > > +
> > > +	error = inode->i_op->fiemap(inode, &fieinfo);
> > > +
> > > +	if (error)
> > > +		return error;
> > > +
> > > +	if (fieinfo.fi_flags & (FIEMAP_EXTENT_UNKNOWN |
> > > +				FIEMAP_EXTENT_ENCODED |
> > > +				FIEMAP_EXTENT_DATA_INLINE |
> > > +				FIEMAP_EXTENT_UNWRITTEN |
> > > +				FIEMAP_EXTENT_SHARED))
> > > +		return -EINVAL;
> > > +
> > > +	*block = (fextent.fe_physical +
> > > +		  (start - fextent.fe_logical)) >> inode->i_blkbits;
> > > +
> > > +	return error;
> > > +}
> > > +
> > >  /**
> > >   *	bmap	- find a block number in a file
> > >   *	@inode:  inode owning the block number being requested
> > > @@ -1591,10 +1663,15 @@ EXPORT_SYMBOL(iput);
> > >   */
> > >  int bmap(struct inode *inode, sector_t *block)
> > >  {
> > > -	if (!inode->i_mapping->a_ops->bmap)
> > > +	if (inode->i_op->fiemap)
> > > +		return bmap_fiemap(inode, block);
> > > +
> > > +	if (inode->i_mapping->a_ops->bmap)
> > > +		*block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
> > > +						       *block);
> > > +	else
> > >  		return -EINVAL;
> > >  
> > > -	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
> > >  	return 0;
> > >  }
> > >  EXPORT_SYMBOL(bmap);
> > > diff --git a/fs/ioctl.c b/fs/ioctl.c
> > > index d72696c222de..0759ac6e4c7e 100644
> > > --- a/fs/ioctl.c
> > > +++ b/fs/ioctl.c
> > > @@ -77,11 +77,8 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
> > >  	return error;
> > >  }
> > >  
> > > -#define SET_UNKNOWN_FLAGS	(FIEMAP_EXTENT_DELALLOC)
> > > -#define SET_NO_UNMOUNTED_IO_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED)
> > > -#define SET_NOT_ALIGNED_FLAGS	(FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
> > > -int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
> > > -			    u64 phys, u64 len, u32 flags)
> > > +static int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo,
> > > +			u64 logical, u64 phys, u64 len, u32 flags)
> > >  {
> > >  	struct fiemap_extent extent;
> > >  	struct fiemap_extent __user *dest = fieinfo->fi_cb_data;
> > > @@ -89,17 +86,17 @@ int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
> > >  	/* only count the extents */
> > >  	if (fieinfo->fi_extents_max == 0) {
> > >  		fieinfo->fi_extents_mapped++;
> > > -		return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
> > > +		goto out;
> > >  	}
> > >  
> > >  	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
> > >  		return 1;
> > >  
> > > -	if (flags & SET_UNKNOWN_FLAGS)
> > > +	if (flags & FIEMAP_EXTENT_DELALLOC)
> > >  		flags |= FIEMAP_EXTENT_UNKNOWN;
> > > -	if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
> > > +	if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
> > >  		flags |= FIEMAP_EXTENT_ENCODED;
> > > -	if (flags & SET_NOT_ALIGNED_FLAGS)
> > 
> > It's too bad that we lose the "not aligned" semantic meaning here.
> 
> May you explain a bit better what you mean? We don't lose it, just the define
> goes away, the reason I dropped these defines is because the same flags are used
> in both functions, fiemap_fill_{user,kernel}_extent(), and I didn't think
> defining them on both places (or in fs.h) has any benefit here, so I opted to
> remove them.

Eh, I changed my mind.  It's easy enough to tell which flags map to "No
umounted IO" from the code even if the #defines go away.

> > 
> > > +	if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
> > >  		flags |= FIEMAP_EXTENT_NOT_ALIGNED;
> > 
> > Why doesn't this function just call fiemap_fill_kernel_extent to fill
> > out the onstack @extent structure?  We've now implemented "fill out out
> > a struct fiemap_extent" twice.
> 
> fiemap_fill_{user, kernel}_extent() have different purposes, and the big
> difference is one handles a userspace pointer memory and the other don't. IIRC
> the original proposal was some sort of sharing a single function, but then
> Christoph suggested a new design, using different functions as callbacks.

It's harder for me to tell when I don't have a branch containing the
final product to look at, but I'd have thought that _fill_kernel fills
out an in-kernel fiemap extent; and then _fill_user would declare one on
the stack, call _fill_kernel to set the fields, and then copy_to_user?

(But maybe the code already does this and I can't tell...)

> 
> > > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> > > index b485190b7ecd..18a798e9076b 100644
> > > --- a/fs/xfs/xfs_iops.c
> > > +++ b/fs/xfs/xfs_iops.c
> > > @@ -1113,6 +1113,11 @@ xfs_vn_fiemap(
> > >  	struct fiemap_extent_info *fieinfo)
> > >  {
> > >  	int	error;
> > > +	struct	xfs_inode	*ip = XFS_I(inode);
> > 
> > Would you mind fixing the indentation to match usual xfs style?
> 
> Sure, will fix it
> 
> 
> > 
> > > +
> > > +	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
> > > +		if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
> > > +			return -EINVAL;
> > 
> > The xfs part looks ok to me.
> > 
> > --D
> > 
> 
> -- 
> Carlos
Carlos Maiolino Aug. 5, 2019, 10:38 a.m. UTC | #6
> > > 
> > > Why doesn't this function just call fiemap_fill_kernel_extent to fill
> > > out the onstack @extent structure?  We've now implemented "fill out out
> > > a struct fiemap_extent" twice.
> > 
> > fiemap_fill_{user, kernel}_extent() have different purposes, and the big
> > difference is one handles a userspace pointer memory and the other don't. IIRC
> > the original proposal was some sort of sharing a single function, but then
> > Christoph suggested a new design, using different functions as callbacks.
> 
> It's harder for me to tell when I don't have a branch containing the
> final product to look at,

Good, I though I was the only one having issues with it :)

You can see the work here:

https://github.com/cmaiolino/linux/commits/FIEMAP_V5

^ This already includes changes addressing your concerns as we discussed int
this thread btw.

> but I'd have thought that _fill_kernel fills
> out an in-kernel fiemap extent; and then _fill_user would declare one on
> the stack, call _fill_kernel to set the fields, and then copy_to_user?

None of those functions will declare a fiemap_extent, the fiemap extent will be
declared before (in ioctl_fiemap() or bmap_fiemap()) calling either function and
then passed to the proper callback via fieinfo.fi_cb_data. This will be a user
or a kernel memory address, and the callbacks will handle the memory accordingly.

Cheers
Christoph Hellwig Aug. 6, 2019, 5:41 a.m. UTC | #7
Darrick, and chance to not send out a double full quote?  I just can't
not find the actual information in this mail even if I try.
Christoph Hellwig Aug. 6, 2019, 5:46 a.m. UTC | #8
On Fri, Aug 02, 2019 at 08:29:02AM -0700, Darrick J. Wong wrote:
> It's harder for me to tell when I don't have a branch containing the
> final product to look at, but I'd have thought that _fill_kernel fills
> out an in-kernel fiemap extent; and then _fill_user would declare one on
> the stack, call _fill_kernel to set the fields, and then copy_to_user?

That works fine for small, fixed-sized structures.  But for large
variable sized structures it is very inefficient, as we need to do a
possibly large kernel allocation and just copy it on.  Thus I told Carlos
to follow the readdir model with the ->actor (used to be ->filldir)
callback that can fill out the actual kernel or user data directly.
Another example of this high-level model is our struct iov_iter used
in the I/O path.
diff mbox series

Patch

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 436e564ebdd6..093b6a07067f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5001,7 +5001,9 @@  static int ext4_find_delayed_extent(struct inode *inode,
 	return next_del;
 }
 /* fiemap flags we can handle specified here */
-#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
+#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC | \
+				 FIEMAP_FLAG_XATTR| \
+				 FIEMAP_KERNEL_FIBMAP)
 
 static int ext4_xattr_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo)
@@ -5048,6 +5050,9 @@  int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
 	if (ext4_has_inline_data(inode)) {
 		int has_inline = 1;
 
+		if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
+			return -EINVAL;
+
 		error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline,
 						start, len);
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2979ca40d192..29b6c48fb6cc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1409,6 +1409,9 @@  static int f2fs_xattr_fiemap(struct inode *inode,
 	return (err < 0 ? err : 0);
 }
 
+#define F2FS_FIEMAP_COMPAT	(FIEMAP_FLAG_SYNC | \
+				 FIEMAP_FLAG_XATTR| \
+				 FIEMAP_KERNEL_FIBMAP)
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
 {
 	u64 start = fieinfo->fi_start;
@@ -1426,7 +1429,7 @@  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
 			return ret;
 	}
 
-	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
+	ret = fiemap_check_flags(fieinfo, F2FS_FIEMAP_COMPAT);
 	if (ret)
 		return ret;
 
@@ -1438,6 +1441,11 @@  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
 	}
 
 	if (f2fs_has_inline_data(inode)) {
+
+		ret = -EINVAL;
+		if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
+			goto out;
+
 		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
 		if (ret != -EAGAIN)
 			goto out;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index df31bd8ecf6f..30554b4f49c3 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -2016,7 +2016,11 @@  static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
 	if (ret)
 		goto out;
 
-	ret = iomap_fiemap(inode, fieinfo, &gfs2_iomap_ops);
+	if (gfs2_is_stuffed(ip) &&
+	    (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP))
+		ret = -EINVAL;
+	else
+		ret = iomap_fiemap(inode, fieinfo, &gfs2_iomap_ops);
 
 	gfs2_glock_dq_uninit(&gh);
 
diff --git a/fs/inode.c b/fs/inode.c
index 824fa54d393d..02552b09e77f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1575,6 +1575,78 @@  void iput(struct inode *inode)
 }
 EXPORT_SYMBOL(iput);
 
+static int fiemap_fill_kernel_extent(struct fiemap_extent_info *fieinfo,
+			u64 logical, u64 phys, u64 len, u32 flags)
+{
+	struct fiemap_extent *extent = fieinfo->fi_cb_data;
+
+	/* only count the extents */
+	if (fieinfo->fi_cb_data == 0) {
+		fieinfo->fi_extents_mapped++;
+		goto out;
+	}
+
+	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
+		return 1;
+
+	if (flags & FIEMAP_EXTENT_DELALLOC)
+		flags |= FIEMAP_EXTENT_UNKNOWN;
+	if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
+		flags |= FIEMAP_EXTENT_ENCODED;
+	if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
+		flags |= FIEMAP_EXTENT_NOT_ALIGNED;
+
+	extent->fe_logical = logical;
+	extent->fe_physical = phys;
+	extent->fe_length = len;
+	extent->fe_flags = flags;
+
+	fieinfo->fi_extents_mapped++;
+
+	if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
+		return 1;
+
+out:
+	if (flags & FIEMAP_EXTENT_LAST)
+		return 1;
+	return 0;
+}
+
+static int bmap_fiemap(struct inode *inode, sector_t *block)
+{
+	struct fiemap_extent_info fieinfo = { 0, };
+	struct fiemap_extent fextent;
+	u64 start = *block << inode->i_blkbits;
+	int error = -EINVAL;
+
+	fextent.fe_logical = 0;
+	fextent.fe_physical = 0;
+	fieinfo.fi_extents_max = 1;
+	fieinfo.fi_extents_mapped = 0;
+	fieinfo.fi_cb_data = &fextent;
+	fieinfo.fi_start = start;
+	fieinfo.fi_len = 1 << inode->i_blkbits;
+	fieinfo.fi_cb = fiemap_fill_kernel_extent;
+	fieinfo.fi_flags = (FIEMAP_KERNEL_FIBMAP | FIEMAP_FLAG_SYNC);
+
+	error = inode->i_op->fiemap(inode, &fieinfo);
+
+	if (error)
+		return error;
+
+	if (fieinfo.fi_flags & (FIEMAP_EXTENT_UNKNOWN |
+				FIEMAP_EXTENT_ENCODED |
+				FIEMAP_EXTENT_DATA_INLINE |
+				FIEMAP_EXTENT_UNWRITTEN |
+				FIEMAP_EXTENT_SHARED))
+		return -EINVAL;
+
+	*block = (fextent.fe_physical +
+		  (start - fextent.fe_logical)) >> inode->i_blkbits;
+
+	return error;
+}
+
 /**
  *	bmap	- find a block number in a file
  *	@inode:  inode owning the block number being requested
@@ -1591,10 +1663,15 @@  EXPORT_SYMBOL(iput);
  */
 int bmap(struct inode *inode, sector_t *block)
 {
-	if (!inode->i_mapping->a_ops->bmap)
+	if (inode->i_op->fiemap)
+		return bmap_fiemap(inode, block);
+
+	if (inode->i_mapping->a_ops->bmap)
+		*block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
+						       *block);
+	else
 		return -EINVAL;
 
-	*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
 	return 0;
 }
 EXPORT_SYMBOL(bmap);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index d72696c222de..0759ac6e4c7e 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -77,11 +77,8 @@  static int ioctl_fibmap(struct file *filp, int __user *p)
 	return error;
 }
 
-#define SET_UNKNOWN_FLAGS	(FIEMAP_EXTENT_DELALLOC)
-#define SET_NO_UNMOUNTED_IO_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED)
-#define SET_NOT_ALIGNED_FLAGS	(FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
-int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
-			    u64 phys, u64 len, u32 flags)
+static int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo,
+			u64 logical, u64 phys, u64 len, u32 flags)
 {
 	struct fiemap_extent extent;
 	struct fiemap_extent __user *dest = fieinfo->fi_cb_data;
@@ -89,17 +86,17 @@  int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
 	/* only count the extents */
 	if (fieinfo->fi_extents_max == 0) {
 		fieinfo->fi_extents_mapped++;
-		return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
+		goto out;
 	}
 
 	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
 		return 1;
 
-	if (flags & SET_UNKNOWN_FLAGS)
+	if (flags & FIEMAP_EXTENT_DELALLOC)
 		flags |= FIEMAP_EXTENT_UNKNOWN;
-	if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
+	if (flags & FIEMAP_EXTENT_DATA_ENCRYPTED)
 		flags |= FIEMAP_EXTENT_ENCODED;
-	if (flags & SET_NOT_ALIGNED_FLAGS)
+	if (flags & (FIEMAP_EXTENT_DATA_TAIL | FIEMAP_EXTENT_DATA_INLINE))
 		flags |= FIEMAP_EXTENT_NOT_ALIGNED;
 
 	memset(&extent, 0, sizeof(extent));
@@ -115,7 +112,11 @@  int fiemap_fill_user_extent(struct fiemap_extent_info *fieinfo, u64 logical,
 	fieinfo->fi_extents_mapped++;
 	if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
 		return 1;
-	return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
+
+out:
+	if (flags & FIEMAP_EXTENT_LAST)
+		return 1;
+	return 0;
 }
 
 /**
@@ -151,13 +152,23 @@  EXPORT_SYMBOL(fiemap_fill_next_extent);
  * flags, the invalid values will be written into the fieinfo structure, and
  * -EBADR is returned, which tells ioctl_fiemap() to return those values to
  * userspace. For this reason, a return code of -EBADR should be preserved.
+ * In case ->fiemap is being used for FIBMAP calls, and the filesystem does not
+ * support it, return -EINVAL.
  *
- * Returns 0 on success, -EBADR on bad flags.
+ * Returns 0 on success, -EBADR on bad flags, -EINVAL for an unsupported FIBMAP
+ * request.
  */
 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags)
 {
 	u32 incompat_flags;
 
+	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP) {
+		if (fs_flags & FIEMAP_KERNEL_FIBMAP)
+			return 0;
+
+		return -EINVAL;
+	}
+
 	incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags);
 	if (incompat_flags) {
 		fieinfo->fi_flags = incompat_flags;
@@ -208,6 +219,10 @@  static int ioctl_fiemap(struct file *filp, unsigned long arg)
 	if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
 		return -EINVAL;
 
+	/* Userspace has no access to this flag */
+	if (fiemap.fm_flags & FIEMAP_KERNEL_FIBMAP)
+		return -EINVAL;
+
 	error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
 				    &len);
 	if (error)
@@ -318,7 +333,8 @@  int __generic_block_fiemap(struct inode *inode,
 	bool past_eof = false, whole_file = false;
 	int ret = 0;
 
-	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+	ret = fiemap_check_flags(fieinfo,
+				 FIEMAP_FLAG_SYNC | FIEMAP_KERNEL_FIBMAP);
 	if (ret)
 		return ret;
 
diff --git a/fs/iomap.c b/fs/iomap.c
index b1e88722e10b..2b182abd18e8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1195,7 +1195,7 @@  int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
 	ctx.fi = fi;
 	ctx.prev.type = IOMAP_HOLE;
 
-	ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC);
+	ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC | FIEMAP_KERNEL_FIBMAP);
 	if (ret)
 		return ret;
 
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index e01fd38ea935..2884395f3972 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -747,7 +747,7 @@  static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
 	return 0;
 }
 
-#define OCFS2_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC)
+#define OCFS2_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC | FIEMAP_KERNEL_FIBMAP)
 
 int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
 {
@@ -756,6 +756,7 @@  int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
 	unsigned int hole_size;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	u64 len_bytes, phys_bytes, virt_bytes;
+
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_extent_rec rec;
 	u64 map_start = fieinfo->fi_start;
@@ -765,6 +766,11 @@  int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
 	if (ret)
 		return ret;
 
+	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP) {
+		if (ocfs2_is_refcount_inode(inode))
+			return -EINVAL;
+	}
+
 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index b485190b7ecd..18a798e9076b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1113,6 +1113,11 @@  xfs_vn_fiemap(
 	struct fiemap_extent_info *fieinfo)
 {
 	int	error;
+	struct	xfs_inode	*ip = XFS_I(inode);
+
+	if (fieinfo->fi_flags & FIEMAP_KERNEL_FIBMAP)
+		if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
+			return -EINVAL;
 
 	xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a8bd3c4f6d86..233e12ccb6d3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1709,6 +1709,10 @@  extern bool may_open_dev(const struct path *path);
 typedef int (*fiemap_fill_cb)(struct fiemap_extent_info *fieinfo, u64 logical,
 			      u64 phys, u64 len, u32 flags);
 
+#define FIEMAP_KERNEL_FIBMAP 0x10000000		/* FIBMAP call through FIEMAP
+						   interface. This is a kernel
+						   only flag */
+
 struct fiemap_extent_info {
 	unsigned int	fi_flags;		/* Flags as passed from user */
 	u64		fi_start;