diff mbox

[5/5] btrfs: add no_mtime flag to btrfs-extent-same

Message ID 1435013262-23252-6-git-send-email-mfasheh@suse.de (mailing list archive)
State New, archived
Headers show

Commit Message

Mark Fasheh June 22, 2015, 10:47 p.m. UTC
One issue users have reported is that dedupe changes mtime on files,
resulting in tools like rsync thinking that their contents have changed when
in fact the data is exactly the same. Clone still wants an mtime change, so
we special case this in the code.

With this patch an application can pass the BTRFS_SAME_NO_MTIME flag to a
dedupe request and the kernel will honor it by only changing ctime.

I have an updated version of the btrfs-extent-same test program with a
switch to provide this flag at the 'no_time' branch of:

https://github.com/markfasheh/duperemove/

Signed-off-by: Mark Fasheh <mfasheh@suse.de>
---
 fs/btrfs/ioctl.c           | 34 ++++++++++++++++++++++++----------
 include/uapi/linux/btrfs.h |  5 ++++-
 2 files changed, 28 insertions(+), 11 deletions(-)

Comments

David Sterba June 23, 2015, 3:11 p.m. UTC | #1
On Mon, Jun 22, 2015 at 03:47:42PM -0700, Mark Fasheh wrote:
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 {
>  
>  
>  static int btrfs_clone(struct inode *src, struct inode *inode,
> -		       u64 off, u64 olen, u64 olen_aligned, u64 destoff);
> +		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
> +		       int no_mtime);

Please make it 'flags' and pass the verified flags directly.

>  /* Mask out flags that are inappropriate for the given type of inode. */
>  static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
> @@ -2974,7 +2975,7 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
>  }
>  
>  static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
> -			     struct inode *dst, u64 dst_loff)
> +			     struct inode *dst, u64 dst_loff, int no_mtime)
>  {
>  	int ret;
>  	u64 len = olen;
> @@ -3054,7 +3055,8 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
>  	/* pass original length for comparison so we stay within i_size */
>  	ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
>  	if (ret == 0)
> -		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
> +		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff,
> +				  no_mtime);
>  
>  	if (same_inode)
>  		unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
> @@ -3088,6 +3090,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
>  	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
>  	bool is_admin = capable(CAP_SYS_ADMIN);
>  	u16 count;
> +	int no_mtime = 0;
>  
>  	if (!(file->f_mode & FMODE_READ))
>  		return -EINVAL;
> @@ -3139,6 +3142,12 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
>  	if (!S_ISREG(src->i_mode))
>  		goto out;
>  
> +	ret = -EINVAL;
> +	if (same->flags & ~BTRFS_SAME_FLAGS)
> +		goto out;
> +	if (same->flags & BTRFS_SAME_NO_MTIME)
> +		no_mtime = 1;
> +
>  	/* pre-format output fields to sane values */
>  	for (i = 0; i < count; i++) {
>  		same->info[i].bytes_deduped = 0ULL;
> @@ -3164,7 +3173,8 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
>  			info->status = -EACCES;
>  		} else {
>  			info->status = btrfs_extent_same(src, off, len, dst,
> -							info->logical_offset);
> +							 info->logical_offset,
> +							 no_mtime);
>  			if (info->status == 0)
>  				info->bytes_deduped += len;
>  		}
> @@ -3219,13 +3229,17 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
>  				     struct inode *inode,
>  				     u64 endoff,
>  				     const u64 destoff,
> -				     const u64 olen)
> +				     const u64 olen,
> +				     int no_mtime)
>  {
>  	struct btrfs_root *root = BTRFS_I(inode)->root;
>  	int ret;
>  
>  	inode_inc_iversion(inode);
> -	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
> +	if (no_mtime)
> +		inode->i_ctime = CURRENT_TIME;
> +	else
> +		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
>  	/*
>  	 * We round up to the block size at eof when determining which
>  	 * extents to clone above, but shouldn't round up the file size.
> @@ -3316,7 +3330,7 @@ static void clone_update_extent_map(struct inode *inode,
>   */
>  static int btrfs_clone(struct inode *src, struct inode *inode,
>  		       const u64 off, const u64 olen, const u64 olen_aligned,
> -		       const u64 destoff)
> +		       const u64 destoff, int no_mtime)
>  {
>  	struct btrfs_root *root = BTRFS_I(inode)->root;
>  	struct btrfs_path *path = NULL;
> @@ -3640,7 +3654,7 @@ process_slot:
>  					      root->sectorsize);
>  			ret = clone_finish_inode_update(trans, inode,
>  							last_dest_end,
> -							destoff, olen);
> +							destoff, olen, no_mtime);
>  			if (ret)
>  				goto out;
>  			if (new_key.offset + datal >= destoff + len)
> @@ -3678,7 +3692,7 @@ process_slot:
>  		clone_update_extent_map(inode, trans, NULL, last_dest_end,
>  					destoff + len - last_dest_end);
>  		ret = clone_finish_inode_update(trans, inode, destoff + len,
> -						destoff, olen);
> +						destoff, olen, no_mtime);
>  	}
>  
>  out:
> @@ -3808,7 +3822,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
>  		btrfs_double_extent_lock(src, off, inode, destoff, len);
>  	}
>  
> -	ret = btrfs_clone(src, inode, off, olen, len, destoff);
> +	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
>  
>  	if (same_inode) {
>  		u64 lock_start = min_t(u64, off, destoff);
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index b6dec05..beeb51c 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h

> +#define	BTRFS_SAME_NO_MTIME	0x1
> +#define	BTRFS_SAME_FLAGS	(BTRFS_SAME_NO_MTIME)

The naming scheme used eg. for the send flags:

BTRFS_SEND_FLAG_MASK
BTRFS_SEND_FLAG_NO_FILE_DATA

So I suggest to follow it:

BTRFS_SAME_FLAG_MASK
BTRFS_SAME_FLAG_NO_MTIME

Though I'd rather see it named it with BTRFS_EXTENT_SAME_ prefix so it
(almost ...) matches the ioctl name (and is greppable).

This is going to be in a public interface so the naming is important.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mark Fasheh June 23, 2015, 5:11 p.m. UTC | #2
On Tue, Jun 23, 2015 at 05:11:56PM +0200, David Sterba wrote:
> On Mon, Jun 22, 2015 at 03:47:42PM -0700, Mark Fasheh wrote:
> > --- a/fs/btrfs/ioctl.c
> > +++ b/fs/btrfs/ioctl.c
> > @@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 {
> >  
> >  
> >  static int btrfs_clone(struct inode *src, struct inode *inode,
> > -		       u64 off, u64 olen, u64 olen_aligned, u64 destoff);
> > +		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
> > +		       int no_mtime);
> 
> Please make it 'flags' and pass the verified flags directly.

Sure.


> >  /* Mask out flags that are inappropriate for the given type of inode. */
> >  static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
> > @@ -2974,7 +2975,7 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
> >  }
> >  
> >  static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
> > -			     struct inode *dst, u64 dst_loff)
> > +			     struct inode *dst, u64 dst_loff, int no_mtime)
> >  {
> >  	int ret;
> >  	u64 len = olen;
> > @@ -3054,7 +3055,8 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
> >  	/* pass original length for comparison so we stay within i_size */
> >  	ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
> >  	if (ret == 0)
> > -		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
> > +		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff,
> > +				  no_mtime);
> >  
> >  	if (same_inode)
> >  		unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
> > @@ -3088,6 +3090,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
> >  	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
> >  	bool is_admin = capable(CAP_SYS_ADMIN);
> >  	u16 count;
> > +	int no_mtime = 0;
> >  
> >  	if (!(file->f_mode & FMODE_READ))
> >  		return -EINVAL;
> > @@ -3139,6 +3142,12 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
> >  	if (!S_ISREG(src->i_mode))
> >  		goto out;
> >  
> > +	ret = -EINVAL;
> > +	if (same->flags & ~BTRFS_SAME_FLAGS)
> > +		goto out;
> > +	if (same->flags & BTRFS_SAME_NO_MTIME)
> > +		no_mtime = 1;
> > +
> >  	/* pre-format output fields to sane values */
> >  	for (i = 0; i < count; i++) {
> >  		same->info[i].bytes_deduped = 0ULL;
> > @@ -3164,7 +3173,8 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
> >  			info->status = -EACCES;
> >  		} else {
> >  			info->status = btrfs_extent_same(src, off, len, dst,
> > -							info->logical_offset);
> > +							 info->logical_offset,
> > +							 no_mtime);
> >  			if (info->status == 0)
> >  				info->bytes_deduped += len;
> >  		}
> > @@ -3219,13 +3229,17 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
> >  				     struct inode *inode,
> >  				     u64 endoff,
> >  				     const u64 destoff,
> > -				     const u64 olen)
> > +				     const u64 olen,
> > +				     int no_mtime)
> >  {
> >  	struct btrfs_root *root = BTRFS_I(inode)->root;
> >  	int ret;
> >  
> >  	inode_inc_iversion(inode);
> > -	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
> > +	if (no_mtime)
> > +		inode->i_ctime = CURRENT_TIME;
> > +	else
> > +		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
> >  	/*
> >  	 * We round up to the block size at eof when determining which
> >  	 * extents to clone above, but shouldn't round up the file size.
> > @@ -3316,7 +3330,7 @@ static void clone_update_extent_map(struct inode *inode,
> >   */
> >  static int btrfs_clone(struct inode *src, struct inode *inode,
> >  		       const u64 off, const u64 olen, const u64 olen_aligned,
> > -		       const u64 destoff)
> > +		       const u64 destoff, int no_mtime)
> >  {
> >  	struct btrfs_root *root = BTRFS_I(inode)->root;
> >  	struct btrfs_path *path = NULL;
> > @@ -3640,7 +3654,7 @@ process_slot:
> >  					      root->sectorsize);
> >  			ret = clone_finish_inode_update(trans, inode,
> >  							last_dest_end,
> > -							destoff, olen);
> > +							destoff, olen, no_mtime);
> >  			if (ret)
> >  				goto out;
> >  			if (new_key.offset + datal >= destoff + len)
> > @@ -3678,7 +3692,7 @@ process_slot:
> >  		clone_update_extent_map(inode, trans, NULL, last_dest_end,
> >  					destoff + len - last_dest_end);
> >  		ret = clone_finish_inode_update(trans, inode, destoff + len,
> > -						destoff, olen);
> > +						destoff, olen, no_mtime);
> >  	}
> >  
> >  out:
> > @@ -3808,7 +3822,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
> >  		btrfs_double_extent_lock(src, off, inode, destoff, len);
> >  	}
> >  
> > -	ret = btrfs_clone(src, inode, off, olen, len, destoff);
> > +	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
> >  
> >  	if (same_inode) {
> >  		u64 lock_start = min_t(u64, off, destoff);
> > diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> > index b6dec05..beeb51c 100644
> > --- a/include/uapi/linux/btrfs.h
> > +++ b/include/uapi/linux/btrfs.h
> 
> > +#define	BTRFS_SAME_NO_MTIME	0x1
> > +#define	BTRFS_SAME_FLAGS	(BTRFS_SAME_NO_MTIME)
> 
> The naming scheme used eg. for the send flags:
> 
> BTRFS_SEND_FLAG_MASK
> BTRFS_SEND_FLAG_NO_FILE_DATA
> 
> So I suggest to follow it:
> 
> BTRFS_SAME_FLAG_MASK
> BTRFS_SAME_FLAG_NO_MTIME
> 
> Though I'd rather see it named it with BTRFS_EXTENT_SAME_ prefix so it
> (almost ...) matches the ioctl name (and is greppable).
> 
> This is going to be in a public interface so the naming is important.

Sounds good, I'll fix up the patch with your suggestions and resend another
round shortly. Btw thanks for all the reviews David, it is appreciated!
	--Mark

--
Mark Fasheh
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zygo Blaxell June 24, 2015, 8:17 p.m. UTC | #3
On Tue, Jun 23, 2015 at 05:11:56PM +0200, David Sterba wrote:
> On Mon, Jun 22, 2015 at 03:47:42PM -0700, Mark Fasheh wrote:
> > --- a/fs/btrfs/ioctl.c
> > +++ b/fs/btrfs/ioctl.c
> > @@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 {
> >  
> >  
> >  static int btrfs_clone(struct inode *src, struct inode *inode,
> > -		       u64 off, u64 olen, u64 olen_aligned, u64 destoff);
> > +		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
> > +		       int no_mtime);
> 
> Please make it 'flags' and pass the verified flags directly.
> 
> >  /* Mask out flags that are inappropriate for the given type of inode. */
> >  static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
> > @@ -2974,7 +2975,7 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
> >  }
> >  
> >  static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
> > -			     struct inode *dst, u64 dst_loff)
> > +			     struct inode *dst, u64 dst_loff, int no_mtime)
> >  {
> >  	int ret;
> >  	u64 len = olen;
> > @@ -3054,7 +3055,8 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
> >  	/* pass original length for comparison so we stay within i_size */
> >  	ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
> >  	if (ret == 0)
> > -		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
> > +		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff,
> > +				  no_mtime);
> >  
> >  	if (same_inode)
> >  		unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
> > @@ -3088,6 +3090,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
> >  	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
> >  	bool is_admin = capable(CAP_SYS_ADMIN);
> >  	u16 count;
> > +	int no_mtime = 0;
> >  
> >  	if (!(file->f_mode & FMODE_READ))
> >  		return -EINVAL;
> > @@ -3139,6 +3142,12 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
> >  	if (!S_ISREG(src->i_mode))
> >  		goto out;
> >  
> > +	ret = -EINVAL;
> > +	if (same->flags & ~BTRFS_SAME_FLAGS)
> > +		goto out;
> > +	if (same->flags & BTRFS_SAME_NO_MTIME)
> > +		no_mtime = 1;
> > +
> >  	/* pre-format output fields to sane values */
> >  	for (i = 0; i < count; i++) {
> >  		same->info[i].bytes_deduped = 0ULL;
> > @@ -3164,7 +3173,8 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
> >  			info->status = -EACCES;
> >  		} else {
> >  			info->status = btrfs_extent_same(src, off, len, dst,
> > -							info->logical_offset);
> > +							 info->logical_offset,
> > +							 no_mtime);
> >  			if (info->status == 0)
> >  				info->bytes_deduped += len;
> >  		}
> > @@ -3219,13 +3229,17 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
> >  				     struct inode *inode,
> >  				     u64 endoff,
> >  				     const u64 destoff,
> > -				     const u64 olen)
> > +				     const u64 olen,
> > +				     int no_mtime)
> >  {
> >  	struct btrfs_root *root = BTRFS_I(inode)->root;
> >  	int ret;
> >  
> >  	inode_inc_iversion(inode);
> > -	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
> > +	if (no_mtime)
> > +		inode->i_ctime = CURRENT_TIME;
> > +	else
> > +		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
> >  	/*
> >  	 * We round up to the block size at eof when determining which
> >  	 * extents to clone above, but shouldn't round up the file size.
> > @@ -3316,7 +3330,7 @@ static void clone_update_extent_map(struct inode *inode,
> >   */
> >  static int btrfs_clone(struct inode *src, struct inode *inode,
> >  		       const u64 off, const u64 olen, const u64 olen_aligned,
> > -		       const u64 destoff)
> > +		       const u64 destoff, int no_mtime)
> >  {
> >  	struct btrfs_root *root = BTRFS_I(inode)->root;
> >  	struct btrfs_path *path = NULL;
> > @@ -3640,7 +3654,7 @@ process_slot:
> >  					      root->sectorsize);
> >  			ret = clone_finish_inode_update(trans, inode,
> >  							last_dest_end,
> > -							destoff, olen);
> > +							destoff, olen, no_mtime);
> >  			if (ret)
> >  				goto out;
> >  			if (new_key.offset + datal >= destoff + len)
> > @@ -3678,7 +3692,7 @@ process_slot:
> >  		clone_update_extent_map(inode, trans, NULL, last_dest_end,
> >  					destoff + len - last_dest_end);
> >  		ret = clone_finish_inode_update(trans, inode, destoff + len,
> > -						destoff, olen);
> > +						destoff, olen, no_mtime);
> >  	}
> >  
> >  out:
> > @@ -3808,7 +3822,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
> >  		btrfs_double_extent_lock(src, off, inode, destoff, len);
> >  	}
> >  
> > -	ret = btrfs_clone(src, inode, off, olen, len, destoff);
> > +	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
> >  
> >  	if (same_inode) {
> >  		u64 lock_start = min_t(u64, off, destoff);
> > diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> > index b6dec05..beeb51c 100644
> > --- a/include/uapi/linux/btrfs.h
> > +++ b/include/uapi/linux/btrfs.h
> 
> > +#define	BTRFS_SAME_NO_MTIME	0x1
> > +#define	BTRFS_SAME_FLAGS	(BTRFS_SAME_NO_MTIME)
> 
> The naming scheme used eg. for the send flags:
> 
> BTRFS_SEND_FLAG_MASK
> BTRFS_SEND_FLAG_NO_FILE_DATA
> 
> So I suggest to follow it:
> 
> BTRFS_SAME_FLAG_MASK
> BTRFS_SAME_FLAG_NO_MTIME
> 
> Though I'd rather see it named it with BTRFS_EXTENT_SAME_ prefix so it
> (almost ...) matches the ioctl name (and is greppable).

Is there any sane use case where we would _want_ EXTENT_SAME to change
the mtime?  We do a lot of work to make sure that none of the files
involved have any sort of content change.  Why do we need the flag at all?

> This is going to be in a public interface so the naming is important.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba June 25, 2015, 12:52 p.m. UTC | #4
On Wed, Jun 24, 2015 at 04:17:32PM -0400, Zygo Blaxell wrote:
> Is there any sane use case where we would _want_ EXTENT_SAME to change
> the mtime?  We do a lot of work to make sure that none of the files
> involved have any sort of content change.  Why do we need the flag at all?

Good point, I don't see the usecase for updating MTIME.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Austin S. Hemmelgarn June 25, 2015, 1:10 p.m. UTC | #5
On 2015-06-25 08:52, David Sterba wrote:
> On Wed, Jun 24, 2015 at 04:17:32PM -0400, Zygo Blaxell wrote:
>> Is there any sane use case where we would _want_ EXTENT_SAME to change
>> the mtime?  We do a lot of work to make sure that none of the files
>> involved have any sort of content change.  Why do we need the flag at all?
>
> Good point, I don't see the usecase for updating MTIME.
Was the original intent possibly to make certain the CTIME got updated? 
  Because EXTENT_SAME _does_ update the metadata, and by logical 
extension that means that the CTIME should change.
Zygo Blaxell June 25, 2015, 4:52 p.m. UTC | #6
On Thu, Jun 25, 2015 at 09:10:31AM -0400, Austin S Hemmelgarn wrote:
> On 2015-06-25 08:52, David Sterba wrote:
> >On Wed, Jun 24, 2015 at 04:17:32PM -0400, Zygo Blaxell wrote:
> >>Is there any sane use case where we would _want_ EXTENT_SAME to change
> >>the mtime?  We do a lot of work to make sure that none of the files
> >>involved have any sort of content change.  Why do we need the flag at all?
> >
> >Good point, I don't see the usecase for updating MTIME.
> Was the original intent possibly to make certain the CTIME got
> updated?  Because EXTENT_SAME _does_ update the metadata, and by
> logical extension that means that the CTIME should change.

It updates the _extent_ metadata.  CTIME does not cover that, only _inode_
metadata changes.

Put another way, if we updated CTIME for extent metadata changes, then
a balance would imply rewriting every inode on the filesystem, which is
insane.  Same for defragment:  when we defragment files, we already
leave the MTIME and CTIME fields alone, and we use locking to protect
defrag from race conditions that might cause it to modify data.

You can confirm the behavior of balance and defrag on v4.0.5:

	# Here's a file:
	root@testhost:~# fiemap /media/usb7/vmlinuz
	File: /media/usb7/vmlinuz
	Log 0x0..0x654000 Phy 0xc10000..0x1264000 Flags FIEMAP_EXTENT_LAST
	root@testhost:~# ls --full -lc /media/usb7/vmlinuz
	-rw------- 1 root root 6634160 2015-06-25 12:37:16.265688748 -0400 /media/usb7/vmlinuz

	# Balance:
	root@testhost:~# btrfs balance start /media/usb7
	Done, had to relocate 5 out of 5 chunks

	# Confirm the file is an a new physical location:
	root@testhost:~# fiemap /media/usb7/vmlinuz
	File: /media/usb7/vmlinuz
	Log 0x0..0x654000 Phy 0x2b3d0000..0x2ba24000 Flags FIEMAP_EXTENT_LAST

	# We did not change the CTIME.
	root@testhost:~# ls --full -lc /media/usb7/vmlinuz
	-rw------- 1 root root 6634160 2015-06-25 12:37:16.265688748 -0400 /media/usb7/vmlinuz

	# Now let's try defrag!
	root@testhost:~# btrfs fi defrag -c /media/usb7/vmlinuz

	# File has been moved again, and parts of it are even compressed now
	root@testhost:~# fiemap /media/usb7/vmlinuz
	File: /media/usb7/vmlinuz
	Log 0x0..0x20000 Phy 0x2b390000..0x2b3b0000 Flags FIEMAP_EXTENT_ENCODED
	Log 0x20000..0x80000 Phy 0x2ba64000..0x2bac4000 Flags 0
	Log 0x80000..0x100000 Phy 0x2bac4000..0x2bb44000 Flags 0
	Log 0x100000..0x180000 Phy 0x2bb44000..0x2bbc4000 Flags 0
	Log 0x180000..0x200000 Phy 0x2bbc4000..0x2bc44000 Flags 0
	Log 0x200000..0x280000 Phy 0x2bc44000..0x2bcc4000 Flags 0
	Log 0x280000..0x300000 Phy 0x2bcc4000..0x2bd44000 Flags 0
	Log 0x300000..0x380000 Phy 0x2bd44000..0x2bdc4000 Flags 0
	Log 0x380000..0x400000 Phy 0x2bdc4000..0x2be44000 Flags 0
	Log 0x400000..0x480000 Phy 0x2be44000..0x2bec4000 Flags 0
	Log 0x480000..0x500000 Phy 0x2bec4000..0x2bf44000 Flags 0
	Log 0x500000..0x580000 Phy 0x2bf44000..0x2bfc4000 Flags 0
	Log 0x580000..0x600000 Phy 0x2bfc4000..0x2c044000 Flags 0
	Log 0x600000..0x654000 Phy 0x2c044000..0x2c098000 Flags FIEMAP_EXTENT_LAST

	# CTIME not changed (no changes to file content).
	root@testhost:~# ls --full -lc /media/usb7/vmlinuz
	-rw------- 1 root root 6634160 2015-06-25 12:37:16.265688748 -0400 /media/usb7/vmlinuz


> 
>
Mark Fasheh June 25, 2015, 6:12 p.m. UTC | #7
On Thu, Jun 25, 2015 at 02:52:50PM +0200, David Sterba wrote:
> On Wed, Jun 24, 2015 at 04:17:32PM -0400, Zygo Blaxell wrote:
> > Is there any sane use case where we would _want_ EXTENT_SAME to change
> > the mtime?  We do a lot of work to make sure that none of the files
> > involved have any sort of content change.  Why do we need the flag at all?
> 
> Good point, I don't see the usecase for updating MTIME.

Yeah there isn't one and I doubt anyone will be upset if we just always
ignore the mtime update. I'll send some new patches shortly.

Thanks for the suggestion Zygo.
	--Mark

--
Mark Fasheh
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 83f4679..8cfc65f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -87,7 +87,8 @@  struct btrfs_ioctl_received_subvol_args_32 {
 
 
 static int btrfs_clone(struct inode *src, struct inode *inode,
-		       u64 off, u64 olen, u64 olen_aligned, u64 destoff);
+		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
+		       int no_mtime);
 
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -2974,7 +2975,7 @@  static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
 }
 
 static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
-			     struct inode *dst, u64 dst_loff)
+			     struct inode *dst, u64 dst_loff, int no_mtime)
 {
 	int ret;
 	u64 len = olen;
@@ -3054,7 +3055,8 @@  static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 	/* pass original length for comparison so we stay within i_size */
 	ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
 	if (ret == 0)
-		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
+		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff,
+				  no_mtime);
 
 	if (same_inode)
 		unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
@@ -3088,6 +3090,7 @@  static long btrfs_ioctl_file_extent_same(struct file *file,
 	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
 	bool is_admin = capable(CAP_SYS_ADMIN);
 	u16 count;
+	int no_mtime = 0;
 
 	if (!(file->f_mode & FMODE_READ))
 		return -EINVAL;
@@ -3139,6 +3142,12 @@  static long btrfs_ioctl_file_extent_same(struct file *file,
 	if (!S_ISREG(src->i_mode))
 		goto out;
 
+	ret = -EINVAL;
+	if (same->flags & ~BTRFS_SAME_FLAGS)
+		goto out;
+	if (same->flags & BTRFS_SAME_NO_MTIME)
+		no_mtime = 1;
+
 	/* pre-format output fields to sane values */
 	for (i = 0; i < count; i++) {
 		same->info[i].bytes_deduped = 0ULL;
@@ -3164,7 +3173,8 @@  static long btrfs_ioctl_file_extent_same(struct file *file,
 			info->status = -EACCES;
 		} else {
 			info->status = btrfs_extent_same(src, off, len, dst,
-							info->logical_offset);
+							 info->logical_offset,
+							 no_mtime);
 			if (info->status == 0)
 				info->bytes_deduped += len;
 		}
@@ -3219,13 +3229,17 @@  static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     u64 endoff,
 				     const u64 destoff,
-				     const u64 olen)
+				     const u64 olen,
+				     int no_mtime)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret;
 
 	inode_inc_iversion(inode);
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	if (no_mtime)
+		inode->i_ctime = CURRENT_TIME;
+	else
+		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	/*
 	 * We round up to the block size at eof when determining which
 	 * extents to clone above, but shouldn't round up the file size.
@@ -3316,7 +3330,7 @@  static void clone_update_extent_map(struct inode *inode,
  */
 static int btrfs_clone(struct inode *src, struct inode *inode,
 		       const u64 off, const u64 olen, const u64 olen_aligned,
-		       const u64 destoff)
+		       const u64 destoff, int no_mtime)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path = NULL;
@@ -3640,7 +3654,7 @@  process_slot:
 					      root->sectorsize);
 			ret = clone_finish_inode_update(trans, inode,
 							last_dest_end,
-							destoff, olen);
+							destoff, olen, no_mtime);
 			if (ret)
 				goto out;
 			if (new_key.offset + datal >= destoff + len)
@@ -3678,7 +3692,7 @@  process_slot:
 		clone_update_extent_map(inode, trans, NULL, last_dest_end,
 					destoff + len - last_dest_end);
 		ret = clone_finish_inode_update(trans, inode, destoff + len,
-						destoff, olen);
+						destoff, olen, no_mtime);
 	}
 
 out:
@@ -3808,7 +3822,7 @@  static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		btrfs_double_extent_lock(src, off, inode, destoff, len);
 	}
 
-	ret = btrfs_clone(src, inode, off, olen, len, destoff);
+	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
 
 	if (same_inode) {
 		u64 lock_start = min_t(u64, off, destoff);
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index b6dec05..beeb51c 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -342,11 +342,14 @@  struct btrfs_ioctl_same_extent_info {
 	__u32 reserved;
 };
 
+#define	BTRFS_SAME_NO_MTIME	0x1
+#define	BTRFS_SAME_FLAGS	(BTRFS_SAME_NO_MTIME)
+
 struct btrfs_ioctl_same_args {
 	__u64 logical_offset;	/* in - start of extent in source */
 	__u64 length;		/* in - length of extent */
 	__u16 dest_count;	/* in - total elements in info array */
-	__u16 reserved1;
+	__u16 flags;		/* in - see BTRFS_SAME_FLAGS */
 	__u32 reserved2;
 	struct btrfs_ioctl_same_extent_info info[0];
 };