diff mbox

[2/2] xfs: implement the lazytime mount options

Message ID 20180222152957.19624-3-hch@lst.de (mailing list archive)
State Accepted
Headers show

Commit Message

Christoph Hellwig Feb. 22, 2018, 3:29 p.m. UTC
Use the VFS dirty inode tracking for lazytime inodes only, and just
log them in ->dirty_inode.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_iops.c        |  5 +++++
 fs/xfs/xfs_super.c       | 23 +++++++++++++++++++++++
 fs/xfs/xfs_trans_inode.c |  8 ++++++++
 3 files changed, 36 insertions(+)

Comments

Dave Chinner Feb. 23, 2018, 12:50 a.m. UTC | #1
On Thu, Feb 22, 2018 at 07:29:57AM -0800, Christoph Hellwig wrote:
> Use the VFS dirty inode tracking for lazytime inodes only, and just
> log them in ->dirty_inode.

This is a lot cleaner than what I was looking at doing. :)

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_iops.c        |  5 +++++
>  fs/xfs/xfs_super.c       | 23 +++++++++++++++++++++++
>  fs/xfs/xfs_trans_inode.c |  8 ++++++++
>  3 files changed, 36 insertions(+)
> 
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index 56475fcd76f2..0946a3baae6a 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -46,6 +46,7 @@
>  #include <linux/security.h>
>  #include <linux/iomap.h>
>  #include <linux/slab.h>
> +#include <linux/iversion.h>
>  
>  /*
>   * Directories have different lock order w.r.t. mmap_sem compared to regular
> @@ -1057,6 +1058,10 @@ xfs_vn_update_time(
>  
>  	trace_xfs_update_time(ip);
>  
> +	if ((inode->i_sb->s_flags & SB_LAZYTIME) &&
> +	    !((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false)))
> +		return generic_update_time(inode, now, flags);

So if we've incremented iversion here on a lazytime update (hence
making it not lazy), we won't do the iversion update in
xfs_trans_log_inode() because the query bit has been cleared here.
Hence we won't set XFS_ILOG_CORE in the transaction and the iversion
update will not be logged.

Maybe:

	int	log_flags = XFS_ILOG_TIMESTAMP;
	...

	if (inode->i_sb->s_flags & SB_LAZYTIME) {
		if (!(flags & S_VERSION) ||
		     !inode_maybe_inc_iversion(inode, false)))
			return generic_update_time(inode, now, flags);

		/* Capture the iversion update that just occurred */
		log_flags |= XFS_ILOG_CORE;
	}

	.....
	xfs_trans_log_inode(tp, ip, log_flags);
	.....

> diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
> index fddacf9575df..769943fcc1f2 100644
> --- a/fs/xfs/xfs_trans_inode.c
> +++ b/fs/xfs/xfs_trans_inode.c
> @@ -98,9 +98,17 @@ xfs_trans_log_inode(
>  	xfs_inode_t	*ip,
>  	uint		flags)
>  {
> +	struct inode	*inode = VFS_I(ip);
> +
>  	ASSERT(ip->i_itemp != NULL);
>  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
>  
> +	if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
> +		spin_lock(&inode->i_lock);
> +		inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
> +		spin_unlock(&inode->i_lock);
> +	}

I suspect this is racy w.r.t other code that sets/clears the
I_DIRTY_TIME fields. Shouldn't both the check and clear be under the
spin lock?

Cheers,

Dave.
Christoph Hellwig Feb. 23, 2018, 3:35 p.m. UTC | #2
On Fri, Feb 23, 2018 at 11:50:48AM +1100, Dave Chinner wrote:
> > +	if ((inode->i_sb->s_flags & SB_LAZYTIME) &&
> > +	    !((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false)))
> > +		return generic_update_time(inode, now, flags);
> 
> So if we've incremented iversion here on a lazytime update (hence
> making it not lazy), we won't do the iversion update in
> xfs_trans_log_inode() because the query bit has been cleared here.
> Hence we won't set XFS_ILOG_CORE in the transaction and the iversion
> update will not be logged.

Indeed, I'll fix this up.

> > +	if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
> > +		spin_lock(&inode->i_lock);
> > +		inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
> > +		spin_unlock(&inode->i_lock);
> > +	}
> 
> I suspect this is racy w.r.t other code that sets/clears the
> I_DIRTY_TIME fields. Shouldn't both the check and clear be under the
> spin lock?

I don;t think so.  The racy check is perfectly fine - if someone
cleared the flag racing with the above nothing bad will happen.
If someone raced setting it we won't capture this update in the
transaction and will have to log another one 24 hours later in the
worst case.  The only reason for the lock is to not corrupt i_state
itself by overlapping rmw cycles.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 56475fcd76f2..0946a3baae6a 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -46,6 +46,7 @@ 
 #include <linux/security.h>
 #include <linux/iomap.h>
 #include <linux/slab.h>
+#include <linux/iversion.h>
 
 /*
  * Directories have different lock order w.r.t. mmap_sem compared to regular
@@ -1057,6 +1058,10 @@  xfs_vn_update_time(
 
 	trace_xfs_update_time(ip);
 
+	if ((inode->i_sb->s_flags & SB_LAZYTIME) &&
+	    !((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false)))
+		return generic_update_time(inode, now, flags);
+
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
 	if (error)
 		return error;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 7aba628dc527..20d53f125c35 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1007,6 +1007,28 @@  xfs_fs_destroy_inode(
 	xfs_inode_set_reclaim_tag(ip);
 }
 
+static void
+xfs_fs_dirty_inode(
+	struct inode			*inode,
+	int				flag)
+{
+	struct xfs_inode		*ip = XFS_I(inode);
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_trans		*tp;
+
+	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
+		return;
+	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
+		return;
+
+	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
+		return;
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
+	xfs_trans_commit(tp);
+}
+
 /*
  * Slab object creation initialisation for the XFS inode.
  * This covers only the idempotent fields in the XFS inode;
@@ -1787,6 +1809,7 @@  xfs_fs_free_cached_objects(
 static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
+	.dirty_inode		= xfs_fs_dirty_inode,
 	.drop_inode		= xfs_fs_drop_inode,
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index fddacf9575df..769943fcc1f2 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -98,9 +98,17 @@  xfs_trans_log_inode(
 	xfs_inode_t	*ip,
 	uint		flags)
 {
+	struct inode	*inode = VFS_I(ip);
+
 	ASSERT(ip->i_itemp != NULL);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
+	if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
+		spin_lock(&inode->i_lock);
+		inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+		spin_unlock(&inode->i_lock);
+	}
+
 	/*
 	 * First time we log the inode in a transaction, bump the inode change
 	 * counter if it is configured for this to occur. While we have the