diff mbox series

[35/45] xfs: introduce per-cpu CIL tracking sructure

Message ID 20210305051143.182133-36-david@fromorbit.com (mailing list archive)
State New
Headers show
Series xfs: consolidated log and optimisation changes | expand

Commit Message

Dave Chinner March 5, 2021, 5:11 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

The CIL push lock is highly contended on larger machines, becoming a
hard bottleneck that about 700,000 transaction commits/s on >16p
machines. To address this, start moving the CIL tracking
infrastructure to utilise per-CPU structures.

We need to track the space used, the amount of log reservation space
reserved to write the CIL, the log items in the CIL and the busy
extents that need to be completed by the CIL commit.  This requires
a couple of per-cpu counters, an unordered per-cpu list and a
globally ordered per-cpu list.

Create a per-cpu structure to hold these and all the management
interfaces needed, as well as the hooks to handle hotplug CPUs.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_log_cil.c       | 94 ++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_priv.h      | 15 ++++++
 include/linux/cpuhotplug.h |  1 +
 3 files changed, 110 insertions(+)

Comments

Darrick J. Wong March 11, 2021, 12:11 a.m. UTC | #1
On Fri, Mar 05, 2021 at 04:11:33PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> The CIL push lock is highly contended on larger machines, becoming a
> hard bottleneck that about 700,000 transaction commits/s on >16p
> machines. To address this, start moving the CIL tracking
> infrastructure to utilise per-CPU structures.
> 
> We need to track the space used, the amount of log reservation space
> reserved to write the CIL, the log items in the CIL and the busy
> extents that need to be completed by the CIL commit.  This requires
> a couple of per-cpu counters, an unordered per-cpu list and a
> globally ordered per-cpu list.
> 
> Create a per-cpu structure to hold these and all the management
> interfaces needed, as well as the hooks to handle hotplug CPUs.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_log_cil.c       | 94 ++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_log_priv.h      | 15 ++++++
>  include/linux/cpuhotplug.h |  1 +
>  3 files changed, 110 insertions(+)
> 
> diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> index f8fb2f59e24c..1bcf0d423d30 100644
> --- a/fs/xfs/xfs_log_cil.c
> +++ b/fs/xfs/xfs_log_cil.c
> @@ -1365,6 +1365,93 @@ xfs_log_item_in_current_chkpt(
>  	return true;
>  }
>  
> +#ifdef CONFIG_HOTPLUG_CPU
> +static LIST_HEAD(xlog_cil_pcp_list);
> +static DEFINE_SPINLOCK(xlog_cil_pcp_lock);
> +static bool xlog_cil_pcp_init;
> +
> +static int
> +xlog_cil_pcp_dead(
> +	unsigned int		cpu)
> +{
> +	struct xfs_cil		*cil;
> +
> +        spin_lock(&xlog_cil_pcp_lock);
> +        list_for_each_entry(cil, &xlog_cil_pcp_list, xc_pcp_list) {

Weird indentation.

> +		/* move stuff on dead CPU to context */

Should this have some actual code?  I don't think any of the remaining
patches add anything here.

> +	}
> +	spin_unlock(&xlog_cil_pcp_lock);
> +	return 0;
> +}
> +
> +static int
> +xlog_cil_pcp_hpadd(
> +	struct xfs_cil		*cil)
> +{
> +	if (!xlog_cil_pcp_init) {
> +		int	ret;
> +		ret = cpuhp_setup_state_nocalls(CPUHP_XFS_CIL_DEAD,
> +						"xfs/cil_pcp:dead", NULL,
> +						xlog_cil_pcp_dead);
> +		if (ret < 0) {
> +			xfs_warn(cil->xc_log->l_mp,
> +	"Failed to initialise CIL hotplug, error %d. XFS is non-functional.",

How likely is to happen?

> +				ret);
> +			ASSERT(0);

I guess not that often?

> +			return -ENOMEM;

Why not return ret here?  I guess it's because ret could be any number
of (not centrally documented?) error codes, and we don't really care to
expose that to userspace?

--D

> +		}
> +		xlog_cil_pcp_init = true;
> +	}
> +
> +	INIT_LIST_HEAD(&cil->xc_pcp_list);
> +	spin_lock(&xlog_cil_pcp_lock);
> +	list_add(&cil->xc_pcp_list, &xlog_cil_pcp_list);
> +	spin_unlock(&xlog_cil_pcp_lock);
> +	return 0;
> +}
> +
> +static void
> +xlog_cil_pcp_hpremove(
> +	struct xfs_cil		*cil)
> +{
> +	spin_lock(&xlog_cil_pcp_lock);
> +	list_del(&cil->xc_pcp_list);
> +	spin_unlock(&xlog_cil_pcp_lock);
> +}
> +
> +#else /* !CONFIG_HOTPLUG_CPU */
> +static inline void xlog_cil_pcp_hpadd(struct xfs_cil *cil) {}
> +static inline void xlog_cil_pcp_hpremove(struct xfs_cil *cil) {}
> +#endif
> +
> +static void __percpu *
> +xlog_cil_pcp_alloc(
> +	struct xfs_cil		*cil)
> +{
> +	struct xlog_cil_pcp	*cilpcp;
> +
> +	cilpcp = alloc_percpu(struct xlog_cil_pcp);
> +	if (!cilpcp)
> +		return NULL;
> +
> +	if (xlog_cil_pcp_hpadd(cil) < 0) {
> +		free_percpu(cilpcp);
> +		return NULL;
> +	}
> +	return cilpcp;
> +}
> +
> +static void
> +xlog_cil_pcp_free(
> +	struct xfs_cil		*cil,
> +	struct xlog_cil_pcp	*cilpcp)
> +{
> +	if (!cilpcp)
> +		return;
> +	xlog_cil_pcp_hpremove(cil);
> +	free_percpu(cilpcp);
> +}
> +
>  /*
>   * Perform initial CIL structure initialisation.
>   */
> @@ -1379,6 +1466,12 @@ xlog_cil_init(
>  	if (!cil)
>  		return -ENOMEM;
>  
> +	cil->xc_pcp = xlog_cil_pcp_alloc(cil);
> +	if (!cil->xc_pcp) {
> +		kmem_free(cil);
> +		return -ENOMEM;
> +	}
> +
>  	INIT_LIST_HEAD(&cil->xc_cil);
>  	INIT_LIST_HEAD(&cil->xc_committing);
>  	spin_lock_init(&cil->xc_cil_lock);
> @@ -1409,6 +1502,7 @@ xlog_cil_destroy(
>  
>  	ASSERT(list_empty(&cil->xc_cil));
>  	ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
> +	xlog_cil_pcp_free(cil, cil->xc_pcp);
>  	kmem_free(cil);
>  }
>  
> diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
> index e72d14c76e03..2562f29c8986 100644
> --- a/fs/xfs/xfs_log_priv.h
> +++ b/fs/xfs/xfs_log_priv.h
> @@ -231,6 +231,16 @@ struct xfs_cil_ctx {
>  	struct work_struct	push_work;
>  };
>  
> +/*
> + * Per-cpu CIL tracking items
> + */
> +struct xlog_cil_pcp {
> +	uint32_t		space_used;
> +	uint32_t		curr_res;
> +	struct list_head	busy_extents;
> +	struct list_head	log_items;
> +};
> +
>  /*
>   * Committed Item List structure
>   *
> @@ -264,6 +274,11 @@ struct xfs_cil {
>  	wait_queue_head_t	xc_commit_wait;
>  	xfs_csn_t		xc_current_sequence;
>  	wait_queue_head_t	xc_push_wait;	/* background push throttle */
> +
> +	struct xlog_cil_pcp __percpu *xc_pcp;
> +#ifdef CONFIG_HOTPLUG_CPU
> +	struct list_head	xc_pcp_list;
> +#endif
>  } ____cacheline_aligned_in_smp;
>  
>  /* xc_flags bit values */
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index f14adb882338..b13b21d825b3 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -52,6 +52,7 @@ enum cpuhp_state {
>  	CPUHP_FS_BUFF_DEAD,
>  	CPUHP_PRINTK_DEAD,
>  	CPUHP_MM_MEMCQ_DEAD,
> +	CPUHP_XFS_CIL_DEAD,
>  	CPUHP_PERCPU_CNT_DEAD,
>  	CPUHP_RADIX_DEAD,
>  	CPUHP_PAGE_ALLOC_DEAD,
> -- 
> 2.28.0
>
Dave Chinner March 11, 2021, 6:33 a.m. UTC | #2
On Wed, Mar 10, 2021 at 04:11:43PM -0800, Darrick J. Wong wrote:
> On Fri, Mar 05, 2021 at 04:11:33PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > The CIL push lock is highly contended on larger machines, becoming a
> > hard bottleneck that about 700,000 transaction commits/s on >16p
> > machines. To address this, start moving the CIL tracking
> > infrastructure to utilise per-CPU structures.
> > 
> > We need to track the space used, the amount of log reservation space
> > reserved to write the CIL, the log items in the CIL and the busy
> > extents that need to be completed by the CIL commit.  This requires
> > a couple of per-cpu counters, an unordered per-cpu list and a
> > globally ordered per-cpu list.
> > 
> > Create a per-cpu structure to hold these and all the management
> > interfaces needed, as well as the hooks to handle hotplug CPUs.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> >  fs/xfs/xfs_log_cil.c       | 94 ++++++++++++++++++++++++++++++++++++++
> >  fs/xfs/xfs_log_priv.h      | 15 ++++++
> >  include/linux/cpuhotplug.h |  1 +
> >  3 files changed, 110 insertions(+)
> > 
> > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> > index f8fb2f59e24c..1bcf0d423d30 100644
> > --- a/fs/xfs/xfs_log_cil.c
> > +++ b/fs/xfs/xfs_log_cil.c
> > @@ -1365,6 +1365,93 @@ xfs_log_item_in_current_chkpt(
> >  	return true;
> >  }
> >  
> > +#ifdef CONFIG_HOTPLUG_CPU
> > +static LIST_HEAD(xlog_cil_pcp_list);
> > +static DEFINE_SPINLOCK(xlog_cil_pcp_lock);
> > +static bool xlog_cil_pcp_init;
> > +
> > +static int
> > +xlog_cil_pcp_dead(
> > +	unsigned int		cpu)
> > +{
> > +	struct xfs_cil		*cil;
> > +
> > +        spin_lock(&xlog_cil_pcp_lock);
> > +        list_for_each_entry(cil, &xlog_cil_pcp_list, xc_pcp_list) {
> 
> Weird indentation.
> 
> > +		/* move stuff on dead CPU to context */
> 
> Should this have some actual code?  I don't think any of the remaining
> patches add anything here.

They should be moving stuff to the current CIL ctx so it is captured
when the CPU goes down.

> 
> > +	}
> > +	spin_unlock(&xlog_cil_pcp_lock);
> > +	return 0;
> > +}
> > +
> > +static int
> > +xlog_cil_pcp_hpadd(
> > +	struct xfs_cil		*cil)
> > +{
> > +	if (!xlog_cil_pcp_init) {
> > +		int	ret;
> > +		ret = cpuhp_setup_state_nocalls(CPUHP_XFS_CIL_DEAD,
> > +						"xfs/cil_pcp:dead", NULL,
> > +						xlog_cil_pcp_dead);
> > +		if (ret < 0) {
> > +			xfs_warn(cil->xc_log->l_mp,
> > +	"Failed to initialise CIL hotplug, error %d. XFS is non-functional.",
> 
> How likely is to happen?

AFAICT, very unlikely, but....

> 
> > +				ret);
> > +			ASSERT(0);
> 
> I guess not that often?
> 
> > +			return -ENOMEM;
> 
> Why not return ret here?  I guess it's because ret could be any number
> of (not centrally documented?) error codes, and we don't really care to
> expose that to userspace?

... yeah.

The cpu hotplug stuff is poorly documented and the code is another
of those "maze of twisty passages" implementations that bounce
through many functions that can return all sorts of different stuff
from lots of different things that could got wrong. I see at least
EINVAL, ENOSPC, ENOMEM, EBUSY and EAGAIN could be returned, and I
have no idea what any of them would actually mean went wrong.

In the end I kinda just copied what the radix tree and percpu
counter code do with an error failing to init hotplug calls: WARN
and then ignore. Hence from the filesystem POV, the error may as
well be ENOMEM because we couldn't set something critical up and
that saves us getting confused over whether the error is fatal or
not at a higher level.

Cheers,

Dave.
Dave Chinner March 11, 2021, 6:42 a.m. UTC | #3
On Thu, Mar 11, 2021 at 05:33:38PM +1100, Dave Chinner wrote:
> On Wed, Mar 10, 2021 at 04:11:43PM -0800, Darrick J. Wong wrote:
> > On Fri, Mar 05, 2021 at 04:11:33PM +1100, Dave Chinner wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > > 
> > > The CIL push lock is highly contended on larger machines, becoming a
> > > hard bottleneck that about 700,000 transaction commits/s on >16p
> > > machines. To address this, start moving the CIL tracking
> > > infrastructure to utilise per-CPU structures.
> > > 
> > > We need to track the space used, the amount of log reservation space
> > > reserved to write the CIL, the log items in the CIL and the busy
> > > extents that need to be completed by the CIL commit.  This requires
> > > a couple of per-cpu counters, an unordered per-cpu list and a
> > > globally ordered per-cpu list.
> > > 
> > > Create a per-cpu structure to hold these and all the management
> > > interfaces needed, as well as the hooks to handle hotplug CPUs.
> > > 
> > > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > > ---
> > >  fs/xfs/xfs_log_cil.c       | 94 ++++++++++++++++++++++++++++++++++++++
> > >  fs/xfs/xfs_log_priv.h      | 15 ++++++
> > >  include/linux/cpuhotplug.h |  1 +
> > >  3 files changed, 110 insertions(+)
> > > 
> > > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> > > index f8fb2f59e24c..1bcf0d423d30 100644
> > > --- a/fs/xfs/xfs_log_cil.c
> > > +++ b/fs/xfs/xfs_log_cil.c
> > > @@ -1365,6 +1365,93 @@ xfs_log_item_in_current_chkpt(
> > >  	return true;
> > >  }
> > >  
> > > +#ifdef CONFIG_HOTPLUG_CPU
> > > +static LIST_HEAD(xlog_cil_pcp_list);
> > > +static DEFINE_SPINLOCK(xlog_cil_pcp_lock);
> > > +static bool xlog_cil_pcp_init;
> > > +
> > > +static int
> > > +xlog_cil_pcp_dead(
> > > +	unsigned int		cpu)
> > > +{
> > > +	struct xfs_cil		*cil;
> > > +
> > > +        spin_lock(&xlog_cil_pcp_lock);
> > > +        list_for_each_entry(cil, &xlog_cil_pcp_list, xc_pcp_list) {
> > 
> > Weird indentation.
> > 
> > > +		/* move stuff on dead CPU to context */
> > 
> > Should this have some actual code?  I don't think any of the remaining
> > patches add anything here.
> 
> They should be moving stuff to the current CIL ctx so it is captured
> when the CPU goes down.

Yup, looks like I missed updating this. Will add it in the patches
that need it.

-Dave.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index f8fb2f59e24c..1bcf0d423d30 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -1365,6 +1365,93 @@  xfs_log_item_in_current_chkpt(
 	return true;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static LIST_HEAD(xlog_cil_pcp_list);
+static DEFINE_SPINLOCK(xlog_cil_pcp_lock);
+static bool xlog_cil_pcp_init;
+
+static int
+xlog_cil_pcp_dead(
+	unsigned int		cpu)
+{
+	struct xfs_cil		*cil;
+
+        spin_lock(&xlog_cil_pcp_lock);
+        list_for_each_entry(cil, &xlog_cil_pcp_list, xc_pcp_list) {
+		/* move stuff on dead CPU to context */
+	}
+	spin_unlock(&xlog_cil_pcp_lock);
+	return 0;
+}
+
+static int
+xlog_cil_pcp_hpadd(
+	struct xfs_cil		*cil)
+{
+	if (!xlog_cil_pcp_init) {
+		int	ret;
+		ret = cpuhp_setup_state_nocalls(CPUHP_XFS_CIL_DEAD,
+						"xfs/cil_pcp:dead", NULL,
+						xlog_cil_pcp_dead);
+		if (ret < 0) {
+			xfs_warn(cil->xc_log->l_mp,
+	"Failed to initialise CIL hotplug, error %d. XFS is non-functional.",
+				ret);
+			ASSERT(0);
+			return -ENOMEM;
+		}
+		xlog_cil_pcp_init = true;
+	}
+
+	INIT_LIST_HEAD(&cil->xc_pcp_list);
+	spin_lock(&xlog_cil_pcp_lock);
+	list_add(&cil->xc_pcp_list, &xlog_cil_pcp_list);
+	spin_unlock(&xlog_cil_pcp_lock);
+	return 0;
+}
+
+static void
+xlog_cil_pcp_hpremove(
+	struct xfs_cil		*cil)
+{
+	spin_lock(&xlog_cil_pcp_lock);
+	list_del(&cil->xc_pcp_list);
+	spin_unlock(&xlog_cil_pcp_lock);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+static inline void xlog_cil_pcp_hpadd(struct xfs_cil *cil) {}
+static inline void xlog_cil_pcp_hpremove(struct xfs_cil *cil) {}
+#endif
+
+static void __percpu *
+xlog_cil_pcp_alloc(
+	struct xfs_cil		*cil)
+{
+	struct xlog_cil_pcp	*cilpcp;
+
+	cilpcp = alloc_percpu(struct xlog_cil_pcp);
+	if (!cilpcp)
+		return NULL;
+
+	if (xlog_cil_pcp_hpadd(cil) < 0) {
+		free_percpu(cilpcp);
+		return NULL;
+	}
+	return cilpcp;
+}
+
+static void
+xlog_cil_pcp_free(
+	struct xfs_cil		*cil,
+	struct xlog_cil_pcp	*cilpcp)
+{
+	if (!cilpcp)
+		return;
+	xlog_cil_pcp_hpremove(cil);
+	free_percpu(cilpcp);
+}
+
 /*
  * Perform initial CIL structure initialisation.
  */
@@ -1379,6 +1466,12 @@  xlog_cil_init(
 	if (!cil)
 		return -ENOMEM;
 
+	cil->xc_pcp = xlog_cil_pcp_alloc(cil);
+	if (!cil->xc_pcp) {
+		kmem_free(cil);
+		return -ENOMEM;
+	}
+
 	INIT_LIST_HEAD(&cil->xc_cil);
 	INIT_LIST_HEAD(&cil->xc_committing);
 	spin_lock_init(&cil->xc_cil_lock);
@@ -1409,6 +1502,7 @@  xlog_cil_destroy(
 
 	ASSERT(list_empty(&cil->xc_cil));
 	ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
+	xlog_cil_pcp_free(cil, cil->xc_pcp);
 	kmem_free(cil);
 }
 
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index e72d14c76e03..2562f29c8986 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -231,6 +231,16 @@  struct xfs_cil_ctx {
 	struct work_struct	push_work;
 };
 
+/*
+ * Per-cpu CIL tracking items
+ */
+struct xlog_cil_pcp {
+	uint32_t		space_used;
+	uint32_t		curr_res;
+	struct list_head	busy_extents;
+	struct list_head	log_items;
+};
+
 /*
  * Committed Item List structure
  *
@@ -264,6 +274,11 @@  struct xfs_cil {
 	wait_queue_head_t	xc_commit_wait;
 	xfs_csn_t		xc_current_sequence;
 	wait_queue_head_t	xc_push_wait;	/* background push throttle */
+
+	struct xlog_cil_pcp __percpu *xc_pcp;
+#ifdef CONFIG_HOTPLUG_CPU
+	struct list_head	xc_pcp_list;
+#endif
 } ____cacheline_aligned_in_smp;
 
 /* xc_flags bit values */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index f14adb882338..b13b21d825b3 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -52,6 +52,7 @@  enum cpuhp_state {
 	CPUHP_FS_BUFF_DEAD,
 	CPUHP_PRINTK_DEAD,
 	CPUHP_MM_MEMCQ_DEAD,
+	CPUHP_XFS_CIL_DEAD,
 	CPUHP_PERCPU_CNT_DEAD,
 	CPUHP_RADIX_DEAD,
 	CPUHP_PAGE_ALLOC_DEAD,