diff mbox series

[29/39] xfs: introduce per-cpu CIL tracking structure

Message ID 20210603052240.171998-30-david@fromorbit.com (mailing list archive)
State Superseded
Headers show
Series xfs: CIL and log optimisations | expand

Commit Message

Dave Chinner June 3, 2021, 5:22 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

The CIL push lock is highly contended on larger machines, becoming a
hard bottleneck that about 700,000 transaction commits/s on >16p
machines. To address this, start moving the CIL tracking
infrastructure to utilise per-CPU structures.

We need to track the space used, the amount of log reservation space
reserved to write the CIL, the log items in the CIL and the busy
extents that need to be completed by the CIL commit.  This requires
a couple of per-cpu counters, an unordered per-cpu list and a
globally ordered per-cpu list.

Create a per-cpu structure to hold these and all the management
interfaces needed, as well as the hooks to handle hotplug CPUs.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_log_cil.c       | 107 +++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_priv.h      |  13 +++++
 include/linux/cpuhotplug.h |   1 +
 3 files changed, 121 insertions(+)

Comments

kernel test robot June 3, 2021, 9:18 a.m. UTC | #1
Hi Dave,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on xfs-linux/for-next]
[also build test ERROR on linus/master v5.13-rc4 next-20210602]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Dave-Chinner/xfs-CIL-and-log-optimisations/20210603-134113
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
config: arm-randconfig-r023-20210603 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project d8e0ae9a76a62bdc6117630d59bf9967ac9bb4ea)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm cross compiling tool for clang build
        # apt-get install binutils-arm-linux-gnueabi
        # https://github.com/0day-ci/linux/commit/7832a61eefffdb48fdd863bbb0da4b8b7a4e2bb9
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Dave-Chinner/xfs-CIL-and-log-optimisations/20210603-134113
        git checkout 7832a61eefffdb48fdd863bbb0da4b8b7a4e2bb9
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> fs/xfs/xfs_log_cil.c:1454:30: error: invalid operands to binary expression ('void' and 'int')
           if (xlog_cil_pcp_hpadd(cil) < 0) {
               ~~~~~~~~~~~~~~~~~~~~~~~ ^ ~
   1 error generated.


vim +1454 fs/xfs/xfs_log_cil.c

  1443	
  1444	static void __percpu *
  1445	xlog_cil_pcp_alloc(
  1446		struct xfs_cil		*cil)
  1447	{
  1448		void __percpu		*pcp;
  1449	
  1450		pcp = alloc_percpu(struct xlog_cil_pcp);
  1451		if (!pcp)
  1452			return NULL;
  1453	
> 1454		if (xlog_cil_pcp_hpadd(cil) < 0) {
  1455			free_percpu(pcp);
  1456			return NULL;
  1457		}
  1458		return pcp;
  1459	}
  1460	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot June 3, 2021, 10:08 a.m. UTC | #2
Hi Dave,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on xfs-linux/for-next]
[also build test ERROR on linus/master v5.13-rc4 next-20210602]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Dave-Chinner/xfs-CIL-and-log-optimisations/20210603-134113
base:   https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next
config: arc-randconfig-r034-20210603 (attached as .config)
compiler: arc-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/7832a61eefffdb48fdd863bbb0da4b8b7a4e2bb9
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Dave-Chinner/xfs-CIL-and-log-optimisations/20210603-134113
        git checkout 7832a61eefffdb48fdd863bbb0da4b8b7a4e2bb9
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/string.h:6,
                    from include/linux/uuid.h:12,
                    from fs/xfs/xfs_linux.h:10,
                    from fs/xfs/xfs.h:22,
                    from fs/xfs/xfs_log_cil.c:6:
   fs/xfs/xfs_log_cil.c: In function 'xlog_cil_pcp_alloc':
>> fs/xfs/xfs_log_cil.c:1454:6: error: void value not ignored as it ought to be
    1454 |  if (xlog_cil_pcp_hpadd(cil) < 0) {
         |      ^~~~~~~~~~~~~~~~~~~~~~~
   include/linux/compiler.h:58:52: note: in definition of macro '__trace_if_var'
      58 | #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
         |                                                    ^~~~
   fs/xfs/xfs_log_cil.c:1454:2: note: in expansion of macro 'if'
    1454 |  if (xlog_cil_pcp_hpadd(cil) < 0) {
         |  ^~
>> fs/xfs/xfs_log_cil.c:1454:6: error: void value not ignored as it ought to be
    1454 |  if (xlog_cil_pcp_hpadd(cil) < 0) {
         |      ^~~~~~~~~~~~~~~~~~~~~~~
   include/linux/compiler.h:58:61: note: in definition of macro '__trace_if_var'
      58 | #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
         |                                                             ^~~~
   fs/xfs/xfs_log_cil.c:1454:2: note: in expansion of macro 'if'
    1454 |  if (xlog_cil_pcp_hpadd(cil) < 0) {
         |  ^~
>> fs/xfs/xfs_log_cil.c:1454:6: error: void value not ignored as it ought to be
    1454 |  if (xlog_cil_pcp_hpadd(cil) < 0) {
         |      ^~~~~~~~~~~~~~~~~~~~~~~
   include/linux/compiler.h:69:3: note: in definition of macro '__trace_if_value'
      69 |  (cond) ?     \
         |   ^~~~
   include/linux/compiler.h:56:28: note: in expansion of macro '__trace_if_var'
      56 | #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
         |                            ^~~~~~~~~~~~~~
   fs/xfs/xfs_log_cil.c:1454:2: note: in expansion of macro 'if'
    1454 |  if (xlog_cil_pcp_hpadd(cil) < 0) {
         |  ^~


vim +1454 fs/xfs/xfs_log_cil.c

  1443	
  1444	static void __percpu *
  1445	xlog_cil_pcp_alloc(
  1446		struct xfs_cil		*cil)
  1447	{
  1448		void __percpu		*pcp;
  1449	
  1450		pcp = alloc_percpu(struct xlog_cil_pcp);
  1451		if (!pcp)
  1452			return NULL;
  1453	
> 1454		if (xlog_cil_pcp_hpadd(cil) < 0) {
  1455			free_percpu(pcp);
  1456			return NULL;
  1457		}
  1458		return pcp;
  1459	}
  1460	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Darrick J. Wong June 3, 2021, 4:49 p.m. UTC | #3
On Thu, Jun 03, 2021 at 03:22:30PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> The CIL push lock is highly contended on larger machines, becoming a
> hard bottleneck that about 700,000 transaction commits/s on >16p
> machines. To address this, start moving the CIL tracking
> infrastructure to utilise per-CPU structures.
> 
> We need to track the space used, the amount of log reservation space
> reserved to write the CIL, the log items in the CIL and the busy
> extents that need to be completed by the CIL commit.  This requires
> a couple of per-cpu counters, an unordered per-cpu list and a
> globally ordered per-cpu list.
> 
> Create a per-cpu structure to hold these and all the management
> interfaces needed, as well as the hooks to handle hotplug CPUs.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_log_cil.c       | 107 +++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_log_priv.h      |  13 +++++
>  include/linux/cpuhotplug.h |   1 +
>  3 files changed, 121 insertions(+)
> 
> diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> index 738dc4248113..791ed1058fb4 100644
> --- a/fs/xfs/xfs_log_cil.c
> +++ b/fs/xfs/xfs_log_cil.c
> @@ -1369,6 +1369,106 @@ xfs_log_item_in_current_chkpt(
>  	return lip->li_seq == cil->xc_ctx->sequence;
>  }
>  
> +#ifdef CONFIG_HOTPLUG_CPU
> +static LIST_HEAD(xlog_cil_pcp_list);
> +static DEFINE_SPINLOCK(xlog_cil_pcp_lock);
> +static bool xlog_cil_pcp_init;
> +
> +/*
> + * Move dead percpu state to the relevant CIL context structures.
> + *
> + * We have to lock the CIL context here to ensure that nothing is modifying
> + * the percpu state, either addition or removal. Both of these are done under
> + * the CIL context lock, so grabbing that exclusively here will ensure we can
> + * safely drain the cilpcp for the CPU that is dying.
> + */
> +static int
> +xlog_cil_pcp_dead(
> +	unsigned int		cpu)
> +{
> +	struct xfs_cil		*cil, *n;
> +
> +	spin_lock(&xlog_cil_pcp_lock);
> +	list_for_each_entry_safe(cil, n, &xlog_cil_pcp_list, xc_pcp_list) {
> +		spin_unlock(&xlog_cil_pcp_lock);
> +		down_write(&cil->xc_ctx_lock);
> +		/* move stuff on dead CPU to context */
> +		up_write(&cil->xc_ctx_lock);
> +		spin_lock(&xlog_cil_pcp_lock);
> +	}
> +	spin_unlock(&xlog_cil_pcp_lock);
> +	return 0;
> +}
> +
> +static int
> +xlog_cil_pcp_hpadd(
> +	struct xfs_cil		*cil)
> +{
> +	if (!xlog_cil_pcp_init) {
> +		int	ret;
> +
> +		ret = cpuhp_setup_state_nocalls(CPUHP_XFS_CIL_DEAD,
> +						"xfs/cil_pcp:dead", NULL,
> +						xlog_cil_pcp_dead);
> +		if (ret < 0) {
> +			xfs_warn(cil->xc_log->l_mp,
> +	"Failed to initialise CIL hotplug, error %d. XFS is non-functional.",
> +				ret);
> +			ASSERT(0);
> +			return -ENOMEM;
> +		}
> +		xlog_cil_pcp_init = true;
> +	}
> +
> +	INIT_LIST_HEAD(&cil->xc_pcp_list);
> +	spin_lock(&xlog_cil_pcp_lock);
> +	list_add(&cil->xc_pcp_list, &xlog_cil_pcp_list);
> +	spin_unlock(&xlog_cil_pcp_lock);
> +	return 0;
> +}
> +
> +static void
> +xlog_cil_pcp_hpremove(
> +	struct xfs_cil		*cil)
> +{
> +	spin_lock(&xlog_cil_pcp_lock);
> +	list_del(&cil->xc_pcp_list);
> +	spin_unlock(&xlog_cil_pcp_lock);
> +}
> +
> +#else /* !CONFIG_HOTPLUG_CPU */
> +static inline void xlog_cil_pcp_hpadd(struct xfs_cil *cil) {}

The signature doesn't match the one above, which I think is why the
kbuild robot complained.

Now that I've gotten my head wrapped around the percpu cil tracking
structures, I think the set looks reasonable.  I have some logistical
questions about this very long series, but I'll ask them in a reply to
the cover letter.

With the !HOTPLUG function signature fixed,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> +static inline void xlog_cil_pcp_hpremove(struct xfs_cil *cil) {}
> +#endif
> +
> +static void __percpu *
> +xlog_cil_pcp_alloc(
> +	struct xfs_cil		*cil)
> +{
> +	void __percpu		*pcp;
> +
> +	pcp = alloc_percpu(struct xlog_cil_pcp);
> +	if (!pcp)
> +		return NULL;
> +
> +	if (xlog_cil_pcp_hpadd(cil) < 0) {
> +		free_percpu(pcp);
> +		return NULL;
> +	}
> +	return pcp;
> +}
> +
> +static void
> +xlog_cil_pcp_free(
> +	struct xfs_cil		*cil,
> +	void __percpu		*pcp)
> +{
> +	if (!pcp)
> +		return;
> +	xlog_cil_pcp_hpremove(cil);
> +	free_percpu(pcp);
> +}
> +
>  /*
>   * Perform initial CIL structure initialisation.
>   */
> @@ -1383,6 +1483,12 @@ xlog_cil_init(
>  	if (!cil)
>  		return -ENOMEM;
>  
> +	cil->xc_pcp = xlog_cil_pcp_alloc(cil);
> +	if (!cil->xc_pcp) {
> +		kmem_free(cil);
> +		return -ENOMEM;
> +	}
> +
>  	INIT_LIST_HEAD(&cil->xc_cil);
>  	INIT_LIST_HEAD(&cil->xc_committing);
>  	spin_lock_init(&cil->xc_cil_lock);
> @@ -1413,6 +1519,7 @@ xlog_cil_destroy(
>  
>  	ASSERT(list_empty(&cil->xc_cil));
>  	ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
> +	xlog_cil_pcp_free(cil, cil->xc_pcp);
>  	kmem_free(cil);
>  }
>  
> diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
> index 85a85ab569fe..d76deffa4690 100644
> --- a/fs/xfs/xfs_log_priv.h
> +++ b/fs/xfs/xfs_log_priv.h
> @@ -227,6 +227,14 @@ struct xfs_cil_ctx {
>  	struct work_struct	push_work;
>  };
>  
> +/*
> + * Per-cpu CIL tracking items
> + */
> +struct xlog_cil_pcp {
> +	struct list_head	busy_extents;
> +	struct list_head	log_items;
> +};
> +
>  /*
>   * Committed Item List structure
>   *
> @@ -260,6 +268,11 @@ struct xfs_cil {
>  	wait_queue_head_t	xc_commit_wait;
>  	xfs_csn_t		xc_current_sequence;
>  	wait_queue_head_t	xc_push_wait;	/* background push throttle */
> +
> +	void __percpu		*xc_pcp;	/* percpu CIL structures */
> +#ifdef CONFIG_HOTPLUG_CPU
> +	struct list_head	xc_pcp_list;
> +#endif
>  } ____cacheline_aligned_in_smp;
>  
>  /* xc_flags bit values */
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index 4a62b3980642..3d3ccde9e9c8 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -52,6 +52,7 @@ enum cpuhp_state {
>  	CPUHP_FS_BUFF_DEAD,
>  	CPUHP_PRINTK_DEAD,
>  	CPUHP_MM_MEMCQ_DEAD,
> +	CPUHP_XFS_CIL_DEAD,
>  	CPUHP_PERCPU_CNT_DEAD,
>  	CPUHP_RADIX_DEAD,
>  	CPUHP_PAGE_ALLOC_DEAD,
> -- 
> 2.31.1
>
diff mbox series

Patch

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 738dc4248113..791ed1058fb4 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -1369,6 +1369,106 @@  xfs_log_item_in_current_chkpt(
 	return lip->li_seq == cil->xc_ctx->sequence;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static LIST_HEAD(xlog_cil_pcp_list);
+static DEFINE_SPINLOCK(xlog_cil_pcp_lock);
+static bool xlog_cil_pcp_init;
+
+/*
+ * Move dead percpu state to the relevant CIL context structures.
+ *
+ * We have to lock the CIL context here to ensure that nothing is modifying
+ * the percpu state, either addition or removal. Both of these are done under
+ * the CIL context lock, so grabbing that exclusively here will ensure we can
+ * safely drain the cilpcp for the CPU that is dying.
+ */
+static int
+xlog_cil_pcp_dead(
+	unsigned int		cpu)
+{
+	struct xfs_cil		*cil, *n;
+
+	spin_lock(&xlog_cil_pcp_lock);
+	list_for_each_entry_safe(cil, n, &xlog_cil_pcp_list, xc_pcp_list) {
+		spin_unlock(&xlog_cil_pcp_lock);
+		down_write(&cil->xc_ctx_lock);
+		/* move stuff on dead CPU to context */
+		up_write(&cil->xc_ctx_lock);
+		spin_lock(&xlog_cil_pcp_lock);
+	}
+	spin_unlock(&xlog_cil_pcp_lock);
+	return 0;
+}
+
+static int
+xlog_cil_pcp_hpadd(
+	struct xfs_cil		*cil)
+{
+	if (!xlog_cil_pcp_init) {
+		int	ret;
+
+		ret = cpuhp_setup_state_nocalls(CPUHP_XFS_CIL_DEAD,
+						"xfs/cil_pcp:dead", NULL,
+						xlog_cil_pcp_dead);
+		if (ret < 0) {
+			xfs_warn(cil->xc_log->l_mp,
+	"Failed to initialise CIL hotplug, error %d. XFS is non-functional.",
+				ret);
+			ASSERT(0);
+			return -ENOMEM;
+		}
+		xlog_cil_pcp_init = true;
+	}
+
+	INIT_LIST_HEAD(&cil->xc_pcp_list);
+	spin_lock(&xlog_cil_pcp_lock);
+	list_add(&cil->xc_pcp_list, &xlog_cil_pcp_list);
+	spin_unlock(&xlog_cil_pcp_lock);
+	return 0;
+}
+
+static void
+xlog_cil_pcp_hpremove(
+	struct xfs_cil		*cil)
+{
+	spin_lock(&xlog_cil_pcp_lock);
+	list_del(&cil->xc_pcp_list);
+	spin_unlock(&xlog_cil_pcp_lock);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+static inline void xlog_cil_pcp_hpadd(struct xfs_cil *cil) {}
+static inline void xlog_cil_pcp_hpremove(struct xfs_cil *cil) {}
+#endif
+
+static void __percpu *
+xlog_cil_pcp_alloc(
+	struct xfs_cil		*cil)
+{
+	void __percpu		*pcp;
+
+	pcp = alloc_percpu(struct xlog_cil_pcp);
+	if (!pcp)
+		return NULL;
+
+	if (xlog_cil_pcp_hpadd(cil) < 0) {
+		free_percpu(pcp);
+		return NULL;
+	}
+	return pcp;
+}
+
+static void
+xlog_cil_pcp_free(
+	struct xfs_cil		*cil,
+	void __percpu		*pcp)
+{
+	if (!pcp)
+		return;
+	xlog_cil_pcp_hpremove(cil);
+	free_percpu(pcp);
+}
+
 /*
  * Perform initial CIL structure initialisation.
  */
@@ -1383,6 +1483,12 @@  xlog_cil_init(
 	if (!cil)
 		return -ENOMEM;
 
+	cil->xc_pcp = xlog_cil_pcp_alloc(cil);
+	if (!cil->xc_pcp) {
+		kmem_free(cil);
+		return -ENOMEM;
+	}
+
 	INIT_LIST_HEAD(&cil->xc_cil);
 	INIT_LIST_HEAD(&cil->xc_committing);
 	spin_lock_init(&cil->xc_cil_lock);
@@ -1413,6 +1519,7 @@  xlog_cil_destroy(
 
 	ASSERT(list_empty(&cil->xc_cil));
 	ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
+	xlog_cil_pcp_free(cil, cil->xc_pcp);
 	kmem_free(cil);
 }
 
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 85a85ab569fe..d76deffa4690 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -227,6 +227,14 @@  struct xfs_cil_ctx {
 	struct work_struct	push_work;
 };
 
+/*
+ * Per-cpu CIL tracking items
+ */
+struct xlog_cil_pcp {
+	struct list_head	busy_extents;
+	struct list_head	log_items;
+};
+
 /*
  * Committed Item List structure
  *
@@ -260,6 +268,11 @@  struct xfs_cil {
 	wait_queue_head_t	xc_commit_wait;
 	xfs_csn_t		xc_current_sequence;
 	wait_queue_head_t	xc_push_wait;	/* background push throttle */
+
+	void __percpu		*xc_pcp;	/* percpu CIL structures */
+#ifdef CONFIG_HOTPLUG_CPU
+	struct list_head	xc_pcp_list;
+#endif
 } ____cacheline_aligned_in_smp;
 
 /* xc_flags bit values */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4a62b3980642..3d3ccde9e9c8 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -52,6 +52,7 @@  enum cpuhp_state {
 	CPUHP_FS_BUFF_DEAD,
 	CPUHP_PRINTK_DEAD,
 	CPUHP_MM_MEMCQ_DEAD,
+	CPUHP_XFS_CIL_DEAD,
 	CPUHP_PERCPU_CNT_DEAD,
 	CPUHP_RADIX_DEAD,
 	CPUHP_PAGE_ALLOC_DEAD,