[6/6] xfs: parallelize block preallocation garbage collection
diff mbox series

Message ID 157784091539.1361683.10265501485703077036.stgit@magnolia
State New
Headers show
Series
  • xfs: consolidate posteof and cowblocks cleanup
Related show

Commit Message

Darrick J. Wong Jan. 1, 2020, 1:08 a.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

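Split the block preallocation garbage collection work into per-AG work
items so that we can take advantage of parallelization.  To let the
per-AG work items run concurrently, make the blockgc workqueue unbound
and, once the mount is far enough along, set its max_active to the
estimated metadata parallelism of the filesystem, capped at
WQ_UNBOUND_MAX_ACTIVE.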

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_icache.c |   51 +++++++++++++++++++++++++++++++++++++++------------
 fs/xfs/xfs_mount.c  |    3 +++
 fs/xfs/xfs_mount.h  |    5 +++--
 fs/xfs/xfs_super.c  |   25 +++++++++++++++++++++++--
 4 files changed, 68 insertions(+), 16 deletions(-)

Patch
diff mbox series

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 622fdd747099..1a09d4854266 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -963,12 +963,12 @@  xfs_ici_walk_all(
  */
 static void
 xfs_queue_blockgc(
-	struct xfs_mount	*mp)
+	struct xfs_perag	*pag)
 {
 	rcu_read_lock();
-	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_BLOCK_GC_TAG))
-		queue_delayed_work(mp->m_blockgc_workqueue,
-				   &mp->m_blockgc_work,
+	if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCK_GC_TAG))
+		queue_delayed_work(pag->pag_mount->m_blockgc_workqueue,
+				   &pag->pag_blockgc_work,
 				   msecs_to_jiffies(xfs_blockgc_secs * 1000));
 	rcu_read_unlock();
 }
@@ -988,6 +988,16 @@  xfs_blockgc_scan_inode(
 	return xfs_inode_free_cowblocks(ip, args);
 }
 
+/* Scan an AG's inodes for block preallocations that we can remove. */
+static int
+xfs_blockgc_scan_pag(
+	struct xfs_perag	*pag,
+	struct xfs_eofblocks	*eofb)
+{
+	return xfs_ici_walk_ag(pag->pag_mount, pag, xfs_blockgc_scan_inode,
+			eofb, XFS_ICI_BLOCK_GC_TAG, 0);
+}
+
 /* Scan all incore inodes for block preallocations that we can remove. */
 static inline int
 xfs_blockgc_scan(
@@ -1003,22 +1013,35 @@  void
 xfs_blockgc_worker(
 	struct work_struct	*work)
 {
-	struct xfs_mount	*mp = container_of(to_delayed_work(work),
-					struct xfs_mount, m_blockgc_work);
+	struct xfs_perag	*pag = container_of(to_delayed_work(work),
+					struct xfs_perag, pag_blockgc_work);
 	int			error;
 
-	error = xfs_blockgc_scan(mp, NULL);
+	error = xfs_blockgc_scan_pag(pag, NULL);
 	if (error)
-		xfs_info(mp, "preallocation gc worker failed, err=%d", error);
-	xfs_queue_blockgc(mp);
+		xfs_info(pag->pag_mount,
+				"AG %u preallocation gc worker failed, err=%d",
+				pag->pag_agno, error);
+	xfs_queue_blockgc(pag);
 }
 
+#define for_each_perag_tag(mp, next_agno, pag, tag) \
+	for ((next_agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \
+	     (pag) != NULL; \
+	     (next_agno) = (pag)->pag_agno + 1, \
+	     xfs_perag_put(pag), \
+	     (pag) = xfs_perag_get_tag((mp), (next_agno), (tag)))
+
 /* Disable post-EOF and CoW block auto-reclamation. */
 void
 xfs_blockgc_stop(
 	struct xfs_mount	*mp)
 {
-	cancel_delayed_work_sync(&mp->m_blockgc_work);
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno;
+
+	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCK_GC_TAG)
+		cancel_delayed_work_sync(&pag->pag_blockgc_work);
 }
 
 /* Enable post-EOF and CoW block auto-reclamation. */
@@ -1026,7 +1049,11 @@  void
 xfs_blockgc_start(
 	struct xfs_mount	*mp)
 {
-	xfs_queue_blockgc(mp);
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno;
+
+	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCK_GC_TAG)
+		xfs_queue_blockgc(pag);
 }
 
 /*
@@ -1666,7 +1693,7 @@  __xfs_inode_set_blocks_tag(
 		spin_unlock(&ip->i_mount->m_perag_lock);
 
 		/* kick off background trimming */
-		xfs_queue_blockgc(ip->i_mount);
+		xfs_queue_blockgc(pag);
 
 		trace_xfs_perag_set_blockgc(ip->i_mount, pag->pag_agno, -1,
 				_RET_IP_);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 626c62bbe8d6..ea74bd3be0bf 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -126,6 +126,7 @@  __xfs_free_perag(
 {
 	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
 
+	ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
 	ASSERT(atomic_read(&pag->pag_ref) == 0);
 	kmem_free(pag);
 }
@@ -146,6 +147,7 @@  xfs_free_perag(
 		spin_unlock(&mp->m_perag_lock);
 		ASSERT(pag);
 		ASSERT(atomic_read(&pag->pag_ref) == 0);
+		cancel_delayed_work_sync(&pag->pag_blockgc_work);
 		xfs_iunlink_destroy(pag);
 		xfs_buf_hash_destroy(pag);
 		mutex_destroy(&pag->pag_ici_reclaim_lock);
@@ -201,6 +203,7 @@  xfs_initialize_perag(
 		pag->pag_mount = mp;
 		spin_lock_init(&pag->pag_ici_lock);
 		mutex_init(&pag->pag_ici_reclaim_lock);
+		INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
 		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
 		if (xfs_buf_hash_init(pag))
 			goto out_free_pag;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bf05b57bc128..296223c2b782 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -155,8 +155,6 @@  typedef struct xfs_mount {
 	atomic_t		m_active_trans;	/* number trans frozen */
 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
-	struct delayed_work	m_blockgc_work; /* background prealloc blocks
-						     trimming */
 	bool			m_update_sb;	/* sb needs update in mount */
 	int64_t			m_low_space[XFS_LOWSP_MAX];
 						/* low free space thresholds */
@@ -355,6 +353,9 @@  typedef struct xfs_perag {
 	/* Blocks reserved for the reverse mapping btree. */
 	struct xfs_ag_resv	pag_rmapbt_resv;
 
+	/* background prealloc block trimming */
+	struct delayed_work	pag_blockgc_work;
+
 	/* reference count */
 	uint8_t			pagf_refcount_level;
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e734a2a663ac..03d95bf0952c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -510,7 +510,8 @@  xfs_init_mount_workqueues(
 		goto out_destroy_cil;
 
 	mp->m_blockgc_workqueue = alloc_workqueue("xfs-blockgc/%s",
-			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
+			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_FREEZABLE, 0,
+			mp->m_super->s_id);
 	if (!mp->m_blockgc_workqueue)
 		goto out_destroy_reclaim;
 
@@ -1372,6 +1373,25 @@  xfs_fc_validate_params(
 	return 0;
 }
 
+/*
+ * Constrain the number of threads that we start for background work.  This
+ * is the estimated parallelism of the filesystem capped to the unbound work
+ * queue maximum.
+ *
+ * We can't set this when we allocate the workqueues because the thread count
+ * derives from AG count, and we can't know that until we're far enough through
+ * setup to read the superblock, which requires functioning workqueues.
+ */
+static inline void
+xfs_configure_background_workqueues(
+	struct xfs_mount	*mp)
+{
+	unsigned int		max_active = xfs_guess_metadata_threads(mp);
+
+	max_active = min_t(unsigned int, max_active, WQ_UNBOUND_MAX_ACTIVE);
+	workqueue_set_max_active(mp->m_blockgc_workqueue, max_active);
+}
+
 static int
 xfs_fc_fill_super(
 	struct super_block	*sb,
@@ -1437,6 +1457,8 @@  xfs_fc_fill_super(
 	if (error)
 		goto out_free_sb;
 
+	xfs_configure_background_workqueues(mp);
+
 	error = xfs_setup_devices(mp);
 	if (error)
 		goto out_free_sb;
@@ -1768,7 +1790,6 @@  static int xfs_init_fs_context(
 	atomic_set(&mp->m_active_trans, 0);
 	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
-	INIT_DELAYED_WORK(&mp->m_blockgc_work, xfs_blockgc_worker);
 	mp->m_kobj.kobject.kset = xfs_kset;
 	/*
 	 * We don't create the finobt per-ag space reservation until after log