diff mbox series

[15/20] xfs: reduce inactivation delay when AG free space are tight

Message ID 162758431624.332903.3577155349499604841.stgit@magnolia (mailing list archive)
State New, archived
Headers show
Series xfs: deferred inode inactivation | expand

Commit Message

Darrick J. Wong July 29, 2021, 6:45 p.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Implement the same scaling down of inodegc delays when we're tight on
free space in an AG.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_icache.c |   52 +++++++++++++++++++++++++++++++++++++++++++++++----
 fs/xfs/xfs_mount.c  |    2 ++
 fs/xfs/xfs_mount.h  |    1 +
 fs/xfs/xfs_trace.h  |   27 ++++++++++++++++++++++++++
 4 files changed, 78 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 6e9ca483c100..17cc2ac76809 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -353,6 +353,47 @@  xfs_gc_delay_freesp(
 	return delay_ms >> shift;
 }
 
+/*
+ * Scale down the background work delay if we're low on free space in this AG.
+ * Similar to the way that we throttle preallocations, we halve the delay time
+ * for every low free space threshold that isn't met.  Return value is in ms.
+ */
+static inline unsigned int
+xfs_gc_delay_perag(
+	struct xfs_perag	*pag,
+	unsigned int		tag,
+	unsigned int		delay_ms)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+	xfs_extlen_t		freesp;
+	unsigned int		shift = 0;
+
+	if (!pag->pagf_init)
+		return delay_ms;
+
+	/* Free space in this AG that can be allocated to file data */
+	freesp = pag->pagf_freeblks + pag->pagf_flcount;
+	freesp -= (pag->pag_meta_resv.ar_reserved +
+		   pag->pag_rmapbt_resv.ar_reserved);
+
+	if (freesp < mp->m_ag_low_space[XFS_LOWSP_5_PCNT]) {
+		shift = 2;
+		if (freesp < mp->m_ag_low_space[XFS_LOWSP_4_PCNT])
+			shift++;
+		if (freesp < mp->m_ag_low_space[XFS_LOWSP_3_PCNT])
+			shift++;
+		if (freesp < mp->m_ag_low_space[XFS_LOWSP_2_PCNT])
+			shift++;
+		if (freesp < mp->m_ag_low_space[XFS_LOWSP_1_PCNT])
+			shift++;
+	}
+
+	if (shift)
+		trace_xfs_gc_delay_agfreeblks(pag, tag, shift);
+
+	return delay_ms >> shift;
+}
+
 /*
  * Compute the lag between scheduling and executing some kind of background
  * garbage collection work.  Return value is in ms.  If an inode is passed in,
@@ -360,12 +401,13 @@  xfs_gc_delay_freesp(
  */
 static inline unsigned int
 xfs_gc_delay_ms(
-	struct xfs_mount	*mp,
+	struct xfs_perag	*pag,
 	struct xfs_inode	*ip,
 	unsigned int		tag)
 {
+	struct xfs_mount	*mp = pag->pag_mount;
 	unsigned int		default_ms;
-	unsigned int		udelay, gdelay, pdelay, fdelay, rdelay;
+	unsigned int		udelay, gdelay, pdelay, fdelay, rdelay, adelay;
 
 	switch (tag) {
 	case XFS_ICI_INODEGC_TAG:
@@ -388,9 +430,11 @@  xfs_gc_delay_ms(
 	pdelay = xfs_gc_delay_dquot(ip, XFS_DQTYPE_PROJ, tag, default_ms);
 	fdelay = xfs_gc_delay_freesp(mp, tag, default_ms);
 	rdelay = xfs_gc_delay_freertx(mp, ip, tag, default_ms);
+	adelay = xfs_gc_delay_perag(pag, tag, default_ms);
 
 	udelay = min(udelay, gdelay);
 	pdelay = min(pdelay, fdelay);
+	rdelay = min(rdelay, adelay);
 
 	udelay = min(udelay, pdelay);
 
@@ -432,7 +476,7 @@  xfs_inodegc_queue(
 	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_INODEGC_TAG)) {
 		unsigned int	delay;
 
-		delay = xfs_gc_delay_ms(mp, ip, XFS_ICI_INODEGC_TAG);
+		delay = xfs_gc_delay_ms(pag, ip, XFS_ICI_INODEGC_TAG);
 		trace_xfs_inodegc_queue(pag, delay);
 		queue_delayed_work(mp->m_gc_workqueue, &pag->pag_inodegc_work,
 				msecs_to_jiffies(delay));
@@ -473,7 +517,7 @@  xfs_gc_requeue_now(
 	if (!radix_tree_tagged(&mp->m_perag_tree, tag))
 		goto unlock;
 
-	if (xfs_gc_delay_ms(mp, ip, tag) == default_ms)
+	if (xfs_gc_delay_ms(pag, ip, tag) == default_ms)
 		goto unlock;
 
 	trace_xfs_gc_requeue_now(pag, tag);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 37afb0e0d879..811ce8e9310e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -367,6 +367,7 @@  xfs_set_low_space_thresholds(
 {
 	uint64_t		dblocks = mp->m_sb.sb_dblocks;
 	uint64_t		rtexts = mp->m_sb.sb_rextents;
+	uint32_t		agblocks = mp->m_sb.sb_agblocks / 100;
 	int			i;
 
 	do_div(dblocks, 100);
@@ -375,6 +376,7 @@  xfs_set_low_space_thresholds(
 	for (i = 0; i < XFS_LOWSP_MAX; i++) {
 		mp->m_low_space[i] = dblocks * (i + 1);
 		mp->m_low_rtexts[i] = rtexts * (i + 1);
+		mp->m_ag_low_space[i] = agblocks * (i + 1);
 	}
 }
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index edd5c4fd6533..74ca2a458b14 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -131,6 +131,7 @@  typedef struct xfs_mount {
 	uint			m_rsumsize;	/* size of rt summary, bytes */
 	int			m_fixedfsid[2];	/* unchanged for life of FS */
 	uint			m_qflags;	/* quota status flags */
+	int32_t			m_ag_low_space[XFS_LOWSP_MAX];
 	uint64_t		m_flags;	/* global mount flags */
 	int64_t			m_low_space[XFS_LOWSP_MAX];
 	int64_t			m_low_rtexts[XFS_LOWSP_MAX];
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2c504c3e63e6..43fb699e6aaf 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -318,6 +318,33 @@  TRACE_EVENT(xfs_gc_delay_frextents,
 		  __entry->frextents)
 );
 
+TRACE_EVENT(xfs_gc_delay_agfreeblks,
+	TP_PROTO(struct xfs_perag *pag, unsigned int tag, unsigned int shift),
+	TP_ARGS(pag, tag, shift),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(unsigned int, freeblks)
+		__field(unsigned int, tag)
+		__field(unsigned int, shift)
+	),
+	TP_fast_assign(
+		__entry->dev = pag->pag_mount->m_super->s_dev;
+		__entry->agno = pag->pag_agno;
+		__entry->freeblks = pag->pagf_freeblks + pag->pagf_flcount;
+		__entry->freeblks -= (pag->pag_meta_resv.ar_reserved +
+				      pag->pag_rmapbt_resv.ar_reserved);
+		__entry->tag = tag;
+		__entry->shift = shift;
+	),
+	TP_printk("dev %d:%d tag %u shift %u agno %u freeblks %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->tag,
+		  __entry->shift,
+		  __entry->agno,
+		  __entry->freeblks)
+);
+
 DECLARE_EVENT_CLASS(xfs_gc_queue_class,
 	TP_PROTO(struct xfs_perag *pag, unsigned int delay_ms),
 	TP_ARGS(pag, delay_ms),