@@ -173,6 +173,7 @@ __xfs_free_perag(
struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
+ ASSERT(!delayed_work_pending(&pag->pag_inodegc_work));
ASSERT(atomic_read(&pag->pag_ref) == 0);
kmem_free(pag);
}
@@ -195,6 +196,7 @@ xfs_free_perag(
ASSERT(atomic_read(&pag->pag_ref) == 0);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
+ cancel_delayed_work_sync(&pag->pag_inodegc_work);
xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag);
@@ -253,6 +255,7 @@ xfs_initialize_perag(
spin_lock_init(&pag->pagb_lock);
spin_lock_init(&pag->pag_state_lock);
INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
+ INIT_DELAYED_WORK(&pag->pag_inodegc_work, xfs_inodegc_worker);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
init_waitqueue_head(&pag->pagb_wait);
pag->pagb_count = 0;
@@ -96,6 +96,9 @@ struct xfs_perag {
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
+ /* background inode inactivation */
+ struct delayed_work pag_inodegc_work;
+
/*
* Unlinked inode information. This incore information reflects
* data stored in the AGI, so callers must hold the AGI buffer lock
@@ -299,6 +299,43 @@ xfs_worker_delay_freesp(
return delay_ms >> shift;
}
+/*
+ * Scale down the background work delay if we're low on free space in this AG.
+ * Similar to the way that we throttle preallocations, we halve the delay time
+ * for every low free space threshold that isn't met. Return value is in ms.
+ */
+static inline unsigned int
+xfs_worker_delay_perag(
+ struct xfs_perag *pag,
+ unsigned int delay_ms)
+{
+ struct xfs_mount *mp = pag->pag_mount;
+ xfs_extlen_t freesp;
+ unsigned int shift = 0;
+
+ if (!pag->pagf_init)
+ return delay_ms;
+
+ /* Free space in this AG that can be allocated to file data */
+ freesp = pag->pagf_freeblks + pag->pagf_flcount;
+ freesp -= (pag->pag_meta_resv.ar_reserved +
+ pag->pag_rmapbt_resv.ar_reserved);
+
+ if (freesp < mp->m_ag_low_space[XFS_LOWSP_5_PCNT]) {
+ shift = 2;
+ if (freesp < mp->m_ag_low_space[XFS_LOWSP_4_PCNT])
+ shift++;
+ if (freesp < mp->m_ag_low_space[XFS_LOWSP_3_PCNT])
+ shift++;
+ if (freesp < mp->m_ag_low_space[XFS_LOWSP_2_PCNT])
+ shift++;
+ if (freesp < mp->m_ag_low_space[XFS_LOWSP_1_PCNT])
+ shift++;
+ }
+
+ return delay_ms >> shift;
+}
+
/*
* Compute the lag between scheduling and executing background work based on
* free space in the filesystem. If an inode is passed in, its dquots will
@@ -306,18 +343,20 @@ xfs_worker_delay_freesp(
*/
static inline unsigned int
xfs_worker_delay_ms(
- struct xfs_mount *mp,
+ struct xfs_perag *pag,
struct xfs_inode *ip,
unsigned int default_ms)
{
- unsigned int udelay, gdelay, pdelay, fdelay;
+ struct xfs_mount *mp = pag->pag_mount;
+ unsigned int udelay, gdelay, pdelay, fdelay, adelay;
udelay = xfs_worker_delay_dquot(ip, XFS_DQTYPE_USER, default_ms);
gdelay = xfs_worker_delay_dquot(ip, XFS_DQTYPE_GROUP, default_ms);
pdelay = xfs_worker_delay_dquot(ip, XFS_DQTYPE_PROJ, default_ms);
fdelay = xfs_worker_delay_freesp(mp, default_ms);
+ adelay = xfs_worker_delay_perag(pag, default_ms);
- return min(min(udelay, gdelay), min(pdelay, fdelay));
+ return min(adelay, min(min(udelay, gdelay), min(pdelay, fdelay)));
}
/*
@@ -343,9 +382,11 @@ xfs_blockgc_queue(
*/
static void
xfs_inodegc_queue(
- struct xfs_mount *mp,
+ struct xfs_perag *pag,
struct xfs_inode *ip)
{
+ struct xfs_mount *mp = pag->pag_mount;
+
if (!test_bit(XFS_OPFLAG_INODEGC_RUNNING_BIT, &mp->m_opflags))
return;
@@ -353,9 +394,9 @@ xfs_inodegc_queue(
- if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_INODEGC_TAG)) {
+ if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_INODEGC_TAG)) {
unsigned int delay;
- delay = xfs_worker_delay_ms(mp, ip, xfs_inodegc_ms);
- trace_xfs_inodegc_queue(mp, delay, _RET_IP_);
- queue_delayed_work(mp->m_gc_workqueue, &mp->m_inodegc_work,
+ delay = xfs_worker_delay_ms(pag, ip, xfs_inodegc_ms);
+ trace_xfs_inodegc_queue(mp, pag->pag_agno, delay, _RET_IP_);
+ queue_delayed_work(mp->m_gc_workqueue, &pag->pag_inodegc_work,
msecs_to_jiffies(delay));
}
rcu_read_unlock();
@@ -367,11 +408,13 @@ xfs_inodegc_queue(
*/
static void
xfs_inodegc_queue_sooner(
- struct xfs_mount *mp,
+ struct xfs_perag *pag,
struct xfs_inode *ip)
{
+ struct xfs_mount *mp = pag->pag_mount;
+
if (!XFS_IS_QUOTA_ON(mp) ||
- !delayed_work_pending(&mp->m_inodegc_work) ||
+ !delayed_work_pending(&pag->pag_inodegc_work) ||
!test_bit(XFS_OPFLAG_INODEGC_RUNNING_BIT, &mp->m_opflags))
return;
@@ -379,11 +422,11 @@ xfs_inodegc_queue_sooner(
- if (!radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_INODEGC_TAG))
+ if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_INODEGC_TAG))
goto unlock;
- if (xfs_worker_delay_ms(mp, ip, xfs_inodegc_ms) == xfs_inodegc_ms)
+ if (xfs_worker_delay_ms(pag, ip, xfs_inodegc_ms) == xfs_inodegc_ms)
goto unlock;
- trace_xfs_inodegc_queue(mp, 0, _RET_IP_);
- queue_delayed_work(mp->m_gc_workqueue, &mp->m_inodegc_work, 0);
+ trace_xfs_inodegc_queue(mp, pag->pag_agno, 0, _RET_IP_);
+ mod_delayed_work(mp->m_gc_workqueue, &pag->pag_inodegc_work, 0);
unlock:
rcu_read_unlock();
}
@@ -427,7 +470,7 @@ xfs_perag_set_inode_tag(
xfs_blockgc_queue(pag);
break;
case XFS_ICI_INODEGC_TAG:
- xfs_inodegc_queue(mp, ip);
+ xfs_inodegc_queue(pag, ip);
break;
}
@@ -561,7 +604,7 @@ xfs_inode_mark_reclaimable(
spin_unlock(&pag->pag_ici_lock);
if (need_inactive && already_queued)
- xfs_inodegc_queue_sooner(mp, ip);
+ xfs_inodegc_queue_sooner(pag, ip);
xfs_perag_put(pag);
}
@@ -2058,16 +2101,17 @@ void
xfs_inodegc_worker(
struct work_struct *work)
{
- struct xfs_mount *mp = container_of(to_delayed_work(work),
- struct xfs_mount, m_inodegc_work);
+ struct xfs_perag *pag = container_of(to_delayed_work(work),
+ struct xfs_perag, pag_inodegc_work);
+ struct xfs_mount *mp = pag->pag_mount;
/*
* Inactivation never returns error codes and never fails to push a
* tagged inode to reclaim. Loop until there there's nothing left.
*/
- while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_INODEGC_TAG)) {
- trace_xfs_inodegc_worker(mp, 0, _RET_IP_);
- xfs_icwalk(mp, XFS_ICWALK_INODEGC, NULL);
+ while (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_INODEGC_TAG)) {
+ trace_xfs_inodegc_worker(mp, pag->pag_agno, 0, _RET_IP_);
+ xfs_icwalk_ag(pag, XFS_ICWALK_INODEGC, NULL);
}
}
@@ -2079,8 +2123,13 @@ void
xfs_inodegc_flush(
struct xfs_mount *mp)
{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
trace_xfs_inodegc_flush(mp, 0, _RET_IP_);
- flush_delayed_work(&mp->m_inodegc_work);
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_INODEGC_TAG)
+ flush_delayed_work(&pag->pag_inodegc_work);
}
/* Disable the inode inactivation background worker and wait for it to stop. */
@@ -2088,10 +2137,14 @@ void
xfs_inodegc_stop(
struct xfs_mount *mp)
{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
if (!test_and_clear_bit(XFS_OPFLAG_INODEGC_RUNNING_BIT, &mp->m_opflags))
return;
- cancel_delayed_work_sync(&mp->m_inodegc_work);
+ for_each_perag(mp, agno, pag)
+ cancel_delayed_work_sync(&pag->pag_inodegc_work);
trace_xfs_inodegc_stop(mp, 0, _RET_IP_);
}
@@ -2103,11 +2156,15 @@ void
xfs_inodegc_start(
struct xfs_mount *mp)
{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
if (test_and_set_bit(XFS_OPFLAG_INODEGC_RUNNING_BIT, &mp->m_opflags))
return;
trace_xfs_inodegc_start(mp, 0, _RET_IP_);
- xfs_inodegc_queue(mp, NULL);
+ for_each_perag_tag(mp, agno, pag, XFS_ICI_INODEGC_TAG)
+ xfs_inodegc_queue(pag, NULL);
}
/* XFS Inode Cache Walking Code */
@@ -365,13 +365,16 @@ void
xfs_set_low_space_thresholds(
struct xfs_mount *mp)
{
- int i;
+ uint64_t space = mp->m_sb.sb_dblocks;
+ uint64_t ag_space = mp->m_sb.sb_agblocks;
+ int i;
+
+ do_div(space, 100);
+ do_div(ag_space, 100);
for (i = 0; i < XFS_LOWSP_MAX; i++) {
- uint64_t space = mp->m_sb.sb_dblocks;
-
- do_div(space, 100);
mp->m_low_space[i] = space * (i + 1);
+ mp->m_ag_low_space[i] = ag_space * (i + 1);
}
}
@@ -131,6 +131,7 @@ typedef struct xfs_mount {
uint m_rsumsize; /* size of rt summary, bytes */
int m_fixedfsid[2]; /* unchanged for life of FS */
uint m_qflags; /* quota status flags */
+ int32_t m_ag_low_space[XFS_LOWSP_MAX]; /* per-AG low space thresholds */
uint64_t m_flags; /* global mount flags */
int64_t m_low_space[XFS_LOWSP_MAX];
struct xfs_ino_geometry m_ino_geo; /* inode geometry */
@@ -191,7 +192,6 @@ typedef struct xfs_mount {
uint64_t m_resblks_avail;/* available reserved blocks */
uint64_t m_resblks_save; /* reserved blks @ remount,ro */
struct delayed_work m_reclaim_work; /* background inode reclaim */
- struct delayed_work m_inodegc_work; /* background inode inactive */
struct xfs_kobj m_kobj;
struct xfs_kobj m_error_kobj;
struct xfs_kobj m_error_meta_kobj;
@@ -1865,7 +1865,6 @@ static int xfs_init_fs_context(
mutex_init(&mp->m_growlock);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
- INIT_DELAYED_WORK(&mp->m_inodegc_work, xfs_inodegc_worker);
mp->m_kobj.kobject.kset = xfs_kset;
/*
* We don't create the finobt per-ag space reservation until after log
@@ -122,7 +122,7 @@ TRACE_EVENT(xlog_intent_recovery_failed,
__entry->error, __entry->function)
);
-DECLARE_EVENT_CLASS(xfs_perag_class,
+DECLARE_EVENT_CLASS(xfs_perag_ref_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
unsigned long caller_ip),
TP_ARGS(mp, agno, refcount, caller_ip),
@@ -146,7 +146,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
);
#define DEFINE_PERAG_REF_EVENT(name) \
-DEFINE_EVENT(xfs_perag_class, name, \
+DEFINE_EVENT(xfs_perag_ref_class, name, \
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
unsigned long caller_ip), \
TP_ARGS(mp, agno, refcount, caller_ip))
@@ -155,6 +155,8 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
+DEFINE_PERAG_REF_EVENT(xfs_inodegc_queue);
+DEFINE_PERAG_REF_EVENT(xfs_inodegc_worker);
DECLARE_EVENT_CLASS(xfs_fs_class,
TP_PROTO(struct xfs_mount *mp, int data, unsigned long caller_ip),
@@ -191,8 +193,6 @@ DEFINE_EVENT(xfs_fs_class, name, \
DEFINE_FS_EVENT(xfs_inodegc_flush);
DEFINE_FS_EVENT(xfs_inodegc_start);
DEFINE_FS_EVENT(xfs_inodegc_stop);
-DEFINE_FS_EVENT(xfs_inodegc_queue);
-DEFINE_FS_EVENT(xfs_inodegc_worker);
DEFINE_FS_EVENT(xfs_fs_sync_fs);
DECLARE_EVENT_CLASS(xfs_ag_class,