@@ -29,6 +29,8 @@
/* Forward declarations to reduce indirect calls */
static int xfs_blockgc_scan_inode(struct xfs_inode *ip,
struct xfs_eofblocks *eofb);
+static bool xfs_reclaim_inode_grab(struct xfs_inode *ip);
+static void xfs_reclaim_inode(struct xfs_inode *ip, struct xfs_perag *pag);
/*
* Allocate and initialise an xfs_inode.
@@ -769,6 +771,21 @@ xfs_blockgc_grab(
return false;
}
+static inline bool
+selected_for_walk( /* returns the tag-specific grab helper's verdict on @ip */
+ unsigned int tag, /* radix tree tag the caller is walking */
+ struct xfs_inode *ip) /* candidate inode returned by the tagged lookup */
+{
+ switch (tag) { /* each supported tag has its own grab helper */
+ case XFS_ICI_BLOCKGC_TAG:
+ return xfs_blockgc_grab(ip);
+ case XFS_ICI_RECLAIM_TAG:
+ return xfs_reclaim_inode_grab(ip);
+ default: /* no grab helper for this tag: the caller drops the inode from its batch */
+ return false;
+ }
+}
+
/*
* For a given per-AG structure @pag, grab, execute a tag specific function,
* and release all incore inodes with the given radix tree @tag.
@@ -786,12 +803,14 @@ xfs_inode_walk_ag(
bool done;
int nr_found;
- ASSERT(tag == XFS_ICI_BLOCKGC_TAG);
+ ASSERT(tag < RADIX_TREE_MAX_TAGS);
restart:
done = false;
skipped = 0;
first_index = 0;
+ if (tag == XFS_ICI_RECLAIM_TAG)
+ first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
nr_found = 0;
do {
struct xfs_inode *batch[XFS_LOOKUP_BATCH];
@@ -804,6 +823,7 @@ xfs_inode_walk_ag(
(void **)batch, first_index, XFS_LOOKUP_BATCH,
tag);
if (!nr_found) {
+ done = true;
rcu_read_unlock();
break;
}
@@ -815,7 +835,7 @@ xfs_inode_walk_ag(
for (i = 0; i < nr_found; i++) {
struct xfs_inode *ip = batch[i];
- if (done || !xfs_blockgc_grab(ip))
+ if (done || !selected_for_walk(tag, ip))
batch[i] = NULL;
/*
@@ -843,8 +863,16 @@ xfs_inode_walk_ag(
for (i = 0; i < nr_found; i++) {
if (!batch[i])
continue;
- error = xfs_blockgc_scan_inode(batch[i], eofb);
- xfs_irele(batch[i]);
+ switch (tag) {
+ case XFS_ICI_BLOCKGC_TAG:
+ error = xfs_blockgc_scan_inode(batch[i], eofb);
+ xfs_irele(batch[i]);
+ break;
+ case XFS_ICI_RECLAIM_TAG:
+ xfs_reclaim_inode(batch[i], pag);
+ error = 0;
+ break;
+ }
if (error == -EAGAIN) {
skipped++;
continue;
@@ -858,9 +886,19 @@ xfs_inode_walk_ag(
break;
cond_resched();
-
+ if (tag == XFS_ICI_RECLAIM_TAG && eofb) { /* NULL eofb means no scan limit */
+ eofb->nr_to_scan -= XFS_LOOKUP_BATCH; /* charge one full lookup batch */
+ if (eofb->nr_to_scan <= 0) /* budget spent; the replaced loop only ran while > 0 */
+ break;
+ }
} while (nr_found && !done);
+ if (tag == XFS_ICI_RECLAIM_TAG) {
+ if (done)
+ first_index = 0;
+ WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
+ }
+
if (skipped) {
delay(1);
goto restart;
@@ -883,7 +921,7 @@ xfs_inode_walk(
int last_error = 0;
xfs_agnumber_t ag;
- ASSERT(tag == XFS_ICI_BLOCKGC_TAG);
+ ASSERT(tag < RADIX_TREE_MAX_TAGS);
ag = 0;
while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
@@ -1027,108 +1065,13 @@ xfs_reclaim_inode(
xfs_iflags_clear(ip, XFS_IRECLAIM);
}
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- *
- * Returns non-zero if any AGs or inodes were skipped in the reclaim pass
- * so that callers that want to block until all dirty inodes are written back
- * and reclaimed can sanely loop.
- */
-static void
-xfs_reclaim_inodes_ag(
- struct xfs_mount *mp,
- int *nr_to_scan)
-{
- struct xfs_perag *pag;
- xfs_agnumber_t ag = 0;
-
- while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
- unsigned long first_index = 0;
- int done = 0;
- int nr_found = 0;
-
- ag = pag->pag_agno + 1;
-
- first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
- do {
- struct xfs_inode *batch[XFS_LOOKUP_BATCH];
- int i;
-
- rcu_read_lock();
- nr_found = radix_tree_gang_lookup_tag(
- &pag->pag_ici_root,
- (void **)batch, first_index,
- XFS_LOOKUP_BATCH,
- XFS_ICI_RECLAIM_TAG);
- if (!nr_found) {
- done = 1;
- rcu_read_unlock();
- break;
- }
-
- /*
- * Grab the inodes before we drop the lock. if we found
- * nothing, nr == 0 and the loop will be skipped.
- */
- for (i = 0; i < nr_found; i++) {
- struct xfs_inode *ip = batch[i];
-
- if (done || !xfs_reclaim_inode_grab(ip))
- batch[i] = NULL;
-
- /*
- * Update the index for the next lookup. Catch
- * overflows into the next AG range which can
- * occur if we have inodes in the last block of
- * the AG and we are currently pointing to the
- * last inode.
- *
- * Because we may see inodes that are from the
- * wrong AG due to RCU freeing and
- * reallocation, only update the index if it
- * lies in this AG. It was a race that lead us
- * to see this inode, so another lookup from
- * the same index will not find it again.
- */
- if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
- pag->pag_agno)
- continue;
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- done = 1;
- }
-
- /* unlock now we've grabbed the inodes. */
- rcu_read_unlock();
-
- for (i = 0; i < nr_found; i++) {
- if (batch[i])
- xfs_reclaim_inode(batch[i], pag);
- }
-
- *nr_to_scan -= XFS_LOOKUP_BATCH;
- cond_resched();
- } while (nr_found && !done && *nr_to_scan > 0);
-
- if (done)
- first_index = 0;
- WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
- xfs_perag_put(pag);
- }
-}
-
void
xfs_reclaim_inodes(
struct xfs_mount *mp)
{
- int nr_to_scan = INT_MAX;
-
while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
xfs_ail_push_all_sync(mp->m_ail);
- xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+ xfs_inode_walk(mp, XFS_ICI_RECLAIM_TAG, NULL); /* NULL eofb: walk without a scan limit */
}
}
@@ -1144,11 +1087,13 @@ xfs_reclaim_inodes_nr(
struct xfs_mount *mp,
int nr_to_scan)
{
+ struct xfs_eofblocks eofb = { .nr_to_scan = nr_to_scan };
+
/* kick background reclaimer and push the AIL */
xfs_reclaim_work_queue(mp);
xfs_ail_push_all(mp->m_ail);
- xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+ xfs_inode_walk(mp, XFS_ICI_RECLAIM_TAG, &eofb);
return 0;
}
@@ -1258,9 +1203,8 @@ xfs_reclaim_worker(
{
struct xfs_mount *mp = container_of(to_delayed_work(work),
struct xfs_mount, m_reclaim_work);
- int nr_to_scan = INT_MAX;
- xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+ xfs_inode_walk(mp, XFS_ICI_RECLAIM_TAG, NULL);
xfs_reclaim_work_queue(mp);
}
@@ -15,13 +15,14 @@ struct xfs_eofblocks {
kgid_t eof_gid;
prid_t eof_prid;
__u64 eof_min_file_size;
+
+ /* Number of inodes to scan, currently limited to reclaim */
+ int nr_to_scan;
};
/*
* tags for inode radix tree
*/
-#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
- in xfs_inode_walk */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
/* Inode has speculative preallocations (posteof or cow) to clean. */
#define XFS_ICI_BLOCKGC_TAG 1