| Message ID | 20200604074606.266213-20-david@fromorbit.com |
|---|---|
| State | Superseded, archived |
| Series | xfs: rework inode flushing to make inode reclaim fully asynchronous |
On Thu, Jun 04, 2020 at 05:45:55PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> Inode reclaim will still throttle direct reclaim on the per-ag
> reclaim locks. This is no longer necessary as reclaim can run
> non-blocking now. Hence we can remove these locks so that we don't
> arbitrarily block reclaimers just because there are more direct
> reclaimers than there are AGs.
>
> This can result in multiple reclaimers working on the same range of
> an AG, but this doesn't cause any apparent issues. Optimising the
> spread of concurrent reclaimers for best efficiency can be done in a
> future patchset.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/xfs/xfs_icache.c | 31 ++++++++++++-------------------
>  fs/xfs/xfs_mount.c  |  4 ----
>  fs/xfs/xfs_mount.h  |  1 -
>  3 files changed, 12 insertions(+), 24 deletions(-)
>
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 74032316ce5cc..c4ba8d7bc45bc 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
...
> @@ -1298,11 +1293,9 @@ xfs_reclaim_inodes_ag(
>
>  		} while (nr_found && !done && *nr_to_scan > 0);
>
> -		if (trylock && !done)
> -			pag->pag_ici_reclaim_cursor = first_index;
> -		else
> -			pag->pag_ici_reclaim_cursor = 0;
> -		mutex_unlock(&pag->pag_ici_reclaim_lock);
> +		if (done)
> +			first_index = 0;
> +		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);

I thought the [READ|WRITE]_ONCE() macros had to do with ordering, not
necessarily atomicity. Is this write safe if we're running a 32-bit
kernel, for example? Outside of that the broader functional change seems
reasonable.

Brian

>  		xfs_perag_put(pag);
>  	}
>  	return skipped;
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index d5dcf98698600..03158b42a1943 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -148,7 +148,6 @@ xfs_free_perag(
>  		ASSERT(atomic_read(&pag->pag_ref) == 0);
>  		xfs_iunlink_destroy(pag);
>  		xfs_buf_hash_destroy(pag);
> -		mutex_destroy(&pag->pag_ici_reclaim_lock);
>  		call_rcu(&pag->rcu_head, __xfs_free_perag);
>  	}
>  }
> @@ -200,7 +199,6 @@ xfs_initialize_perag(
>  		pag->pag_agno = index;
>  		pag->pag_mount = mp;
>  		spin_lock_init(&pag->pag_ici_lock);
> -		mutex_init(&pag->pag_ici_reclaim_lock);
>  		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
>  		if (xfs_buf_hash_init(pag))
>  			goto out_free_pag;
> @@ -242,7 +240,6 @@ xfs_initialize_perag(
>  out_hash_destroy:
>  	xfs_buf_hash_destroy(pag);
>  out_free_pag:
> -	mutex_destroy(&pag->pag_ici_reclaim_lock);
>  	kmem_free(pag);
>  out_unwind_new_pags:
>  	/* unwind any prior newly initialized pags */
> @@ -252,7 +249,6 @@ xfs_initialize_perag(
>  			break;
>  		xfs_buf_hash_destroy(pag);
>  		xfs_iunlink_destroy(pag);
> -		mutex_destroy(&pag->pag_ici_reclaim_lock);
>  		kmem_free(pag);
>  	}
>  	return error;
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 3725d25ad97e8..a72cfcaa4ad12 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -354,7 +354,6 @@ typedef struct xfs_perag {
>  	spinlock_t	pag_ici_lock;	/* incore inode cache lock */
>  	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
>  	int		pag_ici_reclaimable;	/* reclaimable inodes */
> -	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */
>  	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
>
>  	/* buffer cache index */
> --
> 2.26.2.761.g0e0b3e54be
>
On Fri, Jun 05, 2020 at 12:26:11PM -0400, Brian Foster wrote:
> On Thu, Jun 04, 2020 at 05:45:55PM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> >
> > Inode reclaim will still throttle direct reclaim on the per-ag
> > reclaim locks. This is no longer necessary as reclaim can run
> > non-blocking now. Hence we can remove these locks so that we don't
> > arbitrarily block reclaimers just because there are more direct
> > reclaimers than there are AGs.
> >
> > This can result in multiple reclaimers working on the same range of
> > an AG, but this doesn't cause any apparent issues. Optimising the
> > spread of concurrent reclaimers for best efficiency can be done in a
> > future patchset.
> >
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  fs/xfs/xfs_icache.c | 31 ++++++++++++-------------------
> >  fs/xfs/xfs_mount.c  |  4 ----
> >  fs/xfs/xfs_mount.h  |  1 -
> >  3 files changed, 12 insertions(+), 24 deletions(-)
> >
> > diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> > index 74032316ce5cc..c4ba8d7bc45bc 100644
> > --- a/fs/xfs/xfs_icache.c
> > +++ b/fs/xfs/xfs_icache.c
> ...
> > @@ -1298,11 +1293,9 @@ xfs_reclaim_inodes_ag(
> >
> >  		} while (nr_found && !done && *nr_to_scan > 0);
> >
> > -		if (trylock && !done)
> > -			pag->pag_ici_reclaim_cursor = first_index;
> > -		else
> > -			pag->pag_ici_reclaim_cursor = 0;
> > -		mutex_unlock(&pag->pag_ici_reclaim_lock);
> > +		if (done)
> > +			first_index = 0;
> > +		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
>
> I thought the [READ|WRITE]_ONCE() macros had to do with ordering, not
> necessarily atomicity. Is this write safe if we're running a 32-bit
> kernel, for example? Outside of that the broader functional change seems
> reasonable.

They are used for documenting intentional data races now, too.
That's what these are - we don't care about serialisation, but there
are static checkers that will now spew "data race" warnings because
multiple threads can race reading and writing unserialised
variables.

It is safe on 32 bit machines because these variables are 32 bit on
32 bit machines, and reads/writes of 32 bit variables on 32 bit
machines are atomic (though not serialised).

Cheers,

Dave.
On Sat, Jun 06, 2020 at 07:07:46AM +1000, Dave Chinner wrote:
> On Fri, Jun 05, 2020 at 12:26:11PM -0400, Brian Foster wrote:
> > On Thu, Jun 04, 2020 at 05:45:55PM +1000, Dave Chinner wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > >
> > > Inode reclaim will still throttle direct reclaim on the per-ag
> > > reclaim locks. This is no longer necessary as reclaim can run
> > > non-blocking now. Hence we can remove these locks so that we don't
> > > arbitrarily block reclaimers just because there are more direct
> > > reclaimers than there are AGs.
> > >
> > > This can result in multiple reclaimers working on the same range of
> > > an AG, but this doesn't cause any apparent issues. Optimising the
> > > spread of concurrent reclaimers for best efficiency can be done in a
> > > future patchset.
> > >
> > > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > > Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > >  fs/xfs/xfs_icache.c | 31 ++++++++++++-------------------
> > >  fs/xfs/xfs_mount.c  |  4 ----
> > >  fs/xfs/xfs_mount.h  |  1 -
> > >  3 files changed, 12 insertions(+), 24 deletions(-)
> > >
> > > diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> > > index 74032316ce5cc..c4ba8d7bc45bc 100644
> > > --- a/fs/xfs/xfs_icache.c
> > > +++ b/fs/xfs/xfs_icache.c
> > ...
> > > @@ -1298,11 +1293,9 @@ xfs_reclaim_inodes_ag(
> > >
> > >  		} while (nr_found && !done && *nr_to_scan > 0);
> > >
> > > -		if (trylock && !done)
> > > -			pag->pag_ici_reclaim_cursor = first_index;
> > > -		else
> > > -			pag->pag_ici_reclaim_cursor = 0;
> > > -		mutex_unlock(&pag->pag_ici_reclaim_lock);
> > > +		if (done)
> > > +			first_index = 0;
> > > +		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
> >
> > I thought the [READ|WRITE]_ONCE() macros had to do with ordering, not
> > necessarily atomicity. Is this write safe if we're running a 32-bit
> > kernel, for example? Outside of that the broader functional change seems
> > reasonable.
>
> They are used for documenting intentional data races now, too.
> That's what these are - we don't care about serialisation, but there
> are static checkers that will now spew "data race" warnings because
> multiple threads can race reading and writing unserialised
> variables.
>

I wasn't aware of that. I'm not sure how widely known that is so it
might be worth a one liner comment to ensure these are preserved (if
they survive the end of the series).

> It is safe on 32 bit machines because these variables are 32 bit on
> 32 bit machines, and reads/writes of 32 bit variables on 32 bit
> machines are atomic (though not serialised).
>

Ah, right. I was thinking they were always 64-bit but that is not the
case. With that:

Reviewed-by: Brian Foster <bfoster@redhat.com>

> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@fromorbit.com
>
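As an aside for readers less familiar with the READ_ONCE()/WRITE_ONCE() pattern discussed above, here is a minimal userspace sketch of the idea; it is not taken from the patch, and the `fake_perag` structure, the `reclaim_pass()` function and the batch arithmetic are invented purely for illustration. The two macros are defined the same basic way the kernel defines them, as volatile accesses, so the compiler must emit a single non-torn, non-cached load or store. There is deliberately no lock: the worst the race can do is make two passes start scanning from the same cursor value.

```c
/*
 * Minimal sketch of a lockless scan cursor shared by concurrent
 * "reclaim" passes. READ_ONCE()/WRITE_ONCE() document the intentional
 * data race and stop the compiler tearing or caching the accesses.
 * Names and values here are hypothetical, for illustration only.
 */
#include <pthread.h>
#include <stdio.h>

#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))

/* Stand-in for the per-AG structure; only the cursor matters here. */
struct fake_perag {
	unsigned long	pag_ici_reclaim_cursor;	/* scan restart point */
};

static struct fake_perag pag;

/* One reclaim pass: pick up the cursor, "scan" a batch, publish progress. */
static void *reclaim_pass(void *arg)
{
	unsigned long first_index;
	int done;

	(void)arg;

	/* Unserialised read: another thread may update this concurrently. */
	first_index = READ_ONCE(pag.pag_ici_reclaim_cursor);

	/* Pretend we scanned a batch of 32 inodes starting at first_index. */
	first_index += 32;
	done = (first_index >= 1024);	/* arbitrary "end of AG" for the demo */

	if (done)
		first_index = 0;
	/* Unserialised write: a single aligned native-word store, no lock. */
	WRITE_ONCE(pag.pag_ici_reclaim_cursor, first_index);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, reclaim_pass, NULL);
	pthread_create(&t2, NULL, reclaim_pass, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);

	printf("cursor after two racing passes: %lu\n",
	       READ_ONCE(pag.pag_ici_reclaim_cursor));
	return 0;
}
```

Built with something like `gcc -pthread cursor.c`, the sketch only demonstrates the design point of the thread: an aligned native-word load or store needs no lock when a stale or duplicated cursor value is harmless, which is exactly the property the patch relies on after removing pag_ici_reclaim_lock.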
```diff
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 74032316ce5cc..c4ba8d7bc45bc 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1211,12 +1211,9 @@ xfs_reclaim_inodes_ag(
 	int			*nr_to_scan)
 {
 	struct xfs_perag	*pag;
-	xfs_agnumber_t		ag;
-	int			trylock = flags & SYNC_TRYLOCK;
-	int			skipped;
+	xfs_agnumber_t		ag = 0;
+	int			skipped = 0;
 
-	ag = 0;
-	skipped = 0;
 	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
 		unsigned long	first_index = 0;
 		int		done = 0;
@@ -1224,15 +1221,13 @@ xfs_reclaim_inodes_ag(
 
 		ag = pag->pag_agno + 1;
 
-		if (trylock) {
-			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
-				skipped++;
-				xfs_perag_put(pag);
-				continue;
-			}
-			first_index = pag->pag_ici_reclaim_cursor;
-		} else
-			mutex_lock(&pag->pag_ici_reclaim_lock);
+		/*
+		 * If the cursor is not zero, we haven't scanned the whole AG
+		 * so we might have skipped inodes here.
+		 */
+		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
+		if (first_index)
+			skipped++;
 
 		do {
 			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
@@ -1298,11 +1293,9 @@ xfs_reclaim_inodes_ag(
 
 		} while (nr_found && !done && *nr_to_scan > 0);
 
-		if (trylock && !done)
-			pag->pag_ici_reclaim_cursor = first_index;
-		else
-			pag->pag_ici_reclaim_cursor = 0;
-		mutex_unlock(&pag->pag_ici_reclaim_lock);
+		if (done)
+			first_index = 0;
+		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
 		xfs_perag_put(pag);
 	}
 	return skipped;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d5dcf98698600..03158b42a1943 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -148,7 +148,6 @@ xfs_free_perag(
 		ASSERT(atomic_read(&pag->pag_ref) == 0);
 		xfs_iunlink_destroy(pag);
 		xfs_buf_hash_destroy(pag);
-		mutex_destroy(&pag->pag_ici_reclaim_lock);
 		call_rcu(&pag->rcu_head, __xfs_free_perag);
 	}
 }
@@ -200,7 +199,6 @@ xfs_initialize_perag(
 		pag->pag_agno = index;
 		pag->pag_mount = mp;
 		spin_lock_init(&pag->pag_ici_lock);
-		mutex_init(&pag->pag_ici_reclaim_lock);
 		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
 		if (xfs_buf_hash_init(pag))
 			goto out_free_pag;
@@ -242,7 +240,6 @@ xfs_initialize_perag(
 out_hash_destroy:
 	xfs_buf_hash_destroy(pag);
 out_free_pag:
-	mutex_destroy(&pag->pag_ici_reclaim_lock);
 	kmem_free(pag);
 out_unwind_new_pags:
 	/* unwind any prior newly initialized pags */
@@ -252,7 +249,6 @@ xfs_initialize_perag(
 			break;
 		xfs_buf_hash_destroy(pag);
 		xfs_iunlink_destroy(pag);
-		mutex_destroy(&pag->pag_ici_reclaim_lock);
 		kmem_free(pag);
 	}
 	return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3725d25ad97e8..a72cfcaa4ad12 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -354,7 +354,6 @@ typedef struct xfs_perag {
 	spinlock_t	pag_ici_lock;	/* incore inode cache lock */
 	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
 	int		pag_ici_reclaimable;	/* reclaimable inodes */
-	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */
 	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
 
 	/* buffer cache index */
```