@@ -1862,38 +1862,45 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
}
EXPORT_SYMBOL(__invalidate_device);
+static inline void
+__iterate_bdev(spinlock_t *lock, struct inode *inode, struct inode **old_inode,
+ void (*func)(struct block_device *, void *), void *arg)
+{
+ struct address_space *mapping = inode->i_mapping;
+
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
+ mapping->nrpages == 0) {
+ spin_unlock(&inode->i_lock);
+ return; /* inode skipped; @lock is still held */
+ }
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(lock); /* func() below runs without @lock */
+ /*
+ * We hold a reference to 'inode' so it couldn't have been
+ * removed from s_inodes list while we dropped the caller's
+ * list lock (@lock). We cannot iput the inode now as we can
+ * be holding the last reference and we cannot iput it under
+ * @lock. So we keep the reference and iput it later.
+ */
+ iput(*old_inode);
+ *old_inode = inode;
+
+ func(I_BDEV(inode), arg);
+
+ spin_lock(lock); /* return with @lock held again */
+}
+
void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
struct inode *inode, *old_inode = NULL;
spin_lock(&blockdev_superblock->s_inode_list_lock);
- list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
- struct address_space *mapping = inode->i_mapping;
-
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
- mapping->nrpages == 0) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- __iget(inode);
- spin_unlock(&inode->i_lock);
- spin_unlock(&blockdev_superblock->s_inode_list_lock);
- /*
- * We hold a reference to 'inode' so it couldn't have been
- * removed from s_inodes list while we dropped the
- * s_inode_list_lock We cannot iput the inode now as we can
- * be holding the last reference and we cannot iput it under
- * s_inode_list_lock. So we keep the reference and iput it
- * later.
- */
- iput(old_inode);
- old_inode = inode;
+ list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
+ __iterate_bdev(&blockdev_superblock->s_inode_list_lock,
+ inode, &old_inode, func, arg); /* may drop and re-take the list lock */
- func(I_BDEV(inode), arg);
-
- spin_lock(&blockdev_superblock->s_inode_list_lock);
- }
spin_unlock(&blockdev_superblock->s_inode_list_lock);
iput(old_inode);
}
@@ -13,28 +13,35 @@
/* A global variable is a bit ugly, but it keeps the code simple */
int sysctl_drop_caches;
+static inline void __drop_pagecache_sb(spinlock_t *lock, struct inode *inode,
+ struct inode **toput_inode)
+{
+ spin_lock(&inode->i_lock);
+ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+ (inode->i_mapping->nrpages == 0)) {
+ spin_unlock(&inode->i_lock);
+ return; /* inode skipped; @lock is still held */
+ }
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(lock); /* the invalidation below runs without @lock */
+
+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
+ iput(*toput_inode); /* release the inode pinned by the previous call */
+ *toput_inode = inode; /* our reference is dropped later, outside @lock */
+
+ spin_lock(lock); /* return with @lock held again */
+}
+
static void drop_pagecache_sb(struct super_block *sb, void *unused)
{
struct inode *inode, *toput_inode = NULL;
spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
- if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
- (inode->i_mapping->nrpages == 0)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- __iget(inode);
- spin_unlock(&inode->i_lock);
- spin_unlock(&sb->s_inode_list_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
+ __drop_pagecache_sb(&sb->s_inode_list_lock, inode,
+ &toput_inode); /* may drop and re-take the list lock */
- invalidate_mapping_pages(inode->i_mapping, 0, -1);
- iput(toput_inode);
- toput_inode = inode;
-
- spin_lock(&sb->s_inode_list_lock);
- }
spin_unlock(&sb->s_inode_list_lock);
iput(toput_inode);
}
@@ -2095,6 +2095,43 @@ out_unlock_inode:
}
EXPORT_SYMBOL(__mark_inode_dirty);
+static inline void __wait_sb_inode(spinlock_t *lock, struct inode *inode,
+ struct inode **old_inode)
+{
+ struct address_space *mapping = inode->i_mapping;
+
+ spin_lock(&inode->i_lock);
+ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+ (mapping->nrpages == 0)) {
+ spin_unlock(&inode->i_lock);
+ return; /* inode skipped; @lock is still held */
+ }
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(lock); /* the wait below runs without @lock */
+
+ /*
+ * We hold a reference to 'inode' so it couldn't have been
+ * removed from s_inodes list while we dropped the caller's
+ * list lock (@lock). We cannot iput the inode now as we can
+ * be holding the last reference and we cannot iput it under
+ * @lock. So we keep the reference and iput it later.
+ */
+ iput(*old_inode);
+ *old_inode = inode;
+
+ /*
+ * We keep the error status of individual mapping so that
+ * applications can catch the writeback error using fsync(2).
+ * See filemap_fdatawait_keep_errors() for details.
+ */
+ filemap_fdatawait_keep_errors(mapping);
+
+ cond_resched();
+
+ spin_lock(lock); /* return with @lock held again */
+}
+
/*
* The @s_sync_lock is used to serialise concurrent sync operations
* to avoid lock contention problems with concurrent wait_sb_inodes() calls.
@@ -2124,41 +2161,9 @@ static void wait_sb_inodes(struct super_block *sb)
* In which case, the inode may not be on the dirty list, but
* we still have to wait for that writeout.
*/
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- struct address_space *mapping = inode->i_mapping;
-
- spin_lock(&inode->i_lock);
- if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
- (mapping->nrpages == 0)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- __iget(inode);
- spin_unlock(&inode->i_lock);
- spin_unlock(&sb->s_inode_list_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
+ __wait_sb_inode(&sb->s_inode_list_lock, inode, &old_inode);
- /*
- * We hold a reference to 'inode' so it couldn't have been
- * removed from s_inodes list while we dropped the
- * s_inode_list_lock. We cannot iput the inode now as we can
- * be holding the last reference and we cannot iput it under
- * s_inode_list_lock. So we keep the reference and iput it
- * later.
- */
- iput(old_inode);
- old_inode = inode;
-
- /*
- * We keep the error status of individual mapping so that
- * applications can catch the writeback error using fsync(2).
- * See filemap_fdatawait_keep_errors() for details.
- */
- filemap_fdatawait_keep_errors(mapping);
-
- cond_resched();
-
- spin_lock(&sb->s_inode_list_lock);
- }
spin_unlock(&sb->s_inode_list_lock);
iput(old_inode);
mutex_unlock(&sb->s_sync_lock);
@@ -579,6 +579,37 @@ static void dispose_list(struct list_head *head)
}
}
+static inline int __evict_inode(spinlock_t *lock, struct inode *inode,
+ struct list_head *dispose)
+{
+ if (atomic_read(&inode->i_count))
+ return 0; /* i_count != 0: skipped, @lock still held */
+
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+ spin_unlock(&inode->i_lock);
+ return 0; /* skipped, @lock still held */
+ }
+
+ inode->i_state |= I_FREEING;
+ inode_lru_list_del(inode);
+ spin_unlock(&inode->i_lock);
+ list_add(&inode->i_lru, dispose);
+
+ /*
+ * We can have a ton of inodes to evict at unmount time given
+ * enough memory, check to see if we need to go to sleep for a
+ * bit so we don't livelock.
+ */
+ if (need_resched()) {
+ spin_unlock(lock);
+ cond_resched();
+ dispose_list(dispose);
+ return 1; /* @lock was dropped: caller must re-take it and rescan */
+ }
+ return 0; /* inode queued on @dispose, @lock still held */
+}
+
/**
* evict_inodes - evict all evictable inodes for a superblock
* @sb: superblock to operate on
@@ -596,35 +627,39 @@ void evict_inodes(struct super_block *sb)
again:
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
- if (atomic_read(&inode->i_count))
- continue;
+ if (__evict_inode(&sb->s_inode_list_lock, inode, &dispose))
+ goto again;
+ }
+ spin_unlock(&sb->s_inode_list_lock);
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
+ dispose_list(&dispose);
+}
- inode->i_state |= I_FREEING;
- inode_lru_list_del(inode);
+static inline void __invalidate_inode(struct inode *inode, bool kill_dirty,
+ struct list_head *dispose, int *busy)
+{
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
spin_unlock(&inode->i_lock);
- list_add(&inode->i_lru, &dispose);
+ return; /* skipped without touching @busy */
+ }
- /*
- * We can have a ton of inodes to evict at unmount time given
- * enough memory, check to see if we need to go to sleep for a
- * bit so we don't livelock.
- */
- if (need_resched()) {
- spin_unlock(&sb->s_inode_list_lock);
- cond_resched();
- dispose_list(&dispose);
- goto again;
- }
+ if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
+ spin_unlock(&inode->i_lock);
+ *busy = 1; /* dirty inode left in place */
+ return;
}
- spin_unlock(&sb->s_inode_list_lock);
- dispose_list(&dispose);
+ if (atomic_read(&inode->i_count)) {
+ spin_unlock(&inode->i_lock);
+ *busy = 1; /* i_count != 0: inode left in place */
+ return;
+ }
+
+ inode->i_state |= I_FREEING;
+ inode_lru_list_del(inode);
+ spin_unlock(&inode->i_lock);
+ list_add(&inode->i_lru, dispose); /* queued for the caller to dispose of */
}
/**
@@ -644,28 +679,9 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
LIST_HEAD(dispose);
spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
- spin_unlock(&inode->i_lock);
- busy = 1;
- continue;
- }
- if (atomic_read(&inode->i_count)) {
- spin_unlock(&inode->i_lock);
- busy = 1;
- continue;
- }
+ list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list)
+ __invalidate_inode(inode, kill_dirty, &dispose, &busy);
- inode->i_state |= I_FREEING;
- inode_lru_list_del(inode);
- spin_unlock(&inode->i_lock);
- list_add(&inode->i_lru, &dispose);
- }
spin_unlock(&sb->s_inode_list_lock);
dispose_list(&dispose);
@@ -141,86 +141,98 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
return ret;
}
-/**
- * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
- * @sb: superblock being unmounted.
- *
- * Called during unmount with no locks held, so needs to be safe against
- * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
- */
-void fsnotify_unmount_inodes(struct super_block *sb)
+static inline void
+__fsnotify_unmount_inode(spinlock_t *lock, struct inode *inode,
+ struct list_head *head, struct inode **pnext,
+ struct inode **need_iput)
{
- struct inode *inode, *next_i, *need_iput = NULL;
-
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) {
- struct inode *need_iput_tmp;
+ struct inode *need_iput_tmp;
+ struct inode *next_i = *pnext;
- /*
- * We cannot __iget() an inode in state I_FREEING,
- * I_WILL_FREE, or I_NEW which is fine because by that point
- * the inode cannot have any associated watches.
- */
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
+ /*
+ * We cannot __iget() an inode in state I_FREEING,
+ * I_WILL_FREE, or I_NEW which is fine because by that point
+ * the inode cannot have any associated watches.
+ */
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
+ return; /* inode skipped; @lock is still held */
+ }
- /*
- * If i_count is zero, the inode cannot have any watches and
- * doing an __iget/iput with MS_ACTIVE clear would actually
- * evict all inodes with zero i_count from icache which is
- * unnecessarily violent and may in fact be illegal to do.
- */
- if (!atomic_read(&inode->i_count)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
+ /*
+ * If i_count is zero, the inode cannot have any watches and
+ * doing an __iget/iput with MS_ACTIVE clear would actually
+ * evict all inodes with zero i_count from icache which is
+ * unnecessarily violent and may in fact be illegal to do.
+ */
+ if (!atomic_read(&inode->i_count)) {
+ spin_unlock(&inode->i_lock);
+ return; /* inode skipped; @lock is still held */
+ }
- need_iput_tmp = need_iput;
- need_iput = NULL;
+ need_iput_tmp = *need_iput;
+ *need_iput = NULL;
- /* In case fsnotify_inode_delete() drops a reference. */
- if (inode != need_iput_tmp)
- __iget(inode);
- else
- need_iput_tmp = NULL;
- spin_unlock(&inode->i_lock);
+ /* In case fsnotify_inode_delete() drops a reference. */
+ if (inode != need_iput_tmp)
+ __iget(inode);
+ else
+ need_iput_tmp = NULL;
+ spin_unlock(&inode->i_lock);
- /* In case the dropping of a reference would nuke next_i. */
- while (&next_i->i_sb_list != &sb->s_inodes) {
- spin_lock(&next_i->i_lock);
- if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
- atomic_read(&next_i->i_count)) {
- __iget(next_i);
- need_iput = next_i;
- spin_unlock(&next_i->i_lock);
- break;
- }
+ /* In case the dropping of a reference would nuke next_i. */
+ while (&next_i->i_sb_list != head) {
+ spin_lock(&next_i->i_lock);
+ if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
+ atomic_read(&next_i->i_count)) {
+ __iget(next_i);
+ *need_iput = next_i;
spin_unlock(&next_i->i_lock);
- next_i = list_next_entry(next_i, i_sb_list);
+ break;
}
+ spin_unlock(&next_i->i_lock);
+ next_i = list_next_entry(next_i, i_sb_list);
+ }
+ *pnext = next_i; /* the caller's loop continues from this entry */
- /*
- * We can safely drop s_inode_list_lock here because either
- * we actually hold references on both inode and next_i or
- * end of list. Also no new inodes will be added since the
- * umount has begun.
- */
- spin_unlock(&sb->s_inode_list_lock);
+ /*
+ * We can safely drop @lock here because either
+ * we actually hold references on both inode and next_i or
+ * end of list. Also no new inodes will be added since the
+ * umount has begun.
+ */
+ spin_unlock(lock); /* the calls below run without @lock */
- if (need_iput_tmp)
- iput(need_iput_tmp);
+ if (need_iput_tmp)
+ iput(need_iput_tmp);
- /* for each watch, send FS_UNMOUNT and then remove it */
- fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+ /* for each watch, send FS_UNMOUNT and then remove it */
+ fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
- fsnotify_inode_delete(inode);
+ fsnotify_inode_delete(inode);
- iput(inode);
+ iput(inode);
+
+ spin_lock(lock); /* return with @lock held again */
+}
+
+/**
+ * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
+ * @sb: superblock being unmounted.
+ *
+ * Called during unmount with no locks held, so needs to be safe against
+ * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN
+ * block.
+ */
+void fsnotify_unmount_inodes(struct super_block *sb)
+{
+ struct inode *inode, *next_i, *need_iput = NULL;
+
+ spin_lock(&sb->s_inode_list_lock);
+ list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list)
+ __fsnotify_unmount_inode(&sb->s_inode_list_lock, inode,
+ &sb->s_inodes, &next_i, &need_iput);
- spin_lock(&sb->s_inode_list_lock);
- }
spin_unlock(&sb->s_inode_list_lock);
}
@@ -920,6 +920,42 @@ static int dqinit_needed(struct inode *inode, int type)
return 0;
}
+static inline void
+__add_dquot_ref(spinlock_t *lock, struct inode *inode, int type,
+#ifdef CONFIG_QUOTA_DEBUG
+ int *reserved, /* set to 1 when an inode has reserved space */
+#endif
+ struct inode **old_inode)
+{
+ spin_lock(&inode->i_lock);
+ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+ !atomic_read(&inode->i_writecount) ||
+ !dqinit_needed(inode, type)) {
+ spin_unlock(&inode->i_lock);
+ return; /* inode skipped; @lock is still held */
+ }
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(lock); /* the calls below run without @lock */
+
+#ifdef CONFIG_QUOTA_DEBUG
+ if (unlikely(inode_get_rsv_space(inode) > 0))
+ *reserved = 1;
+#endif
+ iput(*old_inode);
+ __dquot_initialize(inode, type);
+
+ /*
+ * We hold a reference to 'inode' so it couldn't have been
+ * removed from s_inodes list while we dropped the caller's
+ * list lock (@lock). We cannot iput the inode now as we can be
+ * holding the last reference and we cannot iput it under
+ * @lock. So we keep the reference and iput it later.
+ */
+ *old_inode = inode;
+ spin_lock(lock); /* return with @lock held again */
+}
+
/* This routine is guarded by dqonoff_mutex mutex */
static void add_dquot_ref(struct super_block *sb, int type)
{
@@ -929,36 +965,12 @@ static void add_dquot_ref(struct super_block *sb, int type)
#endif
spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
- if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
- !atomic_read(&inode->i_writecount) ||
- !dqinit_needed(inode, type)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- __iget(inode);
- spin_unlock(&inode->i_lock);
- spin_unlock(&sb->s_inode_list_lock);
-
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
+ __add_dquot_ref(&sb->s_inode_list_lock, inode, type,
#ifdef CONFIG_QUOTA_DEBUG
- if (unlikely(inode_get_rsv_space(inode) > 0))
- reserved = 1;
+ &reserved,
#endif
- iput(old_inode);
- __dquot_initialize(inode, type);
-
- /*
- * We hold a reference to 'inode' so it couldn't have been
- * removed from s_inodes list while we dropped the
- * s_inode_list_lock. We cannot iput the inode now as we can be
- * holding the last reference and we cannot iput it under
- * s_inode_list_lock. So we keep the reference and iput it
- * later.
- */
- old_inode = inode;
- spin_lock(&sb->s_inode_list_lock);
- }
+ &old_inode);
spin_unlock(&sb->s_inode_list_lock);
iput(old_inode);
@@ -1022,6 +1034,25 @@ static void put_dquot_list(struct list_head *tofree_head)
}
}
+static inline void
+__remove_dquot_ref(struct inode *inode, int type,
+ struct list_head *tofree_head, int *reserved)
+{
+ /*
+ * We have to scan also I_NEW inodes because they can already
+ * have quota pointer initialized. Luckily, we need to touch
+ * only quota pointers and these have separate locking
+ * (dq_data_lock).
+ */
+ spin_lock(&dq_data_lock);
+ if (!IS_NOQUOTA(inode)) {
+ if (unlikely(inode_get_rsv_space(inode) > 0))
+ *reserved = 1; /* this inode still has reserved space */
+ remove_inode_dquot_ref(inode, type, tofree_head);
+ }
+ spin_unlock(&dq_data_lock); /* i_lock and the list lock are never touched here */
+}
+
static void remove_dquot_ref(struct super_block *sb, int type,
struct list_head *tofree_head)
{
@@ -1029,21 +1060,9 @@ static void remove_dquot_ref(struct super_block *sb, int type,
int reserved = 0;
spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- /*
- * We have to scan also I_NEW inodes because they can already
- * have quota pointer initialized. Luckily, we need to touch
- * only quota pointers and these have separate locking
- * (dq_data_lock).
- */
- spin_lock(&dq_data_lock);
- if (!IS_NOQUOTA(inode)) {
- if (unlikely(inode_get_rsv_space(inode) > 0))
- reserved = 1;
- remove_inode_dquot_ref(inode, type, tofree_head);
- }
- spin_unlock(&dq_data_lock);
- }
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
+ __remove_dquot_ref(inode, type, tofree_head, &reserved);
+
spin_unlock(&sb->s_inode_list_lock);
#ifdef CONFIG_QUOTA_DEBUG
if (reserved) {
This patch refactors the following superblock inode list (sb->s_inodes)
iteration functions in vfs:

 1. iterate_bdevs()
 2. drop_pagecache_sb()
 3. wait_sb_inodes()
 4. evict_inodes()
 5. invalidate_inodes()
 6. fsnotify_unmount_inodes()
 7. add_dquot_ref()
 8. remove_dquot_ref()

The per-inode processing code of each of the above functions is extracted
into an inline function to ease their conversion to use the per-cpu list.
There is no functional change.

Signed-off-by: Waiman Long <Waiman.Long@hpe.com>
---
 fs/block_dev.c         |  59 +++++++++++---------
 fs/drop_caches.c       |  39 ++++++++-----
 fs/fs-writeback.c      |  73 +++++++++++++-----------
 fs/inode.c             | 108 ++++++++++++++++++++---------------
 fs/notify/inode_mark.c | 146 ++++++++++++++++++++++++++----------------------
 fs/quota/dquot.c       | 105 ++++++++++++++++++++--------------
 6 files changed, 298 insertions(+), 232 deletions(-)