@@ -472,16 +472,28 @@ void bdev_drop(struct block_device *bdev)
iput(BD_INODE(bdev));
}
+static int bdev_pages_count(struct inode *inode, void *data)
+{
+ long *pages = data;
+
+ *pages += inode->i_mapping->nrpages;
+ return INO_ITER_DONE;
+}
+
long nr_blockdev_pages(void)
{
- struct inode *inode;
long ret = 0;
- spin_lock(&blockdev_superblock->s_inode_list_lock);
- list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
- ret += inode->i_mapping->nrpages;
- spin_unlock(&blockdev_superblock->s_inode_list_lock);
-
+ /*
+ * We can be called from contexts where blocking is not
+ * desirable. The count is advisory at best, and we only
+ * need to access the inode mapping. Hence as long as we
+ * have an inode existence guarantee, we can safely count
+ * the cached pages on each inode without needing to hold
+ * a reference to it.
+ */
+ super_iter_inodes_unsafe(blockdev_superblock,
+ bdev_pages_count, &ret);
return ret;
}
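
A note on the iterator contract the callbacks in these hunks use: the
INO_ITER_* return codes and the callback signature are introduced
elsewhere in this series and are not shown here. A minimal sketch of
the assumed contract follows; the numeric values and the typedef name
are assumptions for illustration, not taken from this section:

/* Sketch of the assumed iterator contract (not part of this patch). */
#define INO_ITER_DONE	0	/* keep walking to the next inode */
#define INO_ITER_ABORT	1	/* stop the walk; the caller may retry */

/* Called for each inode; must not sleep in the _unsafe() variant. */
typedef int (*ino_iter_fn)(struct inode *inode, void *private_data);
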
@@ -761,8 +761,11 @@ static void evict(struct inode *inode)
* Dispose-list gets a local list with local inodes in it, so it doesn't
* need to worry about list corruption and SMP locks.
*/
-static void dispose_list(struct list_head *head)
+static bool dispose_list(struct list_head *head)
{
+ if (list_empty(head))
+ return false;
+
while (!list_empty(head)) {
struct inode *inode;
@@ -772,6 +775,7 @@ static void dispose_list(struct list_head *head)
evict(inode);
cond_resched();
}
+ return true;
}
/**
@@ -783,47 +787,50 @@ static void dispose_list(struct list_head *head)
* so any inode reaching zero refcount during or after that call will
* be immediately evicted.
*/
+static int evict_inode_fn(struct inode *inode, void *data)
+{
+ struct list_head *dispose = data;
+
+ spin_lock(&inode->i_lock);
+ if (atomic_read(&inode->i_count) ||
+ (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))) {
+ spin_unlock(&inode->i_lock);
+ return INO_ITER_DONE;
+ }
+
+ inode->i_state |= I_FREEING;
+ inode_lru_list_del(inode);
+ spin_unlock(&inode->i_lock);
+ list_add(&inode->i_lru, dispose);
+
+ /*
+ * If we've run long enough to need rescheduling, abort the
+ * iteration so we can return to evict_inodes() and dispose of
+ * the inodes already gathered before collecting more to evict.
+ */
+ if (need_resched())
+ return INO_ITER_ABORT;
+ return INO_ITER_DONE;
+}
+
void evict_inodes(struct super_block *sb)
{
- struct inode *inode, *next;
LIST_HEAD(dispose);
-again:
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
- if (atomic_read(&inode->i_count))
- continue;
-
- spin_lock(&inode->i_lock);
- if (atomic_read(&inode->i_count)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- inode->i_state |= I_FREEING;
- inode_lru_list_del(inode);
- spin_unlock(&inode->i_lock);
- list_add(&inode->i_lru, &dispose);
-
+ do {
/*
- * We can have a ton of inodes to evict at unmount time given
- * enough memory, check to see if we need to go to sleep for a
- * bit so we don't livelock.
+ * We do not want to take references to inodes whilst iterating
+ * because we are trying to evict unreferenced inodes from
+ * the cache. Hence we need to use the unsafe iteration
+ * mechanism and do all the required inode validity checks in
+ * evict_inode_fn() to safely queue unreferenced inodes for
+ * eviction.
+ *
+ * We repeat the iteration until it doesn't find any more
+ * inodes to dispose of.
*/
- if (need_resched()) {
- spin_unlock(&sb->s_inode_list_lock);
- cond_resched();
- dispose_list(&dispose);
- goto again;
- }
- }
- spin_unlock(&sb->s_inode_list_lock);
-
- dispose_list(&dispose);
+ super_iter_inodes_unsafe(sb, evict_inode_fn, &dispose);
+ } while (dispose_list(&dispose));
}
EXPORT_SYMBOL_GPL(evict_inodes);
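
To make the existence-vs-reference distinction above concrete, here is
one plausible shape for super_iter_inodes_unsafe(); its real definition
is introduced elsewhere in this series, so the locking shown (holding
s_inode_list_lock across the walk) is an assumption. It does, however,
match the guarantees the comments rely on: inodes on s_inodes cannot be
freed while the lock is held, callbacks must not sleep, and a callback
returning INO_ITER_ABORT ends the walk early. It uses the ino_iter_fn
type sketched after the first hunk:

/* Illustration only: one plausible shape for the unsafe iterator. */
void super_iter_inodes_unsafe(struct super_block *sb, ino_iter_fn fn,
			      void *private_data)
{
	struct inode *inode;

	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		/* Existence guarantee only: no reference is taken. */
		if (fn(inode, private_data) == INO_ITER_ABORT)
			break;
	}
	spin_unlock(&sb->s_inode_list_lock);
}
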
@@ -1075,41 +1075,51 @@ static int add_dquot_ref(struct super_block *sb, int type)
return err;
}
+struct dquot_ref_data {
+ int type;
+ int reserved;
+};
+
+static int remove_dquot_ref_fn(struct inode *inode, void *data)
+{
+ struct dquot_ref_data *ref = data;
+
+ spin_lock(&dq_data_lock);
+ if (!IS_NOQUOTA(inode)) {
+ struct dquot __rcu **dquots = i_dquot(inode);
+ struct dquot *dquot = srcu_dereference_check(
+ dquots[ref->type], &dquot_srcu,
+ lockdep_is_held(&dq_data_lock));
+
+#ifdef CONFIG_QUOTA_DEBUG
+ if (unlikely(inode_get_rsv_space(inode) > 0))
+ ref->reserved++;
+#endif
+ rcu_assign_pointer(dquots[ref->type], NULL);
+ if (dquot)
+ dqput(dquot);
+ }
+ spin_unlock(&dq_data_lock);
+ return INO_ITER_DONE;
+}
+
static void remove_dquot_ref(struct super_block *sb, int type)
{
- struct inode *inode;
-#ifdef CONFIG_QUOTA_DEBUG
- int reserved = 0;
-#endif
-
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- /*
- * We have to scan also I_NEW inodes because they can already
- * have quota pointer initialized. Luckily, we need to touch
- * only quota pointers and these have separate locking
- * (dq_data_lock).
- */
- spin_lock(&dq_data_lock);
- if (!IS_NOQUOTA(inode)) {
- struct dquot __rcu **dquots = i_dquot(inode);
- struct dquot *dquot = srcu_dereference_check(
- dquots[type], &dquot_srcu,
- lockdep_is_held(&dq_data_lock));
+ struct dquot_ref_data ref = {
+ .type = type,
+ };
+ /*
+ * We have to scan I_NEW inodes because they can already
+ * have their quota pointers initialized. Luckily, we need
+ * to touch only the quota pointers, and these have separate
+ * locking (dq_data_lock), so the existence guarantee that
+ * super_iter_inodes_unsafe() provides for the inodes passed
+ * to remove_dquot_ref_fn() is sufficient for this operation.
+ */
+ super_iter_inodes_unsafe(sb, remove_dquot_ref_fn, &ref);
#ifdef CONFIG_QUOTA_DEBUG
- if (unlikely(inode_get_rsv_space(inode) > 0))
- reserved = 1;
-#endif
- rcu_assign_pointer(dquots[type], NULL);
- if (dquot)
- dqput(dquot);
- }
- spin_unlock(&dq_data_lock);
- }
- spin_unlock(&sb->s_inode_list_lock);
-#ifdef CONFIG_QUOTA_DEBUG
- if (reserved) {
+ if (ref.reserved) {
printk(KERN_WARNING "VFS (%s): Writes happened after quota"
" was disabled thus quota information is probably "
"inconsistent. Please run quotacheck(8).\n", sb->s_id);