[05/11] fs/dcache: Reclaim excessive negative dentries in directories
diff mbox series

Message ID 20200226161404.14136-6-longman@redhat.com
State New
Headers show
Series
  • fs/dcache: Limit # of negative dentries
Related show

Commit Message

Waiman Long Feb. 26, 2020, 4:13 p.m. UTC
When the "dentry-dir-max" sysctl parameter is set, it will enable the
checking of dentry count in the parent directory when a negative dentry
is being retained. If the count exceeds the given limit, it will schedule
a work function to scan the children of that parent directory to find
negative dentries to be reclaimed. Positive dentries will not be touched.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 fs/dcache.c            | 207 +++++++++++++++++++++++++++++++++++++++++
 include/linux/dcache.h |   2 +
 2 files changed, 209 insertions(+)

Patch
diff mbox series

diff --git a/fs/dcache.c b/fs/dcache.c
index 8f3ac31a582b..01c6d7277244 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -133,6 +133,11 @@  static DEFINE_PER_CPU(long, nr_dentry_negative);
 int dcache_dentry_dir_max_sysctl __read_mostly;
 EXPORT_SYMBOL_GPL(dcache_dentry_dir_max_sysctl);
 
+static LLIST_HEAD(negative_reclaim_list);
+static void negative_reclaim_check(struct dentry *parent);
+static void negative_reclaim_workfn(struct work_struct *work);
+static DECLARE_WORK(negative_reclaim_work, negative_reclaim_workfn);
+
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 
 /*
@@ -869,7 +874,20 @@  void dput(struct dentry *dentry)
 		rcu_read_unlock();
 
 		if (likely(retain_dentry(dentry))) {
+			struct dentry *neg_parent = NULL;
+
+			if (d_is_negative(dentry)) {
+				neg_parent = dentry->d_parent;
+				rcu_read_lock();
+			}
 			spin_unlock(&dentry->d_lock);
+
+			/*
+			 * Negative dentry reclaim check is only done when
+			 * a negative dentry is being put into a LRU list.
+			 */
+			if (neg_parent)
+				negative_reclaim_check(neg_parent);
 			return;
 		}
 
@@ -1261,6 +1279,195 @@  void shrink_dcache_sb(struct super_block *sb)
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
+/*
+ * Return true if the dentry can be reclaimed, i.e. it is not on a shrink list, not being killed and not in d_genocide().
+ */
+static inline bool can_reclaim_dentry(unsigned int flags)
+{
+	return !(flags & (DCACHE_SHRINK_LIST | DCACHE_GENOCIDE |
+			  DCACHE_DENTRY_KILLED));
+}
+
+struct reclaim_dentry
+{
+	struct llist_node reclaim_node;
+	struct dentry *parent_dir;
+};
+
+/*
+ * Reclaim excess negative dentries in a directory
+ */
+static void reclaim_negative_dentry(struct dentry *parent,
+				    struct list_head *dispose)
+{
+	struct dentry *child;
+	int limit = READ_ONCE(dcache_dentry_dir_max_sysctl);
+	int cnt;
+
+	lockdep_assert_held(&parent->d_lock);
+
+	cnt = parent->d_nchildren;
+
+	/*
+	 * Compute # of negative dentries to be reclaimed.
+	 * An extra 1/8 of dcache_dentry_dir_max_sysctl is added.
+	 */
+	if (cnt <= limit)
+		return;
+	cnt -= limit;
+	cnt += (limit >> 3);
+
+	/*
+	 * The d_subdirs is treated like a kind of LRU where
+	 * non-negative dentries are skipped. Negative dentries
+	 * with DCACHE_REFERENCED bit set are also skipped but
+	 * with DCACHE_REFERENCED cleared.
+	 */
+	list_for_each_entry(child, &parent->d_subdirs, d_child) {
+		/*
+		 * This check is racy and so it may not be accurate.
+		 */
+		if (!d_is_negative(child))
+			continue;
+
+		if (!spin_trylock(&child->d_lock))
+			continue;
+
+		/*
+		 * Only reclaim zero-refcnt negative dentries in the
+		 * LRU, but not in shrink list.
+		 */
+		if (can_reclaim_dentry(child->d_flags) &&
+		    d_is_negative(child) && d_in_lru(child) &&
+		    !child->d_lockref.count) {
+			if (child->d_flags & DCACHE_REFERENCED) {
+				child->d_flags &= ~DCACHE_REFERENCED;
+			} else {
+				cnt--;
+				d_lru_del(child);
+				d_shrink_add(child, dispose);
+			}
+		}
+		spin_unlock(&child->d_lock);
+		if (!cnt) {
+			child = list_next_entry(child, d_child);
+			break;
+		}
+	}
+
+	if (&child->d_child != &parent->d_subdirs) {
+		/*
+		 * Split out the children from the head up to just
+		 * before the current entry into a separate list and
+		 * then splice it to the end of the child list so
+		 * that the unscanned entries will be in the front.
+		 * This simulates LRU.
+		 */
+		struct list_head list;
+
+		list_cut_before(&list, &parent->d_subdirs,
+				&child->d_child);
+		list_splice_tail(&list, &parent->d_subdirs);
+	}
+}
+
+/*
+ * Excess negative dentry reclaim work function.
+ */
+static void negative_reclaim_workfn(struct work_struct *work)
+{
+	struct llist_node *nodes, *next;
+	struct dentry *parent;
+	struct reclaim_dentry *dentry_node;
+
+	/*
+	 * For each queued directory, collect its excess negative dentries into a dispose list and shrink them.
+	 */
+	for (nodes = llist_del_all(&negative_reclaim_list);
+	     nodes; nodes = next) {
+		LIST_HEAD(dispose);
+
+		next = llist_next(nodes);
+		dentry_node = container_of(nodes, struct reclaim_dentry,
+					   reclaim_node);
+		parent = dentry_node->parent_dir;
+		spin_lock(&parent->d_lock);
+
+		if (d_is_dir(parent) &&
+		    can_reclaim_dentry(parent->d_flags) &&
+		    (parent->d_flags & DCACHE_RECLAIMING))
+			reclaim_negative_dentry(parent, &dispose);
+
+		if (!list_empty(&dispose)) {
+			spin_unlock(&parent->d_lock);
+			shrink_dentry_list(&dispose);
+			spin_lock(&parent->d_lock);
+		}
+
+		parent->d_flags &= ~DCACHE_RECLAIMING;
+		spin_unlock(&parent->d_lock);
+		dput(parent);
+		kfree(dentry_node);
+		cond_resched();
+	}
+}
+
+/*
+ * Check the parent to see if it has too many negative dentries and queue
+ * it up for the negative dentry reclaim work function to handle it.
+ */
+static void negative_reclaim_check(struct dentry *parent)
+	__releases(rcu)
+{
+	int limit = dcache_dentry_dir_max_sysctl;
+	struct reclaim_dentry *dentry_node;
+
+	if (!limit)
+		goto rcu_unlock_out;
+
+	/*
+	 * Unlocked checks; they are racy and are repeated under d_lock below.
+	 */
+	if (!can_reclaim_dentry(parent->d_flags) ||
+	    (parent->d_flags & DCACHE_RECLAIMING) ||
+	    (READ_ONCE(parent->d_nchildren) <= limit))
+		goto rcu_unlock_out;
+
+	spin_lock(&parent->d_lock);
+	if (!can_reclaim_dentry(parent->d_flags) ||
+	    (parent->d_flags & DCACHE_RECLAIMING) ||
+	    (READ_ONCE(parent->d_nchildren) <= limit))
+		goto unlock_out;
+
+	if (!d_is_dir(parent))
+		goto unlock_out;
+
+	dentry_node = kzalloc(sizeof(*dentry_node), GFP_NOWAIT);
+	if (!dentry_node)
+		goto unlock_out;
+
+	rcu_read_unlock();
+	__dget_dlock(parent);
+	dentry_node->parent_dir = parent;
+	parent->d_flags |= DCACHE_RECLAIMING;
+	spin_unlock(&parent->d_lock);
+
+	/*
+	 * Only call schedule_work() if negative_reclaim_list was previously
+	 * empty. Otherwise, schedule_work() had been called but the work
+	 * function hasn't retrieved the list yet.
+	 */
+	if (llist_add(&dentry_node->reclaim_node, &negative_reclaim_list))
+		schedule_work(&negative_reclaim_work);
+	return;
+
+unlock_out:
+	spin_unlock(&parent->d_lock);
+rcu_unlock_out:
+	rcu_read_unlock();
+	return;
+}
+
 /**
  * enum d_walk_ret - action to talke during tree walk
  * @D_WALK_CONTINUE:	contrinue walk
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e9e66eb50d1a..f14d72738903 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -13,6 +13,7 @@ 
 #include <linux/lockref.h>
 #include <linux/stringhash.h>
 #include <linux/wait.h>
+#include <linux/llist.h>
 
 struct path;
 struct vfsmount;
@@ -214,6 +215,7 @@  struct dentry_operations {
 #define DCACHE_FALLTHRU			0x01000000 /* Fall through to lower layer */
 #define DCACHE_ENCRYPTED_NAME		0x02000000 /* Encrypted name (dir key was unavailable) */
 #define DCACHE_OP_REAL			0x04000000
+#define DCACHE_RECLAIMING		0x08000000 /* Reclaiming negative dentries */
 
 #define DCACHE_PAR_LOOKUP		0x10000000 /* being looked up (with parent locked shared) */
 #define DCACHE_DENTRY_CURSOR		0x20000000