diff mbox series

[RFC,v2,5/5] libfs: Refactor offset_iterate_dir()

Message ID 20241126155444.2556-6-cel@kernel.org (mailing list archive)
State New
Headers show
Series Improve simple directory offset wrap behavior | expand

Commit Message

Chuck Lever Nov. 26, 2024, 3:54 p.m. UTC
From: Chuck Lever <chuck.lever@oracle.com>

This line in offset_iterate_dir():

		ctx->pos = dentry2offset(dentry) + 1;

assumes that the next child entry has an offset value greater than
that of the current child entry. Since directory offsets are
actually opaque cookies, this heuristic is not always correct.

We have tested the current code with a limited offset range to see
if this is an operational problem. It doesn't seem to be, but doing
a "+ 1" on what is supposed to be an opaque cookie is very likely
wrong and brittle.

Instead of using the maple tree to emit entries in the order of
their offset values, use it only to map the initial ctx->pos to a
starting entry. Then walk the directory's d_children list, which is
already maintained by the dcache, to find the next child to emit, as
the simple cursor-based implementation still does.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/libfs.c | 89 +++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 71 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/fs/libfs.c b/fs/libfs.c
index be641a84047a..862b4203d389 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -241,9 +241,9 @@  const struct inode_operations simple_dir_inode_operations = {
 };
 EXPORT_SYMBOL(simple_dir_inode_operations);
 
-/* 0 is '.', 1 is '..', so always start with offset 2 or more */
 enum {
-	DIR_OFFSET_MIN	= 2,
+	DIR_OFFSET_FIRST	= 2,	/* seek to the first real entry */
+	DIR_OFFSET_MIN		= 3,	/* lowest real offset value */
 };
 
 static void offset_set(struct dentry *dentry, long offset)
@@ -267,7 +267,7 @@  void simple_offset_init(struct offset_ctx *octx)
 {
 	mt_init_flags(&octx->mt, MT_FLAGS_ALLOC_RANGE);
 	lockdep_set_class(&octx->mt.ma_lock, &simple_offset_lock_class);
-	octx->next_offset = DIR_OFFSET_MIN;
+	octx->next_offset = 0;
 }
 
 /**
@@ -511,10 +511,30 @@  static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
 	return vfs_setpos(file, offset, LONG_MAX);
 }
 
-static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
+static noinline_for_stack struct dentry *offset_dir_first(struct file *file)
 {
+	struct dentry *child, *found = NULL, *dir = file->f_path.dentry;
+
+	spin_lock(&dir->d_lock);
+	child = d_first_child(dir);
+	if (child && simple_positive(child)) {
+		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+		if (simple_positive(child))
+			found = dget_dlock(child);
+		spin_unlock(&child->d_lock);
+	}
+	spin_unlock(&dir->d_lock);
+	return found;
+}
+
+static noinline_for_stack struct dentry *
+offset_dir_lookup(struct file *file, loff_t offset)
+{
+	struct dentry *child, *found = NULL, *dir = file->f_path.dentry;
+	struct inode *inode = d_inode(dir);
+	struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
+
 	MA_STATE(mas, &octx->mt, offset, offset);
-	struct dentry *child, *found = NULL;
 
 	rcu_read_lock();
 	child = mas_find(&mas, LONG_MAX);
@@ -538,29 +558,62 @@  static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
 			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
 }
 
+/*
+ * This is find_next_child() without the dput() tail. We might
+ * combine offset_dir_next() and find_next_child().
+ */
+static struct dentry *offset_dir_next(struct dentry *dentry)
+{
+	struct dentry *parent = dentry->d_parent;
+	struct dentry *d, *found = NULL;
+
+	spin_lock(&parent->d_lock);
+	d = d_next_sibling(dentry);
+	hlist_for_each_entry_from(d, d_sib) {
+		if (simple_positive(d)) {
+			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+			if (simple_positive(d))
+				found = dget_dlock(d);
+			spin_unlock(&d->d_lock);
+			if (likely(found))
+				break;
+		}
+	}
+	spin_unlock(&parent->d_lock);
+	return found;
+}
+
 static void offset_iterate_dir(struct file *file, struct dir_context *ctx)
 {
-	struct dentry *dir = file->f_path.dentry;
-	struct inode *inode = d_inode(dir);
-	struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
-	struct dentry *dentry;
+	struct dentry *dentry, *next = NULL;
+
+	if (ctx->pos == DIR_OFFSET_FIRST)
+		dentry = offset_dir_first(file);
+	else
+		dentry = offset_dir_lookup(file, ctx->pos);
+	if (!dentry) {
+		/* ->private_data is protected by f_pos_lock */
+		offset_set_eod(file);
+		return;
+	}
 
 	while (true) {
-		dentry = offset_find_next(octx, ctx->pos);
-		if (!dentry) {
-			/* ->private_data is protected by f_pos_lock */
-			offset_set_eod(file);
-			return;
-		}
-
 		if (!offset_dir_emit(ctx, dentry)) {
-			dput(dentry);
+			ctx->pos = dentry2offset(dentry);
+			break;
+		}
+
+		next = offset_dir_next(dentry);
+		if (!next) {
+			offset_set_eod(file);
 			break;
 		}
 
-		ctx->pos = dentry2offset(dentry) + 1;
 		dput(dentry);
+		dentry = next;
 	}
+
+	dput(dentry);
 }
 
 /**