From patchwork Wed Nov 27 15:28:11 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13887137 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 845291E51D; Wed, 27 Nov 2024 15:28:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721300; cv=none; b=tyHWa8vd3YEyskSbDpyv/dSxFefrbPB/Kw3V8tB8/KlNSupn24TeFUVt7/9WPViP0UIdHe7HuMqhnX8iJwNSIEGO1mNe+W4zY07m8U4I/0lAR4WwBU6w2iGFFNmTtsWKtwDH3LEPqb26/1BqT+NB/yfG7jJlA8ZK81c2Y7dXmf4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721300; c=relaxed/simple; bh=NPVXYksZkbK7cP6WTdZWPp256P2B4FMiN2imk2oaSkg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=qlZ/HsNM9HVVncJTIGKN6U5FaduFYRP8DlbV7mzsfxprd8urER1qWMTFCt7DUUdcEwMo3Y4ZP6465HjqcepkyBg+guucqg1LjLB5RbHe9MhsSPF0vvlYGGiQy110MpAQcWxIM1iQlFH8iUMZxso6YVWbl2Ed394yZb+YBTSB43g= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=GqMjro8o; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="GqMjro8o" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 14A4EC4CED4; Wed, 27 Nov 2024 15:28:19 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1732721300; bh=NPVXYksZkbK7cP6WTdZWPp256P2B4FMiN2imk2oaSkg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=GqMjro8ojkY1tLpJcZlcUCbWHypMHq37p4mKe4wKeV9ANga4aylCBQE5UXv9iYKgk 
H1l1QixsymlDu3V1hiAXlqQ+p7QdbeKFcSDKwZLfBJkqKPEtAZbS/0zJ1A5o+Ud9qh u7JN52eMlk8g6rwlpELRbm82tdppU3TV3hpeD9TlLb3IYDi8RMJ7YE8OmDX2u0aEvf Niovao4OIXLG6o/hSiAPHrpsys5AJQjzRXjcxcosIpPWYpk2lyS7FAz/VE13TvCiwz vIVmGM4GwB0ZQ/IklvvUcFWv2pqPyGzdYlEZ9xUf4Ze0PrIz9bToqLxo+7KqJpD+I4 EWbJYlUQFuyzQ== From: cel@kernel.org To: Hugh Dickens , Christian Brauner , Al Viro Cc: , , yukuai3@huawei.com, yangerkun@huaweicloud.com, Chuck Lever , stable@vger.kernel.org, Jeff Layton , Yang Erkun Subject: [RFC PATCH v3 1/5] libfs: Return ENOSPC when the directory offset range is exhausted Date: Wed, 27 Nov 2024 10:28:11 -0500 Message-ID: <20241127152815.151781-2-cel@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241127152815.151781-1-cel@kernel.org> References: <20241127152815.151781-1-cel@kernel.org> Precedence: bulk X-Mailing-List: linux-fsdevel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever Testing shows that the EBUSY error return from mtree_alloc_cyclic() leaks into user space. The ERRORS section of "man creat(2)" says: > EBUSY O_EXCL was specified in flags and pathname refers > to a block device that is in use by the system > (e.g., it is mounted). ENOSPC is closer to what applications expect in this situation. Note that the normal range of simple directory offset values is 2..2^63, so hitting this error is going to be rare to impossible. 
Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets") Cc: # v6.9+ Reviewed-by: Jeff Layton Reviewed-by: Yang Erkun Signed-off-by: Chuck Lever --- fs/libfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/libfs.c b/fs/libfs.c index 46966fd8bcf9..bf67954b525b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -288,7 +288,9 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN, LONG_MAX, &octx->next_offset, GFP_KERNEL); - if (ret < 0) + if (unlikely(ret == -EBUSY)) + return -ENOSPC; + if (unlikely(ret < 0)) return ret; offset_set(dentry, offset); From patchwork Wed Nov 27 15:28:12 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13887138 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6FFDD200113 for ; Wed, 27 Nov 2024 15:28:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721301; cv=none; b=BSw9GOKPRRWa+MMwrn/CxZaaHA/4oIAZBLIXH/Nb6lOv6buZ4XOaYnSPqnJSpPNiFSsxAgHXRvOyTbkOSJ4as2SlJV8aMmOTnzGfcVCK+f+9KUcIFa9QZEtRaAvoY4EoScI6xQdDE94Jre4knXeNGfqlVt7BVnKW8EAKkJUWunY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721301; c=relaxed/simple; bh=aJTYjqVoUaXrqckTrck4WCU2AgH83lVUIj4TdCwyW2I=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=AuiCCJ45SMXH8Edl1LWqfOlQJLVnuE7jqlV6/LyxwJLZGG6cKWohAOnsmRiGlTzRmdYd7NYglgP5rcNkD5Flx7h98u4pdiJA3uo/icRVBbH94Pfd6Jnw+RiTsEtijfWGPJHyzCsZuqA8Gg5B+bjzV7JvI46mO5wCm7I5x99yq1s= ARC-Authentication-Results: i=1; 
smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=uzrIQLw4; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="uzrIQLw4" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 49030C4CED2; Wed, 27 Nov 2024 15:28:20 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1732721301; bh=aJTYjqVoUaXrqckTrck4WCU2AgH83lVUIj4TdCwyW2I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=uzrIQLw4Jw7yzpy6ZXPe3lXaRaa6goSV9ivXdsUM8ksj/HydQlqJ4WWkEW9MZGjVR CRD7v+S9HNCXHNrp+a7juwjqJsekk1Obmyr5Cyzkj+KTZdBnCF1e2gExkRo0BOVWEL bONFucLAdSkZqFjbtHRnET8KdeZObE4OQR6y19wYXVqAD8zeYxroq3WdLeFDdbRxji Ma0fzQcfCDBgZ9xqaBtb+Co2HcLC6ddaEpir81jYH6hHGhjAsD0+UFvM5N5H8fI1MW le88zehs5l7qVf4fAV15yWU+OoLIs8rNVNBBlyFnUQPxVtY9RLvCKOlxtJpJJd3uox uF/pVBAOV68Wg== From: cel@kernel.org To: Hugh Dickens , Christian Brauner , Al Viro Cc: , , yukuai3@huawei.com, yangerkun@huaweicloud.com, Chuck Lever Subject: [RFC PATCH v3 2/5] libfs: Remove unnecessary locking from simple_offset_empty() Date: Wed, 27 Nov 2024 10:28:12 -0500 Message-ID: <20241127152815.151781-3-cel@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241127152815.151781-1-cel@kernel.org> References: <20241127152815.151781-1-cel@kernel.org> Precedence: bulk X-Mailing-List: linux-fsdevel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever I hit the DEBUG_LOCKS_WARN_ON() in hlock_class() several times during testing. This indicates that simple_offset_empty() is attempting to lock a negative dentry. That warning is of course silent on a kernel that is built without lock debugging. The simple_positive() check can be done without holding the child's d_lock. 
Fixes: ecba88a3b32d ("libfs: Add simple_offset_empty()") Signed-off-by: Chuck Lever --- fs/libfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index bf67954b525b..f686336489a3 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -347,13 +347,10 @@ int simple_offset_empty(struct dentry *dentry) index = DIR_OFFSET_MIN; octx = inode->i_op->get_offset_ctx(inode); mt_for_each(&octx->mt, child, index, LONG_MAX) { - spin_lock(&child->d_lock); if (simple_positive(child)) { - spin_unlock(&child->d_lock); ret = 0; break; } - spin_unlock(&child->d_lock); } return ret; From patchwork Wed Nov 27 15:28:13 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13887139 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 27A2E20011B for ; Wed, 27 Nov 2024 15:28:22 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721302; cv=none; b=OYnH3CPUKxoM49gG3F6x34VbYYS8nXTg2vyvJgC1yDmQ7k71DqqrS2qwuZ5LWoPyedvQP/fvmcGBMTdsygI5POmZqH2/UFWhYxIC3croLxJLtWJ4blJ9heRWIAa1Td2hLwiPZxMtAhOu8drnK5rtIjnvbLLkSnEsEgXGZwstUvM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721302; c=relaxed/simple; bh=VKlx/q6S0TZnqYbdlXcPYo2H5Z6zTd8jSftplsjeB2k=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=V4zHyh9icMGdCE5ZJ9rpOG9e4f+03eR21buHlJT2iI/6pKFS/2LtXKcFQjl97whuWRJcd6OVKC9yvBwMWSZd+A2OdqSDIVuT5c/asHCJNXyfBUfRZ3u8vXUPhPIgwM+yWWoXRtShjkz6XPmOMgaB341dzlWMx+FiN9p7cySxDS0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=OUF1FZnE; 
arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="OUF1FZnE" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 417F9C4CED4; Wed, 27 Nov 2024 15:28:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1732721302; bh=VKlx/q6S0TZnqYbdlXcPYo2H5Z6zTd8jSftplsjeB2k=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=OUF1FZnEbEpTpG79Oe/YjbbeeVfCsXJ/5xBFkexLOTBtxQVGlfx9MOJwh3G2/j/by UYpk2NVmI1OKMIJ7nv+n3tQnlk0qayYy4skYlF6AfnIk+FcSfxCKxwyse3Z2xLthro ljQctQEOJeORXjPDZnfmr7M2dgXbM0vocnmmVjq9ZueoUhhpTxPrY+f2lgZ9nUVTiy 1Df+8URO5rh2kPGypKBxQ5BMX6PiCCd4sMyxsMgQuHFJGZwkvZjvzY8AINLKwTQs/c DhXZdZysUC89taQn6jHsQa7UAL0gArN4EXazDKH6Q2UlEvpLDn71hi2vxH8QyVXmL8 4VrymohnnAQbQ== From: cel@kernel.org To: Hugh Dickens , Christian Brauner , Al Viro Cc: , , yukuai3@huawei.com, yangerkun@huaweicloud.com, Chuck Lever Subject: [RFC PATCH v3 3/5] Revert "libfs: fix infinite directory reads for offset dir" Date: Wed, 27 Nov 2024 10:28:13 -0500 Message-ID: <20241127152815.151781-4-cel@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241127152815.151781-1-cel@kernel.org> References: <20241127152815.151781-1-cel@kernel.org> Precedence: bulk X-Mailing-List: linux-fsdevel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever Using octx->next_offset to determine the newest entries works only because the offset value range is 63-bits. If an offset were to wrap, existing entries are no longer visible to readdir because offset_readdir() stops listing entries once an entry's offset is larger than octx->next_offset. This fix is effective, but it would be better not to use next_offset at all when iterating a directory. Revert this fix to prepare for replacing the current offset_readdir() mechanism. Reverting also makes it easier to apply the replacement code to v6.6. 
Signed-off-by: Chuck Lever --- fs/libfs.c | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index f686336489a3..a673427d3416 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -449,14 +449,6 @@ void simple_offset_destroy(struct offset_ctx *octx) mtree_destroy(&octx->mt); } -static int offset_dir_open(struct inode *inode, struct file *file) -{ - struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); - - file->private_data = (void *)ctx->next_offset; - return 0; -} - /** * offset_dir_llseek - Advance the read position of a directory descriptor * @file: an open directory whose position is to be updated @@ -470,9 +462,6 @@ static int offset_dir_open(struct inode *inode, struct file *file) */ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) { - struct inode *inode = file->f_inode; - struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); - switch (whence) { case SEEK_CUR: offset += file->f_pos; @@ -486,8 +475,7 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) } /* In this case, ->private_data is protected by f_pos_lock */ - if (!offset) - file->private_data = (void *)ctx->next_offset; + file->private_data = NULL; return vfs_setpos(file, offset, LONG_MAX); } @@ -518,7 +506,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index) +static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) { struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *dentry; @@ -526,21 +514,17 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, lon while (true) { dentry = offset_find_next(octx, ctx->pos); if (!dentry) - return; - - if (dentry2offset(dentry) >= last_index) { - dput(dentry); - return; - } + return 
ERR_PTR(-ENOENT); if (!offset_dir_emit(ctx, dentry)) { dput(dentry); - return; + break; } ctx->pos = dentry2offset(dentry) + 1; dput(dentry); } + return NULL; } /** @@ -567,19 +551,22 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, lon static int offset_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dir = file->f_path.dentry; - long last_index = (long)file->private_data; lockdep_assert_held(&d_inode(dir)->i_rwsem); if (!dir_emit_dots(file, ctx)) return 0; - offset_iterate_dir(d_inode(dir), ctx, last_index); + /* In this case, ->private_data is protected by f_pos_lock */ + if (ctx->pos == DIR_OFFSET_MIN) + file->private_data = NULL; + else if (file->private_data == ERR_PTR(-ENOENT)) + return 0; + file->private_data = offset_iterate_dir(d_inode(dir), ctx); return 0; } const struct file_operations simple_offset_dir_operations = { - .open = offset_dir_open, .llseek = offset_dir_llseek, .iterate_shared = offset_readdir, .read = generic_read_dir, From patchwork Wed Nov 27 15:28:14 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13887140 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 66CB33C488 for ; Wed, 27 Nov 2024 15:28:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721303; cv=none; b=FZDRa6FYoAy2lroZ010Yhg9W675BjyzDUfRZGUcp8GiGZUOL4ywCYTDdBPOy4J+NKhrfL2hVzF7TGD/2dix2qRzxAKN+tAtwE3RhaOpvCnfHSRGcpdrqI+6VFOzCA4nmNYfnzHAbDL5Dx6KMcT3EE1tSgB5CHM1q45e/vtzaPKM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721303; c=relaxed/simple; 
bh=yaUgVsYqn/+UqS21Ay0PUdvP+2Ovp6VDQCczLqXNXYo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=UJC7ZbltASyM3W7TaRf9ropjRrGaBxD+7rpJM/X0WeXyX7TWmkqSX6aI8UJTLpKh5qkplIWeyicHrSKA1w3ePrWNIzl3s7BXYqd17c/PQwzsw4xJdqYTNgR9bUrQxqGuZZR7rDjJEBZumB/vf1cBL1J47KgOaaDnH5RNVB0uuJ4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=VT08HImq; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="VT08HImq" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3A88EC4CED3; Wed, 27 Nov 2024 15:28:22 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1732721303; bh=yaUgVsYqn/+UqS21Ay0PUdvP+2Ovp6VDQCczLqXNXYo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=VT08HImqWvjHVpg0vUPPxkxWgmyqx6FPQSiQJRMNw0IopFVIATgz4coLvIXaQA5w+ HttmvTm9x2ggjv3ICKFLH2h80s+uaVVrnaMJSjwVnsR2/fYo0lVxOcpflRO32MDlHv A22VX+RXRrBbmm3pQFuLEaAmcJDB4WsVuQQUMC2PBkwbOD/R2h8tkSbikpz8uryCiI MAOIYikg4eQqyojGl+XUy2EA1W/0bAEc4kVDCJtU3EdmX6y+fDI28xs4E9sT8iNdKn JfH0jERkX0EQp1goCDny+sYUW9vwUuytQPtbRZbr5myfCmnE/xuWTfyt5dnYfvMqJW r9ZGIH9IRFzqQ== From: cel@kernel.org To: Hugh Dickens , Christian Brauner , Al Viro Cc: , , yukuai3@huawei.com, yangerkun@huaweicloud.com, Chuck Lever Subject: [RFC PATCH v3 4/5] libfs: Refactor end-of-directory detection for simple_offset directories Date: Wed, 27 Nov 2024 10:28:14 -0500 Message-ID: <20241127152815.151781-5-cel@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241127152815.151781-1-cel@kernel.org> References: <20241127152815.151781-1-cel@kernel.org> Precedence: bulk X-Mailing-List: linux-fsdevel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever This mechanism seems to have been misunderstood more than once. Make the code more self-documenting. 
Signed-off-by: Chuck Lever --- fs/libfs.c | 54 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index a673427d3416..0deff5390abb 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -449,6 +449,34 @@ void simple_offset_destroy(struct offset_ctx *octx) mtree_destroy(&octx->mt); } +static void offset_set_eod(struct file *file) +{ + file->private_data = ERR_PTR(-ENOENT); +} + +static void offset_clear_eod(struct file *file) +{ + file->private_data = NULL; +} + +static bool offset_at_eod(struct file *file) +{ + return file->private_data == ERR_PTR(-ENOENT); +} + +/** + * offset_dir_open - Open a directory descriptor + * @inode: directory to be opened + * @file: struct file to instantiate + * + * Returns zero on success, or a negative errno value. + */ +static int offset_dir_open(struct inode *inode, struct file *file) +{ + offset_clear_eod(file); + return 0; +} + /** * offset_dir_llseek - Advance the read position of a directory descriptor * @file: an open directory whose position is to be updated @@ -474,8 +502,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) return -EINVAL; } - /* In this case, ->private_data is protected by f_pos_lock */ - file->private_data = NULL; + /* ->private_data is protected by f_pos_lock */ + offset_clear_eod(file); return vfs_setpos(file, offset, LONG_MAX); } @@ -506,15 +534,20 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void offset_iterate_dir(struct file *file, struct dir_context *ctx) { + struct dentry *dir = file->f_path.dentry; + struct inode *inode = d_inode(dir); struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *dentry; while (true) { dentry = offset_find_next(octx, ctx->pos); - if (!dentry) - return ERR_PTR(-ENOENT); + 
if (!dentry) { + /* ->private_data is protected by f_pos_lock */ + offset_set_eod(file); + return; + } if (!offset_dir_emit(ctx, dentry)) { dput(dentry); @@ -524,7 +557,6 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) ctx->pos = dentry2offset(dentry) + 1; dput(dentry); } - return NULL; } /** @@ -557,16 +589,14 @@ static int offset_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - /* In this case, ->private_data is protected by f_pos_lock */ - if (ctx->pos == DIR_OFFSET_MIN) - file->private_data = NULL; - else if (file->private_data == ERR_PTR(-ENOENT)) - return 0; - file->private_data = offset_iterate_dir(d_inode(dir), ctx); + /* ->private_data is protected by f_pos_lock */ + if (!offset_at_eod(file)) + offset_iterate_dir(file, ctx); return 0; } const struct file_operations simple_offset_dir_operations = { + .open = offset_dir_open, .llseek = offset_dir_llseek, .iterate_shared = offset_readdir, .read = generic_read_dir, From patchwork Wed Nov 27 15:28:15 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13887141 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5B1FD20012E for ; Wed, 27 Nov 2024 15:28:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721304; cv=none; b=M6UTaRVkN8NxKYmSXqJHjSCBbsIlQ6U7gF5vRgGIhanKAjPo8He1YKepIquogckg3x7+cFXG4IyRewARIdL3/RBBX09yYVAtyieYVR3TajgAO6L4EjCMg6nYTsRiNTA1KJFpoxGtIS1x8FKFFptLsW97lMhWVz93D/0VOdR+izo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1732721304; c=relaxed/simple; 
bh=cm3XNveZKUOxWBzRqaJXCYdMMoTxeTEl5G9Suuio1jg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=LFct24M6tZF4/Q/MQKrQroFe7SK5M6tIREWE/Xlnp/wi676TNC4kjYCPIOU7okc0uYOsdwJCVYiVFtXWSXn0ncdMLmgG/KHn2eflz7u3ZdH6LrkM/3QctBmEP/eW0IU8wX9yEJimZMV/GJXcWCTN4MkThr+qiSbzMJKNIrk3xdg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=BAQrJMwO; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="BAQrJMwO" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 338BBC4CED8; Wed, 27 Nov 2024 15:28:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1732721303; bh=cm3XNveZKUOxWBzRqaJXCYdMMoTxeTEl5G9Suuio1jg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=BAQrJMwO3HcTBcQJtl9J0sN0/3Y2EaWG1HoGPOJVdXnlxSk8tgna/zSNZUpzF60BQ 3dRUcDoFH67L8gUn89drpvzoZcl7it2YQwMoAFiks5/PUQxhDyxKmxYM+O6WWJEWlc FQIyCfAcqppQ5YYaKu+sco2Dy80Ug5cJOo/6T5axaEkQ0ldr4nCmcLxGs/KcL9CdM5 BlWKRM9OkdVQQzbHH6nPFhE7I9pZ907/6bqzK6KGTHGDpC9RAkEXw7UyaOuPFGjwcU SCblQFp8y4T4FtLnBq7x0hIIAL/rlW8ih3aD8uw47R+heJXIeKxMQXLr0mcP36v18G g0Y5KKEt0904g== From: cel@kernel.org To: Hugh Dickens , Christian Brauner , Al Viro Cc: , , yukuai3@huawei.com, yangerkun@huaweicloud.com, Chuck Lever Subject: [RFC PATCH v3 5/5] libfs: Refactor offset_iterate_dir() Date: Wed, 27 Nov 2024 10:28:15 -0500 Message-ID: <20241127152815.151781-6-cel@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241127152815.151781-1-cel@kernel.org> References: <20241127152815.151781-1-cel@kernel.org> Precedence: bulk X-Mailing-List: linux-fsdevel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever This line in offset_iterate_dir(): ctx->pos = dentry2offset(dentry) + 1; assumes that the next child entry has an offset value that is greater than the 
current child entry. Since directory offsets are actually cookies, this heuristic is not always correct. We have tested the current code with a limited offset range to see if this is an operational problem. It doesn't seem to be, but doing a "+ 1" on what is supposed to be an opaque cookie is very likely wrong and brittle. Instead of using the mtree to emit entries in the order of their offset values, use it only to map the initial ctx->pos to a starting entry. Then use the directory's d_children list, which is already maintained by the dcache, to find the next child to emit, as the simple cursor-based implementation still does. Signed-off-by: Chuck Lever --- fs/libfs.c | 95 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 74 insertions(+), 21 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 0deff5390abb..2616421bbe0e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -241,9 +241,9 @@ const struct inode_operations simple_dir_inode_operations = { }; EXPORT_SYMBOL(simple_dir_inode_operations); -/* 0 is '.', 1 is '..', so always start with offset 2 or more */ enum { - DIR_OFFSET_MIN = 2, + DIR_OFFSET_FIRST = 2, /* seek to the first real entry */ + DIR_OFFSET_MIN = 3, /* minimum allocated offset value */ }; static void offset_set(struct dentry *dentry, long offset) @@ -507,19 +507,53 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) return vfs_setpos(file, offset, LONG_MAX); } -static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset) +/* Cf. 
find_next_child() */ +static struct dentry *find_next_sibling_locked(struct dentry *dentry) { - MA_STATE(mas, &octx->mt, offset, offset); + struct dentry *found = NULL; + + hlist_for_each_entry_from(dentry, d_sib) { + if (!simple_positive(dentry)) + continue; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(dentry)) + found = dget_dlock(dentry); + spin_unlock(&dentry->d_lock); + if (likely(found)) + break; + } + return found; +} + +static noinline_for_stack struct dentry *offset_dir_first(struct file *file) +{ + struct dentry *parent = file->f_path.dentry; + struct dentry *found; + + spin_lock(&parent->d_lock); + found = find_next_sibling_locked(d_first_child(parent)); + spin_unlock(&parent->d_lock); + return found; +} + +static noinline_for_stack struct dentry * +offset_dir_lookup(struct file *file, loff_t offset) +{ + struct dentry *parent = file->f_path.dentry; struct dentry *child, *found = NULL; + struct inode *inode = d_inode(parent); + struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); + + MA_STATE(mas, &octx->mt, offset, offset); rcu_read_lock(); child = mas_find(&mas, LONG_MAX); if (!child) goto out; - spin_lock(&child->d_lock); - if (simple_positive(child)) - found = dget_dlock(child); - spin_unlock(&child->d_lock); + + spin_lock(&parent->d_lock); + found = find_next_sibling_locked(child); + spin_unlock(&parent->d_lock); out: rcu_read_unlock(); return found; @@ -534,29 +568,48 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } +static struct dentry *offset_dir_next(struct dentry *child) +{ + struct dentry *parent = child->d_parent; + struct dentry *found; + + spin_lock(&parent->d_lock); + found = find_next_sibling_locked(d_next_sibling(child)); + spin_unlock(&parent->d_lock); + return found; +} + static void offset_iterate_dir(struct file *file, struct dir_context *ctx) { - struct dentry *dir = file->f_path.dentry; - struct inode 
*inode = d_inode(dir); - struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); - struct dentry *dentry; + struct dentry *dentry, *next = NULL; + + if (ctx->pos == DIR_OFFSET_FIRST) + dentry = offset_dir_first(file); + else + dentry = offset_dir_lookup(file, ctx->pos); + if (!dentry) { + /* ->private_data is protected by f_pos_lock */ + offset_set_eod(file); + return; + } while (true) { - dentry = offset_find_next(octx, ctx->pos); - if (!dentry) { - /* ->private_data is protected by f_pos_lock */ - offset_set_eod(file); - return; - } - if (!offset_dir_emit(ctx, dentry)) { - dput(dentry); + ctx->pos = dentry2offset(dentry); + break; + } + + next = offset_dir_next(dentry); + if (!next) { + offset_set_eod(file); break; } - ctx->pos = dentry2offset(dentry) + 1; dput(dentry); + dentry = next; } + + dput(dentry); } /**