From patchwork Thu Aug 29 06:30:38 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11120459 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B95BB14DE for ; Thu, 29 Aug 2019 06:30:55 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id A3DDF2189D for ; Thu, 29 Aug 2019 06:30:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727035AbfH2Gaz (ORCPT ); Thu, 29 Aug 2019 02:30:55 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:53768 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727385AbfH2Gaz (ORCPT ); Thu, 29 Aug 2019 02:30:55 -0400 Received: from dread.disaster.area (pa49-181-255-194.pa.nsw.optusnet.com.au [49.181.255.194]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 668AB43D7FC for ; Thu, 29 Aug 2019 16:30:48 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92) (envelope-from ) id 1i3DxG-0000xZ-IF for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 Received: from dave by discord.disaster.area with local (Exim 4.92) (envelope-from ) id 1i3DxG-0006BN-GN for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 1/5] xfs: move xfs_dir2_addname() Date: Thu, 29 Aug 2019 16:30:38 +1000 Message-Id: <20190829063042.22902-2-david@fromorbit.com> X-Mailer: git-send-email 2.23.0.rc1 In-Reply-To: <20190829063042.22902-1-david@fromorbit.com> References: <20190829063042.22902-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.2 cv=P6RKvmIu c=1 sm=1 tr=0 a=YO9NNpcXwc8z/SaoS+iAiA==:117 a=YO9NNpcXwc8z/SaoS+iAiA==:17 a=jpOVt7BSZ2e4Z31A5e1TngXxSK0=:19 a=FmdZ9Uzk2mMA:10 a=20KFwNOVAAAA:8 a=xTgIVNI2wHdG23_4b9gA:9 Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner This gets rid of the need for a forward declaration of the static function xfs_dir2_addname_int() and readies the code for factoring of xfs_dir2_addname_int(). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_dir2_node.c | 140 +++++++++++++++++----------------- 1 file changed, 69 insertions(+), 71 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 1fc44efc344d..e40986cc0759 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -32,8 +32,6 @@ static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state, static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, int index, xfs_da_state_blk_t *dblk, int *rval); -static int xfs_dir2_node_addname_int(xfs_da_args_t *args, - xfs_da_state_blk_t *fblk); /* * Check internal consistency of a leafn block. @@ -1610,75 +1608,6 @@ xfs_dir2_leafn_unbalance( xfs_dir3_leaf_check(dp, drop_blk->bp); } -/* - * Top-level node form directory addname routine. - */ -int /* error */ -xfs_dir2_node_addname( - xfs_da_args_t *args) /* operation arguments */ -{ - xfs_da_state_blk_t *blk; /* leaf block for insert */ - int error; /* error return value */ - int rval; /* sub-return value */ - xfs_da_state_t *state; /* btree cursor */ - - trace_xfs_dir2_node_addname(args); - - /* - * Allocate and initialize the state (btree cursor). - */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - /* - * Look up the name. We're not supposed to find it, but - * this gives us the insertion point. - */ - error = xfs_da3_node_lookup_int(state, &rval); - if (error) - rval = error; - if (rval != -ENOENT) { - goto done; - } - /* - * Add the data entry to a data block. - * Extravalid is set to a freeblock found by lookup. - */ - rval = xfs_dir2_node_addname_int(args, - state->extravalid ? &state->extrablk : NULL); - if (rval) { - goto done; - } - blk = &state->path.blk[state->path.active - 1]; - ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); - /* - * Add the new leaf entry. - */ - rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); - if (rval == 0) { - /* - * It worked, fix the hash values up the btree. - */ - if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) - xfs_da3_fixhashpath(state, &state->path); - } else { - /* - * It didn't work, we need to split the leaf block. - */ - if (args->total == 0) { - ASSERT(rval == -ENOSPC); - goto done; - } - /* - * Split the leaf block and insert the new entry. - */ - rval = xfs_da3_split(state); - } -done: - xfs_da_state_free(state); - return rval; -} - /* * Add the data entry for a node-format directory name addition. * The leaf entry is added in xfs_dir2_leafn_add. @@ -2056,6 +1985,75 @@ xfs_dir2_node_addname_int( return 0; } +/* + * Top-level node form directory addname routine. + */ +int /* error */ +xfs_dir2_node_addname( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_da_state_blk_t *blk; /* leaf block for insert */ + int error; /* error return value */ + int rval; /* sub-return value */ + xfs_da_state_t *state; /* btree cursor */ + + trace_xfs_dir2_node_addname(args); + + /* + * Allocate and initialize the state (btree cursor). + */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + /* + * Look up the name. We're not supposed to find it, but + * this gives us the insertion point. + */ + error = xfs_da3_node_lookup_int(state, &rval); + if (error) + rval = error; + if (rval != -ENOENT) { + goto done; + } + /* + * Add the data entry to a data block. + * Extravalid is set to a freeblock found by lookup. + */ + rval = xfs_dir2_node_addname_int(args, + state->extravalid ? &state->extrablk : NULL); + if (rval) { + goto done; + } + blk = &state->path.blk[state->path.active - 1]; + ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); + /* + * Add the new leaf entry. + */ + rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); + if (rval == 0) { + /* + * It worked, fix the hash values up the btree. + */ + if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) + xfs_da3_fixhashpath(state, &state->path); + } else { + /* + * It didn't work, we need to split the leaf block. + */ + if (args->total == 0) { + ASSERT(rval == -ENOSPC); + goto done; + } + /* + * Split the leaf block and insert the new entry. + */ + rval = xfs_da3_split(state); + } +done: + xfs_da_state_free(state); + return rval; +} + /* * Lookup an entry in a node-format directory. * All the real work happens in xfs_da3_node_lookup_int. From patchwork Thu Aug 29 06:30:39 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11120455 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id EF1FA14DE for ; Thu, 29 Aug 2019 06:30:53 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id C4E7523403 for ; Thu, 29 Aug 2019 06:30:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727418AbfH2Gax (ORCPT ); Thu, 29 Aug 2019 02:30:53 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:53406 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725823AbfH2Gax (ORCPT ); Thu, 29 Aug 2019 02:30:53 -0400 Received: from dread.disaster.area (pa49-181-255-194.pa.nsw.optusnet.com.au [49.181.255.194]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 1C72243D51A for ; Thu, 29 Aug 2019 16:30:48 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92) (envelope-from ) id 1i3DxG-0000xb-JY for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 Received: from dave by discord.disaster.area with local (Exim 4.92) (envelope-from ) id 1i3DxG-0006BQ-Ho for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 2/5] xfs: factor data block addition from xfs_dir2_node_addname_int() Date: Thu, 29 Aug 2019 16:30:39 +1000 Message-Id: <20190829063042.22902-3-david@fromorbit.com> X-Mailer: git-send-email 2.23.0.rc1 In-Reply-To: <20190829063042.22902-1-david@fromorbit.com> References: <20190829063042.22902-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.2 cv=FNpr/6gs c=1 sm=1 tr=0 a=YO9NNpcXwc8z/SaoS+iAiA==:117 a=YO9NNpcXwc8z/SaoS+iAiA==:17 a=jpOVt7BSZ2e4Z31A5e1TngXxSK0=:19 a=FmdZ9Uzk2mMA:10 a=20KFwNOVAAAA:8 a=CLacQ2H4_OS9siamSZoA:9 a=6SokxAx2g-jSOSi0:21 a=_2URlVfc3cAWM3wn:21 Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Factor out the code that adds a data block to a directory from xfs_dir2_node_addname_int(). This makes the code flow cleaner and more obvious and provides clear isolation of upcoming optimsations. Signed-Off-By: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_dir2_node.c | 325 +++++++++++++++++----------------- 1 file changed, 159 insertions(+), 166 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index e40986cc0759..747d10692bb6 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -1608,6 +1608,130 @@ xfs_dir2_leafn_unbalance( xfs_dir3_leaf_check(dp, drop_blk->bp); } +/* + * Add a new data block to the directory at the free space index that the caller + * has specified. + */ +static int +xfs_dir2_node_add_datablk( + struct xfs_da_args *args, + struct xfs_da_state_blk *fblk, + xfs_dir2_db_t *dbno, + struct xfs_buf **dbpp, + struct xfs_buf **fbpp, + int *findex) +{ + struct xfs_inode *dp = args->dp; + struct xfs_trans *tp = args->trans; + struct xfs_mount *mp = dp->i_mount; + struct xfs_dir3_icfree_hdr freehdr; + struct xfs_dir2_data_free *bf; + struct xfs_dir2_data_hdr *hdr; + struct xfs_dir2_free *free = NULL; + xfs_dir2_db_t fbno; + struct xfs_buf *fbp; + struct xfs_buf *dbp; + __be16 *bests = NULL; + int error; + + /* Not allowed to allocate, return failure. */ + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) + return -ENOSPC; + + /* Allocate and initialize the new data block. */ + error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, dbno); + if (error) + return error; + error = xfs_dir3_data_init(args, *dbno, &dbp); + if (error) + return error; + + /* + * Get the freespace block corresponding to the data block + * that was just allocated. + */ + fbno = dp->d_ops->db_to_fdb(args->geo, *dbno); + error = xfs_dir2_free_try_read(tp, dp, + xfs_dir2_db_to_da(args->geo, fbno), &fbp); + if (error) + return error; + + /* + * If there wasn't a freespace block, the read will + * return a NULL fbp. Allocate and initialize a new one. + */ + if (!fbp) { + error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fbno); + if (error) + return error; + + if (dp->d_ops->db_to_fdb(args->geo, *dbno) != fbno) { + xfs_alert(mp, +"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld", + __func__, (unsigned long long)dp->i_ino, + (long long)dp->d_ops->db_to_fdb(args->geo, *dbno), + (long long)*dbno, (long long)fbno); + if (fblk) { + xfs_alert(mp, + " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", + fblk, (unsigned long long)fblk->blkno, + fblk->index, fblk->magic); + } else { + xfs_alert(mp, " ... fblk is NULL"); + } + XFS_ERROR_REPORT("xfs_dir2_node_addname_int", + XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + + /* Get a buffer for the new block. */ + error = xfs_dir3_free_get_buf(args, fbno, &fbp); + if (error) + return error; + free = fbp->b_addr; + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + + /* Remember the first slot as our empty slot. */ + freehdr.firstdb = (fbno - xfs_dir2_byte_to_db(args->geo, + XFS_DIR2_FREE_OFFSET)) * + dp->d_ops->free_max_bests(args->geo); + } else { + free = fbp->b_addr; + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + } + + /* Set the freespace block index from the data block number. */ + *findex = dp->d_ops->db_to_fdindex(args->geo, *dbno); + + /* Extend the freespace table if the new data block is off the end. */ + if (*findex >= freehdr.nvalid) { + ASSERT(*findex < dp->d_ops->free_max_bests(args->geo)); + freehdr.nvalid = *findex + 1; + bests[*findex] = cpu_to_be16(NULLDATAOFF); + } + + /* + * If this entry was for an empty data block (this should always be + * true) then update the header. + */ + if (bests[*findex] == cpu_to_be16(NULLDATAOFF)) { + freehdr.nused++; + dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); + xfs_dir2_free_log_header(args, fbp); + } + + /* Update the freespace value for the new block in the table. */ + hdr = dbp->b_addr; + bf = dp->d_ops->data_bestfree_p(hdr); + bests[*findex] = bf[0].length; + + *dbpp = dbp; + *fbpp = fbp; + return 0; +} + /* * Add the data entry for a node-format directory name addition. * The leaf entry is added in xfs_dir2_leafn_add. @@ -1632,10 +1756,9 @@ xfs_dir2_node_addname_int( xfs_dir2_db_t ifbno; /* initial freespace block no */ xfs_dir2_db_t lastfbno=0; /* highest freespace block no */ int length; /* length of the new entry */ - int logfree; /* need to log free entry */ - xfs_mount_t *mp; /* filesystem mount point */ - int needlog; /* need to log data header */ - int needscan; /* need to rescan data frees */ + int logfree = 0; /* need to log free entry */ + int needlog = 0; /* need to log data header */ + int needscan = 0; /* need to rescan data frees */ __be16 *tagp; /* data entry tag pointer */ xfs_trans_t *tp; /* transaction pointer */ __be16 *bests; @@ -1644,7 +1767,6 @@ xfs_dir2_node_addname_int( xfs_dir2_data_aoff_t aoff; dp = args->dp; - mp = dp->i_mount; tp = args->trans; length = dp->d_ops->data_entsize(args->namelen); /* @@ -1673,6 +1795,7 @@ xfs_dir2_node_addname_int( ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); ASSERT(be16_to_cpu(bests[findex]) >= length); dbno = freehdr.firstdb + findex; + goto found_block; } else { /* * The data block looked at didn't have enough room. @@ -1774,168 +1897,46 @@ xfs_dir2_node_addname_int( } } } + /* * If we don't have a data block, we need to allocate one and make * the freespace entries refer to it. */ - if (unlikely(dbno == -1)) { - /* - * Not allowed to allocate, return failure. - */ - if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) - return -ENOSPC; - - /* - * Allocate and initialize the new data block. - */ - if (unlikely((error = xfs_dir2_grow_inode(args, - XFS_DIR2_DATA_SPACE, - &dbno)) || - (error = xfs_dir3_data_init(args, dbno, &dbp)))) - return error; - - /* - * If (somehow) we have a freespace block, get rid of it. - */ - if (fbp) - xfs_trans_brelse(tp, fbp); - if (fblk && fblk->bp) - fblk->bp = NULL; - - /* - * Get the freespace block corresponding to the data block - * that was just allocated. - */ - fbno = dp->d_ops->db_to_fdb(args->geo, dbno); - error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(args->geo, fbno), - &fbp); + if (dbno == -1) { + error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp, + &findex); if (error) return error; - /* - * If there wasn't a freespace block, the read will - * return a NULL fbp. Allocate and initialize a new one. - */ - if (!fbp) { - error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, - &fbno); - if (error) - return error; - - if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) { - xfs_alert(mp, -"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld ifbno %llu lastfbno %d", - __func__, (unsigned long long)dp->i_ino, - (long long)dp->d_ops->db_to_fdb( - args->geo, dbno), - (long long)dbno, (long long)fbno, - (unsigned long long)ifbno, lastfbno); - if (fblk) { - xfs_alert(mp, - " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", - fblk, - (unsigned long long)fblk->blkno, - fblk->index, - fblk->magic); - } else { - xfs_alert(mp, " ... fblk is NULL"); - } - XFS_ERROR_REPORT("xfs_dir2_node_addname_int", - XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; - } - - /* - * Get a buffer for the new block. - */ - error = xfs_dir3_free_get_buf(args, fbno, &fbp); - if (error) - return error; - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - - /* - * Remember the first slot as our empty slot. - */ - freehdr.firstdb = - (fbno - xfs_dir2_byte_to_db(args->geo, - XFS_DIR2_FREE_OFFSET)) * - dp->d_ops->free_max_bests(args->geo); - } else { - free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - } + /* setup current free block buffer */ + free = fbp->b_addr; - /* - * Set the freespace block index from the data block number. - */ - findex = dp->d_ops->db_to_fdindex(args->geo, dbno); - /* - * If it's after the end of the current entries in the - * freespace block, extend that table. - */ - if (findex >= freehdr.nvalid) { - ASSERT(findex < dp->d_ops->free_max_bests(args->geo)); - freehdr.nvalid = findex + 1; - /* - * Tag new entry so nused will go up. - */ - bests[findex] = cpu_to_be16(NULLDATAOFF); - } - /* - * If this entry was for an empty data block - * (this should always be true) then update the header. - */ - if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { - freehdr.nused++; - dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_header(args, fbp); - } - /* - * Update the real value in the table. - * We haven't allocated the data entry yet so this will - * change again. - */ - hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); - bests[findex] = bf[0].length; + /* we're going to have to log the free block index later */ logfree = 1; - } - /* - * We had a data block so we don't have to make a new one. - */ - else { - /* - * If just checking, we succeeded. - */ + } else { +found_block: + /* If just checking, we succeeded. */ if (args->op_flags & XFS_DA_OP_JUSTCHECK) return 0; - /* - * Read the data block in. - */ + /* Read the data block in. */ error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, dbno), -1, &dbp); if (error) return error; - hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); - logfree = 0; } + + /* setup for data block up now */ + hdr = dbp->b_addr; + bf = dp->d_ops->data_bestfree_p(hdr); ASSERT(be16_to_cpu(bf[0].length) >= length); - /* - * Point to the existing unused space. - */ + + /* Point to the existing unused space. */ dup = (xfs_dir2_data_unused_t *) ((char *)hdr + be16_to_cpu(bf[0].offset)); - needscan = needlog = 0; - /* - * Mark the first part of the unused space, inuse for us. - */ + + /* Mark the first part of the unused space, inuse for us. */ aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length, &needlog, &needscan); @@ -1943,9 +1944,8 @@ xfs_dir2_node_addname_int( xfs_trans_brelse(tp, dbp); return error; } - /* - * Fill in the new entry and log it. - */ + + /* Fill in the new entry and log it. */ dep = (xfs_dir2_data_entry_t *)dup; dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; @@ -1954,32 +1954,25 @@ xfs_dir2_node_addname_int( tagp = dp->d_ops->data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); xfs_dir2_data_log_entry(args, dbp, dep); - /* - * Rescan the block for bestfree if needed. - */ + + /* Rescan the freespace and log the data block if needed. */ if (needscan) xfs_dir2_data_freescan(dp, hdr, &needlog); - /* - * Log the data block header if needed. - */ if (needlog) xfs_dir2_data_log_header(args, dbp); - /* - * If the freespace entry is now wrong, update it. - */ - bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */ - if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) { + + /* If the freespace block entry is now wrong, update it. */ + bests = dp->d_ops->free_bests_p(free); + if (bests[findex] != bf[0].length) { bests[findex] = bf[0].length; logfree = 1; } - /* - * Log the freespace entry if needed. - */ + + /* Log the freespace entry if needed. */ if (logfree) xfs_dir2_free_log_bests(args, fbp, findex, findex); - /* - * Return the data block and offset in args, then drop the data block. - */ + + /* Return the data block and offset in args. */ args->blkno = (xfs_dablk_t)dbno; args->index = be16_to_cpu(*tagp); return 0; From patchwork Thu Aug 29 06:30:40 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11120457 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6AD27174A for ; Thu, 29 Aug 2019 06:30:54 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 4B20023403 for ; Thu, 29 Aug 2019 06:30:54 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1725823AbfH2Gax (ORCPT ); Thu, 29 Aug 2019 02:30:53 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:54328 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727035AbfH2Gax (ORCPT ); Thu, 29 Aug 2019 02:30:53 -0400 Received: from dread.disaster.area (pa49-181-255-194.pa.nsw.optusnet.com.au [49.181.255.194]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 6733E361556 for ; Thu, 29 Aug 2019 16:30:48 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92) (envelope-from ) id 1i3DxG-0000xd-Ki for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 Received: from dave by discord.disaster.area with local (Exim 4.92) (envelope-from ) id 1i3DxG-0006BU-J6 for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 3/5] xfs: factor free block index lookup from xfs_dir2_node_addname_int() Date: Thu, 29 Aug 2019 16:30:40 +1000 Message-Id: <20190829063042.22902-4-david@fromorbit.com> X-Mailer: git-send-email 2.23.0.rc1 In-Reply-To: <20190829063042.22902-1-david@fromorbit.com> References: <20190829063042.22902-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.2 cv=D+Q3ErZj c=1 sm=1 tr=0 a=YO9NNpcXwc8z/SaoS+iAiA==:117 a=YO9NNpcXwc8z/SaoS+iAiA==:17 a=jpOVt7BSZ2e4Z31A5e1TngXxSK0=:19 a=FmdZ9Uzk2mMA:10 a=20KFwNOVAAAA:8 a=YwdpEuwJOOZGESPBAVIA:9 a=6mgyoGnc6Mb6NQ0A:21 a=jhAKos4K3aSXxJVa:21 Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner Simplify the logic in xfs_dir2_node_addname_int() by factoring out the free block index lookup code that finds a block with enough free space for the entry to be added. The code that is moved gets a major cleanup at the same time, but there is no algorithm change here. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_dir2_node.c | 196 ++++++++++++++++++---------------- 1 file changed, 103 insertions(+), 93 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 747d10692bb6..1d3d1c9b5961 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -1635,7 +1635,7 @@ xfs_dir2_node_add_datablk( int error; /* Not allowed to allocate, return failure. */ - if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) + if (args->total == 0) return -ENOSPC; /* Allocate and initialize the new data block. */ @@ -1732,43 +1732,29 @@ xfs_dir2_node_add_datablk( return 0; } -/* - * Add the data entry for a node-format directory name addition. - * The leaf entry is added in xfs_dir2_leafn_add. - * We may enter with a freespace block that the lookup found. - */ -static int /* error */ -xfs_dir2_node_addname_int( - xfs_da_args_t *args, /* operation arguments */ - xfs_da_state_blk_t *fblk) /* optional freespace block */ +static int +xfs_dir2_node_find_freeblk( + struct xfs_da_args *args, + struct xfs_da_state_blk *fblk, + xfs_dir2_db_t *dbnop, + struct xfs_buf **fbpp, + int *findexp, + int length) { - xfs_dir2_data_hdr_t *hdr; /* data block header */ - xfs_dir2_db_t dbno; /* data block number */ - struct xfs_buf *dbp; /* data block buffer */ - xfs_dir2_data_entry_t *dep; /* data entry pointer */ - xfs_inode_t *dp; /* incore directory inode */ - xfs_dir2_data_unused_t *dup; /* data unused entry pointer */ - int error; /* error return value */ - xfs_dir2_db_t fbno; /* freespace block number */ - struct xfs_buf *fbp; /* freespace buffer */ - int findex; /* freespace entry index */ - xfs_dir2_free_t *free=NULL; /* freespace block structure */ - xfs_dir2_db_t ifbno; /* initial freespace block no */ - xfs_dir2_db_t lastfbno=0; /* highest freespace block no */ - int length; /* length of the new entry */ - int logfree = 0; /* need to log free entry */ - int needlog = 0; /* need to log data header */ - int needscan = 0; /* need to rescan data frees */ - __be16 *tagp; /* data entry tag pointer */ - xfs_trans_t *tp; /* transaction pointer */ - __be16 *bests; struct xfs_dir3_icfree_hdr freehdr; - struct xfs_dir2_data_free *bf; - xfs_dir2_data_aoff_t aoff; + struct xfs_dir2_free *free = NULL; + struct xfs_inode *dp = args->dp; + struct xfs_trans *tp = args->trans; + struct xfs_buf *fbp = NULL; + xfs_dir2_db_t lastfbno; + xfs_dir2_db_t ifbno = -1; + xfs_dir2_db_t dbno = -1; + xfs_dir2_db_t fbno = -1; + xfs_fileoff_t fo; + __be16 *bests; + int findex; + int error; - dp = args->dp; - tp = args->trans; - length = dp->d_ops->data_entsize(args->namelen); /* * If we came in with a freespace block that means that lookup * found an entry with our hash value. This is the freespace @@ -1776,56 +1762,44 @@ xfs_dir2_node_addname_int( */ if (fblk) { fbp = fblk->bp; - /* - * Remember initial freespace block number. - */ - ifbno = fblk->blkno; free = fbp->b_addr; findex = fblk->index; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - - /* - * This means the free entry showed that the data block had - * space for our entry, so we remembered it. - * Use that data block. - */ if (findex >= 0) { + /* caller already found the freespace for us. */ + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); + ASSERT(findex < freehdr.nvalid); ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); ASSERT(be16_to_cpu(bests[findex]) >= length); dbno = freehdr.firstdb + findex; - goto found_block; - } else { - /* - * The data block looked at didn't have enough room. - * We'll start at the beginning of the freespace entries. - */ - dbno = -1; - findex = 0; + goto out; } - } else { + /* - * Didn't come in with a freespace block, so no data block. + * The data block looked at didn't have enough room. + * We'll start at the beginning of the freespace entries. */ - ifbno = dbno = -1; - fbp = NULL; - findex = 0; + ifbno = fblk->blkno; + fbno = ifbno; } + ASSERT(dbno == -1); + findex = 0; /* - * If we don't have a data block yet, we're going to scan the - * freespace blocks looking for one. Figure out what the - * highest freespace block number is. + * If we don't have a data block yet, we're going to scan the freespace + * blocks looking for one. Figure out what the highest freespace block + * number is. */ - if (dbno == -1) { - xfs_fileoff_t fo; /* freespace block number */ + error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK); + if (error) + return error; + lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); + + /* If we haven't get a search start block, set it now */ + if (fbno == -1) + fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET); - if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) - return error; - lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); - fbno = ifbno; - } /* * While we haven't identified a data block, search the freeblock * data for a good data block. If we find a null freeblock entry, @@ -1836,17 +1810,10 @@ xfs_dir2_node_addname_int( * If we don't have a freeblock in hand, get the next one. */ if (fbp == NULL) { - /* - * Happens the first time through unless lookup gave - * us a freespace block to start with. - */ - if (++fbno == 0) - fbno = xfs_dir2_byte_to_db(args->geo, - XFS_DIR2_FREE_OFFSET); /* * If it's ifbno we already looked at it. */ - if (fbno == ifbno) + if (++fbno == ifbno) fbno++; /* * If it's off the end we're done. @@ -1897,35 +1864,77 @@ xfs_dir2_node_addname_int( } } } +out: + *dbnop = dbno; + *fbpp = fbp; + *findexp = findex; + return 0; +} + + +/* + * Add the data entry for a node-format directory name addition. + * The leaf entry is added in xfs_dir2_leafn_add. + * We may enter with a freespace block that the lookup found. + */ +static int +xfs_dir2_node_addname_int( + struct xfs_da_args *args, /* operation arguments */ + struct xfs_da_state_blk *fblk) /* optional freespace block */ +{ + struct xfs_dir2_data_unused *dup; /* data unused entry pointer */ + struct xfs_dir2_data_entry *dep; /* data entry pointer */ + struct xfs_dir2_data_hdr *hdr; /* data block header */ + struct xfs_dir2_data_free *bf; + struct xfs_dir2_free *free = NULL; /* freespace block structure */ + struct xfs_trans *tp = args->trans; + struct xfs_inode *dp = args->dp; + struct xfs_buf *dbp; /* data block buffer */ + struct xfs_buf *fbp; /* freespace buffer */ + xfs_dir2_data_aoff_t aoff; + xfs_dir2_db_t dbno; /* data block number */ + int error; /* error return value */ + int findex; /* freespace entry index */ + int length; /* length of the new entry */ + int logfree = 0; /* need to log free entry */ + int needlog = 0; /* need to log data header */ + int needscan = 0; /* need to rescan data frees */ + __be16 *tagp; /* data entry tag pointer */ + __be16 *bests; + + length = dp->d_ops->data_entsize(args->namelen); + error = xfs_dir2_node_find_freeblk(args, fblk, &dbno, &fbp, &findex, + length); + if (error) + return error; + + /* + * Now we know if we must allocate blocks, so if we are checking whether + * we can insert without allocation then we can return now. + */ + if (args->op_flags & XFS_DA_OP_JUSTCHECK) { + if (dbno != -1) + return 0; + return -ENOSPC; + } /* * If we don't have a data block, we need to allocate one and make * the freespace entries refer to it. */ if (dbno == -1) { - error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp, - &findex); - if (error) - return error; - - /* setup current free block buffer */ - free = fbp->b_addr; - /* we're going to have to log the free block index later */ logfree = 1; + error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp, + &findex); } else { -found_block: - /* If just checking, we succeeded. */ - if (args->op_flags & XFS_DA_OP_JUSTCHECK) - return 0; - /* Read the data block in. */ error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, dbno), -1, &dbp); - if (error) - return error; } + if (error) + return error; /* setup for data block up now */ hdr = dbp->b_addr; @@ -1962,6 +1971,7 @@ xfs_dir2_node_addname_int( xfs_dir2_data_log_header(args, dbp); /* If the freespace block entry is now wrong, update it. */ + free = fbp->b_addr; bests = dp->d_ops->free_bests_p(free); if (bests[findex] != bf[0].length) { bests[findex] = bf[0].length; From patchwork Thu Aug 29 06:30:41 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11120453 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8BF96174A for ; Thu, 29 Aug 2019 06:30:52 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 7536123405 for ; Thu, 29 Aug 2019 06:30:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727392AbfH2Gaw (ORCPT ); Thu, 29 Aug 2019 02:30:52 -0400 Received: from mail104.syd.optusnet.com.au ([211.29.132.246]:53404 "EHLO mail104.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727385AbfH2Gav (ORCPT ); Thu, 29 Aug 2019 02:30:51 -0400 Received: from dread.disaster.area (pa49-181-255-194.pa.nsw.optusnet.com.au [49.181.255.194]) by mail104.syd.optusnet.com.au (Postfix) with ESMTPS id 1E9C843D6BF for ; Thu, 29 Aug 2019 16:30:48 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92) (envelope-from ) id 1i3DxG-0000xi-Lx for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 Received: from dave by discord.disaster.area with local (Exim 4.92) (envelope-from ) id 1i3DxG-0006BX-K8 for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 4/5] xfs: speed up directory bestfree block scanning Date: Thu, 29 Aug 2019 16:30:41 +1000 Message-Id: <20190829063042.22902-5-david@fromorbit.com> X-Mailer: git-send-email 2.23.0.rc1 In-Reply-To: <20190829063042.22902-1-david@fromorbit.com> References: <20190829063042.22902-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.2 cv=P6RKvmIu c=1 sm=1 tr=0 a=YO9NNpcXwc8z/SaoS+iAiA==:117 a=YO9NNpcXwc8z/SaoS+iAiA==:17 a=jpOVt7BSZ2e4Z31A5e1TngXxSK0=:19 a=FmdZ9Uzk2mMA:10 a=20KFwNOVAAAA:8 a=6DkO27ogrPW1xkOiNbQA:9 a=hfy43YsH12EGUV9p:21 a=EvKAaQj5TXdgifvC:21 Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner When running a "create millions inodes in a directory" test recently, I noticed we were spending a huge amount of time converting freespace block headers from disk format to in-memory format: 31.47% [kernel] [k] xfs_dir2_node_addname 17.86% [kernel] [k] xfs_dir3_free_hdr_from_disk 3.55% [kernel] [k] xfs_dir3_free_bests_p We shouldn't be hitting the best free block scanning code so hard when doing sequential directory creates, and it turns out there's a highly suboptimal loop searching the the best free array in the freespace block - it decodes the block header before checking each entry inside a loop, instead of decoding the header once before running the entry search loop. This makes a massive difference to create rates. Profile now looks like this: 13.15% [kernel] [k] xfs_dir2_node_addname 3.52% [kernel] [k] xfs_dir3_leaf_check_int 3.11% [kernel] [k] xfs_log_commit_cil And the wall time/average file create rate differences are just as stark: create time(sec) / rate (files/s) File count vanilla patched 10k 0.41 / 24.3k 0.42 / 23.8k 20k 0.74 / 27.0k 0.76 / 26.3k 100k 3.81 / 26.4k 3.47 / 28.8k 200k 8.58 / 23.3k 7.19 / 27.8k 1M 85.69 / 11.7k 48.53 / 20.6k 2M 280.31 / 7.1k 130.14 / 15.3k The larger the directory, the bigger the performance improvement. Signed-Off-By: Dave Chinner --- fs/xfs/libxfs/xfs_dir2_node.c | 101 +++++++++++++--------------------- 1 file changed, 37 insertions(+), 64 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 1d3d1c9b5961..c6a1d1cc638f 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -1751,8 +1751,8 @@ xfs_dir2_node_find_freeblk( xfs_dir2_db_t dbno = -1; xfs_dir2_db_t fbno = -1; xfs_fileoff_t fo; - __be16 *bests; - int findex; + __be16 *bests = NULL; + int findex = 0; int error; /* @@ -1764,16 +1764,15 @@ xfs_dir2_node_find_freeblk( fbp = fblk->bp; free = fbp->b_addr; findex = fblk->index; + bests = dp->d_ops->free_bests_p(free); + dp->d_ops->free_hdr_from_disk(&freehdr, free); if (findex >= 0) { /* caller already found the freespace for us. */ - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); - ASSERT(findex < freehdr.nvalid); ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF); ASSERT(be16_to_cpu(bests[findex]) >= length); dbno = freehdr.firstdb + findex; - goto out; + goto found_block; } /* @@ -1783,8 +1782,6 @@ xfs_dir2_node_find_freeblk( ifbno = fblk->blkno; fbno = ifbno; } - ASSERT(dbno == -1); - findex = 0; /* * If we don't have a data block yet, we're going to scan the freespace @@ -1802,69 +1799,45 @@ xfs_dir2_node_find_freeblk( /* * While we haven't identified a data block, search the freeblock - * data for a good data block. If we find a null freeblock entry, - * indicating a hole in the data blocks, remember that. + * data for a data block with enough free space in it. */ - while (dbno == -1) { - /* - * If we don't have a freeblock in hand, get the next one. - */ - if (fbp == NULL) { - /* - * If it's ifbno we already looked at it. - */ - if (++fbno == ifbno) - fbno++; - /* - * If it's off the end we're done. - */ - if (fbno >= lastfbno) - break; - /* - * Read the block. There can be holes in the - * freespace blocks, so this might not succeed. - * This should be really rare, so there's no reason - * to avoid it. - */ - error = xfs_dir2_free_try_read(tp, dp, - xfs_dir2_db_to_da(args->geo, fbno), - &fbp); - if (error) - return error; - if (!fbp) - continue; - free = fbp->b_addr; - findex = 0; - } + for ( ; fbno < lastfbno; fbno++) { + /* If it's ifbno we already looked at it. */ + if (fbno == ifbno) + continue; + /* - * Look at the current free entry. Is it good enough? - * - * The bests initialisation should be where the bufer is read in - * the above branch. But gcc is too stupid to realise that bests - * and the freehdr are actually initialised if they are placed - * there, so we have to do it here to avoid warnings. Blech. + * Read the block. There can be holes in the freespace blocks, + * so this might not succeed. This should be really rare, so + * there's no reason to avoid it. */ + error = xfs_dir2_free_try_read(tp, dp, + xfs_dir2_db_to_da(args->geo, fbno), + &fbp); + if (error) + return error; + if (!fbp) + continue; + + findex = 0; + free = fbp->b_addr; bests = dp->d_ops->free_bests_p(free); dp->d_ops->free_hdr_from_disk(&freehdr, free); - if (be16_to_cpu(bests[findex]) != NULLDATAOFF && - be16_to_cpu(bests[findex]) >= length) - dbno = freehdr.firstdb + findex; - else { - /* - * Are we done with the freeblock? - */ - if (++findex == freehdr.nvalid) { - /* - * Drop the block. - */ - xfs_trans_brelse(tp, fbp); - fbp = NULL; - if (fblk && fblk->bp) - fblk->bp = NULL; + + /* Scan the free entry array for a large enough free space. */ + do { + if (be16_to_cpu(bests[findex]) != NULLDATAOFF && + be16_to_cpu(bests[findex]) >= length) { + dbno = freehdr.firstdb + findex; + goto found_block; } - } + } while (++findex < freehdr.nvalid); + + /* Didn't find free space, go on to next free block */ + xfs_trans_brelse(tp, fbp); } -out: + +found_block: *dbnop = dbno; *fbpp = fbp; *findexp = findex; From patchwork Thu Aug 29 06:30:42 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dave Chinner X-Patchwork-Id: 11120451 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3725114E5 for ; Thu, 29 Aug 2019 06:30:52 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 20C5F233FF for ; Thu, 29 Aug 2019 06:30:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727401AbfH2Gav (ORCPT ); Thu, 29 Aug 2019 02:30:51 -0400 Received: from mail105.syd.optusnet.com.au ([211.29.132.249]:54148 "EHLO mail105.syd.optusnet.com.au" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727392AbfH2Gav (ORCPT ); Thu, 29 Aug 2019 02:30:51 -0400 Received: from dread.disaster.area (pa49-181-255-194.pa.nsw.optusnet.com.au [49.181.255.194]) by mail105.syd.optusnet.com.au (Postfix) with ESMTPS id 47FAD361366 for ; Thu, 29 Aug 2019 16:30:48 +1000 (AEST) Received: from discord.disaster.area ([192.168.253.110]) by dread.disaster.area with esmtp (Exim 4.92) (envelope-from ) id 1i3DxG-0000xm-NR for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 Received: from dave by discord.disaster.area with local (Exim 4.92) (envelope-from ) id 1i3DxG-0006Bb-LP for linux-xfs@vger.kernel.org; Thu, 29 Aug 2019 16:30:46 +1000 From: Dave Chinner To: linux-xfs@vger.kernel.org Subject: [PATCH 5/5] xfs: reverse search directory freespace indexes Date: Thu, 29 Aug 2019 16:30:42 +1000 Message-Id: <20190829063042.22902-6-david@fromorbit.com> X-Mailer: git-send-email 2.23.0.rc1 In-Reply-To: <20190829063042.22902-1-david@fromorbit.com> References: <20190829063042.22902-1-david@fromorbit.com> MIME-Version: 1.0 X-Optus-CM-Score: 0 X-Optus-CM-Analysis: v=2.2 cv=FNpr/6gs c=1 sm=1 tr=0 a=YO9NNpcXwc8z/SaoS+iAiA==:117 a=YO9NNpcXwc8z/SaoS+iAiA==:17 a=jpOVt7BSZ2e4Z31A5e1TngXxSK0=:19 a=FmdZ9Uzk2mMA:10 a=20KFwNOVAAAA:8 a=SUicX0odFCYvpFH38aAA:9 a=OZMYBsLD8hmfPUqf:21 a=x3AMLUU9Dyi89PBk:21 Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Dave Chinner When a directory is growing rapidly, new blocks tend to get added at the end of the directory. These end up at the end of the freespace index, and when the directory gets large finding these new freespaces gets expensive. The code does a linear search across the frespace index from the first block in the directory to the last, hence meaning the newly added space is the last index searched. Instead, do a reverse order index search, starting from the last block and index in the freespace index. This makes most lookups for free space on rapidly growing directories O(1) instead of O(N), but should not have any impact on random insert workloads because the average search length is the same regardless of which end of the array we start at. The result is a major improvement in large directory grow rates: create time(sec) / rate (files/s) File count vanilla Prev commit Patched 10k 0.41 / 24.3k 0.42 / 23.8k 0.41 / 24.3k 20k 0.74 / 27.0k 0.76 / 26.3k 0.75 / 26.7k 100k 3.81 / 26.4k 3.47 / 28.8k 3.27 / 30.6k 200k 8.58 / 23.3k 7.19 / 27.8k 6.71 / 29.8k 1M 85.69 / 11.7k 48.53 / 20.6k 37.67 / 26.5k 2M 280.31 / 7.1k 130.14 / 15.3k 79.55 / 25.2k 10M 3913.26 / 2.5k 552.89 / 18.1k Signed-Off-By: Dave Chinner --- fs/xfs/libxfs/xfs_dir2_node.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index c6a1d1cc638f..d8abbcbde055 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -1746,6 +1746,7 @@ xfs_dir2_node_find_freeblk( struct xfs_inode *dp = args->dp; struct xfs_trans *tp = args->trans; struct xfs_buf *fbp = NULL; + xfs_dir2_db_t firstfbno; xfs_dir2_db_t lastfbno; xfs_dir2_db_t ifbno = -1; xfs_dir2_db_t dbno = -1; @@ -1781,6 +1782,9 @@ xfs_dir2_node_find_freeblk( */ ifbno = fblk->blkno; fbno = ifbno; + xfs_trans_brelse(tp, fbp); + fbp = NULL; + fblk->bp = NULL; } /* @@ -1792,16 +1796,15 @@ xfs_dir2_node_find_freeblk( if (error) return error; lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo); - - /* If we haven't get a search start block, set it now */ - if (fbno == -1) - fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET); + firstfbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET); /* * While we haven't identified a data block, search the freeblock - * data for a data block with enough free space in it. + * data for a good data block. Do a reverse order search, as growing + * directories will put new blocks with free space at the end of the + * free space index. */ - for ( ; fbno < lastfbno; fbno++) { + for (fbno = lastfbno - 1; fbno >= firstfbno; fbno--) { /* If it's ifbno we already looked at it. */ if (fbno == ifbno) continue; @@ -1819,19 +1822,18 @@ xfs_dir2_node_find_freeblk( if (!fbp) continue; - findex = 0; free = fbp->b_addr; bests = dp->d_ops->free_bests_p(free); dp->d_ops->free_hdr_from_disk(&freehdr, free); /* Scan the free entry array for a large enough free space. */ - do { + for (findex = freehdr.nvalid - 1; findex >= 0; findex--) { if (be16_to_cpu(bests[findex]) != NULLDATAOFF && be16_to_cpu(bests[findex]) >= length) { dbno = freehdr.firstdb + findex; goto found_block; } - } while (++findex < freehdr.nvalid); + }; /* Didn't find free space, go on to next free block */ xfs_trans_brelse(tp, fbp);