From patchwork Thu Dec 14 17:05:22 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wengang Wang X-Patchwork-Id: 13493297 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=oracle.com header.i=@oracle.com header.b="BlnEsQyC" Received: from mx0b-00069f02.pphosted.com (mx0b-00069f02.pphosted.com [205.220.177.32]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7D4ECB2 for ; Thu, 14 Dec 2023 09:05:35 -0800 (PST) Received: from pps.filterd (m0246631.ppops.net [127.0.0.1]) by mx0b-00069f02.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 3BE9wtfq021847 for ; Thu, 14 Dec 2023 17:05:34 GMT DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=oracle.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding; s=corp-2023-11-20; bh=YgrcgW3z92Uy8iOYrWtyxhSklgpDltbkupoXwQhNA2E=; b=BlnEsQyCsGI8OEq5U5kN4sxMHuYts8TilDKWNg4VuP4JQMZDqedRxJ7A4dP0/b34oU6I SoVkCVm2HmjDLkXN7T70CZp3Pr9AhZvPwQSSn+tb5WqPCnMc1WtIwU673PoEPt4oMQkS 7mlI4GwPeuUC4w425BZsuOjMMB5/M0/D2WDE4D5guTvFNdbqoG4gxH8nVX23VRuI91y/ 8htwH66qVnkq3T+uihbW/TON4OA4UZCOrN2Zu7fVjbpPdfSQ4ur9efIay459Lw4tPQRf SuPvXfAtb9s4rZddqsjk2VlQQAxw8ouXKpdUJXxP9JEHGvcKv9c3MZp2LXr+jWkEN3AB aQ== Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.appoci.oracle.com [138.1.37.129]) by mx0b-00069f02.pphosted.com (PPS) with ESMTPS id 3uwfrrsqty-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:33 +0000 Received: from pps.filterd (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEGr9Qp012810 for ; Thu, 14 Dec 2023 17:05:33 GMT Received: from pps.reinject (localhost [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTPS id 3uvepahcue-1 (version=TLSv1.2 
cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:32 +0000 Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by pps.reinject (8.17.1.5/8.17.1.5) with ESMTP id 3BEH0mnf036808 for ; Thu, 14 Dec 2023 17:05:32 GMT Received: from wwg-mac.us.oracle.com (dhcp-10-65-131-193.vpn.oracle.com [10.65.131.193]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTP id 3uvepahcsv-2; Thu, 14 Dec 2023 17:05:32 +0000 From: Wengang Wang To: linux-xfs@vger.kernel.org Cc: wen.gang.wang@oracle.com Subject: [PATCH 1/9] xfs: defrag: introduce strucutures and numbers. Date: Thu, 14 Dec 2023 09:05:22 -0800 Message-Id: <20231214170530.8664-2-wen.gang.wang@oracle.com> X-Mailer: git-send-email 2.39.3 (Apple Git-145) In-Reply-To: <20231214170530.8664-1-wen.gang.wang@oracle.com> References: <20231214170530.8664-1-wen.gang.wang@oracle.com> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26 definitions=2023-12-14_11,2023-12-14_01,2023-05-22_02 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 mlxlogscore=999 bulkscore=0 spamscore=0 mlxscore=0 adultscore=0 phishscore=0 malwarescore=0 suspectscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.12.0-2311290000 definitions=main-2312140121 X-Proofpoint-GUID: Sz0hOMo0WXkBAsOuby1yw40XjozpjzwN X-Proofpoint-ORIG-GUID: Sz0hOMo0WXkBAsOuby1yw40XjozpjzwN introduce strucutures and numbers only. 
Signed-off-by: Wengang Wang --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_fs.h | 1 + fs/xfs/xfs_defrag.c | 60 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_defrag.h | 11 ++++++++ fs/xfs/xfs_ioctl.c | 17 ++++++++++++ fs/xfs/xfs_mount.h | 37 ++++++++++++++++++++++++++ 6 files changed, 127 insertions(+) create mode 100644 fs/xfs/xfs_defrag.c create mode 100644 fs/xfs/xfs_defrag.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 7762c01a85cf..ba7f7bc4abf9 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -93,6 +93,7 @@ xfs-y += xfs_aops.o \ xfs_sysfs.o \ xfs_trans.o \ xfs_xattr.o \ + xfs_defrag.o \ kmem.o # low-level transaction/log code diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 6360073865db..4b0fdb900df5 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -837,6 +837,7 @@ struct xfs_scrub_metadata { #define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) #define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) #define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) +#define XFS_IOC_DEFRAG _IOWR('X', 129, struct xfs_defrag) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff --git a/fs/xfs/xfs_defrag.c b/fs/xfs/xfs_defrag.c new file mode 100644 index 000000000000..954d05376809 --- /dev/null +++ b/fs/xfs/xfs_defrag.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. 
+ * Author: Wengang Wang + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_bmap.h" +#include "xfs_inode_fork.h" +#include "xfs_inode.h" +#include "xfs_reflink.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_bit.h" +#include "xfs_buf.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_refcount_btree.h" +#include "xfs_btree.h" +#include "xfs_refcount.h" +#include "xfs_defer.h" +#include "xfs_log_priv.h" +#include "xfs_extfree_item.h" +#include "xfs_bmap_item.h" +#include "xfs_quota_defs.h" +#include "xfs_quota.h" + +#include + +/* + * The max number of extents in a piece. + * can't be too big, it will have log space presure + */ +#define XFS_DEFRAG_PIECE_MAX_EXT 512 + +/* + * Milliseconds we leave the info unremoved when a defrag failed. + * This aims to give user space a way to get the error code. + */ +#define XFS_DERFAG_GRACE_PERIOD 30000 + +/* limitation of pending online defrag */ +#define XFS_DEFRAG_MAX_PARALLEL 128 + +/* + * The max size, in blocks, of a piece. + * can't be too big, it may hard to get such a free extent + */ +#define XFS_DEFRAG_MAX_PIECE_BLOCKS 4096U + +int xfs_file_defrag(struct file *filp, struct xfs_defrag *defrag) +{ + return -EOPNOTSUPP; +} diff --git a/fs/xfs/xfs_defrag.h b/fs/xfs/xfs_defrag.h new file mode 100644 index 000000000000..21113d8c1567 --- /dev/null +++ b/fs/xfs/xfs_defrag.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. 
+ * Author: Wengang Wang + */ +#ifndef __XFS_DEFRAG_H__ +#define __XFS_DEFRAG_H__ +void xfs_initialize_defrag(struct xfs_mount *mp); +int xfs_file_defrag(struct file *filp, struct xfs_defrag *defrag); +void xfs_stop_wait_defrags(struct xfs_mount *mp); +#endif /* __XFS_DEFRAG_H__ */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 6c3919687ea6..7f7a7094ace9 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -39,6 +39,7 @@ #include "xfs_ioctl.h" #include "xfs_xattr.h" #include "xfs_rtbitmap.h" +#include "xfs_defrag.h" #include #include @@ -2160,6 +2161,22 @@ xfs_file_ioctl( return error; } + case XFS_IOC_DEFRAG: { + struct xfs_defrag defrag; + int ret; + + if (xfs_is_readonly(mp)) + return -EROFS; + + if (copy_from_user(&defrag, arg, sizeof(defrag))) + return -EFAULT; + + ret = xfs_file_defrag(filp, &defrag); + if (ret == 0) + ret = copy_to_user(arg, &defrag, sizeof(defrag)); + return ret; + } + default: return -ENOTTY; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 503fe3c7edbf..05b372cde389 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -71,6 +71,34 @@ struct xfs_inodegc { unsigned int cpu; }; +/* Online Defrag */ +enum xfs_defrag_cmd { + XFS_DEFRAG_CMD_START = 1, /* start defrag, or change configuration */ + XFS_DEFRAG_CMD_STOP, /* stop defrag */ + XFS_DEFRAG_CMD_SUSPEND, /* suspend on going defrag */ + XFS_DEFRAG_CMD_RESUME, /* resume suspended defrag */ + XFS_DEFRAG_CMD_STATUS, /* get status */ +}; + +struct xfs_defrag { + /* [IN] XFS_DEFRAG_CMD_* */ + enum xfs_defrag_cmd df_cmd; + /* [IN] the size of piece in blocks */ + unsigned int df_piece_size; + /* [IN] the target extent size */ + unsigned int df_tgt_extsize; + /* [IN] idle time in ms between adjacent pieces */ + unsigned int df_idle_time; + /* [OUT] current running status */ + int df_status; + /* [OUT] the number of the processed blocks */ + unsigned long long df_blocks_done; + /* [OUT] inode number of the file under defragmentation */ + unsigned long df_ino; 
+ /* [OUT] defragmenting on this file is suspended */ + bool df_suspended; +}; + /* * The struct xfsmount layout is optimised to separate read-mostly variables * from variables that are frequently modified. We put the read-mostly variables @@ -252,6 +280,15 @@ typedef struct xfs_mount { /* cpus that have inodes queued for inactivation */ struct cpumask m_inodegc_cpumask; + + /* lock to serialize the access of defrags fields */ + struct semaphore m_defrag_lock; + /* number of pending defragmentation in this FS */ + unsigned int m_nr_defrag; + /* list that links up all pending defragmentation */ + struct list_head m_defrag_list; + /* the task which does defragmentation job */ + struct task_struct *m_defrag_task; } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo) From patchwork Thu Dec 14 17:05:23 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wengang Wang X-Patchwork-Id: 13493295 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=oracle.com header.i=@oracle.com header.b="SYd4HMpu" Received: from mx0b-00069f02.pphosted.com (mx0b-00069f02.pphosted.com [205.220.177.32]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A34E9B7 for ; Thu, 14 Dec 2023 09:05:35 -0800 (PST) Received: from pps.filterd (m0246631.ppops.net [127.0.0.1]) by mx0b-00069f02.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEGVCYK021767 for ; Thu, 14 Dec 2023 17:05:34 GMT DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=oracle.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding; s=corp-2023-11-20; bh=1i/uv+lZ0GaOP+g87fHxC17P0e6+gabfE/anZfvV2hg=; b=SYd4HMpu8JKTsU6DsnbiFtAuSMKPk/u8zOG1vMuK+mhPgO5aYQ18gIsXP3lIFTTtVl8J YAfubg0OYJbrARQS+eqL+EUnwJqR8jSMAh4zFMmVaD/nYzo9orfZLCM5D9SmErqfSDRy kD82EvAmq2MRgAYArTYYsay7HIgocwI+Mo4Rsw0SU8LEt4D5tSGFSXCrEdYrkCI57kwT 
HGQGBvmLoN5MMHw4XONkGqkc1Q/jJR+pDcIFfOqZfpeuAGUv2uHqZiox5Fw6c1yTA6gd V55fXhAi7BEllTHbd7X3rQofiZOCQDuc0n3s/AplxSonZqNOfU6fSWi07jd+zGoNgR34 hw== Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.appoci.oracle.com [138.1.37.129]) by mx0b-00069f02.pphosted.com (PPS) with ESMTPS id 3uwfrrsqu0-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:34 +0000 Received: from pps.filterd (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEGZTc5013007 for ; Thu, 14 Dec 2023 17:05:33 GMT Received: from pps.reinject (localhost [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTPS id 3uvepahcut-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:33 +0000 Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by pps.reinject (8.17.1.5/8.17.1.5) with ESMTP id 3BEH0mnh036808 for ; Thu, 14 Dec 2023 17:05:32 GMT Received: from wwg-mac.us.oracle.com (dhcp-10-65-131-193.vpn.oracle.com [10.65.131.193]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTP id 3uvepahcsv-3; Thu, 14 Dec 2023 17:05:32 +0000 From: Wengang Wang To: linux-xfs@vger.kernel.org Cc: wen.gang.wang@oracle.com Subject: [PATCH 2/9] xfs: defrag: initialization and cleanup Date: Thu, 14 Dec 2023 09:05:23 -0800 Message-Id: <20231214170530.8664-3-wen.gang.wang@oracle.com> X-Mailer: git-send-email 2.39.3 (Apple Git-145) In-Reply-To: <20231214170530.8664-1-wen.gang.wang@oracle.com> References: <20231214170530.8664-1-wen.gang.wang@oracle.com> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Proofpoint-Virus-Version: vendor=baseguard 
engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26 definitions=2023-12-14_11,2023-12-14_01,2023-05-22_02 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 mlxlogscore=999 bulkscore=0 spamscore=0 mlxscore=0 adultscore=0 phishscore=0 malwarescore=0 suspectscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.12.0-2311290000 definitions=main-2312140121 X-Proofpoint-GUID: 5-cc4uFtr0uvziQeBk8Bp3j1L_1fgZF- X-Proofpoint-ORIG-GUID: 5-cc4uFtr0uvziQeBk8Bp3j1L_1fgZF- initialization online defrag on a new mount. cleanup when unmounting. Signed-off-by: Wengang Wang --- fs/xfs/xfs_defrag.c | 23 +++++++++++++++++++++++ fs/xfs/xfs_mount.c | 3 +++ fs/xfs/xfs_super.c | 3 +++ 3 files changed, 29 insertions(+) diff --git a/fs/xfs/xfs_defrag.c b/fs/xfs/xfs_defrag.c index 954d05376809..8bdc6290a69d 100644 --- a/fs/xfs/xfs_defrag.c +++ b/fs/xfs/xfs_defrag.c @@ -54,6 +54,29 @@ */ #define XFS_DEFRAG_MAX_PIECE_BLOCKS 4096U +/* initialization called for new mount */ +void xfs_initialize_defrag(struct xfs_mount *mp) +{ + sema_init(&mp->m_defrag_lock, 1); + mp->m_nr_defrag = 0; + mp->m_defrag_task = NULL; + INIT_LIST_HEAD(&mp->m_defrag_list); +} + +/* stop all the defragmentations on this mount and wait until they really stopped */ +void xfs_stop_wait_defrags(struct xfs_mount *mp) +{ + down(&mp->m_defrag_lock); + if (list_empty(&mp->m_defrag_list)) { + up(&mp->m_defrag_lock); + return; + } + ASSERT(mp->m_defrag_task); + up(&mp->m_defrag_lock); + kthread_stop(mp->m_defrag_task); + mp->m_defrag_task = NULL; +} + int xfs_file_defrag(struct file *filp, struct xfs_defrag *defrag) { return -EOPNOTSUPP; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index aed5be5508fe..ed7e1f150b59 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -35,6 +35,7 @@ #include "xfs_trace.h" #include "xfs_ag.h" #include "scrub/stats.h" +#include "xfs_defrag.h" static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; @@ -1056,6 +1057,8 @@ 
xfs_unmountfs( uint64_t resblks; int error; + xfs_stop_wait_defrags(mp); + /* * Perform all on-disk metadata updates required to inactivate inodes * that the VFS evicted earlier in the unmount process. Freeing inodes diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 764304595e8b..f74706130e35 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -44,6 +44,7 @@ #include "xfs_dahash_test.h" #include "xfs_rtbitmap.h" #include "scrub/stats.h" +#include "xfs_defrag.h" #include #include @@ -2023,6 +2024,8 @@ static int xfs_init_fs_context( fc->s_fs_info = mp; fc->ops = &xfs_context_ops; + xfs_initialize_defrag(mp); + return 0; } From patchwork Thu Dec 14 17:05:24 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wengang Wang X-Patchwork-Id: 13493296 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=oracle.com header.i=@oracle.com header.b="ZexZcHXL" Received: from mx0b-00069f02.pphosted.com (mx0b-00069f02.pphosted.com [205.220.177.32]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AD660B9 for ; Thu, 14 Dec 2023 09:05:35 -0800 (PST) Received: from pps.filterd (m0246631.ppops.net [127.0.0.1]) by mx0b-00069f02.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 3BE9wsU8021815 for ; Thu, 14 Dec 2023 17:05:34 GMT DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=oracle.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding; s=corp-2023-11-20; bh=1ChRpIsxd7Sp6qiT5qd93JYQ0MGBdkLNqXynVube3IA=; b=ZexZcHXLzu3QHKJEKwd3B3VjYnA1YWRlnMnex2y2kwwciM69b2KiwOxiRTH+a/x7j9Ox NHPqPCnJTAFqEK35dUiZst6Zk6WdBslOu3pyvB9wn6BebAovYwVQN5DQYNDI7Qt16rPO W/KoF+c6j5Rhu8daDWoxkFNjjtxk4LFXw7xbYfshBqjhbjyFmrV6qj527SHsqA5WVlcO jyn9AB8KN4GGq5j/oW7sJ2pYHYs1UwNEcM03LvEmMYlOvk6D+p+815Y2E1Uaarl4Ta2l eMGkKJeOWKr8IE1UXAAas4y2/qetvz2wVjZTkxhNs3DpQCEpOA0x2MVLv0yne21mcnJW Fw== Received: from 
phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.appoci.oracle.com [138.1.37.129]) by mx0b-00069f02.pphosted.com (PPS) with ESMTPS id 3uwfrrsqu1-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:34 +0000 Received: from pps.filterd (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEGaPPx012850 for ; Thu, 14 Dec 2023 17:05:33 GMT Received: from pps.reinject (localhost [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTPS id 3uvepahcv7-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:33 +0000 Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by pps.reinject (8.17.1.5/8.17.1.5) with ESMTP id 3BEH0mnj036808 for ; Thu, 14 Dec 2023 17:05:33 GMT Received: from wwg-mac.us.oracle.com (dhcp-10-65-131-193.vpn.oracle.com [10.65.131.193]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTP id 3uvepahcsv-4; Thu, 14 Dec 2023 17:05:33 +0000 From: Wengang Wang To: linux-xfs@vger.kernel.org Cc: wen.gang.wang@oracle.com Subject: [PATCH 3/9] xfs: defrag implement stop/suspend/resume/status Date: Thu, 14 Dec 2023 09:05:24 -0800 Message-Id: <20231214170530.8664-4-wen.gang.wang@oracle.com> X-Mailer: git-send-email 2.39.3 (Apple Git-145) In-Reply-To: <20231214170530.8664-1-wen.gang.wang@oracle.com> References: <20231214170530.8664-1-wen.gang.wang@oracle.com> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26 definitions=2023-12-14_11,2023-12-14_01,2023-05-22_02 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 mlxlogscore=999 
bulkscore=0 spamscore=0 mlxscore=0 adultscore=0 phishscore=0 malwarescore=0 suspectscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.12.0-2311290000 definitions=main-2312140121 X-Proofpoint-GUID: LKIJCdgIlOefO7KlfAh3AcQtoxsvp5Ba X-Proofpoint-ORIG-GUID: LKIJCdgIlOefO7KlfAh3AcQtoxsvp5Ba 1. we support at most 128 running file defragmentation at a time. 2. the max piece size is 4096 3. the max piece size must no less than twice of target extent size 4. defrag jobs are stored in mp->m_defrag_list. 5. for 'status' command, set the inode number to -1UL for return 6. a separated process m_defrag_task processes all defragmentation jobs Signed-off-by: Wengang Wang --- fs/xfs/xfs_defrag.c | 200 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 199 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_defrag.c b/fs/xfs/xfs_defrag.c index 8bdc6290a69d..4a10528912ca 100644 --- a/fs/xfs/xfs_defrag.c +++ b/fs/xfs/xfs_defrag.c @@ -54,6 +54,47 @@ */ #define XFS_DEFRAG_MAX_PIECE_BLOCKS 4096U +/* + * A piece is a contigurous (by file block number) range in a file. It contains one + * or more extents. When it contains two or more extents, it's subject to be + * defragmented. During the defragmenting, the original extents are + * deallocated and replaced by a single new-allocated extent covering this + * whole piece. + */ +struct xfs_defrag_piece { + /* the start file block in this piece */ + xfs_fileoff_t dp_start_off; + /* number of blocks contained in this piece */ + xfs_filblks_t dp_len; + /* + * the extents in this piece. they are contigourous by file block + * number after the piece is picked. they are sorted by filesystem + * lock number (low -> high) before unmapping. 
+ */ + struct xfs_bmbt_irec dp_extents[XFS_DEFRAG_PIECE_MAX_EXT]; + /* number of xfs_bmbt_irecs in dp_extents */ + int dp_nr_ext; +}; + +struct xfs_defrag_info { + /* links to xfs_mount.m_defrag_list */ + struct list_head di_list; /* links to xfs_mount.m_defrag_list */ + /* defrag configuration and status */ + struct xfs_defrag di_defrag; + /* the xfs_inode to defragment on */ + struct xfs_inode *di_ip; + /* next file block to start with */ + xfs_fileoff_t di_next_blk; + /* number of pieces which are defragmented */ + unsigned long di_round_nr; + /* current piece to defragment */ + struct xfs_defrag_piece di_dp; + /* timestamp of last defragmenting in jiffies */ + unsigned long di_last_process; + /* flag indicating if defragmentation is stopped by user */ + bool di_user_stopped; +}; + /* initialization called for new mount */ void xfs_initialize_defrag(struct xfs_mount *mp) { @@ -77,7 +118,164 @@ void xfs_stop_wait_defrags(struct xfs_mount *mp) mp->m_defrag_task = NULL; } -int xfs_file_defrag(struct file *filp, struct xfs_defrag *defrag) + +static bool xfs_is_defrag_param_valid(struct xfs_defrag *defrag) +{ + if (defrag->df_piece_size > XFS_DEFRAG_MAX_PIECE_BLOCKS) + return false; + if (defrag->df_piece_size < 2 * defrag->df_tgt_extsize) + return false; + return true; +} + +static inline bool __xfs_new_defrag_allowed(struct xfs_mount *mp) + { + if (mp->m_nr_defrag >= XFS_DEFRAG_MAX_PARALLEL) + return false; + + return true; +} + +/* + * lookup this mount for the xfs_defrag_info structure specified by @ino + * m_defrag_lock is held by caller. + * returns: + * The pointer to that structure on found or NULL if not found. 
+ */ +struct xfs_defrag_info *__xfs_find_defrag(unsigned long ino, + struct xfs_mount *mp) +{ + struct xfs_defrag_info *di; + + list_for_each_entry(di, &mp->m_defrag_list, di_list) { + if (di->di_defrag.df_ino == ino) + return di; + } + return NULL; +} + +/* start a new defragmetation or change the parameters on the existing one */ +static int xfs_file_defrag_start(struct inode *inode, struct xfs_defrag *defrag) { + int ret = 0; + + if ((inode->i_mode & S_IFMT) != S_IFREG) { + ret = -EOPNOTSUPP; + goto out; + } + + if (IS_DAX(inode)) { + ret = -EOPNOTSUPP; + goto out; + } + + if (!xfs_is_defrag_param_valid(defrag)) { + ret = EINVAL; + goto out; + } + +out: return -EOPNOTSUPP; + } + +static void xfs_file_defrag_status(struct inode *inode, struct xfs_defrag *defrag) +{ + struct xfs_mount *mp = XFS_I(inode)->i_mount; + struct xfs_defrag_info *di; + + down(&mp->m_defrag_lock); + di = __xfs_find_defrag(inode->i_ino, mp); + if (di == NULL) { + up(&mp->m_defrag_lock); + defrag->df_ino = -1UL; + return; + } + di->di_defrag.df_cmd = defrag->df_cmd; + *defrag = di->di_defrag; + up(&mp->m_defrag_lock); +} + +static int xfs_file_defrag_stop(struct inode *inode, struct xfs_defrag *defrag) +{ + struct xfs_mount *mp = XFS_I(inode)->i_mount; + struct xfs_defrag_info *di; + + down(&mp->m_defrag_lock); + di = __xfs_find_defrag(inode->i_ino, mp); + if (di == NULL) { + up(&mp->m_defrag_lock); + defrag->df_ino = -1UL; + return -EINVAL; + } + + di->di_user_stopped = true; + di->di_defrag.df_cmd = defrag->df_cmd; + *defrag = di->di_defrag; + up(&mp->m_defrag_lock); + /* wait up the process to process the dropping */ + wake_up_process(mp->m_defrag_task); + return 0; +} + +static int xfs_file_defrag_suspend(struct inode *inode, struct xfs_defrag *defrag) +{ + struct xfs_mount *mp = XFS_I(inode)->i_mount; + struct xfs_defrag_info *di; + + down(&mp->m_defrag_lock); + di = __xfs_find_defrag(inode->i_ino, mp); + if (di == NULL) { + up(&mp->m_defrag_lock); + defrag->df_ino = -1UL; + return 
-EINVAL; + } + di->di_defrag.df_suspended = true; + di->di_defrag.df_cmd = defrag->df_cmd; + *defrag = di->di_defrag; + up(&mp->m_defrag_lock); + return 0; +} + +static int xfs_file_defrag_resume(struct inode *inode, struct xfs_defrag *defrag) +{ + struct xfs_mount *mp = XFS_I(inode)->i_mount; + struct xfs_defrag_info *di; + + down(&mp->m_defrag_lock); + di = __xfs_find_defrag(inode->i_ino, mp); + if (di == NULL) { + up(&mp->m_defrag_lock); + defrag->df_ino = -1UL; + return -EINVAL; + } + di->di_defrag.df_suspended = false; + + di->di_defrag.df_cmd = defrag->df_cmd; + *defrag = di->di_defrag; + up(&mp->m_defrag_lock); + wake_up_process(mp->m_defrag_task); + return 0; +} + +int xfs_file_defrag(struct file *filp, struct xfs_defrag *defrag) +{ + struct inode *inode = filp->f_inode; + + defrag->df_ino = inode->i_ino; + + switch (defrag->df_cmd) { + case XFS_DEFRAG_CMD_START: + return xfs_file_defrag_start(inode, defrag); + case XFS_DEFRAG_CMD_STOP: + return xfs_file_defrag_stop(inode, defrag); + case XFS_DEFRAG_CMD_STATUS: + xfs_file_defrag_status(inode, defrag); + return 0; + case XFS_DEFRAG_CMD_SUSPEND: + return xfs_file_defrag_suspend(inode, defrag); + case XFS_DEFRAG_CMD_RESUME: + return xfs_file_defrag_resume(inode, defrag); + default: + return -EOPNOTSUPP; + } } From patchwork Thu Dec 14 17:05:25 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wengang Wang X-Patchwork-Id: 13493298 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=oracle.com header.i=@oracle.com header.b="Vfdznz0m" Received: from mx0b-00069f02.pphosted.com (mx0b-00069f02.pphosted.com [205.220.177.32]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2CB63114 for ; Thu, 14 Dec 2023 09:05:36 -0800 (PST) Received: from pps.filterd (m0246631.ppops.net [127.0.0.1]) by mx0b-00069f02.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 3BE9wtfr021847 for ; Thu, 14 Dec 2023 17:05:35 GMT 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=oracle.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding; s=corp-2023-11-20; bh=rwSyiA5IoYWxhi84dU5zVjoXWfZ1EZP74jPSc/4PVTU=; b=Vfdznz0mfA8SNW2CijBVV8t7n008u43siOicwABM8v4JJtR72xrTFpz2Bm32VPisPqay YYSuVk+9Mu3Ng+5wiaXjLVZf4gwnOQwueRHEihGO3Zj7+IpPT9jVfHOvXAZgwABdSgEk uiycu0MsI0jkl3qQ2bv2uwaRwoS8HPjivC+ph06uzFWaQTzubduk66MkC/B37m5rO9cx zO2ouZgASmW7KNvqhf2osw5p/r3Ek+rQN0pVr1nW+qxiUw+4mfOjNL/RXySmZlY+vap1 iFUEVkGsXf9VMGAlvjErbjOhTO8EoSXcss4mNkCqBUp7vFuwZ1TYrE6aLRA/2iRDwX8K jw== Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.appoci.oracle.com [138.1.37.129]) by mx0b-00069f02.pphosted.com (PPS) with ESMTPS id 3uwfrrsqu2-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:35 +0000 Received: from pps.filterd (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEGYQKP012869 for ; Thu, 14 Dec 2023 17:05:34 GMT Received: from pps.reinject (localhost [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTPS id 3uvepahcvv-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:34 +0000 Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by pps.reinject (8.17.1.5/8.17.1.5) with ESMTP id 3BEH0mnl036808 for ; Thu, 14 Dec 2023 17:05:33 GMT Received: from wwg-mac.us.oracle.com (dhcp-10-65-131-193.vpn.oracle.com [10.65.131.193]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTP id 3uvepahcsv-5; Thu, 14 Dec 2023 17:05:33 +0000 From: Wengang Wang To: linux-xfs@vger.kernel.org Cc: wen.gang.wang@oracle.com Subject: [PATCH 4/9] xfs: defrag: allocate/cleanup defragmentation Date: Thu, 14 Dec 2023 
09:05:25 -0800 Message-Id: <20231214170530.8664-5-wen.gang.wang@oracle.com> X-Mailer: git-send-email 2.39.3 (Apple Git-145) In-Reply-To: <20231214170530.8664-1-wen.gang.wang@oracle.com> References: <20231214170530.8664-1-wen.gang.wang@oracle.com> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26 definitions=2023-12-14_11,2023-12-14_01,2023-05-22_02 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 mlxlogscore=999 bulkscore=0 spamscore=0 mlxscore=0 adultscore=0 phishscore=0 malwarescore=0 suspectscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.12.0-2311290000 definitions=main-2312140121 X-Proofpoint-GUID: lgCMLncs9TNe5_-H4ud6l1aVQoTCJZOu X-Proofpoint-ORIG-GUID: lgCMLncs9TNe5_-H4ud6l1aVQoTCJZOu 1. allocate new defragmentation 2. clean up defragentations Signed-off-by: Wengang Wang --- fs/xfs/xfs_defrag.c | 123 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_defrag.c b/fs/xfs/xfs_defrag.c index 4a10528912ca..fec617ac5945 100644 --- a/fs/xfs/xfs_defrag.c +++ b/fs/xfs/xfs_defrag.c @@ -154,9 +154,74 @@ struct xfs_defrag_info *__xfs_find_defrag(unsigned long ino, return NULL; } +static void xfs_change_defrag_param(struct xfs_defrag *to, struct xfs_defrag *from) +{ + to->df_piece_size = from->df_piece_size; + to->df_tgt_extsize = from->df_tgt_extsize; + to->df_idle_time = from->df_idle_time; + to->df_ino = from->df_ino; +} + +/* caller holds m_defrag_lock */ +static struct xfs_defrag_info *__alloc_new_defrag_info(struct xfs_mount *mp) +{ + struct xfs_defrag_info *di; + + di = kmem_alloc(sizeof(struct xfs_defrag_info), KM_ZERO); + mp->m_nr_defrag++; + return di; +} + +/* sleep some jiffies */ +static inline void xfs_defrag_idle(unsigned int idle_jiffies) +{ + if (idle_jiffies > 0) { + 
set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(idle_jiffies); + } +} + +/* caller holds mp->m_defrag_lock */ +static void __xfs_drop_defrag(struct xfs_defrag_info *di, struct xfs_mount *mp) +{ + list_del(&di->di_list); + mp->m_nr_defrag--; + iput(VFS_I(di->di_ip)); + kfree(di); +} + +/* clean up all defragmentation jobs in this XFS */ +void clean_up_defrags(struct xfs_mount *mp) +{ + struct xfs_defrag_info *di, *tmp; + + down(&mp->m_defrag_lock); + list_for_each_entry_safe(di, tmp, &mp->m_defrag_list, di_list) { + __xfs_drop_defrag(di, mp); + } + ASSERT(mp->m_nr_defrag == 0); + up(&mp->m_defrag_lock); +} + +/* run as a separated process. + * defragment files in mp->m_defrag_list + */ +int xfs_defrag_process(void *data) +{ + struct xfs_mount *mp = data; + + while (!kthread_should_stop()) + xfs_defrag_idle(1000); + + clean_up_defrags(mp); + return 0; +} + /* start a new defragmetation or change the parameters on the existing one */ static int xfs_file_defrag_start(struct inode *inode, struct xfs_defrag *defrag) { + struct xfs_mount *mp = XFS_I(inode)->i_mount; + struct xfs_defrag_info *di = NULL; int ret = 0; if ((inode->i_mode & S_IFMT) != S_IFREG) { @@ -174,9 +239,63 @@ static int xfs_file_defrag_start(struct inode *inode, struct xfs_defrag *defrag) goto out; } + /* racing with unmount and freeze */ + if (down_read_trylock(&inode->i_sb->s_umount) == 0) { + ret = -EAGAIN; + goto out; + } + + down(&mp->m_defrag_lock); + if (!__xfs_new_defrag_allowed(mp)) { + ret = -EAGAIN; + goto up_return; + } + + di = __xfs_find_defrag(inode->i_ino, mp); + if (di) { + /* + * the file is already under defragmentation, + * a subsequential "start" is used to adjust parameters + * on the existing defragmentation + */ + xfs_change_defrag_param(&di->di_defrag, defrag); + ret = 0; + goto up_return; + } + + inode = igrab(inode); + if (!inode) { + ret = -EAGAIN; + goto up_return; + } + + /* a new defragmentation */ + di = __alloc_new_defrag_info(mp); + 
xfs_change_defrag_param(&di->di_defrag, defrag); + di->di_ip = XFS_I(inode); + list_add_tail(&di->di_list, &mp->m_defrag_list); + + /* + * defrag process per FS is creatd on demand and keep alive until + * FS is unmounted. + */ + if (mp->m_defrag_task == NULL) { + mp->m_defrag_task = kthread_run(xfs_defrag_process, mp, + "xdf_%s", mp->m_super->s_id); + if (IS_ERR(mp->m_defrag_task)) { + ret = PTR_ERR(mp->m_defrag_task); + mp->m_defrag_task = NULL; + } + } else { + wake_up_process(mp->m_defrag_task); + } + +up_return: + up(&mp->m_defrag_lock); + up_read(&inode->i_sb->s_umount); out: - return -EOPNOTSUPP; - } + return ret; +} static void xfs_file_defrag_status(struct inode *inode, struct xfs_defrag *defrag) { From patchwork Thu Dec 14 17:05:26 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wengang Wang X-Patchwork-Id: 13493299 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=oracle.com header.i=@oracle.com header.b="kCbCPhuM" Received: from mx0b-00069f02.pphosted.com (mx0b-00069f02.pphosted.com [205.220.177.32]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id F391B118 for ; Thu, 14 Dec 2023 09:05:36 -0800 (PST) Received: from pps.filterd (m0246630.ppops.net [127.0.0.1]) by mx0b-00069f02.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 3BE9wrrs009144 for ; Thu, 14 Dec 2023 17:05:35 GMT DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=oracle.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding; s=corp-2023-11-20; bh=xwWp/MPqnOEHD0iRkaamWsYZjf5ITDsySX36CO2BFfc=; b=kCbCPhuMhkvKszsqg7gp7xP4BSGqQwAjd4XgNPpRmbnB1QddR1kNCPX0UKZH1uVgPRl3 X1mAyTHY4b81Jj9zgYP7BUAjDrHd9RM5ZSyThxmoYr2Ve3L+JtQI+RHlIzIM157MU8p+ 9naf+06ADSjoBlHZ8vCg4MxGmr0HDQqKpWHKVSa1lVh9j4sWEt1NTlwxGbfuSX680qBA /k59NZt8kUfQx3/bQMTZYvbETrFCvonL/9TYtugmJm29x7au39/IfryTlUq1M3VA80cB 
v4EHVCbKgYktI+vJXMHg0zN8MGKSSsosHyqVoLwrNDebWyxTg9IF2GHmgQCdX/cabes2 gg== Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.appoci.oracle.com [138.1.37.129]) by mx0b-00069f02.pphosted.com (PPS) with ESMTPS id 3uwgn3sh4w-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:35 +0000 Received: from pps.filterd (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEGDPZm012793 for ; Thu, 14 Dec 2023 17:05:34 GMT Received: from pps.reinject (localhost [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTPS id 3uvepahcw7-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:34 +0000 Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by pps.reinject (8.17.1.5/8.17.1.5) with ESMTP id 3BEH0mnn036808 for ; Thu, 14 Dec 2023 17:05:34 GMT Received: from wwg-mac.us.oracle.com (dhcp-10-65-131-193.vpn.oracle.com [10.65.131.193]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTP id 3uvepahcsv-6; Thu, 14 Dec 2023 17:05:34 +0000 From: Wengang Wang To: linux-xfs@vger.kernel.org Cc: wen.gang.wang@oracle.com Subject: [PATCH 5/9] xfs: defrag: process some cases in xfs_defrag_process Date: Thu, 14 Dec 2023 09:05:26 -0800 Message-Id: <20231214170530.8664-6-wen.gang.wang@oracle.com> X-Mailer: git-send-email 2.39.3 (Apple Git-145) In-Reply-To: <20231214170530.8664-1-wen.gang.wang@oracle.com> References: <20231214170530.8664-1-wen.gang.wang@oracle.com> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26 
definitions=2023-12-14_11,2023-12-14_01,2023-05-22_02 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 mlxlogscore=999 bulkscore=0 spamscore=0 mlxscore=0 adultscore=0 phishscore=0 malwarescore=0 suspectscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.12.0-2311290000 definitions=main-2312140121 X-Proofpoint-ORIG-GUID: QZYDfOeQDKrTvaWE3hcxXXjEPmlLl00E X-Proofpoint-GUID: QZYDfOeQDKrTvaWE3hcxXXjEPmlLl00E In the main process xfs_defrag_process(), deal with following cases: 1. sleep until next defragmentation time come 2. sleep if no defragmetation job exist 3. defragmentation job is stopped by user 4. defragmentation job failed (stay a while for user to pick up error) 5. defragmentation job is suspended 6. defragmentation job is done successfully Signed-off-by: Wengang Wang --- fs/xfs/xfs_defrag.c | 146 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_defrag.c b/fs/xfs/xfs_defrag.c index fec617ac5945..aee4cfd3f86e 100644 --- a/fs/xfs/xfs_defrag.c +++ b/fs/xfs/xfs_defrag.c @@ -190,6 +190,14 @@ static void __xfs_drop_defrag(struct xfs_defrag_info *di, struct xfs_mount *mp) kfree(di); } +/* cleanup when a defragmentation is done, failed, or cancelled. */ +static void xfs_drop_defrag(struct xfs_defrag_info *di, struct xfs_mount *mp) +{ + down(&mp->m_defrag_lock); + __xfs_drop_defrag(di, mp); + up(&mp->m_defrag_lock); +} + /* clean up all defragmentation jobs in this XFS */ void clean_up_defrags(struct xfs_mount *mp) { @@ -203,15 +211,149 @@ void clean_up_defrags(struct xfs_mount *mp) up(&mp->m_defrag_lock); } +/* + * if mp->m_defrag_list is not empty, return the first one in the list. + * returns NULL otherwise. 
+ */ +static struct xfs_defrag_info *get_first_defrag(struct xfs_mount *mp) +{ + struct xfs_defrag_info *first; + + down(&mp->m_defrag_lock); + if (list_empty(&mp->m_defrag_list)) + first = NULL; + else + first = container_of(mp->m_defrag_list.next, + struct xfs_defrag_info, di_list); + up(&mp->m_defrag_lock); + return first; +} + +/* + * if mp->m_defrag_list is not empty, return the last one in the list. + * returns NULL otherwise. + */ +static struct xfs_defrag_info *get_last_defrag(struct xfs_mount *mp) +{ + struct xfs_defrag_info *last; + + down(&mp->m_defrag_lock); + if (list_empty(&mp->m_defrag_list)) + last = NULL; + else + last = container_of(mp->m_defrag_list.prev, + struct xfs_defrag_info, di_list); + up(&mp->m_defrag_lock); + return last; +} + +static inline bool xfs_defrag_failed(struct xfs_defrag_info *di) +{ + return di->di_defrag.df_status != 0; +} + +/* so far do nothing */ +static bool xfs_defrag_file(struct xfs_defrag_info *di) +{ + return true; +} + +static inline bool xfs_defrag_suspended(struct xfs_defrag_info *di) +{ + return di->di_defrag.df_suspended; +} + /* run as a separated process. 
* defragment files in mp->m_defrag_list */ int xfs_defrag_process(void *data) { + unsigned long smallest_wait = ULONG_MAX; struct xfs_mount *mp = data; + struct xfs_defrag_info *di, *last; + + while (!kthread_should_stop()) { + bool defrag_any = false; - while (!kthread_should_stop()) - xfs_defrag_idle(1000); + if (smallest_wait != ULONG_MAX) { + smallest_wait = max_t(unsigned long, smallest_wait, 10); + xfs_defrag_idle(smallest_wait); + smallest_wait = ULONG_MAX; + } + + last = get_last_defrag(mp); + if (!last) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + continue; /* while loop */ + } + + do { + unsigned long next_defrag_time; + unsigned long save_jiffies; + + if (kthread_should_stop()) + break; /* do */ + + di = get_first_defrag(mp); + /* done this round */ + if (!di) + break; /* do */ + + /* stopped by user, clean up right now */ + if (di->di_user_stopped) { + xfs_drop_defrag(di, mp); + continue; /* do */ + } + + /* + * Defrag failed on this file, give some grace time, say 30s + * for user space to capture the error + */ + if (xfs_defrag_failed(di)) { + unsigned long drop_time = di->di_last_process + + msecs_to_jiffies(XFS_DERFAG_GRACE_PERIOD); + save_jiffies = jiffies; + /* not the time to drop this failed file yet */ + if (time_before(save_jiffies, drop_time)) { + /* wait a while before dropping this file */ + if (smallest_wait > drop_time - save_jiffies) + smallest_wait = drop_time - save_jiffies; + } else { + xfs_drop_defrag(di, mp); + } + continue; /* do */ + } + + if (xfs_defrag_suspended(di)) + continue; /* do */ + + next_defrag_time = di->di_last_process + + msecs_to_jiffies(di->di_defrag.df_idle_time); + + save_jiffies = jiffies; + if (time_before(save_jiffies, next_defrag_time)) { + if (smallest_wait > next_defrag_time - save_jiffies) + smallest_wait = next_defrag_time - save_jiffies; + continue; /* do */ + } + + defrag_any = true; + /* whole file defrag done successfully */ + if (xfs_defrag_file(di)) + xfs_drop_defrag(di, mp); + + /* 
avoid tight CPU usage */ + xfs_defrag_idle(2); + } while (di != last); + + /* all the left defragmentations are suspended */ + if (defrag_any == false && smallest_wait == ULONG_MAX) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + + } clean_up_defrags(mp); return 0; From patchwork Thu Dec 14 17:05:27 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wengang Wang X-Patchwork-Id: 13493300 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=oracle.com header.i=@oracle.com header.b="datMsnMF" Received: from mx0b-00069f02.pphosted.com (mx0b-00069f02.pphosted.com [205.220.177.32]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 562F511A for ; Thu, 14 Dec 2023 09:05:37 -0800 (PST) Received: from pps.filterd (m0246630.ppops.net [127.0.0.1]) by mx0b-00069f02.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEH1TZS009178 for ; Thu, 14 Dec 2023 17:05:36 GMT DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=oracle.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding; s=corp-2023-11-20; bh=4sPQx9ngIN1CIYnj7z2Z1AGQHbvsBMDYRbjTwh0U9vw=; b=datMsnMFuMBQz/PdeYXMNXmCEt197m7hUWr2WhsgyiprHkgtTY8U2gZFZ8Av0QORuvKx rFCK+M0AtCzJbqhagSRCyJ55rm8e60hqdsn1+XYth7A+8FJMjx3/fcuNyUQwD6wAB1H7 0VqDzYuHr6ftTwG9Zu5cygJ50aeTPC4VqIuWjQopPxP1WlqUqiBbWFcJ9QeCUHrJgU48 5ldALKTquJF30UQ7WVyTLo+mjNjfLHvCSuTAsDBYXU3qmrFmBJVkhMiZHGgJ1J2M8IuC iA+lIB02WFKDmtxHLlVX+28jLGk31FfNgFDY7gNQfZOz8SaFSn4BLw/dUqS8o/MnWsWh Fw== Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.appoci.oracle.com [138.1.37.129]) by mx0b-00069f02.pphosted.com (PPS) with ESMTPS id 3uwgn3sh50-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:36 +0000 Received: from pps.filterd (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by 
phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEGhVK0012828 for ; Thu, 14 Dec 2023 17:05:35 GMT Received: from pps.reinject (localhost [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTPS id 3uvepahcwn-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:35 +0000 Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by pps.reinject (8.17.1.5/8.17.1.5) with ESMTP id 3BEH0mnp036808 for ; Thu, 14 Dec 2023 17:05:34 GMT Received: from wwg-mac.us.oracle.com (dhcp-10-65-131-193.vpn.oracle.com [10.65.131.193]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTP id 3uvepahcsv-7; Thu, 14 Dec 2023 17:05:34 +0000 From: Wengang Wang To: linux-xfs@vger.kernel.org Cc: wen.gang.wang@oracle.com Subject: [PATCH 6/9] xfs: defrag: piece picking up Date: Thu, 14 Dec 2023 09:05:27 -0800 Message-Id: <20231214170530.8664-7-wen.gang.wang@oracle.com> X-Mailer: git-send-email 2.39.3 (Apple Git-145) In-Reply-To: <20231214170530.8664-1-wen.gang.wang@oracle.com> References: <20231214170530.8664-1-wen.gang.wang@oracle.com> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26 definitions=2023-12-14_11,2023-12-14_01,2023-05-22_02 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 mlxlogscore=999 bulkscore=0 spamscore=0 mlxscore=0 adultscore=0 phishscore=0 malwarescore=0 suspectscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.12.0-2311290000 definitions=main-2312140121 X-Proofpoint-ORIG-GUID: lV4brouCHm87ctQ8-KpS2pIKVC42yf_7 X-Proofpoint-GUID: lV4brouCHm87ctQ8-KpS2pIKVC42yf_7 A extent in a piece must: 1. be real extent 2. not 'unwritten' extent 3. 
size no bigger than target extent size Extents in a piece must be contigurous by file block, there can't be holes in pieces. There can be up to XFS_DEFRAG_PIECE_MAX_EXT (512) extents in a piece. Signed-off-by: Wengang Wang --- fs/xfs/xfs_defrag.c | 187 +++++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_inode.h | 1 + include/linux/fs.h | 5 ++ 3 files changed, 190 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_defrag.c b/fs/xfs/xfs_defrag.c index aee4cfd3f86e..3b90ba07c73a 100644 --- a/fs/xfs/xfs_defrag.c +++ b/fs/xfs/xfs_defrag.c @@ -252,10 +252,191 @@ static inline bool xfs_defrag_failed(struct xfs_defrag_info *di) return di->di_defrag.df_status != 0; } -/* so far do nothing */ +static inline void xfs_set_defrag_error(struct xfs_defrag *df, int error) +{ + if (df->df_status == 0) + df->df_status = error; +} + +static void xfs_piece_reset(struct xfs_defrag_piece *dp) +{ + dp->dp_start_off = 0; + dp->dp_len = 0; + dp->dp_nr_ext = 0; +} + +/* + * check if the given extent should be skipped from defragmenting + * The following extents are skipped + * 1. non "real" + * 2. unwritten + * 3. size bigger than target + * returns: + * true -- skip this extent + * false -- don't skip + */ +static bool xfs_extent_skip_defrag(struct xfs_bmbt_irec *check, + struct xfs_defrag *defrag) +{ + if (!xfs_bmap_is_real_extent(check)) + return true; + if (check->br_state == XFS_EXT_UNWRITTEN) + return true; + if (check->br_blockcount > defrag->df_tgt_extsize) + return true; + return false; +} + +/* + * add extent to piece + * the extent is expected to be behind all the existing extents. 
+ * returns: + * true -- the piece is full with extents + * false -- not full yet + */ +static bool xfs_add_extent_to_piece(struct xfs_defrag_piece *dp, + struct xfs_bmbt_irec *add, + struct xfs_defrag *defrag, + int pos_in_piece) +{ + ASSERT(dp->dp_nr_ext < XFS_DEFRAG_PIECE_MAX_EXT); + ASSERT(pos_in_piece < XFS_DEFRAG_PIECE_MAX_EXT); + dp->dp_extents[pos_in_piece] = *add; + dp->dp_len += add->br_blockcount; + + /* set up starting file offset */ + if (dp->dp_nr_ext == 0) + dp->dp_start_off = add->br_startoff; + dp->dp_nr_ext++; + if (dp->dp_nr_ext == XFS_DEFRAG_PIECE_MAX_EXT) + return true; + if (dp->dp_len >= defrag->df_piece_size) + return true; + return false; +} + +/* + * check if the given extent is contiguous, by file block number, with the + * previous one in the piece + */ +static bool xfs_is_contig_ext(struct xfs_bmbt_irec *check, + struct xfs_defrag_piece *dp) +{ + /* it's contig if the piece is empty */ + if (dp->dp_len == 0) + return true; + return dp->dp_start_off + dp->dp_len == check->br_startoff; +} + +/* + * pick next piece to defragment starting from the @di->di_next_blk + * takes and drops XFS_ILOCK_SHARED lock + * returns: + * true: piece is selected. + * false: no more pieces in this file. 
+ */ +static bool xfs_pick_next_piece(struct xfs_defrag_info *di) +{ + struct xfs_defrag *defrag = &di->di_defrag; + int pos_in_piece = 0; + struct xfs_defrag_piece *dp = &di->di_dp; + struct xfs_inode *ip = di->di_ip; + bool found; + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec got; + + xfs_piece_reset(dp); + xfs_ilock(ip, XFS_ILOCK_SHARED); + found = xfs_iext_lookup_extent(ip, &ip->i_df, di->di_next_blk, &icur, &got); + + /* fill the piece until it gets full or it reaches the block limit */ + while (found) { + if (xfs_extent_skip_defrag(&got, defrag)) { + if (dp->dp_len) { + /* this piece already has some extents, return */ + break; + } + goto next_ext; + } + + if (!xfs_is_contig_ext(&got, dp)) { + /* this extent is not contiguous with the previous one, finish this piece */ + break; + } + + if (xfs_add_extent_to_piece(dp, &got, defrag, pos_in_piece++)) { + /* this piece is full */ + break; + } + +next_ext: + found = xfs_iext_next_extent(&ip->i_df, &icur, &got); + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + /* set the starting file block for next piece */ + di->di_next_blk = dp->dp_start_off + dp->dp_len; + return !!dp->dp_len; +} + +/* + * defrag a piece of a file + * error code is stored in di->di_defrag.df_status. + * returns: + * true -- whole file defrag done successfully. + * false -- not all done or error happened. 
+ */ + static bool xfs_defrag_file(struct xfs_defrag_info *di) { - return true; + struct xfs_defrag *df = &(di->di_defrag); + struct xfs_inode *ip = di->di_ip; + bool ret = false; + int error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) { + xfs_set_defrag_error(df, error); + goto out; + } + + /* prevent further read/write/map/unmap/reflink/GC requests to this file */ + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) + goto out; + + if (!filemap_invalidate_trylock(VFS_I(ip)->i_mapping)) { + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + goto out; + } + + inode_dio_wait(VFS_I(ip)); + /* + * flush the whole file to get stable data/cow extents + */ + error = filemap_write_and_wait(VFS_I(ip)->i_mapping); + if (error) { + xfs_set_defrag_error(df, error); + goto unlock_out; + } + + xfs_iflags_set(ip, XFS_IDEFRAG); //set after dirty pages get flushed + /* pick up next piece */ + if (!xfs_pick_next_piece(di)) { + /* no more pieces to defrag, we are done */ + ret = true; + goto clear_out; + } + df->df_blocks_done = di->di_next_blk; +clear_out: + xfs_iflags_clear(ip, XFS_IDEFRAG); +unlock_out: + filemap_invalidate_unlock(VFS_I(ip)->i_mapping); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); +out: + di->di_last_process = jiffies; + return ret; } static inline bool xfs_defrag_suspended(struct xfs_defrag_info *di) @@ -266,7 +447,7 @@ static inline bool xfs_defrag_suspended(struct xfs_defrag_info *di) /* run as a separated process. 
* defragment files in mp->m_defrag_list */ -int xfs_defrag_process(void *data) +static int xfs_defrag_process(void *data) { unsigned long smallest_wait = ULONG_MAX; struct xfs_mount *mp = data; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3beb470f1892..4f4e27cb9bbe 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -346,6 +346,7 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) /* Quotacheck is running but inode has not been added to quota counts. */ #define XFS_IQUOTAUNCHECKED (1 << 14) +#define XFS_IDEFRAG (1 << 15) /* defrag in progress */ /* * Remap in progress. Callers that wish to update file data while diff --git a/include/linux/fs.h b/include/linux/fs.h index 98b7a7a8c42e..87497c4ca552 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -847,6 +847,11 @@ static inline void filemap_invalidate_lock(struct address_space *mapping) down_write(&mapping->invalidate_lock); } +static inline int filemap_invalidate_trylock(struct address_space *mapping) +{ + return down_write_trylock(&mapping->invalidate_lock); +} + static inline void filemap_invalidate_unlock(struct address_space *mapping) { up_write(&mapping->invalidate_lock); From patchwork Thu Dec 14 17:05:28 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wengang Wang X-Patchwork-Id: 13493301 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=oracle.com header.i=@oracle.com header.b="ifVmUpYx" Received: from mx0b-00069f02.pphosted.com (mx0b-00069f02.pphosted.com [205.220.177.32]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 243E2A0 for ; Thu, 14 Dec 2023 09:05:38 -0800 (PST) Received: from pps.filterd (m0246630.ppops.net [127.0.0.1]) by mx0b-00069f02.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 3BE9x0b3009307 for ; Thu, 14 Dec 2023 17:05:37 GMT DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=oracle.com; h=from : to : cc : subject : 
date : message-id : in-reply-to : references : mime-version : content-transfer-encoding; s=corp-2023-11-20; bh=7Q7obAmblXDFCBWG9ld4J/Oeo9gUKUvHENl0+9SIbqo=; b=ifVmUpYxyk1rvfnHAZTMtMj1VZF02up1+GPpvV9cVYxi0MX1xxblVy9hrGnd7Y+KAC92 K+6WbQpjalqgV+4eV8y0SnSCDiXpbnNpOaDkUxfHuXkw9N21S2Pvw5CGFRU7bDtqxRC0 gOFmZLnjrrBue05fct4xQFAikbt/ZFZzklnbiAMfEtkBgsPJbHWCbXFqZb6hVc3Ptd8/ RvNkDCdxlfhFDsgV6/s4oxb1Bai4CU4cvGfjDB91CMQCcGMH9Z+7bt8esY0IUHl589+K ciD6ohuUAEWo2YNo2zL4Gy+qwuC9QIHFg0PWQkJKB56BnFjI3k1C7EXJu05S8iyizoXQ EQ== Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.appoci.oracle.com [138.1.37.129]) by mx0b-00069f02.pphosted.com (PPS) with ESMTPS id 3uwgn3sh51-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:36 +0000 Received: from pps.filterd (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEFokjI012849 for ; Thu, 14 Dec 2023 17:05:35 GMT Received: from pps.reinject (localhost [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTPS id 3uvepahcx4-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:35 +0000 Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by pps.reinject (8.17.1.5/8.17.1.5) with ESMTP id 3BEH0mnr036808 for ; Thu, 14 Dec 2023 17:05:35 GMT Received: from wwg-mac.us.oracle.com (dhcp-10-65-131-193.vpn.oracle.com [10.65.131.193]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTP id 3uvepahcsv-8; Thu, 14 Dec 2023 17:05:35 +0000 From: Wengang Wang To: linux-xfs@vger.kernel.org Cc: wen.gang.wang@oracle.com Subject: [PATCH 7/9] xfs: defrag: guarantee contigurous blocks in cow fork Date: Thu, 14 Dec 2023 09:05:28 -0800 Message-Id: <20231214170530.8664-8-wen.gang.wang@oracle.com> X-Mailer: 
git-send-email 2.39.3 (Apple Git-145) In-Reply-To: <20231214170530.8664-1-wen.gang.wang@oracle.com> References: <20231214170530.8664-1-wen.gang.wang@oracle.com> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26 definitions=2023-12-14_11,2023-12-14_01,2023-05-22_02 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 mlxlogscore=999 bulkscore=0 spamscore=0 mlxscore=0 adultscore=0 phishscore=0 malwarescore=0 suspectscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.12.0-2311290000 definitions=main-2312140121 X-Proofpoint-ORIG-GUID: 2t_bCqNIL0rgUitGVWPs4z0_JF5tnonf X-Proofpoint-GUID: 2t_bCqNIL0rgUitGVWPs4z0_JF5tnonf Make sure there are contigurous blocks in cow fork covers the piece. 1. if the piece is covered, we are done 2. if the piece is partially overlap with existing extents in cow fork, reclaim the overlap parts. 3. allocate exact piece size contigurous blocks skipping cow hint, fail out if the allocation failed. 4. new blocks are stored in cow fork Signed-off-by: Wengang Wang --- fs/xfs/xfs_defrag.c | 157 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_inode.c | 4 ++ 2 files changed, 161 insertions(+) diff --git a/fs/xfs/xfs_defrag.c b/fs/xfs/xfs_defrag.c index 3b90ba07c73a..3c86dd1f5cd4 100644 --- a/fs/xfs/xfs_defrag.c +++ b/fs/xfs/xfs_defrag.c @@ -379,6 +379,154 @@ static bool xfs_pick_next_piece(struct xfs_defrag_info *di) return !!dp->dp_len; } +/* + * check if the extent _imap_ covers the range specified by 'off_start' + * and 'length'. 
+ * returns the following codes + */ +#define XFS_DEFRAG_IMAP_NOOVERLAP 0 /* no overlap */ +#define XFS_DEFRAG_IMAP_COVER 1 /* fully cover */ +#define XFS_DEFRAG_IMAP_PARTIAL_COVER 2 /* partially cover */ +static int xfs_extent_cover_range(struct xfs_bmbt_irec *imap, + xfs_fileoff_t off_start, + xfs_fileoff_t length) +{ + if (off_start >= imap->br_startoff + imap->br_blockcount) + return XFS_DEFRAG_IMAP_NOOVERLAP; + + if (off_start + length <= imap->br_startoff) + return XFS_DEFRAG_IMAP_NOOVERLAP; + + if (imap->br_startoff <= off_start && + imap->br_blockcount + imap->br_startoff - off_start >= length) + return XFS_DEFRAG_IMAP_COVER; + + return XFS_DEFRAG_IMAP_PARTIAL_COVER; +} + +/* + * make sure there are contiguous blocks to cover the given piece in cowfp. + * if there is already such an extent covering the piece, we are done. + * otherwise, we reclaim the non-contiguous blocks if there are, and allocate + * new contiguous blocks. + * parameters: + * dp --> [input] the piece + * icur --> [output] cow tree context + * imap --> [output] the extent that covers the piece. 
+ */ +static int xfs_guarantee_cow_extent(struct xfs_defrag_info *di, + struct xfs_iext_cursor *icur, + struct xfs_bmbt_irec *imap) +{ +#define XFS_DEFRAG_NO_ALLOC 0 /* Cow extent covers, no alloc */ +#define XFS_DEFRAG_ALLOC_NO_CANCEL 1 /* No Cow extents to cancel, alloc */ +#define XFS_DEFRAG_ALLOC_CANCEL 2 /* Cow extents to cancel, alloc */ + struct xfs_inode *ip = di->di_ip; + struct xfs_mount *mp = ip->i_mount; + struct xfs_defrag_piece *dp = &di->di_dp; + int need_alloc; + int nmap = 1; + unsigned int resblks; + int error; + struct xfs_trans *tp; + + xfs_ifork_init_cow(ip); + if (!xfs_inode_has_cow_data(ip)) { + need_alloc = XFS_DEFRAG_ALLOC_NO_CANCEL; + } else if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, dp->dp_start_off, + icur, imap)) { + need_alloc = XFS_DEFRAG_ALLOC_NO_CANCEL; + } else { + int ret = xfs_extent_cover_range(imap, dp->dp_start_off, dp->dp_len); + + if (ret == XFS_DEFRAG_IMAP_COVER) + need_alloc = XFS_DEFRAG_NO_ALLOC; + else if (ret == XFS_DEFRAG_IMAP_PARTIAL_COVER) + need_alloc = XFS_DEFRAG_ALLOC_CANCEL; + else // XFS_DEFRAG_IMAP_NOOVERLAP + need_alloc = XFS_DEFRAG_ALLOC_NO_CANCEL; + } + + /* this piece is fully covered by existing Cow extent, we are done */ + if (need_alloc == XFS_DEFRAG_NO_ALLOC) + goto out; + + /* + * this piece is partially covered by existing Cow extent, reclaim the + * overlapping blocks + */ + if (need_alloc == XFS_DEFRAG_ALLOC_CANCEL) { + /* + * reclaim overlapping (but not covering) extents in a separate + * transaction + */ + error = xfs_reflink_cancel_cow_range(ip, + XFS_FSB_TO_B(mp, dp->dp_start_off), + XFS_FSB_TO_B(mp, dp->dp_len), true); + if (error) + return error; + } + + resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + dp->dp_len; + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, + resblks, 0, false, &tp); + if (error) + goto out; + + /* now we have ILOCK_EXCL locked */ + error = xfs_bmapi_write(tp, ip, dp->dp_start_off, dp->dp_len, + XFS_BMAPI_COWFORK | XFS_BMAPI_CONTIG, 0, imap, &nmap); + if 
(error) + goto cancel_out; + + if (nmap == 0) { + error = -ENOSPC; + goto cancel_out; + } + + error = xfs_trans_commit(tp); + if (error) + goto unlock_out; + + xfs_iext_lookup_extent(ip, ip->i_cowfp, dp->dp_start_off, icur, imap); + + /* new extent can be merged into existing extent(s) though it's rare */ + ASSERT(imap->br_blockcount >= dp->dp_len); + goto unlock_out; + +cancel_out: + xfs_trans_cancel(tp); +unlock_out: + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out: + return error; +} + +/* defrag on the given piece + * XFS_ILOCK_EXCL is held by caller + */ +static int xfs_defrag_file_piece(struct xfs_defrag_info *di) +{ + struct xfs_inode *ip = di->di_ip; + struct xfs_bmbt_irec imap; + int error; + struct xfs_iext_cursor icur; + + if (xfs_is_shutdown(ip->i_mount)) { + error = -EIO; + goto out; + } + + /* allocate contig new blocks to Cow fork */ + error = xfs_guarantee_cow_extent(di, &icur, &imap); + if (error) + goto out; + + ASSERT(imap.br_blockcount >= di->di_dp.dp_len); +out: + return error; +} + /* * defrag a piece of a file * error code is stored in di->di_defrag.df_status. 
@@ -428,6 +576,14 @@ static bool xfs_defrag_file(struct xfs_defrag_info *di) ret = true; goto clear_out; } + + if (di->di_dp.dp_nr_ext > 1) { + /* defrag the piece */ + error = xfs_defrag_file_piece(di); + if (error) + xfs_set_defrag_error(df, error); + } + df->df_blocks_done = di->di_next_blk; clear_out: xfs_iflags_clear(ip, XFS_IDEFRAG); @@ -536,6 +692,7 @@ static int xfs_defrag_process(void *data) } + /* unmount in progress, clean up the defrags */ clean_up_defrags(mp); return 0; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c0f1c89786c2..e0e319847f7d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -82,6 +82,10 @@ xfs_get_cowextsz_hint( { xfs_extlen_t a, b; + /* defrag need exact required size and skip the hint */ + if (xfs_iflags_test(ip, XFS_IDEFRAG)) + return 0; + a = 0; if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) a = ip->i_cowextsize; From patchwork Thu Dec 14 17:05:29 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wengang Wang X-Patchwork-Id: 13493302 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=oracle.com header.i=@oracle.com header.b="cGsgkcyy" Received: from mx0b-00069f02.pphosted.com (mx0b-00069f02.pphosted.com [205.220.177.32]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 366F811B for ; Thu, 14 Dec 2023 09:05:38 -0800 (PST) Received: from pps.filterd (m0246632.ppops.net [127.0.0.1]) by mx0b-00069f02.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 3BE9x6Ti018702 for ; Thu, 14 Dec 2023 17:05:37 GMT DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=oracle.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding; s=corp-2023-11-20; bh=6eFXehK3AiU80VK2aem/OTQ8evVHuw4+zACwCb+wrJg=; b=cGsgkcyyWyPV4Lz02wcd20y+SZxc6llWhGKtfOOn4yzh7KYqJdpuLyAzq7jnXgdMUMw2 NFUrkk7n6E2Hr5QvfIt1htbYmz/tR0SnTNlJNQHPH/T5UGYmfSgvX3yeDqEQ9oclqUxh 
XOiNK64H30or8ptg07oZOlpqV/AKg5XwpF4Jcj5id1tNXdOZeYulFAGg6vdmvEwCqqu/ O0vOmMRgccPGRWecRvhEmArLQqaT75kvyyoSEgUfg6/emvsVH6eRZLooNZv8pIyg3klF HO2k3NheMsb3bJdgNXI786/jIvbm6EkuXrqtpzgwEZdWOmQnYNvovpxLq3+fYX6R05B9 5g== Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.appoci.oracle.com [138.1.37.129]) by mx0b-00069f02.pphosted.com (PPS) with ESMTPS id 3uvfuubavb-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:37 +0000 Received: from pps.filterd (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEGheWW012874 for ; Thu, 14 Dec 2023 17:05:36 GMT Received: from pps.reinject (localhost [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTPS id 3uvepahcxn-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:36 +0000 Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by pps.reinject (8.17.1.5/8.17.1.5) with ESMTP id 3BEH0mnt036808 for ; Thu, 14 Dec 2023 17:05:35 GMT Received: from wwg-mac.us.oracle.com (dhcp-10-65-131-193.vpn.oracle.com [10.65.131.193]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTP id 3uvepahcsv-9; Thu, 14 Dec 2023 17:05:35 +0000 From: Wengang Wang To: linux-xfs@vger.kernel.org Cc: wen.gang.wang@oracle.com Subject: [PATCH 8/9] xfs: defrag: copy data from old blocks to new blocks Date: Thu, 14 Dec 2023 09:05:29 -0800 Message-Id: <20231214170530.8664-9-wen.gang.wang@oracle.com> X-Mailer: git-send-email 2.39.3 (Apple Git-145) In-Reply-To: <20231214170530.8664-1-wen.gang.wang@oracle.com> References: <20231214170530.8664-1-wen.gang.wang@oracle.com> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 
X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26 definitions=2023-12-14_11,2023-12-14_01,2023-05-22_02 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 mlxlogscore=999 bulkscore=0 spamscore=0 mlxscore=0 adultscore=0 phishscore=0 malwarescore=0 suspectscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.12.0-2311290000 definitions=main-2312140121 X-Proofpoint-GUID: I9zKDl9QkAw_Tje_k4NK9xSALrVy-ju1 X-Proofpoint-ORIG-GUID: I9zKDl9QkAw_Tje_k4NK9xSALrVy-ju1 copy data from old blocks to new blocks synchronously Signed-off-by: Wengang Wang --- fs/xfs/xfs_defrag.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/fs/xfs/xfs_defrag.c b/fs/xfs/xfs_defrag.c index 3c86dd1f5cd4..0375b542024e 100644 --- a/fs/xfs/xfs_defrag.c +++ b/fs/xfs/xfs_defrag.c @@ -502,6 +502,57 @@ static int xfs_guarantee_cow_extent(struct xfs_defrag_info *di, return error; } +static int xfs_do_copy_extent_sync(struct xfs_mount *mp, xfs_fsblock_t src_blk, + xfs_fsblock_t tgt_blk, xfs_filblks_t count) +{ + struct xfs_buf *bp = NULL; + xfs_daddr_t src_daddr, tgt_daddr; + size_t nblocks; + int error; + + src_daddr = XFS_FSB_TO_DADDR(mp, src_blk); + tgt_daddr = XFS_FSB_TO_DADDR(mp, tgt_blk); + nblocks = XFS_FSB_TO_BB(mp, count); + + error = xfs_buf_read_uncached(mp->m_ddev_targp, src_daddr, nblocks, 0, &bp, NULL); + if (error) + goto rel_bp; + + /* write to new blocks */ + bp->b_maps[0].bm_bn = tgt_daddr; + error = xfs_bwrite(bp); +rel_bp: + if (bp) + xfs_buf_relse(bp); + return error; +} + +/* Physically copy data from old extents to new extents synchronously + * Note: @new extent is expected either exact same as piece size or it's bigger + * than that. 
+ */ +static int xfs_defrag_copy_piece_sync(struct xfs_defrag_info *di, + struct xfs_bmbt_irec *new) +{ + struct xfs_defrag_piece *dp = &di->di_dp; + xfs_fsblock_t new_strt_blk; + int error = 0; + int i; + + new_strt_blk = new->br_startblock + dp->dp_start_off - new->br_startoff; + for (i = 0; i < dp->dp_nr_ext; i++) { + struct xfs_bmbt_irec *irec = &dp->dp_extents[i]; + + error = xfs_do_copy_extent_sync(di->di_ip->i_mount, + irec->br_startblock, new_strt_blk, + irec->br_blockcount); + if (error) + break; + new_strt_blk += irec->br_blockcount; + } + return error; +} + /* defrag on the given piece * XFS_ILOCK_EXCL is held by caller */ @@ -523,6 +574,11 @@ static int xfs_defrag_file_piece(struct xfs_defrag_info *di) goto out; ASSERT(imap.br_blockcount >= di->di_dp.dp_len); + + /* copy data to new blocks */ + error = xfs_defrag_copy_piece_sync(di, &imap); + if (error) + goto out; out: return error; } From patchwork Thu Dec 14 17:05:30 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wengang Wang X-Patchwork-Id: 13493303 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=oracle.com header.i=@oracle.com header.b="DyS3A2yS" Received: from mx0b-00069f02.pphosted.com (mx0b-00069f02.pphosted.com [205.220.177.32]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id EC4B2A7 for ; Thu, 14 Dec 2023 09:05:38 -0800 (PST) Received: from pps.filterd (m0246630.ppops.net [127.0.0.1]) by mx0b-00069f02.pphosted.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEH1TZT009178 for ; Thu, 14 Dec 2023 17:05:38 GMT DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=oracle.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-transfer-encoding; s=corp-2023-11-20; bh=lIdR4U0naPkbuVC5cb0SLe2e9nCwtGXK5HjeZSzMY0A=; b=DyS3A2ySY8wa338SomVJI0rZgsG3exHY2YR2vgKGOU3KM/h7UFKacEej3h12c3rXbvVe 
8NGlCtu++LI9+2HXghyjiDZVO/yfkL3zEyNkZ9OzqaeuLwGYjg2kByo6fmlnnKJ212MN nR8kTUTVu4LkDhS/+utTTFG4zsI+WhXPUb4ZScJW/+Nzji8sN4KNYqwPN7EWyAo1oJwO Gye8GYHvrjiloHeIc83eUDQWH4jWmPjckEIfJ4SX/HVjpm4n2fAT00SZco3WX4qxnslU xzjux9FLxxdeOy6/Fsi2g3QsdYMopybhUtS1F5l3KQcMaYL2DnCEskrZLOtZNsJNlBgt 5w== Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.appoci.oracle.com [138.1.37.129]) by mx0b-00069f02.pphosted.com (PPS) with ESMTPS id 3uwgn3sh53-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:37 +0000 Received: from pps.filterd (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (8.17.1.19/8.17.1.19) with ESMTP id 3BEGvV5D012781 for ; Thu, 14 Dec 2023 17:05:37 GMT Received: from pps.reinject (localhost [127.0.0.1]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTPS id 3uvepahcyf-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-GCM-SHA384 bits=256 verify=OK) for ; Thu, 14 Dec 2023 17:05:36 +0000 Received: from phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com [127.0.0.1]) by pps.reinject (8.17.1.5/8.17.1.5) with ESMTP id 3BEH0mnv036808 for ; Thu, 14 Dec 2023 17:05:36 GMT Received: from wwg-mac.us.oracle.com (dhcp-10-65-131-193.vpn.oracle.com [10.65.131.193]) by phxpaimrmta03.imrmtpd1.prodappphxaev1.oraclevcn.com (PPS) with ESMTP id 3uvepahcsv-10; Thu, 14 Dec 2023 17:05:36 +0000 From: Wengang Wang To: linux-xfs@vger.kernel.org Cc: wen.gang.wang@oracle.com Subject: [PATCH 9/9] xfs: defrag: map new blocks Date: Thu, 14 Dec 2023 09:05:30 -0800 Message-Id: <20231214170530.8664-10-wen.gang.wang@oracle.com> X-Mailer: git-send-email 2.39.3 (Apple Git-145) In-Reply-To: <20231214170530.8664-1-wen.gang.wang@oracle.com> References: <20231214170530.8664-1-wen.gang.wang@oracle.com> Precedence: bulk X-Mailing-List: linux-xfs@vger.kernel.org List-Id: List-Subscribe: 
List-Unsubscribe: MIME-Version: 1.0 X-Proofpoint-Virus-Version: vendor=baseguard engine=ICAP:2.0.272,Aquarius:18.0.997,Hydra:6.0.619,FMLib:17.11.176.26 definitions=2023-12-14_11,2023-12-14_01,2023-05-22_02 X-Proofpoint-Spam-Details: rule=notspam policy=default score=0 mlxlogscore=999 bulkscore=0 spamscore=0 mlxscore=0 adultscore=0 phishscore=0 malwarescore=0 suspectscore=0 classifier=spam adjust=0 reason=mlx scancount=1 engine=8.12.0-2311290000 definitions=main-2312140121 X-Proofpoint-ORIG-GUID: fon9KLM77MpDlenxv3mVTFT24LsRce23 X-Proofpoint-GUID: fon9KLM77MpDlenxv3mVTFT24LsRce23 Unmap original extents. Drop refcounter for shared blocks; free not shared ones. Fre Cow orphon record. map new blocks to data fork and remove them from cow fork. copy data from old blocks to new blocks synchronously Signed-off-by: Wengang Wang --- fs/xfs/xfs_bmap_util.c | 2 +- fs/xfs/xfs_defrag.c | 140 ++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_reflink.c | 7 ++- fs/xfs/xfs_reflink.h | 3 +- 5 files changed, 147 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 731260a5af6d..09053c0abe28 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -301,7 +301,7 @@ xfs_getbmap_report_one( bool shared = false; int error; - error = xfs_reflink_trim_around_shared(ip, got, &shared); + error = xfs_reflink_trim_around_shared(ip, got, &shared, NULL); if (error) return error; diff --git a/fs/xfs/xfs_defrag.c b/fs/xfs/xfs_defrag.c index 0375b542024e..6867f81783a0 100644 --- a/fs/xfs/xfs_defrag.c +++ b/fs/xfs/xfs_defrag.c @@ -553,13 +553,102 @@ static int xfs_defrag_copy_piece_sync(struct xfs_defrag_info *di, return error; } +/* caller makes sure both irec1 and irec2 are real ones. */ +static int compare_bmbt_by_fsb(const void *a, const void *b) +{ + const struct xfs_bmbt_irec *irec1 = a, *irec2 = b; + + return irec1->br_startblock > irec2->br_startblock ? 
1 : -1; +} + +/* sort the extents in dp_extents to be in fsb order, low to high */ +static void xfs_sort_piece_exts_by_fsb(struct xfs_defrag_piece *dp) +{ + sort(dp->dp_extents, dp->dp_nr_ext, sizeof(struct xfs_bmbt_irec), + compare_bmbt_by_fsb, NULL); +} + +/* + * unmap the given extent from inode + * free non-shared blocks and decrease shared counter for shared ones. + */ +static int xfs_defrag_unmap_ext(struct xfs_inode *ip, + struct xfs_bmbt_irec *irec, + struct xfs_trans *tp) +{ + struct xfs_bmbt_irec unmap = *irec; /* don't update original extent */ + xfs_fsblock_t irec_end = irec->br_startblock + irec->br_blockcount; + int error = 0; + + while (unmap.br_startblock < irec_end) { + bool shared; + + error = xfs_reflink_trim_around_shared(ip, &unmap, &shared, tp); + if (error) + goto out; + + /* unmap blocks from data fork */ + xfs_bmap_unmap_extent(tp, ip, &unmap); + /* + * decrease refcount counter for shared blocks, or free the + * non-shared blocks + */ + if (shared) { + xfs_refcount_decrease_extent(tp, &unmap); + } else { + ASSERT(unmap.br_state != XFS_EXT_UNWRITTEN); + __xfs_free_extent_later(tp, unmap.br_startblock, + unmap.br_blockcount, NULL, 0, false); + } + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, + -unmap.br_blockcount); + + /* for next */ + unmap.br_startoff += unmap.br_blockcount; + unmap.br_startblock += unmap.br_blockcount; + unmap.br_blockcount = irec_end - unmap.br_startblock; + } +out: + return error; +} + +/* + * unmap original extents in this piece + * for those non-shared ones, also free them; for shared, decrease refcount + * counter. + * XFS_ILOCK_EXCL is locked by caller. 
+ */ +static int xfs_defrag_unmap_piece(struct xfs_defrag_info *di, struct xfs_trans *tp) +{ + struct xfs_defrag_piece *dp = &di->di_dp; + xfs_fsblock_t last_fsb = 0; + int i, error; + + for (i = 0; i < dp->dp_nr_ext; i++) { + struct xfs_bmbt_irec *irec = &dp->dp_extents[i]; + + /* debug only, remove the following two lines for production use */ + ASSERT(last_fsb == 0 || irec->br_startblock > last_fsb); + last_fsb = irec->br_startblock; + + error = xfs_defrag_unmap_ext(di->di_ip, irec, tp); + if (error) + break; + } + return error; +} + /* defrag on the given piece * XFS_ILOCK_EXCL is held by caller */ static int xfs_defrag_file_piece(struct xfs_defrag_info *di) { struct xfs_inode *ip = di->di_ip; - struct xfs_bmbt_irec imap; + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp = NULL; + struct xfs_bmbt_irec imap, del; + unsigned int resblks; + int error; struct xfs_iext_cursor icur; @@ -579,6 +668,55 @@ static int xfs_defrag_file_piece(struct xfs_defrag_info *di) error = xfs_defrag_copy_piece_sync(di, &imap); if (error) goto out; + + /* sort the extents by FSB, low -> high, for later unmapping*/ + xfs_sort_piece_exts_by_fsb(&di->di_dp); + + resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, + XFS_TRANS_RESERVE, &tp); + if (error) + goto out; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + /* unmap original extents in data fork */ + error = xfs_defrag_unmap_piece(di, tp); + if (error) { + xfs_trans_cancel(tp); + goto out; + } + + /* adjust new blocks to proper range */ + del = imap; + if (del.br_blockcount > di->di_dp.dp_len) { + xfs_filblks_t diff = di->di_dp.dp_start_off - del.br_startoff; + + del.br_startoff += diff; + del.br_startblock += diff; + del.br_blockcount = di->di_dp.dp_len; + } + + /* Free the CoW orphan record. 
*/ + xfs_refcount_free_cow_extent(tp, del.br_startblock, del.br_blockcount); + + /* map the adjusted new blocks to data fork */ + xfs_bmap_map_extent(tp, ip, &del); + + /* Charge this new data fork mapping to the on-disk quota. */ + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT, + (long)del.br_blockcount); + + /* remove the extent from Cow fork */ + xfs_bmap_del_extent_cow(ip, &icur, &imap, &del); + + /* modify inode change time */ + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + out: return error; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 18c8f168b153..f6fdff3bdca4 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1256,7 +1256,7 @@ xfs_read_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, 0); if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode))) - error = xfs_reflink_trim_around_shared(ip, &imap, &shared); + error = xfs_reflink_trim_around_shared(ip, &imap, &shared, NULL); seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0); xfs_iunlock(ip, lockmode); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e5b62dc28466..7d7d67087fcc 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -168,7 +168,8 @@ int xfs_reflink_trim_around_shared( struct xfs_inode *ip, struct xfs_bmbt_irec *irec, - bool *shared) + bool *shared, + struct xfs_trans *tp) { struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; @@ -190,7 +191,7 @@ xfs_reflink_trim_around_shared( agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); aglen = irec->br_blockcount; - error = xfs_reflink_find_shared(pag, NULL, agbno, aglen, &fbno, &flen, + error = xfs_reflink_find_shared(pag, tp, agbno, aglen, &fbno, &flen, true); xfs_perag_put(pag); if (error) @@ -238,7 +239,7 @@ xfs_bmap_trim_cow( } /* Trim the mapping to the nearest shared extent boundary. 
*/ - return xfs_reflink_trim_around_shared(ip, imap, shared); + return xfs_reflink_trim_around_shared(ip, imap, shared, NULL); } static int diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 65c5dfe17ecf..d751420650f2 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -17,7 +17,8 @@ static inline bool xfs_is_cow_inode(struct xfs_inode *ip) } extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, - struct xfs_bmbt_irec *irec, bool *shared); + struct xfs_bmbt_irec *irec, bool *shared, + struct xfs_trans *tp); int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, bool *shared);