From patchwork Sun May 8 14:36:14 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12842391 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id E11D0184F for ; Sun, 8 May 2022 14:36:41 +0000 (UTC) IronPort-Data: A9a23:II1mM6IAgxJ2+kkoFE+RVJQlxSXFcZb7ZxGrkP8bfHC81Tkl0WNUxjcbWmHVPP2DYWDzLdByaNi+pB9Q7ZTcm4NqS1BcGVNFFSwT8ZWfbTi6wuYcBwvLd4ubChsPA/w2MrEsF+hpCC+MzvuRGuK59yMkj/nRHuOU5NPsYUideyc1EU/Ntjozw4bVsqYw6TSIK1vlVeHa+qUzC3f5s9JACV/43orYwP9ZUFsejxtD1rA2TagjUFYzDBD5BrpHTU26ByOQroW5goeHq+j/ILGRpgs1/j8mDJWrj7T6blYXBLXVOGBiiFIPA+773EcE/Xd0j87XN9JFAatToy+UltZq2ZNDs4esYQk0PKzQg/lbWB5de817FfQcpOeYfCfj6aR/yGWDKRMA2c5GAEgoPIEw9PxwBGZU//0EbjsKa3irg+OwxbOyTelhrsQ+JdbmPcUUvXQI5THSDd4nR57ZSqnH7NMe2y0/7uhRHPLaduIYbzR1ZRjNahEJPU0YYLoyleHuhD/gcjlcqVuQvoI25XTeyEp6172FGNbXZduMSu1Wk1yeq2aA+H72ajkeNdqC2X+A91qvmObEnmX8Qo16PKe56vNxgF27wm0VFQ1QVFG+5/K+jyaWXcxTKkkR0i4vtrQpskiqSMTtGRG1vhasvhUcc95LD6s25WmlzKPT8g/fBm8eTzFcY9wnnMk7Tnoh0Vrht9HgAzEpu72IYXWH/7yQoHW5Pi19BXUNYisIUhoDy8L+u4x1gh+nZtJiFrOly9PuFTzuzjSisicznfMQgNQN2qH9+krI6xqop57UXks26x/RU2aN8Ax0fsimapau5Fyd6uxPRK6dT1+crD0UldO28u8DF9eOmTaLTeFLG6umj96bMSfbqUxiGZg/sTCs/WOzO4dK73djJy9U3mwsEdPySBaL/1oPu9kIZz33BZKbqrmZU6wCpZUM3/y4PhwMUudzXw== IronPort-HdrOrdr: A9a23:zWyLjKocRwUksGYCzM0ZCQ4aV5rPeYIsimQD101hICG8cqSj+fxG+85rsyMc6QxhIU3I9urhBEDtex/hHNtOkOws1NSZLW7bUQmTXeJfBOLZqlWKcUDDH6xmpMNdmsNFaeEYY2IUsS+D2njbLz8/+qj7zImYwffZ02x2TRxnL4Vp7wJCAA6dFUFsLTM2fqYRJd6N4NZdvTq8dTAyZsS/PHMMWO/OvJnlj5TjCCR2fSIP2U2fiy+y8r7mH1y91hcaaTlGxrAv6izkvmXCl92ej80= X-IronPort-AV: E=Sophos;i="5.88,333,1635177600"; d="scan'208";a="124075740" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 08 May 2022 22:36:31 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id C26704D17198; Sun, 8 May 2022 22:36:28 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Sun, 8 May 2022 22:36:31 +0800 Received: from irides.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Sun, 8 May 2022 22:36:27 +0800 From: Shiyang Ruan To: , , , , CC: , , , , , , , , , , Christoph Hellwig , Ritesh Harjani Subject: [PATCH v11 01/07] fsdax: Output address in dax_iomap_pfn() and rename it Date: Sun, 8 May 2022 22:36:14 +0800 Message-ID: <20220508143620.1775214-9-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> References: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: C26704D17198.AE845 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No Add address output in dax_iomap_pfn() in order to perform a memcpy() in CoW case. Since this function both output address and pfn, rename it to dax_iomap_direct_access(). Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Ritesh Harjani Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong --- fs/dax.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 4d3dfc8bee33..d4f195aeaa12 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1025,8 +1025,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); -static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size, - pfn_t *pfnp) +static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, + size_t size, void **kaddr, pfn_t *pfnp) { pgoff_t pgoff = dax_iomap_pgoff(iomap, pos); int id, rc; @@ -1034,11 +1034,13 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size, id = dax_read_lock(); length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), - NULL, pfnp); + kaddr, pfnp); if (length < 0) { rc = length; goto out; } + if (!pfnp) + goto out_check_addr; rc = -EINVAL; if (PFN_PHYS(length) < size) goto out; @@ -1048,6 +1050,12 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size, if (length > 1 && !pfn_t_devmap(*pfnp)) goto out; rc = 0; + +out_check_addr: + if (!kaddr) + goto out; + if (!*kaddr) + rc = -EFAULT; out: dax_read_unlock(id); return rc; @@ -1444,7 +1452,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS; } - err = dax_iomap_pfn(&iter->iomap, pos, size, &pfn); + err = dax_iomap_direct_access(&iter->iomap, pos, size, NULL, &pfn); if (err) return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err); From patchwork Sun May 8 14:36:15 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12842392 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 983C01850 for ; Sun, 8 May 2022 14:36:42 +0000 (UTC) IronPort-Data: A9a23:Ddf2Ma0SqF7ICATsW/bD5T9wkn2cJEfYwER7XOPLsXnJ0TMl0DdVz2QZWGqCb/rea2emeo1wPty2/U4F68DRndA2QQE+nZ1PZygU8JKaX7x1DatR0xu6d5SFFAQ+hyknQoGowPscEzmM9n9BDpC79SMmjfvQH+KlYAL5EnsZqTFMGX5JZS1Ly7ZRbr5A2bBVMivV0T/Ai5S31GyNh1aYBlkpB5er83uDihhdVAQw5TTSbdgT1LPXeuJ84Jg3fcldJFOgKmVY83LTegrN8F251juxExYFAdXjnKv5c1ERX/jZOg3mZnh+AvDk20Yd4HdplPtT2Pk0MC+7jx2Tgtl308QLu5qrVS8nI6/NhP8AFRJfFkmSOIUfouOffiXg7Zf7I0ruNiGEL+9VJE0/I4wU0uhtBmRJ7/YZNHYGaRXrr+a3xre6Q+5si+wjMcD0MYJZsXZlpRnZBvYOQJbNWazG6NZUmjAqiahmAvfaY9sxaDxhdh3MbhRDfFANB/oWkO6uwHu5bDxcrFOcoLEf4m7PwQg327/oWPLZeMONQ8p9nUuCoG/CuWPjDXkyMN2Z1CrA93eEhfHGliC9X5gdfJW+6PJrhVi7wm0IFAZQUVq9vOn/hkOgM/pfIEw8/jEy66Q/nGStR97sVlu4p2SFsQMXW9t4FeAxrgqKz8L84Q+fCy4PTiNpb8Yvv8s7Azct0zehhdzuATBwobu9Um+G+/GYoFuaPSkTMH9HazQIQBUI5/H9r4wpyBHCVNBuFOiylNKdMTXxxS2a6SsznbMeieYV2Kihu1PKmTShot7OVAFdzgHWWH+1qxN3f6a7aIGyr1vW9/BNKMCeVFbplGYFgc+2/u0IDI/LkC2LXfVLG6umoeuGWAAwK3YH84IJrmzroiD8O9sLpmwWGaugCe5cEReBXaMZkV45CEdvAUaX IronPort-HdrOrdr: A9a23:PmWomq6iKxE9SfrpXAPXwC7XdLJyesId70hD6qhwISY1TiX+rbHWoB17726NtN9/YgBCpTntAsa9qDbnhPpICOoqTNGftWvdyQmVxehZhOOIqVCNJ8S9zJ876U4KSchD4bPLY2SS9fyKhTVQDexQvOWvweS5g/vE1XdxQUVPY6Fk1Q1wDQGWCSRNNXN7LKt8BJyB/dBGujblXXwWa/6wDn4DU/OGiMbMkPvdEGM7Li9i+A+Tlimp9bK/NxCZ2y0VWzRJzaxn0UWtqX2D2pme X-IronPort-AV: E=Sophos;i="5.88,333,1635177600"; d="scan'208";a="124075743" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 08 May 2022 22:36:31 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id C35ED4D17199; Sun, 8 May 2022 22:36:29 +0800 (CST) Received: from G08CNEXJMPEKD02.g08.fujitsu.local (10.167.33.202) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Sun, 8 May 2022 22:36:33 +0800 Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXJMPEKD02.g08.fujitsu.local (10.167.33.202) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Sun, 8 May 2022 22:36:32 +0800 Received: from irides.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Sun, 8 May 2022 22:36:28 +0800 From: Shiyang Ruan To: , , , , CC: , , , , , , , , , , Christoph Hellwig Subject: [PATCH v11 02/07] fsdax: Introduce dax_iomap_cow_copy() Date: Sun, 8 May 2022 22:36:15 +0800 Message-ID: <20220508143620.1775214-10-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> References: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: C35ED4D17199.A1367 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No In the case where the iomap is a write operation and iomap is not equal to srcmap after iomap_begin, we consider it is a CoW operation. In this case, the destination (iomap->addr) points to a newly allocated extent. It is needed to copy the data from srcmap to the extent. In theory, it is better to copy the head and tail ranges which is outside of the non-aligned area instead of copying the whole aligned range. But in dax page fault, it will always be an aligned range. So copy the whole range in this case. Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/dax.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index d4f195aeaa12..a4d56cfa33d0 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1061,6 +1061,60 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, return rc; } +/** + * dax_iomap_cow_copy - Copy the data from source to destination before write + * @pos: address to do copy from. + * @length: size of copy operation. + * @align_size: aligned w.r.t align_size (either PMD_SIZE or PAGE_SIZE) + * @srcmap: iomap srcmap + * @daddr: destination address to copy to. + * + * This can be called from two places. Either during DAX write fault (page + * aligned), to copy the length size data to daddr. Or, while doing normal DAX + * write operation, dax_iomap_actor() might call this to do the copy of either + * start or end unaligned address. In the latter case the rest of the copy of + * aligned ranges is taken care by dax_iomap_actor() itself. + */ +static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size, + const struct iomap *srcmap, void *daddr) +{ + loff_t head_off = pos & (align_size - 1); + size_t size = ALIGN(head_off + length, align_size); + loff_t end = pos + length; + loff_t pg_end = round_up(end, align_size); + bool copy_all = head_off == 0 && end == pg_end; + void *saddr = 0; + int ret = 0; + + ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL); + if (ret) + return ret; + + if (copy_all) { + ret = copy_mc_to_kernel(daddr, saddr, length); + return ret ? -EIO : 0; + } + + /* Copy the head part of the range */ + if (head_off) { + ret = copy_mc_to_kernel(daddr, saddr, head_off); + if (ret) + return -EIO; + } + + /* Copy the tail part of the range */ + if (end < pg_end) { + loff_t tail_off = head_off + length; + loff_t tail_len = pg_end - end; + + ret = copy_mc_to_kernel(daddr + tail_off, saddr + tail_off, + tail_len); + if (ret) + return -EIO; + } + return 0; +} + /* * The user has performed a load from a hole in the file. Allocating a new * page in the file would cause excessive storage usage for workloads with @@ -1231,15 +1285,17 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, struct iov_iter *iter) { const struct iomap *iomap = &iomi->iomap; + const struct iomap *srcmap = &iomi->srcmap; loff_t length = iomap_length(iomi); loff_t pos = iomi->pos; struct dax_device *dax_dev = iomap->dax_dev; loff_t end = pos + length, done = 0; + bool write = iov_iter_rw(iter) == WRITE; ssize_t ret = 0; size_t xfer; int id; - if (iov_iter_rw(iter) == READ) { + if (!write) { end = min(end, i_size_read(iomi->inode)); if (pos >= end) return 0; @@ -1248,7 +1304,12 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, return iov_iter_zero(min(length, end - pos), iter); } - if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) + /* + * In DAX mode, enforce either pure overwrites of written extents, or + * writes to unwritten extents as part of a copy-on-write operation. + */ + if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED && + !(iomap->flags & IOMAP_F_SHARED))) return -EIO; /* @@ -1282,13 +1343,21 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, break; } + if (write && + srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) { + ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap, + kaddr); + if (ret) + break; + } + map_len = PFN_PHYS(map_len); kaddr += offset; map_len -= offset; if (map_len > end - pos) map_len = end - pos; - if (iov_iter_rw(iter) == WRITE) + if (write) xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, map_len, iter); else @@ -1428,6 +1497,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, { struct address_space *mapping = vmf->vma->vm_file->f_mapping; const struct iomap *iomap = &iter->iomap; + const struct iomap *srcmap = &iter->srcmap; size_t size = pmd ? PMD_SIZE : PAGE_SIZE; loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT; bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -1435,6 +1505,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, unsigned long entry_flags = pmd ? DAX_PMD : 0; int err = 0; pfn_t pfn; + void *kaddr; if (!pmd && vmf->cow_page) return dax_fault_cow_page(vmf, iter); @@ -1447,18 +1518,25 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, return dax_pmd_load_hole(xas, vmf, iomap, entry); } - if (iomap->type != IOMAP_MAPPED) { + if (iomap->type != IOMAP_MAPPED && !(iomap->flags & IOMAP_F_SHARED)) { WARN_ON_ONCE(1); return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS; } - err = dax_iomap_direct_access(&iter->iomap, pos, size, NULL, &pfn); + err = dax_iomap_direct_access(iomap, pos, size, &kaddr, &pfn); if (err) return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err); *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags, write && !sync); + if (write && + srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) { + err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr); + if (err) + return dax_fault_return(err); + } + if (sync) return dax_fault_synchronous_pfnp(pfnp, pfn); From patchwork Sun May 8 14:36:16 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12842393 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 44E4D1856 for ; Sun, 8 May 2022 14:36:43 +0000 (UTC) IronPort-Data: A9a23:PTvZ/KiFucNeSA6oz9WCDYUCX1619hEKZh0ujC45NGQNrF6WrkVUxmobUGCCbv7eNGXxfIpwOd+w9hxQ7cDXn95iSlQ+qXw8FHgiRejtX4rAdhiqV8+xwmwvdGo+toNGLICowPkcFhcwnT/wdOixxZVA/fvQHOCkUradYnkZqTJME0/NtzoywobVvaY42bBVMyvV0T/Di5W31G2NglaYAUpIg063ky6Didyp0N8uUvPSUtgQ1LPWvyF94JvyvshdJVOgKmVfNrbSq+ouUNiEEm3lExcFUrtJk57wdAsEX7zTIROTzHFRXsBOgDAb/mprjPl9b6FaNC+7iB3Q9zx14M9QvJqrWEEnOLbQsOoAURhECDw4NqpDkFPCCSHl7ZTMkhKaKRMAxN0rVinaJ7Yw9u9pAG1m++YfLTcXZBGfwemxxdqTTuhqg8UqK8nmFIMCs25tzHfSCvNOaZDIQ43L49FC1Ts9j8wIGuzRD+IGaD5rfTzBZRNVM1saAZ54m/2n7lHzejseqhSKpK4z4mHW1yRw1qTgNJzefdnibclXgUGeqUrF8n7/DxVcM8aQoRKB83SxlqrKmAv4RosZF/u/7PECqEeS2mEICB0+UVq9vOn/i0S7HdlYLiQ8/DQirK033EiqVcXmGRm5pmOU+BIRRbJ4E+Y6wAWW1uzY7m6xAGEDXzcHaNs8tcArTjwr/lmElJXiAjkHmL+cT3/b/beJhTSoMCMRICkJYipsZREK5N3vv5A1pgnSVdslG6mw5vXvFjb0zy+bqgAlmq4ey8IGv42//Fbak3eivZTEUAMxzhvYU3jj7Q5jYoOhIYuy5jDz6fdGMZbcTVSbunUAs9aR4fpIDpyXkiGJBuIXE9mB4/eDLS2ZkVB0N4cu+i7r+HO5e41UpjZkKy9BLMcefhf7bUnSp0VV5ZlOLD2td6AxfoHZNiiA5cAMDvy8DraNMIUIOcM3KWe6EOhVTRb49wjQfIIEzMnT4aumTPs= IronPort-HdrOrdr: A9a23:loNhKKoDNew1bswG0vmCZHgaV5rPeYIsimQD101hICG8cqSj+fxG+85rsyMc6QxhIU3I9urhBEDtex/hHNtOkOws1NSZLW7bUQmTXeJfBOLZqlWKcUDDH6xmpMNdmsNFaeEYY2IUsS+D2njbLz8/+qj7zImYwffZ02x2TRxnL4Vp7wJCAA6dFUFsLTM2fqYRJd6N4NZdvTq8dTAyZsS/PHMMWO/OvJnlj5TjCCR2fSIP2U2fiy+y8r7mH1y91hcaaTlGxrAv6izkvmXCl92ej80= X-IronPort-AV: E=Sophos;i="5.88,333,1635177600"; d="scan'208";a="124075742" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 08 May 2022 22:36:31 +0800 Received: from G08CNEXMBPEKD05.g08.fujitsu.local (unknown [10.167.33.204]) by cn.fujitsu.com (Postfix) with ESMTP id 764594D1719A; Sun, 8 May 2022 22:36:30 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD05.g08.fujitsu.local (10.167.33.204) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Sun, 8 May 2022 22:36:33 +0800 Received: from irides.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Sun, 8 May 2022 22:36:28 +0800 From: Shiyang Ruan To: , , , , CC: , , , , , , , , , , Goldwyn Rodrigues , Christoph Hellwig , Ritesh Harjani Subject: [PATCH v11 03/07] fsdax: Replace mmap entry in case of CoW Date: Sun, 8 May 2022 22:36:16 +0800 Message-ID: <20220508143620.1775214-11-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> References: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: 764594D1719A.AFF7C X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No Replace the existing entry to the newly allocated one in case of CoW. Also, we mark the entry as PAGECACHE_TAG_TOWRITE so writeback marks this entry as writeprotected. This helps us snapshots so new write pagefaults after snapshots trigger a CoW. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Ritesh Harjani Reviewed-by: Darrick J. Wong --- fs/dax.c | 77 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 00d2cb72ec58..78e26204697b 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -828,6 +828,23 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter return 0; } +/* + * MAP_SYNC on a dax mapping guarantees dirty metadata is + * flushed on write-faults (non-cow), but not read-faults. + */ +static bool dax_fault_is_synchronous(const struct iomap_iter *iter, + struct vm_area_struct *vma) +{ + return (iter->flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) && + (iter->iomap.flags & IOMAP_F_DIRTY); +} + +static bool dax_fault_is_cow(const struct iomap_iter *iter) +{ + return (iter->flags & IOMAP_WRITE) && + (iter->iomap.flags & IOMAP_F_SHARED); +} + /* * By this point grab_mapping_entry() has ensured that we have a locked entry * of the appropriate size so we don't have to worry about downgrading PMDs to @@ -835,16 +852,19 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter * already in the tree, we will skip the insertion and just dirty the PMD as * appropriate. */ -static void *dax_insert_entry(struct xa_state *xas, - struct address_space *mapping, struct vm_fault *vmf, - void *entry, pfn_t pfn, unsigned long flags, bool dirty) +static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf, + const struct iomap_iter *iter, void *entry, pfn_t pfn, + unsigned long flags) { + struct address_space *mapping = vmf->vma->vm_file->f_mapping; void *new_entry = dax_make_entry(pfn, flags); + bool dirty = !dax_fault_is_synchronous(iter, vmf->vma); + bool cow = dax_fault_is_cow(iter); if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) { + if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) { unsigned long index = xas->xa_index; /* we are replacing a zero page with block mapping */ if (dax_is_pmd_entry(entry)) @@ -856,12 +876,12 @@ static void *dax_insert_entry(struct xa_state *xas, xas_reset(xas); xas_lock_irq(xas); - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { void *old; dax_disassociate_entry(entry, mapping, false); dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address, - false); + cow); /* * Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or @@ -881,6 +901,9 @@ static void *dax_insert_entry(struct xa_state *xas, if (dirty) xas_set_mark(xas, PAGECACHE_TAG_DIRTY); + if (cow) + xas_set_mark(xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irq(xas); return entry; } @@ -1122,17 +1145,15 @@ static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size, * If this page is ever written to we will re-fault and change the mapping to * point to real DAX storage instead. */ -static vm_fault_t dax_load_hole(struct xa_state *xas, - struct address_space *mapping, void **entry, - struct vm_fault *vmf) +static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf, + const struct iomap_iter *iter, void **entry) { - struct inode *inode = mapping->host; + struct inode *inode = iter->inode; unsigned long vaddr = vmf->address; pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr)); vm_fault_t ret; - *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_ZERO_PAGE, false); + *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE); ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); trace_dax_load_hole(inode, vmf, ret); @@ -1141,7 +1162,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, #ifdef CONFIG_FS_DAX_PMD static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, - const struct iomap *iomap, void **entry) + const struct iomap_iter *iter, void **entry) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; unsigned long pmd_addr = vmf->address & PMD_MASK; @@ -1159,8 +1180,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, goto fallback; pfn = page_to_pfn_t(zero_page); - *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_PMD | DAX_ZERO_PAGE, false); + *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, + DAX_PMD | DAX_ZERO_PAGE); if (arch_needs_pgtable_deposit()) { pgtable = pte_alloc_one(vma->vm_mm); @@ -1193,7 +1214,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, } #else static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, - const struct iomap *iomap, void **entry) + const struct iomap_iter *iter, void **entry) { return VM_FAULT_FALLBACK; } @@ -1427,17 +1448,6 @@ static vm_fault_t dax_fault_return(int error) return vmf_error(error); } -/* - * MAP_SYNC on a dax mapping guarantees dirty metadata is - * flushed on write-faults (non-cow), but not read-faults. - */ -static bool dax_fault_is_synchronous(unsigned long flags, - struct vm_area_struct *vma, const struct iomap *iomap) -{ - return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) - && (iomap->flags & IOMAP_F_DIRTY); -} - /* * When handling a synchronous page fault and the inode need a fsync, we can * insert the PTE/PMD into page tables only after that fsync happened. Skip @@ -1495,13 +1505,11 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, const struct iomap_iter *iter, pfn_t *pfnp, struct xa_state *xas, void **entry, bool pmd) { - struct address_space *mapping = vmf->vma->vm_file->f_mapping; const struct iomap *iomap = &iter->iomap; const struct iomap *srcmap = &iter->srcmap; size_t size = pmd ? PMD_SIZE : PAGE_SIZE; loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT; - bool write = vmf->flags & FAULT_FLAG_WRITE; - bool sync = dax_fault_is_synchronous(iter->flags, vmf->vma, iomap); + bool write = iter->flags & IOMAP_WRITE; unsigned long entry_flags = pmd ? DAX_PMD : 0; int err = 0; pfn_t pfn; @@ -1514,8 +1522,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, if (!write && (iomap->type == IOMAP_UNWRITTEN || iomap->type == IOMAP_HOLE)) { if (!pmd) - return dax_load_hole(xas, mapping, entry, vmf); - return dax_pmd_load_hole(xas, vmf, iomap, entry); + return dax_load_hole(xas, vmf, iter, entry); + return dax_pmd_load_hole(xas, vmf, iter, entry); } if (iomap->type != IOMAP_MAPPED && !(iomap->flags & IOMAP_F_SHARED)) { @@ -1527,8 +1535,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, if (err) return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err); - *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags, - write && !sync); + *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags); if (write && srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) { @@ -1537,7 +1544,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, return dax_fault_return(err); } - if (sync) + if (dax_fault_is_synchronous(iter, vmf->vma)) return dax_fault_synchronous_pfnp(pfnp, pfn); /* insert PMD pfn */ From patchwork Sun May 8 14:36:10 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12842389 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 826EA10ED for ; Sun, 8 May 2022 14:36:39 +0000 (UTC) IronPort-Data: A9a23:BH7Aja0A+gWrByATGPbD5T9wkn2cJEfYwER7XOPLsXnJgD9z3jYAzDQcDWGOO6qNZTGhfdBwOoqy/U4GvsDRzNY2QQE+nZ1PZygU8JKaX7x1DatR0xu6d5SFFAQ+hyknQoGowPscEzmM9n9BDpC79SMmjfvQH+KlYAL5EnsZqTFMGX5JZS1Ly7ZRbr5A2bBVMivV0T/Ai5S31GyNh1aYBlkpB5er83uDihhdVAQw5TTSbdgT1LPXeuJ84Jg3fcldJFOgKmVY83LTegrN8F251juxExYFAdXjnKv5c1ERX/jZOg3mZnh+AvDk20Yd4HdplPtT2Pk0MC+7jx2Tgtl308QLu5qrVS8nI6/NhP8AFRJfFkmSOIUfouOffiXg7ZX7I0ruNiGEL+9VJE0/I4wU0uhtBmRJ7/YZNHYGaRXrr+a3xre6Q+5si+wjMcD0MYJZsXZlpRnZBvYOQJbNWazG6NZUmjAqiahmAvfaY9sxaDxhdh3MbhRDfFANB/oWkO6uwHu5bDxcrFOcoLEf4m7PwQg327/oWPLZeMONQ8p9nUuCoG/CuWPjDXkyMN2Z1CrA93eEhfHGliC9X5gdfJW+6PJrhVi7wm0IFAZQUVq9vOn/hkOgM/pfIEw8/jEy66Q/nGStR97sVlu4p2SFsQMXW9t4FeAxrgqKz8L84Q+fCy4PTiNpb8Yvv8s7Azct0zehhdzuATBwobu9Um+G+/GYoFuaPSkTMH9HazQIQBUI5/H9r4wpyBHCVNBuFOiylNKdMTXxxS2a6SsznbMeieYV2Kihu1PKmTShot7OVAFdzgHWWH+1qxN3f6a7aIGyr1vW9/BNKMCeVFbplGYFgc+2/u0IDI/LkC2LXfVLG6umoeuGWAAwK3YH84IJrmzroiD8O9sLpmwWGaugCe5cEReBXaMZkVk5CEdvAUaX IronPort-HdrOrdr: A9a23:vXo0qa23PhXR77qd1J97oAqjBFQkLtp133Aq2lEZdPRUGvb4qynIpoVj6faUskdoZJhOo6HiBEDtexzhHNtOkO0s1NSZLW/bUQmTXeNfBOLZqlWKcUCTygce79YGT0EUMr3N5DZB4/oSmDPIdurI3uP3jJyAtKPPyWt3VwF2Z+VF5wd9MAySFUp7X2B9dOAEPavZ9sxavCChZHhSSsy6A0MOV+/Fq8aOu4nhZXc9dmMawTjLnTW186T7DhTd+h8fVglEybAk/XOAsyGR3NTZj82G X-IronPort-AV: E=Sophos;i="5.88,333,1635177600"; d="scan'208";a="124075741" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 08 May 2022 22:36:31 +0800 Received: from G08CNEXMBPEKD05.g08.fujitsu.local (unknown [10.167.33.204]) by cn.fujitsu.com (Postfix) with ESMTP id A46A34D16FDF; Sun, 8 May 2022 22:36:25 +0800 (CST) Received: from G08CNEXJMPEKD02.g08.fujitsu.local (10.167.33.202) by G08CNEXMBPEKD05.g08.fujitsu.local (10.167.33.204) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Sun, 8 May 2022 22:36:28 +0800 Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXJMPEKD02.g08.fujitsu.local (10.167.33.202) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Sun, 8 May 2022 22:36:28 +0800 Received: from irides.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Sun, 8 May 2022 22:36:23 +0800 From: Shiyang Ruan To: , , , , CC: , , , , , , , , , , Christoph Hellwig Subject: [PATCH v14 04/07] fsdax: Introduce dax_lock_mapping_entry() Date: Sun, 8 May 2022 22:36:10 +0800 Message-ID: <20220508143620.1775214-5-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> References: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: A46A34D16FDF.A26AD X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No The current dax_lock_page() locks dax entry by obtaining mapping and index in page. To support 1-to-N RMAP in NVDIMM, we need a new function to lock a specific dax entry corresponding to this file's mapping,index. And output the page corresponding to the specific dax entry for caller use. Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/dax.c | 63 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/dax.h | 15 +++++++++++ 2 files changed, 78 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 1ac12e877f4f..57efd3f73655 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -455,6 +455,69 @@ void dax_unlock_page(struct page *page, dax_entry_t cookie) dax_unlock_entry(&xas, (void *)cookie); } +/* + * dax_lock_mapping_entry - Lock the DAX entry corresponding to a mapping + * @mapping: the file's mapping whose entry we want to lock + * @index: the offset within this file + * @page: output the dax page corresponding to this dax entry + * + * Return: A cookie to pass to dax_unlock_mapping_entry() or 0 if the entry + * could not be locked. + */ +dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, pgoff_t index, + struct page **page) +{ + XA_STATE(xas, NULL, 0); + void *entry; + + rcu_read_lock(); + for (;;) { + entry = NULL; + if (!dax_mapping(mapping)) + break; + + xas.xa = &mapping->i_pages; + xas_lock_irq(&xas); + xas_set(&xas, index); + entry = xas_load(&xas); + if (dax_is_locked(entry)) { + rcu_read_unlock(); + wait_entry_unlocked(&xas, entry); + rcu_read_lock(); + continue; + } + if (!entry || + dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + /* + * Because we are looking for entry from file's mapping + * and index, so the entry may not be inserted for now, + * or even a zero/empty entry. We don't think this is + * an error case. So, return a special value and do + * not output @page. + */ + entry = (void *)~0UL; + } else { + *page = pfn_to_page(dax_to_pfn(entry)); + dax_lock_entry(&xas, entry); + } + xas_unlock_irq(&xas); + break; + } + rcu_read_unlock(); + return (dax_entry_t)entry; +} + +void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index, + dax_entry_t cookie) +{ + XA_STATE(xas, &mapping->i_pages, index); + + if (cookie == ~0UL) + return; + + dax_unlock_entry(&xas, (void *)cookie); +} + /* * Find page cache entry at given index. If it is a DAX entry, return it * with the entry locked. If the page cache doesn't contain an entry at diff --git a/include/linux/dax.h b/include/linux/dax.h index 9c426a207ba8..c152f315d1c9 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -143,6 +143,10 @@ struct page *dax_layout_busy_page(struct address_space *mapping); struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end); dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); +dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, + unsigned long index, struct page **page); +void dax_unlock_mapping_entry(struct address_space *mapping, + unsigned long index, dax_entry_t cookie); #else static inline struct page *dax_layout_busy_page(struct address_space *mapping) { @@ -170,6 +174,17 @@ static inline dax_entry_t dax_lock_page(struct page *page) static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) { } + +static inline dax_entry_t dax_lock_mapping_entry(struct address_space *mapping, + unsigned long index, struct page **page) +{ + return 0; +} + +static inline void dax_unlock_mapping_entry(struct address_space *mapping, + unsigned long index, dax_entry_t cookie) +{ +} #endif int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, From patchwork Sun May 8 14:36:18 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12842396 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 7B52B1856 for ; Sun, 8 May 2022 14:36:46 +0000 (UTC) IronPort-Data: A9a23:x+ieNKvVCV+9tZ5NfARi/gUfvOfnVK9fMUV32f8akzHdYEJGY0x3nTEcDWuHOamJYjDyLtwiYd628ksO6JSAn9QwTAdkrXhgHilAwSbnLY7Hdx+vZUt+DSFioHpPtpxYMp+ZRCwNZie0SiyFb/6x/RGQ6YnSHuCmULScY3goLeNZYHxJZSxLyrdRbrFA0YDR7zOl4bsekuWHULOX82cc3lE8t8pvnChSUMHa41v0iLCRicdj5zcyn1FNZH4WyDrYw3HQGuG4FcbiLwrPIS3Qw4/Xw/stIovNfrfTeUtMTKPQPBSVlzxdXK3Kbhpq/3R0i/hkcqFHLxo/ZzahxridzP1XqJW2UhZvMKvXhMwTThtZDzpje6ZB/dcrJFDm65DLkBCZLyuEL/JGSRte0Zcj0up+H2BC3fICLzUKdBqCm6S9x7fTYu1tgMEiJc7rMasfp3h/wDCfBvEjKbjDSKXi5NlWxj48i8lCW/HEaKIxdjtraAXoYhtBIF4bBZsy2uCyiRHXfzRe7lDTuqsz52nayRdZ0b7xPd6TcduPLe1ZnFmfoG3u/GnjBBwectuFxlKt9nOqm/+KmCbTW5wbH77+8eRl6HWV2GASDRg+UVqgveL/jk+4RsIZJ0EKkgIupqga8Fe3CNXwNzW+qXmVt1gcXMBRHPAx6AClzKffpQ2eAwAsTDdHZZottNIeQiYj3VuE2djuAFRHqrKSTX6C57G8ti6pNG4eKmpqTTULSg8J/MjliJoulR+JQtsLOKq0iMDlXD/rzz2UoSwWmboel4gI2r+98FSBhCijzrDNTwgo9kDUU3ij4wdReoGofcqr5ELd4PIGK5yWJnGFvX4Zi42O4vsmE56AjmqOTf8LEbXv4OyKWBXCgERoN4ss8TWzvXqie51ApjZkKwF0Ma45lZXBCKPIkVoJosYNYz3xNukqC79dwv8ClcDIfekJnNiIBjaWXqVMSQ== IronPort-HdrOrdr: A9a23:FvquYKrPgnUYWLwJImAm1kgaV5rDeYIsimQD101hICG8cqSj+fxG+85rsSMc6QxhP03I9urhBEDtex/hHP1OkOws1NWZLWrbUQKTRekIh+bfKlbbehEWmNQz6U4ZSdkdNDTvNykAse/KpBm/D807wMSKtIShheLlxX9rSg1wApsQljtRO0KKFFFsXglaCd4cHJqY3MBOoD2tYjA5dcK+b0N1J9Trlpnako78ex4aC1oC4AmKtzmh77n3CFy5834lIlVy/Ys= X-IronPort-AV: E=Sophos;i="5.88,333,1635177600"; d="scan'208";a="124075748" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 08 May 2022 22:36:38 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id 2724E4D1718C; Sun, 8 May 2022 22:36:32 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Sun, 8 May 2022 22:36:35 +0800 Received: from irides.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Sun, 8 May 2022 22:36:30 +0800 From: Shiyang Ruan To: , , , , CC: , , , , , , , , , , Goldwyn Rodrigues , Christoph Hellwig Subject: [PATCH v11 05/07] fsdax: Dedup file range to use a compare function Date: Sun, 8 May 2022 22:36:18 +0800 Message-ID: <20220508143620.1775214-13-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> References: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: 2724E4D1718C.A1607 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No With dax we cannot deal with readpage() etc. So, we create a dax comparison function which is similar with vfs_dedupe_file_range_compare(). And introduce dax_remap_file_range_prep() for filesystem use. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/dax.c | 82 ++++++++++++++++++++++++++++++++++++++++++++ fs/remap_range.c | 31 ++++++++++++++--- fs/xfs/xfs_reflink.c | 8 +++-- include/linux/dax.h | 8 +++++ include/linux/fs.h | 12 ++++--- 5 files changed, 130 insertions(+), 11 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index b3aa863e9fec..601a23c6378c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1860,3 +1860,85 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, return dax_insert_pfn_mkwrite(vmf, pfn, order); } EXPORT_SYMBOL_GPL(dax_finish_sync_fault); + +static loff_t dax_range_compare_iter(struct iomap_iter *it_src, + struct iomap_iter *it_dest, u64 len, bool *same) +{ + const struct iomap *smap = &it_src->iomap; + const struct iomap *dmap = &it_dest->iomap; + loff_t pos1 = it_src->pos, pos2 = it_dest->pos; + void *saddr, *daddr; + int id, ret; + + len = min(len, min(smap->length, dmap->length)); + + if (smap->type == IOMAP_HOLE && dmap->type == IOMAP_HOLE) { + *same = true; + return len; + } + + if (smap->type == IOMAP_HOLE || dmap->type == IOMAP_HOLE) { + *same = false; + return 0; + } + + id = dax_read_lock(); + ret = dax_iomap_direct_access(smap, pos1, ALIGN(pos1 + len, PAGE_SIZE), + &saddr, NULL); + if (ret < 0) + goto out_unlock; + + ret = dax_iomap_direct_access(dmap, pos2, ALIGN(pos2 + len, PAGE_SIZE), + &daddr, NULL); + if (ret < 0) + goto out_unlock; + + *same = !memcmp(saddr, daddr, len); + if (!*same) + len = 0; + dax_read_unlock(id); + return len; + +out_unlock: + dax_read_unlock(id); + return -EIO; +} + +int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dst, loff_t dstoff, loff_t len, bool *same, + const struct iomap_ops *ops) +{ + struct iomap_iter src_iter = { + .inode = src, + .pos = srcoff, + .len = len, + .flags = IOMAP_DAX, + }; + struct iomap_iter dst_iter = { + .inode = dst, + .pos = dstoff, + .len = len, + .flags = IOMAP_DAX, + }; + int ret; + + while ((ret = iomap_iter(&src_iter, ops)) > 0) { + while ((ret = iomap_iter(&dst_iter, ops)) > 0) { + dst_iter.processed = dax_range_compare_iter(&src_iter, + &dst_iter, len, same); + } + if (ret <= 0) + src_iter.processed = ret; + } + return ret; +} + +int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *ops) +{ + return __generic_remap_file_range_prep(file_in, pos_in, file_out, + pos_out, len, remap_flags, ops); +} +EXPORT_SYMBOL_GPL(dax_remap_file_range_prep); diff --git a/fs/remap_range.c b/fs/remap_range.c index e112b5424cdb..231de627c1b9 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internal.h" #include @@ -271,9 +272,11 @@ static int vfs_dedupe_file_range_compare(struct file *src, loff_t srcoff, * If there's an error, then the usual negative error code is returned. * Otherwise returns 0 with *len set to the request length. */ -int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t *len, unsigned int remap_flags) +int +__generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *dax_read_ops) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -333,8 +336,18 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, if (remap_flags & REMAP_FILE_DEDUP) { bool is_same = false; - ret = vfs_dedupe_file_range_compare(file_in, pos_in, - file_out, pos_out, *len, &is_same); + if (*len == 0) + return 0; + + if (!IS_DAX(inode_in)) + ret = vfs_dedupe_file_range_compare(file_in, pos_in, + file_out, pos_out, *len, &is_same); + else if (dax_read_ops) + ret = dax_dedupe_file_range_compare(inode_in, pos_in, + inode_out, pos_out, *len, &is_same, + dax_read_ops); + else + return -EINVAL; if (ret) return ret; if (!is_same) @@ -352,6 +365,14 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, return ret; } + +int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags) +{ + return __generic_remap_file_range_prep(file_in, pos_in, file_out, + pos_out, len, remap_flags, NULL); +} EXPORT_SYMBOL(generic_remap_file_range_prep); loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 1ae6d3434ad2..10a9947e35d9 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1342,8 +1342,12 @@ xfs_reflink_remap_prep( if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; - ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - len, remap_flags); + if (!IS_DAX(inode_in)) + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, + pos_out, len, remap_flags); + else + ret = dax_remap_file_range_prep(file_in, pos_in, file_out, + pos_out, len, remap_flags, &xfs_read_iomap_ops); if (ret || *len == 0) goto out_unlock; diff --git a/include/linux/dax.h b/include/linux/dax.h index c152f315d1c9..f955a2dc96cb 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -228,6 +228,14 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); +int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same, + const struct iomap_ops *ops); +int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *ops); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); diff --git a/include/linux/fs.h b/include/linux/fs.h index c2a61e0f43de..dfc981fef3c3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -74,6 +74,7 @@ struct fsverity_operations; struct fs_context; struct fs_parameter_spec; struct fileattr; +struct iomap_ops; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2055,10 +2056,13 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags); -extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t *count, - unsigned int remap_flags); +int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *dax_read_ops); +int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *count, unsigned int remap_flags); extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); From patchwork Sun May 8 14:36:19 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12842395 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 0DE78184F for ; Sun, 8 May 2022 14:36:43 +0000 (UTC) IronPort-Data: A9a23:67OTtqrXk3vGNlpS4cq9wbADwQZeBmLAZBIvgKrLsJaIsI5as4F+vmZKDT2DbqyPYzSgKdwjbNi28RkDvJPWmoNkG1dlqSAyQiMRo6IpJ/zDcB6oYHn6wu4v7a5fx5xHLIGGdajYd1eEzvuWGuWn/SkUOZ2gHOKmUraeYnkpHGeIdQ964f5ds79g6mJXqYjha++9kYuaT/z3YDdJ6RYtWo4nw/7rRCdUgRjHkGhwUmrSyhx8lAS2e3E9VPrzLEwqRpfyatE88uWSH44vwFwll1418SvBCvv9+lr6WkYMBLDPPwmSkWcQUK+n6vRAjnVqlP9la7xHMgEK49mKt4kZJNFlr4G5Txw4eKPKg/g1XQRaEj1lIOtN/7qvzX2X6JbPlxKbLCa0qxlpJARsVWECwc57CH9P+dQWMjcIaQqJhv7wy7W+IsFoh8ImLcDsPI43umxp0jzYS/0hRPjrQ67Kzd5e0i05is1HEbDZfcVxQSVuaBDRSxxJNE0eBJ83kKGvnHaXWzFRrhSX47U252zSxQlq+LnrLNfRPNeNQK19kkSHoWTJ12f0GBcXMJqY0zXt2natgPLf2Cb+cIEMHba7sPlwjzW7wHIfCRgTfV+6uuWizEq/Xc9PbUAZ5EIGq6E15UXtTt7nXhKlq36FlhgRUJxbFOhSwAOEzKeS6AaELm8eRzVFZZots8pebSYl0VuFgMLvLSdyq7DTRX/13rOVqy6ifCYOIWIcaCssUwQI+Z/grZs1gxaJScxseIaxj9voCXTzziqMoSwWmboel4gI2r+98FSBhCijzrDNTwgo9kDHUHmN8Ax0fsimapau5Fyd6uxPRK6HT0OGlGoJncmAquQPC4yd0iuXT6MQH9mUC1ytWNHHqQc3WcB/qHL2oDj+Fb28KQpWfC9BWvvosxe1CKMLhT5s2Q== IronPort-HdrOrdr: A9a23:UL8onK5TuglLwacnBQPXwPTXdLJyesId70hD6qkRc20wTiX8ra2TdZsguyMc9wx6ZJhNo7G90cq7MBbhHPxOkOos1N6ZNWGIhILCFvAB0WKN+V3dMhy73utc+IMlSKJmFeD3ZGIQse/KpCW+DPYsqePqzJyV X-IronPort-AV: E=Sophos;i="5.88,333,1635177600"; d="scan'208";a="124075750" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 08 May 2022 22:36:38 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id F31414D17197; Sun, 8 May 2022 22:36:32 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Sun, 8 May 2022 22:36:36 +0800 Received: from irides.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Sun, 8 May 2022 22:36:31 +0800 From: Shiyang Ruan To: , , , , CC: , , , , , , , , , Subject: [PATCH v11 06/07] xfs: support CoW in fsdax mode Date: Sun, 8 May 2022 22:36:19 +0800 Message-ID: <20220508143620.1775214-14-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> References: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: F31414D17197.AEC13 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No In fsdax mode, WRITE and ZERO on a shared extent need CoW performed. After that, new allocated extents needs to be remapped to the file. So, add a CoW identification in ->iomap_begin(), and implement ->iomap_end() to do the remapping work. Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_file.c | 7 ++----- fs/xfs/xfs_iomap.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_iomap.h | 3 +++ 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index af954a5b71f8..5a4508b23b51 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -669,7 +669,7 @@ xfs_file_dax_write( pos = iocb->ki_pos; trace_xfs_file_dax_write(iocb, from); - ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -1285,10 +1285,7 @@ __xfs_filemap_fault( pfn_t pfn; xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, - (write_fault && !vmf->cow_page) ? - &xfs_direct_write_iomap_ops : - &xfs_read_iomap_ops); + ret = xfs_dax_fault(vmf, pe_size, write_fault, &pfn); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 5a393259a3a3..e35842215d22 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -27,6 +27,7 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_reflink.h" +#include "linux/dax.h" #define XFS_ALLOC_ALIGN(mp, off) \ (((off) >> mp->m_allocsize_log) << mp->m_allocsize_log) @@ -773,7 +774,8 @@ xfs_direct_write_iomap_begin( /* may drop and re-acquire the ilock */ error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, - &lockmode, flags & IOMAP_DIRECT); + &lockmode, + (flags & IOMAP_DIRECT) || IS_DAX(inode)); if (error) goto out_unlock; if (shared) @@ -867,6 +869,33 @@ const struct iomap_ops xfs_direct_write_iomap_ops = { .iomap_begin = xfs_direct_write_iomap_begin, }; +static int +xfs_dax_write_iomap_end( + struct inode *inode, + loff_t pos, + loff_t length, + ssize_t written, + unsigned flags, + struct iomap *iomap) +{ + struct xfs_inode *ip = XFS_I(inode); + + if (!xfs_is_cow_inode(ip)) + return 0; + + if (!written) { + xfs_reflink_cancel_cow_range(ip, pos, length, true); + return 0; + } + + return xfs_reflink_end_cow(ip, pos, written); +} + +const struct iomap_ops xfs_dax_write_iomap_ops = { + .iomap_begin = xfs_direct_write_iomap_begin, + .iomap_end = xfs_dax_write_iomap_end, +}; + static int xfs_buffered_write_iomap_begin( struct inode *inode, @@ -1358,3 +1387,18 @@ xfs_truncate_page( return iomap_truncate_page(inode, pos, did_zero, &xfs_buffered_write_iomap_ops); } + +#ifdef CONFIG_FS_DAX +int +xfs_dax_fault( + struct vm_fault *vmf, + enum page_entry_size pe_size, + bool write_fault, + pfn_t *pfn) +{ + return dax_iomap_fault(vmf, pe_size, pfn, NULL, + (write_fault && !vmf->cow_page) ? + &xfs_dax_write_iomap_ops : + &xfs_read_iomap_ops); +} +#endif diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index e88dc162c785..89dfa3bb099f 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -25,6 +25,8 @@ int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap, int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len, bool *did_zero); int xfs_truncate_page(struct xfs_inode *ip, loff_t pos, bool *did_zero); +int xfs_dax_fault(struct vm_fault *vmf, enum page_entry_size pe_size, + bool write_fault, pfn_t *pfn); static inline xfs_filblks_t xfs_aligned_fsb_count( @@ -51,5 +53,6 @@ extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops; +extern const struct iomap_ops xfs_dax_write_iomap_ops; #endif /* __XFS_IOMAP_H__*/ From patchwork Sun May 8 14:36:13 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12842390 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 845F1110A for ; Sun, 8 May 2022 14:36:39 +0000 (UTC) IronPort-Data: A9a23:dgDLBagHbyfnfMQ3dbsb8h3gX161jBEKZh0ujC45NGQNrF6WrkVRzGYfDW6FOazeMWD0edkjPou+8U5VsJCEyd5mGgE4q3w8FHgiRejtX4rAdhiqV8+xwmwvdGo+toNGLICowPkcFhcwnT/wdOixxZVA/fvQHOCkUradYnkZqTJME0/NtzoywobVvaY42bBVMyvV0T/Di5W31G2NglaYAUpIg063ky6Didyp0N8uUvPSUtgQ1LPWvyF94JvyvshdJVOgKmVfNrbSq+ouUNiEEm3lExcFUrtJk57wdAsEX7zTIROTzHFRXsBOgDAb/mprjPl9b6FaNC+7iB3Q9zx14M9QvJqrWEEnOLbQsOoAURhECDw4NqpDkFPCCSHl7ZTMkhKdIhMAxN0rVinaJ7Yw9u9pAG1m++YfLTcXZBGfwemxxdqTTuhqg8UqK8nmFIMCs25tzHfSCvNOaZDIQ43L49FC1Ts9j8wIGuzRD+IGaD5rfTzBZRNVM1saAZ54m/2n7lHzejseqhSKpK4z4mHW1yRw1qTgNJzefdnibclXgUGeqUrF8n7/DxVcM8aQoRKB83SxlqrKmAv4RosZF/u/7PECqFuNym0WDTUSVECnur+9i0ijS5RTJlJ80iolrYA271DtQtSVdxuxp2+N+B4bQdtfDuY66SmLx6GS6AGcbkAGRzhMLtcmqecxXzUh0lLPlNTsbRR1v7qRRW2M8J+PsCi/fyQYRUcGZCkZXU4L+NXuvow3pgzAQ8wlE6OviNDxXzbqzFiiqCk4mqVWjsMR0ai/1U7IjijqpZXTSAMxoALNUQqN6gJ/eZ7gd4KzwUbU4OwGL4uDSFSF+n8elKC28uEUCrmfmSqMXqMJHbe097CCKjKanF0HInWL31xB4Fb6JcYJvm44fxwvb645lfbSSBe7kWtsCFV7ZhNGtZNKXr8= IronPort-HdrOrdr: A9a23:/K8r5ar5PcUalDfTj2WU/e4aV5rPeYIsimQD101hICG8cqSj+fxG+85rsyMc6QxhIU3I9urhBEDtex/hHNtOkOws1NSZLW7bUQmTXeJfBOLZqlWKcUDDH6xmpMNdmsNFaeEYY2IUsS+D2njbLz8/+qj7zImYwffZ02x2TRxnL4Vp7wJCAA6dFUFsLTM2fqYRJd6N4NZdvTq8dTAyZsS/PHMMWO/OvJnlj5TjCCR2fSIP2U2fiy+y8r7mH1y91hcaaTlGxrAv6izkvmXCl92ej80= X-IronPort-AV: E=Sophos;i="5.88,333,1635177600"; d="scan'208";a="124075739" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 08 May 2022 22:36:31 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id E5CF24D17197; Sun, 8 May 2022 22:36:27 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Sun, 8 May 2022 22:36:31 +0800 Received: from irides.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Sun, 8 May 2022 22:36:26 +0800 From: Shiyang Ruan To: , , , , CC: , , , , , , , , , , Christoph Hellwig Subject: [PATCH v14 07/07] fsdax: set a CoW flag when associate reflink mappings Date: Sun, 8 May 2022 22:36:13 +0800 Message-ID: <20220508143620.1775214-8-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> References: <20220508143620.1775214-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: E5CF24D17197.AF878 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No Introduce a PAGE_MAPPING_DAX_COW flag to support association with CoW file mappings. In this case, since the dax-rmap has already took the responsibility to look up for shared files by given dax page, the page->mapping is no longer to used for rmap but for marking that this dax page is shared. And to make sure disassociation works fine, we use page->index as refcount, and clear page->mapping to the initial state when page->index is decreased to 0. With the help of this new flag, it is able to distinguish normal case and CoW case, and keep the warning in normal case. Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/dax.c | 50 +++++++++++++++++++++++++++++++------- include/linux/page-flags.h | 6 +++++ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 57efd3f73655..4d3dfc8bee33 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -334,13 +334,35 @@ static unsigned long dax_end_pfn(void *entry) for (pfn = dax_to_pfn(entry); \ pfn < dax_end_pfn(entry); pfn++) +static inline bool dax_mapping_is_cow(struct address_space *mapping) +{ + return (unsigned long)mapping == PAGE_MAPPING_DAX_COW; +} + /* - * TODO: for reflink+dax we need a way to associate a single page with - * multiple address_space instances at different linear_page_index() - * offsets. + * Set the page->mapping with FS_DAX_MAPPING_COW flag, increase the refcount. + */ +static inline void dax_mapping_set_cow(struct page *page) +{ + if ((uintptr_t)page->mapping != PAGE_MAPPING_DAX_COW) { + /* + * Reset the index if the page was already mapped + * regularly before. + */ + if (page->mapping) + page->index = 1; + page->mapping = (void *)PAGE_MAPPING_DAX_COW; + } + page->index++; +} + +/* + * When it is called in dax_insert_entry(), the cow flag will indicate that + * whether this entry is shared by multiple files. If so, set the page->mapping + * FS_DAX_MAPPING_COW, and use page->index as refcount. */ static void dax_associate_entry(void *entry, struct address_space *mapping, - struct vm_area_struct *vma, unsigned long address) + struct vm_area_struct *vma, unsigned long address, bool cow) { unsigned long size = dax_entry_size(entry), pfn, index; int i = 0; @@ -352,9 +374,13 @@ static void dax_associate_entry(void *entry, struct address_space *mapping, for_each_mapped_pfn(entry, pfn) { struct page *page = pfn_to_page(pfn); - WARN_ON_ONCE(page->mapping); - page->mapping = mapping; - page->index = index + i++; + if (cow) { + dax_mapping_set_cow(page); + } else { + WARN_ON_ONCE(page->mapping); + page->mapping = mapping; + page->index = index + i++; + } } } @@ -370,7 +396,12 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping, struct page *page = pfn_to_page(pfn); WARN_ON_ONCE(trunc && page_ref_count(page) > 1); - WARN_ON_ONCE(page->mapping && page->mapping != mapping); + if (dax_mapping_is_cow(page->mapping)) { + /* keep the CoW flag if this page is still shared */ + if (page->index-- > 0) + continue; + } else + WARN_ON_ONCE(page->mapping && page->mapping != mapping); page->mapping = NULL; page->index = 0; } @@ -829,7 +860,8 @@ static void *dax_insert_entry(struct xa_state *xas, void *old; dax_disassociate_entry(entry, mapping, false); - dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address); + dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address, + false); /* * Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index fe47ee8dc258..cad9aeb5e75c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -650,6 +650,12 @@ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) +/* + * Different with flags above, this flag is used only for fsdax mode. It + * indicates that this page->mapping is now under reflink case. + */ +#define PAGE_MAPPING_DAX_COW 0x1 + static __always_inline int PageMappingFlags(struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;