From patchwork Tue Sep 28 06:23:04 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12521855 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 2841972 for ; Tue, 28 Sep 2021 06:23:26 +0000 (UTC) IronPort-Data: A9a23:GRWP7azeUug2UwtVswB6t+cnxyrEfRIJ4+MujC/XYbTApDJwhDIBxmBKC2uPbKqKYGPwe492a4qzp00Pu5aBz4A3HQtv/xmBbVoQ95OdWo7xwmQcns+qBpSaChohtq3yU/GYRCwPZiKa9krF3oTJ9yEmjPnZHOqkUYYoBwgqLeNaYHZ44f5cs75h6mJYqYDR7zKl4bsekeWGULOW82Ic3lYv1k62gEgHUMIeF98vlgdWifhj5DcynpSOZX4VDfnZw3DQGuG4EgMmLtsvwo1V/kuBl/ssItij1LjmcEwWWaOUNg+L4pZUc/H6xEEc+WppieBmXBYfQR4/ZzGhhc14zs5c85K2UhsBMLDOmfgGTl9TFCQW0ahuoeaaeSnh4JHNp6HBWz62qxl0N2k6NJMZ9s55G2ZL8uYSKSxLZReG78qpwba/W8FtgMo5JcXmNY9ZvWtvpRnVBPBgQ9bcQqHO5NZdxx8xgNxDGbDVYM9xQTZtcxPGbDVMN00RBZZ4m/2n7lHlciFVs1KVja425XXDig171f7mN9+9UtqUScRQm26cp3na5CL9AxcHJJqTxCTt2nKnhsfLhj+9VI96PKe38fpmn0yV7ncOExBQWVbTif24jFOuHtxEJ0EK9y4Gs6c/7gqoQ8P7Uhn+p2SL1jYYWtxNA6g55RuLx678/QmUHC4HQyRHZdhgs9U5LRQu11mUj5b5CydHrrKYUzSe+62SoDf0PjIaRUccZDUDZRkI5dj95oUyiA/fCNF5H+iojbXI9ZvYq9yRhHFmwexN0ohQjOPmlW0rSgmE/vDhJjPZLC2ONo590j5EWQ== IronPort-HdrOrdr: A9a23:b+kR1qMzyEAD/MBcTv2jsMiBIKoaSvp037BL7TEUdfUxSKGlfq+V8sjzqiWftN98YhAdcLO7Scy9qBHnhP1ICOAqVN/MYOCMghrLEGgN1+vf6gylMyj/28oY7q14bpV5YeeaMXFKyer8/ym0euxN/OW6 X-IronPort-AV: E=Sophos;i="5.85,328,1624291200"; d="scan'208";a="115096956" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 28 Sep 2021 14:23:26 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id 75C8B4D0DC7A; Tue, 28 Sep 2021 14:23:24 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Tue, 28 Sep 2021 14:23:13 +0800 Received: from irides.mr.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Tue, 28 Sep 2021 14:23:12 +0800 From: Shiyang Ruan To: , , , CC: , , , , , , , , Ritesh Harjani Subject: [PATCH v10 1/8] fsdax: Output address in dax_iomap_pfn() and rename it Date: Tue, 28 Sep 2021 14:23:04 +0800 Message-ID: <20210928062311.4012070-2-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> References: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: 75C8B4D0DC7A.A1011 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No Add address output in dax_iomap_pfn() in order to perform a memcpy() in CoW case. Since this function both output address and pfn, rename it to dax_iomap_direct_access(). Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Ritesh Harjani Reviewed-by: Dan Williams Reviewed-by: Darrick J. Wong --- fs/dax.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 4e3e5a283a91..8b482a58acae 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1010,8 +1010,8 @@ static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos) return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9; } -static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size, - pfn_t *pfnp) +static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, + size_t size, void **kaddr, pfn_t *pfnp) { const sector_t sector = dax_iomap_sector(iomap, pos); pgoff_t pgoff; @@ -1023,11 +1023,13 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size, return rc; id = dax_read_lock(); length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), - NULL, pfnp); + kaddr, pfnp); if (length < 0) { rc = length; goto out; } + if (!pfnp) + goto out_check_addr; rc = -EINVAL; if (PFN_PHYS(length) < size) goto out; @@ -1037,6 +1039,12 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size, if (length > 1 && !pfn_t_devmap(*pfnp)) goto out; rc = 0; + +out_check_addr: + if (!kaddr) + goto out; + if (!*kaddr) + rc = -EFAULT; out: dax_read_unlock(id); return rc; @@ -1401,7 +1409,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS; } - err = dax_iomap_pfn(&iter->iomap, pos, size, &pfn); + err = dax_iomap_direct_access(&iter->iomap, pos, size, NULL, &pfn); if (err) return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err); From patchwork Tue Sep 28 06:23:05 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12521857 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id B631E3FD5 for ; Tue, 28 Sep 2021 06:23:27 +0000 (UTC) IronPort-Data: A9a23:YaM8Zq2h9D1ji+8C1PbD5ahwkn2cJEfYwER7XOPLsXnJ02kr1GFUzWVJC2GAP/uIZmChedhzb97gpEtX6sXTnNQ2QQE+nZ1PZygU8JKaX7x1DatR0xu6d5SFFAQ+hyknQoGowPscEzmM+39BDpC79SMljfDSFuKlYAL5EnsZqTFMGX5JZS1Ly7ZRbr5A2bBVMivV0T/Ai5S31GyNh1aYBlkpB5er83uDihhdVAQw5TTSbdgT1LPXeuJ84Jg3fcldJFOgKmVY83LTegrN8F251juxExYFAdXjnKv5c1ERX/jZOg3mZnh+AvDk20Yd4HdplPtT2Pk0MC+7jx2Tgtl308QLu5qrVS8nI6/NhP8AFRJfFkmSOIUfoeKfcCbu7JP7I0ruNiGEL+9VJE0/I4wU0uhtBmRJ7/YZNHYGaRXrr/23xLaqYuhqiN4qIMTiMMUYoH4I5T3QC7AkB4/CR6HL7NpD9DY2ms1KW/3ZYqIxZThwaxLPSx5CIFEaDNQ5hujArn/hfzxdrXqRpLEr+C7XzQpswP7hPcS9UtyBRe1RhVreqm+u1372BRUWK82Z4SGY6X/qiuKntSf6Xp8CUbOj+vN0jVm72GMeElsVWEG9rP3/jVSxM/pbKkoJ6m8toLI0+UiDUNbwRVu7rWSCsxpaXMBfe8U+6QeQ2u/E7R2xGGcJVHhCZcYguctwQiYlvneXnsnuLS5itryLD3ac8KqE6zSoNm4ILgc/iYUsJecey4C75tht0VSUFZA+eJNZR+bdQVnYqw1mZgBl71nLsfM26g== IronPort-HdrOrdr: A9a23:f/QaSq+CyopyD6CNCipuk+DkI+orL9Y04lQ7vn2ZKCYlFvBw8vrCoB1173HJYUkqMk3I9ergBEDiewK4yXcW2/hzAV7KZmCP11dAR7sSj7cKrQeBJwTOssZZ1YpFN5N1EcDMCzFB5vrS0U2VFMkBzbC8nJyVuQ== X-IronPort-AV: E=Sophos;i="5.85,328,1624291200"; d="scan'208";a="115096957" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 28 Sep 2021 14:23:26 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id 849F64D0DC85; Tue, 28 Sep 2021 14:23:25 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Tue, 28 Sep 2021 14:23:24 +0800 Received: from irides.mr.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Tue, 28 Sep 2021 14:23:23 +0800 From: Shiyang Ruan To: , , , CC: , , , , , , , Subject: [PATCH v10 2/8] fsdax: Introduce dax_iomap_cow_copy() Date: Tue, 28 Sep 2021 14:23:05 +0800 Message-ID: <20210928062311.4012070-3-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> References: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: 849F64D0DC85.A18DD X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No In the case where the iomap is a write operation and iomap is not equal to srcmap after iomap_begin, we consider it is a CoW operation. In this case, the destination (iomap->addr) points to a newly allocated extent. It is needed to copy the data from srcmap to the extent. In theory, it is better to copy the head and tail ranges which is outside of the non-aligned area instead of copying the whole aligned range. But in dax page fault, it will always be an aligned range. So copy the whole range in this case. Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/dax.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 8b482a58acae..dded08be54dc 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1050,6 +1050,60 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, return rc; } +/** + * dax_iomap_cow_copy - Copy the data from source to destination before write + * @pos: address to do copy from. + * @length: size of copy operation. + * @align_size: aligned w.r.t align_size (either PMD_SIZE or PAGE_SIZE) + * @srcmap: iomap srcmap + * @daddr: destination address to copy to. + * + * This can be called from two places. Either during DAX write fault (page + * aligned), to copy the length size data to daddr. Or, while doing normal DAX + * write operation, dax_iomap_actor() might call this to do the copy of either + * start or end unaligned address. In the latter case the rest of the copy of + * aligned ranges is taken care by dax_iomap_actor() itself. + */ +static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size, + const struct iomap *srcmap, void *daddr) +{ + loff_t head_off = pos & (align_size - 1); + size_t size = ALIGN(head_off + length, align_size); + loff_t end = pos + length; + loff_t pg_end = round_up(end, align_size); + bool copy_all = head_off == 0 && end == pg_end; + void *saddr = 0; + int ret = 0; + + ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL); + if (ret) + return ret; + + if (copy_all) { + ret = copy_mc_to_kernel(daddr, saddr, length); + return ret ? -EIO : 0; + } + + /* Copy the head part of the range */ + if (head_off) { + ret = copy_mc_to_kernel(daddr, saddr, head_off); + if (ret) + return -EIO; + } + + /* Copy the tail part of the range */ + if (end < pg_end) { + loff_t tail_off = head_off + length; + loff_t tail_len = pg_end - end; + + ret = copy_mc_to_kernel(daddr + tail_off, saddr + tail_off, + tail_len); + if (ret) + return -EIO; + } + return 0; +} + /* * The user has performed a load from a hole in the file. Allocating a new * page in the file would cause excessive storage usage for workloads with @@ -1175,16 +1229,18 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, struct iov_iter *iter) { const struct iomap *iomap = &iomi->iomap; + const struct iomap *srcmap = &iomi->srcmap; loff_t length = iomap_length(iomi); loff_t pos = iomi->pos; struct block_device *bdev = iomap->bdev; struct dax_device *dax_dev = iomap->dax_dev; loff_t end = pos + length, done = 0; + bool write = iov_iter_rw(iter) == WRITE; ssize_t ret = 0; size_t xfer; int id; - if (iov_iter_rw(iter) == READ) { + if (!write) { end = min(end, i_size_read(iomi->inode)); if (pos >= end) return 0; @@ -1193,7 +1249,12 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, return iov_iter_zero(min(length, end - pos), iter); } - if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) + /* + * In DAX mode, enforce either pure overwrites of written extents, or + * writes to unwritten extents as part of a copy-on-write operation. + */ + if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED && + !(iomap->flags & IOMAP_F_SHARED))) return -EIO; /* @@ -1232,6 +1293,14 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, break; } + if (write && + srcmap->addr != IOMAP_HOLE && srcmap->addr != iomap->addr) { + ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap, + kaddr); + if (ret) + break; + } + map_len = PFN_PHYS(map_len); kaddr += offset; map_len -= offset; @@ -1243,7 +1312,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, * validated via access_ok() in either vfs_read() or * vfs_write(), depending on which operation we are doing. */ - if (iov_iter_rw(iter) == WRITE) + if (write) xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, map_len, iter); else @@ -1385,6 +1454,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, { struct address_space *mapping = vmf->vma->vm_file->f_mapping; const struct iomap *iomap = &iter->iomap; + const struct iomap *srcmap = &iter->srcmap; size_t size = pmd ? PMD_SIZE : PAGE_SIZE; loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT; bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -1392,6 +1462,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, unsigned long entry_flags = pmd ? DAX_PMD : 0; int err = 0; pfn_t pfn; + void *kaddr; if (!pmd && vmf->cow_page) return dax_fault_cow_page(vmf, iter); @@ -1404,18 +1475,25 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, return dax_pmd_load_hole(xas, vmf, iomap, entry); } - if (iomap->type != IOMAP_MAPPED) { + if (iomap->type != IOMAP_MAPPED && !(iomap->flags & IOMAP_F_SHARED)) { WARN_ON_ONCE(1); return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS; } - err = dax_iomap_direct_access(&iter->iomap, pos, size, NULL, &pfn); + err = dax_iomap_direct_access(iomap, pos, size, &kaddr, &pfn); if (err) return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err); *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags, write && !sync); + if (write && + srcmap->addr != IOMAP_HOLE && srcmap->addr != iomap->addr) { + err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr); + if (err) + return dax_fault_return(err); + } + if (sync) return dax_fault_synchronous_pfnp(pfnp, pfn); From patchwork Tue Sep 28 06:23:06 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12521859 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 2F90272 for ; Tue, 28 Sep 2021 06:23:40 +0000 (UTC) IronPort-Data: A9a23:S+uVnK0lcU2MakKAQPbD5f5wkn2cJEfYwER7XOPLsXnJ0mgihGFWzWoaUW7UM/mPZDDxKdAgaI+w8xgPupDXnIc2QQE+nZ1PZygU8JKaX7x1DatR0xu6d5SFFAQ+hyknQoGowPscEzmM+39BDpC79SMljfDSFuKlYAL5EnsZqTFMGX5JZS1Ly7ZRbr5A2bBVMivV0T/Ai5S31GyNh1aYBlkpB5er83uDihhdVAQw5TTSbdgT1LPXeuJ84Jg3fcldJFOgKmVY83LTegrN8F251juxExYFAdXjnKv5c1ERX/jZOg3mZnh+AvDk20Yd4HdplPtT2Pk0MC+7jx2Tgtl308QLu5qrVS8nI6/NhP8AFRJfFkmSOIUfoeKfcCbu7pH7I0ruNiGEL+9VJE0/I4wU0uhtBmRJ7/YZNHYGaRXrr/23xLaqYuhqiN4qIMTiMMUYoH4I5T3QC7AkB4/CR6HL7NpD9DY2ms1KW/3ZYqIxZThwaxLPSx5CIFEaDNQ5hujArmP+bzBDqFK9oasx/niVzQZ0lrPqNbL9fMKGRMBQtkKZvX7duWD4BAwKctCS11Kt8nmsruvUgWX3Veo6DrK/8vJ1kVu73XEIBVsdUl7TieO2jUqyRMNZA1cJ4SdooaVa3EiqSMTtGhOjrHOasxo0RdVdCas55RuLx66S5ByWbkAATzhceJkludUwSDgCyFCEhZXqCCZpvbnTTmiSnp+QrDWvKW0FI3QqeyAJV00G7sPlrYV1iQjAJv59EbSyps/4HzDuhTSLqjUuwbIJgogW1M2GEfrv6963jsGRCFdruUOMBST4hj6VrbWNP+SAgWU3J94eRGpBcmS8gQ== IronPort-HdrOrdr: A9a23:7je93qi5y8ga9r9qgFALJh4RcXBQXuYji2hC6mlwRA09TyX4rbHLoB1/73LJYVkqNk3I5urrBEDtexLhHP1OkOws1NWZLWrbUQKTRekM0WKI+UyDJ8SRzI5g/JYlW61/Jfm1NlJikPv9iTPSL/8QhPWB74Ck7N2z80tQ X-IronPort-AV: E=Sophos;i="5.85,328,1624291200"; d="scan'208";a="115096975" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 28 Sep 2021 14:23:39 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id 224794D0DC7E; Tue, 28 Sep 2021 14:23:37 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Tue, 28 Sep 2021 14:23:26 +0800 Received: from irides.mr.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Tue, 28 Sep 2021 14:23:24 +0800 From: Shiyang Ruan To: , , , CC: , , , , , , , , Goldwyn Rodrigues , Ritesh Harjani Subject: [PATCH v10 3/8] fsdax: Replace mmap entry in case of CoW Date: Tue, 28 Sep 2021 14:23:06 +0800 Message-ID: <20210928062311.4012070-4-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> References: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: 224794D0DC7E.AFAC5 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No Replace the existing entry to the newly allocated one in case of CoW. Also, we mark the entry as PAGECACHE_TAG_TOWRITE so writeback marks this entry as writeprotected. This helps us snapshots so new write pagefaults after snapshots trigger a CoW. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Shiyang Ruan Reviewed-by: Christoph Hellwig Reviewed-by: Ritesh Harjani Reviewed-by: Darrick J. Wong --- fs/dax.c | 75 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index dded08be54dc..b437badfe0dd 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -734,6 +734,23 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d return 0; } +/* + * MAP_SYNC on a dax mapping guarantees dirty metadata is + * flushed on write-faults (non-cow), but not read-faults. + */ +static bool dax_fault_is_synchronous(const struct iomap_iter *iter, + struct vm_area_struct *vma) +{ + return (iter->flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) && + (iter->iomap.flags & IOMAP_F_DIRTY); +} + +static bool dax_fault_is_cow(const struct iomap_iter *iter) +{ + return (iter->flags & IOMAP_WRITE) && + (iter->iomap.flags & IOMAP_F_SHARED); +} + /* * By this point grab_mapping_entry() has ensured that we have a locked entry * of the appropriate size so we don't have to worry about downgrading PMDs to @@ -741,16 +758,19 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d * already in the tree, we will skip the insertion and just dirty the PMD as * appropriate. */ -static void *dax_insert_entry(struct xa_state *xas, - struct address_space *mapping, struct vm_fault *vmf, - void *entry, pfn_t pfn, unsigned long flags, bool dirty) +static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf, + const struct iomap_iter *iter, void *entry, pfn_t pfn, + unsigned long flags) { + struct address_space *mapping = vmf->vma->vm_file->f_mapping; void *new_entry = dax_make_entry(pfn, flags); + bool dirty = !dax_fault_is_synchronous(iter, vmf->vma); + bool cow = dax_fault_is_cow(iter); if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) { + if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) { unsigned long index = xas->xa_index; /* we are replacing a zero page with block mapping */ if (dax_is_pmd_entry(entry)) @@ -762,7 +782,7 @@ static void *dax_insert_entry(struct xa_state *xas, xas_reset(xas); xas_lock_irq(xas); - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { void *old; dax_disassociate_entry(entry, mapping, false); @@ -786,6 +806,9 @@ static void *dax_insert_entry(struct xa_state *xas, if (dirty) xas_set_mark(xas, PAGECACHE_TAG_DIRTY); + if (cow) + xas_set_mark(xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irq(xas); return entry; } @@ -1111,17 +1134,15 @@ static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size, * If this page is ever written to we will re-fault and change the mapping to * point to real DAX storage instead. */ -static vm_fault_t dax_load_hole(struct xa_state *xas, - struct address_space *mapping, void **entry, - struct vm_fault *vmf) +static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf, + const struct iomap_iter *iter, void **entry) { - struct inode *inode = mapping->host; + struct inode *inode = iter->inode; unsigned long vaddr = vmf->address; pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr)); vm_fault_t ret; - *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_ZERO_PAGE, false); + *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE); ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); trace_dax_load_hole(inode, vmf, ret); @@ -1130,7 +1151,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, #ifdef CONFIG_FS_DAX_PMD static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, - const struct iomap *iomap, void **entry) + const struct iomap_iter *iter, void **entry) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; unsigned long pmd_addr = vmf->address & PMD_MASK; @@ -1148,8 +1169,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, goto fallback; pfn = page_to_pfn_t(zero_page); - *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_PMD | DAX_ZERO_PAGE, false); + *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, + DAX_PMD | DAX_ZERO_PAGE); if (arch_needs_pgtable_deposit()) { pgtable = pte_alloc_one(vma->vm_mm); @@ -1182,7 +1203,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, } #else static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, - const struct iomap *iomap, void **entry) + const struct iomap_iter *iter, void **entry) { return VM_FAULT_FALLBACK; } @@ -1381,17 +1402,6 @@ static vm_fault_t dax_fault_return(int error) return vmf_error(error); } -/* - * MAP_SYNC on a dax mapping guarantees dirty metadata is - * flushed on write-faults (non-cow), but not read-faults. - */ -static bool dax_fault_is_synchronous(unsigned long flags, - struct vm_area_struct *vma, const struct iomap *iomap) -{ - return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC) - && (iomap->flags & IOMAP_F_DIRTY); -} - /* * When handling a synchronous page fault and the inode need a fsync, we can * insert the PTE/PMD into page tables only after that fsync happened. Skip @@ -1452,13 +1462,11 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, const struct iomap_iter *iter, pfn_t *pfnp, struct xa_state *xas, void **entry, bool pmd) { - struct address_space *mapping = vmf->vma->vm_file->f_mapping; const struct iomap *iomap = &iter->iomap; const struct iomap *srcmap = &iter->srcmap; size_t size = pmd ? PMD_SIZE : PAGE_SIZE; loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT; - bool write = vmf->flags & FAULT_FLAG_WRITE; - bool sync = dax_fault_is_synchronous(iter->flags, vmf->vma, iomap); + bool write = iter->flags & IOMAP_WRITE; unsigned long entry_flags = pmd ? DAX_PMD : 0; int err = 0; pfn_t pfn; @@ -1471,8 +1479,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, if (!write && (iomap->type == IOMAP_UNWRITTEN || iomap->type == IOMAP_HOLE)) { if (!pmd) - return dax_load_hole(xas, mapping, entry, vmf); - return dax_pmd_load_hole(xas, vmf, iomap, entry); + return dax_load_hole(xas, vmf, iter, entry); + return dax_pmd_load_hole(xas, vmf, iter, entry); } if (iomap->type != IOMAP_MAPPED && !(iomap->flags & IOMAP_F_SHARED)) { @@ -1484,8 +1492,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, if (err) return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err); - *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags, - write && !sync); + *entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags); if (write && srcmap->addr != IOMAP_HOLE && srcmap->addr != iomap->addr) { @@ -1494,7 +1501,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, return dax_fault_return(err); } - if (sync) + if (dax_fault_is_synchronous(iter, vmf->vma)) return dax_fault_synchronous_pfnp(pfnp, pfn); /* insert PMD pfn */ From patchwork Tue Sep 28 06:23:07 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12521861 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id DC59872 for ; Tue, 28 Sep 2021 06:23:45 +0000 (UTC) IronPort-Data: A9a23:FCzbTqm+M5KPVD6GhCmDkxHo5gz6J0RdPkR7XQ2eYbTBsI5bpzJUzTAaDWGCPvnYY2D9KNoiPdni9ENXvZCAxtdrTFM4+CA2RRqmi+KfW43BcR2Y0wB+jyH7ZBs+qZ1YM7EsFehsJpPnjkrrYuWJQUVUj/nSH+KtUr6cY0ideCc/IMsfoUM68wIGqt4w6TSJK1vlVeLa+6UzCnf8s9JHGj58B5a4lf9alK+aVAX0EbAJTasjUFf2zxH5BX+ETE27ByOQroJ8RoZWSwtfpYxV8F81/z91Yj+kur39NEMXQL/OJhXIgX1TM0SgqkEa4HVsjeBgb7xBAatUo2zhc9RZ0shEs4ehDwkvJbHklvkfUgVDDmd1OqguFLrveCHi6Z3Nnh2bG5fr67A0ZK0sBqUU8/h2DUlA7/sdLyoHbwzFjOWzqJqkS+1ol+wiKsfxNY8Ss30myivWZd4qSJaFQePV5Ntc3T41nehPG+rTY4wSbj8HRBjCfBpJNX8UBYg4kePugWPwGxVetl6UoK8f52nI0Bc31LnrLcqTdtGULe1VlUawonnauWj0ajkAO9ubxSWU9Fq3m/TC2y/2MKoWFbul5rtkm1Ge2GEXIAMZWEH9ovSjjEO6HdVFJCQ8/isosLh390GxSNT5dwO3rWTCvRMGXddUVeog52mlzqvS/hbcFmYfZiBOZcZgt8IsQzEukFiTkLvBGz11t5WHRHSc6PGQrDWvKW4SN2BEeCxscOevy7EPu6lq1lSWEIklS/Xz07XI9fjL62jihEADa3871qbnD5mGwG0= IronPort-HdrOrdr: A9a23:fjO2la6PIcLhzZZ/4gPXwPTXdLJyesId70hD6qkRc20wTiX8ra2TdZsguyMc9wx6ZJhNo7G90cq7MBbhHPxOkOos1N6ZNWGIhILCFvAB0WKN+V3dMhy73utc+IMlSKJmFeD3ZGIQse/KpCW+DPYsqePqzJyV X-IronPort-AV: E=Sophos;i="5.85,328,1624291200"; d="scan'208";a="115096985" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 28 Sep 2021 14:23:45 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id 83B3D4D0DC7F; Tue, 28 Sep 2021 14:23:43 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Tue, 28 Sep 2021 14:23:37 +0800 Received: from irides.mr.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Tue, 28 Sep 2021 14:23:36 +0800 From: Shiyang Ruan To: , , , CC: , , , , , , , Subject: [PATCH v10 4/8] fsdax: Convert dax_iomap_zero to iter model Date: Tue, 28 Sep 2021 14:23:07 +0800 Message-ID: <20210928062311.4012070-5-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> References: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: 83B3D4D0DC7F.A110A X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No Let dax_iomap_zero() support iter model. Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/dax.c | 3 ++- fs/iomap/buffered-io.c | 3 +-- include/linux/dax.h | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index b437badfe0dd..debe459680f2 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1209,8 +1209,9 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, } #endif /* CONFIG_FS_DAX_PMD */ -s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap) +s64 dax_iomap_zero(const struct iomap_iter *iter, loff_t pos, u64 length) { + const struct iomap *iomap = &iter->iomap; sector_t sector = iomap_sector(iomap, pos & PAGE_MASK); pgoff_t pgoff; long rc, id; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 9cc5798423d1..84a861d3b3e0 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -889,7 +889,6 @@ static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length) static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) { - struct iomap *iomap = &iter->iomap; const struct iomap *srcmap = iomap_iter_srcmap(iter); loff_t pos = iter->pos; loff_t length = iomap_length(iter); @@ -903,7 +902,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) s64 bytes; if (IS_DAX(iter->inode)) - bytes = dax_iomap_zero(pos, length, iomap); + bytes = dax_iomap_zero(iter, pos, length); else bytes = __iomap_zero_iter(iter, pos, length); if (bytes < 0) diff --git a/include/linux/dax.h b/include/linux/dax.h index 2619d94c308d..b6f5d0d30065 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -13,6 +13,7 @@ typedef unsigned long dax_entry_t; struct iomap_ops; struct iomap; +struct iomap_iter; struct dax_device; struct dax_operations { /* @@ -210,7 +211,7 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); -s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap); +s64 dax_iomap_zero(const struct iomap_iter *iter, loff_t pos, u64 length); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); From patchwork Tue Sep 28 06:23:08 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12521863 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 3172D72 for ; Tue, 28 Sep 2021 06:23:52 +0000 (UTC) IronPort-Data: A9a23:zxInbKIOS5sSqRVzFE+RwZQlxSXFcZb7ZxGrkP8bfHC60zkl1GFVyDEbWjuFa6mOajTwKo8nPo6/pkIHuJPQxoNqS1BcGVNFFSwT8ZWfbTi6wuYcBwvLd4ubChsPA/w2MrEsF+hpCC+BzvuRGuK59yAkhPvYHuOU5NPsYUideyc1EU/Ntjozw4bVsqYw6TSIK1vlVeHa+qUzC3f5s9JACV/43orYwP9ZUFsejxtD1rA2TagjUFYzDBD5BrpHTU26ByOQroW5goeHq+j/ILGRpgs1/j8mDJWrj7T6blYXBLXVOGBiiFIPA+773EcE/Xd0j87XN9JFAatToy+UltZq2ZNDs4esYQk0PKzQg/lbWB5de817FfQfpeeWfynu6qR/yGWDKRMA2c5GAEgoPIEw9PxwBGZU//0EbjsKa3irmOOyxKOTS+9inM0vIcDneoQFtRlIwTjfS/RgXpHHR6TD4MRw3TEsi8QIFvHbD+IVayVoahvoYBBVPFoTTpUkk4+AnHjjfiZYqHqRpKwq8y7Sxgk327/oWPLTZNCLQMB9mkeDunmA+2X/HwFcONGBoRKF+XKEgvTT2y/2MKoIG7q8+uF7hnWI23ceThEbPXO/oP+kmguwQN5SNUEQ0jQhoLJ090GxSNT5GRqirxasuh8aRsoVEOAg7gyJ4rTb7hzfBWUeSDNFLts8u6ceQT0sy0/Mj93yLSJgvafTSn+H8LqQ6zSoNkA9M24YYgcWQA0E/Z/noYcunlTIVNklDa3dszFfMVkc2BjT9G5n2epV1pVNis2GEZn8q2rEjvD0osQdu207hl6Y0z4= IronPort-HdrOrdr: A9a23:unpnqKNbIuqYKcBcTv2jsMiBIKoaSvp037BL7TEUdfUxSKGlfq+V8sjzqiWftN98YhAdcLO7Scy9qBHnhP1ICOAqVN/MYOCMghrLEGgN1+vf6gylMyj/28oY7q14bpV5YeeaMXFKyer8/ym0euxN/OW6 X-IronPort-AV: E=Sophos;i="5.85,328,1624291200"; d="scan'208";a="115096993" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 28 Sep 2021 14:23:51 +0800 Received: from G08CNEXMBPEKD04.g08.fujitsu.local (unknown [10.167.33.201]) by cn.fujitsu.com (Postfix) with ESMTP id 8836C4D0DC83; Tue, 28 Sep 2021 14:23:50 +0800 (CST) Received: from G08CNEXJMPEKD02.g08.fujitsu.local (10.167.33.202) by G08CNEXMBPEKD04.g08.fujitsu.local (10.167.33.201) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Tue, 28 Sep 2021 14:23:44 +0800 Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXJMPEKD02.g08.fujitsu.local (10.167.33.202) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Tue, 28 Sep 2021 14:23:43 +0800 Received: from irides.mr.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Tue, 28 Sep 2021 14:23:42 +0800 From: Shiyang Ruan To: , , , CC: , , , , , , , , Ritesh Harjani Subject: [PATCH v10 5/8] fsdax: Add dax_iomap_cow_copy() for dax_iomap_zero Date: Tue, 28 Sep 2021 14:23:08 +0800 Message-ID: <20210928062311.4012070-6-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> References: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: 8836C4D0DC83.AED86 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No Punch hole on a reflinked file needs dax_iomap_cow_copy() too. Otherwise, data in not aligned area will be not correct. So, add the CoW operation for not aligned case in dax_iomap_zero(). Signed-off-by: Shiyang Ruan Reviewed-by: Ritesh Harjani Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/dax.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index debe459680f2..5379de8ad0c7 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1212,6 +1212,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, s64 dax_iomap_zero(const struct iomap_iter *iter, loff_t pos, u64 length) { const struct iomap *iomap = &iter->iomap; + const struct iomap *srcmap = &iter->srcmap; sector_t sector = iomap_sector(iomap, pos & PAGE_MASK); pgoff_t pgoff; long rc, id; @@ -1230,21 +1231,27 @@ s64 dax_iomap_zero(const struct iomap_iter *iter, loff_t pos, u64 length) id = dax_read_lock(); - if (page_aligned) + if (page_aligned) { rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1); - else - rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL); - if (rc < 0) { - dax_read_unlock(id); - return rc; + goto out; } - if (!page_aligned) { - memset(kaddr + offset, 0, size); + rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL); + if (rc < 0) + goto out; + memset(kaddr + offset, 0, size); + if (srcmap->addr != IOMAP_HOLE && srcmap->addr != iomap->addr) { + rc = dax_iomap_cow_copy(pos, size, PAGE_SIZE, srcmap, + kaddr); + if (rc < 0) + goto out; + dax_flush(iomap->dax_dev, kaddr, PAGE_SIZE); + } else dax_flush(iomap->dax_dev, kaddr + offset, size); - } + +out: dax_read_unlock(id); - return size; + return rc < 0 ? rc : size; } static loff_t dax_iomap_iter(const struct iomap_iter *iomi, From patchwork Tue Sep 28 06:23:09 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12521865 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id C16BB3FD5 for ; Tue, 28 Sep 2021 06:23:52 +0000 (UTC) IronPort-Data: A9a23:sp8ECaCx63XSOxVW/z3iw5YqxClBgxIJ4g17XOLfUAi90DIng2cPyjRJWz2CP6zbMGShL9l1PYqz8h5T7ZCAx9UxeLYW3SszFioV86IpJjg4wn/YZnrUdouaJK5ex512huLocYZkExcwmj/3auK49SgmhfnRLlbBILWs1h5ZFFYMpBgJ2UoLd94R2uaEsPDha++/kYqaT/73ZDdJ7wVJ3lc8sMpvnv/AUMPa41v0tnRmDRxCUcS3e3M9VPrzLonpR5f0rxU9IwK0ewrD5OnREmLx9BFrBM6nk6rgbwsBRbu60Qqm0yIQAvb9xEMZ4HFaPqUTbZLwbW9NljyPhME3xtNWqbS+VAUoIrbR3u8aVnG0FgknZPAeqeGWcCLXXcu7iheun2HX6/lnEkA6FYMC/eNwG2tP6boTLzVlRg+Cg+an6LO9RPNliskqII/sJox3kn1py3fbS+knRZTCSqDRzd5ewDo0wMtJGJ72a8gGbjxgRBfNeRtCPhEQEp1WtOG2inj6dhVcqUmJvuwz4m7O3Ep93aaFGNreevSOXtkTkkvwjnjJ+GD1HQAcHMeC0jfD+XWp7sfVkiT/VJ0DEpWj6+VnxlGerkQXCRsLRR61uvW0lEO6c8xQJlZS+Sc0q6U2skuxQbHVWxy+vW7BvRMGXddUO/M15RvLyafO5QudQG8eQVZpbN0gqd9zVTIx/kGGksmvBjF1trCRD3WH+d+8szKoPgAHIGkDe2kATA0Y85/kuo51kxGnczrJOMZZlfWsQXepnW/M93N42t0uYQcw//3T1Tj6b/iE+/AlljII2zg= IronPort-HdrOrdr: A9a23:+ndMYa+/Vyx4i/19hdJuk+DkI+orL9Y04lQ7vn2ZKCYlFvBw8vrCoB1173HJYUkqMk3I9ergBEDiewK4yXcW2/hzAV7KZmCP11dAR7sSj7cKrQeBJwTOssZZ1YpFN5N1EcDMCzFB5vrS0U2VFMkBzbC8nJyVuQ== X-IronPort-AV: E=Sophos;i="5.85,328,1624291200"; d="scan'208";a="115096992" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 28 Sep 2021 14:23:51 +0800 Received: from G08CNEXMBPEKD05.g08.fujitsu.local (unknown [10.167.33.204]) by cn.fujitsu.com (Postfix) with ESMTP id B42E04D0DC7C; Tue, 28 Sep 2021 14:23:45 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD05.g08.fujitsu.local (10.167.33.204) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Tue, 28 Sep 2021 14:23:44 +0800 Received: from irides.mr.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Tue, 28 Sep 2021 14:23:43 +0800 From: Shiyang Ruan To: , , , CC: , , , , , , , , Goldwyn Rodrigues Subject: [PATCH v10 6/8] fsdax: Dedup file range to use a compare function Date: Tue, 28 Sep 2021 14:23:09 +0800 Message-ID: <20210928062311.4012070-7-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> References: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: B42E04D0DC7C.A14E7 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No With dax we cannot deal with readpage() etc. So, we create a dax comparison function which is similar with vfs_dedupe_file_range_compare(). And introduce dax_remap_file_range_prep() for filesystem use. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/dax.c | 79 ++++++++++++++++++++++++++++++++++++++++++++ fs/remap_range.c | 31 ++++++++++++++--- fs/xfs/xfs_reflink.c | 8 +++-- include/linux/dax.h | 8 +++++ include/linux/fs.h | 12 ++++--- 5 files changed, 127 insertions(+), 11 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 5379de8ad0c7..e45944251956 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1815,3 +1815,82 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, return dax_insert_pfn_mkwrite(vmf, pfn, order); } EXPORT_SYMBOL_GPL(dax_finish_sync_fault); + +static loff_t dax_range_compare_iter(struct iomap_iter *it_src, + struct iomap_iter *it_dest, bool *same) +{ + const struct iomap *smap = &it_src->iomap; + const struct iomap *dmap = &it_dest->iomap; + loff_t pos1 = it_src->pos, pos2 = it_dest->pos; + loff_t len = min(smap->length, dmap->length); + void *saddr, *daddr; + int id, ret; + + if (smap->type == IOMAP_HOLE && dmap->type == IOMAP_HOLE) { + *same = true; + return len; + } + + if (smap->type == IOMAP_HOLE || dmap->type == IOMAP_HOLE) { + *same = false; + return 0; + } + + id = dax_read_lock(); + ret = dax_iomap_direct_access(smap, pos1, ALIGN(pos1 + len, PAGE_SIZE), + &saddr, NULL); + if (ret < 0) + goto out_unlock; + + ret = dax_iomap_direct_access(dmap, pos2, ALIGN(pos2 + len, PAGE_SIZE), + &daddr, NULL); + if (ret < 0) + goto out_unlock; + + *same = !memcmp(saddr, daddr, len); + if (!*same) + len = 0; + dax_read_unlock(id); + return len; + +out_unlock: + dax_read_unlock(id); + return -EIO; +} + +int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dst, loff_t dstoff, loff_t len, bool *same, + const struct iomap_ops *ops) +{ + struct iomap_iter src_iter = { + .inode = src, + .pos = srcoff, + .len = len, + }; + struct iomap_iter dst_iter = { + .inode = dst, + .pos = dstoff, + .len = len, + }; + int ret; + + while ((ret = iomap_iter(&src_iter, ops)) > 0) { + while ((ret = iomap_iter(&dst_iter, ops)) > 0) { + dst_iter.processed = dax_range_compare_iter(&src_iter, + &dst_iter, same); + } + if (ret <= 0) + src_iter.processed = ret; + } + return ret; +} + +int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *ops) +{ + return __generic_remap_file_range_prep(file_in, pos_in, file_out, + pos_out, len, remap_flags, ops); +} +EXPORT_SYMBOL_GPL(dax_remap_file_range_prep); diff --git a/fs/remap_range.c b/fs/remap_range.c index 6d4a9beaa097..1157169b9d79 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internal.h" #include @@ -277,9 +278,11 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, * If there's an error, then the usual negative error code is returned. * Otherwise returns 0 with *len set to the request length. */ -int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t *len, unsigned int remap_flags) +int +__generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *dax_read_ops) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -339,8 +342,18 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, if (remap_flags & REMAP_FILE_DEDUP) { bool is_same = false; - ret = vfs_dedupe_file_range_compare(inode_in, pos_in, - inode_out, pos_out, *len, &is_same); + if (*len == 0) + return 0; + + if (!IS_DAX(inode_in)) + ret = vfs_dedupe_file_range_compare(inode_in, pos_in, + inode_out, pos_out, *len, &is_same); + else if (dax_read_ops) + ret = dax_dedupe_file_range_compare(inode_in, pos_in, + inode_out, pos_out, *len, &is_same, + dax_read_ops); + else + return -EINVAL; if (ret) return ret; if (!is_same) @@ -358,6 +371,14 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, return ret; } + +int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags) +{ + return __generic_remap_file_range_prep(file_in, pos_in, file_out, + pos_out, len, remap_flags, NULL); +} EXPORT_SYMBOL(generic_remap_file_range_prep); loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 76355f293488..7ecea0311e88 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1332,8 +1332,12 @@ xfs_reflink_remap_prep( if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; - ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - len, remap_flags); + if (!IS_DAX(inode_in)) + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, + pos_out, len, remap_flags); + else + ret = dax_remap_file_range_prep(file_in, pos_in, file_out, + pos_out, len, remap_flags, &xfs_read_iomap_ops); if (ret || *len == 0) goto out_unlock; diff --git a/include/linux/dax.h b/include/linux/dax.h index b6f5d0d30065..043b8008d8f4 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -212,6 +212,14 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); s64 dax_iomap_zero(const struct iomap_iter *iter, loff_t pos, u64 length); +int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same, + const struct iomap_ops *ops); +int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *ops); static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); diff --git a/include/linux/fs.h b/include/linux/fs.h index e7a633353fd2..670b53c722a7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -71,6 +71,7 @@ struct fsverity_operations; struct fs_context; struct fs_parameter_spec; struct fileattr; +struct iomap_ops; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2175,10 +2176,13 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags); -extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t *count, - unsigned int remap_flags); +int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *len, unsigned int remap_flags, + const struct iomap_ops *dax_read_ops); +int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *count, unsigned int remap_flags); extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); From patchwork Tue Sep 28 06:23:10 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12521867 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 022B33FD5 for ; Tue, 28 Sep 2021 06:23:58 +0000 (UTC) IronPort-Data: A9a23:Ge4Gg6AJgpGh+hVW/1Liw5YqxClBgxIJ4g17XOLfU1bq1mkmhj1RxmEdXmGCOK2CZGqnKdh+bIm1pB9Xv5SAx9UxeLYW3SszFioV86IpJjg4wn/YZnrUdouaJK5ex512huLocYZkExcwmj/3auK49SgmhfnRLlbBILWs1h5ZFFYMpBgJ2UoLd94R2uaEsPDha++/kYqaT/73ZDdJ7wVJ3lc8sMpvnv/AUMPa41v0tnRmDRxCUcS3e3M9VPrzLonpR5f0rxU9IwK0ewrD5OnREmLx9BFrBM6nk6rgbwsBRbu60Qqm0yIQAvb9xEMZ4HFaPqUTbZLwbW9NljyPhME3xtNWqbS+VAUoIrbR3u8aVnG0FgknZPAeqeCfeSXXXcu7iheun2HX6/lnEkA6FYMC/eNwG2tP6boTLzVlRg+Cg+an6LO9RPNliskqII/sJox3kn1py3fbS+knRZTCSqDRzd5ewDo0wMtJGJ72a8gGbjxgRBfNeRtCPhEQEp1WtOG2inj6dhVcqUmJvuwz4m7O3Ep93aaFGNreevSOXtkTkkvwjnjJ+GD1HQAcHMeC0jfD/n/EruvOmz7rHYwJGLCm+/pCnlKe3CoQBQcQWF/9puO24ma6WtRCOwkX9zAooKwa6kOmVJ/+Uge+rXrCuQQTM/JUEusn+ESdxLH8/QmUHC4HQyRHZdhgs9U5LRQ010WOt8HkAz1x9rmUT2+NsLCOonWvOkAowcUqDcMfZVJdpYC9/8do1VSSJuuP2ZWd1rXdcQwcCRjVxMTmu4gusA== IronPort-HdrOrdr: A9a23:YhTbZKuToejSSiI4NXeoG//H7skDStV00zEX/kB9WHVpm62j5qSTdZEguCMc5wx+ZJheo7q90cW7IE80lqQFhLX5X43SPzUO0VHARO5fBODZsl/d8kPFltJ15ONJdqhSLJnKB0FmsMCS2mKFOudl7N6Z0K3Av4vj80s= X-IronPort-AV: E=Sophos;i="5.85,328,1624291200"; d="scan'208";a="115097005" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 28 Sep 2021 14:23:57 +0800 Received: from G08CNEXMBPEKD06.g08.fujitsu.local (unknown [10.167.33.206]) by cn.fujitsu.com (Postfix) with ESMTP id 2C6564D0DC7D; Tue, 28 Sep 2021 14:23:57 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD06.g08.fujitsu.local (10.167.33.206) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Tue, 28 Sep 2021 14:23:45 +0800 Received: from irides.mr.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Tue, 28 Sep 2021 14:23:45 +0800 From: Shiyang Ruan To: , , , CC: , , , , , , , Subject: [PATCH v10 7/8] xfs: support CoW in fsdax mode Date: Tue, 28 Sep 2021 14:23:10 +0800 Message-ID: <20210928062311.4012070-8-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> References: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: 2C6564D0DC7D.A0868 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No In fsdax mode, WRITE and ZERO on a shared extent need CoW performed. After that, new allocated extents needs to be remapped to the file. So, add a CoW identification in ->iomap_begin(), and implement ->iomap_end() to do the remapping work. Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 3 +-- fs/xfs/xfs_file.c | 7 ++----- fs/xfs/xfs_iomap.c | 30 +++++++++++++++++++++++++++- fs/xfs/xfs_iomap.h | 44 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_iops.c | 7 +++---- fs/xfs/xfs_reflink.c | 3 +-- 6 files changed, 80 insertions(+), 14 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 73a36b7be3bd..0681250e0a5d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1009,8 +1009,7 @@ xfs_free_file_space( return 0; if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; - error = iomap_zero_range(VFS_I(ip), offset, len, NULL, - &xfs_buffered_write_iomap_ops); + error = xfs_iomap_zero_range(ip, offset, len, NULL); if (error) return error; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7aa943edfc02..afde4fbefb6f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -704,7 +704,7 @@ xfs_file_dax_write( pos = iocb->ki_pos; trace_xfs_file_dax_write(iocb, from); - ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -1327,10 +1327,7 @@ __xfs_filemap_fault( pfn_t pfn; xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, - (write_fault && !vmf->cow_page) ? - &xfs_direct_write_iomap_ops : - &xfs_read_iomap_ops); + ret = xfs_dax_iomap_fault(vmf, pe_size, write_fault, &pfn); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 093758440ad5..51cb5b713521 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -761,7 +761,8 @@ xfs_direct_write_iomap_begin( /* may drop and re-acquire the ilock */ error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, - &lockmode, flags & IOMAP_DIRECT); + &lockmode, + (flags & IOMAP_DIRECT) || IS_DAX(inode)); if (error) goto out_unlock; if (shared) @@ -854,6 +855,33 @@ const struct iomap_ops xfs_direct_write_iomap_ops = { .iomap_begin = xfs_direct_write_iomap_begin, }; +static int +xfs_dax_write_iomap_end( + struct inode *inode, + loff_t pos, + loff_t length, + ssize_t written, + unsigned flags, + struct iomap *iomap) +{ + struct xfs_inode *ip = XFS_I(inode); + + if (!xfs_is_cow_inode(ip)) + return 0; + + if (!written) { + xfs_reflink_cancel_cow_range(ip, pos, length, true); + return 0; + } + + return xfs_reflink_end_cow(ip, pos, written); +} + +const struct iomap_ops xfs_dax_write_iomap_ops = { + .iomap_begin = xfs_direct_write_iomap_begin, + .iomap_end = xfs_dax_write_iomap_end, +}; + static int xfs_buffered_write_iomap_begin( struct inode *inode, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 7d3703556d0e..81726bfbf890 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -7,6 +7,7 @@ #define __XFS_IOMAP_H__ #include +#include struct xfs_inode; struct xfs_bmbt_irec; @@ -45,5 +46,48 @@ extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops; +extern const struct iomap_ops xfs_dax_write_iomap_ops; + +static inline int +xfs_iomap_zero_range( + struct xfs_inode *ip, + loff_t pos, + loff_t len, + bool *did_zero) +{ + struct inode *inode = VFS_I(ip); + + return iomap_zero_range(inode, pos, len, did_zero, + IS_DAX(inode) ? + &xfs_dax_write_iomap_ops : + &xfs_buffered_write_iomap_ops); +} + +static inline int +xfs_iomap_truncate_page( + struct xfs_inode *ip, + loff_t pos, + bool *did_zero) +{ + struct inode *inode = VFS_I(ip); + + return iomap_truncate_page(inode, pos, did_zero, + IS_DAX(inode) ? + &xfs_dax_write_iomap_ops : + &xfs_buffered_write_iomap_ops); +} + +static inline int +xfs_dax_iomap_fault( + struct vm_fault *vmf, + enum page_entry_size pe_size, + bool write_fault, + pfn_t *pfn) +{ + return dax_iomap_fault(vmf, pe_size, pfn, NULL, + (write_fault && !vmf->cow_page) ? + &xfs_dax_write_iomap_ops : + &xfs_read_iomap_ops); +} #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index a607d6aca5c4..332e6208dffd 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -911,8 +911,8 @@ xfs_setattr_size( */ if (newsize > oldsize) { trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); - error = iomap_zero_range(inode, oldsize, newsize - oldsize, - &did_zeroing, &xfs_buffered_write_iomap_ops); + error = xfs_iomap_zero_range(ip, oldsize, newsize - oldsize, + &did_zeroing); } else { /* * iomap won't detect a dirty page over an unwritten block (or a @@ -924,8 +924,7 @@ xfs_setattr_size( newsize); if (error) return error; - error = iomap_truncate_page(inode, newsize, &did_zeroing, - &xfs_buffered_write_iomap_ops); + error = xfs_iomap_truncate_page(ip, newsize, &did_zeroing); } if (error) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 7ecea0311e88..9d876e268734 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1269,8 +1269,7 @@ xfs_reflink_zero_posteof( return 0; trace_xfs_zero_eof(ip, isize, pos - isize); - return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, - &xfs_buffered_write_iomap_ops); + return xfs_iomap_zero_range(ip, isize, pos - isize, NULL); } /* From patchwork Tue Sep 28 06:23:11 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Shiyang Ruan X-Patchwork-Id: 12521869 Received: from heian.cn.fujitsu.com (mail.cn.fujitsu.com [183.91.158.132]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 793B872 for ; Tue, 28 Sep 2021 06:24:04 +0000 (UTC) IronPort-Data: A9a23:M+p0O69uN1adIemA41NLDrUD+3+TJUtcMsCJ2f8bfWQNrUoigTwAzmNMWDzVaarcZmvzcth/O9608RxQ78XTyoNgSVdlrnsFo1Bi85ScXYvDRqvT04J+FuWaFQQ/qZx2huDodKjYdVeB4EfwWlTdhSMkj/jQF+CsULes1h1ZHmeIdg9w0HqPpMZp2uaEsfDha++8kYuaT//3YTdJ6BYoWo4g0J9vnTs01BjEVJz0iXRlDRxDlAe2e3D4l/vzL4npR5fzatE88uJX24/+IL+FEmPxp3/BC/uulPD1b08LXqXPewOJjxK6WYD72l4b+HN0if19aZLwam8O49mNt8pswdNWpNq+Txw1FqPRmuUBSAQeGCZ7VUFD0OaefCDu7pTJliUqdFOpmZ2CFnoeMYQG++pfD3tJ8PsCIjERKBuEgoqe37O/TvhEh8ItNsDnMYoT/HZ6wlnxAf8gB5KFXKTO4d5R2SwYh8ZSEPKYbM0cARJjbgvHZRJnOVoNDp862uCyiRHXdSNUqVeQja42+HTIighw1qX9dtbYZLSiRc5VtkKDuiTK8gzRGB4dMNCA2Dyt6W+3i6nDkEvTXIMUCa39+OVmjUOewkQNBxAME1i2u/+0jgi5Qd03A0gV/Dc+6Ks/7kqmSvHjUBCi5n2JpBgRX5xXCeJSwAWMzLfEphaXHUAaQTNbLt8rrsk7QXotzFDht83oHztHorCTSGzb8raSsCP0PjIaa3IBDRLo5yNtD8LL+dl110yQCI04VvPdszE8IhmoqxjikcT0r+t7YRY36piG IronPort-HdrOrdr: A9a23:ViJ5daNsm6Qwg8BcTv2jsMiBIKoaSvp037BL7TEUdfUxSKGlfq+V8sjzqiWftN98YhAdcLO7Scy9qBHnhP1ICOAqVN/MYOCMghrLEGgN1+vf6gylMyj/28oY7q14bpV5YeeaMXFKyer8/ym0euxN/OW6 X-IronPort-AV: E=Sophos;i="5.85,328,1624291200"; d="scan'208";a="115097021" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 28 Sep 2021 14:24:04 +0800 Received: from G08CNEXMBPEKD05.g08.fujitsu.local (unknown [10.167.33.204]) by cn.fujitsu.com (Postfix) with ESMTP id 3D3194D0DC7C; Tue, 28 Sep 2021 14:23:58 +0800 (CST) Received: from G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.85) by G08CNEXMBPEKD05.g08.fujitsu.local (10.167.33.204) with Microsoft SMTP Server (TLS) id 15.0.1497.23; Tue, 28 Sep 2021 14:23:57 +0800 Received: from irides.mr.mr.mr (10.167.225.141) by G08CNEXCHPEKD09.g08.fujitsu.local (10.167.33.209) with Microsoft SMTP Server id 15.0.1497.23 via Frontend Transport; Tue, 28 Sep 2021 14:23:56 +0800 From: Shiyang Ruan To: , , , CC: , , , , , , , Subject: [PATCH v10 8/8] xfs: Add dax dedupe support Date: Tue, 28 Sep 2021 14:23:11 +0800 Message-ID: <20210928062311.4012070-9-ruansy.fnst@fujitsu.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> References: <20210928062311.4012070-1-ruansy.fnst@fujitsu.com> Precedence: bulk X-Mailing-List: nvdimm@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-yoursite-MailScanner-ID: 3D3194D0DC7C.A0ED8 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@fujitsu.com X-Spam-Status: No Introduce xfs_mmaplock_two_inodes_and_break_dax_layout() for dax files who are going to be deduped. After that, call compare range function only when files are both DAX or not. Signed-off-by: Shiyang Ruan Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_file.c | 2 +- fs/xfs/xfs_inode.c | 69 +++++++++++++++++++++++++++++++++++++++++--- fs/xfs/xfs_inode.h | 1 + fs/xfs/xfs_reflink.c | 4 +-- 4 files changed, 69 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index afde4fbefb6f..97d03bc25fda 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -846,7 +846,7 @@ xfs_wait_dax_page( xfs_ilock(ip, XFS_MMAPLOCK_EXCL); } -static int +int xfs_break_dax_layouts( struct inode *inode, bool *retry) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a4f6f034fb81..8e03cca4ce61 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3790,6 +3790,50 @@ xfs_iolock_two_inodes_and_break_layout( return 0; } +static int +xfs_mmaplock_two_inodes_and_break_dax_layout( + struct xfs_inode *ip1, + struct xfs_inode *ip2) +{ + int error; + bool retry; + struct page *page; + + if (ip1->i_ino > ip2->i_ino) + swap(ip1, ip2); + +again: + retry = false; + /* Lock the first inode */ + xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); + error = xfs_break_dax_layouts(VFS_I(ip1), &retry); + if (error || retry) { + xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); + if (error == 0 && retry) + goto again; + return error; + } + + if (ip1 == ip2) + return 0; + + /* Nested lock the second inode */ + xfs_ilock(ip2, xfs_lock_inumorder(XFS_MMAPLOCK_EXCL, 1)); + /* + * We cannot use xfs_break_dax_layouts() directly here because it may + * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable + * for this nested lock case. + */ + page = dax_layout_busy_page(VFS_I(ip2)->i_mapping); + if (page && page_ref_count(page) != 1) { + xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); + xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); + goto again; + } + + return 0; +} + /* * Lock two inodes so that userspace cannot initiate I/O via file syscalls or * mmap activity. @@ -3804,8 +3848,19 @@ xfs_ilock2_io_mmap( ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2)); if (ret) return ret; - filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping, - VFS_I(ip2)->i_mapping); + + if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) { + ret = xfs_mmaplock_two_inodes_and_break_dax_layout(ip1, ip2); + if (ret) { + inode_unlock(VFS_I(ip2)); + if (ip1 != ip2) + inode_unlock(VFS_I(ip1)); + return ret; + } + } else + filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping, + VFS_I(ip2)->i_mapping); + return 0; } @@ -3815,8 +3870,14 @@ xfs_iunlock2_io_mmap( struct xfs_inode *ip1, struct xfs_inode *ip2) { - filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping, - VFS_I(ip2)->i_mapping); + if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) { + xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); + if (ip1 != ip2) + xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); + } else + filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping, + VFS_I(ip2)->i_mapping); + inode_unlock(VFS_I(ip2)); if (ip1 != ip2) inode_unlock(VFS_I(ip1)); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b21b177832d1..f7e26fe31a26 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -472,6 +472,7 @@ enum xfs_prealloc_flags { int xfs_update_prealloc_flags(struct xfs_inode *ip, enum xfs_prealloc_flags flags); +int xfs_break_dax_layouts(struct inode *inode, bool *retry); int xfs_break_layouts(struct inode *inode, uint *iolock, enum layout_break_reason reason); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 9d876e268734..3b99c9dfcf0d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1327,8 +1327,8 @@ xfs_reflink_remap_prep( if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) goto out_unlock; - /* Don't share DAX file data for now. */ - if (IS_DAX(inode_in) || IS_DAX(inode_out)) + /* Don't share DAX file data with non-DAX file. */ + if (IS_DAX(inode_in) != IS_DAX(inode_out)) goto out_unlock; if (!IS_DAX(inode_in))