From patchwork Wed Oct 30 04:13:52 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ruan Shiyang X-Patchwork-Id: 11219003 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D7CEB14DB for ; Wed, 30 Oct 2019 04:14:45 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id C06582173E for ; Wed, 30 Oct 2019 04:14:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727350AbfJ3EO2 (ORCPT ); Wed, 30 Oct 2019 00:14:28 -0400 Received: from mail.cn.fujitsu.com ([183.91.158.132]:45395 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726627AbfJ3EOW (ORCPT ); Wed, 30 Oct 2019 00:14:22 -0400 X-IronPort-AV: E=Sophos;i="5.68,245,1569254400"; d="scan'208";a="77665114" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 30 Oct 2019 12:14:20 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (unknown [10.167.33.80]) by cn.fujitsu.com (Postfix) with ESMTP id 3291C4B6AE15; Wed, 30 Oct 2019 12:06:18 +0800 (CST) Received: from localhost.localdomain (10.167.225.140) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.439.0; Wed, 30 Oct 2019 12:14:26 +0800 From: Shiyang Ruan To: , , , , , CC: , , , , , Goldwyn Rodrigues Subject: [RFC PATCH v2 1/7] dax: Introduce dax_copy_edges() for COW. Date: Wed, 30 Oct 2019 12:13:52 +0800 Message-ID: <20191030041358.14450-2-ruansy.fnst@cn.fujitsu.com> X-Mailer: git-send-email 2.17.0 In-Reply-To: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> References: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> MIME-Version: 1.0 X-Originating-IP: [10.167.225.140] X-yoursite-MailScanner-ID: 3291C4B6AE15.A438C X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@cn.fujitsu.com X-Spam-Status: No Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org To copy source data to destance address before write. Change dax_iomap_pfn() to return the address as well in order to use it for performing a memcpy in case the type is IOMAP_COW. dax_copy_edges() is a helper functions performs a copy from one part of the device to another for data not page aligned. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Shiyang Ruan --- fs/dax.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 68eef98cd9c4..e1f4493ce56a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -987,8 +987,8 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9; } -static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, - pfn_t *pfnp) +static int dax_iomap_direct_access(struct iomap *iomap, loff_t pos, size_t size, + pfn_t *pfnp, void **addr) { const sector_t sector = dax_iomap_sector(iomap, pos); pgoff_t pgoff; @@ -999,12 +999,14 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, if (rc) return rc; id = dax_read_lock(); - length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), - NULL, pfnp); + length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), addr, + pfnp); if (length < 0) { rc = length; goto out; } + if (!pfnp) + goto out_check_addr; rc = -EINVAL; if (PFN_PHYS(length) < size) goto out; @@ -1014,6 +1016,12 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size, if (length > 1 && !pfn_t_devmap(*pfnp)) goto out; rc = 0; + +out_check_addr: + if (!addr) + goto out; + if (!*addr) + rc = -EFAULT; out: dax_read_unlock(id); return rc; @@ -1056,6 +1064,48 @@ static bool dax_range_is_aligned(struct block_device *bdev, return true; } +/* + * dax_copy_edges - Copies the part of the pages not included in + * the write, but required for CoW because + * offset/offset+length are not page aligned. + */ +static int dax_copy_edges(loff_t pos, loff_t length, struct iomap *srcmap, + void *daddr, bool pmd) +{ + size_t page_size = pmd ? PMD_SIZE : PAGE_SIZE; + loff_t offset = pos & (page_size - 1); + size_t size = ALIGN(offset + length, page_size); + loff_t end = pos + length; + loff_t pg_end = round_up(end, page_size); + void *saddr = 0; + int ret = 0; + + ret = dax_iomap_direct_access(srcmap, pos, size, NULL, &saddr); + if (ret) + return ret; + /* + * Copy the first part of the page + * Note: we pass offset as length + */ + if (offset) { + if (saddr) + ret = memcpy_mcsafe(daddr, saddr, offset); + else + memset(daddr, 0, offset); + } + + /* Copy the last part of the range */ + if (end < pg_end) { + if (saddr) + ret = memcpy_mcsafe(daddr + offset + length, + saddr + offset + length, pg_end - end); + else + memset(daddr + offset + length, 0, + pg_end - end); + } + return ret; +} + int __dax_zero_page_range(struct block_device *bdev, struct dax_device *dax_dev, sector_t sector, unsigned int offset, unsigned int size) @@ -1342,7 +1392,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } - error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn); + error = dax_iomap_direct_access(&iomap, pos, PAGE_SIZE, &pfn, + NULL); if (error < 0) goto error_finish_iomap; @@ -1560,7 +1611,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, switch (iomap.type) { case IOMAP_MAPPED: - error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn); + error = dax_iomap_direct_access(&iomap, pos, PMD_SIZE, &pfn, + NULL); if (error < 0) goto finish_iomap; From patchwork Wed Oct 30 04:13:53 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ruan Shiyang X-Patchwork-Id: 11218979 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 514B8912 for ; Wed, 30 Oct 2019 04:14:23 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 38EC4208E3 for ; Wed, 30 Oct 2019 04:14:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727189AbfJ3EOW (ORCPT ); Wed, 30 Oct 2019 00:14:22 -0400 Received: from mail.cn.fujitsu.com ([183.91.158.132]:4003 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727183AbfJ3EOW (ORCPT ); Wed, 30 Oct 2019 00:14:22 -0400 X-IronPort-AV: E=Sophos;i="5.68,245,1569254400"; d="scan'208";a="77665115" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 30 Oct 2019 12:14:20 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (unknown [10.167.33.80]) by cn.fujitsu.com (Postfix) with ESMTP id 5D4BF4B6EC86; Wed, 30 Oct 2019 12:06:20 +0800 (CST) Received: from localhost.localdomain (10.167.225.140) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.439.0; Wed, 30 Oct 2019 12:14:28 +0800 From: Shiyang Ruan To: , , , , , CC: , , , , Subject: [RFC PATCH v2 2/7] dax: copy data before write. Date: Wed, 30 Oct 2019 12:13:53 +0800 Message-ID: <20191030041358.14450-3-ruansy.fnst@cn.fujitsu.com> X-Mailer: git-send-email 2.17.0 In-Reply-To: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> References: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> MIME-Version: 1.0 X-Originating-IP: [10.167.225.140] X-yoursite-MailScanner-ID: 5D4BF4B6EC86.A86AA X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@cn.fujitsu.com X-Spam-Status: No Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org Add dax_copy_edges() into each dax actor functions to perform copy on write. Signed-off-by: Shiyang Ruan --- fs/dax.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index e1f4493ce56a..949a40cf1fe7 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1159,7 +1159,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, return iov_iter_zero(min(length, end - pos), iter); } - if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) + if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED && iomap == srcmap)) return -EIO; /* @@ -1198,6 +1198,12 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, break; } + if (iomap != srcmap) { + ret = dax_copy_edges(pos, length, srcmap, kaddr, false); + if (ret) + break; + } + map_len = PFN_PHYS(map_len); kaddr += offset; map_len -= offset; @@ -1306,6 +1312,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, vm_fault_t ret = 0; void *entry; pfn_t pfn; + void *kaddr; trace_dax_pte_fault(inode, vmf, ret); /* @@ -1387,19 +1394,27 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, switch (iomap.type) { case IOMAP_MAPPED: +cow: if (iomap.flags & IOMAP_F_NEW) { count_vm_event(PGMAJFAULT); count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } error = dax_iomap_direct_access(&iomap, pos, PAGE_SIZE, &pfn, - NULL); + &kaddr); if (error < 0) goto error_finish_iomap; entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, 0, write && !sync); + if (srcmap.type != IOMAP_HOLE) { + error = dax_copy_edges(pos, PAGE_SIZE, &srcmap, kaddr, + false); + if (error) + goto error_finish_iomap; + } + /* * If we are doing synchronous page fault and inode needs fsync, * we can insert PTE into page tables only after that happens. @@ -1423,6 +1438,9 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, goto finish_iomap; case IOMAP_UNWRITTEN: + if (srcmap.type != IOMAP_HOLE) + goto cow; + /*FALLTHRU*/ case IOMAP_HOLE: if (!write) { ret = dax_load_hole(&xas, mapping, &entry, vmf); @@ -1530,6 +1548,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, loff_t pos; int error; pfn_t pfn; + void *kaddr; /* * Check whether offset isn't beyond end of file now. Caller is @@ -1610,15 +1629,23 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap); switch (iomap.type) { +cow: case IOMAP_MAPPED: error = dax_iomap_direct_access(&iomap, pos, PMD_SIZE, &pfn, - NULL); + &kaddr); if (error < 0) goto finish_iomap; entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, DAX_PMD, write && !sync); + if (srcmap.type != IOMAP_HOLE) { + error = dax_copy_edges(pos, PMD_SIZE, &srcmap, kaddr, + true); + if (error) + goto unlock_entry; + } + /* * If we are doing synchronous page fault and inode needs fsync, * we can insert PMD into page tables only after that happens. @@ -1637,6 +1664,9 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, result = vmf_insert_pfn_pmd(vmf, pfn, write); break; case IOMAP_UNWRITTEN: + if (srcmap.type != IOMAP_HOLE) + goto cow; + /*FALLTHRU*/ case IOMAP_HOLE: if (WARN_ON_ONCE(write)) break; From patchwork Wed Oct 30 04:13:54 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ruan Shiyang X-Patchwork-Id: 11218987 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6EA99912 for ; Wed, 30 Oct 2019 04:14:28 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 4C96B21479 for ; Wed, 30 Oct 2019 04:14:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727283AbfJ3EOX (ORCPT ); Wed, 30 Oct 2019 00:14:23 -0400 Received: from mail.cn.fujitsu.com ([183.91.158.132]:4003 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727222AbfJ3EOX (ORCPT ); Wed, 30 Oct 2019 00:14:23 -0400 X-IronPort-AV: E=Sophos;i="5.68,245,1569254400"; d="scan'208";a="77665116" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 30 Oct 2019 12:14:20 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (unknown [10.167.33.80]) by cn.fujitsu.com (Postfix) with ESMTP id 786AD4CE1510; Wed, 30 Oct 2019 12:06:22 +0800 (CST) Received: from localhost.localdomain (10.167.225.140) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.439.0; Wed, 30 Oct 2019 12:14:30 +0800 From: Shiyang Ruan To: , , , , , CC: , , , , , Goldwyn Rodrigues Subject: [RFC PATCH v2 3/7] dax: replace mmap entry in case of CoW Date: Wed, 30 Oct 2019 12:13:54 +0800 Message-ID: <20191030041358.14450-4-ruansy.fnst@cn.fujitsu.com> X-Mailer: git-send-email 2.17.0 In-Reply-To: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> References: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> MIME-Version: 1.0 X-Originating-IP: [10.167.225.140] X-yoursite-MailScanner-ID: 786AD4CE1510.A420B X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@cn.fujitsu.com X-Spam-Status: No Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Goldwyn Rodrigues We replace the existing entry to the newly allocated one in case of CoW. Also, we mark the entry as PAGECACHE_TAG_TOWRITE so writeback marks this entry as writeprotected. This helps us snapshots so new write pagefaults after snapshots trigger a CoW. btrfs does not support hugepages so we don't handle PMD. Signed-off-by: Goldwyn Rodrigues --- fs/dax.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 949a40cf1fe7..e6174c0c669a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -705,6 +705,9 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, return 0; } +#define DAX_IF_DIRTY (1ULL << 0) +#define DAX_IF_COW (1ULL << 1) + /* * By this point grab_mapping_entry() has ensured that we have a locked entry * of the appropriate size so we don't have to worry about downgrading PMDs to @@ -714,14 +717,17 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, */ static void *dax_insert_entry(struct xa_state *xas, struct address_space *mapping, struct vm_fault *vmf, - void *entry, pfn_t pfn, unsigned long flags, bool dirty) + void *entry, pfn_t pfn, unsigned long flags, + unsigned int insert_flags) { void *new_entry = dax_make_entry(pfn, flags); + bool dirty = insert_flags & DAX_IF_DIRTY; + bool cow = insert_flags & DAX_IF_COW; if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) { + if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) { unsigned long index = xas->xa_index; /* we are replacing a zero page with block mapping */ if (dax_is_pmd_entry(entry)) @@ -733,7 +739,7 @@ static void *dax_insert_entry(struct xa_state *xas, xas_reset(xas); xas_lock_irq(xas); - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { void *old; dax_disassociate_entry(entry, mapping, false); @@ -757,6 +763,9 @@ static void *dax_insert_entry(struct xa_state *xas, if (dirty) xas_set_mark(xas, PAGECACHE_TAG_DIRTY); + if (cow) + xas_set_mark(xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irq(xas); return entry; } @@ -1044,7 +1053,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, vm_fault_t ret; *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_ZERO_PAGE, false); + DAX_ZERO_PAGE, 0); ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); trace_dax_load_hole(inode, vmf, ret); @@ -1313,6 +1322,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, void *entry; pfn_t pfn; void *kaddr; + unsigned long insert_flags = 0; trace_dax_pte_fault(inode, vmf, ret); /* @@ -1405,8 +1415,10 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, if (error < 0) goto error_finish_iomap; - entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, - 0, write && !sync); + if (write && !sync) + insert_flags |= DAX_IF_DIRTY; + entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, 0, + insert_flags); if (srcmap.type != IOMAP_HOLE) { error = dax_copy_edges(pos, PAGE_SIZE, &srcmap, kaddr, @@ -1438,8 +1450,10 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, goto finish_iomap; case IOMAP_UNWRITTEN: - if (srcmap.type != IOMAP_HOLE) + if (srcmap.type != IOMAP_HOLE) { + insert_flags |= DAX_IF_COW; goto cow; + } /*FALLTHRU*/ case IOMAP_HOLE: if (!write) { @@ -1497,7 +1511,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, pfn = page_to_pfn_t(zero_page); *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, - DAX_PMD | DAX_ZERO_PAGE, false); + DAX_PMD | DAX_ZERO_PAGE, 0); if (arch_needs_pgtable_deposit()) { pgtable = pte_alloc_one(vma->vm_mm); @@ -1549,6 +1563,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, int error; pfn_t pfn; void *kaddr; + unsigned long insert_flags = 0; /* * Check whether offset isn't beyond end of file now. Caller is @@ -1636,8 +1651,11 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, if (error < 0) goto finish_iomap; + if (write && !sync) + insert_flags |= DAX_IF_DIRTY; + entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn, - DAX_PMD, write && !sync); + DAX_PMD, insert_flags); if (srcmap.type != IOMAP_HOLE) { error = dax_copy_edges(pos, PMD_SIZE, &srcmap, kaddr, @@ -1664,8 +1682,10 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, result = vmf_insert_pfn_pmd(vmf, pfn, write); break; case IOMAP_UNWRITTEN: - if (srcmap.type != IOMAP_HOLE) + if (srcmap.type != IOMAP_HOLE) { + insert_flags |= DAX_IF_COW; goto cow; + } /*FALLTHRU*/ case IOMAP_HOLE: if (WARN_ON_ONCE(write)) From patchwork Wed Oct 30 04:13:55 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ruan Shiyang X-Patchwork-Id: 11218999 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 41304912 for ; Wed, 30 Oct 2019 04:14:41 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 1D81821479 for ; Wed, 30 Oct 2019 04:14:41 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727429AbfJ3EOe (ORCPT ); Wed, 30 Oct 2019 00:14:34 -0400 Received: from mail.cn.fujitsu.com ([183.91.158.132]:4016 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727370AbfJ3EOb (ORCPT ); Wed, 30 Oct 2019 00:14:31 -0400 X-IronPort-AV: E=Sophos;i="5.68,245,1569254400"; d="scan'208";a="77665122" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 30 Oct 2019 12:14:26 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (unknown [10.167.33.80]) by cn.fujitsu.com (Postfix) with ESMTP id 616F24CE1508; Wed, 30 Oct 2019 12:06:24 +0800 (CST) Received: from localhost.localdomain (10.167.225.140) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.439.0; Wed, 30 Oct 2019 12:14:32 +0800 From: Shiyang Ruan To: , , , , , CC: , , , , , Goldwyn Rodrigues Subject: [RFC PATCH v2 4/7] fs: dedup file range to use a compare function Date: Wed, 30 Oct 2019 12:13:55 +0800 Message-ID: <20191030041358.14450-5-ruansy.fnst@cn.fujitsu.com> X-Mailer: git-send-email 2.17.0 In-Reply-To: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> References: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> MIME-Version: 1.0 X-Originating-IP: [10.167.225.140] X-yoursite-MailScanner-ID: 616F24CE1508.A4B19 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@cn.fujitsu.com X-Spam-Status: No Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Goldwyn Rodrigues With dax we cannot deal with readpage() etc. So, we create a funciton callback to perform the file data comparison and pass it to generic_remap_file_range_prep() so it can use iomap-based functions. This may not be the best way to solve this. Suggestions welcome. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Shiyang Ruan --- fs/btrfs/ioctl.c | 3 ++- fs/dax.c | 58 ++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/file.c | 2 +- fs/read_write.c | 11 +++++---- fs/xfs/xfs_reflink.c | 2 +- include/linux/dax.h | 4 +++ include/linux/fs.h | 9 ++++++- 7 files changed, 80 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index de730e56d3f5..ee16e275c44a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3910,7 +3910,8 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in, return ret; return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - len, remap_flags); + len, remap_flags, + vfs_dedupe_file_range_compare); } loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, diff --git a/fs/dax.c b/fs/dax.c index e6174c0c669a..a4f90f3faddb 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -30,6 +30,8 @@ #define CREATE_TRACE_POINTS #include +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + static inline unsigned int pe_order(enum page_entry_size pe_size) { if (pe_size == PE_SIZE_PTE) @@ -1826,3 +1828,59 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, return dax_insert_pfn_mkwrite(vmf, pfn, order); } EXPORT_SYMBOL_GPL(dax_finish_sync_fault); + +int dax_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, + loff_t destoff, loff_t len, bool *is_same, const struct iomap_ops *ops) +{ + void *saddr, *daddr; + struct iomap s_iomap = {0}; + struct iomap d_iomap = {0}; + bool same = true; + loff_t cmp_len; + int id, ret = 0; + + id = dax_read_lock(); + while (len) { + ret = ops->iomap_begin(src, srcoff, len, 0, &s_iomap, NULL); + if (ret < 0) { + if (ops->iomap_end) + ops->iomap_end(src, srcoff, len, ret, 0, &s_iomap); + return ret; + } + cmp_len = len; + cmp_len = MIN(len, s_iomap.offset + s_iomap.length - srcoff); + + ret = ops->iomap_begin(dest, destoff, cmp_len, 0, &d_iomap, NULL); + if (ret < 0) { + if (ops->iomap_end) { + ops->iomap_end(src, srcoff, len, ret, 0, &s_iomap); + ops->iomap_end(dest, destoff, len, ret, 0, &d_iomap); + } + return ret; + } + cmp_len = MIN(cmp_len, d_iomap.offset + d_iomap.length - destoff); + + dax_iomap_direct_access(&s_iomap, srcoff, + ALIGN(srcoff + cmp_len, PAGE_SIZE), + NULL, &saddr); + dax_iomap_direct_access(&d_iomap, destoff, + ALIGN(destoff + cmp_len, PAGE_SIZE), + NULL, &daddr); + + same = !memcmp(saddr, daddr, cmp_len); + if (!same) + break; + len -= cmp_len; + srcoff += cmp_len; + destoff += cmp_len; + + if (ops->iomap_end) { + ret = ops->iomap_end(src, srcoff, len, 0, 0, &s_iomap); + ret = ops->iomap_end(dest, destoff, len, 0, 0, &d_iomap); + } + } + dax_read_unlock(id); + *is_same = same; + return ret; +} +EXPORT_SYMBOL_GPL(dax_file_range_compare); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2e982db3e1ae..f29315d2e26b 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2545,7 +2545,7 @@ static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in, goto out_unlock; ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - &len, remap_flags); + &len, remap_flags, vfs_dedupe_file_range_compare); if (ret < 0 || len == 0) goto out_unlock; diff --git a/fs/read_write.c b/fs/read_write.c index 5bbf587f5bc1..9c7a8320a260 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1853,7 +1853,7 @@ static void vfs_unlock_two_pages(struct page *page1, struct page *page2) * Compare extents of two files to see if they are the same. * Caller must have locked both inodes to prevent write races. */ -static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, +int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same) { @@ -1934,6 +1934,7 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, out_error: return error; } +EXPORT_SYMBOL_GPL(vfs_dedupe_file_range_compare); /* * Check that the two inodes are eligible for cloning, the ranges make @@ -1945,7 +1946,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, */ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - loff_t *len, unsigned int remap_flags) + loff_t *len, unsigned int remap_flags, + compare_range_t compare) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -2004,9 +2006,8 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, */ if (remap_flags & REMAP_FILE_DEDUP) { bool is_same = false; - - ret = vfs_dedupe_file_range_compare(inode_in, pos_in, - inode_out, pos_out, *len, &is_same); + ret = (*compare)(inode_in, pos_in, + inode_out, pos_out, *len, &is_same); if (ret) return ret; if (!is_same) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index de451235c4ee..2c8ad581d4db 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1339,7 +1339,7 @@ xfs_reflink_remap_prep( goto out_unlock; ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - len, remap_flags); + len, remap_flags, vfs_dedupe_file_range_compare); if (ret < 0 || *len == 0) goto out_unlock; diff --git a/include/linux/dax.h b/include/linux/dax.h index 9bd8528bd305..4533bfb99683 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -220,6 +220,10 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); +int dax_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same, + const struct iomap_ops *ops); #ifdef CONFIG_FS_DAX int __dax_zero_page_range(struct block_device *bdev, diff --git a/include/linux/fs.h b/include/linux/fs.h index e0d909d35763..ac5755f70995 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1912,13 +1912,20 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); +extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same); +typedef int (*compare_range_t)(struct inode *src, loff_t srcpos, + struct inode *dest, loff_t destpos, + loff_t len, bool *is_same); extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags); extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, - unsigned int remap_flags); + unsigned int remap_flags, + compare_range_t cmp); extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); From patchwork Wed Oct 30 04:13:56 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ruan Shiyang X-Patchwork-Id: 11219001 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9F5CC1599 for ; Wed, 30 Oct 2019 04:14:41 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 8628121734 for ; Wed, 30 Oct 2019 04:14:41 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727411AbfJ3EOb (ORCPT ); Wed, 30 Oct 2019 00:14:31 -0400 Received: from mail.cn.fujitsu.com ([183.91.158.132]:4010 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727344AbfJ3EOa (ORCPT ); Wed, 30 Oct 2019 00:14:30 -0400 X-IronPort-AV: E=Sophos;i="5.68,245,1569254400"; d="scan'208";a="77665121" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 30 Oct 2019 12:14:26 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (unknown [10.167.33.80]) by cn.fujitsu.com (Postfix) with ESMTP id E68F04B6EC86; Wed, 30 Oct 2019 12:06:26 +0800 (CST) Received: from localhost.localdomain (10.167.225.140) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.439.0; Wed, 30 Oct 2019 12:14:34 +0800 From: Shiyang Ruan To: , , , , , CC: , , , , , Goldwyn Rodrigues Subject: [RFC PATCH v2 5/7] dax: memcpy before zeroing range Date: Wed, 30 Oct 2019 12:13:56 +0800 Message-ID: <20191030041358.14450-6-ruansy.fnst@cn.fujitsu.com> X-Mailer: git-send-email 2.17.0 In-Reply-To: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> References: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> MIME-Version: 1.0 X-Originating-IP: [10.167.225.140] X-yoursite-MailScanner-ID: E68F04B6EC86.A6A86 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@cn.fujitsu.com X-Spam-Status: No Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org From: Goldwyn Rodrigues However, this needed more iomap fields, so it was easier to pass iomap and compute inside the function rather than passing a log of arguments. Note, there is subtle difference between iomap_sector and dax_iomap_sector(). Can we replace dax_iomap_sector with iomap_sector()? It would need pos & PAGE_MASK though or else bdev_dax_pgoff() return -EINVAL. Signed-off-by: Goldwyn Rodrigues --- fs/dax.c | 21 ++++++++++++++++----- fs/iomap/buffered-io.c | 8 ++++---- include/linux/dax.h | 12 ++++++------ 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index a4f90f3faddb..eab6bb256205 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1117,11 +1117,16 @@ static int dax_copy_edges(loff_t pos, loff_t length, struct iomap *srcmap, return ret; } -int __dax_zero_page_range(struct block_device *bdev, - struct dax_device *dax_dev, sector_t sector, - unsigned int offset, unsigned int size) +int __dax_zero_page_range(struct iomap *iomap, struct iomap *srcmap, loff_t pos, + unsigned int offset, unsigned int size) { - if (dax_range_is_aligned(bdev, offset, size)) { + sector_t sector = dax_iomap_sector(iomap, pos & PAGE_MASK); + struct block_device *bdev = iomap->bdev; + struct dax_device *dax_dev = iomap->dax_dev; + int ret = 0; + + if (iomap == srcmap && + dax_range_is_aligned(bdev, offset, size)) { sector_t start_sector = sector + (offset >> 9); return blkdev_issue_zeroout(bdev, start_sector, @@ -1141,11 +1146,17 @@ int __dax_zero_page_range(struct block_device *bdev, dax_read_unlock(id); return rc; } + if (iomap != srcmap) { + ret = dax_copy_edges(pos, size, srcmap, kaddr, false); + if (ret) + goto out_unlock; + } memset(kaddr + offset, 0, size); dax_flush(dax_dev, kaddr + offset, size); +out_unlock: dax_read_unlock(id); } - return 0; + return ret; } EXPORT_SYMBOL_GPL(__dax_zero_page_range); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index c62e807956b6..3fa79389e4d0 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -960,10 +960,9 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, } static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes, - struct iomap *iomap) + struct iomap *iomap, struct iomap *srcmap) { - return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, - iomap_sector(iomap, pos & PAGE_MASK), offset, bytes); + return __dax_zero_page_range(iomap, srcmap, pos, offset, bytes); } static loff_t @@ -985,7 +984,8 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, bytes = min_t(loff_t, PAGE_SIZE - offset, count); if (IS_DAX(inode)) - status = iomap_dax_zero(pos, offset, bytes, iomap); + status = iomap_dax_zero(pos, offset, bytes, iomap, + srcmap); else status = iomap_zero(inode, pos, offset, bytes, iomap, srcmap); diff --git a/include/linux/dax.h b/include/linux/dax.h index 4533bfb99683..7adf3b9e1061 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -12,6 +12,7 @@ typedef unsigned long dax_entry_t; +struct iomap; struct iomap_ops; struct dax_device; struct dax_operations { @@ -226,13 +227,12 @@ int dax_file_range_compare(struct inode *src, loff_t srcoff, const struct iomap_ops *ops); #ifdef CONFIG_FS_DAX -int __dax_zero_page_range(struct block_device *bdev, - struct dax_device *dax_dev, sector_t sector, - unsigned int offset, unsigned int length); +int __dax_zero_page_range(struct iomap *iomap, struct iomap *srcmap, loff_t pos, + unsigned int offset, unsigned int size); #else -static inline int __dax_zero_page_range(struct block_device *bdev, - struct dax_device *dax_dev, sector_t sector, - unsigned int offset, unsigned int length) +static inline int __dax_zero_page_range(struct iomap *iomap, + struct iomap *srcmap, loff_t pos, + unsigned int offset, unsigned int size) { return -ENXIO; } From patchwork Wed Oct 30 04:13:57 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ruan Shiyang X-Patchwork-Id: 11219005 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 376931599 for ; Wed, 30 Oct 2019 04:14:46 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 1D84F21479 for ; Wed, 30 Oct 2019 04:14:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727222AbfJ3EO2 (ORCPT ); Wed, 30 Oct 2019 00:14:28 -0400 Received: from mail.cn.fujitsu.com ([183.91.158.132]:4010 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727014AbfJ3EO2 (ORCPT ); Wed, 30 Oct 2019 00:14:28 -0400 X-IronPort-AV: E=Sophos;i="5.68,245,1569254400"; d="scan'208";a="77665123" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 30 Oct 2019 12:14:26 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (unknown [10.167.33.80]) by cn.fujitsu.com (Postfix) with ESMTP id BA7B84CE1513; Wed, 30 Oct 2019 12:06:28 +0800 (CST) Received: from localhost.localdomain (10.167.225.140) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.439.0; Wed, 30 Oct 2019 12:14:37 +0800 From: Shiyang Ruan To: , , , , , CC: , , , , Subject: [RFC PATCH v2 6/7] xfs: handle copy-on-write in fsdax write() path. Date: Wed, 30 Oct 2019 12:13:57 +0800 Message-ID: <20191030041358.14450-7-ruansy.fnst@cn.fujitsu.com> X-Mailer: git-send-email 2.17.0 In-Reply-To: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> References: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> MIME-Version: 1.0 X-Originating-IP: [10.167.225.140] X-yoursite-MailScanner-ID: BA7B84CE1513.A6BD3 X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@cn.fujitsu.com X-Spam-Status: No Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org In fsdax mode, WRITE and ZERO on a shared extent need COW mechanism performed. After COW, new extents needs to be remapped to the file. Signed-off-by: Shiyang Ruan --- fs/xfs/xfs_bmap_util.c | 6 +++++- fs/xfs/xfs_file.c | 10 +++++++--- fs/xfs/xfs_iomap.c | 3 ++- fs/xfs/xfs_iops.c | 11 ++++++++--- fs/xfs/xfs_reflink.c | 2 ++ 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 99bf372ed551..1b7d0665d889 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1114,10 +1114,14 @@ xfs_free_file_space( if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; error = iomap_zero_range(VFS_I(ip), offset, len, NULL, - &xfs_buffered_write_iomap_ops); + IS_DAX(VFS_I(ip)) ? + &xfs_direct_write_iomap_ops : &xfs_buffered_write_iomap_ops); if (error) return error; + if (xfs_is_reflink_inode(ip)) + xfs_reflink_end_cow(ip, offset, len); + /* * If we zeroed right up to EOF and EOF straddles a page boundary we * must make sure that the post-EOF area is also zeroed because the diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 24659667d5cb..48071f16a436 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -594,9 +594,13 @@ xfs_file_dax_write( trace_xfs_file_dax_write(ip, count, pos); ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); - if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { - i_size_write(inode, iocb->ki_pos); - error = xfs_setfilesize(ip, pos, ret); + if (ret > 0) { + if (iocb->ki_pos > i_size_read(inode)) { + i_size_write(inode, iocb->ki_pos); + error = xfs_setfilesize(ip, pos, ret); + } + if (xfs_is_cow_inode(ip)) + xfs_reflink_end_cow(ip, pos, ret); } out: xfs_iunlock(ip, iolock); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index e8fb500e1880..db8ee371252f 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -772,13 +772,14 @@ xfs_direct_write_iomap_begin( goto out_unlock; if (imap_needs_cow(ip, flags, &imap, nimaps)) { + bool need_convert = flags & IOMAP_DIRECT || IS_DAX(inode); error = -EAGAIN; if (flags & IOMAP_NOWAIT) goto out_unlock; /* may drop and re-acquire the ilock */ error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, - &lockmode, flags & IOMAP_DIRECT); + &lockmode, need_convert); if (error) goto out_unlock; if (shared) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 329a34af8e79..0052a97ccebf 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -834,6 +834,7 @@ xfs_setattr_size( int error; uint lock_flags = 0; bool did_zeroing = false; + const struct iomap_ops *ops; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); @@ -880,13 +881,17 @@ xfs_setattr_size( * extension, or zeroing out the rest of the block on a downward * truncate. */ + if (IS_DAX(inode)) + ops = &xfs_direct_write_iomap_ops; + else + ops = &xfs_buffered_write_iomap_ops; + if (newsize > oldsize) { trace_xfs_zero_eof(ip, oldsize, newsize - oldsize); error = iomap_zero_range(inode, oldsize, newsize - oldsize, - &did_zeroing, &xfs_buffered_write_iomap_ops); + &did_zeroing, ops); } else { - error = iomap_truncate_page(inode, newsize, &did_zeroing, - &xfs_buffered_write_iomap_ops); + error = iomap_truncate_page(inode, newsize, &did_zeroing, ops); } if (error) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 2c8ad581d4db..e3620bc794a2 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1270,6 +1270,8 @@ xfs_reflink_zero_posteof( trace_xfs_zero_eof(ip, isize, pos - isize); return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL, + IS_DAX(VFS_I(ip)) ? + &xfs_direct_write_iomap_ops : &xfs_buffered_write_iomap_ops); } From patchwork Wed Oct 30 04:13:58 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ruan Shiyang X-Patchwork-Id: 11218997 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0730514DB for ; Wed, 30 Oct 2019 04:14:40 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id E271F20866 for ; Wed, 30 Oct 2019 04:14:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727453AbfJ3EOf (ORCPT ); Wed, 30 Oct 2019 00:14:35 -0400 Received: from mail.cn.fujitsu.com ([183.91.158.132]:4021 "EHLO heian.cn.fujitsu.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727344AbfJ3EOe (ORCPT ); Wed, 30 Oct 2019 00:14:34 -0400 X-IronPort-AV: E=Sophos;i="5.68,245,1569254400"; d="scan'208";a="77665127" Received: from unknown (HELO cn.fujitsu.com) ([10.167.33.5]) by heian.cn.fujitsu.com with ESMTP; 30 Oct 2019 12:14:32 +0800 Received: from G08CNEXCHPEKD01.g08.fujitsu.local (unknown [10.167.33.80]) by cn.fujitsu.com (Postfix) with ESMTP id B32E34B6AE15; Wed, 30 Oct 2019 12:06:30 +0800 (CST) Received: from localhost.localdomain (10.167.225.140) by G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server (TLS) id 14.3.439.0; Wed, 30 Oct 2019 12:14:39 +0800 From: Shiyang Ruan To: , , , , , CC: , , , , Subject: [RFC PATCH v2 7/7] xfs: support dedupe for fsdax. Date: Wed, 30 Oct 2019 12:13:58 +0800 Message-ID: <20191030041358.14450-8-ruansy.fnst@cn.fujitsu.com> X-Mailer: git-send-email 2.17.0 In-Reply-To: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> References: <20191030041358.14450-1-ruansy.fnst@cn.fujitsu.com> MIME-Version: 1.0 X-Originating-IP: [10.167.225.140] X-yoursite-MailScanner-ID: B32E34B6AE15.A5F6D X-yoursite-MailScanner: Found to be clean X-yoursite-MailScanner-From: ruansy.fnst@cn.fujitsu.com X-Spam-Status: No Sender: linux-xfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-xfs@vger.kernel.org Use xfs_break_layouts() to break files' layouts when locking them. And call dax_file_range_compare() function to compare range for DAX files. Signed-off-by: Shiyang Ruan --- fs/xfs/xfs_reflink.c | 77 ++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e3620bc794a2..3d8d1d2f0ac0 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1186,47 +1186,41 @@ xfs_reflink_remap_blocks( * back out both locks. */ static int -xfs_iolock_two_inodes_and_break_layout( - struct inode *src, - struct inode *dest) +xfs_reflink_remap_lock_and_break_layouts( + struct file *file_in, + struct file *file_out) { int error; + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); + uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; - if (src > dest) + if (inode_in > inode_out) { + swap(inode_in, inode_out); swap(src, dest); - -retry: - /* Wait to break both inodes' layouts before we start locking. */ - error = break_layout(src, true); - if (error) - return error; - if (src != dest) { - error = break_layout(dest, true); - if (error) - return error; } - /* Lock one inode and make sure nobody got in and leased it. */ - inode_lock(src); - error = break_layout(src, false); + inode_lock(inode_in); + xfs_ilock(src, XFS_MMAPLOCK_EXCL); + error = xfs_break_layouts(inode_in, &iolock, BREAK_UNMAP); + xfs_iunlock(src, XFS_MMAPLOCK_EXCL); if (error) { - inode_unlock(src); - if (error == -EWOULDBLOCK) - goto retry; + inode_unlock(inode_in); return error; } - if (src == dest) + if (inode_in == inode_out) return 0; - /* Lock the other inode and make sure nobody got in and leased it. */ - inode_lock_nested(dest, I_MUTEX_NONDIR2); - error = break_layout(dest, false); + inode_lock_nested(inode_out, I_MUTEX_NONDIR2); + xfs_ilock(dest, XFS_MMAPLOCK_EXCL); + error = xfs_break_layouts(inode_out, &iolock, BREAK_UNMAP); + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); if (error) { - inode_unlock(src); - inode_unlock(dest); - if (error == -EWOULDBLOCK) - goto retry; + inode_unlock(inode_in); + inode_unlock(inode_out); return error; } @@ -1245,6 +1239,11 @@ xfs_reflink_remap_unlock( struct xfs_inode *dest = XFS_I(inode_out); bool same_inode = (inode_in == inode_out); + if (inode_in > inode_out) { + swap(inode_in, inode_out); + swap(src, dest); + } + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); if (!same_inode) xfs_iunlock(src, XFS_MMAPLOCK_EXCL); @@ -1275,6 +1274,14 @@ xfs_reflink_zero_posteof( &xfs_buffered_write_iomap_ops); } +int xfs_reflink_dedupe_file_range_compare(struct inode *src, loff_t srcoff, + struct inode *dest, loff_t destoff, + loff_t len, bool *is_same) +{ + return dax_file_range_compare(src, srcoff, dest, destoff, len, is_same, + &xfs_read_iomap_ops); +} + /* * Prepare two files for range cloning. Upon a successful return both inodes * will have the iolock and mmaplock held, the page cache of the out file will @@ -1319,9 +1326,10 @@ xfs_reflink_remap_prep( struct xfs_inode *dest = XFS_I(inode_out); bool same_inode = (inode_in == inode_out); ssize_t ret; + compare_range_t cmp; /* Lock both files against IO */ - ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out); + ret = xfs_reflink_remap_lock_and_break_layouts(file_in, file_out); if (ret) return ret; if (same_inode) @@ -1336,12 +1344,17 @@ xfs_reflink_remap_prep( if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) goto out_unlock; - /* Don't share DAX file data for now. */ - if (IS_DAX(inode_in) || IS_DAX(inode_out)) + /* Don't share DAX file data with non-DAX file. */ + if (IS_DAX(inode_in) != IS_DAX(inode_out)) goto out_unlock; + if (IS_DAX(inode_in)) + cmp = xfs_reflink_dedupe_file_range_compare; + else + cmp = vfs_dedupe_file_range_compare; + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - len, remap_flags, vfs_dedupe_file_range_compare); + len, remap_flags, cmp); if (ret < 0 || *len == 0) goto out_unlock;