From patchwork Tue Feb 12 21:24:46 2019
X-Patchwork-Submitter: Dan Williams
X-Patchwork-Id: 10808721
Subject: [PATCH 1/7] libnvdimm/pfn: Account for PAGE_SIZE > info-block-size in nd_pfn_init()
From: Dan Williams
To: linux-nvdimm@lists.01.org
Cc: Oliver O'Halloran, linux-kernel@vger.kernel.org, vishal.l.verma@intel.com, linux-fsdevel@vger.kernel.org
Date: Tue, 12 Feb 2019 13:24:46 -0800
Message-ID: <155000668608.348031.18233783401856221451.stgit@dwillia2-desk3.amr.corp.intel.com>
In-Reply-To: <155000668075.348031.9371497273408112600.stgit@dwillia2-desk3.amr.corp.intel.com>

Similar to "libnvdimm: Fix altmap reservation size calculation", provide for a reservation of a full page worth of info-block space at info-block establishment time. Typically there is already slack in the padding from honoring the default 2MB alignment, but provide an explicit reservation for the corner-case configurations where the info block would otherwise not fit.
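The new info_block_reserve() helper rounds the 8K info-block footprint up to a full page, so the reservation only grows on architectures where PAGE_SIZE exceeds the info-block size. A standalone userspace sketch of the arithmetic, with ALIGN and SZ_8K spelled out here rather than taken from kernel headers:

#include <stdio.h>

#define SZ_8K (8 * 1024UL)
/* round x up to the next multiple of a; a must be a power of two */
#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((a) - 1))

/* userspace mirror of the info_block_reserve() helper added below */
static unsigned long info_block_reserve(unsigned long page_size)
{
        return ALIGN(SZ_8K, page_size);
}

int main(void)
{
        /* 4K pages: 8K already spans two whole pages, reserve unchanged */
        printf("4K pages:  %lu\n", info_block_reserve(4096));
        /* 64K pages: the reserve rounds up to one full 64K page */
        printf("64K pages: %lu\n", info_block_reserve(65536));
        return 0;
}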
Cc: Oliver O'Halloran Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 7760c1b91853..ba74a341da5d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -580,6 +580,11 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) } EXPORT_SYMBOL(nd_pfn_probe); +static u32 info_block_reserve(void) +{ + return ALIGN(SZ_8K, PAGE_SIZE); +} + /* * We hotplug memory at section granularity, pad the reserved area from * the previous section base to the namespace base address. @@ -593,7 +598,7 @@ static unsigned long init_altmap_base(resource_size_t base) static unsigned long init_altmap_reserve(resource_size_t base) { - unsigned long reserve = PFN_UP(SZ_8K); + unsigned long reserve = info_block_reserve() >> PAGE_SHIFT; unsigned long base_pfn = PHYS_PFN(base); reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); @@ -608,6 +613,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) u64 offset = le64_to_cpu(pfn_sb->dataoff); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + u32 reserve = info_block_reserve(); struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; @@ -621,7 +627,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) res->end -= end_trunc; if (nd_pfn->mode == PFN_MODE_RAM) { - if (offset < SZ_8K) + if (offset < reserve) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); pgmap->altmap_valid = false; @@ -634,7 +640,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); memcpy(altmap, &__altmap, sizeof(*altmap)); - altmap->free = PHYS_PFN(offset - SZ_8K); + altmap->free = PHYS_PFN(offset - reserve); altmap->alloc = 0; pgmap->altmap_valid = true; } else @@ -687,9 +693,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + u32 start_pad, end_trunc, reserve = info_block_reserve(); resource_size_t start, size; struct nd_region *nd_region; - u32 start_pad, end_trunc; struct nd_pfn_sb *pfn_sb; unsigned long npfns; phys_addr_t offset; @@ -734,7 +740,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) */ start = nsio->res.start + start_pad; size = resource_size(&nsio->res); - npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) + npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - reserve) / PAGE_SIZE); if (nd_pfn->mode == PFN_MODE_PMEM) { /* @@ -742,10 +748,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * when populating the vmemmap. This *should* be equal to * PMD_SIZE for most architectures. 
 */ - offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve, + offset = ALIGN(start + reserve + 64 * npfns + dax_label_reserve, max(nd_pfn->align, PMD_SIZE)) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(start + SZ_8K + dax_label_reserve, + offset = ALIGN(start + reserve + dax_label_reserve, nd_pfn->align) - start; else return -ENXIO;

From patchwork Tue Feb 12 21:24:51 2019
X-Patchwork-Submitter: Dan Williams
X-Patchwork-Id: 10808719
Subject: [PATCH 2/7] libnvdimm/pmem: Honor force_raw for legacy pmem regions
From: Dan Williams
To: linux-nvdimm@lists.01.org
Cc: stable@vger.kernel.org, linux-kernel@vger.kernel.org, vishal.l.verma@intel.com, linux-fsdevel@vger.kernel.org
Date: Tue, 12 Feb 2019 13:24:51 -0800
Message-ID: <155000669130.348031.13764885263577554365.stgit@dwillia2-desk3.amr.corp.intel.com>
In-Reply-To: <155000668075.348031.9371497273408112600.stgit@dwillia2-desk3.amr.corp.intel.com>

For recovery, where non-dax access is needed to a given physical address range, and for testing, allow the 'force_raw' attribute to override the default establishment of a dev_pagemap. Without this capability it is possible to end up with a namespace that cannot be activated due to a corrupted info-block, and that cannot be repaired due to a section collision.
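The patch places the new check ahead of the region_intersects() probe, so a user-armed 'force_raw' wins before any mapping decision is made. A toy userspace model of the resulting decision order in pmem_should_map_pages(); every type and flag below is a stand-in for the kernel-internal ones, and the final RAM-collision test is reduced to a single boolean:

#include <stdbool.h>
#include <stdio.h>

struct toy_ns {
        bool zone_device_enabled;       /* stands in for CONFIG_ZONE_DEVICE */
        bool is_pfn_or_btt;             /* is_nd_pfn(dev) || is_nd_btt(dev) */
        bool force_raw;                 /* the attribute this patch honors */
        bool intersects_ram;            /* summary of region_intersects() */
};

static bool should_map_pages(const struct toy_ns *ns)
{
        if (!ns->zone_device_enabled)
                return false;
        if (ns->is_pfn_or_btt)
                return false;
        if (ns->force_raw)              /* new: raw access short-circuits */
                return false;
        return !ns->intersects_ram;
}

int main(void)
{
        struct toy_ns ns = { true, false, true, false };
        printf("force_raw set:   %d\n", should_map_pages(&ns));  /* 0 */
        ns.force_raw = false;
        printf("force_raw clear: %d\n", should_map_pages(&ns));  /* 1 */
        return 0;
}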
Cc: stable@vger.kernel.org Fixes: 004f1afbe199 ("libnvdimm, pmem: direct map legacy pmem by default") Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4b077555ac70..33a3b23b3db7 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -138,6 +138,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid) bool pmem_should_map_pages(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_common *ndns = to_ndns(dev); struct nd_namespace_io *nsio; if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) @@ -149,6 +150,9 @@ bool pmem_should_map_pages(struct device *dev) if (is_nd_pfn(dev) || is_nd_btt(dev)) return false; + if (ndns->force_raw) + return false; + nsio = to_nd_namespace_io(dev); if (region_intersects(nsio->res.start, resource_size(&nsio->res), IORESOURCE_SYSTEM_RAM,
Wong" , linux-kernel@vger.kernel.org, vishal.l.verma@intel.com, linux-fsdevel@vger.kernel.org Date: Tue, 12 Feb 2019 13:24:56 -0800 Message-ID: <155000669646.348031.16690970886357498896.stgit@dwillia2-desk3.amr.corp.intel.com> In-Reply-To: <155000668075.348031.9371497273408112600.stgit@dwillia2-desk3.amr.corp.intel.com> References: <155000668075.348031.9371497273408112600.stgit@dwillia2-desk3.amr.corp.intel.com> User-Agent: StGit/0.18-2-gc94f MIME-Version: 1.0 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP The checks in __bdev_dax_supported() helped mitigate a potential data corruption bug in the pmem driver's handling of section alignment padding. Strengthen the checks, including checking the end of the range, to validate the dev_pagemap, Xarray entries, and sector-to-pfn translation established for pmem namespaces. Cc: Jan Kara Cc: "Darrick J. Wong" Signed-off-by: Dan Williams --- drivers/dax/super.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 6e928f37d084..a27395cfcec6 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -86,12 +86,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) { struct dax_device *dax_dev; bool dax_enabled = false; + pgoff_t pgoff, pgoff_end; struct request_queue *q; - pgoff_t pgoff; - int err, id; - pfn_t pfn; - long len; char buf[BDEVNAME_SIZE]; + void *kaddr, *end_kaddr; + pfn_t pfn, end_pfn; + sector_t last_page; + long len, len2; + int err, id; if (blocksize != PAGE_SIZE) { pr_debug("%s: error: unsupported blocksize for dax\n", @@ -113,6 +115,15 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) return false; } + last_page = ALIGN_DOWN(part_nr_sects_read(bdev->bd_part) + - PAGE_SIZE / 512, PAGE_SIZE / 512); + err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end); + if (err) { + pr_debug("%s: error: unaligned partition for dax\n", + bdevname(bdev, buf)); + return false; + } + dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); if (!dax_dev) { pr_debug("%s: error: device does not support dax\n", @@ -121,14 +132,15 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) } id = dax_read_lock(); - len = dax_direct_access(dax_dev, pgoff, 1, NULL, &pfn); + len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn); + len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn); dax_read_unlock(id); put_dax(dax_dev); - if (len < 1) { + if (len < 1 || len2 < 1) { pr_debug("%s: error: dax access failed (%ld)\n", - bdevname(bdev, buf), len); + bdevname(bdev, buf), len < 1 ? 
len : len2); return false; } @@ -143,13 +155,20 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) */ WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)); dax_enabled = true; - } else if (pfn_t_devmap(pfn)) { - struct dev_pagemap *pgmap; + } else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) { + struct dev_pagemap *pgmap, *end_pgmap; pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL); - if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX) + end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL); + if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX + && pfn_t_to_page(pfn)->pgmap == pgmap + && pfn_t_to_page(end_pfn)->pgmap == pgmap + && pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr)) + && pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr))) dax_enabled = true; put_dev_pagemap(pgmap); + put_dev_pagemap(end_pgmap); + } if (!dax_enabled) {
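The end-of-device probe works by finding the first sector of the last whole page of the partition and requiring dax_direct_access() to succeed there as well as at offset zero. A standalone sketch of just that last_page arithmetic, assuming 512-byte sectors and 4K pages, with ALIGN_DOWN spelled out rather than taken from kernel headers:

#include <stdio.h>

#define SECTOR_SIZE 512UL
#define PAGE_SIZE 4096UL
/* round x down to a multiple of a; a must be a power of two */
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

/* first sector of the last whole page of a partition of nr_sects sectors */
static unsigned long last_page_sector(unsigned long nr_sects)
{
        return ALIGN_DOWN(nr_sects - PAGE_SIZE / SECTOR_SIZE,
                        PAGE_SIZE / SECTOR_SIZE);
}

int main(void)
{
        /* 1 GiB partition: 2097152 sectors, last page starts at 2097144 */
        printf("%lu\n", last_page_sector(2097152));
        /* unaligned tail (2097150 sectors): rounds down to 2097136 */
        printf("%lu\n", last_page_sector(2097150));
        return 0;
}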
From patchwork Tue Feb 12 21:25:01 2019
X-Patchwork-Submitter: Dan Williams
X-Patchwork-Id: 10808717
Subject: [PATCH 4/7] libnvdimm/pfn: Introduce super-block minimum version requirements
From: Dan Williams
To: linux-nvdimm@lists.01.org
Cc: Jeff Moyer, Johannes Thumshirn, linux-kernel@vger.kernel.org, vishal.l.verma@intel.com, linux-fsdevel@vger.kernel.org
Date: Tue, 12 Feb 2019 13:25:01 -0800
Message-ID: <155000670159.348031.17631616775326330606.stgit@dwillia2-desk3.amr.corp.intel.com>
In-Reply-To: <155000668075.348031.9371497273408112600.stgit@dwillia2-desk3.amr.corp.intel.com>

In support of fixing a pfn info-block interpretation problem that requires a backwards-incompatible format change, introduce a mechanism to indicate a minimum support level to the parsing agent. Hopefully this mechanism never needs to be used, but if it does, it avoids the need to hunt for a state that causes old implementations to bail out. For now, the method chosen to make implementations prior to this introduction fail is to use new PFN3_MODE_* values. The use of 'mode' ensures that older Linux fails, but agents like the EFI driver that only care about 'data_offset' can continue to ignore the requirement. That is to say, 'data_offset' should always be valid, but the interpretation of the values that older Linux kernels use to set up the dev_pagemap and perform sector-to-pfn translations is going to change.

Cc: Jeff Moyer Cc: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/nvdimm/pfn.h | 4 +++- drivers/nvdimm/pfn_devs.c | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index dde9853453d3..710cb743fad6 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -20,6 +20,7 @@ #define PFN_SIG_LEN 16 #define PFN_SIG "NVDIMM_PFN_INFO\0" #define DAX_SIG "NVDIMM_DAX_INFO\0" +#define PFN_VERSION_SUPPORT 2 struct nd_pfn_sb { u8 signature[PFN_SIG_LEN]; @@ -36,7 +37,8 @@ struct nd_pfn_sb { __le32 end_trunc; /* minor-version-2 record the base alignment of the mapping */ __le32 align; - u8 padding[4000]; + __le16 min_version; + u8 padding[3998]; __le64 checksum; }; diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index ba74a341da5d..108f9f7ed064 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -459,6 +459,12 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) if (__le16_to_cpu(pfn_sb->version_minor) < 2) pfn_sb->align = 0; + if (__le16_to_cpu(pfn_sb->version_major) != 1) + return -EINVAL; + + if (__le16_to_cpu(pfn_sb->min_version) > PFN_VERSION_SUPPORT) + return -EINVAL; + switch (le32_to_cpu(pfn_sb->mode)) { case PFN_MODE_RAM: case PFN_MODE_PMEM: @@ -771,6 +777,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); pfn_sb->version_minor = cpu_to_le16(2); + pfn_sb->min_version = cpu_to_le16(2); pfn_sb->start_pad = cpu_to_le32(start_pad); pfn_sb->end_trunc = cpu_to_le32(end_trunc); pfn_sb->align = cpu_to_le32(nd_pfn->align);
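The compatibility rule this adds: a parser advertises the highest info-block revision it understands (PFN_VERSION_SUPPORT) and must refuse any super-block whose min_version exceeds that, while still tolerating newer minor versions it merely does not exploit. A toy model of that gate; the struct is a stand-in, not the on-media nd_pfn_sb layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PFN_VERSION_SUPPORT 2   /* highest revision this parser understands */

struct toy_sb {                 /* stand-in for the on-media super-block */
        uint16_t version_major;
        uint16_t version_minor;
        uint16_t min_version;   /* minimum revision required to parse safely */
};

static bool can_parse(const struct toy_sb *sb)
{
        if (sb->version_major != 1)
                return false;
        /* newer minors are fine unless the writer demands more support */
        return sb->min_version <= PFN_VERSION_SUPPORT;
}

int main(void)
{
        struct toy_sb old = { 1, 2, 0 };  /* pre-min_version media reads 0 */
        struct toy_sb v13 = { 1, 3, 3 };  /* media written by patch 7 */
        printf("v1.2: %d, v1.3: %d\n", can_parse(&old), can_parse(&v13));
        return 0;
}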
From patchwork Tue Feb 12 21:25:06 2019
X-Patchwork-Submitter: Dan Williams
X-Patchwork-Id: 10808707
Subject: [PATCH 5/7] libnvdimm/pfn: Remove dax_label_reserve
From: Dan Williams
To: linux-nvdimm@lists.01.org
Cc: linux-kernel@vger.kernel.org, vishal.l.verma@intel.com, linux-fsdevel@vger.kernel.org
Date: Tue, 12 Feb 2019 13:25:06 -0800
Message-ID: <155000670671.348031.8414677031368179860.stgit@dwillia2-desk3.amr.corp.intel.com>
In-Reply-To: <155000668075.348031.9371497273408112600.stgit@dwillia2-desk3.amr.corp.intel.com>

The reserve was for an abandoned effort to add label (partitioning) support to device-dax instances. Remove it.

Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 108f9f7ed064..110699f4c3e4 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -696,7 +696,6 @@ static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trun static int nd_pfn_init(struct nd_pfn *nd_pfn) { - u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); u32 start_pad, end_trunc, reserve = info_block_reserve(); @@ -754,11 +753,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * when populating the vmemmap. This *should* be equal to * PMD_SIZE for most architectures. */ - offset = ALIGN(start + reserve + 64 * npfns + dax_label_reserve, + offset = ALIGN(start + reserve + 64 * npfns, max(nd_pfn->align, PMD_SIZE)) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(start + reserve + dax_label_reserve, - nd_pfn->align) - start; + offset = ALIGN(start + reserve, nd_pfn->align) - start; else return -ENXIO;
From patchwork Tue Feb 12 21:25:11 2019
X-Patchwork-Submitter: Dan Williams
X-Patchwork-Id: 10808715
Subject: [PATCH 6/7] libnvdimm/pfn: Introduce 'struct pfn_map_info'
From: Dan Williams
To: linux-nvdimm@lists.01.org
Cc: linux-kernel@vger.kernel.org, vishal.l.verma@intel.com, linux-fsdevel@vger.kernel.org
Date: Tue, 12 Feb 2019 13:25:11 -0800
Message-ID: <155000671184.348031.10730652743216753279.stgit@dwillia2-desk3.amr.corp.intel.com>
In-Reply-To: <155000668075.348031.9371497273408112600.stgit@dwillia2-desk3.amr.corp.intel.com>

The 'start_pad' accounting bug resulted from the pmem driver inferring properties of the established pagemap to calculate pmem->phys_addr and pmem->size, and from assuming that nd_pfn->data_offset was identical to pmem->data_offset. The assumptions in the current implementation are only true when 'start_pad' is zero. The confusion resulted in the wrong offset being applied for sector-to-physical-page translations.

In advance of introducing a corrected implementation, make the mapping parameters and translation explicit via a new 'struct pfn_map_info' that carries the mapping and device-offset-to-physical-address translation parameters. No functional change intended; this cleanup preserves the existing bug.
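The struct introduced below makes the two translations pmem needs explicit: device sector to physical address (for badblocks and poison clearing) and device sector to virtual address (for memcpy and dax). A userspace sketch of those relationships using the field names from the patch; the example values are invented:

#include <stdint.h>
#include <stdio.h>

/* mirrors the fields of 'struct pfn_map_info'; the real types differ */
struct map_info {
        uint64_t map_base;      /* physical base handed to memremap */
        uint64_t map_offset;    /* where data starts within the mapping */
        uint64_t map_size;      /* size of the mapping */
        /* void *map;              virtual base returned by memremap */
};

int main(void)
{
        struct map_info mi = {
                .map_base = 0x240000000ULL,     /* invented physical base */
                .map_offset = 0x200000,         /* info block + altmap pages */
                .map_size = 0x40000000,         /* 1G mapping */
        };
        uint64_t sector = 1024;                 /* a 512-byte device sector */
        uint64_t pmem_off = sector * 512 + mi.map_offset;

        /* as in __pmem_direct_access(): phys = map_base + offset */
        printf("phys = %#llx\n", (unsigned long long)(mi.map_base + pmem_off));
        /* the virtual address would be mi.map + pmem_off; usable capacity: */
        printf("capacity = %llu\n",
                        (unsigned long long)(mi.map_size - mi.map_offset));
        return 0;
}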
Signed-off-by: Dan Williams --- drivers/dax/pmem.c | 9 ++- drivers/nvdimm/nd.h | 13 ++++- drivers/nvdimm/pfn_devs.c | 21 ++++++- drivers/nvdimm/pmem.c | 111 ++++++++++++++++++--------------------- drivers/nvdimm/pmem.h | 12 +--- tools/testing/nvdimm/pmem-dax.c | 15 +++-- 6 files changed, 96 insertions(+), 85 deletions(-) diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index 2c1f459c0c63..80359df4f662 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -64,6 +64,7 @@ static int dax_pmem_probe(struct device *dev) struct nd_pfn_sb *pfn_sb; struct dev_dax *dev_dax; struct dax_pmem *dax_pmem; + struct pfn_map_info mi; struct nd_namespace_io *nsio; struct dax_region *dax_region; struct nd_namespace_common *ndns; @@ -83,7 +84,7 @@ static int dax_pmem_probe(struct device *dev) rc = devm_nsio_enable(dev, nsio); if (rc) return rc; - rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap); + rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap, &mi); if (rc) return rc; devm_nsio_disable(dev, nsio); @@ -117,8 +118,10 @@ static int dax_pmem_probe(struct device *dev) return PTR_ERR(addr); /* adjust the dax_region resource to the start of data */ - memcpy(&res, &dax_pmem->pgmap.res, sizeof(res)); - res.start += le64_to_cpu(pfn_sb->dataoff); + res = (struct resource) { + .start = mi.map_base + mi.map_pad + mi.map_offset, + .end = mi.map_base + mi.map_pad + mi.map_size - 1, + }; rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id); if (rc != 2) diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 379bf4305e61..4339d338928b 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -377,13 +377,22 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, unsigned int pmem_sector_size(struct nd_namespace_common *ndns); void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct resource *res); +struct pfn_map_info { + resource_size_t map_base; + unsigned long map_offset; + resource_size_t map_size; + unsigned long map_pad; + u64 pfn_flags; + void *map; +}; #if IS_ENABLED(CONFIG_ND_CLAIM) -int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap); +int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap, + struct pfn_map_info *mi); int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); #else static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, - struct dev_pagemap *pgmap) + struct dev_pagemap *pgmap, struct pfn_map_info *mi) { return -ENXIO; } diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 110699f4c3e4..1c2a0e707da3 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -611,7 +611,8 @@ static unsigned long init_altmap_reserve(resource_size_t base) return reserve; } -static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) +static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap, + struct pfn_map_info *mi) { struct resource *res = &pgmap->res; struct vmem_altmap *altmap = &pgmap->altmap; @@ -632,6 +633,19 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) res->start += start_pad; res->end -= end_trunc;
+ *mi = (struct pfn_map_info) { + /* + * TODO fix implementation to properly account for + * 'start_pad' in map_base, and map_offset. As is, the + * fact that __va(map_base) != __pa(map) leads + * to mistranslations in pmem_direct_access(). + */ + .map_base = nsio->res.start, + .map_pad = start_pad, + .map_offset = offset, + .map_size = resource_size(res), + }; + if (nd_pfn->mode == PFN_MODE_RAM) { if (offset < reserve) return -EINVAL; @@ -789,7 +803,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * Determine the effective resource range and vmem_altmap from an nd_pfn * instance. */ -int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) +int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap, + struct pfn_map_info *mi) { int rc; @@ -801,6 +816,6 @@ int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) return rc; /* we need a valid pfn_sb before we can init a dev_pagemap */ - return __nvdimm_setup_pfn(nd_pfn, pgmap); + return __nvdimm_setup_pfn(nd_pfn, pgmap, mi); } EXPORT_SYMBOL_GPL(nvdimm_setup_pfn); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index bc2f700feef8..46b823f3b94d 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -34,9 +34,7 @@ #include #include #include "pmem.h" -#include "pfn.h" #include "nd.h" -#include "nd-core.h" static struct device *to_dev(struct pmem_device *pmem) { @@ -58,7 +56,7 @@ static void hwpoison_clear(struct pmem_device *pmem, unsigned long pfn_start, pfn_end, pfn; /* only pmem in the linear map supports HWPoison */ - if (is_vmalloc_addr(pmem->virt_addr)) + if (is_vmalloc_addr(pmem->mi.map)) return; pfn_start = PHYS_PFN(phys); @@ -80,17 +78,18 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, unsigned int len) { struct device *dev = to_dev(pmem); + struct pfn_map_info *mi = &pmem->mi; sector_t sector; long cleared; blk_status_t rc = BLK_STS_OK; - sector = (offset - pmem->data_offset) / 512; + sector = (offset - mi->map_offset) / 512; - cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); + cleared = nvdimm_clear_poison(dev, mi->map_base + offset, len); if (cleared < len) rc = BLK_STS_IOERR; if (cleared > 0 && cleared / 512) { - hwpoison_clear(pmem, pmem->phys_addr + offset, cleared); + hwpoison_clear(pmem, mi->map_base + offset, cleared); cleared /= 512; dev_dbg(dev, "%#llx clear %ld sector%s\n", (unsigned long long) sector, cleared, @@ -100,7 +99,7 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem, sysfs_notify_dirent(pmem->bb_state); } - arch_invalidate_pmem(pmem->virt_addr + offset, len); + arch_invalidate_pmem(mi->map + offset, len); return rc; } @@ -151,8 +150,9 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, { blk_status_t rc = BLK_STS_OK; bool bad_pmem = false; - phys_addr_t pmem_off = sector * 512 + pmem->data_offset; - void *pmem_addr = pmem->virt_addr + pmem_off; + struct pfn_map_info *mi = &pmem->mi; + phys_addr_t pmem_off = sector * 512 + mi->map_offset; + void *pmem_addr = mi->map + pmem_off; if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; @@ -247,16 +247,17 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { - resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; + struct pfn_map_info *mi = &pmem->mi; + resource_size_t offset = PFN_PHYS(pgoff) + mi->map_offset; if (unlikely(is_bad_pmem(&pmem->bb,
PFN_PHYS(pgoff) / 512, PFN_PHYS(nr_pages)))) return -EIO; if (kaddr) - *kaddr = pmem->virt_addr + offset; + *kaddr = mi->map + offset; if (pfn) - *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); + *pfn = phys_to_pfn_t(mi->map_base + offset, mi->pfn_flags); /* * If badblocks are present, limit known good range to the @@ -264,7 +265,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, */ if (unlikely(pmem->bb.count)) return nr_pages; - return PHYS_PFN(pmem->size - pmem->pfn_pad - offset); + return PHYS_PFN(mi->map_size - offset); } static const struct block_device_operations pmem_fops = { @@ -354,45 +355,49 @@ static int pmem_attach_disk(struct device *dev, struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_region *nd_region = to_nd_region(dev->parent); int nid = dev_to_node(dev), fua; - struct resource *res = &nsio->res; + struct pfn_map_info *mi; struct resource bb_res; struct nd_pfn *nd_pfn = NULL; struct dax_device *dax_dev; - struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; struct request_queue *q; struct device *gendev; struct gendisk *disk; - void *addr; int rc; pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) return -ENOMEM; + mi = &pmem->mi; /* while nsio_rw_bytes is active, parse a pfn info block if present */ if (is_nd_pfn(dev)) { nd_pfn = to_nd_pfn(dev); - rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap); + rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap, mi); if (rc) return rc; + } else { + *mi = (struct pfn_map_info) { + .map_offset = 0, + .map_base = nsio->res.start, + .map_size = resource_size(&nsio->res), + }; } /* we're attaching a block device, disable raw namespace access */ devm_nsio_disable(dev, nsio); dev_set_drvdata(dev, pmem); - pmem->phys_addr = res->start; - pmem->size = resource_size(res); fua = nvdimm_has_flush(nd_region); if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) { dev_warn(dev, "unable to guarantee persistence of writes\n"); fua = 0; } - if (!devm_request_mem_region(dev, res->start, resource_size(res), + if (!devm_request_mem_region(dev, nsio->res.start, + resource_size(&nsio->res), dev_name(&ndns->dev))) { - dev_warn(dev, "could not reserve region %pR\n", res); + dev_warn(dev, "could not reserve region %pR\n", &nsio->res); return -EBUSY; } @@ -403,37 +408,27 @@ static int pmem_attach_disk(struct device *dev, if (devm_add_action_or_reset(dev, pmem_release_queue, q)) return -ENOMEM; - pmem->pfn_flags = PFN_DEV; + mi->pfn_flags = PFN_DEV; pmem->pgmap.ref = &q->q_usage_counter; pmem->pgmap.kill = pmem_freeze_queue; if (is_nd_pfn(dev)) { if (setup_pagemap_fsdax(dev, &pmem->pgmap)) return -ENOMEM; - addr = devm_memremap_pages(dev, &pmem->pgmap); - pfn_sb = nd_pfn->pfn_sb; - pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); - pmem->pfn_pad = resource_size(res) - - resource_size(&pmem->pgmap.res); - pmem->pfn_flags |= PFN_MAP; - memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); - bb_res.start += pmem->data_offset; + mi->map = devm_memremap_pages(dev, &pmem->pgmap); + mi->pfn_flags |= PFN_MAP; } else if (pmem_should_map_pages(dev)) { memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); pmem->pgmap.altmap_valid = false; if (setup_pagemap_fsdax(dev, &pmem->pgmap)) return -ENOMEM; - addr = devm_memremap_pages(dev, &pmem->pgmap); - pmem->pfn_flags |= PFN_MAP; - memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); - } else { - addr = devm_memremap(dev, pmem->phys_addr, - pmem->size, ARCH_MEMREMAP_PMEM); - memcpy(&bb_res, &nsio->res, sizeof(bb_res)); - } + mi->map = 
devm_memremap_pages(dev, &pmem->pgmap); + mi->pfn_flags |= PFN_MAP; + } else + mi->map = devm_memremap(dev, mi->map_base, mi->map_size, + ARCH_MEMREMAP_PMEM); - if (IS_ERR(addr)) - return PTR_ERR(addr); - pmem->virt_addr = addr; + if (IS_ERR(mi->map)) + return PTR_ERR(mi->map); blk_queue_write_cache(q, true, fua); blk_queue_make_request(q, pmem_make_request); @@ -441,7 +436,7 @@ static int pmem_attach_disk(struct device *dev, blk_queue_logical_block_size(q, pmem_sector_size(ndns)); blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - if (pmem->pfn_flags & PFN_MAP) + if (mi->pfn_flags & PFN_MAP) blk_queue_flag_set(QUEUE_FLAG_DAX, q); q->queuedata = pmem; @@ -455,10 +450,13 @@ static int pmem_attach_disk(struct device *dev, disk->flags = GENHD_FL_EXT_DEVT; disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; nvdimm_namespace_disk_name(ndns, disk->disk_name); - set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) - / 512); + set_capacity(disk, (mi->map_size - mi->map_offset) / 512); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; + bb_res = (struct resource) { + .start = mi->map_base + mi->map_pad + mi->map_offset, + .end = mi->map_base + mi->map_pad + mi->map_size - 1, + }; nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res); disk->bb = &pmem->bb; @@ -540,7 +538,6 @@ static void nd_pmem_shutdown(struct device *dev) static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) { struct nd_region *nd_region; - resource_size_t offset = 0, end_trunc = 0; struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; struct resource res; @@ -556,32 +553,26 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) ndns = nd_btt->ndns; nd_region = to_nd_region(ndns->dev.parent); nsio = to_nd_namespace_io(&ndns->dev); + res = (struct resource) { + .start = nsio->res.start, + .end = nsio->res.end, + }; bb = &nsio->bb; bb_state = NULL; } else { struct pmem_device *pmem = dev_get_drvdata(dev); + struct pfn_map_info *mi = &pmem->mi; nd_region = to_region(pmem); bb = &pmem->bb; bb_state = pmem->bb_state; - if (is_nd_pfn(dev)) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); - struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - - ndns = nd_pfn->ndns; - offset = pmem->data_offset + - __le32_to_cpu(pfn_sb->start_pad); - end_trunc = __le32_to_cpu(pfn_sb->end_trunc); - } else { - ndns = to_ndns(dev); - } - - nsio = to_nd_namespace_io(&ndns->dev); + res = (struct resource) { + .start = mi->map_base + mi->map_pad + mi->map_offset, + .end = mi->map_base + mi->map_pad + mi->map_size - 1, + }; } - res.start = nsio->res.start + offset; - res.end = nsio->res.end - end_trunc; nvdimm_badblocks_populate(nd_region, bb, &res); if (bb_state) sysfs_notify_dirent(bb_state); diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index 59cfe13ea8a8..6c27bbae349f 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -6,19 +6,11 @@ #include #include #include +#include "nd.h" /* this definition is in it's own header for tools/testing/nvdimm to consume */ struct pmem_device { - /* One contiguous memory region per device */ - phys_addr_t phys_addr; - /* when non-zero this device is hosting a 'pfn' instance */ - phys_addr_t data_offset; - u64 pfn_flags; - void *virt_addr; - /* immutable base size of the namespace */ - size_t size; - /* trim size when namespace capacity has been section aligned */ - u32 pfn_pad; + struct pfn_map_info mi; struct kernfs_node *bb_state; struct badblocks bb; struct dax_device *dax_dev; diff --git 
a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c index 2e7fd8227969..52a8760d2ec5 100644 --- a/tools/testing/nvdimm/pmem-dax.c +++ b/tools/testing/nvdimm/pmem-dax.c @@ -18,7 +18,8 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { - resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; + struct pfn_map_info *mi = &pmem->mi; + resource_size_t offset = PFN_PHYS(pgoff) + mi->map_offset; if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, PFN_PHYS(nr_pages)))) @@ -28,12 +29,12 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, * Limit dax to a single page at a time given vmalloc()-backed * in the nfit_test case. */ - if (get_nfit_res(pmem->phys_addr + offset)) { + if (get_nfit_res(mi->map_base + offset)) { struct page *page; if (kaddr) - *kaddr = pmem->virt_addr + offset; - page = vmalloc_to_page(pmem->virt_addr + offset); + *kaddr = mi->map + offset; + page = vmalloc_to_page(mi->map + offset); if (pfn) *pfn = page_to_pfn_t(page); pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n", @@ -43,9 +44,9 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, } if (kaddr) - *kaddr = pmem->virt_addr + offset; + *kaddr = mi->map + offset; if (pfn) - *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); + *pfn = phys_to_pfn_t(mi->map_base + offset, mi->pfn_flags); /* * If badblocks are present, limit known good range to the @@ -53,5 +54,5 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, */ if (unlikely(pmem->bb.count)) return nr_pages; - return PHYS_PFN(pmem->size - pmem->pfn_pad - offset); + return PHYS_PFN(mi->map_size - offset); }

From patchwork Tue Feb 12 21:25:17 2019
X-Patchwork-Submitter: Dan Williams
X-Patchwork-Id: 10808713
Subject: [PATCH 7/7] libnvdimm/pfn: Fix 'start_pad' implementation
From: Dan Williams
To: linux-nvdimm@lists.01.org
Cc: stable@vger.kernel.org, linux-kernel@vger.kernel.org, vishal.l.verma@intel.com, linux-fsdevel@vger.kernel.org
Date: Tue, 12 Feb 2019 13:25:17 -0800
Message-ID: <155000671719.348031.2347363160141119237.stgit@dwillia2-desk3.amr.corp.intel.com>
In-Reply-To: <155000668075.348031.9371497273408112600.stgit@dwillia2-desk3.amr.corp.intel.com>
In the beginning the pmem driver simply passed the persistent memory resource range to memremap and was done. With the introduction of devm_memremap_pages() and vmem_altmap the implementation needed to contend with metadata at the start of the resource to indicate whether the vmemmap is located in System RAM or Persistent Memory, and to reserve vmemmap capacity in pmem for the latter case.

The indication of metadata space was communicated in the nd_pfn->data_offset property, and it was defined to be identical to the pmem_device->data_offset property, i.e. relative to the raw resource base of the namespace. Up until this point in the driver's development, pmem_device->phys_addr == __pa(pmem_device->virt_addr).

This implementation was fine up until the discovery of platforms with physical address layouts that mapped Persistent Memory and System RAM to the same Linux memory hotplug section (128MB span). The nd_pfn->start_pad and nd_pfn->end_trunc properties were introduced to pad and truncate the capacity to fit within an exclusive Linux memory hotplug section span, and it was at this point that the ->start_pad definition failed to comprehend the pmem_device->phys_addr to pmem_device->virt_addr relationship.

Platforms in the wild typically only collided 'System RAM' at the end of the Persistent Memory range, so ->start_pad was often zero. Lately Linux has encountered platforms that collide Persistent Memory regions between each other, specifically cases where ->start_pad needed to be non-zero. This led to commit ae86cbfef381 "libnvdimm, pfn: Pad pfn namespaces relative to other regions". That commit allowed namespaces to be mapped with devm_memremap_pages(). However, dax operations on those configurations currently fail if attempted within the ->start_pad range because pmem_device->data_offset was still relative to the raw resource base, not relative to the section-aligned resource range mapped by devm_memremap_pages().

Luckily __bdev_dax_supported() caught these failures and simply disabled dax. However, to fix this situation a non-backwards-compatible change needs to be made to the interpretation of the nd_pfn info-block. ->start_pad needs to be accounted in ->map.map_offset (formerly ->data_offset), and ->map.map_base (formerly ->phys_addr) needs to be adjusted to the section-aligned resource base used to establish ->map.map (formerly ->virt_addr).

The guiding principle of the info-block compatibility fixup is to maintain the interpretation of ->data_offset for implementations, like the EFI driver, that only care about data access and not dax, but cause older Linux implementations that care about the mode and dax to fail to parse the new info-block.

A unit test in ndctl is introduced to test the collision case going forward, as well as how new kernels interpret older info-block configurations. Recall that the __bdev_dax_supported() checks have been strengthened to catch more failure cases (changed in a previous patch) and the new test caught several regressions in the course of developing this fixed support.
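The encoding trick: when start_pad is non-zero the writer bumps min_version to 3 and stores mode + 2 (the new PFN3_MODE_* values), so pre-min_version kernels hit the default case of their mode switch and reject the super-block, while v1.3-aware readers decode the mode and know start_pad is already folded into data_offset. A toy decoder showing both rules; the enum order mirrors the patch, everything else is a stand-in:

#include <stdint.h>
#include <stdio.h>

enum mode { PFN_MODE_NONE, PFN_MODE_RAM, PFN_MODE_PMEM,
            PFN3_MODE_RAM, PFN3_MODE_PMEM };    /* order as in the patch */

/* start_pad a reader must still apply on top of data_offset */
static uint32_t effective_start_pad(uint16_t version_minor, uint32_t start_pad)
{
        return version_minor < 3 ? start_pad : 0;
}

static int decode_mode(uint32_t mode)
{
        switch (mode) {
        case PFN_MODE_RAM:
        case PFN_MODE_PMEM:
                return mode;                    /* legacy encoding */
        case PFN3_MODE_RAM:
                return PFN_MODE_RAM;            /* v1.3 encoding: mode + 2 */
        case PFN3_MODE_PMEM:
                return PFN_MODE_PMEM;
        default:
                return -1;      /* old kernels land here and bail out */
        }
}

int main(void)
{
        printf("PFN3_MODE_PMEM -> %d\n", decode_mode(PFN3_MODE_PMEM));
        printf("pad to apply: v1.2 %u, v1.3 %u\n",
                        (unsigned)effective_start_pad(2, 1 << 21),
                        (unsigned)effective_start_pad(3, 1 << 21));
        return 0;
}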
Fixes: ae86cbfef381 ("libnvdimm, pfn: Pad pfn namespaces relative to other regions") Cc: stable@vger.kernel.org Signed-off-by: Dan Williams --- drivers/nvdimm/nd.h | 2 + drivers/nvdimm/pfn.h | 2 - drivers/nvdimm/pfn_devs.c | 163 +++++++++++++++++++++++++++++---------------- 3 files changed, 109 insertions(+), 58 deletions(-) diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 4339d338928b..7c33e7f7fae0 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -198,6 +198,8 @@ enum nd_pfn_mode { PFN_MODE_NONE, PFN_MODE_RAM, PFN_MODE_PMEM, + PFN3_MODE_RAM, + PFN3_MODE_PMEM, }; struct nd_pfn { diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index 710cb743fad6..53563a67c48d 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -20,7 +20,7 @@ #define PFN_SIG_LEN 16 #define PFN_SIG "NVDIMM_PFN_INFO\0" #define DAX_SIG "NVDIMM_DAX_INFO\0" -#define PFN_VERSION_SUPPORT 2 +#define PFN_VERSION_SUPPORT 3 struct nd_pfn_sb { u8 signature[PFN_SIG_LEN]; diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 1c2a0e707da3..56fa75a299b7 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -210,6 +210,36 @@ static ssize_t namespace_store(struct device *dev, } static DEVICE_ATTR_RW(namespace); +static u32 pfn_start_pad(struct nd_pfn *nd_pfn) +{ + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + + /* starting v1.3 start_pad is accounted in dataoff */ + if (__le16_to_cpu(pfn_sb->version_minor) < 3) + return __le32_to_cpu(pfn_sb->start_pad); + return 0; +} + +/* + * Where does data start relative to the start of the namespace resource + * base? + */ +static u64 pfn_dataoff(struct nd_pfn *nd_pfn) +{ + return __le64_to_cpu(nd_pfn->pfn_sb->dataoff); +} + +/* + * How much of the namespace resource capacity is taken up by padding, + * outside of what is accounted in the data_offset?
+ */ +static u32 pfn_pad(struct nd_pfn *nd_pfn) +{ + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + + return pfn_start_pad(nd_pfn) + __le32_to_cpu(pfn_sb->end_trunc); +} + static ssize_t resource_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -218,14 +248,11 @@ static ssize_t resource_show(struct device *dev, device_lock(dev); if (dev->driver) { - struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - u64 offset = __le64_to_cpu(pfn_sb->dataoff); struct nd_namespace_common *ndns = nd_pfn->ndns; - u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start - + start_pad + offset); + + pfn_dataoff(nd_pfn) + pfn_start_pad(nd_pfn)); } else { /* no address to convey if the pfn instance is disabled */ rc = -ENXIO; @@ -244,16 +271,12 @@ static ssize_t size_show(struct device *dev, device_lock(dev); if (dev->driver) { - struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - u64 offset = __le64_to_cpu(pfn_sb->dataoff); struct nd_namespace_common *ndns = nd_pfn->ndns; - u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); - u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); rc = sprintf(buf, "%llu\n", (unsigned long long) - resource_size(&nsio->res) - start_pad - - end_trunc - offset); + resource_size(&nsio->res) - pfn_dataoff(nd_pfn) + - pfn_pad(nd_pfn)); } else { /* no size to convey if the pfn instance is disabled */ rc = -ENXIO; @@ -422,10 +445,10 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { + unsigned long align; u64 checksum, offset; enum nd_pfn_mode mode; struct nd_namespace_io *nsio; - unsigned long align, start_pad; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); @@ -465,20 +488,29 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) if (__le16_to_cpu(pfn_sb->min_version) > PFN_VERSION_SUPPORT) return -EINVAL; - switch (le32_to_cpu(pfn_sb->mode)) { + mode = le32_to_cpu(pfn_sb->mode); + /* + * PFN3_MODEs are used to make older Linux implementations fail + * to parse the info-block. + */ + switch (mode) { case PFN_MODE_RAM: case PFN_MODE_PMEM: break; + case PFN3_MODE_RAM: + mode = PFN_MODE_RAM; + break; + case PFN3_MODE_PMEM: + mode = PFN_MODE_PMEM; + break; default: return -ENXIO; } align = le32_to_cpu(pfn_sb->align); offset = le64_to_cpu(pfn_sb->dataoff); - start_pad = le32_to_cpu(pfn_sb->start_pad); if (align == 0) align = 1UL << ilog2(offset); - mode = le32_to_cpu(pfn_sb->mode); if (!nd_pfn->uuid) { /* @@ -534,7 +566,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EBUSY; } - if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) + if ((align && !IS_ALIGNED(nsio->res.start + offset + + pfn_start_pad(nd_pfn), align)) || !IS_ALIGNED(offset, PAGE_SIZE)) { dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled align: %#lx\n", @@ -586,31 +619,23 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) } EXPORT_SYMBOL(nd_pfn_probe); -static u32 info_block_reserve(void) +static u32 info_block_reserve(u32 start_pad) { - return ALIGN(SZ_8K, PAGE_SIZE); + u32 reserve = ALIGN(SZ_8K, PAGE_SIZE); + + /* + * Does the start_pad subsume the info-block at the start of the + * raw resource base? 
+ */ + if (start_pad <= reserve) + return reserve - start_pad; + return 0; } /* * We hotplug memory at section granularity, pad the reserved area from * the previous section base to the namespace base address. */ -static unsigned long init_altmap_base(resource_size_t base) -{ - unsigned long base_pfn = PHYS_PFN(base); - - return PFN_SECTION_ALIGN_DOWN(base_pfn); -} - -static unsigned long init_altmap_reserve(resource_size_t base) -{ - unsigned long reserve = info_block_reserve() >> PAGE_SHIFT; - unsigned long base_pfn = PHYS_PFN(base); - - reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); - return reserve; -} - static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap, struct pfn_map_info *mi) { @@ -618,31 +643,39 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap, struct vmem_altmap *altmap = &pgmap->altmap; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = le64_to_cpu(pfn_sb->dataoff); - u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad), map_start_pad; u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); - u32 reserve = info_block_reserve(); + u32 reserve = info_block_reserve(start_pad); struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; + struct device *dev = &nd_pfn->dev; struct vmem_altmap __altmap = { - .base_pfn = init_altmap_base(base), - .reserve = init_altmap_reserve(base), + .base_pfn = PHYS_PFN(base), + .reserve = PHYS_PFN(reserve), }; memcpy(res, &nsio->res, sizeof(*res)); res->start += start_pad; res->end -= end_trunc; + /* + * map_start_pad is adjusted to 0 for pre-v1.3 infoblocks to + * preserve a bug in the implementation that can only be fixed + * by migrating to a v1.3+ configuration. + * + * Post v1.3 start_pad is accounted in ->data_offset, and + * ensures that: + * __va(map_base) == __pa(map) + * + * map_pad is only non-zero when ensuring backwards + * compatibility with pre-v1.3 configurations. + */ + map_start_pad = start_pad - pfn_start_pad(nd_pfn); *mi = (struct pfn_map_info) { - /* - * TODO fix implementation to properly account for - * 'start_pad' in map_base, and map_offset. As is, the - * fact that __va(map_base) != __pa(map) leads - * to mistranslations in pmem_direct_access().
- */ - .map_base = nsio->res.start, - .map_pad = start_pad, - .map_offset = offset, + .map_base = nsio->res.start + map_start_pad, + .map_pad = pfn_start_pad(nd_pfn), + .map_offset = offset - map_start_pad, .map_size = resource_size(res), }; @@ -653,14 +686,14 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap, pgmap->altmap_valid = false; } else if (nd_pfn->mode == PFN_MODE_PMEM) { nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) + - pfn_start_pad(nd_pfn) - offset) / PAGE_SIZE); if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) - dev_info(&nd_pfn->dev, - "number of pfns truncated from %lld to %ld\n", + dev_info(dev, "number of pfns truncated from %lld to %ld\n", le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); memcpy(altmap, &__altmap, sizeof(*altmap)); - altmap->free = PHYS_PFN(offset - reserve); + altmap->free = PHYS_PFN(mi->map_offset - reserve); altmap->alloc = 0; pgmap->altmap_valid = true; } else @@ -712,7 +745,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) { struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - u32 start_pad, end_trunc, reserve = info_block_reserve(); + u32 start_pad, end_trunc, reserve; resource_size_t start, size; struct nd_region *nd_region; struct nd_pfn_sb *pfn_sb; @@ -750,6 +783,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) if (start_pad + end_trunc) dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", dev_name(&ndns->dev), start_pad + end_trunc); + reserve = info_block_reserve(start_pad); /* * Note, we use 64 here for the standard size of struct page, @@ -769,29 +803,44 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) */ offset = ALIGN(start + reserve + 64 * npfns, max(nd_pfn->align, PMD_SIZE)) - start; - } else if (nd_pfn->mode == PFN_MODE_RAM) + offset += start_pad; + } else if (nd_pfn->mode == PFN_MODE_RAM) { offset = ALIGN(start + reserve, nd_pfn->align) - start; - else + offset += start_pad; + } else return -ENXIO; - if (offset + start_pad + end_trunc >= size) { + if (offset + end_trunc >= size) { dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", dev_name(&ndns->dev)); return -ENXIO; } - npfns = (size - offset - start_pad - end_trunc) / SZ_4K; - pfn_sb->mode = cpu_to_le32(nd_pfn->mode); + + npfns = (size - offset - end_trunc) / SZ_4K; + pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); memcpy(pfn_sb->signature, sig, PFN_SIG_LEN); memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); - pfn_sb->version_minor = cpu_to_le16(2); - pfn_sb->min_version = cpu_to_le16(2); pfn_sb->start_pad = cpu_to_le32(start_pad); pfn_sb->end_trunc = cpu_to_le32(end_trunc); + if (start_pad) { + /* + * Require implementations to account for start_pad in + * data_offset. Use PFN3_MODE to cause versions older + * than the introduction of min_version to fail. + */ + pfn_sb->mode = cpu_to_le32(nd_pfn->mode + 2); + pfn_sb->version_minor = cpu_to_le16(3); + pfn_sb->min_version = cpu_to_le16(3); + } else { + pfn_sb->mode = cpu_to_le32(nd_pfn->mode); + pfn_sb->version_minor = cpu_to_le16(2); + pfn_sb->min_version = cpu_to_le16(2); + } pfn_sb->align = cpu_to_le32(nd_pfn->align); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum);