From patchwork Tue Oct 15 20:48:12 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Ralph Campbell X-Patchwork-Id: 11191647 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3C19613BD for ; Tue, 15 Oct 2019 20:48:26 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 0852020873 for ; Tue, 15 Oct 2019 20:48:26 +0000 (UTC) Authentication-Results: mail.kernel.org; dkim=pass (2048-bit key) header.d=nvidia.com header.i=@nvidia.com header.b="Hw/n59XS" Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2387968AbfJOUsZ (ORCPT ); Tue, 15 Oct 2019 16:48:25 -0400 Received: from hqemgate16.nvidia.com ([216.228.121.65]:8933 "EHLO hqemgate16.nvidia.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728276AbfJOUsZ (ORCPT ); Tue, 15 Oct 2019 16:48:25 -0400 Received: from hqpgpgate101.nvidia.com (Not Verified[216.228.121.13]) by hqemgate16.nvidia.com (using TLS: TLSv1.2, DES-CBC3-SHA) id ; Tue, 15 Oct 2019 13:48:25 -0700 Received: from hqmail.nvidia.com ([172.20.161.6]) by hqpgpgate101.nvidia.com (PGP Universal service); Tue, 15 Oct 2019 13:48:23 -0700 X-PGP-Universal: processed; by hqpgpgate101.nvidia.com on Tue, 15 Oct 2019 13:48:23 -0700 Received: from HQMAIL101.nvidia.com (172.20.187.10) by HQMAIL111.nvidia.com (172.20.187.18) with Microsoft SMTP Server (TLS) id 15.0.1473.3; Tue, 15 Oct 2019 20:48:22 +0000 Received: from hqnvemgw01.nvidia.com (172.20.150.20) by HQMAIL101.nvidia.com (172.20.187.10) with Microsoft SMTP Server (TLS) id 15.0.1473.3 via Frontend Transport; Tue, 15 Oct 2019 20:48:22 +0000 Received: from rcampbell-dev.nvidia.com (Not Verified[10.110.48.66]) by hqnvemgw01.nvidia.com with Trustwave SEG (v7,5,8,10121) id ; Tue, 15 Oct 2019 13:48:22 -0700 From: Ralph Campbell To: 
Jerome Glisse , John Hubbard , Christoph Hellwig , Jason Gunthorpe CC: , , Ralph Campbell Subject: [PATCH v2 1/3] mm/hmm: make full use of walk_page_range() Date: Tue, 15 Oct 2019 13:48:12 -0700 Message-ID: <20191015204814.30099-2-rcampbell@nvidia.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20191015204814.30099-1-rcampbell@nvidia.com> References: <20191015204814.30099-1-rcampbell@nvidia.com> MIME-Version: 1.0 X-NVConfidentiality: public DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=nvidia.com; s=n1; t=1571172506; bh=QwxewNyXzfZfuepmVwzO3XNBOMjKY9xTiFhnt408MdI=; h=X-PGP-Universal:From:To:CC:Subject:Date:Message-ID:X-Mailer: In-Reply-To:References:MIME-Version:X-NVConfidentiality: Content-Type:Content-Transfer-Encoding; b=Hw/n59XSWuFhDP4PRm8itMtwFrLTYByXO0MPfPJvYWuAk/+4ewIRTXRoKT5Q4A95p 0hjcNOyEqwM2AbbNq8ZtvkOx/4iF8bqInFuNE2o6ThWtRqa2xHclbsaw4Zh0n+xMAT 56uUjjbXekNAM/rxK7hJ3jT6V/I0XH8/0DLcHg0gonJyoprrN5dMVq0Czr7pDu4YcQ P87kAvqhslY+tlDMhJWZjbncqnRbJAzPmDqJTODlqfyQDAjAJ6zRLNH1ceMv4DcKlz gGwMBZf055jA21ll4+7/j0vcMX45rcu9P/36OvtV5ZpOS+othSqk6rI9WbMCbsrGy+ /6VHTdUBvLL3A== Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org hmm_range_fault() calls find_vma() and walk_page_range() in a loop. This is unnecessary duplication since walk_page_range() calls find_vma() in a loop already. Simplify hmm_range_fault() by defining a test_walk() callback function to filter unhandled vmas. This also fixes a bug where hmm_range_fault() was not checking start >= vma->vm_start before checking vma->vm_flags, so hmm_range_fault() could return an error based on the wrong vma for the requested range. 
Signed-off-by: Ralph Campbell Cc: "Jérôme Glisse" Cc: Jason Gunthorpe Cc: Christoph Hellwig --- mm/hmm.c | 143 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 63 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 902f5fa6bf93..5df0dbf77e89 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -252,18 +252,15 @@ static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr, return -EFAULT; } -static int hmm_pfns_bad(unsigned long addr, - unsigned long end, - struct mm_walk *walk) +static int hmm_pfns_fill(unsigned long addr, unsigned long end, + struct hmm_range *range, enum hmm_pfn_value_e value) { - struct hmm_vma_walk *hmm_vma_walk = walk->private; - struct hmm_range *range = hmm_vma_walk->range; uint64_t *pfns = range->pfns; unsigned long i; i = (addr - range->start) >> PAGE_SHIFT; for (; addr < end; addr += PAGE_SIZE, i++) - pfns[i] = range->values[HMM_PFN_ERROR]; + pfns[i] = range->values[value]; return 0; } @@ -584,7 +581,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, } return 0; } else if (!pmd_present(pmd)) - return hmm_pfns_bad(start, end, walk); + return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) { /* @@ -612,7 +609,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, * recover. 
*/ if (pmd_bad(pmd)) - return hmm_pfns_bad(start, end, walk); + return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR); ptep = pte_offset_map(pmdp, addr); i = (addr - range->start) >> PAGE_SHIFT; @@ -770,13 +767,68 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, #define hmm_vma_walk_hugetlb_entry NULL #endif /* CONFIG_HUGETLB_PAGE */ -static void hmm_pfns_clear(struct hmm_range *range, - uint64_t *pfns, - unsigned long addr, - unsigned long end) +static bool hmm_range_needs_fault(unsigned long addr, unsigned long end, + const struct hmm_vma_walk *hmm_vma_walk) { - for (; addr < end; addr += PAGE_SIZE, pfns++) - *pfns = range->values[HMM_PFN_NONE]; + bool fault, write_fault; + unsigned long i; + uint64_t *pfns; + + if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) + return false; + + fault = false; + write_fault = false; + pfns = hmm_vma_walk->range->pfns; + i = (addr - hmm_vma_walk->range->start) >> PAGE_SHIFT; + for (; addr < end; addr += PAGE_SIZE, i++) { + hmm_pte_need_fault(hmm_vma_walk, pfns[i], 0, + &fault, &write_fault); + if (fault || write_fault) + return true; + } + return false; +} + +static int hmm_vma_walk_test(unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + struct hmm_vma_walk *hmm_vma_walk = walk->private; + struct hmm_range *range = hmm_vma_walk->range; + struct vm_area_struct *vma = walk->vma; + + /* If range is no longer valid, force retry. */ + if (!range->valid) + return -EBUSY; + + /* + * Skip vma ranges that don't have struct page backing them or + * map I/O devices directly. + */ + if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) + return -EFAULT; + + /* + * If the vma does not allow read access, then assume that it does not + * allow write access either. HMM does not support architectures + * that allow write without read. + */ + if (!(vma->vm_flags & VM_READ)) { + /* + * Check to see if a fault is requested for any page in the + * range. 
+ */ + if (hmm_range_needs_fault(start, end, hmm_vma_walk)) + return -EFAULT; + + hmm_pfns_fill(start, end, range, HMM_PFN_NONE); + hmm_vma_walk->last = end; + + /* Skip this vma and continue processing the next vma. */ + return 1; + } + + return 0; } /* @@ -857,6 +909,7 @@ static const struct mm_walk_ops hmm_walk_ops = { .pmd_entry = hmm_vma_walk_pmd, .pte_hole = hmm_vma_walk_hole, .hugetlb_entry = hmm_vma_walk_hugetlb_entry, + .test_walk = hmm_vma_walk_test, }; /** @@ -889,63 +942,27 @@ static const struct mm_walk_ops hmm_walk_ops = { */ long hmm_range_fault(struct hmm_range *range, unsigned int flags) { - const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP; - unsigned long start = range->start, end; - struct hmm_vma_walk hmm_vma_walk; + unsigned long start = range->start; + struct hmm_vma_walk hmm_vma_walk = { + .range = range, + .last = start, + .flags = flags, + }; struct hmm *hmm = range->hmm; - struct vm_area_struct *vma; int ret; lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem); do { - /* If range is no longer valid force retry. */ - if (!range->valid) - return -EBUSY; + ret = walk_page_range(hmm->mmu_notifier.mm, start, range->end, + &hmm_walk_ops, &hmm_vma_walk); + start = hmm_vma_walk.last; - vma = find_vma(hmm->mmu_notifier.mm, start); - if (vma == NULL || (vma->vm_flags & device_vma)) - return -EFAULT; + /* Keep trying while the range is valid. */ + } while (ret == -EBUSY && range->valid); - if (!(vma->vm_flags & VM_READ)) { - /* - * If vma do not allow read access, then assume that it - * does not allow write access, either. HMM does not - * support architecture that allow write without read. 
- */ - hmm_pfns_clear(range, range->pfns, - range->start, range->end); - return -EPERM; - } - - hmm_vma_walk.pgmap = NULL; - hmm_vma_walk.last = start; - hmm_vma_walk.flags = flags; - hmm_vma_walk.range = range; - end = min(range->end, vma->vm_end); - - walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops, - &hmm_vma_walk); - - do { - ret = walk_page_range(vma->vm_mm, start, end, - &hmm_walk_ops, &hmm_vma_walk); - start = hmm_vma_walk.last; - - /* Keep trying while the range is valid. */ - } while (ret == -EBUSY && range->valid); - - if (ret) { - unsigned long i; - - i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT; - hmm_pfns_clear(range, &range->pfns[i], - hmm_vma_walk.last, range->end); - return ret; - } - start = end; - - } while (start < range->end); + if (ret) + return ret; return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT; }