diff mbox series

[v5,03/13] X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs

Message ID 1547026933-31226-4-git-send-email-karahmed@amazon.de (mailing list archive)
State New, archived
Headers show
Series KVM/X86: Introduce a new guest mapping interface | expand

Commit Message

KarimAllah Ahmed Jan. 9, 2019, 9:42 a.m. UTC
From: Filippo Sironi <sironi@amazon.de>

cmpxchg_gpte() calls get_user_pages_fast() to retrieve the number of
pages and the respective struct page to map in the kernel virtual
address space.
This doesn't work if get_user_pages_fast() is invoked with a userspace
virtual address that's backed by PFNs outside of kernel reach (e.g., when
limiting the kernel memory with mem= in the command line and using
/dev/mem to map memory).

If get_user_pages_fast() fails, look up the VMA that backs the userspace
virtual address, compute the PFN and the physical address, and map it in
the kernel virtual address space with memremap().

Signed-off-by: Filippo Sironi <sironi@amazon.de>
Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
---
 arch/x86/kvm/paging_tmpl.h | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

Comments

Konrad Rzeszutek Wilk Jan. 23, 2019, 5:31 p.m. UTC | #1
On Wed, Jan 09, 2019 at 10:42:03AM +0100, KarimAllah Ahmed wrote:
> From: Filippo Sironi <sironi@amazon.de>
> 
> cmpxchg_gpte() calls get_user_pages_fast() to retrieve the number of
> pages and the respective struct page to map in the kernel virtual
> address space.
> This doesn't work if get_user_pages_fast() is invoked with a userspace
> virtual address that's backed by PFNs outside of kernel reach (e.g., when
> limiting the kernel memory with mem= in the command line and using
> /dev/mem to map memory).
> 
> If get_user_pages_fast() fails, look up the VMA that backs the userspace
> virtual address, compute the PFN and the physical address, and map it in
> the kernel virtual address space with memremap().
> 
> Signed-off-by: Filippo Sironi <sironi@amazon.de>
> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>

I personally would have used some crafty goto statements to jump to an
'err' label, which would have:
	up_read(&current->mm->mmap_sem);
	return -EFAULT;

which would be after
 154         return (ret != orig_pte);                                               

But that is bike-shedding so feel free to ignore it.

Either way:

Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>


> ---
>  arch/x86/kvm/paging_tmpl.h | 38 +++++++++++++++++++++++++++++---------
>  1 file changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 6bdca39..c40af67 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -141,15 +141,35 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  	struct page *page;
>  
>  	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
> -	/* Check if the user is doing something meaningless. */
> -	if (unlikely(npages != 1))
> -		return -EFAULT;
> -
> -	table = kmap_atomic(page);
> -	ret = CMPXCHG(&table[index], orig_pte, new_pte);
> -	kunmap_atomic(table);
> -
> -	kvm_release_page_dirty(page);
> +	if (likely(npages == 1)) {
> +		table = kmap_atomic(page);
> +		ret = CMPXCHG(&table[index], orig_pte, new_pte);
> +		kunmap_atomic(table);
> +
> +		kvm_release_page_dirty(page);
> +	} else {
> +		struct vm_area_struct *vma;
> +		unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
> +		unsigned long pfn;
> +		unsigned long paddr;
> +
> +		down_read(&current->mm->mmap_sem);
> +		vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
> +		if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
> +			up_read(&current->mm->mmap_sem);
> +			return -EFAULT;
> +		}
> +		pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
> +		paddr = pfn << PAGE_SHIFT;
> +		table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
> +		if (!table) {
> +			up_read(&current->mm->mmap_sem);
> +			return -EFAULT;
> +		}
> +		ret = CMPXCHG(&table[index], orig_pte, new_pte);
> +		memunmap(table);
> +		up_read(&current->mm->mmap_sem);
> +	}
>  
>  	return (ret != orig_pte);
>  }
> -- 
> 2.7.4
>
diff mbox series

Patch

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6bdca39..c40af67 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -141,15 +141,35 @@  static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	struct page *page;
 
 	npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
-	/* Check if the user is doing something meaningless. */
-	if (unlikely(npages != 1))
-		return -EFAULT;
-
-	table = kmap_atomic(page);
-	ret = CMPXCHG(&table[index], orig_pte, new_pte);
-	kunmap_atomic(table);
-
-	kvm_release_page_dirty(page);
+	if (likely(npages == 1)) {
+		table = kmap_atomic(page);
+		ret = CMPXCHG(&table[index], orig_pte, new_pte);
+		kunmap_atomic(table);
+
+		kvm_release_page_dirty(page);
+	} else {
+		struct vm_area_struct *vma;
+		unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
+		unsigned long pfn;
+		unsigned long paddr;
+
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
+		if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
+			up_read(&current->mm->mmap_sem);
+			return -EFAULT;
+		}
+		pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+		paddr = pfn << PAGE_SHIFT;
+		table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
+		if (!table) {
+			up_read(&current->mm->mmap_sem);
+			return -EFAULT;
+		}
+		ret = CMPXCHG(&table[index], orig_pte, new_pte);
+		memunmap(table);
+		up_read(&current->mm->mmap_sem);
+	}
 
 	return (ret != orig_pte);
 }