diff mbox series

[v2,1/3] s390/pci: Fix s390_mmio_read/write syscall page fault handling

Message ID 20240523-vfio_pci_mmap-v2-1-0dc6c139a4f1@linux.ibm.com (mailing list archive)
State New
Headers show
Series vfio/pci: s390: Fix issues preventing VFIO_PCI_MMAP=y for s390 and enable it | expand

Commit Message

Niklas Schnelle May 23, 2024, 11:10 a.m. UTC
The s390 MMIO syscalls when using the classic PCI instructions do not
cause a page fault when follow_pte() fails due to the page not being
present. Besides being a general deficiency this breaks vfio-pci's mmap()
handling once VFIO_PCI_MMAP gets enabled as this lazily maps on first
access. Fix this by following a failed follow_pte() with
fixup_user_page() and retrying the follow_pte().

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
 arch/s390/pci/pci_mmio.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

Comments

Niklas Schnelle May 28, 2024, 3:12 p.m. UTC | #1
On Thu, 2024-05-23 at 13:10 +0200, Niklas Schnelle wrote:
> The s390 MMIO syscalls when using the classic PCI instructions do not
> cause a page fault when follow_pte() fails due to the page not being
> present. Besides being a general deficiency this breaks vfio-pci's mmap()
> handling once VFIO_PCI_MMAP gets enabled as this lazily maps on first
> access. Fix this by following a failed follow_pte() with
> fixup_user_page() and retrying the follow_pte().
> 
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
>  arch/s390/pci/pci_mmio.c | 18 +++++++++++++-----
>  1 file changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
> index a90499c087f0..217defbcb4f1 100644
> --- a/arch/s390/pci/pci_mmio.c
> +++ b/arch/s390/pci/pci_mmio.c
> @@ -170,8 +170,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
>  		goto out_unlock_mmap;
>  
>  	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
> -	if (ret)
> -		goto out_unlock_mmap;
> +	if (ret) {
> +		fixup_user_fault(vma->vm_mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
> +		ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);

This needs a slight adjustment in v6.10-rc1 due to a change in
follow_pte()'s signature. The above should take "vma" directly. We
could then also use current->mm in fixup_user_fault() as seems more
common. I've already pushed this to my kernel.org repository and will
send a v3 rebased on v6.10-rc1 with just this as I'm not tracking any
other feedback.

> +		if (ret)
> +			goto out_unlock_mmap;
> +	}
>  
>  	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
>  			(mmio_addr & ~PAGE_MASK));
> @@ -305,12 +309,16 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
>  	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
>  		goto out_unlock_mmap;
>  	ret = -EACCES;
> -	if (!(vma->vm_flags & VM_WRITE))
> +	if (!(vma->vm_flags & VM_READ))
>  		goto out_unlock_mmap;
>  
>  	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
> -	if (ret)
> -		goto out_unlock_mmap;
> +	if (ret) {
> +		fixup_user_fault(vma->vm_mm, mmio_addr, 0, NULL);
> +		ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);

Here as well

> +		if (ret)
> +			goto out_unlock_mmap;
> +	}
>  
>  	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
>  			(mmio_addr & ~PAGE_MASK));
>
Matthew Rosato May 28, 2024, 3:13 p.m. UTC | #2
On 5/23/24 7:10 AM, Niklas Schnelle wrote:
> The s390 MMIO syscalls when using the classic PCI instructions do not
> cause a page fault when follow_pte() fails due to the page not being
> present. Besides being a general deficiency this breaks vfio-pci's mmap()
> handling once VFIO_PCI_MMAP gets enabled as this lazily maps on first
> access. Fix this by following a failed follow_pte() with
> fixup_user_page() and retrying the follow_pte().
> 
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>

Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>

FYI, as of -rc1 this patch has a merge conflict with 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte().")
diff mbox series

Patch

diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index a90499c087f0..217defbcb4f1 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -170,8 +170,12 @@  SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
 		goto out_unlock_mmap;
 
 	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
-	if (ret)
-		goto out_unlock_mmap;
+	if (ret) {
+		fixup_user_fault(vma->vm_mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
+		ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+		if (ret)
+			goto out_unlock_mmap;
+	}
 
 	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
 			(mmio_addr & ~PAGE_MASK));
@@ -305,12 +309,16 @@  SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
 	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 		goto out_unlock_mmap;
 	ret = -EACCES;
-	if (!(vma->vm_flags & VM_WRITE))
+	if (!(vma->vm_flags & VM_READ))
 		goto out_unlock_mmap;
 
 	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
-	if (ret)
-		goto out_unlock_mmap;
+	if (ret) {
+		fixup_user_fault(vma->vm_mm, mmio_addr, 0, NULL);
+		ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+		if (ret)
+			goto out_unlock_mmap;
+	}
 
 	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
 			(mmio_addr & ~PAGE_MASK));