Message ID: 20240809160909.1023470-20-peterx@redhat.com (mailing list archive)
State:      New, archived
Series:     mm: Support huge pfnmaps
On Fri, Aug 09, 2024 at 12:09:09PM -0400, Peter Xu wrote:
> @@ -1672,30 +1679,49 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
>  	if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
>  		goto out_unlock;
>
> -	ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
> -	if (ret & VM_FAULT_ERROR)
> -		goto out_unlock;
> -
> -	/*
> -	 * Pre-fault the remainder of the vma, abort further insertions and
> -	 * supress error if fault is encountered during pre-fault.
> -	 */
> -	for (; addr < vma->vm_end; addr += PAGE_SIZE, pfn++) {
> -		if (addr == vmf->address)
> -			continue;
> -
> -		if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
> -			break;
> +	switch (order) {
> +	case 0:
> +		ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
> +		break;
> +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
> +	case PMD_ORDER:
> +		ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn + pgoff,
> +							     PFN_DEV), false);
> +		break;
> +#endif
> +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
> +	case PUD_ORDER:
> +		ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn + pgoff,
> +							     PFN_DEV), false);
> +		break;
> +#endif

I feel like this switch should be in some general function?

	vmf_insert_pfn_order(vmf, order, __pfn_to_pfn_t(pfn + pgoff, PFN_DEV), false);

No reason to expose every driver to this when you've already got a
nice contract to have the driver work on the passed in order.

What happens if the driver can't get a PFN that matches the requested
order?

Jason
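[Editor's note: the consolidation Jason suggests could look roughly like the
sketch below. vmf_insert_pfn_order() is a hypothetical helper that does not
exist in the kernel as of this thread; the body simply lifts the switch out of
the driver, assuming the same CONFIG_ARCH_SUPPORTS_* gating as the patch.]

	/*
	 * Hypothetical generic helper (a sketch, not an existing kernel
	 * API): pick the insertion primitive matching the order mm asked
	 * for, and fall back for orders the architecture cannot map.
	 */
	static vm_fault_t vmf_insert_pfn_order(struct vm_fault *vmf,
					       unsigned int order,
					       pfn_t pfn, bool write)
	{
		switch (order) {
		case 0:
			return vmf_insert_pfn(vmf->vma, vmf->address,
					      pfn_t_to_pfn(pfn));
	#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
		case PMD_ORDER:
			return vmf_insert_pfn_pmd(vmf, pfn, write);
	#endif
	#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
		case PUD_ORDER:
			return vmf_insert_pfn_pud(vmf, pfn, write);
	#endif
		default:
			return VM_FAULT_FALLBACK;
		}
	}

With such a helper, the driver's fault handler collapses to the single call
in Jason's one-liner above.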
On Wed, 14 Aug 2024 10:25:08 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> On Fri, Aug 09, 2024 at 12:09:09PM -0400, Peter Xu wrote:
> > @@ -1672,30 +1679,49 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
> >  	if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
> >  		goto out_unlock;
> >
> > -	ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
> > -	if (ret & VM_FAULT_ERROR)
> > -		goto out_unlock;
> > -
> > -	/*
> > -	 * Pre-fault the remainder of the vma, abort further insertions and
> > -	 * supress error if fault is encountered during pre-fault.
> > -	 */
> > -	for (; addr < vma->vm_end; addr += PAGE_SIZE, pfn++) {
> > -		if (addr == vmf->address)
> > -			continue;
> > -
> > -		if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
> > -			break;
> > +	switch (order) {
> > +	case 0:
> > +		ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
> > +		break;
> > +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
> > +	case PMD_ORDER:
> > +		ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn + pgoff,
> > +							     PFN_DEV), false);
> > +		break;
> > +#endif
> > +#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
> > +	case PUD_ORDER:
> > +		ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn + pgoff,
> > +							     PFN_DEV), false);
> > +		break;
> > +#endif
>
> I feel like this switch should be in some general function?
>
> 	vmf_insert_pfn_order(vmf, order, __pfn_to_pfn_t(pfn + pgoff, PFN_DEV), false);
>
> No reason to expose every driver to this when you've already got a
> nice contract to have the driver work on the passed in order.
>
> What happens if the driver can't get a PFN that matches the requested
> order?

There was some alignment and size checking chopped from the previous
reply that triggered a fallback, but in general PCI BARs are a power of
two and naturally aligned, so there should always be an order aligned
pfn.  Thanks,

Alex
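[Editor's note: to make the alignment argument concrete (numbers made up for
illustration): a 16MB BAR sits on a 16MB boundary, so its starting pfn is a
multiple of 1 << PMD_ORDER, and pgoff is order aligned whenever the fault
address passes the alignment check at the top of vfio_pci_mmap_huge_fault()
in the patch below, so the sum handed to vmf_insert_pfn_pmd() is aligned too.]

	/*
	 * Illustrative check only: with a naturally aligned power-of-two
	 * BAR, base_pfn is order aligned, pgoff is order aligned for a
	 * huge fault that passed the driver's address check, so the sum
	 * is order aligned as well.
	 */
	static bool pfn_is_order_aligned(unsigned long base_pfn,
					 unsigned long pgoff,
					 unsigned int order)
	{
		return !((base_pfn + pgoff) & ((1UL << order) - 1));
	}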
On Wed, Aug 14, 2024 at 10:08:49AM -0600, Alex Williamson wrote:
> There was some alignment and size checking chopped from the previous
> reply that triggered a fallback, but in general PCI BARs are a power of
> two and naturally aligned, so there should always be an order aligned
> pfn.

Sure, though I was mostly thinking about how to use this API in other
drivers. Maybe the device has 2M page alignment only but the VMA was
aligned to 1G? It will be called with an order higher than it can
support, but that is not an error that should be failed.

Jason
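[Editor's note: a driver in that position would not fail the fault; the
huge_fault contract already covers it. Returning VM_FAULT_FALLBACK makes the
core retry at the next smaller level (PUD to PMD, then the PTE .fault path).
A hedged sketch, with hypothetical foo_* names, of a device limited to 2M
alignment:]

	/*
	 * Hypothetical driver limited to 2M-aligned pfns; foo_vma_to_pfn()
	 * is assumed to return the base pfn of the region backing the vma.
	 */
	static vm_fault_t foo_huge_fault(struct vm_fault *vmf, unsigned int order)
	{
		unsigned long pgoff = vmf->pgoff - vmf->vma->vm_pgoff;
		unsigned long pfn = foo_vma_to_pfn(vmf->vma) + pgoff;

		/*
		 * mm may ask for PUD_ORDER if the VMA happens to be 1G
		 * aligned, even though the device only honors 2M.  That is
		 * not an error: VM_FAULT_FALLBACK tells the core to retry
		 * at a smaller size.
		 */
		if (order != PMD_ORDER || (pfn & ((1UL << order) - 1)))
			return VM_FAULT_FALLBACK;

		return vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn, PFN_DEV),
					  false);
	}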
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index ba0ce0075b2f..2d7478e9a62d 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -20,6 +20,7 @@
 #include <linux/mutex.h>
 #include <linux/notifier.h>
 #include <linux/pci.h>
+#include <linux/pfn_t.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -1657,14 +1658,20 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
 	return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
 }

-static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
+static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
+					   unsigned int order)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct vfio_pci_core_device *vdev = vma->vm_private_data;
 	unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff;
-	unsigned long addr = vma->vm_start;
 	vm_fault_t ret = VM_FAULT_SIGBUS;

+	if (order && (vmf->address & ((PAGE_SIZE << order) - 1) ||
+		      vmf->address + (PAGE_SIZE << order) > vma->vm_end)) {
+		ret = VM_FAULT_FALLBACK;
+		goto out;
+	}
+
 	pfn = vma_to_pfn(vma);

 	down_read(&vdev->memory_lock);
@@ -1672,30 +1679,49 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
 	if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
 		goto out_unlock;

-	ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
-	if (ret & VM_FAULT_ERROR)
-		goto out_unlock;
-
-	/*
-	 * Pre-fault the remainder of the vma, abort further insertions and
-	 * supress error if fault is encountered during pre-fault.
-	 */
-	for (; addr < vma->vm_end; addr += PAGE_SIZE, pfn++) {
-		if (addr == vmf->address)
-			continue;
-
-		if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
-			break;
+	switch (order) {
+	case 0:
+		ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
+		break;
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+	case PMD_ORDER:
+		ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn + pgoff,
+							     PFN_DEV), false);
+		break;
+#endif
+#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+	case PUD_ORDER:
+		ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn + pgoff,
+							     PFN_DEV), false);
+		break;
+#endif
+	default:
+		ret = VM_FAULT_FALLBACK;
 	}

 out_unlock:
 	up_read(&vdev->memory_lock);
+out:
+	dev_dbg_ratelimited(&vdev->pdev->dev,
+			    "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
+			    __func__, order,
+			    vma->vm_pgoff >>
+				(VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT),
+			    pgoff, (unsigned int)ret);

 	return ret;
 }

+static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf)
+{
+	return vfio_pci_mmap_huge_fault(vmf, 0);
+}
+
 static const struct vm_operations_struct vfio_pci_mmap_ops = {
-	.fault = vfio_pci_mmap_fault,
+	.fault = vfio_pci_mmap_page_fault,
+#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP
+	.huge_fault = vfio_pci_mmap_huge_fault,
+#endif
 };

 int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)