From patchwork Wed Nov 22 18:58:21 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jean-Philippe Brucker X-Patchwork-Id: 10070767 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id 01AFD601D5 for ; Wed, 22 Nov 2017 18:56:52 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id E9ACD29D74 for ; Wed, 22 Nov 2017 18:56:51 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id DE97229D97; Wed, 22 Nov 2017 18:56:51 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-6.9 required=2.0 tests=BAYES_00,RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 2030229D74 for ; Wed, 22 Nov 2017 18:56:51 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752207AbdKVS4t (ORCPT ); Wed, 22 Nov 2017 13:56:49 -0500 Received: from foss.arm.com ([217.140.101.70]:55054 "EHLO foss.arm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752176AbdKVS4j (ORCPT ); Wed, 22 Nov 2017 13:56:39 -0500 Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.72.51.249]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 862A91435; Wed, 22 Nov 2017 10:56:39 -0800 (PST) Received: from e106794-lin.cambridge.arm.com (e106794-lin.cambridge.arm.com [10.1.210.50]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 303F23F487; Wed, 22 Nov 2017 10:56:38 -0800 (PST) From: Jean-Philippe Brucker To: kvm@vger.kernel.org Cc: Will.Deacon@arm.com, Robin.Murphy@arm.com, Lorenzo.Pieralisi@arm.com, Marc.Zyngier@arm.com, Punit.Agrawal@arm.com, alex.williamson@redhat.com Subject: [PATCH v4 kvmtool 10/12] vfio: Support non-mmappable regions Date: Wed, 22 Nov 2017 18:58:21 +0000 Message-Id: <20171122185823.7765-11-jean-philippe.brucker@arm.com> X-Mailer: git-send-email 2.14.3 In-Reply-To: <20171122185823.7765-1-jean-philippe.brucker@arm.com> References: <20171122185823.7765-1-jean-philippe.brucker@arm.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP In some cases device regions don't support mmap. They can still be made available to the guest by trapping all accesses and forwarding reads or writes to VFIO. Such regions may be: * PCI I/O port BARs. * Sub-page regions, for example a 4kB region on a host with 64k pages. * Similarly, sparse mmap regions. For example when VFIO allows to mmap fragments of a PCI BAR and forbids accessing things like MSI-X tables. We don't support the sparse capability at the moment, so trap these regions instead (if VFIO rejects the mmap). Signed-off-by: Jean-Philippe Brucker --- v3->v4: new. --- include/kvm/vfio.h | 3 + vfio/core.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++------ vfio/pci.c | 41 +++++++------ 3 files changed, 176 insertions(+), 33 deletions(-) diff --git a/include/kvm/vfio.h b/include/kvm/vfio.h index eee821b333d9..0e3366274d0e 100644 --- a/include/kvm/vfio.h +++ b/include/kvm/vfio.h @@ -83,8 +83,11 @@ struct vfio_pci_device { struct vfio_region { struct vfio_region_info info; + struct vfio_device *vdev; u64 guest_phys_addr; void *host_addr; + u32 port_base; + int is_ioport :1; }; struct vfio_device { diff --git a/vfio/core.c b/vfio/core.c index e1b7366b9eda..fade197a1dd0 100644 --- a/vfio/core.c +++ b/vfio/core.c @@ -1,5 +1,6 @@ #include "kvm/kvm.h" #include "kvm/vfio.h" +#include "kvm/ioport.h" #include @@ -78,6 +79,141 @@ out_free_buf: return ret; } +static bool vfio_ioport_in(struct ioport *ioport, struct kvm_cpu *vcpu, + u16 port, void *data, int len) +{ + u32 val; + ssize_t nr; + struct vfio_region *region = ioport->priv; + struct vfio_device *vdev = region->vdev; + + u32 offset = port - region->port_base; + + if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ)) + return false; + + nr = pread(vdev->fd, &val, len, region->info.offset + offset); + if (nr != len) { + dev_err(vdev, "could not read %d bytes from I/O port 0x%x\n", + len, port); + return false; + } + + switch (len) { + case 1: + ioport__write8(data, val); + break; + case 2: + ioport__write16(data, val); + break; + case 4: + ioport__write32(data, val); + break; + default: + return false; + } + + return true; +} + +static bool vfio_ioport_out(struct ioport *ioport, struct kvm_cpu *vcpu, + u16 port, void *data, int len) +{ + u32 val; + ssize_t nr; + struct vfio_region *region = ioport->priv; + struct vfio_device *vdev = region->vdev; + + u32 offset = port - region->port_base; + + if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)) + return false; + + switch (len) { + case 1: + val = ioport__read8(data); + break; + case 2: + val = ioport__read16(data); + break; + case 4: + val = ioport__read32(data); + break; + default: + return false; + } + + nr = pwrite(vdev->fd, &val, len, region->info.offset + offset); + if (nr != len) + dev_err(vdev, "could not write %d bytes to I/O port 0x%x", len, + port); + + return nr == len; +} + +static struct ioport_operations vfio_ioport_ops = { + .io_in = vfio_ioport_in, + .io_out = vfio_ioport_out, +}; + +static void vfio_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len, + u8 is_write, void *ptr) +{ + u64 val; + ssize_t nr; + struct vfio_region *region = ptr; + struct vfio_device *vdev = region->vdev; + + u32 offset = addr - region->guest_phys_addr; + + if (len < 1 || len > 8) + goto err_report; + + if (is_write) { + if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE)) + goto err_report; + + memcpy(&val, data, len); + + nr = pwrite(vdev->fd, &val, len, region->info.offset + offset); + if ((u32)nr != len) + goto err_report; + } else { + if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ)) + goto err_report; + + nr = pread(vdev->fd, &val, len, region->info.offset + offset); + if ((u32)nr != len) + goto err_report; + + memcpy(data, &val, len); + } + + return; + +err_report: + dev_err(vdev, "could not %s %u bytes at 0x%x (0x%llx)", is_write ? + "write" : "read", len, offset, addr); +} + +static int vfio_setup_trap_region(struct kvm *kvm, struct vfio_device *vdev, + struct vfio_region *region) +{ + if (region->is_ioport) { + int port = ioport__register(kvm, IOPORT_EMPTY, &vfio_ioport_ops, + region->info.size, region); + if (port < 0) + return port; + + region->port_base = port; + return 0; + } + + return kvm__register_mmio(kvm, region->guest_phys_addr, + region->info.size, false, vfio_mmio_access, + region); +} + int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev, struct vfio_region *region) { @@ -86,17 +222,8 @@ int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev, /* KVM needs page-aligned regions */ u64 map_size = ALIGN(region->info.size, PAGE_SIZE); - /* - * We don't want to mess about trapping config accesses, so require that - * they can be mmap'd. Note that for PCI, this precludes the use of I/O - * BARs in the guest (we will hide them from Configuration Space, which - * is trapped). - */ - if (!(region->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) { - dev_info(vdev, "ignoring region %u, as it can't be mmap'd", - region->info.index); - return 0; - } + if (!(region->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) + return vfio_setup_trap_region(kvm, vdev, region); if (region->info.flags & VFIO_REGION_INFO_FLAG_READ) prot |= PROT_READ; @@ -106,10 +233,10 @@ int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev, base = mmap(NULL, region->info.size, prot, MAP_SHARED, vdev->fd, region->info.offset); if (base == MAP_FAILED) { - ret = -errno; - dev_err(vdev, "failed to mmap region %u (0x%llx bytes)", - region->info.index, region->info.size); - return ret; + /* TODO: support sparse mmap */ + dev_warn(vdev, "failed to mmap region %u (0x%llx bytes), falling back to trapping", + region->info.index, region->info.size); + return vfio_setup_trap_region(kvm, vdev, region); } region->host_addr = base; @@ -125,7 +252,13 @@ int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev, void vfio_unmap_region(struct kvm *kvm, struct vfio_region *region) { - munmap(region->host_addr, region->info.size); + if (region->host_addr) { + munmap(region->host_addr, region->info.size); + } else if (region->is_ioport) { + ioport__unregister(kvm, region->port_base); + } else { + kvm__deregister_mmio(kvm, region->guest_phys_addr); + } } static int vfio_configure_device(struct kvm *kvm, struct vfio_device *vdev) diff --git a/vfio/pci.c b/vfio/pci.c index 698b19582143..9c3ef2f1a373 100644 --- a/vfio/pci.c +++ b/vfio/pci.c @@ -629,24 +629,27 @@ static int vfio_pci_fixup_cfg_space(struct vfio_device *vdev) struct vfio_region_info *info; struct vfio_pci_device *pdev = &vdev->pci; - /* Enable exclusively MMIO and bus mastering */ - pdev->hdr.command &= ~PCI_COMMAND_IO; - pdev->hdr.command |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; - /* Initialise the BARs */ for (i = VFIO_PCI_BAR0_REGION_INDEX; i <= VFIO_PCI_BAR5_REGION_INDEX; ++i) { + u64 base; struct vfio_region *region = &vdev->regions[i]; - u64 base = region->guest_phys_addr; + + /* Construct a fake reg to match what we've mapped. */ + if (region->is_ioport) { + base = (region->port_base & PCI_BASE_ADDRESS_IO_MASK) | + PCI_BASE_ADDRESS_SPACE_IO; + } else { + base = (region->guest_phys_addr & + PCI_BASE_ADDRESS_MEM_MASK) | + PCI_BASE_ADDRESS_SPACE_MEMORY; + } + + pdev->hdr.bar[i] = base; if (!base) continue; pdev->hdr.bar_size[i] = region->info.size; - - /* Construct a fake reg to match what we've mapped. */ - pdev->hdr.bar[i] = (base & PCI_BASE_ADDRESS_MEM_MASK) | - PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_32; } /* I really can't be bothered to support cardbus. */ @@ -785,6 +788,7 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev, size_t nr) { int ret; + u32 bar; size_t map_size; struct vfio_pci_device *pdev = &vdev->pci; struct vfio_region *region = &vdev->regions[nr]; @@ -792,6 +796,10 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev, if (nr >= vdev->info.num_regions) return 0; + bar = pdev->hdr.bar[nr]; + + region->vdev = vdev; + region->is_ioport = !!(bar & PCI_BASE_ADDRESS_SPACE_IO); region->info = (struct vfio_region_info) { .argsz = sizeof(*region), .index = nr, @@ -819,14 +827,13 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev, } } - /* Grab some MMIO space in the guest */ - map_size = ALIGN(region->info.size, PAGE_SIZE); - region->guest_phys_addr = pci_get_io_space_block(map_size); + if (!region->is_ioport) { + /* Grab some MMIO space in the guest */ + map_size = ALIGN(region->info.size, PAGE_SIZE); + region->guest_phys_addr = pci_get_io_space_block(map_size); + } - /* - * Map the BARs into the guest. We'll later need to update - * configuration space to reflect our allocation. - */ + /* Map the BARs into the guest or setup a trap region. */ ret = vfio_map_region(kvm, vdev, region); if (ret) return ret;