From patchwork Tue Jul 20 22:11:16 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alex Williamson X-Patchwork-Id: 113091 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.4/8.14.3) with ESMTP id o6KMBM4D008142 for ; Tue, 20 Jul 2010 22:11:22 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1761800Ab0GTWLT (ORCPT ); Tue, 20 Jul 2010 18:11:19 -0400 Received: from mx1.redhat.com ([209.132.183.28]:32023 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1761750Ab0GTWLS (ORCPT ); Tue, 20 Jul 2010 18:11:18 -0400 Received: from int-mx04.intmail.prod.int.phx2.redhat.com (int-mx04.intmail.prod.int.phx2.redhat.com [10.5.11.17]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id o6KMBHHR021860 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Tue, 20 Jul 2010 18:11:17 -0400 Received: from localhost6.localdomain6 (ovpn01.gateway.prod.ext.phx2.redhat.com [10.5.9.1]) by int-mx04.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id o6KMBG0f022430; Tue, 20 Jul 2010 18:11:17 -0400 From: Alex Williamson Subject: [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available To: kvm@vger.kernel.org Cc: ddutile@redhat.com, chrisw@redhat.com, alex.williamson@redhat.com Date: Tue, 20 Jul 2010 16:11:16 -0600 Message-ID: <20100720221045.3576.3435.stgit@localhost6.localdomain6> User-Agent: StGIT/0.14.3 MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.67 on 10.5.11.17 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Tue, 20 Jul 2010 22:11:22 +0000 (UTC) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 2bba22f..37c1278 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -67,6 +67,28 @@ static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr) return region->u.r_baseport + (addr - region->e_physbase); } +static int assigned_dev_ioport_rw(AssignedDevRegion *dev_region, + uint32_t addr, int len, uint32_t *val, + int write) +{ + if (dev_region->region->resource_fd == -1) + return -1; + + if (write) { + if (pwrite(dev_region->region->resource_fd, val, len, + (addr - dev_region->e_physbase)) != len) { + return -1; + } + } else { + if (pread(dev_region->region->resource_fd, val, len, + (addr - dev_region->e_physbase)) != len) { + return -1; + } + } + + return 0; +} + static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, uint32_t value) { @@ -77,7 +99,9 @@ static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, r_pio, (int)r_access->e_physbase, (unsigned long)r_access->u.r_baseport, value); - outb(value, r_pio); + if (assigned_dev_ioport_rw(r_access, addr, 1, &value, 1) != 0) { + outb(value, r_pio); + } } static void assigned_dev_ioport_writew(void *opaque, uint32_t addr, @@ -90,7 +114,9 @@ static void assigned_dev_ioport_writew(void *opaque, uint32_t addr, r_pio, (int)r_access->e_physbase, (unsigned long)r_access->u.r_baseport, value); - outw(value, r_pio); + if (assigned_dev_ioport_rw(r_access, addr, 2, &value, 1) != 0) { + outw(value, r_pio); + } } static void assigned_dev_ioport_writel(void *opaque, uint32_t addr, @@ -103,7 +129,9 @@ static void assigned_dev_ioport_writel(void *opaque, uint32_t addr, r_pio, (int)r_access->e_physbase, (unsigned long)r_access->u.r_baseport, value); - outl(value, r_pio); + if (assigned_dev_ioport_rw(r_access, addr, 4, &value, 1) != 0) { + outl(value, r_pio); + } } static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr) @@ -112,7 +140,9 @@ static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr) uint32_t r_pio = guest_to_host_ioport(r_access, addr); uint32_t value; - value = inb(r_pio); + if (assigned_dev_ioport_rw(r_access, addr, 1, &value, 0) != 0) { + value = inb(r_pio); + } DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n", r_pio, (int)r_access->e_physbase, @@ -127,7 +157,9 @@ static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr) uint32_t r_pio = guest_to_host_ioport(r_access, addr); uint32_t value; - value = inw(r_pio); + if (assigned_dev_ioport_rw(r_access, addr, 2, &value, 0) != 0) { + value = inw(r_pio); + } DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", r_pio, (int)r_access->e_physbase, @@ -142,7 +174,9 @@ static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr) uint32_t r_pio = guest_to_host_ioport(r_access, addr); uint32_t value; - value = inl(r_pio); + if (assigned_dev_ioport_rw(r_access, addr, 4, &value, 0) != 0) { + value = inl(r_pio); + } DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", r_pio, (int)r_access->e_physbase, @@ -305,7 +339,7 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num, DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n", addr, region->u.r_baseport, type, size, region_num); - if (first_map) { + if (first_map && region->region->resource_fd < 0) { struct ioperm_data *data; data = qemu_mallocz(sizeof(struct ioperm_data)); @@ -586,19 +620,46 @@ static int assigned_dev_register_regions(PCIRegion *io_regions, slow_map ? assigned_dev_iomem_map_slow : assigned_dev_iomem_map); continue; + } else { + /* handle port io regions */ + uint32_t val; + int ret; + + /* Test kernel support for ioport resource read/write. Old + * kernels return EIO. New kernels only allow 1/2/4 byte reads + * so should return EINVAL for a 3 byte read */ + ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0); + if (ret == 3) { + fprintf(stderr, "I/O port resource supports 3 byte read?!\n"); + abort(); + } else if (errno == EIO) { + fprintf(stderr, + "pcisysfs does not support rw ioport resource\n"); + close(pci_dev->v_addrs[i].region->resource_fd); + pci_dev->v_addrs[i].region->resource_fd = -1; + } else if (errno != EINVAL) { + fprintf(stderr, "Unexpected return from ioport pread (%d) %s\n", + errno, strerror(errno)); + abort(); + } + + /* Root user can use direct access for compatibility */ + if (getuid() == 0) { + close(pci_dev->v_addrs[i].region->resource_fd); + pci_dev->v_addrs[i].region->resource_fd = -1; + } + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; + pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr; + pci_dev->v_addrs[i].r_size = cur_region->size; + pci_dev->v_addrs[i].e_size = 0; + + pci_register_bar((PCIDevice *) pci_dev, i, + cur_region->size, PCI_BASE_ADDRESS_SPACE_IO, + assigned_dev_ioport_map); + + /* not relevant for port io */ + pci_dev->v_addrs[i].memory_index = 0; } - /* handle port io regions */ - pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; - pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr; - pci_dev->v_addrs[i].r_size = cur_region->size; - pci_dev->v_addrs[i].e_size = 0; - - pci_register_bar((PCIDevice *) pci_dev, i, - cur_region->size, PCI_BASE_ADDRESS_SPACE_IO, - assigned_dev_ioport_map); - - /* not relevant for port io */ - pci_dev->v_addrs[i].memory_index = 0; } /* success */ @@ -705,20 +766,22 @@ again: continue; if (flags & IORESOURCE_MEM) { flags &= ~IORESOURCE_IO; - if (r != PCI_ROM_SLOT) { - snprintf(name, sizeof(name), "%sresource%d", dir, r); - fd = open(name, O_RDWR); - if (fd == -1) - continue; - rp->resource_fd = fd; - } - } else + } else { flags &= ~IORESOURCE_PREFETCH; + } + if (r != PCI_ROM_SLOT) { + snprintf(name, sizeof(name), "%sresource%d", dir, r); + fd = open(name, O_RDWR); + if (fd == -1) + continue; + rp->resource_fd = fd; + } rp->type = flags; rp->valid = 1; rp->base_addr = start; rp->size = size; + pci_dev->v_addrs[r].region = rp; DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n", r, rp->size, start, rp->type, rp->resource_fd); } @@ -780,8 +843,10 @@ static void free_assigned_device(AssignedDevice *dev) continue; if (pci_region->type & IORESOURCE_IO) { - kvm_remove_ioperm_data(region->u.r_baseport, region->r_size); - continue; + if (pci_region->resource_fd < 0) { + kvm_remove_ioperm_data(region->u.r_baseport, + region->r_size); + } } else if (pci_region->type & IORESOURCE_MEM) { if (region->u.r_virtbase) { if (region->memory_index) { @@ -795,11 +860,11 @@ static void free_assigned_device(AssignedDevice *dev) fprintf(stderr, "Failed to unmap assigned device region: %s\n", strerror(errno)); - if (pci_region->resource_fd >= 0) { - close(pci_region->resource_fd); - } } - } + } + if (pci_region->resource_fd >= 0) { + close(pci_region->resource_fd); + } } if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) diff --git a/hw/device-assignment.h b/hw/device-assignment.h index 4e7fe87..9a3ea12 100644 --- a/hw/device-assignment.h +++ b/hw/device-assignment.h @@ -71,6 +71,7 @@ typedef struct { int num; /* our index within v_addrs[] */ pcibus_t e_size; /* emulated size of region in bytes */ pcibus_t r_size; /* real size of region in bytes */ + PCIRegion *region; } AssignedDevRegion; typedef struct AssignedDevice {