Message ID: 1296175554.2891.29.camel@x201 (mailing list archive)
State: New, archived
Hi Alex, On 2011-01-28 01:45, Alex Williamson wrote: > On Thu, 2011-01-27 at 12:56 +0100, André Weidemann wrote: >> Hi Alex, >> >> On 26.01.2011 06:12, Alex Williamson wrote: >> >>> So while your initial results are promising, my guess is that you're >>> using card specific drivers and still need to consider some of the >>> harder problems with generic support for vga assignment. I hacked on >>> this for a bit trying to see if I could get vga assignment working >>> with the vfio driver. Setting up the legacy access and preventing >>> qemu from stealing it back should get you basic vga modes and might >>> even allow the option rom to run to initialize the card for pre-boot. >>> I was able to get this far on a similar ATI card. I never hard much >>> luck with other cards though, and I was never able to get the vesa >>> extensions working. Thanks, >> >> Do you mind sharing these patches? > > Attached. > We are about to try some pass-through with an NVIDA card. So I already hacked on your vfio patch to make it build against current devices assignment code. Some questions arose while studying the code: ... > --- /dev/null > +++ b/hw/vfio-vga.c > @@ -0,0 +1,291 @@ > +/* > + * vfio VGA device assignment support > + * > + * Copyright Red Hat, Inc. 2010 > + * > + * Authors: > + * Alex Williamson <alex.williamson@redhat.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + * Based on qemu-kvm device-assignment: > + * Adapted for KVM by Qumranet. > + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) > + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) > + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) > + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) > + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) > + */ > + > +#include <stdio.h> > +#include <unistd.h> > +#include <sys/io.h> > +#include <sys/mman.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include "event_notifier.h" > +#include "hw.h" > +#include "memory.h" > +#include "monitor.h" > +#include "pc.h" > +#include "qemu-error.h" > +#include "sysemu.h" > +#include "vfio.h" > +#include <pci/header.h> > +#include <pci/types.h> > +#include <linux/types.h> > +#include "linux-vfio.h" > + > +//#define DEBUG_VFIO_VGA > +#ifdef DEBUG_VFIO_VGA > +#define DPRINTF(fmt, ...) \ > + do { printf("vfio-vga: " fmt, ## __VA_ARGS__); } while (0) > +#else > +#define DPRINTF(fmt, ...) 
\ > + do { } while (0) > +#endif > + > +/* > + * VGA setup > + */ > +static void vfio_vga_write(VFIODevice *vdev, uint32_t addr, > + uint32_t val, int len) > +{ > + DPRINTF("%s 0x%x %d - 0x%x\n", __func__, 0xa0000 + addr, len, val); > + switch (len) { > + case 1: > + *(uint8_t *)(vdev->vga_mmio + addr) = (uint8_t)val; > + break; > + case 2: > + *(uint16_t *)(vdev->vga_mmio + addr) = (uint16_t)val; > + break; > + case 4: > + *(uint32_t *)(vdev->vga_mmio + addr) = val; > + break; > + } > +} > + > +static void vfio_vga_writeb(void *opaque, target_phys_addr_t addr, uint32_t val) > +{ > + vfio_vga_write(opaque, addr, val, 1); > +} > + > +static void vfio_vga_writew(void *opaque, target_phys_addr_t addr, uint32_t val) > +{ > + vfio_vga_write(opaque, addr, val, 2); > +} > + > +static void vfio_vga_writel(void *opaque, target_phys_addr_t addr, uint32_t val) > +{ > + vfio_vga_write(opaque, addr, val, 4); > +} > + > +static CPUWriteMemoryFunc * const vfio_vga_writes[] = { > + &vfio_vga_writeb, > + &vfio_vga_writew, > + &vfio_vga_writel > +}; > + > +static uint32_t vfio_vga_read(VFIODevice *vdev, uint32_t addr, int len) > +{ > + uint32_t val = 0xffffffff; > + switch (len) { > + case 1: > + val = (uint32_t)*(uint8_t *)(vdev->vga_mmio + addr); > + break; > + case 2: > + val = (uint32_t)*(uint16_t *)(vdev->vga_mmio + addr); > + break; > + case 4: > + val = *(uint32_t *)(vdev->vga_mmio + addr); > + break; > + } > + DPRINTF("%s 0x%x %d = 0x%x\n", __func__, 0xa0000 + addr, len, val); > + return val; > +} > + > +static uint32_t vfio_vga_readb(void *opaque, target_phys_addr_t addr) > +{ > + return vfio_vga_read(opaque, addr, 1); > +} > + > +static uint32_t vfio_vga_readw(void *opaque, target_phys_addr_t addr) > +{ > + return vfio_vga_read(opaque, addr, 2); > +} > + > +static uint32_t vfio_vga_readl(void *opaque, target_phys_addr_t addr) > +{ > + return vfio_vga_read(opaque, addr, 4); > +} > + > +static CPUReadMemoryFunc * const vfio_vga_reads[] = { > + &vfio_vga_readb, > + &vfio_vga_readw, > + &vfio_vga_readl > +}; > + > +static void vfio_vga_out(VFIODevice *vdev, uint32_t addr, uint32_t val, int len) > +{ > + DPRINTF("%s 0x%x %d - 0x%x\n", __func__, addr, len, val); > + ioperm(0x3b0, 0x30, 1); /* XXX fix me */ Why do you have to re-establish the ioperms here on each access? Are we just lacking the use of generic kvm ioperm management? 
> + switch (len) { > + case 1: > + outb(val, addr); > + break; > + case 2: > + outw(val, addr); > + break; > + case 4: > + outl(val, addr); > + break; > + } > +} > + > +static void vfio_vga_outb(void *opaque, uint32_t addr, uint32_t val) > +{ > + vfio_vga_out(opaque, addr, val, 1); > +} > + > +static void vfio_vga_outw(void *opaque, uint32_t addr, uint32_t val) > +{ > + vfio_vga_out(opaque, addr, val, 2); > +} > + > +static void vfio_vga_outl(void *opaque, uint32_t addr, uint32_t val) > +{ > + vfio_vga_out(opaque, addr, val, 4); > +} > + > +static uint32_t vfio_vga_in(VFIODevice *vdev, uint32_t addr, int len) > +{ > + uint32_t val = 0xffffffff; > + ioperm(0x3b0, 0x30, 1); /* XXX fix me */ > + switch (len) { > + case 1: > + val = inb(addr); > + break; > + case 2: > + val = inw(addr); > + break; > + case 4: > + val = inl(addr); > + break; > + } > + DPRINTF("%s 0x%x, %d = 0x%x\n", __func__, addr, len, val); > + return val; > +} > + > +static uint32_t vfio_vga_inb(void *opaque, uint32_t addr) > +{ > + return vfio_vga_in(opaque, addr, 1); > +} > + > +static uint32_t vfio_vga_inw(void *opaque, uint32_t addr) > +{ > + return vfio_vga_in(opaque, addr, 2); > +} > + > +static uint32_t vfio_vga_inl(void *opaque, uint32_t addr) > +{ > + return vfio_vga_in(opaque, addr, 4); > +} > + > +int vfio_vga_setup(VFIODevice *vdev) > +{ > + char buf[256]; > + int ret; > + > + if (vga_interface_type != VGA_NONE) { > + fprintf(stderr, > + "VGA devie assigned without -vga none param, no ISA VGA\n"); > + return -1; > + } > + > + vdev->vga_fd = open("/dev/vga_arbiter", O_RDWR); > + if (vdev->vga_fd < 0) { > + fprintf(stderr, "%s - Failed to open vga arbiter (%s)\n", > + __func__, strerror(errno)); > + return -1; > + } > + ret = read(vdev->vga_fd, buf, sizeof(buf)); > + if (ret <= 0) { > + fprintf(stderr, "%s - Failed to read from vga arbiter (%s)\n", > + __func__, strerror(errno)); > + close(vdev->vga_fd); > + return -1; > + } > + buf[ret - 1] = 0; > + vdev->vga_orig = qemu_strdup(buf); > + > + snprintf(buf, sizeof(buf), "target PCI:%04x:%02x:%02x.%x", > + vdev->host.seg, vdev->host.bus, vdev->host.dev, vdev->host.func); > + ret = write(vdev->vga_fd, buf, strlen(buf)); > + if (ret != strlen(buf)) { > + fprintf(stderr, "%s - Failed to write to vga arbiter (%s)\n", > + __func__, strerror(errno)); > + close(vdev->vga_fd); > + return -1; > + } > + snprintf(buf, sizeof(buf), "decodes io+mem"); > + ret = write(vdev->vga_fd, buf, strlen(buf)); > + if (ret != strlen(buf)) { > + fprintf(stderr, "%s - Failed to write to vga arbiter (%s)\n", > + __func__, strerror(errno)); > + close(vdev->vga_fd); > + return -1; > + } OK, so we grab the assigned adapter and make it handle legacy io+mem. I guess this approach only works with a single guest with an assigned adapter. Would it be possible and not extremely costly to do some on-demand grabbing of the range to share it with multiple VMs? And what about the host? When does Linux release the legacy range? Always or only when a specific (!=vga/vesa) framebuffer driver is loaded? Is there some other way to pass the legacy accesses from the guest to a specific adapter without going via the host's legacy area? I.e. do some adapters allow remapping? 
> + > + vdev->vga_mmio_fd = open("/dev/mem", O_RDWR); > + if (vdev->vga_mmio_fd < 0) { > + fprintf(stderr, "%s - Failed to open /dev/mem (%s)\n", > + __func__, strerror(errno)); > + return -1; > + } > + vdev->vga_mmio = mmap(NULL, 0x40000, PROT_READ | PROT_WRITE, > + MAP_SHARED, vdev->vga_mmio_fd, 0xa0000); > + if (vdev->vga_mmio == MAP_FAILED) { > + fprintf(stderr, "%s - mmap failed (%s)\n", __func__, strerror(errno)); > + return -1; > + } > + > +#if 1 > + vdev->vga_io = cpu_register_io_memory(vfio_vga_reads, > + vfio_vga_writes, vdev); > + cpu_register_physical_memory(0xa0000, 0x20000, vdev->vga_io); > + qemu_register_coalesced_mmio(0xa0000, 0x20000); > +#else > + cpu_register_physical_memory(0xa0000, 0x20000, > + qemu_ram_map(&vdev->pdev.qdev, "VGA", 0x20000, vdev->vga_mmio)); > + qemu_register_coalesced_mmio(0xa0000, 0x20000); > +#endif To make the second case work, we would have to track the mode switches of the guest via legacy VGA interfaces and switch the mapping on the fly, right? > + > + register_ioport_write(0x3b0, 0x30, 1, vfio_vga_outb, vdev); > + register_ioport_write(0x3b0, 0x30, 2, vfio_vga_outw, vdev); > + register_ioport_write(0x3b0, 0x30, 4, vfio_vga_outl, vdev); > + register_ioport_read(0x3b0, 0x30, 1, vfio_vga_inb, vdev); > + register_ioport_read(0x3b0, 0x30, 2, vfio_vga_inw, vdev); > + register_ioport_read(0x3b0, 0x30, 4, vfio_vga_inl, vdev); > + if (ioperm(0x3b0, 0x30, 1)) { > + fprintf(stderr, "%s - ioperm failed (%s)\n", __func__, strerror(errno)); > + return -1; > + } > + return 0; > +} > + > +void vfio_vga_exit(VFIODevice *vdev) > +{ > + if (!vdev->vga_io) > + return; > + > + isa_unassign_ioport(0x3b0, 0x30); > + qemu_unregister_coalesced_mmio(0xa0000, 0x20000); > + cpu_register_physical_memory(0xa0000, 0x20000, IO_MEM_UNASSIGNED); > + cpu_unregister_io_memory(vdev->vga_io); > + munmap(vdev->vga_mmio, 0x40000); > + close(vdev->vga_mmio_fd); > + qemu_free(vdev->vga_orig); > + close(vdev->vga_fd); > +} > + Thanks, Jan
Hi Jan, On Thu, 2011-05-05 at 10:50 +0200, Jan Kiszka wrote: > Hi Alex, > > On 2011-01-28 01:45, Alex Williamson wrote: > > On Thu, 2011-01-27 at 12:56 +0100, André Weidemann wrote: > >> Hi Alex, > >> > >> On 26.01.2011 06:12, Alex Williamson wrote: > >> > >>> So while your initial results are promising, my guess is that you're > >>> using card specific drivers and still need to consider some of the > >>> harder problems with generic support for vga assignment. I hacked on > >>> this for a bit trying to see if I could get vga assignment working > >>> with the vfio driver. Setting up the legacy access and preventing > >>> qemu from stealing it back should get you basic vga modes and might > >>> even allow the option rom to run to initialize the card for pre-boot. > >>> I was able to get this far on a similar ATI card. I never hard much > >>> luck with other cards though, and I was never able to get the vesa > >>> extensions working. Thanks, > >> > >> Do you mind sharing these patches? > > > > Attached. > > > > We are about to try some pass-through with an NVIDA card. So I already > hacked on your vfio patch to make it build against current devices > assignment code. Some questions arose while studying the code: Cool! > > --- /dev/null > > +++ b/hw/vfio-vga.c > > @@ -0,0 +1,291 @@ > > +/* > > + * vfio VGA device assignment support > > + * > > + * Copyright Red Hat, Inc. 2010 > > + * > > + * Authors: > > + * Alex Williamson <alex.williamson@redhat.com> > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2. See > > + * the COPYING file in the top-level directory. > > + * > > + * Based on qemu-kvm device-assignment: > > + * Adapted for KVM by Qumranet. > > + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) > > + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) > > + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) > > + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) > > + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) > > + */ > > + > > +#include <stdio.h> > > +#include <unistd.h> > > +#include <sys/io.h> > > +#include <sys/mman.h> > > +#include <sys/types.h> > > +#include <sys/stat.h> > > +#include "event_notifier.h" > > +#include "hw.h" > > +#include "memory.h" > > +#include "monitor.h" > > +#include "pc.h" > > +#include "qemu-error.h" > > +#include "sysemu.h" > > +#include "vfio.h" > > +#include <pci/header.h> > > +#include <pci/types.h> > > +#include <linux/types.h> > > +#include "linux-vfio.h" > > + > > +//#define DEBUG_VFIO_VGA > > +#ifdef DEBUG_VFIO_VGA > > +#define DPRINTF(fmt, ...) \ > > + do { printf("vfio-vga: " fmt, ## __VA_ARGS__); } while (0) > > +#else > > +#define DPRINTF(fmt, ...) 
\ > > + do { } while (0) > > +#endif > > + > > +/* > > + * VGA setup > > + */ > > +static void vfio_vga_write(VFIODevice *vdev, uint32_t addr, > > + uint32_t val, int len) > > +{ > > + DPRINTF("%s 0x%x %d - 0x%x\n", __func__, 0xa0000 + addr, len, val); > > + switch (len) { > > + case 1: > > + *(uint8_t *)(vdev->vga_mmio + addr) = (uint8_t)val; > > + break; > > + case 2: > > + *(uint16_t *)(vdev->vga_mmio + addr) = (uint16_t)val; > > + break; > > + case 4: > > + *(uint32_t *)(vdev->vga_mmio + addr) = val; > > + break; > > + } > > +} > > + > > +static void vfio_vga_writeb(void *opaque, target_phys_addr_t addr, uint32_t val) > > +{ > > + vfio_vga_write(opaque, addr, val, 1); > > +} > > + > > +static void vfio_vga_writew(void *opaque, target_phys_addr_t addr, uint32_t val) > > +{ > > + vfio_vga_write(opaque, addr, val, 2); > > +} > > + > > +static void vfio_vga_writel(void *opaque, target_phys_addr_t addr, uint32_t val) > > +{ > > + vfio_vga_write(opaque, addr, val, 4); > > +} > > + > > +static CPUWriteMemoryFunc * const vfio_vga_writes[] = { > > + &vfio_vga_writeb, > > + &vfio_vga_writew, > > + &vfio_vga_writel > > +}; > > + > > +static uint32_t vfio_vga_read(VFIODevice *vdev, uint32_t addr, int len) > > +{ > > + uint32_t val = 0xffffffff; > > + switch (len) { > > + case 1: > > + val = (uint32_t)*(uint8_t *)(vdev->vga_mmio + addr); > > + break; > > + case 2: > > + val = (uint32_t)*(uint16_t *)(vdev->vga_mmio + addr); > > + break; > > + case 4: > > + val = *(uint32_t *)(vdev->vga_mmio + addr); > > + break; > > + } > > + DPRINTF("%s 0x%x %d = 0x%x\n", __func__, 0xa0000 + addr, len, val); > > + return val; > > +} > > + > > +static uint32_t vfio_vga_readb(void *opaque, target_phys_addr_t addr) > > +{ > > + return vfio_vga_read(opaque, addr, 1); > > +} > > + > > +static uint32_t vfio_vga_readw(void *opaque, target_phys_addr_t addr) > > +{ > > + return vfio_vga_read(opaque, addr, 2); > > +} > > + > > +static uint32_t vfio_vga_readl(void *opaque, target_phys_addr_t addr) > > +{ > > + return vfio_vga_read(opaque, addr, 4); > > +} > > + > > +static CPUReadMemoryFunc * const vfio_vga_reads[] = { > > + &vfio_vga_readb, > > + &vfio_vga_readw, > > + &vfio_vga_readl > > +}; > > + > > +static void vfio_vga_out(VFIODevice *vdev, uint32_t addr, uint32_t val, int len) > > +{ > > + DPRINTF("%s 0x%x %d - 0x%x\n", __func__, addr, len, val); > > + ioperm(0x3b0, 0x30, 1); /* XXX fix me */ > > Why do you have to re-establish the ioperms here on each access? Are we > just lacking the use of generic kvm ioperm management? IIRC, setting it up initially wasn't sticking, so I put it here as just a quick fix to make sure it was set before we used it. I never fully made it though debugging why it wasn't working when set earlier. In general, legacy mmio and ioport needs a better solution. I wish x86 implemented the legacy io feature of pci sysfs so we could do it that way, which might also move vga arbitration and chipset vga routing into the host kernel. 
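For context, the pci sysfs legacy feature Alex wishes for already exists on a few platforms (ia64 among them, x86 not): each bus directory under /sys/class/pci_bus/ gets legacy_io and legacy_mem nodes, where the file offset selects the port or address and the transfer size selects the width. A minimal sketch of what user space could do with it, assuming such a platform; the helper names are illustrative, not an existing API:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Open the legacy I/O node of a PCI bus, e.g. "0000:00". */
static int legacy_io_open(const char *bus)
{
    char path[64];

    snprintf(path, sizeof(path), "/sys/class/pci_bus/%s/legacy_io", bus);
    return open(path, O_RDWR);
}

/* The offset is the port number, the access size the transfer width. */
static uint8_t legacy_inb(int fd, uint16_t port)
{
    uint8_t val = 0xff;

    pread(fd, &val, 1, port);
    return val;
}

static void legacy_outb(int fd, uint16_t port, uint8_t val)
{
    pwrite(fd, &val, 1, port);
}

Going through the bus node instead of raw ioperm()/outb() would let the kernel route the access to the right root bridge, which is why it would also be a natural home for the arbitration discussed below.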
> > + switch (len) { > > + case 1: > > + outb(val, addr); > > + break; > > + case 2: > > + outw(val, addr); > > + break; > > + case 4: > > + outl(val, addr); > > + break; > > + } > > +} > > + > > +static void vfio_vga_outb(void *opaque, uint32_t addr, uint32_t val) > > +{ > > + vfio_vga_out(opaque, addr, val, 1); > > +} > > + > > +static void vfio_vga_outw(void *opaque, uint32_t addr, uint32_t val) > > +{ > > + vfio_vga_out(opaque, addr, val, 2); > > +} > > + > > +static void vfio_vga_outl(void *opaque, uint32_t addr, uint32_t val) > > +{ > > + vfio_vga_out(opaque, addr, val, 4); > > +} > > + > > +static uint32_t vfio_vga_in(VFIODevice *vdev, uint32_t addr, int len) > > +{ > > + uint32_t val = 0xffffffff; > > + ioperm(0x3b0, 0x30, 1); /* XXX fix me */ > > + switch (len) { > > + case 1: > > + val = inb(addr); > > + break; > > + case 2: > > + val = inw(addr); > > + break; > > + case 4: > > + val = inl(addr); > > + break; > > + } > > + DPRINTF("%s 0x%x, %d = 0x%x\n", __func__, addr, len, val); > > + return val; > > +} > > + > > +static uint32_t vfio_vga_inb(void *opaque, uint32_t addr) > > +{ > > + return vfio_vga_in(opaque, addr, 1); > > +} > > + > > +static uint32_t vfio_vga_inw(void *opaque, uint32_t addr) > > +{ > > + return vfio_vga_in(opaque, addr, 2); > > +} > > + > > +static uint32_t vfio_vga_inl(void *opaque, uint32_t addr) > > +{ > > + return vfio_vga_in(opaque, addr, 4); > > +} > > + > > +int vfio_vga_setup(VFIODevice *vdev) > > +{ > > + char buf[256]; > > + int ret; > > + > > + if (vga_interface_type != VGA_NONE) { > > + fprintf(stderr, > > + "VGA devie assigned without -vga none param, no ISA VGA\n"); > > + return -1; > > + } > > + > > + vdev->vga_fd = open("/dev/vga_arbiter", O_RDWR); > > + if (vdev->vga_fd < 0) { > > + fprintf(stderr, "%s - Failed to open vga arbiter (%s)\n", > > + __func__, strerror(errno)); > > + return -1; > > + } > > + ret = read(vdev->vga_fd, buf, sizeof(buf)); > > + if (ret <= 0) { > > + fprintf(stderr, "%s - Failed to read from vga arbiter (%s)\n", > > + __func__, strerror(errno)); > > + close(vdev->vga_fd); > > + return -1; > > + } > > + buf[ret - 1] = 0; > > + vdev->vga_orig = qemu_strdup(buf); > > + > > + snprintf(buf, sizeof(buf), "target PCI:%04x:%02x:%02x.%x", > > + vdev->host.seg, vdev->host.bus, vdev->host.dev, vdev->host.func); > > + ret = write(vdev->vga_fd, buf, strlen(buf)); > > + if (ret != strlen(buf)) { > > + fprintf(stderr, "%s - Failed to write to vga arbiter (%s)\n", > > + __func__, strerror(errno)); > > + close(vdev->vga_fd); > > + return -1; > > + } > > + snprintf(buf, sizeof(buf), "decodes io+mem"); > > + ret = write(vdev->vga_fd, buf, strlen(buf)); > > + if (ret != strlen(buf)) { > > + fprintf(stderr, "%s - Failed to write to vga arbiter (%s)\n", > > + __func__, strerror(errno)); > > + close(vdev->vga_fd); > > + return -1; > > + } > > OK, so we grab the assigned adapter and make it handle legacy io+mem. I > guess this approach only works with a single guest with an assigned > adapter. Would it be possible and not extremely costly to do some > on-demand grabbing of the range to share it with multiple VMs? Yes, and that was my intention but never got that far. Each legacy io access should switch the arbiter to the necessary device. Unfortunately the vga arbiter only works if everyone uses it, and so far it seems like nobody does. Obviously some pretty hefty performance implications with switch on every read. I'm not sure how that's going to play out. 
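The per-access switching Alex describes would bracket every trapped legacy access with an arbiter round trip. A rough sketch against the patch above, using the command strings documented for /dev/vga_arbiter; the helper names are invented here and error handling is omitted:

/* Route legacy VGA decoding to our adapter before touching it... */
static void vfio_vga_arbiter_lock(VFIODevice *vdev)
{
    char buf[64];

    snprintf(buf, sizeof(buf), "target PCI:%04x:%02x:%02x.%x",
             vdev->host.seg, vdev->host.bus, vdev->host.dev, vdev->host.func);
    write(vdev->vga_fd, buf, strlen(buf));
    write(vdev->vga_fd, "lock io+mem", strlen("lock io+mem"));
}

/* ...and release it afterwards so another client can switch away. */
static void vfio_vga_arbiter_unlock(VFIODevice *vdev)
{
    write(vdev->vga_fd, "unlock io+mem", strlen("unlock io+mem"));
}

Each guest access would then pay for two extra syscalls plus whatever routing change the arbiter performs, which is where the performance worry above comes from.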
I expect once we bootstrap the VGA device and load a real driver, the legacy areas are seldom used. > And what about the host? When does Linux release the legacy range? > Always or only when a specific (!=vga/vesa) framebuffer driver is loaded? Well, that's where it'd be nice if the vga arbiter was actually in more widespread use. It currently seems to be nothing more than a shared mutex, but it would actually be useful if it included backends to do the chipset vga routing changes. I think when I was testing this, I was externally poking PCI bridge chipset to toggle the VGA_EN bit. > Is there some other way to pass the legacy accesses from the guest to a > specific adapter without going via the host's legacy area? I.e. do some > adapters allow remapping? Not that I know of on x86. I wouldn't be surprised if some adapters just re-route the legacy address ranges to standard PCI mappings, but I don't know how to figure out if that's true and what the offsets would be. I've seen ia64 hardware that supports a _TRA offset such that each PCI root bridge can support its own legacy io port space, but that requires a whole different ioport model. I believe X.org tries to tackle this by brute force, manually changing VGA enable bits on PCI bridges. I think this is part of why it's difficult to run multiple X servers on the same system. Not sure if that problem has gotten any better since I last looked. > > + > > + vdev->vga_mmio_fd = open("/dev/mem", O_RDWR); > > + if (vdev->vga_mmio_fd < 0) { > > + fprintf(stderr, "%s - Failed to open /dev/mem (%s)\n", > > + __func__, strerror(errno)); > > + return -1; > > + } > > + vdev->vga_mmio = mmap(NULL, 0x40000, PROT_READ | PROT_WRITE, > > + MAP_SHARED, vdev->vga_mmio_fd, 0xa0000); > > + if (vdev->vga_mmio == MAP_FAILED) { > > + fprintf(stderr, "%s - mmap failed (%s)\n", __func__, strerror(errno)); > > + return -1; > > + } > > + > > +#if 1 > > + vdev->vga_io = cpu_register_io_memory(vfio_vga_reads, > > + vfio_vga_writes, vdev); > > + cpu_register_physical_memory(0xa0000, 0x20000, vdev->vga_io); > > + qemu_register_coalesced_mmio(0xa0000, 0x20000); > > +#else > > + cpu_register_physical_memory(0xa0000, 0x20000, > > + qemu_ram_map(&vdev->pdev.qdev, "VGA", 0x20000, vdev->vga_mmio)); > > + qemu_register_coalesced_mmio(0xa0000, 0x20000); > > +#endif > > To make the second case work, we would have to track the mode switches > of the guest via legacy VGA interfaces and switch the mapping on the > fly, right? Yeah, something like that. IIRC, I was expecting the second case to work since I'm doing a static switch of the legacy address space and I can't recall if it wasn't working or if I used the read/write interface just so I could add fprintfs to make sure something is happening.
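For reference, the external poking mentioned above amounts to flipping the VGA Enable bit in the upstream bridge's Bridge Control register (offset 0x3e of a type 1 header, bit 3, per the PCI-to-PCI bridge spec). A sketch doing it through sysfs config space; the bridge address below is an example, root privileges and a little-endian host are assumed:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define PCI_BRIDGE_CONTROL 0x3e  /* type 1 header */
#define PCI_BRIDGE_CTL_VGA 0x08  /* forward legacy VGA ranges downstream */

static int bridge_set_vga_en(const char *bdf, int enable)
{
    char path[80];
    uint16_t ctl;
    int fd;

    snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/config", bdf);
    fd = open(path, O_RDWR);
    if (fd < 0)
        return -1;
    if (pread(fd, &ctl, 2, PCI_BRIDGE_CONTROL) == 2) {
        ctl = enable ? (ctl | PCI_BRIDGE_CTL_VGA) : (ctl & ~PCI_BRIDGE_CTL_VGA);
        pwrite(fd, &ctl, 2, PCI_BRIDGE_CONTROL);
    }
    close(fd);
    return 0;
}

Called as bridge_set_vga_en("0000:00:01.0", 1) on the bridge above the assigned card, this is essentially what an arbiter backend for the chipset routing would have to do, just from the wrong side of the kernel.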
Thanks, Alex > > + > > + register_ioport_write(0x3b0, 0x30, 1, vfio_vga_outb, vdev); > > + register_ioport_write(0x3b0, 0x30, 2, vfio_vga_outw, vdev); > > + register_ioport_write(0x3b0, 0x30, 4, vfio_vga_outl, vdev); > > + register_ioport_read(0x3b0, 0x30, 1, vfio_vga_inb, vdev); > > + register_ioport_read(0x3b0, 0x30, 2, vfio_vga_inw, vdev); > > + register_ioport_read(0x3b0, 0x30, 4, vfio_vga_inl, vdev); > > + if (ioperm(0x3b0, 0x30, 1)) { > > + fprintf(stderr, "%s - ioperm failed (%s)\n", __func__, strerror(errno)); > > + return -1; > > + } > > + return 0; > > +} > > + > > +void vfio_vga_exit(VFIODevice *vdev) > > +{ > > + if (!vdev->vga_io) > > + return; > > + > > + isa_unassign_ioport(0x3b0, 0x30); > > + qemu_unregister_coalesced_mmio(0xa0000, 0x20000); > > + cpu_register_physical_memory(0xa0000, 0x20000, IO_MEM_UNASSIGNED); > > + cpu_unregister_io_memory(vdev->vga_io); > > + munmap(vdev->vga_mmio, 0x40000); > > + close(vdev->vga_mmio_fd); > > + qemu_free(vdev->vga_orig); > > + close(vdev->vga_fd); > > +} > > + > > Thanks, > Jan
On 2011-05-05 17:17, Alex Williamson wrote: >> And what about the host? When does Linux release the legacy range? >> Always or only when a specific (!=vga/vesa) framebuffer driver is loaded? > > Well, that's where it'd be nice if the vga arbiter was actually in more > widespread use. It currently seems to be nothing more than a shared > mutex, but it would actually be useful if it included backends to do the > chipset vga routing changes. I think when I was testing this, I was > externally poking PCI bridge chipset to toggle the VGA_EN bit. Right, we had to drop the approach to pass through the secondary card for now, the arbiter was not switching properly. Haven't checked yet if VGA_EN was properly set, though the kernel code looks like it should take care of this. Even with handing out the primary adapter, we had only mixed success so far. The onboard adapter worked well (in VESA mode), but the NVIDIA is not displaying early boot messages at all. Maybe a vgabios issue. Windows was booting nevertheless - until we installed the NVIDIA drivers. Then it ran into a blue screen. BTW, what ATI adapter did you use precisely, and what worked and what did not? One thing I was wondering: Most modern adapters should be PCIe these days. Our NVIDIA definitely is. But so far we are claiming to have it attached to a PCI bus. That hides all the extended capabilities, for one thing. Could this make some relevant difference? Jan
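Whether a card shows up as PCIe is easy to check from either side: a device on a conventional PCI bus only exposes the 256-byte config space, so everything from offset 0x100 up, i.e. all PCIe extended capabilities, is unreachable. In sysfs that difference is visible as the size of the config node; a small probe, the device path being an example:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
    const char *cfg = "/sys/bus/pci/devices/0000:07:00.0/config"; /* example */
    struct stat st;

    if (stat(cfg, &st))
        return 1;
    printf("%s: %lld bytes -> %s config space\n", cfg,
           (long long)st.st_size,
           st.st_size > 256 ? "extended (PCIe)" : "conventional (PCI)");
    return 0;
}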
On Mon, 2011-05-09 at 13:14 +0200, Jan Kiszka wrote: > On 2011-05-05 17:17, Alex Williamson wrote: > >> And what about the host? When does Linux release the legacy range? > >> Always or only when a specific (!=vga/vesa) framebuffer driver is loaded? > > > > Well, that's where it'd be nice if the vga arbiter was actually in more > > widespread use. It currently seems to be nothing more than a shared > > mutex, but it would actually be useful if it included backends to do the > > chipset vga routing changes. I think when I was testing this, I was > > externally poking PCI bridge chipset to toggle the VGA_EN bit. > > Right, we had to drop the approach to pass through the secondary card > for now, the arbiter was not switching properly. Haven't checked yet if > VGA_EN was properly set, though the kernel code looks like it should > take care of this. > > Even with handing out the primary adapter, we had only mixed success so > far. The onboard adapter worked well (in VESA mode), but the NVIDIA is > not displaying early boot messages at all. Maybe a vgabios issue. > Windows was booting nevertheless - until we installed the NVIDIA > drivers. Then it ran into a blue screen. Interesting, IIRC I could never get VESA modes to work. I believe I only had a basic VGA16 mode running in a Windows guest too. > BTW, what ATI adapter did you use precisely, and what worked and what did not? I have an old X550 (rv380?). I also have an Nvidia gs8400, but ISTR the ATI working better for me. > One thing I was wondering: Most modern adapters should be PCIe these > days. Our NVIDIA definitely is. But so far we are claiming to have it > attached to a PCI bus. That hides all the extended capabilities, for one thing. > Could this make some relevant difference? The BIOS and early boot use shouldn't care too much about that, but I could imagine the high performance drivers potentially failing. Thanks, Alex
On Mon, May 9, 2011 at 12:14 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote: > On 2011-05-05 17:17, Alex Williamson wrote: >>> And what about the host? When does Linux release the legacy range? >>> Always or only when a specific (!=vga/vesa) framebuffer driver is loaded? >> >> Well, that's where it'd be nice if the vga arbiter was actually in more >> widespread use. It currently seems to be nothing more than a shared >> mutex, but it would actually be useful if it included backends to do the >> chipset vga routing changes. I think when I was testing this, I was >> externally poking PCI bridge chipset to toggle the VGA_EN bit. > > Right, we had to drop the approach to pass through the secondary card > for now, the arbiter was not switching properly. Haven't checked yet if > VGA_EN was properly set, though the kernel code looks like it should > take care of this. > > Even with handing out the primary adapter, we had only mixed success so > far. The onboard adapter worked well (in VESA mode), but the NVIDIA is > not displaying early boot messages at all. Maybe a vgabios issue. > Windows was booting nevertheless - until we installed the NVIDIA > drivers. Then it ran into a blue screen. > > BTW, what ATI adapter did you use precisely, and what worked and what did not? Not hijacking the mail thread. Just wanted to provide some inputs. A few days back I tried passing through the secondary graphics card. I could pass through two graphics cards to a virtual machine:

02:00.0 VGA compatible controller: ATI Technologies Inc Redwood [Radeon HD 5670] (prog-if 00 [VGA controller])
    Subsystem: PC Partner Limited Device e151
    Flags: bus master, fast devsel, latency 0, IRQ 87
    Memory at d0000000 (64-bit, prefetchable) [size=256M]
    Memory at fe6e0000 (64-bit, non-prefetchable) [size=128K]
    I/O ports at b000 [size=256]
    Expansion ROM at fe6c0000 [disabled] [size=128K]
    Capabilities: <access denied>
    Kernel driver in use: radeon
    Kernel modules: radeon

07:00.0 VGA compatible controller: nVidia Corporation G86 [Quadro NVS 290] (rev a1) (prog-if 00 [VGA controller])
    Subsystem: nVidia Corporation Device 0492
    Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx-
    Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
    Latency: 0, Cache Line Size: 64 bytes
    Interrupt: pin A routed to IRQ 24
    Region 0: Memory at fd000000 (32-bit, non-prefetchable) [size=16M]
    Region 1: Memory at d0000000 (64-bit, prefetchable) [size=256M]
    Region 3: Memory at fa000000 (64-bit, non-prefetchable) [size=32M]
    Region 5: I/O ports at ec00 [size=128]
    Expansion ROM at fe9e0000 [disabled] [size=128K]
    Capabilities: <access denied>
    Kernel driver in use: nouveau
    Kernel modules: nouveau, nvidiafb

Both of them are PCIe cards. I have one more ATI card and another NVIDIA card which do not work. One of the reasons the pass-through did not work is the limit SeaBIOS places on the amount of PCI memory space: a hard limit of 256MB or so. Thus, some of the VGA devices that need more memory never worked for me. SeaBIOS allows this memory region to be extended to a value near 512MB, but even then the range is not enough. Another problem with SeaBIOS which limits the usable memory space: SeaBIOS allocates the BAR regions in the order they are encountered. As far as I know, BAR regions must be naturally aligned, so this simple strategy results in large fragmentation.
Therefore, even after increasing the PCI memory space to 512MB, some BAR regions remained unallocated. I will confirm the details of the other graphics cards which do not work. Thanks and Regards, Prasad > > One thing I was wondering: Most modern adapters should be PCIe these > days. Our NVIDIA definitely is. But so far we are claiming to have it > attached to a PCI bus. That hides all the extended capabilities, for one thing. > Could this make some relevant difference? > > Jan > > -- > Siemens AG, Corporate Technology, CT T DE IT 1 > Corporate Competence Center Embedded Linux
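To make the fragmentation point concrete, here is a toy model of allocation in discovery order with natural alignment, fed with the NVS 290 BAR sizes from the lspci output above (16M, 256M, 32M) and an assumed 512MB window at 0xe0000000. Only 304MB are requested, yet the 32M BAR no longer fits because aligning the 256M BAR wastes 240MB; placing BARs in descending size order would avoid that:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t base = 0xe0000000, end = base + (512ULL << 20);
    uint64_t sizes[] = { 16ULL << 20, 256ULL << 20, 32ULL << 20 };
    int i;

    for (i = 0; i < 3; i++) {
        uint64_t sz = sizes[i];
        uint64_t addr = (base + sz - 1) & ~(sz - 1);  /* natural alignment */

        if (addr + sz > end) {
            printf("BAR of size 0x%llx does not fit\n", (unsigned long long)sz);
            continue;
        }
        printf("BAR of size 0x%llx placed at 0x%llx\n",
               (unsigned long long)sz, (unsigned long long)addr);
        base = addr + sz;
    }
    return 0;
}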
On 2011-05-09 16:29, Alex Williamson wrote: > On Mon, 2011-05-09 at 13:14 +0200, Jan Kiszka wrote: >> On 2011-05-05 17:17, Alex Williamson wrote: >>>> And what about the host? When does Linux release the legacy range? >>>> Always or only when a specific (!=vga/vesa) framebuffer driver is loaded? >>> >>> Well, that's where it'd be nice if the vga arbiter was actually in more >>> widespread use. It currently seems to be nothing more than a shared >>> mutex, but it would actually be useful if it included backends to do the >>> chipset vga routing changes. I think when I was testing this, I was >>> externally poking PCI bridge chipset to toggle the VGA_EN bit. >> >> Right, we had to drop the approach to pass through the secondary card >> for now, the arbiter was not switching properly. Haven't checked yet if >> VGA_EN was properly set, though the kernel code looks like it should >> take care of this. >> >> Even with handing out the primary adapter, we had only mixed success so >> far. The onboard adapter worked well (in VESA mode), but the NVIDIA is >> not displaying early boot messages at all. Maybe a vgabios issue. >> Windows was booting nevertheless - until we installed the NVIDIA >> drivers. Then it ran into a blue screen. > > Interesting, IIRC I could never get VESA modes to work. I believe I > only had a basic VGA16 mode running in a Windows guest too. > >> BTW, what ATI adapter did you use precisely, and what worked and what did not? > > I have an old X550 (rv380?). I also have an Nvidia gs8400, but ISTR the > ATI working better for me. Is that Nvidia a PCIe adapter? Did it show BIOS / early boot messages properly? BTW, we are fighting with a Quadro FX 3800. > >> One thing I was wondering: Most modern adapters should be PCIe these >> days. Our NVIDIA definitely is. But so far we are claiming to have it >> attached to a PCI bus. That hides all the extended capabilities, for one thing. >> Could this make some relevant difference? > > The BIOS and early boot use shouldn't care too much about that, but I > could imagine the high performance drivers potentially failing. Thanks, Yeah, that was my thinking as well. But we will try to confirm this by tracing the BIOS activities. There are reports that some adapters do not allow reading the true cold-boot ROM content at runtime, so booting those adapters inside the guest may fail to some degree. Anyway, I've hacked on the q35 patches until they allowed me to boot a Linux guest with an assigned PCIe Atheros WLAN adapter - all caps were suddenly visible. Those bits are now on their way to our test box. Let's see if they are able to change the BSOD a bit... Jan
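The ROM suspicion can at least be probed from the host: the device's sysfs rom node returns data only after "1" has been written to it, and the image read that way can be compared against a vendor-supplied dump. A sketch, the device path being an example; a valid image starts with the 0x55 0xaa signature:

#include <stdio.h>

int main(void)
{
    const char *path = "/sys/bus/pci/devices/0000:02:00.0/rom"; /* example */
    unsigned char buf[4096];
    size_t n, total = 0;
    FILE *f = fopen(path, "r+");

    if (!f)
        return 1;
    fputs("1", f);              /* enable ROM accesses */
    fflush(f);
    rewind(f);
    while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
        total += n;
    printf("read %zu bytes of ROM from %s\n", total, path);
    clearerr(f);
    rewind(f);
    fputs("0", f);              /* disable again */
    fclose(f);
    return 0;
}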
On 2011-05-09 16:55, Prasad Joshi wrote: > On Mon, May 9, 2011 at 12:14 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote: >> On 2011-05-05 17:17, Alex Williamson wrote: >>>> And what about the host? When does Linux release the legacy range? >>>> Always or only when a specific (!=vga/vesa) framebuffer driver is loaded? >>> >>> Well, that's where it'd be nice if the vga arbiter was actually in more >>> widespread use. It currently seems to be nothing more than a shared >>> mutex, but it would actually be useful if it included backends to do the >>> chipset vga routing changes. I think when I was testing this, I was >>> externally poking PCI bridge chipset to toggle the VGA_EN bit. >> >> Right, we had to drop the approach to pass through the secondary card >> for now, the arbiter was not switching properly. Haven't checked yet if >> VGA_EN was properly set, though the kernel code looks like it should >> take care of this. >> >> Even with handing out the primary adapter, we had only mixed success so >> far. The onboard adapter worked well (in VESA mode), but the NVIDIA is >> not displaying early boot messages at all. Maybe a vgabios issue. >> Windows was booting nevertheless - until we installed the NVIDIA >> drivers. Then it ran into a blue screen. >> >> BTW, what ATI adapter did you use precisely, and what worked and what did not? > > Not hijacking the mail thread. Just wanted to provide some inputs. Much appreciated in fact! > > A few days back I tried passing through the secondary graphics card. > I could pass through two graphics cards to a virtual machine. > > 02:00.0 VGA compatible controller: ATI Technologies Inc Redwood > [Radeon HD 5670] (prog-if 00 [VGA controller]) > Subsystem: PC Partner Limited Device e151 > Flags: bus master, fast devsel, latency 0, IRQ 87 > Memory at d0000000 (64-bit, prefetchable) [size=256M] > Memory at fe6e0000 (64-bit, non-prefetchable) [size=128K] > I/O ports at b000 [size=256] > Expansion ROM at fe6c0000 [disabled] [size=128K] > Capabilities: <access denied> > Kernel driver in use: radeon > Kernel modules: radeon > > 07:00.0 VGA compatible controller: nVidia Corporation G86 [Quadro NVS > 290] (rev a1) (prog-if 00 [VGA controller]) > Subsystem: nVidia Corporation Device 0492 > Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- > ParErr- Stepping- SERR+ FastB2B- DisINTx- > Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >> TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx- > Latency: 0, Cache Line Size: 64 bytes > Interrupt: pin A routed to IRQ 24 > Region 0: Memory at fd000000 (32-bit, non-prefetchable) [size=16M] > Region 1: Memory at d0000000 (64-bit, prefetchable) [size=256M] > Region 3: Memory at fa000000 (64-bit, non-prefetchable) [size=32M] > Region 5: I/O ports at ec00 [size=128] > Expansion ROM at fe9e0000 [disabled] [size=128K] > Capabilities: <access denied> > Kernel driver in use: nouveau > Kernel modules: nouveau, nvidiafb > > Both of them are PCIe cards. I have one more ATI card and another > NVIDIA card which do not work. Interesting. That may rule out missing PCIe capabilities as the source of the NVIDIA driver indisposition. Did you pass those cards each as primary to the guest, or was the guest seeing multiple adapters? I presume you only got output after early boot was completed, right? To avoid having to deal with legacy I/O forwarding, we started with a dual adapter setup in the hope of leaving the primary guest adapter at the known-to-work cirrus-vga.
But already in a native setup with on-board primary + NVIDIA secondary, the NVIDIA Windows drivers refused to talk to their hardware in this constellation. > > One of the reasons the pass-through did not work is the limit SeaBIOS > places on the amount of PCI memory space: a hard limit of 256MB or so. > Thus, some of the VGA devices that need more memory never worked for me. > > SeaBIOS allows this memory region to be extended to a value near > 512MB, but even then the range is not enough. > > Another problem with SeaBIOS which limits the usable memory space: > SeaBIOS allocates the BAR regions in the order they are encountered. As far > as I know, BAR regions must be naturally aligned, so this simple > strategy results in large fragmentation. > Therefore, even after increasing the PCI memory space to 512MB, some BAR > regions remained unallocated. That's an interesting lead! We'll check this here, but I bet it contributes to the problems. Our FX 3800 has 1G memory... > > I will confirm the details of the other graphics cards which do not work. TiA, Jan
On Mon, May 9, 2011 at 4:27 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote: > On 2011-05-09 16:55, Prasad Joshi wrote: >> On Mon, May 9, 2011 at 12:14 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote: >>> On 2011-05-05 17:17, Alex Williamson wrote: >>>>> And what about the host? When does Linux release the legacy range? >>>>> Always or only when a specific (!=vga/vesa) framebuffer driver is loaded? >>>> >>>> Well, that's where it'd be nice if the vga arbiter was actually in more >>>> widespread use. It currently seems to be nothing more than a shared >>>> mutex, but it would actually be useful if it included backends to do the >>>> chipset vga routing changes. I think when I was testing this, I was >>>> externally poking PCI bridge chipset to toggle the VGA_EN bit. >>> >>> Right, we had to drop the approach to pass through the secondary card >>> for now, the arbiter was not switching properly. Haven't checked yet if >>> VGA_EN was properly set, though the kernel code looks like it should >>> take care of this. >>> >>> Even with handing out the primary adapter, we had only mixed success so >>> far. The onboard adapter worked well (in VESA mode), but the NVIDIA is >>> not displaying early boot messages at all. Maybe a vgabios issue. >>> Windows was booting nevertheless - until we installed the NVIDIA >>> drivers. Than it ran into a blue screen. >>> >>> BTW, what ATI adapter did you use precisely, and what did work, what not? >> >> Not hijacking the mail thread. Just wanted to provide some inputs. > > Much appreciated in fact! > >> >> Few days back I had tried passing through the secondary graphics card. >> I could pass-through two graphics cards to virtual machine. >> >> 02:00.0 VGA compatible controller: ATI Technologies Inc Redwood >> [Radeon HD 5670] (prog-if 00 [VGA controller]) >> Subsystem: PC Partner Limited Device e151 >> Flags: bus master, fast devsel, latency 0, IRQ 87 >> Memory at d0000000 (64-bit, prefetchable) [size=256M] >> Memory at fe6e0000 (64-bit, non-prefetchable) [size=128K] >> I/O ports at b000 [size=256] >> Expansion ROM at fe6c0000 [disabled] [size=128K] >> Capabilities: <access denied> >> Kernel driver in use: radeon >> Kernel modules: radeon >> >> 07:00.0 VGA compatible controller: nVidia Corporation G86 [Quadro NVS >> 290] (rev a1) (prog-if 00 [VGA controller]) >> Subsystem: nVidia Corporation Device 0492 >> Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- >> ParErr-Stepping- SERR+ FastB2B- DisINTx- >> Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >>> TAbort-<TAbort- <MAbort- >SERR- <PERR- INTx- >> Latency: 0, Cache Line Size: 64 bytes >> Interrupt: pin A routed to IRQ 24 >> Region 0: Memory at fd000000 (32-bit, non-prefetchable) [size=16M] >> Region 1: Memory at d0000000 (64-bit, prefetchable) [size=256M] >> Region 3: Memory at fa000000 (64-bit, non-prefetchable) [size=32M] >> Region 5: I/O ports at ec00 [size=128] >> Expansion ROM at fe9e0000 [disabled] [size=128K] >> Capabilities: <access denied> >> Kernel driver in use: nouveau >> Kernel modules: nouveau, nvidiafb >> >> Both of them are PCIe cards. I have one more ATI card and another >> NVIDIA card which does not work. > > Interesting. That may rule out missing PCIe capabilities as source for > the NVIDIA driver indisposition. > > Did you passed those cards each as primary to the guest, or was the > guest seeing multiple adapters? I passed the graphics device as a primary device to the guest virtual machine, with -vga none parameter to disable the default vga device. 
> I presume you only got output after early boot was completed, right? Yes, you are correct. I got the display only after KMS was started. The initial BIOS messages were not displayed. > > To avoid having to deal with legacy I/O forwarding, we started with a > dual adapter setup in the hope of leaving the primary guest adapter at the > known-to-work cirrus-vga. But already in a native setup with on-board > primary + NVIDIA secondary, the NVIDIA Windows drivers refused to talk > to their hardware in this constellation. > The Windows operating system never worked for me with either of the graphics cards. >> >> One of the reasons the pass-through did not work is the limit SeaBIOS >> places on the amount of PCI memory space: a hard limit of 256MB or so. >> Thus, some of the VGA devices that need more memory never worked for me. >> >> SeaBIOS allows this memory region to be extended to a value near >> 512MB, but even then the range is not enough. >> >> Another problem with SeaBIOS which limits the usable memory space: >> SeaBIOS allocates the BAR regions in the order they are encountered. As far >> as I know, BAR regions must be naturally aligned, so this simple >> strategy results in large fragmentation. >> Therefore, even after increasing the PCI memory space to 512MB, some BAR >> regions remained unallocated. > > That's an interesting lead! We'll check this here, but I bet it > contributes to the problems. Our FX 3800 has 1G memory... Yes, it is one of the problems. I remember reading something about the NVIDIA BIOS and FLR; those could be other interesting issues. > >> >> I will confirm the details of the other graphics cards which do not work. > > TiA, > Jan > > -- > Siemens AG, Corporate Technology, CT T DE IT 1 > Corporate Competence Center Embedded Linux
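Regarding FLR: whether the kernel found any function-level reset method for a device shows up as a reset attribute in its sysfs directory, and writing to it triggers the reset. A quick probe, the device path being an example and root assumed:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
    const char *path = "/sys/bus/pci/devices/0000:07:00.0/reset"; /* example */
    FILE *f;

    if (access(path, F_OK)) {
        printf("no function reset method available\n");
        return 1;
    }
    f = fopen(path, "w");
    if (!f)
        return 1;
    fputs("1", f);   /* FLR if the device supports it, a fallback otherwise */
    fclose(f);
    return 0;
}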
On 05/09/2011 06:27 PM, Jan Kiszka wrote: > To avoid having to deal with legacy I/O forwarding, we started with a > dual adapter setup in the hope to leave the primary guest adapter at > know-to-work cirrus-vga. But already in a native setup with on-board > primary + NVIDIA secondary, the NVIDIA Windows drivers refused to talk > to its hardware in this constellation. IIRC one issue with nvidia is that it uses non-BAR registers to move its PCI BAR around, which causes cpu writes to hit empty space. One way to see if this is the problem is to trace mmio that misses both kvm internal devices and qemu devices.
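One concrete way to do that tracing is the kernel's kvm_mmio trace event, which also logs reads that no device satisfied. A sketch that enables the event through debugfs and streams the log; it assumes debugfs is mounted at /sys/kernel/debug and a kernel with kvm tracepoints:

#include <stdio.h>

int main(void)
{
    FILE *en = fopen("/sys/kernel/debug/tracing/events/kvm/kvm_mmio/enable", "w");
    FILE *tp;
    char line[512];

    if (!en)
        return 1;
    fputs("1", en);
    fclose(en);

    tp = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");
    if (!tp)
        return 1;
    /* look for "unsatisfied-read" entries, i.e. accesses nobody claimed */
    while (fgets(line, sizeof(line), tp))
        fputs(line, stdout);
    return 0;
}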
On 2011-05-11 13:23, Avi Kivity wrote: > On 05/09/2011 06:27 PM, Jan Kiszka wrote: >> To avoid having to deal with legacy I/O forwarding, we started with a >> dual adapter setup in the hope to leave the primary guest adapter at >> know-to-work cirrus-vga. But already in a native setup with on-board >> primary + NVIDIA secondary, the NVIDIA Windows drivers refused to talk >> to its hardware in this constellation. > > IIRC one issue with nvidia is that it uses non-BAR registers to move its > PCI BAR around, which causes cpu writes to hit empty space. I wonder if that would still be "virtualization friendly" as the adapter claims to be... > > One way to see if this is the problem is to trace mmio that misses both > kvm internal devices and qemu devices. We'll check. Jan
commit 0313d97cf24177023cdb6f2e4c54d077c5a775c1 Author: Alex Williamson <alex.williamson@redhat.com> Date: Wed Sep 29 13:50:39 2010 -0600 vfio: VGA passthrough support(ish) Signed-off-by: Alex Williamson <alex.williamson@redhat.com> --- diff --git a/Makefile.target b/Makefile.target index c507dd2..cb0cea6 100644 --- a/Makefile.target +++ b/Makefile.target @@ -203,6 +203,7 @@ obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o obj-i386-y += debugcon.o multiboot.o obj-i386-y += pc_piix.o obj-i386-y += vfio.o +obj-$(CONFIG_VFIO_VGA) += vfio-vga.o # shared objects obj-ppc-y = ppc.o diff --git a/configure b/configure index 3bfc5e9..b15e68f 100755 --- a/configure +++ b/configure @@ -322,6 +322,7 @@ user_pie="no" zero_malloc="" trace_backend="nop" trace_file="trace" +vfio_vga="no" # OS specific if check_define __linux__ ; then @@ -718,6 +719,8 @@ for opt do ;; --enable-vhost-net) vhost_net="yes" ;; + --enable-vfio-vga) vfio_vga="yes" + ;; --*dir) ;; *) echo "ERROR: unknown option $opt"; show_help="yes" @@ -907,6 +910,7 @@ echo " --disable-docs disable documentation build" echo " --disable-vhost-net disable vhost-net acceleration support" echo " --enable-vhost-net enable vhost-net acceleration support" echo " --trace-backend=B Trace backend nop simple ust" +echo " --enable-vfio-vga enable vfio VGA passthrough support" echo " --trace-file=NAME Full PATH,NAME of file to store traces" echo " Default:trace-<pid>" echo "" @@ -2240,6 +2244,7 @@ echo "preadv support $preadv" echo "fdatasync $fdatasync" echo "uuid support $uuid" echo "vhost-net support $vhost_net" +echo "vfio-vga support $vfio_vga" echo "Trace backend $trace_backend" echo "Trace output file $trace_file-<pid>" @@ -2762,6 +2767,9 @@ case "$target_arch2" in if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then echo "CONFIG_XEN=y" >> $config_target_mak fi + if test $vfio_vga = "yes" ; then + echo "CONFIG_VFIO_VGA=y" >> $config_host_mak + fi esac case "$target_arch2" in i386|x86_64|ppcemb|ppc|ppc64|s390x) diff --git a/hw/vfio-vga.c b/hw/vfio-vga.c new file mode 100644 index 0000000..5c1899c --- /dev/null +++ b/hw/vfio-vga.c @@ -0,0 +1,291 @@ +/* + * vfio VGA device assignment support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Alex Williamson <alex.williamson@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on qemu-kvm device-assignment: + * Adapted for KVM by Qumranet. + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/io.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include "event_notifier.h" +#include "hw.h" +#include "memory.h" +#include "monitor.h" +#include "pc.h" +#include "qemu-error.h" +#include "sysemu.h" +#include "vfio.h" +#include <pci/header.h> +#include <pci/types.h> +#include <linux/types.h> +#include "linux-vfio.h" + +//#define DEBUG_VFIO_VGA +#ifdef DEBUG_VFIO_VGA +#define DPRINTF(fmt, ...) \ + do { printf("vfio-vga: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) 
\ + do { } while (0) +#endif + +/* + * VGA setup + */ +static void vfio_vga_write(VFIODevice *vdev, uint32_t addr, + uint32_t val, int len) +{ + DPRINTF("%s 0x%x %d - 0x%x\n", __func__, 0xa0000 + addr, len, val); + switch (len) { + case 1: + *(uint8_t *)(vdev->vga_mmio + addr) = (uint8_t)val; + break; + case 2: + *(uint16_t *)(vdev->vga_mmio + addr) = (uint16_t)val; + break; + case 4: + *(uint32_t *)(vdev->vga_mmio + addr) = val; + break; + } +} + +static void vfio_vga_writeb(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + vfio_vga_write(opaque, addr, val, 1); +} + +static void vfio_vga_writew(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + vfio_vga_write(opaque, addr, val, 2); +} + +static void vfio_vga_writel(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + vfio_vga_write(opaque, addr, val, 4); +} + +static CPUWriteMemoryFunc * const vfio_vga_writes[] = { + &vfio_vga_writeb, + &vfio_vga_writew, + &vfio_vga_writel +}; + +static uint32_t vfio_vga_read(VFIODevice *vdev, uint32_t addr, int len) +{ + uint32_t val = 0xffffffff; + switch (len) { + case 1: + val = (uint32_t)*(uint8_t *)(vdev->vga_mmio + addr); + break; + case 2: + val = (uint32_t)*(uint16_t *)(vdev->vga_mmio + addr); + break; + case 4: + val = *(uint32_t *)(vdev->vga_mmio + addr); + break; + } + DPRINTF("%s 0x%x %d = 0x%x\n", __func__, 0xa0000 + addr, len, val); + return val; +} + +static uint32_t vfio_vga_readb(void *opaque, target_phys_addr_t addr) +{ + return vfio_vga_read(opaque, addr, 1); +} + +static uint32_t vfio_vga_readw(void *opaque, target_phys_addr_t addr) +{ + return vfio_vga_read(opaque, addr, 2); +} + +static uint32_t vfio_vga_readl(void *opaque, target_phys_addr_t addr) +{ + return vfio_vga_read(opaque, addr, 4); +} + +static CPUReadMemoryFunc * const vfio_vga_reads[] = { + &vfio_vga_readb, + &vfio_vga_readw, + &vfio_vga_readl +}; + +static void vfio_vga_out(VFIODevice *vdev, uint32_t addr, uint32_t val, int len) +{ + DPRINTF("%s 0x%x %d - 0x%x\n", __func__, addr, len, val); + ioperm(0x3b0, 0x30, 1); /* XXX fix me */ + switch (len) { + case 1: + outb(val, addr); + break; + case 2: + outw(val, addr); + break; + case 4: + outl(val, addr); + break; + } +} + +static void vfio_vga_outb(void *opaque, uint32_t addr, uint32_t val) +{ + vfio_vga_out(opaque, addr, val, 1); +} + +static void vfio_vga_outw(void *opaque, uint32_t addr, uint32_t val) +{ + vfio_vga_out(opaque, addr, val, 2); +} + +static void vfio_vga_outl(void *opaque, uint32_t addr, uint32_t val) +{ + vfio_vga_out(opaque, addr, val, 4); +} + +static uint32_t vfio_vga_in(VFIODevice *vdev, uint32_t addr, int len) +{ + uint32_t val = 0xffffffff; + ioperm(0x3b0, 0x30, 1); /* XXX fix me */ + switch (len) { + case 1: + val = inb(addr); + break; + case 2: + val = inw(addr); + break; + case 4: + val = inl(addr); + break; + } + DPRINTF("%s 0x%x, %d = 0x%x\n", __func__, addr, len, val); + return val; +} + +static uint32_t vfio_vga_inb(void *opaque, uint32_t addr) +{ + return vfio_vga_in(opaque, addr, 1); +} + +static uint32_t vfio_vga_inw(void *opaque, uint32_t addr) +{ + return vfio_vga_in(opaque, addr, 2); +} + +static uint32_t vfio_vga_inl(void *opaque, uint32_t addr) +{ + return vfio_vga_in(opaque, addr, 4); +} + +int vfio_vga_setup(VFIODevice *vdev) +{ + char buf[256]; + int ret; + + if (vga_interface_type != VGA_NONE) { + fprintf(stderr, + "VGA devie assigned without -vga none param, no ISA VGA\n"); + return -1; + } + + vdev->vga_fd = open("/dev/vga_arbiter", O_RDWR); + if (vdev->vga_fd < 0) { + fprintf(stderr, "%s - Failed to open 
vga arbiter (%s)\n", + __func__, strerror(errno)); + return -1; + } + ret = read(vdev->vga_fd, buf, sizeof(buf)); + if (ret <= 0) { + fprintf(stderr, "%s - Failed to read from vga arbiter (%s)\n", + __func__, strerror(errno)); + close(vdev->vga_fd); + return -1; + } + buf[ret - 1] = 0; + vdev->vga_orig = qemu_strdup(buf); + + snprintf(buf, sizeof(buf), "target PCI:%04x:%02x:%02x.%x", + vdev->host.seg, vdev->host.bus, vdev->host.dev, vdev->host.func); + ret = write(vdev->vga_fd, buf, strlen(buf)); + if (ret != strlen(buf)) { + fprintf(stderr, "%s - Failed to write to vga arbiter (%s)\n", + __func__, strerror(errno)); + close(vdev->vga_fd); + return -1; + } + snprintf(buf, sizeof(buf), "decodes io+mem"); + ret = write(vdev->vga_fd, buf, strlen(buf)); + if (ret != strlen(buf)) { + fprintf(stderr, "%s - Failed to write to vga arbiter (%s)\n", + __func__, strerror(errno)); + close(vdev->vga_fd); + return -1; + } + + vdev->vga_mmio_fd = open("/dev/mem", O_RDWR); + if (vdev->vga_mmio_fd < 0) { + fprintf(stderr, "%s - Failed to open /dev/mem (%s)\n", + __func__, strerror(errno)); + return -1; + } + vdev->vga_mmio = mmap(NULL, 0x40000, PROT_READ | PROT_WRITE, + MAP_SHARED, vdev->vga_mmio_fd, 0xa0000); + if (vdev->vga_mmio == MAP_FAILED) { + fprintf(stderr, "%s - mmap failed (%s)\n", __func__, strerror(errno)); + return -1; + } + +#if 1 + vdev->vga_io = cpu_register_io_memory(vfio_vga_reads, + vfio_vga_writes, vdev); + cpu_register_physical_memory(0xa0000, 0x20000, vdev->vga_io); + qemu_register_coalesced_mmio(0xa0000, 0x20000); +#else + cpu_register_physical_memory(0xa0000, 0x20000, + qemu_ram_map(&vdev->pdev.qdev, "VGA", 0x20000, vdev->vga_mmio)); + qemu_register_coalesced_mmio(0xa0000, 0x20000); +#endif + + register_ioport_write(0x3b0, 0x30, 1, vfio_vga_outb, vdev); + register_ioport_write(0x3b0, 0x30, 2, vfio_vga_outw, vdev); + register_ioport_write(0x3b0, 0x30, 4, vfio_vga_outl, vdev); + register_ioport_read(0x3b0, 0x30, 1, vfio_vga_inb, vdev); + register_ioport_read(0x3b0, 0x30, 2, vfio_vga_inw, vdev); + register_ioport_read(0x3b0, 0x30, 4, vfio_vga_inl, vdev); + if (ioperm(0x3b0, 0x30, 1)) { + fprintf(stderr, "%s - ioperm failed (%s)\n", __func__, strerror(errno)); + return -1; + } + return 0; +} + +void vfio_vga_exit(VFIODevice *vdev) +{ + if (!vdev->vga_io) + return; + + isa_unassign_ioport(0x3b0, 0x30); + qemu_unregister_coalesced_mmio(0xa0000, 0x20000); + cpu_register_physical_memory(0xa0000, 0x20000, IO_MEM_UNASSIGNED); + cpu_unregister_io_memory(vdev->vga_io); + munmap(vdev->vga_mmio, 0x40000); + close(vdev->vga_mmio_fd); + qemu_free(vdev->vga_orig); + close(vdev->vga_fd); +} + diff --git a/hw/vfio.c b/hw/vfio.c index e2da724..f7c7a42 100644 --- a/hw/vfio.c +++ b/hw/vfio.c @@ -1268,8 +1268,22 @@ static int vfio_initfn(struct PCIDevice *pdev) if (vfio_enable_intx(vdev)) goto out_unmap_iommu; +#ifdef CONFIG_VFIO_VGA + { + uint16_t class; + + class = vfio_pci_read_config(&vdev->pdev, PCI_CLASS_DEVICE, 2); + if (class == PCI_CLASS_DISPLAY_VGA && vfio_vga_setup(vdev)) + goto out_vga_fail; + } +#endif + return 0; +#ifdef CONFIG_VFIO_VGA +out_vga_fail: + vfio_disable_intx(vdev); +#endif out_unmap_iommu: vfio_unmap_iommu(vdev); out_unmap_resources: @@ -1290,6 +1304,9 @@ static int vfio_exitfn(struct PCIDevice *pdev) { VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); +#ifdef CONFIG_VFIO_VGA + vfio_vga_exit(vdev); +#endif vfio_disable_intx(vdev); vfio_disable_msi(vdev); vfio_disable_msix(vdev); diff --git a/hw/vfio.h b/hw/vfio.h index b5a0525..c7490b3 100644 --- a/hw/vfio.h +++ b/hw/vfio.h 
@@ -83,8 +83,20 @@ typedef struct VFIODevice { MSIX msix; int vfiofd; int uiommufd; +#ifdef CONFIG_VFIO_VGA + int vga_io; + int vga_fd; + int vga_mmio_fd; + uint8_t *vga_mmio; + char *vga_orig; +#endif char *vfiofd_name; char *uiommufd_name; } VFIODevice; +#ifdef CONFIG_VFIO_VGA +int vfio_vga_setup(VFIODevice *vdev); +void vfio_vga_exit(VFIODevice *vdev); +#endif + #endif /* __VFIO_H__ */