@@ -493,7 +493,7 @@ static void vfio_listener_release(VFIOContainer *container)
memory_listener_unregister(&container->listener);
}
-static struct vfio_info_cap_header *
+struct vfio_info_cap_header *
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
{
struct vfio_info_cap_header *hdr;
@@ -11,9 +11,11 @@
*/
#include "qemu/osdep.h"
+#include "hw/nvram/fw_cfg.h"
#include "pci.h"
#include "trace.h"
#include "qemu/range.h"
+#include "qemu/error-report.h"
/* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
@@ -962,6 +964,551 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
}
/*
+ * Intel IGD support
+ *
+ * We need to do a few things to support Intel Integrated Graphics Devices:
+ * 1) Define a stolen memory region and trap I/O port writes programming it
+ * 2) Expose the OpRegion if one is provided to us
+ * 3) Copy key PCI config space register values from the host bridge
+ * 4) Create an LPC/ISA bridge and do the same for it.
+ *
+ * Each of these is described below in more detail.
+ *
+ * There are two modes of operation, Universal Passthrough (UPT) mode, which
+ * technically requires none of this, but does benefit with local display
+ * output via the OpRegion support, and "legacy" mode, which makes use of all
+ * of these. UPT should work on Broadwell and newer devices while legacy mode
+ * should work on SandyBridge and newer. We try to guess which mode to use
+ * based on the configuration of the system. To trigger legacy mode, you must:
+ * - Configure the IGD device at address 00:02.0
+ * - Not have a device at 00:1f.0 to enable LPC bridge
+ * - Not have hot-added the device
+ * - Have a ROM exposed via the PCI option ROM BAR on the IGD device
+ * - Have vfio-vga support present for the device and have VGA enable-able
+ * - Use a SandyBridge or newer device
+ *
+ * Thus easy ways to trigger UPT mode are to not install the device at PCI
+ * address 00:02.0 or simply disable the ROM BAR, which is not used in UPT
+ * mode anyway.
+ *
+ * Legacy mode is necessarily incompatible with hotplug due to the reserved
+ * memory regions and device modifications that it requires. UPT mode is
+ * potentially compatible with hotplug, though OpRegion support is lost.
+ */
+
+/*
+ * This presumes the device is already known to be an Intel VGA device, so we
+ * take liberties in which device ID bits match which generation. See
+ * linux:include/drm/i915_pciids.h for IDs.
+ */
+static int igd_gen(VFIOPCIDevice *vdev)
+{
+    int gen = 8; /* Unlisted IDs: assume newer is compatible */
+
+    /* Broxton is matched on the low 12 ID bits, before the family table */
+    if ((vdev->device_id & 0xfff) == 0xa84) {
+        return gen;
+    }
+
+    switch (vdev->device_id & 0xff00) {
+    /* Old, untested, unavailable, unknown */
+    case 0x0000:
+    case 0x2500:
+    case 0x2700:
+    case 0x2900:
+    case 0x2a00:
+    case 0x2e00:
+    case 0x3500:
+    case 0xa000:
+        gen = -1;
+        break;
+    /* SandyBridge, IvyBridge, ValleyView, Haswell */
+    case 0x0100:
+    case 0x0400:
+    case 0x0a00:
+    case 0x0c00:
+    case 0x0d00:
+    case 0x0f00:
+        gen = 6;
+        break;
+    /* BroadWell, CherryView, SkyLake, KabyLake */
+    case 0x1600:
+    case 0x1900:
+    case 0x2200:
+    case 0x5900:
+    default:
+        break;
+    }
+
+    return gen;
+}
+
+typedef struct VFIOIGDQuirk {
+ struct VFIOPCIDevice *vdev; /* device whose BAR4 accesses are trapped */
+ uint32_t index; /* last value written to the BAR4 index register;
+ * reset to ~0 by every other trapped access */
+} VFIOIGDQuirk;
+
+#define IGD_GMCH 0x50 /* Graphics Control Register */
+#define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
+#define IGD_ASLS 0xfc /* ASL Storage Register */
+
+/*
+ * The IGD ROM will make use of stolen memory (GGMS/GMS) when executed for
+ * support of VESA modes. The address of the host stolen memory appears to
+ * be coded into the ROM itself, which execution of the ROM then programs into
+ * the device, but thankfully it does so using I/O port space so we can trap
+ * it without affecting performance of the device and the address range is only
+ * used by the device itself, so we can transparently remap it from the host
+ * address range to the guest address range.
+ *
+ * To do this we first need to have a section of reserved memory to
+ * direct the hardware to. We do this with a new fw_cfg entry, which tells
+ * the BIOS the size of reserved memory we need. The BIOS allocates this
+ * memory and writes the base address to the IGD BDSM register. Then when the
+ * ROM execution begins, we make use of that base address to replace the host
+ * addresses written to the device. Unfortunately we have no choice but to
+ * throw a hardware error if the device is programmed without BIOS support for
+ * writing the BDSM. Disabling the ROM, installing the IGD at an address other
+ * than 00:02.0, or installing a device at 00:1f.0 to prevent the LPC device
+ * from being installed will all disable this support. This is not needed
+ * for Intel's Universal Passthrough mode.
+ */
+static uint64_t vfio_igd_quirk_data_read(void *opaque,
+                                         hwaddr addr, unsigned size)
+{
+    VFIOIGDQuirk *state = opaque;
+    VFIOPCIDevice *dev = state->vdev;
+
+    /* Any access other than an index write invalidates the latched index */
+    state->index = ~0;
+
+    /* This region overlays BAR4 at offset 4, so rebase addr onto the BAR */
+    return vfio_region_read(&dev->bars[4].region, addr + 4, size);
+}
+
+static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ VFIOIGDQuirk *igd = opaque;
+ VFIOPCIDevice *vdev = igd->vdev;
+
+ /*
+ * Write handler for the trapped BAR4 data register (BAR4 offset 4).
+ * If the index latched by the preceding index write selects a GGMS entry,
+ * the value is rewritten to point into the guest's reserved memory (the
+ * base the BIOS stored in the emulated BDSM register) before being
+ * forwarded to the device; all other writes are forwarded unmodified.
+ *
+ * Programming the GGMS starts at index 0x1 and uses every 4th index up
+ * through 0x3fd (ie. 0x1, 0x5, 0x9, ..., 0x3f9, 0x3fd). The address
+ * written at each index is incremented by 4k. This only accounts for 1MB,
+ * while the GGMS is potentially up to 2MB, the vBIOS doesn't seem to go
+ * beyond this and we don't have a spec reference to know if it goes up
+ * through 0x7fd.
+ */
+ if (igd->index < 0x400 && (igd->index % 4 == 1)) {
+ uint32_t base;
+
+ base = pci_get_long(vdev->pdev.config + IGD_BDSM); /* guest BDSM value */
+ if (!base) {
+ hw_error("vfio-igd: Guest attempted to program IGD GTT before BIOS "
+ "reserved stolen memory. Unsupported BIOS?");
+ }
+
+ base |= (data & ((1 << 20) - 1)); /* keep only the low 20 bits of data */
+
+ trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name,
+ igd->index, data, base);
+ data = base;
+ }
+
+ vfio_region_write(&vdev->bars[4].region, addr + 4, data, size);
+
+ /*
+ * Observation: On IVB system the vBIOS writes up through index 0x3f9,
+ * which correlates to offset 0xfe000 within the stolen memory range. That
+ * suspiciously leaves exactly one 4k page of the first 1MB unwritten and
+ * generates DMAR faults at offset 0xff000 from the host BDSM. If we do
+ * one more step, triggered on the write to index 0x3f9 to write the index
+ * and data for that last page, the issue appears resolved. Note that the
+ * GGMS may be 1MB or 2MB, but the vBIOS seems to program 1MB regardless
+ * and inspection shows the following index registers in the sequence
+ * already addresses within the first 1MB programmed.
+ */
+ if (igd->index == 0x3f9) {
+ vfio_region_write(&vdev->bars[4].region, addr, igd->index + 4, 4);
+ vfio_region_write(&vdev->bars[4].region, addr + 4, data + 0x1000, size);
+ }
+
+ igd->index = ~0; /* the latched index is consumed by this data access */
+}
+
+static const MemoryRegionOps vfio_igd_data_quirk = {
+ .read = vfio_igd_quirk_data_read, /* forwarded to BAR4 at addr + 4 */
+ .write = vfio_igd_quirk_data_write, /* may rewrite the value first */
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t vfio_igd_quirk_index_read(void *opaque,
+                                          hwaddr addr, unsigned size)
+{
+    VFIOIGDQuirk *state = opaque;
+    VFIOPCIDevice *dev = state->vdev;
+
+    /* Reading the index register also drops any previously latched index */
+    state->index = ~0;
+
+    /* The index region sits at BAR4 offset 0, so addr is used as-is */
+    return vfio_region_read(&dev->bars[4].region, addr, size);
+}
+
+static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr,
+                                       uint64_t data, unsigned size)
+{
+    VFIOIGDQuirk *state = opaque;
+    VFIOPCIDevice *dev = state->vdev;
+
+    /*
+     * Latch the index (narrowed to the 32-bit field) so the data register
+     * write handler can tell which entry is being programmed, then pass the
+     * write through to the device unmodified.
+     */
+    state->index = data;
+    vfio_region_write(&dev->bars[4].region, addr, data, size);
+}
+
+static const MemoryRegionOps vfio_igd_index_quirk = {
+ .read = vfio_igd_quirk_index_read, /* forwarded to BAR4 at addr */
+ .write = vfio_igd_quirk_index_write, /* latches the index, then forwards */
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * Probe for and, when the VM configuration allows it, enable "legacy"
+ * (non-UPT) IGD support on BAR4 of an Intel VGA device.
+ *
+ * On success this marks the device as igd_legacy_mode, installs index/data
+ * register traps at BAR4 offsets 0 and 4, adds the "etc/igd-bdsm" fw_cfg
+ * entry conveying the reserved memory size, and emulates the GMCH (read-only)
+ * and BDSM (read-write) config registers.  It returns silently whenever the
+ * configuration does not qualify for legacy mode.
+ *
+ * @vdev: device being probed
+ * @nr:   BAR number being set up; only BAR4 is handled
+ */
+static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
+{
+    DeviceClass *dc = DEVICE_GET_CLASS(vdev);
+    struct vfio_region_info *reg_info = NULL;
+    VFIOQuirk *quirk;
+    VFIOIGDQuirk *igd;
+    int ret, ggms_mb, gms_mb = 0;
+    uint32_t gmch, gms_mask, gms_shift, ggms_mask, ggms_shift;
+    bool rom_present, vga_present;
+
+    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
+        !vfio_is_vga(vdev) || nr != 4) {
+        return;
+    }
+
+    ret = vfio_get_region_info(&vdev->vbasedev,
+                               VFIO_PCI_ROM_REGION_INDEX, &reg_info);
+    rom_present = vdev->pdev.romfile ? true :
+                  (ret ? false : (reg_info->size > 0 ? true : false));
+    /*
+     * Only release the info on success, as vfio_populate_dev_regions() does;
+     * on failure there is nothing valid for us to free.
+     */
+    if (!ret) {
+        g_free(reg_info);
+    }
+
+    /*
+     * IGD ROMs are rather particular, the IGD device must be at 00:02.0 and
+     * 00:1f.0 must be populated with our fake LPC bridge that copies IDs from
+     * the host, otherwise executing the ROM hangs. If we know it's going to
+     * hang, we disable it and therefore disable all of the other IGD quirks
+     * except for the OpRegion support. This pretty much excludes "legacy"
+     * IGD assignment on Q35. If the device is new enough (Broadwell+), then
+     * Intel "Universal Passthrough" (UPT) mode should work without these
+     * quirks. Older devices are pretty much restricted to 440FX for now.
+     * If a device is hot-added (probably not supported), there's nothing we
+     * can do but assume UPT mode.
+     */
+    if (&vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
+                                       0, PCI_DEVFN(0x2, 0)) ||
+        pci_find_device(pci_device_root_bus(&vdev->pdev),
+                        0, PCI_DEVFN(0x1f, 0)) ||
+        vdev->pdev.qdev.hotplugged) {
+
+        if (rom_present && vdev->pdev.rom_bar) {
+            error_report("IGD ROM disabled: Requires non-hotplugged IGD device "
+                         "at address 00:02.0 and address 00:1f.0 free for LPC "
+                         "bridge.");
+            vdev->rom_read_failed = true;
+        }
+        return;
+    }
+
+    ret = vfio_get_region_info(&vdev->vbasedev,
+                               VFIO_PCI_VGA_REGION_INDEX, &reg_info);
+    vga_present = ret ? false : (reg_info->size > 0 ? true : false);
+    if (!ret) {
+        g_free(reg_info);
+    }
+
+    /*
+     * If there is no ROM available, either via device or romfile, or the ROM
+     * BAR is disabled, or VGA is not present, or not enabled and not able to
+     * be enabled, then we consider this device to be non-legacy.
+     */
+    if (!vdev->pdev.rom_bar || !rom_present ||
+        !vga_present || (!vdev->vga && vdev->no_auto_vga)) {
+        return;
+    }
+
+    /* See linux:include/drm/i915_drm.h */
+    switch (igd_gen(vdev)) {
+    case 6:
+        gms_mask = 0x1f;
+        gms_shift = 3;
+        ggms_mask = 0x3;
+        ggms_shift = 8;
+        break;
+    case 8:
+        gms_mask = 0xff;
+        gms_shift = 8;
+        ggms_mask = 0x3;
+        ggms_shift = 6;
+        break;
+    default:
+        error_report("IGD device too old, try SandyBridge or newer, Broadwell "
+                     "or newer for Intel's Universal Passthrough support");
+        return;
+    }
+
+    vdev->igd_legacy_mode = true; /* ie. non-UPT (Universal Passthrough) */
+
+    gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
+    ggms_mb = (gmch >> ggms_shift) & ggms_mask; /* Read GGMS */
+    gmch &= ~(gms_mask << gms_shift); /* Mask out GMS */
+
+    if (!ggms_mb) {
+        ggms_mb = 1; /* vBIOS seems to use 1MB without checking hardware */
+        gmch |= ggms_mb << ggms_shift;
+    }
+
+    if (!(gmch & 0x2) && !vdev->vga && !vdev->no_auto_vga) {
+        if (vfio_populate_vga(vdev)) {
+            error_report("IGD VGA auto-enable failed");
+        }
+    }
+
+    dc->hotpluggable = false;
+
+    /* One quirk with two 4-byte traps: index at BAR offset 0, data at 4 */
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->mem = g_new0(MemoryRegion, 2);
+    quirk->nr_mem = 2;
+    igd = quirk->data = g_malloc0(sizeof(*igd));
+    igd->vdev = vdev;
+    igd->index = ~0;
+
+    memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,
+                          igd, "vfio-igd-index-quirk", 4);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        0, &quirk->mem[0], 1);
+
+    memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk,
+                          igd, "vfio-igd-data-quirk", 4);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        4, &quirk->mem[1], 1);
+
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+
+    /*
+     * Not sure if this is worthwhile, but we can ask the BIOS to reserve GMS
+     * stolen memory as well and report it out through the emulated GMCH
+     * register. It's only an experimental option, so it can be dropped.
+     */
+    if (vdev->igd_gms) {
+        if (vdev->igd_gms <= 0x10) {
+            gms_mb = vdev->igd_gms * 32;
+            gmch |= vdev->igd_gms << gms_shift;
+        } else {
+            error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms);
+            vdev->igd_gms = 0;
+        }
+    }
+
+    /* We convey only the size via the fw_cfg entry, data is NULL */
+    fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm",
+                    NULL, (ggms_mb + gms_mb) * 1024 * 1024);
+
+    /*
+     * GMCH is read-only, emulated.  The write mask cleared here must be the
+     * GMCH one; clearing BDSM's instead would leave GMCH guest-writable.
+     */
+    pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
+    pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
+    pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);
+
+    /* BDSM is read-write, emulated. The BIOS needs to be able to write it */
+    pci_set_long(vdev->pdev.config + IGD_BDSM, 0);
+    pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0);
+    pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0);
+
+    trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, ggms_mb + gms_mb);
+}
+
+/*
+ * The OpRegion includes the Video BIOS Table, which seems important for
+ * telling the driver what sort of outputs it has. Without this, the device
+ * may work in the guest, but we may not get output. This is true even for
+ * Universal Passthrough mode, so we enable it whenever it's available to us.
+ * This also requires BIOS support to reserve and populate a section of guest
+ * memory sufficient for the table and to write the base address of that
+ * memory to the ASLS register of the IGD device. The opregion is provided
+ * via a vfio device specific region, which triggers this quirk to be enabled.
+ */
+int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
+                               struct vfio_region_info *region)
+{
+    DeviceClass *dev_class = DEVICE_GET_CLASS(vdev);
+    int nread;
+
+    /* Don't expect the display to work */
+    if (vdev->pdev.qdev.hotplugged) {
+        return 0;
+    }
+
+    dev_class->hotpluggable = false;
+
+    /* Snapshot the whole OpRegion from the vfio device specific region */
+    vdev->igd_opregion = g_malloc0(region->size);
+    nread = pread(vdev->vbasedev.fd, vdev->igd_opregion,
+                  region->size, region->offset);
+    if (nread == region->size) {
+        fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
+                        vdev->igd_opregion, region->size);
+
+        trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
+
+        /*
+         * Like BDSM, the BIOS writes the location of the reserved memory
+         * here, so ASLS is emulated, zeroed and fully writable.
+         */
+        pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
+        pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
+        pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
+
+        return 0;
+    }
+
+    /* Failed or partial read: discard the buffer and report the error */
+    error_report("vfio: Error reading IGD OpRegion");
+    g_free(vdev->igd_opregion);
+    vdev->igd_opregion = NULL;
+    return -EINVAL;
+}
+
+/*
+ * Finally, legacy mode IGD support requires modifications to a few registers
+ * on both the host bridge and LPC/ISA bridge. We enable these only if we're
+ * in "legacy" (non-UPT) mode. The host bridge registers are mostly benign
+ * since they don't actually modify the identity of the device, only the
+ * revision and subsystem IDs. All x86 chipsets have a host bridge at 00:00.0,
+ * so we can simply modify the config space of the existing device. For the
+ * LPC bridge we do need to modify the device ID, which means that the quirk is
+ * incompatible with existing devices at 00:1f.0. This is tested above in the
+ * BAR4 quirk and legacy mode is disabled if a device is found there. Both of
+ * these quirks rely on vfio device specific regions to provide read-only
+ * access to the config space of the host devices.
+ */
+typedef struct {
+ uint8_t offset; /* config space offset of the field to copy */
+ uint8_t len; /* number of bytes to copy starting at offset */
+} IGDHostInfo;
+
+static const IGDHostInfo igd_host_bridge_infos[] = {
+ {PCI_REVISION_ID, 2}, /* fields copied onto the device at 00:00.0 */
+ {PCI_SUBSYSTEM_VENDOR_ID, 2},
+ {PCI_SUBSYSTEM_ID, 2},
+};
+
+static const IGDHostInfo igd_lpc_bridge_infos[] = {
+ {PCI_VENDOR_ID, 2}, /* fields copied onto the dummy bridge at 00:1f.0 */
+ {PCI_DEVICE_ID, 2},
+ {PCI_REVISION_ID, 2},
+ {PCI_SUBSYSTEM_VENDOR_ID, 2},
+ {PCI_SUBSYSTEM_ID, 2},
+};
+
+/*
+ * Copy selected config space fields from a vfio device specific region into
+ * the emulated config space of @pdev.
+ *
+ * @vdev:   vfio device whose fd provides the region
+ * @pdev:   PCI device whose config space is overwritten
+ * @region: region info; its offset is the base of the host config data
+ * @list:   table of (offset, len) fields to copy
+ * @len:    number of entries in @list
+ *
+ * Returns 0 on success, a negative errno value if a read fails, or -EINVAL
+ * on a short read.
+ */
+static int vfio_pci_igd_copy(VFIOPCIDevice *vdev, PCIDevice *pdev,
+                             struct vfio_region_info *region,
+                             const IGDHostInfo *list, int len)
+{
+    int i, ret;
+
+    for (i = 0; i < len; i++) {
+        ret = pread(vdev->vbasedev.fd, pdev->config + list[i].offset,
+                    list[i].len, region->offset + list[i].offset);
+        if (ret < 0) {
+            /* Save errno first: reporting the error may overwrite it */
+            int saved_errno = errno;
+
+            error_report("IGD copy failed: %m");
+            return -saved_errno;
+        }
+        if (ret != list[i].len) {
+            /*
+             * A short read does not set errno, so returning -errno here
+             * could report success (0) or a stale, unrelated error.
+             */
+            error_report("IGD copy failed: short read");
+            return -EINVAL;
+        }
+    }
+
+    return 0;
+}
+
+int vfio_pci_igd_host_init(VFIOPCIDevice *vdev,
+                           struct vfio_region_info *region)
+{
+    DeviceClass *dev_class = DEVICE_GET_CLASS(vdev);
+    PCIDevice *bridge;
+    int rc;
+
+    /* Host bridge changes are only applied in legacy (non-UPT) mode */
+    if (!vdev->igd_legacy_mode) {
+        return 0;
+    }
+
+    /* Legacy mode cannot be combined with a hot-added device */
+    if (vdev->pdev.qdev.hotplugged) {
+        return -EINVAL;
+    }
+
+    dev_class->hotpluggable = false;
+
+    /* The host bridge is the device at 00:00.0 on the root bus */
+    bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
+                             0, PCI_DEVFN(0, 0));
+    if (bridge == NULL) {
+        error_report("Can't find host bridge");
+        return -ENODEV;
+    }
+
+    rc = vfio_pci_igd_copy(vdev, bridge, region, igd_host_bridge_infos,
+                           ARRAY_SIZE(igd_host_bridge_infos));
+    if (rc) {
+        return rc;
+    }
+
+    trace_vfio_pci_igd_host_bridge_enabled(vdev->vbasedev.name);
+    return 0;
+}
+
+static void vfio_pci_igd_lpc_bridge_realize(PCIDevice *pdev, Error **errp)
+{
+    /* Nothing to set up; realize only validates the slot address */
+    if (pdev->devfn == PCI_DEVFN(0x1f, 0)) {
+        return;
+    }
+
+    error_setg(errp, "VFIO dummy ISA/LPC bridge must have address 1f.0");
+}
+
+static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+
+    /* Generic device properties: description, and never hot-pluggable */
+    dev_class->hotpluggable = false;
+    dev_class->desc = "VFIO dummy ISA/LPC bridge for IGD assignment";
+
+    /* PCI properties: an ISA bridge class code and the address check */
+    pci_class->class_id = PCI_CLASS_BRIDGE_ISA;
+    pci_class->realize = vfio_pci_igd_lpc_bridge_realize;
+}
+
+static TypeInfo vfio_pci_igd_lpc_bridge_info = {
+ .name = "vfio-pci-igd-lpc-bridge", /* name vfio_pci_igd_lpc_init() passes
+ * to pci_create_simple() */
+ .parent = TYPE_PCI_DEVICE,
+ .class_init = vfio_pci_igd_lpc_bridge_class_init,
+};
+
+static void vfio_pci_igd_register_types(void)
+{
+ type_register_static(&vfio_pci_igd_lpc_bridge_info); /* dummy LPC bridge */
+}
+
+type_init(vfio_pci_igd_register_types)
+
+int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
+                          struct vfio_region_info *region)
+{
+    DeviceClass *dev_class = DEVICE_GET_CLASS(vdev);
+    PCIDevice *bridge;
+    int rc;
+
+    /* The dummy LPC bridge is only created in legacy (non-UPT) mode */
+    if (!vdev->igd_legacy_mode) {
+        return 0;
+    }
+
+    /* Legacy mode cannot be combined with a hot-added device */
+    if (vdev->pdev.qdev.hotplugged) {
+        return -EINVAL;
+    }
+
+    dev_class->hotpluggable = false;
+
+    /* Create the dummy bridge at 00:1f.0, then copy the host IDs into it */
+    bridge = pci_create_simple(pci_device_root_bus(&vdev->pdev),
+                               PCI_DEVFN(0x1f, 0),
+                               "vfio-pci-igd-lpc-bridge");
+
+    rc = vfio_pci_igd_copy(vdev, bridge, region, igd_lpc_bridge_infos,
+                           ARRAY_SIZE(igd_lpc_bridge_infos));
+    if (rc) {
+        return rc;
+    }
+
+    trace_vfio_pci_igd_lpc_bridge_enabled(vdev->vbasedev.name);
+    return 0;
+}
+
+/*
* Common quirk probe entry points.
*/
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
@@ -1010,6 +1557,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
vfio_probe_nvidia_bar5_quirk(vdev, nr);
vfio_probe_nvidia_bar0_quirk(vdev, nr);
vfio_probe_rtl8168_bar2_quirk(vdev, nr);
+ vfio_probe_igd_bar4_quirk(vdev, nr);
}
void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
@@ -2101,6 +2101,66 @@ int vfio_populate_vga(VFIOPCIDevice *vdev)
return 0;
}
+/*
+ * Walk the device specific regions (indexes at and beyond
+ * VFIO_PCI_NUM_REGIONS) and hand each Intel IGD region to its init routine,
+ * selected by the subtype in the region's type capability.
+ *
+ * Regions that cannot be queried or that carry no type capability are
+ * skipped.  Returns 0 on success or the first non-zero value returned by an
+ * init routine; the region info is released on every path.
+ */
+static int vfio_populate_dev_regions(VFIOPCIDevice *vdev)
+{
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    int i;
+
+    /* The loop condition already covers devices with no extra regions */
+    for (i = VFIO_PCI_NUM_REGIONS; i < vbasedev->num_regions; i++) {
+        struct vfio_region_info *reg_info;
+        struct vfio_info_cap_header *hdr;
+        struct vfio_region_info_cap_type *type;
+        bool intel_vendor_type;
+        int ret = 0;
+
+        if (vfio_get_region_info(vbasedev, i, &reg_info)) {
+            continue;
+        }
+
+        hdr = vfio_get_region_info_cap(reg_info, VFIO_REGION_INFO_CAP_TYPE);
+        if (!hdr) {
+            g_free(reg_info);
+            continue;
+        }
+
+        type = container_of(hdr, struct vfio_region_info_cap_type, header);
+
+        trace_vfio_populate_dev_region(vdev->vbasedev.name, i,
+                                       type->type, type->subtype);
+
+        intel_vendor_type = type->type ==
+            (VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL);
+
+        if (intel_vendor_type &&
+            type->subtype == VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION) {
+            ret = vfio_pci_igd_opregion_init(vdev, reg_info);
+        } else if (intel_vendor_type &&
+                   type->subtype == VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG) {
+            ret = vfio_pci_igd_host_init(vdev, reg_info);
+        } else if (intel_vendor_type &&
+                   type->subtype == VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG) {
+            ret = vfio_pci_igd_lpc_init(vdev, reg_info);
+        }
+
+        /* Free before checking ret so that error returns do not leak it */
+        g_free(reg_info);
+
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
static int vfio_populate_device(VFIOPCIDevice *vdev)
{
VFIODevice *vbasedev = &vdev->vbasedev;
@@ -2581,6 +2641,11 @@ static int vfio_initfn(PCIDevice *pdev)
}
}
+ ret = vfio_populate_dev_regions(vdev);
+ if (ret) {
+ goto out_teardown;
+ }
+
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
vfio_setup_resetfn_quirk(vdev);
@@ -2603,6 +2668,7 @@ static void vfio_instance_finalize(Object *obj)
vfio_bars_finalize(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
+ g_free(vdev->igd_opregion);
vfio_put_device(vdev);
vfio_put_group(group);
}
@@ -2681,12 +2747,14 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
+ DEFINE_PROP_BOOL("x-no-auto-vga", VFIOPCIDevice, no_auto_vga, false),
DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
sub_vendor_id, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
sub_device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
/*
* TODO - support passed fds... is this necessary?
* DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
@@ -115,6 +115,7 @@ typedef struct VFIOPCIDevice {
int interrupt; /* Current interrupt type */
VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
+ void *igd_opregion;
PCIHostDeviceAddress host;
EventNotifier err_notifier;
EventNotifier req_notifier;
@@ -129,6 +130,7 @@ typedef struct VFIOPCIDevice {
#define VFIO_FEATURE_ENABLE_REQ_BIT 1
#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
int32_t bootindex;
+ uint32_t igd_gms;
uint8_t pm_cap;
bool has_vga;
bool pci_aer;
@@ -139,6 +141,8 @@ typedef struct VFIOPCIDevice {
bool no_kvm_intx;
bool no_kvm_msi;
bool no_kvm_msix;
+ bool no_auto_vga;
+ bool igd_legacy_mode;
} VFIOPCIDevice;
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
@@ -159,4 +163,10 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
int vfio_populate_vga(VFIOPCIDevice *vdev);
+int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
+ struct vfio_region_info *region);
+int vfio_pci_igd_host_init(VFIOPCIDevice *vdev,
+ struct vfio_region_info *region);
+int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
+ struct vfio_region_info *region);
#endif /* HW_VFIO_VFIO_PCI_H */
@@ -154,5 +154,7 @@ extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces;
#ifdef CONFIG_LINUX
int vfio_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info);
+struct vfio_info_cap_header *
+ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id);
#endif
#endif /* !HW_VFIO_VFIO_COMMON_H */
@@ -1673,6 +1673,7 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int
vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
+vfio_populate_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
vfio_initfn(const char *name, int group_id) " (%s) group %d"
vfio_pci_reset(const char *name) " (%s)"
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
@@ -1711,7 +1712,11 @@ vfio_quirk_ati_bonaire_reset_no_smc(const char *name) "%s"
vfio_quirk_ati_bonaire_reset_timeout(const char *name) "%s"
vfio_quirk_ati_bonaire_reset_done(const char *name) "%s"
vfio_quirk_ati_bonaire_reset(const char *name) "%s"
-
+vfio_pci_igd_bar4_write(const char *name, uint32_t index, uint32_t data, uint32_t base) "%s [%03x] %08x -> %08x"
+vfio_pci_igd_bdsm_enabled(const char *name, int size) "%s %dMB"
+vfio_pci_igd_opregion_enabled(const char *name) "%s"
+vfio_pci_igd_host_bridge_enabled(const char *name) "%s"
+vfio_pci_igd_lpc_bridge_enabled(const char *name) "%s"
# hw/vfio/vfio-common.c
vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"
Two modes are available for IGD assignment, Universal Passthrough (UPT) and legacy. UPT mode attempts to handle the IGD device as if it were just a PCI device, requiring no collateral changes to the VM chipset. For the most part this works without these changes. The one feature found here for UPT mode is the addition of the OpRegion, which enables local display support (at least for external monitors). UPT mode requires guest driver support and a Broadwell or newer GPU. The legacy mode provided here supports back through SandyBridge processors and attempts to populate key properties of the VM chipset to match the host device, along with certain quirks to enable the device to work in the VM address space. The code here attempts to guess the mode to apply based on the configuration of the VM. Some aspects are out of our control, for example execution of the VGA BIOS requires the device to be at PCI address 00:02.0. The VGA BIOS also requires an ISA/LPC bridge with IDs matching the host device at address 00:1f.0. Execution of the VGA ROM implies VGA support. Therefore, legacy mode is automatically enabled when the IGD device is found at PCI address 00:02.0, it supports a ROM BAR and has a non-zero ROM size provided through VFIO, the address 00:1f.0 is available for the LPC bridge, and the VGA access is supported through vfio. Anything outside of that configuration assumes UPT mode. Notable in that configuration requirement is the slot at 00:1f.0, which is occupied in Q35 machine configurations. We cannot simply overwrite the device IDs of this component on Q35, therefore only UPT mode is available currently on Q35. UPT is intended to work with the IGD as a secondary graphics device with an emulated graphics device as the primary. Depending on the guest operating system, the IGD and emulated devices may mirror the same display, extend a shared desktop, or either device may be disabled. 
The primary graphics device should be at a lower PCI slot address than any secondary devices. In legacy mode, the IGD should be the primary graphics device and more probably the exclusive graphics device based on my experience. The changes here depend on vfio kernel changes which are currently in the linux-next tree and slated for inclusion in kernel v4.6. Also required is a modified SeaBIOS with support for the fw_cfg features added here: http://patchwork.ozlabs.org/patch/583731/ OVMF support has not yet been investigated, but I have yet to see an IGD device with UEFI ROM support. This makes only the OpRegion support relevant to a pure UEFI OVMF image. Signed-off-by: Alex Williamson <alex.williamson@redhat.com> --- hw/vfio/common.c | 2 hw/vfio/pci-quirks.c | 548 +++++++++++++++++++++++++++++++++++++++++ hw/vfio/pci.c | 68 +++++ hw/vfio/pci.h | 10 + include/hw/vfio/vfio-common.h | 2 trace-events | 7 - 6 files changed, 635 insertions(+), 2 deletions(-)