@@ -12,6 +12,8 @@
#include <linux/init.h>
#include <linux/dmi.h>
#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/msi.h>
#include <asm/acpi.h>
#include <asm/segment.h>
@@ -724,3 +726,73 @@ struct pci_dev *pci_real_dma_dev(struct pci_dev *dev)
return dev;
}
#endif
+
+/*
+ * We want to figure out which context we are running in. But the hardware
+ * does not provide a reliable mechanism (instruction, CPUID leaf, MSR, ...)
+ * that a VMM can use to tell the OS where it runs. So on_bare_metal() below
+ * is a "probably on bare metal" heuristic rather than a definitive check,
+ * for the very simple reason that this is the only option we have. The
+ * names below are matched against DMI_SYS_VENDOR by on_bare_metal().
+ */
+static const char * const vmm_vendor_name[] = {
+ "QEMU", "Bochs", "KVM", "Xen", "VMware", "VMW", "VMware Inc.",
+ "innotek GmbH", "Oracle Corporation", "Parallels", "BHYVE"
+};
+
+/* dmi_walk() callback: sets *(bool *)p when a Type 0 entry has bit 4 set. */
+static void read_type0_virtual_machine(const struct dmi_header *dm, void *p)
+{
+	const u8 *raw = (const u8 *)dm;
+
+	/*
+	 * BIOS Information (Type 0), long enough to hold offset 0x13:
+	 * test bit 4 of BIOS Characteristics Extension Byte 2.
+	 */
+	if (dm->type == 0 && dm->length >= 0x14 && (raw[0x13] & BIT(4)))
+		*(bool *)p = true;
+}
+
+/* Walk the DMI entries; true if read_type0_virtual_machine() set the flag. */
+static bool smbios_virtual_machine(void)
+{
+	bool vm_bit_set = false;
+
+	dmi_walk(read_type0_virtual_machine, &vm_bit_set);
+	return vm_bit_set;
+}
+
+/* Heuristic: false if CPUID, SMBIOS, IOMMU or DMI vendor hints at a VMM. */
+static bool on_bare_metal(struct device *dev)
+{
+	int i;
+
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || smbios_virtual_machine() ||
+	    iommu_capable(dev->bus, IOMMU_CAP_VIOMMU_HINT))
+		return false;
+
+	/*
+	 * NOTE(review): dmi_match() appears to be an exact string compare, so a
+	 * vendor string with extra text would not match - confirm intent.
+	 */
+	for (i = 0; i < ARRAY_SIZE(vmm_vendor_name); i++)
+		if (dmi_match(DMI_SYS_VENDOR, vmm_vendor_name[i]))
+			return false;
+
+	pr_info("System running on bare metal, report to bugzilla.kernel.org if not the case.\n");
+
+	return true;
+}
+
+/*
+ * In a VMM context, device IMS needs either hardware interrupt isolation of
+ * the subdevice or a mechanism (trap, hypercall) that lets the VMM manage
+ * changes to the interrupt message store. For now, device IMS is only
+ * supported when running on bare metal.
+ */
+bool arch_support_pci_device_msi(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	return on_bare_metal(dev);
+}
@@ -519,6 +519,11 @@ struct irq_domain *device_msi_create_irq_domain(struct fwnode_handle *fn,
#ifdef CONFIG_PCI
#include <linux/pci.h>
+bool __weak arch_support_pci_device_msi(struct pci_dev *pdev)
+{
+ return false; /* weak default: used unless another definition overrides it */
+}
+
/**
* pci_subdevice_msi_create_irq_domain - Create an irq domain for subdevices
* @pdev: Pointer to PCI device for which the subdevice domain is created
@@ -530,6 +535,9 @@ struct irq_domain *pci_subdevice_msi_create_irq_domain(struct pci_dev *pdev,
struct irq_domain *domain, *pdev_msi;
struct fwnode_handle *fn;
+ if (!arch_support_pci_device_msi(pdev))
+ return NULL;
+
/*
* Retrieve the MSI domain of the underlying PCI device's MSI
* domain. The PCI device domain's parent domain is also the parent
@@ -489,6 +489,8 @@ struct irq_domain *pci_subdevice_msi_create_irq_domain(struct pci_dev *pdev,
# endif
#endif /* CONFIG_DEVICE_MSI */
+bool arch_support_pci_device_msi(struct pci_dev *pdev); /* gates subdevice irq domain creation */
+
#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg);
struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,