@@ -473,6 +473,7 @@ struct kvm_assigned_dev_kernel {
unsigned int entries_nr;
int host_irq;
bool host_irq_disabled;
+ bool pci_2_3;
struct msix_entry *host_msix_entries;
int guest_irq;
struct kvm_guest_msix_entry *guest_msix_entries;
@@ -55,10 +55,96 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
return index;
}
+/*
+ * Probe for PCI 2.3 INTx masking: flip the Interrupt Disable bit in the
+ * command register and read it back (the bit was read-only in PCI 2.2).
+ * Returns true only if exactly that bit toggled; orig is then written back.
+ */
+static bool pci_2_3_supported(struct pci_dev *pdev)
+{
+ u16 orig, new;
+ bool supported = false;
+
+ pci_block_user_cfg_access(pdev);
+ pci_read_config_word(pdev, PCI_COMMAND, &orig);
+ pci_write_config_word(pdev, PCI_COMMAND,
+ orig ^ PCI_COMMAND_INTX_DISABLE);
+ pci_read_config_word(pdev, PCI_COMMAND, &new);
+
+ /*
+ * There's no way to protect against hardware bugs or detect them
+ * reliably, but as long as we know what the value should be, let's
+ * go ahead and check it: only the Interrupt Disable bit may differ.
+ */
+ if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
+ dev_err(&pdev->dev, "Command changed from 0x%x to 0x%x: "
+ "driver or HW bug?\n", orig, new);
+ goto out; /* NOTE(review): register is not restored on this path */
+ }
+ if (!((new ^ orig) & PCI_COMMAND_INTX_DISABLE)) {
+ dev_warn(&pdev->dev, "Device does not support "
+ "disabling interrupts: unable to bind.\n");
+ goto out; /* bit did not toggle, so there is nothing to restore */
+ }
+ supported = true;
+
+ /* Undo the probe: write the original command value back. */
+ pci_write_config_word(pdev, PCI_COMMAND, orig);
+
+out:
+ pci_unblock_user_cfg_access(pdev);
+ return supported;
+}
+
+static void
+pci_2_3_mask_irq(struct pci_dev *dev, int mask, unsigned int *irq_status)
+{
+ u32 cmd_status_dword; /* low half: command, high half: status */
+ u16 origcmd, newcmd;
+
+ /*
+ * Set (mask != 0) or clear (mask == 0) the Interrupt Disable bit in the
+ * command register. A non-NULL irq_status receives the Interrupt Status
+ * bit. Command and status are fetched with one dword read; the checks
+ * below document the register layout that makes this possible.
+ */
+ BUILD_BUG_ON(PCI_COMMAND % 4);
+ BUILD_BUG_ON(PCI_COMMAND + 2 != PCI_STATUS);
+
+ pci_block_user_cfg_access(dev);
+
+ /*
+ * Note: we could cache the value for command and move the status read
+ * out of the lock if there were a way to get notified of user changes
+ * to command register through sysfs. Should be good for shared irqs.
+ */
+ pci_read_config_dword(dev, PCI_COMMAND, &cmd_status_dword);
+ origcmd = cmd_status_dword; /* truncation keeps the command half */
+
+ if (irq_status) {
+ /*
+ * If the status bit is clear, the interrupt is not ours: write nothing.
+ */
+ *irq_status = (cmd_status_dword >> 16) & PCI_STATUS_INTERRUPT;
+ if (*irq_status == 0)
+ goto done;
+ }
+
+ newcmd = origcmd & ~PCI_COMMAND_INTX_DISABLE;
+ if (mask)
+ newcmd |= PCI_COMMAND_INTX_DISABLE;
+ if (newcmd != origcmd) /* skip the config write when nothing changes */
+ pci_write_config_word(dev, PCI_COMMAND, newcmd);
+
+done:
+ pci_unblock_user_cfg_access(dev);
+}
+
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
{
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
+ int ret = IRQ_HANDLED;
unsigned long flags;
spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
@@ -83,19 +169,34 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
guest_entries[i].vector, 1);
}
} else {
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
-
if (assigned_dev->irq_requested_type &
KVM_DEV_IRQ_GUEST_INTX) {
- disable_irq_nosync(irq);
+ if (assigned_dev->pci_2_3) {
+ unsigned int irq_status;
+
+ if (assigned_dev->host_irq_disabled) {
+ ret = IRQ_NONE;
+ goto out;
+ }
+
+ pci_2_3_mask_irq(assigned_dev->dev, 1,
+ &irq_status);
+ if (irq_status == 0) {
+ ret = IRQ_NONE;
+ goto out;
+ }
+ } else
+ disable_irq_nosync(irq);
assigned_dev->host_irq_disabled = true;
}
+
+ kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+ assigned_dev->guest_irq, 1);
}
out:
spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
- return IRQ_HANDLED;
+ return ret;
}
/* Ack the irq line for an assigned device */
@@ -117,7 +218,10 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
*/
spin_lock_irqsave(&dev->assigned_dev_lock, flags);
if (dev->host_irq_disabled) {
- enable_irq(dev->host_irq);
+ if (dev->pci_2_3)
+ pci_2_3_mask_irq(dev->dev, 0, NULL);
+ else
+ enable_irq(dev->host_irq);
dev->host_irq_disabled = false;
}
spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
@@ -166,7 +270,11 @@ static void deassign_host_irq(struct kvm *kvm,
pci_disable_msix(assigned_dev->dev);
} else {
/* Deal with MSI and INTx */
- disable_irq(assigned_dev->host_irq);
+ if (assigned_dev->pci_2_3) {
+ pci_2_3_mask_irq(assigned_dev->dev, 1, NULL);
+ synchronize_irq(assigned_dev->host_irq);
+ } else
+ disable_irq(assigned_dev->host_irq);
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
@@ -214,6 +322,13 @@ static void kvm_free_assigned_device(struct kvm *kvm,
pci_reset_function(assigned_dev->dev);
+ /*
+ * Unmask the IRQ at PCI level once the reset is done - the next user
+ * may not expect the IRQ being masked.
+ */
+ if (assigned_dev->pci_2_3)
+ pci_2_3_mask_irq(assigned_dev->dev, 0, NULL);
+
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
pci_dev_put(assigned_dev->dev);
@@ -239,15 +354,26 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
static int assigned_device_enable_host_intx(struct kvm *kvm,
struct kvm_assigned_dev_kernel *dev)
{
+ unsigned long flags = 0; /* becomes IRQF_SHARED only with pci_2_3 */
+
dev->host_irq = dev->dev->irq;
- /* Even though this is PCI, we don't want to use shared
- * interrupts. Sharing host devices with guest-assigned devices
- * on the same interrupt line is not a happy situation: there
- * are going to be long delays in accepting, acking, etc.
+
+ /*
+ * We can only share the IRQ line with other host devices if we are
+ * able to disable the IRQ source at device level, independently of
+ * the guest driver. Otherwise host devices may suffer from unbounded
+ * IRQ latencies while the guest keeps the line asserted.
*/
+ dev->pci_2_3 = pci_2_3_supported(dev->dev);
+ if (dev->pci_2_3)
+ flags = IRQF_SHARED;
+
if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
- 0, "kvm_assigned_intx_device", (void *)dev))
+ flags, "kvm_assigned_intx_device", (void *)dev))
return -EIO;
+
+ if (dev->pci_2_3)
+ pci_2_3_mask_irq(dev->dev, 0, NULL); /* start with INTx unmasked */
return 0;
}
@@ -324,7 +450,6 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
{
dev->guest_irq = irq->guest_irq;
dev->ack_notifier.gsi = -1;
- dev->host_irq_disabled = false;
return 0;
}
#endif
@@ -336,7 +461,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
{
dev->guest_irq = irq->guest_irq;
dev->ack_notifier.gsi = -1;
- dev->host_irq_disabled = false;
return 0;
}
#endif
@@ -367,6 +491,7 @@ static int assign_host_irq(struct kvm *kvm,
default:
r = -EINVAL;
}
+ dev->host_irq_disabled = false;
if (!r)
dev->irq_requested_type |= host_irq_type;