
[v4,16/28] x86/vvtd: Add queued invalidation (QI) support

Message ID 1510899755-40237-17-git-send-email-chao.gao@intel.com (mailing list archive)

Commit Message

Chao Gao Nov. 17, 2017, 6:22 a.m. UTC
The Queued Invalidation (QI) interface is an expanded invalidation
interface with extended capabilities. Hardware implementations report
support for queued invalidation through the Extended Capability Register.
The interface uses an Invalidation Queue (IQ), a circular buffer in system
memory; software submits commands by writing Invalidation Descriptors to
the IQ.

This patch adds a new function, vvtd_process_iq(), which emulates how
hardware handles invalidation requests submitted through QI.
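
As a rough, standalone sketch of the IQH/IQT index arithmetic the
emulation relies on (constants mirror those added to iommu.h;
QINVAL_ENTRY_ORDER is assumed to be 8, i.e. 256 16-byte descriptors per
4 KiB queue page):

#include <stdint.h>
#include <stdio.h>

#define QINVAL_INDEX_SHIFT  4          /* descriptors are 16 bytes */
#define QINVAL_INDEX_MASK   0x7fff0ULL /* index bits in IQH/IQT */
#define QINVAL_ENTRY_ORDER  8          /* 256 descriptors per 4 KiB page */

/* Extract the descriptor index encoded in an IQH/IQT register value. */
static unsigned int qinval_index(uint64_t reg)
{
    return (reg & QINVAL_INDEX_MASK) >> QINVAL_INDEX_SHIFT;
}

int main(void)
{
    uint64_t iqh = 1 << QINVAL_INDEX_SHIFT;  /* head at descriptor 1 */
    uint64_t iqt = 4 << QINVAL_INDEX_SHIFT;  /* tail at descriptor 4 */
    unsigned int max_entry = 1u << (QINVAL_ENTRY_ORDER + 0); /* QS = 0 */
    unsigned int i;

    /* Walk the circular buffer from head up to, but excluding, tail. */
    for ( i = qinval_index(iqh); i != qinval_index(iqt);
          i = (i + 1) % max_entry )
        printf("would process descriptor %u\n", i);

    return 0;
}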

Signed-off-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>

---
v4:
 - Introduce a lock to protect invalidation-related registers.
---
 xen/drivers/passthrough/vtd/iommu.h |  24 +++-
 xen/drivers/passthrough/vtd/vvtd.c  | 271 +++++++++++++++++++++++++++++++++++-
 2 files changed, 293 insertions(+), 2 deletions(-)

Patch

diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index b71dab8..de9188b 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -47,7 +47,12 @@ 
 #define DMAR_IQH_REG            0x80 /* invalidation queue head */
 #define DMAR_IQT_REG            0x88 /* invalidation queue tail */
 #define DMAR_IQA_REG            0x90 /* invalidation queue addr */
+#define DMAR_IQUA_REG           0x94 /* invalidation queue upper addr */
+#define DMAR_ICS_REG            0x9c /* invalidation completion status */
 #define DMAR_IECTL_REG          0xa0 /* invalidation event control register */
+#define DMAR_IEDATA_REG         0xa4 /* invalidation event data register */
+#define DMAR_IEADDR_REG         0xa8 /* invalidation event address register */
+#define DMAR_IEUADDR_REG        0xac /* invalidation event upper address register */
 #define DMAR_IRTA_REG           0xb8 /* base address of intr remap table */
 #define DMAR_IRTUA_REG          0xbc /* upper address of intr remap table */
 
@@ -175,6 +180,21 @@ 
 #define DMA_IRTA_S(val)         (val & 0xf)
 #define DMA_IRTA_SIZE(val)      (1UL << (DMA_IRTA_S(val) + 1))
 
+/* IQA_REG */
+#define DMA_IQA_ADDR(val)       (val & ~0xfffULL)
+#define DMA_IQA_QS(val)         (val & 0x7)
+#define DMA_IQA_RSVD            0xff8ULL
+
+/* IECTL_REG */
+#define DMA_IECTL_IM_SHIFT      31
+#define DMA_IECTL_IM            (1U << DMA_IECTL_IM_SHIFT)
+#define DMA_IECTL_IP_SHIFT      30
+#define DMA_IECTL_IP            (1U << DMA_IECTL_IP_SHIFT)
+
+/* ICS_REG */
+#define DMA_ICS_IWC_SHIFT       0
+#define DMA_ICS_IWC             (1U << DMA_ICS_IWC_SHIFT)
+
 /* PMEN_REG */
 #define DMA_PMEN_EPM    (((u32)1) << 31)
 #define DMA_PMEN_PRS    (((u32)1) << 0)
@@ -205,13 +225,14 @@ 
 /* FSTS_REG */
 #define DMA_FSTS_PFO_SHIFT  0
 #define DMA_FSTS_PPF_SHIFT  1
+#define DMA_FSTS_IQE_SHIFT  4
 #define DMA_FSTS_PRO_SHIFT  7
 
 #define DMA_FSTS_PFO        ((uint32_t)1 << DMA_FSTS_PFO_SHIFT)
 #define DMA_FSTS_PPF        ((uint32_t)1 << DMA_FSTS_PPF_SHIFT)
 #define DMA_FSTS_AFO        ((uint32_t)1 << 2)
 #define DMA_FSTS_APF        ((uint32_t)1 << 3)
-#define DMA_FSTS_IQE        ((uint32_t)1 << 4)
+#define DMA_FSTS_IQE        ((uint32_t)1 << DMA_FSTS_IQE_SHIFT)
 #define DMA_FSTS_ICE        ((uint32_t)1 << 5)
 #define DMA_FSTS_ITE        ((uint32_t)1 << 6)
 #define DMA_FSTS_PRO        ((uint32_t)1 << DMA_FSTS_PRO_SHIFT)
@@ -555,6 +576,7 @@  struct qinval_entry {
 
 /* Queue invalidation head/tail shift */
 #define QINVAL_INDEX_SHIFT 4
+#define QINVAL_INDEX_MASK  0x7fff0ULL
 
 #define qinval_present(v) ((v).lo & 1)
 #define qinval_fault_disable(v) (((v).lo >> 1) & 1)
diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
index a2fa64a..81170ec 100644
--- a/xen/drivers/passthrough/vtd/vvtd.c
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -27,6 +27,7 @@ 
 #include <asm/event.h>
 #include <asm/io_apic.h>
 #include <asm/hvm/domain.h>
+#include <asm/hvm/support.h>
 #include <asm/p2m.h>
 
 #include "iommu.h"
@@ -68,6 +69,9 @@  struct vvtd {
 
     struct hvm_hw_vvtd hw;
     void *irt_base;
+    void *inv_queue_base;
+    /* This lock protects invalidation related registers */
+    spinlock_t ie_lock;
 };
 
 /* Setting viommu_verbose enables debugging messages of vIOMMU */
@@ -284,6 +288,12 @@  static void vvtd_notify_fault(const struct vvtd *vvtd)
                             vvtd_get_reg(vvtd, DMAR_FEDATA_REG));
 }
 
+static void vvtd_notify_inv_completion(const struct vvtd *vvtd)
+{
+    vvtd_generate_interrupt(vvtd, vvtd_get_reg_quad(vvtd, DMAR_IEADDR_REG),
+                            vvtd_get_reg(vvtd, DMAR_IEDATA_REG));
+}
+
 /* Computing the IRTE index for a given interrupt request. When success, return
  * 0 and set index to reference the corresponding IRTE. Otherwise, return < 0,
  * i.e. -1 when the irq request isn't an remapping format.
@@ -478,6 +488,189 @@  static int vvtd_record_fault(struct vvtd *vvtd,
     return X86EMUL_OKAY;
 }
 
+/*
+ * Process an invalidation descriptor. Currently, only two types of
+ * descriptors are handled: the Interrupt Entry Cache Invalidation
+ * Descriptor and the Invalidation Wait Descriptor.
+ * @vvtd: the virtual vtd instance
+ * @i: the index of the invalidation descriptor to be processed
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int process_iqe(struct vvtd *vvtd, uint32_t i)
+{
+    struct qinval_entry qinval;
+    int ret = 0;
+
+    if ( !vvtd->inv_queue_base )
+    {
+        gdprintk(XENLOG_ERR, "Invalidation queue base isn't set\n");
+        return -1;
+    }
+    qinval = ((struct qinval_entry *)vvtd->inv_queue_base)[i];
+
+    switch ( qinval.q.inv_wait_dsc.lo.type )
+    {
+    case TYPE_INVAL_WAIT:
+        if ( qinval.q.inv_wait_dsc.lo.sw )
+        {
+            uint32_t data = qinval.q.inv_wait_dsc.lo.sdata;
+            uint64_t addr = qinval.q.inv_wait_dsc.hi.saddr << 2;
+
+            ret = hvm_copy_to_guest_phys(addr, &data, sizeof(data), current);
+            if ( ret )
+                vvtd_info("Failed to write status address\n");
+        }
+
+        /*
+         * The following code generates an invalidation completion event
+         * to indicate completion of the invalidation wait descriptor. Note
+         * that this code path has not been properly tested.
+         */
+        if ( qinval.q.inv_wait_dsc.lo.iflag )
+        {
+            if ( !vvtd_test_and_set_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_SHIFT) )
+            {
+                vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
+                if ( !vvtd_test_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT) )
+                {
+                    vvtd_notify_inv_completion(vvtd);
+                    vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
+                }
+            }
+        }
+        break;
+
+    case TYPE_INVAL_IEC:
+        /* No cache is maintained in vvtd, so nothing needs to be flushed */
+        break;
+
+    default:
+        vvtd_debug("d%d: Invalidation type (%x) isn't supported\n",
+                   vvtd->domain->domain_id, qinval.q.inv_wait_dsc.lo.type);
+        return -1;
+    }
+
+    return ret;
+}
+
+/*
+ * Process all pending descriptors in the Invalidation Queue.
+ */
+static void vvtd_process_iq(struct vvtd *vvtd)
+{
+    uint32_t max_entry, i, iqh, iqt;
+    int err = 0;
+
+    /* Trylock avoids more than one caller dealing with invalidation requests */
+    if ( !spin_trylock(&vvtd->ie_lock) )
+        return;
+
+    iqh = MASK_EXTR(vvtd_get_reg_quad(vvtd, DMAR_IQH_REG), QINVAL_INDEX_MASK);
+    iqt = MASK_EXTR(vvtd_get_reg_quad(vvtd, DMAR_IQT_REG), QINVAL_INDEX_MASK);
+    /*
+     * No new descriptor is fetched from the Invalidation Queue until
+     * software clears the IQE field in the Fault Status Register
+     */
+    if ( vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_SHIFT) )
+    {
+        spin_unlock(&vvtd->ie_lock);
+        return;
+    }
+
+    max_entry = 1 << (QINVAL_ENTRY_ORDER +
+                      DMA_IQA_QS(vvtd_get_reg_quad(vvtd, DMAR_IQA_REG)));
+
+    ASSERT(iqt < max_entry);
+    if ( iqh == iqt )
+    {
+        spin_unlock(&vvtd->ie_lock);
+        return;
+    }
+
+    for ( i = iqh; i != iqt; i = (i + 1) % max_entry )
+    {
+        err = process_iqe(vvtd, i);
+        if ( err )
+            break;
+    }
+
+    /*
+     * Set IQH before checking for an error, because IQH should reference
+     * the descriptor associated with the error when an error is seen
+     * by the guest.
+     */
+    vvtd_set_reg_quad(vvtd, DMAR_IQH_REG, i << QINVAL_INDEX_SHIFT);
+
+    spin_unlock(&vvtd->ie_lock);
+    if ( err )
+    {
+        spin_lock(&vvtd->fe_lock);
+        vvtd_report_non_recoverable_fault(vvtd, DMA_FSTS_IQE_SHIFT);
+        spin_unlock(&vvtd->fe_lock);
+    }
+}
+
+static void vvtd_write_iqt(struct vvtd *vvtd, uint32_t val)
+{
+    uint32_t max_entry;
+
+    if ( val & ~QINVAL_INDEX_MASK )
+    {
+        vvtd_info("attempts to set reserved bits in IQT\n");
+        return;
+    }
+
+    max_entry = 1U << (QINVAL_ENTRY_ORDER +
+                       DMA_IQA_QS(vvtd_get_reg_quad(vvtd, DMAR_IQA_REG)));
+    if ( MASK_EXTR(val, QINVAL_INDEX_MASK) >= max_entry )
+    {
+        vvtd_info("IQT: Value %x exceeded supported max index.", val);
+        return;
+    }
+
+    vvtd_set_reg(vvtd, DMAR_IQT_REG, val);
+}
+
+static void vvtd_write_iqa(struct vvtd *vvtd, uint32_t val, bool high)
+{
+    uint64_t cap = vvtd_get_reg_quad(vvtd, DMAR_CAP_REG);
+    uint64_t old = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
+    uint64_t new;
+
+    if ( high )
+        new = ((uint64_t)val << 32) | (old & 0xffffffff);
+    else
+        new = ((old >> 32) << 32) | val;
+
+    if ( new & (~((1ULL << cap_mgaw(cap)) - 1) | DMA_IQA_RSVD) )
+    {
+        vvtd_info("Attempt to set reserved bits in IQA\n");
+        return;
+    }
+
+    vvtd_set_reg_quad(vvtd, DMAR_IQA_REG, new);
+    if ( high && !vvtd->inv_queue_base )
+        vvtd->inv_queue_base = map_guest_pages(vvtd->domain,
+                                               PFN_DOWN(DMA_IQA_ADDR(new)),
+                                               1 << DMA_IQA_QS(new));
+    else if ( !high && vvtd->inv_queue_base )
+    {
+        unmap_guest_pages(vvtd->inv_queue_base, 1 << DMA_IQA_QS(old));
+        vvtd->inv_queue_base = NULL;
+    }
+}
+
+static void vvtd_write_ics(struct vvtd *vvtd, uint32_t val)
+{
+    if ( val & DMA_ICS_IWC )
+    {
+        vvtd_clear_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_SHIFT);
+        /* When the IWC field is cleared, the IP field needs to be cleared */
+        vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
+    }
+}
+
 static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
 {
     /* Writing a 1 means clear fault */
@@ -489,6 +682,20 @@  static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
     return X86EMUL_OKAY;
 }
 
+static void vvtd_write_iectl(struct vvtd *vvtd, uint32_t val)
+{
+    /* Only the DMA_IECTL_IM bit is writable. Generate the pending event on unmask */
+    if ( !(val & DMA_IECTL_IM) )
+    {
+        /* Clear IM and clear IP */
+        vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
+        if ( vvtd_test_and_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT) )
+            vvtd_notify_inv_completion(vvtd);
+    }
+    else
+        vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
+}
+
 static void vvtd_write_fectl(struct vvtd *vvtd, uint32_t val)
 {
     /*
@@ -681,6 +888,48 @@  static void vvtd_write_fault_regs(struct vvtd *vvtd, unsigned long val,
     spin_unlock(&vvtd->fe_lock);
 }
 
+static void vvtd_write_invalidation_regs(struct vvtd *vvtd, unsigned long val,
+                                         unsigned int offset, unsigned int len)
+{
+    spin_lock(&vvtd->ie_lock);
+    for ( ; len ; len -= 4, offset += 4, val = val >> 32)
+    {
+        switch ( offset )
+        {
+        case DMAR_IECTL_REG:
+            vvtd_write_iectl(vvtd, val);
+            break;
+
+        case DMAR_ICS_REG:
+            vvtd_write_ics(vvtd, val);
+            break;
+
+        case DMAR_IQT_REG:
+            vvtd_write_iqt(vvtd, val);
+            break;
+
+        case DMAR_IQA_REG:
+            vvtd_write_iqa(vvtd, val, 0);
+            break;
+
+        case DMAR_IQUA_REG:
+            vvtd_write_iqa(vvtd, val, 1);
+            break;
+
+        case DMAR_IEDATA_REG:
+        case DMAR_IEADDR_REG:
+        case DMAR_IEUADDR_REG:
+            vvtd_set_reg(vvtd, offset, val);
+            break;
+
+        default:
+            break;
+        }
+    }
+    spin_unlock(&vvtd->ie_lock);
+
+}
+
 static int vvtd_write(struct vcpu *v, unsigned long addr,
                       unsigned int len, unsigned long val)
 {
@@ -719,6 +968,17 @@  static int vvtd_write(struct vcpu *v, unsigned long addr,
         vvtd_write_fault_regs(vvtd, val, offset, len);
         break;
 
+    case DMAR_IECTL_REG:
+    case DMAR_ICS_REG:
+    case DMAR_IQT_REG:
+    case DMAR_IQA_REG:
+    case DMAR_IQUA_REG:
+    case DMAR_IEDATA_REG:
+    case DMAR_IEADDR_REG:
+    case DMAR_IEUADDR_REG:
+        vvtd_write_invalidation_regs(vvtd, val, offset, len);
+        break;
+
     default:
         if ( (offset == (fault_offset + DMA_FRCD2_OFFSET)) ||
              (offset == (fault_offset + DMA_FRCD3_OFFSET)) )
@@ -840,7 +1100,8 @@  static int vvtd_handle_irq_request(const struct domain *d,
                         irte.remap.tm);
 
  out:
-    atomic_dec(&vvtd->inflight_intr);
+    if ( atomic_dec_and_test(&vvtd->inflight_intr) )
+        vvtd_process_iq(vvtd);
     return ret;
 }
 
@@ -911,6 +1172,7 @@  static int vvtd_create(struct domain *d, struct viommu *viommu)
     vvtd->domain = d;
     register_mmio_handler(d, &vvtd_mmio_ops);
     spin_lock_init(&vvtd->fe_lock);
+    spin_lock_init(&vvtd->ie_lock);
 
     viommu->priv = vvtd;
 
@@ -930,6 +1192,13 @@  static int vvtd_destroy(struct viommu *viommu)
                                      sizeof(struct iremap_entry)));
             vvtd->irt_base = NULL;
         }
+        if ( vvtd->inv_queue_base )
+        {
+            uint64_t old = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
+
+            unmap_guest_pages(vvtd->inv_queue_base, 1 << DMA_IQA_QS(old));
+            vvtd->inv_queue_base = NULL;
+        }
         xfree(vvtd);
     }