diff mbox

[v2] intel_iommu: relax iq tail check on VTD_GCMD_QIE enable

Message ID 20170619073116.10302-1-lprosek@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ladi Prosek June 19, 2017, 7:31 a.m. UTC
The VT-d spec (section 6.5.2) requires software to zero the
Invalidation Queue Tail Register before enabling the VTD_GCMD_QIE
Global Command Register bit. Windows Server 2012 R2 and possibly
other older Windows versions violate the protocol and set a
non-zero queue tail first, which in effect makes them crash early
on boot with -device intel-iommu,intremap=on.

This commit relaxes the check: instead of refusing to enable
VTD_GCMD_QIE and only emitting the vtd_err_qi_enable trace event,
the device now behaves as if the tail register had been written
just after enabling VTD_GCMD_QIE (see vtd_handle_iqt_write).

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
---

v1->v2:
* rebased on top of Michael's repo
* trace_vtd_warn_invalid_qi_tail instead of VTD_DPRINTF, called on
  non-zero iq_tail
* added a comment


 hw/i386/intel_iommu.c | 33 +++++++++++++++++++--------------
 hw/i386/trace-events  |  2 +-
 2 files changed, 20 insertions(+), 15 deletions(-)

Comments

Peter Xu June 19, 2017, 8:29 a.m. UTC | #1
On Mon, Jun 19, 2017 at 09:31:16AM +0200, Ladi Prosek wrote:
> The VT-d spec (section 6.5.2) prescribes software to zero the
> Invalidation Queue Tail Register before enabling the VTD_GCMD_QIE
> Global Command Register bit. Windows Server 2012 R2 and possibly
> other older Windows versions violate the protocol and set a
> non-zero queue tail first, which in effect makes them crash early
> on boot with -device intel-iommu,intremap=on.
> 
> This commit relaxes the check and instead of failing to enable
> VTD_GCMD_QIE with vtd_err_qi_enable, it behaves as if the tail
> register was set just after enabling VTD_GCMD_QIE
> (see vtd_handle_iqt_write).
> 
> Signed-off-by: Ladi Prosek <lprosek@redhat.com>

Reviewed-by: Peter Xu <peterx@redhat.com>

> ---
> 
> v1->v2:
> * rebased on top of Michael's repo
> * trace_vtd_warn_invalid_qi_tail instead of VTD_DPRINTF, called on
>   non-zero iq_tail
> * added a comment
> 
> 
>  hw/i386/intel_iommu.c | 33 +++++++++++++++++++--------------
>  hw/i386/trace-events  |  2 +-
>  2 files changed, 20 insertions(+), 15 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index a9b59bd..2ddf3bd 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -1450,10 +1450,7 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
>      return iaig;
>  }
>  
> -static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s)
> -{
> -    return s->iq_tail == 0;
> -}
> +static void vtd_fetch_inv_desc(IntelIOMMUState *s);
>  
>  static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s)
>  {
> @@ -1468,16 +1465,24 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
>      trace_vtd_inv_qi_enable(en);
>  
>      if (en) {
> -        if (vtd_queued_inv_enable_check(s)) {
> -            s->iq = iqa_val & VTD_IQA_IQA_MASK;
> -            /* 2^(x+8) entries */
> -            s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
> -            s->qi_enabled = true;
> -            trace_vtd_inv_qi_setup(s->iq, s->iq_size);
> -            /* Ok - report back to driver */
> -            vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
> -        } else {
> -            trace_vtd_err_qi_enable(s->iq_tail);
> +        s->iq = iqa_val & VTD_IQA_IQA_MASK;
> +        /* 2^(x+8) entries */
> +        s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
> +        s->qi_enabled = true;
> +        trace_vtd_inv_qi_setup(s->iq, s->iq_size);
> +        /* Ok - report back to driver */
> +        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
> +
> +        if (s->iq_tail != 0) {
> +            /*
> +             * This is a spec violation but Windows guests are known to set up
> +             * Queued Invalidation this way so we allow the write and process
> +             * Invalidation Descriptors right away.
> +             */
> +            trace_vtd_warn_invalid_qi_tail(s->iq_tail);
> +            if (!(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
> +                vtd_fetch_inv_desc(s);
> +            }
>          }
>      } else {
>          if (vtd_queued_inv_disable_check(s)) {
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 5f111d6..42d8a7e 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -74,7 +74,7 @@ vtd_err_dmar_slpte_read_error(uint64_t iova, int level) "iova 0x%"PRIx64" level
>  vtd_err_dmar_slpte_perm_error(uint64_t iova, int level, uint64_t slpte, bool is_write) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64" write %d"
>  vtd_err_dmar_slpte_resv_error(uint64_t iova, int level, uint64_t slpte) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64
>  vtd_err_dmar_translate(uint8_t bus, uint8_t slot, uint8_t func, uint64_t iova) "dev %02x:%02x.%02x iova 0x%"PRIx64
> -vtd_err_qi_enable(uint16_t tail) "tail 0x%"PRIx16
> +vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
>  vtd_err_qi_disable(uint16_t head, uint16_t tail, int type) "head 0x%"PRIx16" tail 0x%"PRIx16" last_desc_type %d"
>  vtd_err_qi_tail(uint16_t tail, uint16_t size) "tail 0x%"PRIx16" size 0x%"PRIx16
>  vtd_err_irte(int index, uint64_t lo, uint64_t hi) "index %d low 0x%"PRIx64" high 0x%"PRIx64
> -- 
> 2.9.3
>
diff mbox

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a9b59bd..2ddf3bd 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1450,10 +1450,7 @@  static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
     return iaig;
 }
 
-static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s)
-{
-    return s->iq_tail == 0;
-}
+static void vtd_fetch_inv_desc(IntelIOMMUState *s);
 
 static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s)
 {
@@ -1468,16 +1465,24 @@  static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
     trace_vtd_inv_qi_enable(en);
 
     if (en) {
-        if (vtd_queued_inv_enable_check(s)) {
-            s->iq = iqa_val & VTD_IQA_IQA_MASK;
-            /* 2^(x+8) entries */
-            s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
-            s->qi_enabled = true;
-            trace_vtd_inv_qi_setup(s->iq, s->iq_size);
-            /* Ok - report back to driver */
-            vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
-        } else {
-            trace_vtd_err_qi_enable(s->iq_tail);
+        s->iq = iqa_val & VTD_IQA_IQA_MASK;
+        /* 2^(x+8) entries */
+        s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
+        s->qi_enabled = true;
+        trace_vtd_inv_qi_setup(s->iq, s->iq_size);
+        /* Ok - report back to driver */
+        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
+
+        if (s->iq_tail != 0) {
+            /*
+             * This is a spec violation but Windows guests are known to set up
+             * Queued Invalidation this way so we allow the write and process
+             * Invalidation Descriptors right away.
+             */
+            trace_vtd_warn_invalid_qi_tail(s->iq_tail);
+            if (!(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
+                vtd_fetch_inv_desc(s);
+            }
         }
     } else {
         if (vtd_queued_inv_disable_check(s)) {
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 5f111d6..42d8a7e 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -74,7 +74,7 @@  vtd_err_dmar_slpte_read_error(uint64_t iova, int level) "iova 0x%"PRIx64" level
 vtd_err_dmar_slpte_perm_error(uint64_t iova, int level, uint64_t slpte, bool is_write) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64" write %d"
 vtd_err_dmar_slpte_resv_error(uint64_t iova, int level, uint64_t slpte) "iova 0x%"PRIx64" level %d slpte 0x%"PRIx64
 vtd_err_dmar_translate(uint8_t bus, uint8_t slot, uint8_t func, uint64_t iova) "dev %02x:%02x.%02x iova 0x%"PRIx64
-vtd_err_qi_enable(uint16_t tail) "tail 0x%"PRIx16
+vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
 vtd_err_qi_disable(uint16_t head, uint16_t tail, int type) "head 0x%"PRIx16" tail 0x%"PRIx16" last_desc_type %d"
 vtd_err_qi_tail(uint16_t tail, uint16_t size) "tail 0x%"PRIx16" size 0x%"PRIx16
 vtd_err_irte(int index, uint64_t lo, uint64_t hi) "index %d low 0x%"PRIx64" high 0x%"PRIx64