diff mbox

[v3,2/2] x86/PCI: Intercept accesses to RO MMIO from dom0s in HVM containers

Message ID 1450467596-3649-3-git-send-email-boris.ostrovsky@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Boris Ostrovsky Dec. 18, 2015, 7:39 p.m. UTC
Commit 9256f66c1606 ("x86/PCI: intercept all PV Dom0 MMCFG writes")
added intercepts for writes to RO MMCFG space from PV dom0.

Similar functionality, including access to RO non-MMCFG addresses, is
needed by dom0s in HVM containers (such as PVH and, in the future,
HVMlite).

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 xen/arch/x86/hvm/emulate.c             |   46 +++++++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c                 |    8 ++++
 xen/arch/x86/mm.c                      |   58 ++++++++++---------------------
 xen/arch/x86/x86_64/mmconfig_64.c      |   10 +++++
 xen/arch/x86/x86_emulate/x86_emulate.c |   10 +++++
 xen/arch/x86/x86_emulate/x86_emulate.h |   12 ++++++
 xen/include/asm-x86/hvm/emulate.h      |    1 +
 xen/include/asm-x86/mm.h               |   17 +++++++++
 xen/include/asm-x86/pci.h              |    3 ++
 9 files changed, 126 insertions(+), 39 deletions(-)
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index e1017b5..f5dd1f8 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1778,6 +1778,52 @@  int hvm_emulate_one_no_write(
     return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops_no_write);
 }
 
+int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
+{
+    static const struct x86_emulate_ops hvm_intercept_ops_mmcfg = {
+        .read       = x86emul_unhandleable_rw,
+        .insn_fetch = hvmemul_insn_fetch,
+        .write      = mmcfg_intercept_write,
+    };
+    static const struct x86_emulate_ops hvm_ro_emulate_ops_mmio = {
+        .read       = x86emul_unhandleable_rw,
+        .insn_fetch = hvmemul_insn_fetch,
+        .write      = mmio_ro_emulated_write
+    };
+    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = gla };
+    struct hvm_emulate_ctxt ctxt;
+    const struct x86_emulate_ops *ops;
+    unsigned int seg, bdf;
+    int rc;
+
+    if ( pci_ro_mmcfg_decode(mfn, &seg, &bdf) )
+    {
+        mmio_ro_ctxt.seg = seg;
+        mmio_ro_ctxt.bdf = bdf;
+        ops = &hvm_intercept_ops_mmcfg;
+    }
+    else
+        ops = &hvm_ro_emulate_ops_mmio;
+
+    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
+    ctxt.ctxt.data = &mmio_ro_ctxt;
+    rc = _hvm_emulate_one(&ctxt, ops);
+    switch ( rc )
+    {
+    case X86EMUL_UNHANDLEABLE:
+        hvm_dump_emulation_state(XENLOG_G_WARNING "MMCFG", &ctxt);
+        break;
+    case X86EMUL_EXCEPTION:
+        if ( ctxt.exn_pending )
+            hvm_inject_trap(&ctxt.trap);
+        /* fallthrough */
+    default:
+        hvm_emulate_writeback(&ctxt);
+    }
+
+    return rc;
+}
+
 void hvm_mem_access_emulate_one(enum emul_kind kind, unsigned int trapnr,
     unsigned int errcode)
 {
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 08cef1f..ed9bbc9 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3115,6 +3115,14 @@  int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
         goto out_put_gfn;
     }
 
+    if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) &&
+         npfec.write_access && npfec.present &&
+         (hvm_emulate_one_mmio(mfn_x(mfn), gla) == X86EMUL_OKAY) )
+    {
+        rc = 1;
+        goto out_put_gfn;
+    }
+
     /* If we fell through, the vcpu will retry now that access restrictions have
      * been removed. It may fault again if the p2m entry type still requires so.
      * Otherwise, this is an error condition. */
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 92df36f..7323d9d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5243,31 +5243,14 @@  int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
  * fault handling for read-only MMIO pages
  */
 
-struct mmio_ro_emulate_ctxt {
-    struct x86_emulate_ctxt ctxt;
-    unsigned long cr2;
-    unsigned int seg, bdf;
-};
-
-static int mmio_ro_emulated_read(
-    enum x86_segment seg,
-    unsigned long offset,
-    void *p_data,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    return X86EMUL_UNHANDLEABLE;
-}
-
-static int mmio_ro_emulated_write(
+int mmio_ro_emulated_write(
     enum x86_segment seg,
     unsigned long offset,
     void *p_data,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    struct mmio_ro_emulate_ctxt *mmio_ro_ctxt =
-        container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
+    struct mmio_ro_emulate_ctxt *mmio_ro_ctxt = ctxt->data;
 
     /* Only allow naturally-aligned stores at the original %cr2 address. */
     if ( ((bytes | offset) & (bytes - 1)) || offset != mmio_ro_ctxt->cr2 )
@@ -5281,20 +5264,19 @@  static int mmio_ro_emulated_write(
 }
 
 static const struct x86_emulate_ops mmio_ro_emulate_ops = {
-    .read       = mmio_ro_emulated_read,
+    .read       = x86emul_unhandleable_rw,
     .insn_fetch = ptwr_emulated_read,
     .write      = mmio_ro_emulated_write,
 };
 
-static int mmio_intercept_write(
+int mmcfg_intercept_write(
     enum x86_segment seg,
     unsigned long offset,
     void *p_data,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    struct mmio_ro_emulate_ctxt *mmio_ctxt =
-        container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
+    struct mmio_ro_emulate_ctxt *mmio_ctxt = ctxt->data;
 
     /*
      * Only allow naturally-aligned stores no wider than 4 bytes to the
@@ -5303,7 +5285,7 @@  static int mmio_intercept_write(
     if ( ((bytes | offset) & (bytes - 1)) || bytes > 4 ||
          offset != mmio_ctxt->cr2 )
     {
-        MEM_LOG("mmio_intercept: bad write (cr2=%lx, addr=%lx, bytes=%u)",
+        MEM_LOG("mmcfg_intercept: bad write (cr2=%lx, addr=%lx, bytes=%u)",
                 mmio_ctxt->cr2, offset, bytes);
         return X86EMUL_UNHANDLEABLE;
     }
@@ -5318,10 +5300,10 @@  static int mmio_intercept_write(
     return X86EMUL_OKAY;
 }
 
-static const struct x86_emulate_ops mmio_intercept_ops = {
-    .read       = mmio_ro_emulated_read,
+static const struct x86_emulate_ops mmcfg_intercept_ops = {
+    .read       = x86emul_unhandleable_rw,
     .insn_fetch = ptwr_emulated_read,
-    .write      = mmio_intercept_write,
+    .write      = mmcfg_intercept_write,
 };
 
 /* Check if guest is trying to modify a r/o MMIO page. */
@@ -5331,14 +5313,14 @@  int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
     l1_pgentry_t pte;
     unsigned long mfn;
     unsigned int addr_size = is_pv_32bit_vcpu(v) ? 32 : BITS_PER_LONG;
-    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = {
-        .ctxt.regs = regs,
-        .ctxt.addr_size = addr_size,
-        .ctxt.sp_size = addr_size,
-        .ctxt.swint_emulate = x86_swint_emulate_none,
-        .cr2 = addr
+    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = addr };
+    struct x86_emulate_ctxt ctxt = {
+        .regs = regs,
+        .addr_size = addr_size,
+        .sp_size = addr_size,
+        .swint_emulate = x86_swint_emulate_none,
+        .data = &mmio_ro_ctxt
     };
-    const unsigned long *ro_map;
     int rc;
 
     /* Attempt to read the PTE that maps the VA being accessed. */
@@ -5363,12 +5345,10 @@  int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
     if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) )
         return 0;
 
-    if ( pci_mmcfg_decode(mfn, &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) &&
-         ((ro_map = pci_get_ro_map(mmio_ro_ctxt.seg)) == NULL ||
-          !test_bit(mmio_ro_ctxt.bdf, ro_map)) )
-        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_intercept_ops);
+    if ( pci_ro_mmcfg_decode(mfn, &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) )
+        rc = x86_emulate(&ctxt, &mmcfg_intercept_ops);
     else
-        rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
+        rc = x86_emulate(&ctxt, &mmio_ro_emulate_ops);
 
     return rc != X86EMUL_UNHANDLEABLE ? EXCRET_fault_fixed : 0;
 }
diff --git a/xen/arch/x86/x86_64/mmconfig_64.c b/xen/arch/x86/x86_64/mmconfig_64.c
index 1f9a996..7618d67 100644
--- a/xen/arch/x86/x86_64/mmconfig_64.c
+++ b/xen/arch/x86/x86_64/mmconfig_64.c
@@ -198,6 +198,16 @@  bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg,
     return 0;
 }
 
+bool_t pci_ro_mmcfg_decode(unsigned long mfn, unsigned int *seg,
+                           unsigned int *bdf)
+{
+    const unsigned long *ro_map;
+
+    return pci_mmcfg_decode(mfn, seg, bdf) &&
+           ((ro_map = pci_get_ro_map(*seg)) == NULL ||
+             !test_bit(*bdf, ro_map));
+}
+
 int __init pci_mmcfg_arch_init(void)
 {
     int i;
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index f1454ce..56aef3e 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1444,6 +1444,16 @@  static int inject_swint(enum x86_swint_type type,
     return ops->inject_hw_exception(fault_type, error_code, ctxt);
 }
 
+int x86emul_unhandleable_rw(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return X86EMUL_UNHANDLEABLE;
+}
+
 int
 x86_emulate(
     struct x86_emulate_ctxt *ctxt,
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index cfac09b..f33c523 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -430,6 +430,9 @@  struct x86_emulate_ctxt
         } flags;
         uint8_t byte;
     } retire;
+
+    /* Caller data that can be used by x86_emulate_ops' routines. */
+    void *data;
 };
 
 struct x86_emulate_stub {
@@ -463,4 +466,13 @@  void *
 decode_register(
     uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
 
+/* Unhandleable read, write or instruction fetch */
+int
+x86emul_unhandleable_rw(
+    enum x86_segment seg,
+    unsigned long offset,
+    void *p_data,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt);
+
 #endif /* __X86_EMULATE_H__ */
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index 49134b5..142d1b6 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -57,6 +57,7 @@  void hvm_emulate_writeback(
 struct segment_register *hvmemul_get_seg_reg(
     enum x86_segment seg,
     struct hvm_emulate_ctxt *hvmemul_ctxt);
+int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla);
 
 int hvmemul_do_pio_buffer(uint16_t port,
                           unsigned int size,
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 67b34c6..b4075e6 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -7,6 +7,7 @@ 
 #include <xen/spinlock.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
+#include <asm/x86_emulate.h>
 
 /*
  * Per-page-frame information.
@@ -488,6 +489,22 @@  void memguard_unguard_range(void *p, unsigned long l);
 void memguard_guard_stack(void *p);
 void memguard_unguard_stack(void *p);
 
+struct mmio_ro_emulate_ctxt {
+        unsigned long cr2;
+        unsigned int seg, bdf;
+};
+
+extern int mmio_ro_emulated_write(enum x86_segment seg,
+                                  unsigned long offset,
+                                  void *p_data,
+                                  unsigned int bytes,
+                                  struct x86_emulate_ctxt *ctxt);
+extern int mmcfg_intercept_write(enum x86_segment seg,
+                                 unsigned long offset,
+                                 void *p_data,
+                                 unsigned int bytes,
+                                 struct x86_emulate_ctxt *ctxt);
+
 int  ptwr_do_page_fault(struct vcpu *, unsigned long,
                         struct cpu_user_regs *);
 int  mmio_ro_do_page_fault(struct vcpu *, unsigned long,
diff --git a/xen/include/asm-x86/pci.h b/xen/include/asm-x86/pci.h
index 38ace79..36801d3 100644
--- a/xen/include/asm-x86/pci.h
+++ b/xen/include/asm-x86/pci.h
@@ -23,4 +23,7 @@  int pci_msi_conf_write_intercept(struct pci_dev *, unsigned int reg,
 bool_t pci_mmcfg_decode(unsigned long mfn, unsigned int *seg,
                         unsigned int *bdf);
 
+bool_t pci_ro_mmcfg_decode(unsigned long mfn, unsigned int *seg,
+                           unsigned int *bdf);
+
 #endif /* __X86_PCI_H__ */