
[RFC,17/29] gvt: Xen hypervisor GVT-g MPT module

Message ID 1453976511-27322-18-git-send-email-zhi.a.wang@intel.com (mailing list archive)
State New, archived

Commit Message

Wang, Zhi A Jan. 28, 2016, 10:21 a.m. UTC
This is the Xen hypervisor MPT module, which lets GVT-g run under the
Xen hypervisor.

Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
---
 arch/x86/include/asm/xen/hypercall.h |    7 +
 arch/x86/include/asm/xen/interface.h |    1 +
 arch/x86/xen/mmu.c                   |   83 +++
 drivers/gpu/drm/i915/gvt/gvt.c       |   10 +
 drivers/gpu/drm/i915/gvt/gvt.h       |   14 +
 drivers/xen/Kconfig                  |    5 +
 drivers/xen/Makefile                 |    6 +
 drivers/xen/xengt.c                  | 1153 ++++++++++++++++++++++++++++++++++
 include/xen/interface/hvm/hvm_op.h   |  177 +++++-
 include/xen/interface/hvm/ioreq.h    |  132 ++++
 include/xen/interface/memory.h       |   28 +
 include/xen/interface/xen.h          |  106 ++++
 include/xen/xen-ops.h                |    5 +
 13 files changed, 1726 insertions(+), 1 deletion(-)
 create mode 100644 drivers/xen/xengt.c
 create mode 100644 include/xen/interface/hvm/ioreq.h
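
The patch plugs Xen into GVT-g's mediated pass-through (MPT) layer: the
i915/gvt core does not issue hypercalls itself, it calls hypervisor services
through a hook table, and xengt.c supplies the Xen-backed implementations
(xengt_kdm). A condensed sketch of that indirection, not taken verbatim from
the patch: the hook names and signatures follow the xengt_kdm initializer
further down, while gvt_read_guest() is a made-up caller.

struct vgt_device;	/* per-vGPU instance, defined in gvt.h */

/* trimmed-down stand-in for the gvt_kernel_dm hook table declared in gvt.h */
struct gvt_kernel_dm {
	const char *name;
	unsigned long (*g2m_pfn)(int vm_id, unsigned long g_pfn);
	int (*hvm_init)(struct vgt_device *vgt);
	void (*hvm_exit)(struct vgt_device *vgt);
	void *(*gpa_to_va)(struct vgt_device *vgt, unsigned long gpa);
};

/* the core always goes through the table, never straight to a hypercall */
static void *gvt_read_guest(struct gvt_kernel_dm *kdm,
			    struct vgt_device *vgt, unsigned long gpa)
{
	return kdm->gpa_to_va(vgt, gpa);	/* resolves to xen_gpa_to_va() here */
}

Keeping the hypervisor dependency behind one table is what lets the i915 side
stay unaware of which hypervisor is underneath.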

Comments

Joonas Lahtinen Jan. 28, 2016, 11:33 a.m. UTC | #1
Hi,

See the file MAINTAINERS and add Cc: lines according to "XEN HYPERVISOR
INTERFACE". Also I think it'll be useful to split the i915 changes into a
separate patch next in the series (as the reviewer will be different).

We will have to wait for the Xen maintainers to take a position on this.
Are there KVM counterparts for this stuff incoming?
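
For context, the shared gvt.c hunk quoted below already declares both
backends ("extern struct gvt_kernel_dm xengt_kdm;" next to "extern struct
gvt_kernel_dm kvmgt_kdm;"), so a KVM counterpart would only have to export
its own hook table. A purely illustrative sketch of the selection; the
gvt_pick_kdm() helper is invented for the example, and the real wiring
presumably lives in gvt_init_host(), which is only partly visible in the
quoted hunks:

struct gvt_kernel_dm;				/* hook table, see gvt.h */
extern struct gvt_kernel_dm xengt_kdm;		/* this patch */
extern struct gvt_kernel_dm kvmgt_kdm;		/* the KVM counterpart */

/* illustrative only: pick the backend matching the detected hypervisor */
static struct gvt_kernel_dm *gvt_pick_kdm(int running_on_xen)
{
	return running_on_xen ? &xengt_kdm : &kvmgt_kdm;
}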

On Thu, 2016-01-28 at 18:21 +0800, Zhi Wang wrote:
> This is the Xen hypervisor MPT module, which lets GVT-g run under the
> Xen hypervisor.
> 

Cc: xen-devel@lists.xenproject.org
...and so on...

Regards, Joonas

> Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
> ---
>  arch/x86/include/asm/xen/hypercall.h |    7 +
>  arch/x86/include/asm/xen/interface.h |    1 +
>  arch/x86/xen/mmu.c                   |   83 +++
>  drivers/gpu/drm/i915/gvt/gvt.c       |   10 +
>  drivers/gpu/drm/i915/gvt/gvt.h       |   14 +
>  drivers/xen/Kconfig                  |    5 +
>  drivers/xen/Makefile                 |    6 +
>  drivers/xen/xengt.c                  | 1153
> ++++++++++++++++++++++++++++++++++
>  include/xen/interface/hvm/hvm_op.h   |  177 +++++-
>  include/xen/interface/hvm/ioreq.h    |  132 ++++
>  include/xen/interface/memory.h       |   28 +
>  include/xen/interface/xen.h          |  106 ++++
>  include/xen/xen-ops.h                |    5 +
>  13 files changed, 1726 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/xen/xengt.c
>  create mode 100644 include/xen/interface/hvm/ioreq.h
> 
> diff --git a/arch/x86/include/asm/xen/hypercall.h
> b/arch/x86/include/asm/xen/hypercall.h
> index 3bcdcc8..aea97e3 100644
> --- a/arch/x86/include/asm/xen/hypercall.h
> +++ b/arch/x86/include/asm/xen/hypercall.h
> @@ -459,6 +459,13 @@ HYPERVISOR_hvm_op(int op, void *arg)
>  }
>  
>  static inline int
> +HYPERVISOR_domctl(
> +        struct xen_domctl *arg)
> +{
> +        return _hypercall1(int, domctl, arg);
> +}
> +
> +static inline int
>  HYPERVISOR_tmem_op(
>  	struct tmem_op *op)
>  {
> diff --git a/arch/x86/include/asm/xen/interface.h
> b/arch/x86/include/asm/xen/interface.h
> index 6ff4986..a4ee3f4 100644
> --- a/arch/x86/include/asm/xen/interface.h
> +++ b/arch/x86/include/asm/xen/interface.h
> @@ -89,6 +89,7 @@ typedef long xen_long_t;
>  /* Guest handles for primitive C types. */
>  __DEFINE_GUEST_HANDLE(uchar, unsigned char);
>  __DEFINE_GUEST_HANDLE(uint,  unsigned int);
> +__DEFINE_GUEST_HANDLE(ulong,  unsigned long);
>  DEFINE_GUEST_HANDLE(char);
>  DEFINE_GUEST_HANDLE(int);
>  DEFINE_GUEST_HANDLE(void);
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index c913ca4..da95d45 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -2931,3 +2931,86 @@ int xen_unmap_domain_gfn_range(struct
> vm_area_struct *vma,
>  #endif
>  }
>  EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
> +
> +/* Note: here 'mfn' is actually gfn!!! */
> +struct vm_struct * xen_remap_domain_mfn_range_in_kernel(unsigned
> long mfn,
> +		int nr, unsigned domid)
> +{
> +	struct vm_struct *area;
> +	struct remap_data rmd;
> +	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
> +	int batch;
> +	unsigned long range, addr;
> +	pgprot_t prot;
> +	int err;
> +
> +	WARN_ON(in_interrupt() || irqs_disabled());
> +
> +	area = alloc_vm_area(nr << PAGE_SHIFT, NULL);
> +	if (!area)
> +		return NULL;
> +
> +	addr = (unsigned long)area->addr;
> +
> +	prot = __pgprot(pgprot_val(PAGE_KERNEL));
> +
> +	rmd.mfn = &mfn;
> +	rmd.prot = prot;
> +
> +	while (nr) {
> +		batch = min(REMAP_BATCH_SIZE, nr);
> +		range = (unsigned long)batch << PAGE_SHIFT;
> +
> +		rmd.mmu_update = mmu_update;
> +		err = apply_to_page_range(&init_mm, addr, range,
> +				remap_area_mfn_pte_fn, &rmd);
> +		if (err || HYPERVISOR_mmu_update(mmu_update, batch,
> NULL, domid) < 0)
> +			goto err;
> +
> +		nr -= batch;
> +		addr += range;
> +	}
> +
> +	xen_flush_tlb_all();
> +	return area;
> +err:
> +	free_vm_area(area);
> +	xen_flush_tlb_all();
> +	return NULL;
> +}
> +EXPORT_SYMBOL(xen_remap_domain_mfn_range_in_kernel);
> +
> +void xen_unmap_domain_mfn_range_in_kernel(struct vm_struct *area,
> int nr,
> +		unsigned domid)
> +{
> +	struct remap_data rmd;
> +	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
> +	int batch;
> +	unsigned long range, addr = (unsigned long)area->addr;
> +#define INVALID_MFN (~0UL)
> +	unsigned long invalid_mfn = INVALID_MFN;
> +	int err;
> +
> +	WARN_ON(in_interrupt() || irqs_disabled());
> +
> +	rmd.mfn = &invalid_mfn;
> +	rmd.prot = PAGE_NONE;
> +
> +	while (nr) {
> +		batch = min(REMAP_BATCH_SIZE, nr);
> +		range = (unsigned long)batch << PAGE_SHIFT;
> +
> +		rmd.mmu_update = mmu_update;
> +		err = apply_to_page_range(&init_mm, addr, range,
> +				remap_area_mfn_pte_fn, &rmd);
> +		BUG_ON(err);
> +		BUG_ON(HYPERVISOR_mmu_update(mmu_update, batch,
> NULL, domid) < 0);
> +
> +		nr -= batch;
> +		addr += range;
> +	}
> +
> +	free_vm_area(area);
> +	xen_flush_tlb_all();
> +}
> +EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range_in_kernel);
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.c
> b/drivers/gpu/drm/i915/gvt/gvt.c
> index a71873c..28a51d9 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.c
> +++ b/drivers/gpu/drm/i915/gvt/gvt.c
> @@ -21,12 +21,14 @@
>   * SOFTWARE.
>   */
>  
> +#include <linux/types.h>
>  #include <xen/xen.h>
>  #include <linux/kthread.h>
>  
>  #include "gvt.h"
>  
>  struct gvt_host gvt_host;
> +EXPORT_SYMBOL(gvt_host);
>  
>  extern struct gvt_kernel_dm xengt_kdm;
>  extern struct gvt_kernel_dm kvmgt_kdm;
> @@ -36,6 +38,13 @@ static struct gvt_io_emulation_ops
> default_io_emulation_ops = {
>  	.emulate_mmio_write = gvt_emulate_mmio_write,
>  };
>  
> +unsigned int pa_to_mmio_offset(struct vgt_device *vgt,
> +               uint64_t pa);
> +
> +static struct gvt_mpt_ops default_export_mpt_ops = {
> +	.pa_to_mmio_offset = pa_to_mmio_offset,
> +};
> +
>  static const char *supported_hypervisors[] = {
>  	[GVT_HYPERVISOR_TYPE_XEN] = "Xen Hypervisor",
>  	[GVT_HYPERVISOR_TYPE_KVM] = "KVM",
> @@ -78,6 +87,7 @@ static bool gvt_init_host(void)
>  			supported_hypervisors[host-
> >hypervisor_type]);
>  
>  	host->emulate_ops = &default_io_emulation_ops;
> +	host->mpt_ops = &default_export_mpt_ops;
>  	idr_init(&host->device_idr);
>  	mutex_init(&host->device_idr_lock);
>  
> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h
> b/drivers/gpu/drm/i915/gvt/gvt.h
> index eb5fd47..83f90a2 100644
> --- a/drivers/gpu/drm/i915/gvt/gvt.h
> +++ b/drivers/gpu/drm/i915/gvt/gvt.h
> @@ -58,6 +58,10 @@ struct gvt_io_emulation_ops {
>  	bool (*emulate_cfg_write)(struct vgt_device *, unsigned int,
> void *, int);
>  };
>  
> +struct gvt_mpt_ops {
> +	unsigned int (*pa_to_mmio_offset)(struct vgt_device *, u64);
> +};
> +
>  struct gvt_host {
>  	bool initialized;
>  	int hypervisor_type;
> @@ -65,6 +69,7 @@ struct gvt_host {
>  	struct idr device_idr;
>  	struct gvt_kernel_dm *kdm;
>  	struct gvt_io_emulation_ops *emulate_ops;
> +	struct gvt_mpt_ops *mpt_ops;
>  };
>  
>  extern struct gvt_host gvt_host;
> @@ -123,6 +128,9 @@ struct vgt_device {
>  	struct gvt_virtual_device_state state;
>  	struct gvt_statistics stat;
>  	struct gvt_vgtt_info gtt;
> +	void *hypervisor_data;
> +	unsigned long low_mem_max_gpfn;
> +	atomic_t crashing;
>  };
>  
>  struct gvt_gm_allocator {
> @@ -423,6 +431,12 @@ static inline int gvt_pci_mmio_is_enabled(struct
> vgt_device *vgt)
>  		_REGBIT_CFG_COMMAND_MEMORY;
>  }
>  
> +static inline uint64_t gvt_mmio_bar_base(struct vgt_device *vgt)
> +{
> +        char *cfg_space = &vgt->state.cfg.space[0];
> +        return *(u64 *)(cfg_space + GVT_REG_CFG_SPACE_BAR0);
> +}
> +
>  #define __vreg(vgt, off) (*(u32*)(vgt->state.mmio.vreg + off))
>  #define __vreg8(vgt, off) (*(u8*)(vgt->state.mmio.vreg + off))
>  #define __vreg16(vgt, off) (*(u16*)(vgt->state.mmio.vreg + off))
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 73708ac..9ee2033 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -291,4 +291,9 @@ config XEN_SYMS
>  config XEN_HAVE_VPMU
>         bool
>  
> +config XENGT
> +        tristate "Xen Dom0 support for i915 gvt device model"
> +        depends on XEN_DOM0 && I915_GVT
> +        default m
> +
>  endmenu
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 9b7a35c..ff75c36 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -9,6 +9,10 @@ CFLAGS_features.o			:=
> $(nostackp)
>  
>  CFLAGS_efi.o				+= -fshort-wchar
>  
> +
> +I915                     := drivers/gpu/drm/i915
> +CFLAGS_xengt.o          += -Wall -Werror -I$(I915) -I$(I915)/gvt
> +
>  dom0-$(CONFIG_PCI) += pci.o
>  dom0-$(CONFIG_USB_SUPPORT) += dbgp.o
>  dom0-$(CONFIG_XEN_ACPI) += acpi.o $(xen-pad-y)
> @@ -36,6 +40,8 @@ obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-
> acpi-processor.o
>  obj-$(CONFIG_XEN_EFI)			+= efi.o
>  obj-$(CONFIG_XEN_SCSI_BACKEND)		+= xen-scsiback.o
>  obj-$(CONFIG_XEN_AUTO_XLATE)		+= xlate_mmu.o
> +obj-$(CONFIG_XENGT)                     += xengt.o
> +
>  xen-evtchn-y				:= evtchn.o
>  xen-gntdev-y				:= gntdev.o
>  xen-gntalloc-y				:= gntalloc.o
> diff --git a/drivers/xen/xengt.c b/drivers/xen/xengt.c
> new file mode 100644
> index 0000000..6c600adc
> --- /dev/null
> +++ b/drivers/xen/xengt.c
> @@ -0,0 +1,1153 @@
> +/*
> + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> modify
> + * it under the terms of Version 2 of the GNU General Public License
> as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-
> 1301 USA.
> + */
> +
> +/*
> + * NOTE:
> + * This file contains hypervisor specific interactions to
> + * implement the concept of mediated pass-through framework.
> + * What this file provides is actually a general abstraction
> + * of in-kernel device model, which is not vgt specific.
> + *
> + * Now temporarily in vgt code. long-term this should be
> + * in hypervisor (xen/kvm) specific directory
> + */
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/types.h>
> +#include <linux/kthread.h>
> +#include <linux/time.h>
> +#include <linux/freezer.h>
> +#include <linux/wait.h>
> +#include <linux/sched.h>
> +
> +#include <asm/xen/hypercall.h>
> +#include <asm/xen/page.h>
> +#include <xen/xen-ops.h>
> +#include <xen/events.h>
> +#include <xen/interface/hvm/params.h>
> +#include <xen/interface/hvm/hvm_op.h>
> +#include <xen/interface/hvm/ioreq.h>
> +#include <xen/interface/memory.h>
> +#include <xen/interface/platform.h>
> +#include <xen/interface/vcpu.h>
> +
> +#include "gvt.h"
> +
> +MODULE_AUTHOR("Intel Corporation");
> +MODULE_DESCRIPTION("XenGT mediated passthrough driver");
> +MODULE_LICENSE("GPL");
> +MODULE_VERSION("0.1");
> +
> +#define MAX_HVM_VCPUS_SUPPORTED 128
> +struct gvt_hvm_info {
> +	/* iopage_vma->addr is just iopage. We need iopage_vma on VM destroy */
> +	shared_iopage_t *iopage;
> +	struct vm_struct *iopage_vma;
> +	int *evtchn_irq; /* the event channel irqs that handle HVM io
> +				requests; index is vcpu id */
> +
> +	DECLARE_BITMAP(ioreq_pending, MAX_HVM_VCPUS_SUPPORTED);
> +	wait_queue_head_t io_event_wq;
> +	struct task_struct *emulation_thread;
> +
> +	int nr_vcpu;
> +
> +	ioservid_t iosrv_id;    /* io-request server id */
> +
> +#define VMEM_1MB		(1ULL << 20)	/* the size of
> the first 1MB */
> +#define VMEM_BUCK_SHIFT		20
> +#define VMEM_BUCK_SIZE		(1ULL << VMEM_BUCK_SHIFT)
> +#define VMEM_BUCK_MASK		(~(VMEM_BUCK_SIZE - 1))
> +	uint64_t vmem_sz;
> +	/* for the 1st 1MB memory of HVM: each vm_struct means one
> 4K-page */
> +	struct vm_struct **vmem_vma_low_1mb;
> +	/* for >1MB memory of HVM: each vm_struct means 1MB */
> +	struct vm_struct **vmem_vma;
> +	/* for >1MB memory of HVM: each vm_struct means 4KB */
> +	struct vm_struct **vmem_vma_4k;
> +};
> +
> +static int xen_pause_domain(int vm_id);
> +static int xen_shutdown_domain(int vm_id);
> +static void *xen_gpa_to_va(struct vgt_device *vgt, unsigned long
> gpa);
> +
> +#define XEN_ASSERT_VM(x, vgt)					
> 	\
> +	do {								
> \
> +		if (!(x)) {						
> \
> +			printk("Assert at %s line %d\n",		
> \
> +				__FILE__, __LINE__);			
> \
> +			if (atomic_cmpxchg(&(vgt)->crashing, 0, 1))	
> \
> +				break;				
> 	\
> +			gvt_err("Killing VM%d\n", (vgt)->vm_id);	
> \
> +			if (!xen_pause_domain((vgt->vm_id)))		
> \
> +				xen_shutdown_domain((vgt->vm_id));	
> \
> +		}							
> \
> +	} while (0)
> +
> +/* Translate from VM's guest pfn to machine pfn */
> +static unsigned long xen_g2m_pfn(int vm_id, unsigned long g_pfn)
> +{
> +	struct xen_get_mfn_from_pfn pfn_arg;
> +	int rc;
> +	unsigned long pfn_list[1];
> +
> +	pfn_list[0] = g_pfn;
> +
> +	set_xen_guest_handle(pfn_arg.pfn_list, pfn_list);
> +	pfn_arg.nr_pfns = 1;
> +	pfn_arg.domid = vm_id;
> +
> +	rc = HYPERVISOR_memory_op(XENMEM_get_mfn_from_pfn,
> &pfn_arg);
> +	if (rc < 0) {
> +		printk("failed to get mfn for gpfn(0x%lx), errno=%d\n",
> +			g_pfn, rc);
> +		return INVALID_MFN;
> +	}
> +
> +	return pfn_list[0];
> +}
> +
> +static int xen_get_max_gpfn(int vm_id)
> +{
> +	domid_t dom_id = vm_id;
> +	int max_gpfn = HYPERVISOR_memory_op(XENMEM_maximum_gpfn,
> &dom_id);
> +	BUG_ON(max_gpfn < 0);
> +	return max_gpfn;
> +}
> +
> +static int xen_pause_domain(int vm_id)
> +{
> +	int rc;
> +	struct xen_domctl domctl;
> +
> +	domctl.domain = vm_id;
> +	domctl.cmd = XEN_DOMCTL_pausedomain;
> +	domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
> +
> +	rc = HYPERVISOR_domctl(&domctl);
> +	if (rc != 0)
> +		printk("HYPERVISOR_domctl pausedomain fail with
> %d!\n", rc);
> +
> +	return rc;
> +}
> +
> +static int xen_shutdown_domain(int vm_id)
> +{
> +	int rc;
> +	struct sched_remote_shutdown r;
> +
> +	r.reason = SHUTDOWN_crash;
> +	r.domain_id = vm_id;
> +	rc = HYPERVISOR_sched_op(SCHEDOP_remote_shutdown, &r);
> +	if (rc != 0)
> +		printk("HYPERVISOR_sched_op failed: %d\n", rc);
> +	return rc;
> +}
> +
> +static int xen_domain_iomem_perm(uint32_t domain_id, uint64_t
> first_mfn,
> +                               uint64_t nr_mfns, uint8_t
> allow_access)
> +{
> +	struct xen_domctl arg;
> +	int rc;
> +
> +	arg.domain = domain_id;
> +	arg.cmd = XEN_DOMCTL_iomem_permission;
> +	arg.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
> +	arg.u.iomem_perm.first_mfn = first_mfn;
> +	arg.u.iomem_perm.nr_mfns = nr_mfns;
> +	arg.u.iomem_perm.allow_access = allow_access;
> +	rc = HYPERVISOR_domctl(&arg);
> +
> +	return rc;
> +}
> +
> +static int xen_hvm_memory_mapping(int vm_id, uint64_t first_gfn,
> uint64_t first_mfn,
> +				  uint32_t nr_mfns, uint32_t
> add_mapping)
> +{
> +	struct xen_domctl arg;
> +	int rc;
> +
> +	if (add_mapping) {
> +		rc = xen_domain_iomem_perm(vm_id, first_mfn,
> nr_mfns, 1);
> +	        if (rc < 0) {
> +			printk(KERN_ERR "xen_domain_iomem_perm
> failed: %d\n", rc);
> +			return rc;
> +		}
> +	}
> +
> +	arg.domain = vm_id;
> +	arg.cmd = XEN_DOMCTL_memory_mapping;
> +	arg.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
> +	arg.u.memory_mapping.first_gfn = first_gfn;
> +	arg.u.memory_mapping.first_mfn = first_mfn;
> +	arg.u.memory_mapping.nr_mfns = nr_mfns;
> +	arg.u.memory_mapping.add_mapping = add_mapping;
> +
> +	rc = HYPERVISOR_domctl(&arg);
> +	if (rc < 0) {
> +		printk(KERN_ERR "HYPERVISOR_domctl failed: %d\n",
> rc);
> +		return rc;
> +	}
> +
> +	if (!add_mapping) {
> +		rc = xen_domain_iomem_perm(vm_id, first_mfn,
> nr_mfns, 0);
> +	        if (rc < 0) {
> +			printk(KERN_ERR "xen_domain_iomem_perm
> failed: %d\n", rc);
> +			return rc;
> +		}
> +	}
> +
> +	return rc;
> +}
> +
> +static int xen_map_mfn_to_gpfn(int vm_id, unsigned long gpfn,
> +	unsigned long mfn, int nr, int map, enum map_type type)
> +{
> +	int rc;
> +	rc = xen_hvm_memory_mapping(vm_id, gpfn, mfn, nr,
> +			map ? DPCI_ADD_MAPPING :
> DPCI_REMOVE_MAPPING);
> +	if (rc != 0)
> +		printk("xen_hvm_memory_mapping failed: %d\n", rc);
> +	return rc;
> +}
> +
> +static int xen_get_nr_vcpu(int vm_id)
> +{
> +	struct xen_domctl arg;
> +	int rc;
> +
> +	arg.domain = vm_id;
> +	arg.cmd = XEN_DOMCTL_getdomaininfo;
> +	arg.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
> +
> +	rc = HYPERVISOR_domctl(&arg);
> +	if (rc < 0) {
> +		printk(KERN_ERR "HYPERVISOR_domctl failed, ret=%d\n", rc);
> +		/* assume it is UP */
> +		return 1;
> +	}
> +
> +	return arg.u.getdomaininfo.max_vcpu_id + 1;
> +}
> +
> +static int hvm_create_iorequest_server(struct vgt_device *vgt)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	struct xen_hvm_create_ioreq_server arg;
> +	int r;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.handle_bufioreq = 0;
> +	r = HYPERVISOR_hvm_op(HVMOP_create_ioreq_server, &arg);
> +	if (r < 0) {
> +		printk(KERN_ERR "Cannot create io-request server: %d!\n", r);
> +		return r;
> +	}
> +	info->iosrv_id = arg.id;
> +
> +	return r;
> +}
> +
> +static int hvm_toggle_iorequest_server(struct vgt_device *vgt, bool
> enable)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	struct xen_hvm_set_ioreq_server_state arg;
> +	int r;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	arg.enabled = enable;
> +	r = HYPERVISOR_hvm_op(HVMOP_set_ioreq_server_state, &arg);
> +	if (r < 0) {
> +		printk(KERN_ERR "Cannot %s io-request server: %d!\n",
> +			enable ? "enable" : "disable", r);
> +		return r;
> +	}
> +
> +	return r;
> +}
> +
> +static int hvm_get_ioreq_pfn(struct vgt_device *vgt, uint64_t
> *value)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	struct xen_hvm_get_ioreq_server_info arg;
> +	int r;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	r = HYPERVISOR_hvm_op(HVMOP_get_ioreq_server_info, &arg);
> +	if (r < 0) {
> +		printk(KERN_ERR "Cannot get ioreq pfn: %d!\n", r);
> +		return r;
> +	}
> +	*value = arg.ioreq_pfn;
> +	return r;
> +}
> +
> +static int hvm_destroy_iorequest_server(struct vgt_device *vgt)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	struct xen_hvm_destroy_ioreq_server arg;
> +	int r;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	r = HYPERVISOR_hvm_op(HVMOP_destroy_ioreq_server, &arg);
> +	if (r < 0) {
> +		printk(KERN_ERR "Cannot destroy io-request
> server(%d): %d!\n",
> +			info->iosrv_id, r);
> +		return r;
> +	}
> +	info->iosrv_id = 0;
> +
> +	return r;
> +}
> +
> +static int hvm_map_io_range_to_ioreq_server(struct vgt_device *vgt,
> +	int is_mmio, uint64_t start, uint64_t end, int map)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	xen_hvm_io_range_t arg;
> +	int rc;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	arg.type = is_mmio ? HVMOP_IO_RANGE_MEMORY :
> HVMOP_IO_RANGE_PORT;
> +	arg.start = start;
> +	arg.end = end;
> +
> +	if (map)
> +		rc =
> HYPERVISOR_hvm_op(HVMOP_map_io_range_to_ioreq_server, &arg);
> +	else
> +		rc =
> HYPERVISOR_hvm_op(HVMOP_unmap_io_range_from_ioreq_server, &arg);
> +
> +	return rc;
> +}
> +
> +static int hvm_map_pcidev_to_ioreq_server(struct vgt_device *vgt,
> uint64_t sbdf)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	xen_hvm_io_range_t arg;
> +	int rc;
> +
> +	arg.domid = vgt->vm_id;
> +	arg.id = info->iosrv_id;
> +	arg.type = HVMOP_IO_RANGE_PCI;
> +	arg.start = arg.end = sbdf;
> +	rc = HYPERVISOR_hvm_op(HVMOP_map_io_range_to_ioreq_server,
> &arg);
> +	if (rc < 0) {
> +		printk(KERN_ERR "Cannot map pci_dev to ioreq_server:
> %d!\n", rc);
> +		return rc;
> +	}
> +
> +	return rc;
> +}
> +
> +static int hvm_set_mem_type(struct vgt_device *vgt,
> +	uint16_t mem_type, uint64_t first_pfn, uint64_t nr)
> +{
> +	xen_hvm_set_mem_type_t args;
> +	int rc;
> +
> +	args.domid = vgt->vm_id;
> +	args.hvmmem_type = mem_type;
> +	args.first_pfn = first_pfn;
> +	args.nr = nr;
> +	rc = HYPERVISOR_hvm_op(HVMOP_set_mem_type, &args);
> +
> +	return rc;
> +}
> +
> +static int hvm_wp_page_to_ioreq_server(struct vgt_device *vgt,
> unsigned long page, int set)
> +{
> +	int rc = 0;
> +	uint64_t start, end;
> +	uint16_t mem_type;
> +
> +	start = page << PAGE_SHIFT;
> +	end = ((page + 1) << PAGE_SHIFT) - 1;
> +
> +	rc = hvm_map_io_range_to_ioreq_server(vgt, 1, start, end,
> set);
> +	if (rc < 0) {
> +		printk(KERN_ERR "Failed to %s page 0x%lx to
> ioreq_server: %d!\n",
> +			set ? "map":"unmap", page , rc);
> +		return rc;
> +	}
> +
> +	mem_type = set ? HVMMEM_mmio_write_dm : HVMMEM_ram_rw;
> +	rc = hvm_set_mem_type(vgt, mem_type, page, 1);
> +	if (rc < 0) {
> +		printk(KERN_ERR "Failed to set mem type of page
> 0x%lx to %s!\n", page,
> +			set ?
> "HVMMEM_mmio_write_dm":"HVMMEM_ram_rw");
> +		return rc;
> +	}
> +	return rc;
> +}
> +
> +static int xen_set_trap_area(struct vgt_device *vgt, uint64_t start,
> uint64_t end, bool map)
> +{
> +	if (!gvt_pci_mmio_is_enabled(vgt))
> +		return 0;
> +
> +	return hvm_map_io_range_to_ioreq_server(vgt, 1, start, end,
> map);
> +}
> +
> +static struct vm_struct *xen_map_iopage(struct vgt_device *vgt)
> +{
> +	uint64_t ioreq_pfn;
> +	int rc;
> +
> +	rc = hvm_create_iorequest_server(vgt);
> +	if (rc < 0)
> +		return NULL;
> +	rc = hvm_get_ioreq_pfn(vgt, &ioreq_pfn);
> +	if (rc < 0) {
> +		hvm_destroy_iorequest_server(vgt);
> +		return NULL;
> +	}
> +
> +	return xen_remap_domain_mfn_range_in_kernel(ioreq_pfn, 1,
> vgt->vm_id);
> +}
> +
> +static bool xen_set_guest_page_writeprotection(struct vgt_device
> *vgt,
> +		guest_page_t *guest_page)
> +{
> +	int r;
> +
> +	if (guest_page->writeprotection)
> +		return true;
> +
> +	r = hvm_wp_page_to_ioreq_server(vgt, guest_page->gfn, 1);
> +	if (r) {
> +		gvt_err("fail to set write protection.\n");
> +		return false;
> +	}
> +
> +	guest_page->writeprotection = true;
> +
> +	atomic_inc(&vgt->gtt.n_write_protected_guest_page);
> +
> +	return true;
> +}
> +
> +static bool xen_clear_guest_page_writeprotection(struct vgt_device
> *vgt,
> +		guest_page_t *guest_page)
> +{
> +	int r;
> +
> +	if (!guest_page->writeprotection)
> +		return true;
> +
> +	r = hvm_wp_page_to_ioreq_server(vgt, guest_page->gfn, 0);
> +	if (r) {
> +		gvt_err("fail to clear write protection.\n");
> +		return false;
> +	}
> +
> +	guest_page->writeprotection = false;
> +
> +	atomic_dec(&vgt->gtt.n_write_protected_guest_page);
> +
> +	return true;
> +}
> +
> +static int xen_detect_host(void)
> +{
> +	return xen_initial_domain();
> +}
> +
> +static int xen_virt_to_mfn(void *addr)
> +{
> +	return virt_to_mfn(addr);
> +}
> +
> +static void *xen_mfn_to_virt(int mfn)
> +{
> +	return mfn_to_virt(mfn);
> +}
> +
> +static int xen_inject_msi(int vm_id, u32 addr_lo, u16 data)
> +{
> +	struct xen_hvm_inject_msi info = {
> +		.domid	= vm_id,
> +		.addr	= addr_lo, /* only low addr used */
> +		.data	= data,
> +	};
> +
> +	return HYPERVISOR_hvm_op(HVMOP_inject_msi, &info);
> +}
> +
> +static int vgt_hvm_vmem_init(struct vgt_device *vgt)
> +{
> +	unsigned long i, j, gpfn, count;
> +	unsigned long nr_low_1mb_bkt, nr_high_bkt, nr_high_4k_bkt;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	if (!vgt->vm_id)
> +		return 0;
> +
> +	ASSERT(info->vmem_vma == NULL && info->vmem_vma_low_1mb ==
> NULL);
> +
> +	info->vmem_sz = xen_get_max_gpfn(vgt->vm_id) + 1;
> +	info->vmem_sz <<= PAGE_SHIFT;
> +
> +	/* warn on non-1MB-aligned memory layout of HVM */
> +	if (info->vmem_sz & ~VMEM_BUCK_MASK)
> +		gvt_err("VM%d: vmem_sz=0x%llx!\n", vgt->vm_id, info-
> >vmem_sz);
> +
> +	nr_low_1mb_bkt = VMEM_1MB >> PAGE_SHIFT;
> +	nr_high_bkt = (info->vmem_sz >> VMEM_BUCK_SHIFT);
> +	nr_high_4k_bkt = (info->vmem_sz >> PAGE_SHIFT);
> +
> +	info->vmem_vma_low_1mb =
> +		vzalloc(sizeof(*info->vmem_vma) * nr_low_1mb_bkt);
> +	info->vmem_vma =
> +		vzalloc(sizeof(*info->vmem_vma) * nr_high_bkt);
> +	info->vmem_vma_4k =
> +		vzalloc(sizeof(*info->vmem_vma) * nr_high_4k_bkt);
> +
> +	if (info->vmem_vma_low_1mb == NULL || info->vmem_vma == NULL
> ||
> +		info->vmem_vma_4k == NULL) {
> +		gvt_err("Insufficient memory for vmem_vma,
> vmem_sz=0x%llx\n",
> +				info->vmem_sz );
> +		goto err;
> +	}
> +
> +	/* map the low 1MB memory */
> +	for (i = 0; i < nr_low_1mb_bkt; i++) {
> +		info->vmem_vma_low_1mb[i] =
> +			xen_remap_domain_mfn_range_in_kernel(i, 1,
> vgt->vm_id);
> +
> +		if (info->vmem_vma_low_1mb[i] != NULL)
> +			continue;
> +
> +		/* Don't warn on [0xa0000, 0x100000): a known non-
> RAM hole */
> +		if (i < (0xa0000 >> PAGE_SHIFT))
> +			printk(KERN_ERR "GVT: VM%d: can't map GPFN
> %ld!\n",
> +				vgt->vm_id, i);
> +	}
> +
> +	printk("start vmem_map\n");
> +	count = 0;
> +	/* map the >1MB memory */
> +	for (i = 1; i < nr_high_bkt; i++) {
> +		gpfn = i << (VMEM_BUCK_SHIFT - PAGE_SHIFT);
> +		info->vmem_vma[i] =
> xen_remap_domain_mfn_range_in_kernel(
> +				gpfn, VMEM_BUCK_SIZE >> PAGE_SHIFT,
> vgt->vm_id);
> +
> +		if (info->vmem_vma[i] != NULL)
> +			continue;
> +
> +
> +		/* for <4G GPFNs: skip the hole after
> low_mem_max_gpfn */
> +		if (gpfn < (1 << (32 - PAGE_SHIFT)) &&
> +			vgt->low_mem_max_gpfn != 0 &&
> +			gpfn > vgt->low_mem_max_gpfn)
> +			continue;
> +
> +		for (j = gpfn;
> +		     j < ((i + 1) << (VMEM_BUCK_SHIFT -
> PAGE_SHIFT));
> +		     j++) {
> +			info->vmem_vma_4k[j] =
> xen_remap_domain_mfn_range_in_kernel(j, 1, vgt->vm_id);
> +
> +			if (info->vmem_vma_4k[j]) {
> +				count++;
> +				printk(KERN_ERR "map 4k gpa
> (%lx)\n", j << PAGE_SHIFT);
> +			}
> +		}
> +
> +		/* To reduce the number of error messages (some of them,
> +		 * due to the MMIO hole, are spurious and harmless), only
> +		 * print a message at every 64MB boundary or for >4GB
> +		 * memory.
> +		 */
> +		if ((i % 64 == 0) || (i >= (1ULL << (32 -
> VMEM_BUCK_SHIFT))))
> +			printk(KERN_ERR "GVT: VM%d: can't map
> %ldKB\n",
> +				vgt->vm_id, i);
> +	}
> +	printk("end vmem_map (%ld 4k mappings)\n", count);
> +
> +	return 0;
> +err:
> +	vfree(info->vmem_vma);
> +	vfree(info->vmem_vma_low_1mb);
> +	vfree(info->vmem_vma_4k);
> +	info->vmem_vma = info->vmem_vma_low_1mb = info->vmem_vma_4k
> = NULL;
> +	return -ENOMEM;
> +}
> +
> +static void vgt_vmem_destroy(struct vgt_device *vgt)
> +{
> +	int i, j;
> +	unsigned long nr_low_1mb_bkt, nr_high_bkt, nr_high_bkt_4k;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	if (vgt->vm_id == 0)
> +		return;
> +
> +	/*
> +	 * Maybe the VM hasn't accessed the GEN MMIO (e.g., it is still in
> +	 * legacy VGA mode), so no mapping has been created yet.
> +	 */
> +	if (info->vmem_vma == NULL && info->vmem_vma_low_1mb ==
> NULL)
> +		return;
> +
> +	ASSERT(info->vmem_vma != NULL && info->vmem_vma_low_1mb !=
> NULL);
> +
> +	nr_low_1mb_bkt = VMEM_1MB >> PAGE_SHIFT;
> +	nr_high_bkt = (info->vmem_sz >> VMEM_BUCK_SHIFT);
> +	nr_high_bkt_4k = (info->vmem_sz >> PAGE_SHIFT);
> +
> +	for (i = 0; i < nr_low_1mb_bkt; i++) {
> +		if (info->vmem_vma_low_1mb[i] == NULL)
> +			continue;
> +		xen_unmap_domain_mfn_range_in_kernel(info-
> >vmem_vma_low_1mb[i],
> +				1, vgt->vm_id);
> +	}
> +
> +	for (i = 1; i < nr_high_bkt; i++) {
> +		if (info->vmem_vma[i] == NULL) {
> +			for (j = (i << (VMEM_BUCK_SHIFT -
> PAGE_SHIFT));
> +			     j < ((i + 1) << (VMEM_BUCK_SHIFT -
> PAGE_SHIFT));
> +			     j++) {
> +				if (info->vmem_vma_4k[j] == NULL)
> +					continue;
> +				xen_unmap_domain_mfn_range_in_kernel
> (
> +					info->vmem_vma_4k[j], 1,
> vgt->vm_id);
> +			}
> +			continue;
> +		}
> +		xen_unmap_domain_mfn_range_in_kernel(
> +			info->vmem_vma[i], VMEM_BUCK_SIZE >>
> PAGE_SHIFT,
> +			vgt->vm_id);
> +	}
> +
> +	vfree(info->vmem_vma);
> +	vfree(info->vmem_vma_low_1mb);
> +	vfree(info->vmem_vma_4k);
> +}
> +
> +static int _hvm_mmio_emulation(struct vgt_device *vgt, struct ioreq
> *req)
> +{
> +	int i, sign;
> +	void *gva;
> +	unsigned long gpa;
> +	uint64_t base = gvt_mmio_bar_base(vgt);
> +	uint64_t tmp;
> +	int pvinfo_page;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	if (info->vmem_vma == NULL) {
> +		tmp = gvt_host.mpt_ops->pa_to_mmio_offset(vgt, req-
> >addr);
> +		pvinfo_page = (tmp >= VGT_PVINFO_PAGE
> +				&& tmp < (VGT_PVINFO_PAGE +
> VGT_PVINFO_SIZE));
> +		/*
> +		 * hvmloader will read PVINFO to identify if HVM is
> in GVT
> +		 * or VTD. So we don't trigger HVM mapping logic
> here.
> +		 */
> +		if (!pvinfo_page && vgt_hvm_vmem_init(vgt) < 0) {
> +			gvt_err("can not map the memory of
> VM%d!!!\n", vgt->vm_id);
> +			XEN_ASSERT_VM(info->vmem_vma != NULL, vgt);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	sign = req->df ? -1 : 1;
> +
> +	if (req->dir == IOREQ_READ) {
> +		/* MMIO READ */
> +		if (!req->data_is_ptr) {
> +			if (req->count != 1)
> +				goto err_ioreq_count;
> +
> +			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_read:
> target register (%lx).\n",
> +			//	(unsigned long)req->addr);
> +			if (!gvt_host.emulate_ops-
> >emulate_mmio_read(vgt, req->addr, &req->data, req->size))
> +				return -EINVAL;
> +		}
> +		else {
> +			if ((req->addr + sign * req->count * req-
> >size < base)
> +			   || (req->addr + sign * req->count * req-
> >size >=
> +				base + vgt->state.cfg.bar_size[0]))
> +				goto err_ioreq_range;
> +			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_read:
> rep %d target memory %lx, slow!\n",
> +			//	req->count, (unsigned long)req-
> >addr);
> +
> +			for (i = 0; i < req->count; i++) {
> +				if (!gvt_host.emulate_ops-
> >emulate_mmio_read(vgt, req->addr + sign * i * req->size,
> +					&tmp, req->size))
> +					return -EINVAL;
> +				gpa = req->data + sign * i * req-
> >size;
> +				if(!vgt->vm_id)
> +					gva = (char
> *)xen_mfn_to_virt(gpa >> PAGE_SHIFT) + offset_in_page(gpa);
> +				else
> +					gva = xen_gpa_to_va(vgt,
> gpa);
> +				if (gva) {
> +					memcpy(gva, &tmp, req-
> >size);
> +				} else
> +					gvt_err("VM %d is trying to
> store mmio data block to invalid gpa: 0x%lx.\n", vgt->vm_id, gpa);
> +			}
> +		}
> +	}
> +	else { /* MMIO Write */
> +		if (!req->data_is_ptr) {
> +			if (req->count != 1)
> +				goto err_ioreq_count;
> +			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_write:
> target register (%lx).\n", (unsigned long)req->addr);
> +			if (!gvt_host.emulate_ops-
> >emulate_mmio_write(vgt, req->addr, &req->data, req->size))
> +				return -EINVAL;
> +		}
> +		else {
> +			if ((req->addr + sign * req->count * req-
> >size < base)
> +			    || (req->addr + sign * req->count * req-
> >size >=
> +				base + vgt->state.cfg.bar_size[0]))
> +				goto err_ioreq_range;
> +			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_write:
> rep %d target memory %lx, slow!\n",
> +			//	req->count, (unsigned long)req-
> >addr);
> +
> +			for (i = 0; i < req->count; i++) {
> +				gpa = req->data + sign * i * req-
> >size;
> +				if(!vgt->vm_id)
> +					gva = (char
> *)xen_mfn_to_virt(gpa >> PAGE_SHIFT) + offset_in_page(gpa);
> +				else
> +					gva = xen_gpa_to_va(vgt,
> gpa);
> +
> +				if (gva != NULL)
> +					memcpy(&tmp, gva, req-
> >size);
> +				else {
> +					tmp = 0;
> +					printk(KERN_ERR "GVT: can
> not read gpa = 0x%lx!!!\n", gpa);
> +				}
> +				if (!gvt_host.emulate_ops-
> >emulate_mmio_write(vgt, req->addr + sign * i * req->size, &tmp, req-
> >size))
> +					return -EINVAL;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +
> +err_ioreq_count:
> +	gvt_err("VM(%d): Unexpected %s request count(%d)\n",
> +		vgt->vm_id, req->dir == IOREQ_READ ? "read" :
> "write",
> +		req->count);
> +	return -EINVAL;
> +
> +err_ioreq_range:
> +	gvt_err("VM(%d): Invalid %s request addr end(%016llx)\n",
> +		vgt->vm_id, req->dir == IOREQ_READ ? "read" :
> "write",
> +		req->addr + sign * req->count * req->size);
> +	return -ERANGE;
> +}
> +
> +static bool vgt_hvm_write_cfg_space(struct vgt_device *vgt,
> +	uint64_t addr, unsigned int bytes, unsigned long val)
> +{
> +	/* Low 32 bit of addr is real address, high 32 bit is bdf */
> +	unsigned int port = addr & 0xffffffff;
> +
> +	ASSERT(((bytes == 4) && ((port & 3) == 0)) ||
> +		((bytes == 2) && ((port & 1) == 0)) || (bytes ==
> 1));
> +	gvt_host.emulate_ops->emulate_cfg_write(vgt, port, &val,
> bytes);
> +	return true;
> +}
> +
> +static bool vgt_hvm_read_cfg_space(struct vgt_device *vgt,
> +	uint64_t addr, unsigned int bytes, unsigned long *val)
> +{
> +	unsigned long data;
> +	/* Low 32 bit of addr is real address, high 32 bit is bdf */
> +	unsigned int port = addr & 0xffffffff;
> +
> +	ASSERT (((bytes == 4) && ((port & 3) == 0)) ||
> +		((bytes == 2) && ((port & 1) == 0)) || (bytes ==
> 1));
> +	gvt_host.emulate_ops->emulate_cfg_read(vgt, port, &data,
> bytes);
> +	memcpy(val, &data, bytes);
> +	return true;
> +}
> +
> +static int _hvm_pio_emulation(struct vgt_device *vgt, struct ioreq
> *ioreq)
> +{
> +	int sign;
> +
> +	sign = ioreq->df ? -1 : 1;
> +
> +	if (ioreq->dir == IOREQ_READ) {
> +		/* PIO READ */
> +		if (!ioreq->data_is_ptr) {
> +			if(!vgt_hvm_read_cfg_space(vgt,
> +				ioreq->addr,
> +				ioreq->size,
> +				(unsigned long*)&ioreq->data))
> +				return -EINVAL;
> +		} else {
> +			printk(KERN_ERR "GVT: _hvm_pio_emulation
> read data_ptr %lx\n",
> +			(long)ioreq->data);
> +			goto err_data_ptr;
> +		}
> +	} else {
> +		/* PIO WRITE */
> +		if (!ioreq->data_is_ptr) {
> +			if (!vgt_hvm_write_cfg_space(vgt,
> +				ioreq->addr,
> +				ioreq->size,
> +				(unsigned long)ioreq->data))
> +				return -EINVAL;
> +		} else {
> +			printk(KERN_ERR "GVT: _hvm_pio_emulation
> write data_ptr %lx\n",
> +			(long)ioreq->data);
> +			goto err_data_ptr;
> +		}
> +	}
> +	return 0;
> +err_data_ptr:
> +	/* So far the data pointer of the emulation is a guest physical
> +	 * address, which works for Qemu emulation but is hard for the
> +	 * GVT driver, which doesn't know the gpfn-to-mfn translation.
> +	 * We may ask the hypervisor to use an mfn for the GVT driver.
> +	 * For now mark it as unsupported in case a guest really uses it.
> +	 */
> +	gvt_err("VM(%d): Unsupported %s data_ptr(%lx)\n",
> +		vgt->vm_id, ioreq->dir == IOREQ_READ ? "read" :
> "write",
> +		(long)ioreq->data);
> +	return -EINVAL;
> +}
> +
> +#define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
> +
> +static int vgt_hvm_do_ioreq(struct vgt_device *vgt, struct ioreq
> *ioreq)
> +{
> +	struct pgt_device *pdev = vgt->pdev;
> +	struct pci_dev *pci_dev = pdev->dev_priv->dev->pdev;
> +	uint64_t bdf = PCI_BDF2(pci_dev->bus->number, pci_dev-
> >devfn);
> +
> +	/* When using the ioreq server, an event channel notification
> +	 * is sometimes received with an invalid ioreq. The root cause
> +	 * is not known yet, so work around it here.
> +	 */
> +	if (ioreq->state == STATE_IOREQ_NONE)
> +		return 0;
> +
> +	if (ioreq->type == IOREQ_TYPE_INVALIDATE)
> +		return 0;
> +
> +	switch (ioreq->type) {
> +	case IOREQ_TYPE_PCI_CONFIG:
> +		/* High 32 bits of ioreq->addr are the bdf */
> +		if ((ioreq->addr >> 32) != bdf) {
> +			printk(KERN_ERR "GVT: Unexpected PCI Dev %lx emulation\n",
> +				(unsigned long)(ioreq->addr >> 32));
> +			return -EINVAL;
> +		}
> +		return _hvm_pio_emulation(vgt, ioreq);
> +	case IOREQ_TYPE_COPY:	/* MMIO */
> +		return _hvm_mmio_emulation(vgt, ioreq);
> +	default:
> +		printk(KERN_ERR "GVT: Unknown ioreq type %x addr %llx size %u state %u\n",
> +			ioreq->type, ioreq->addr, ioreq->size, ioreq->state);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct ioreq *vgt_get_hvm_ioreq(struct vgt_device *vgt, int
> vcpu)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	return &(info->iopage->vcpu_ioreq[vcpu]);
> +}
> +
> +static int vgt_emulation_thread(void *priv)
> +{
> +	struct vgt_device *vgt = (struct vgt_device *)priv;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	int vcpu;
> +	int nr_vcpus = info->nr_vcpu;
> +
> +	struct ioreq *ioreq;
> +	int irq, ret;
> +
> +	gvt_info("start kthread for VM%d\n", vgt->vm_id);
> +
> +	ASSERT(info->nr_vcpu <= MAX_HVM_VCPUS_SUPPORTED);
> +
> +	set_freezable();
> +	while (1) {
> +		ret = wait_event_freezable(info->io_event_wq,
> +			kthread_should_stop() ||
> +			bitmap_weight(info->ioreq_pending,
> nr_vcpus));
> +
> +		if (kthread_should_stop())
> +			return 0;
> +
> +		if (ret)
> +			gvt_err("Emulation thread(%d) woken up "
> +				"by an unexpected signal!\n",
> +				vgt->vm_id);
> +
> +		for (vcpu = 0; vcpu < nr_vcpus; vcpu++) {
> +			if (!test_and_clear_bit(vcpu, info-
> >ioreq_pending))
> +				continue;
> +
> +			ioreq = vgt_get_hvm_ioreq(vgt, vcpu);
> +
> +			if (vgt_hvm_do_ioreq(vgt, ioreq)) {
> +				xen_pause_domain(vgt->vm_id);
> +				xen_shutdown_domain(vgt->vm_id);
> +			}
> +
> +			ioreq->state = STATE_IORESP_READY;
> +
> +			irq = info->evtchn_irq[vcpu];
> +			notify_remote_via_irq(irq);
> +		}
> +	}
> +
> +	BUG(); /* It's actually impossible to reach here */
> +	return 0;
> +}
> +
> +static inline void vgt_raise_emulation_request(struct vgt_device
> *vgt,
> +	int vcpu)
> +{
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +	set_bit(vcpu, info->ioreq_pending);
> +	if (waitqueue_active(&info->io_event_wq))
> +		wake_up(&info->io_event_wq);
> +}
> +
> +static irqreturn_t vgt_hvm_io_req_handler(int irq, void* dev)
> +{
> +	struct vgt_device *vgt;
> +	struct gvt_hvm_info *info;
> +	int vcpu;
> +
> +	vgt = (struct vgt_device *)dev;
> +	info = vgt->hypervisor_data;
> +
> +	for(vcpu=0; vcpu < info->nr_vcpu; vcpu++){
> +		if(info->evtchn_irq[vcpu] == irq)
> +			break;
> +	}
> +	if (vcpu == info->nr_vcpu) {
> +		/* oops, the irq is not a registered one */
> +		gvt_info("Received an IOREQ w/o a vcpu target\n");
> +		gvt_info("Possibly a false request from event binding\n");
> +		return IRQ_NONE;
> +	}
> +
> +	vgt_raise_emulation_request(vgt, vcpu);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static void xen_hvm_exit(struct vgt_device *vgt)
> +{
> +	struct gvt_hvm_info *info;
> +	int vcpu;
> +
> +	info = vgt->hypervisor_data;
> +
> +	if (info == NULL)
> +		return;
> +
> +	if (info->emulation_thread != NULL)
> +		kthread_stop(info->emulation_thread);
> +
> +	if (!info->nr_vcpu || info->evtchn_irq == NULL)
> +		goto out1;
> +
> +	if (info->iosrv_id != 0)
> +		hvm_destroy_iorequest_server(vgt);
> +
> +	for (vcpu = 0; vcpu < info->nr_vcpu; vcpu++){
> +		if(info->evtchn_irq[vcpu] >= 0)
> +			unbind_from_irqhandler(info-
> >evtchn_irq[vcpu], vgt);
> +	}
> +
> +	if (info->iopage_vma != NULL)
> +		xen_unmap_domain_mfn_range_in_kernel(info-
> >iopage_vma, 1, vgt->vm_id);
> +
> +	kfree(info->evtchn_irq);
> +
> +out1:
> +	vgt_vmem_destroy(vgt);
> +	kfree(info);
> +}
> +
> +static int xen_hvm_init(struct vgt_device *vgt)
> +{
> +	struct gvt_hvm_info *info;
> +	int vcpu, irq, rc = 0;
> +	struct task_struct *thread;
> +	struct pgt_device *pdev = vgt->pdev;
> +	struct pci_dev *pci_dev = pdev->dev_priv->dev->pdev;
> +
> +	info = kzalloc(sizeof(struct gvt_hvm_info), GFP_KERNEL);
> +	if (info == NULL)
> +		return -ENOMEM;
> +
> +	vgt->hypervisor_data = info;
> +
> +	info->iopage_vma = xen_map_iopage(vgt);
> +	if (info->iopage_vma == NULL) {
> +		printk(KERN_ERR "Failed to map HVM I/O page for
> VM%d\n", vgt->vm_id);
> +		rc = -EFAULT;
> +		goto err;
> +	}
> +	info->iopage = info->iopage_vma->addr;
> +
> +	init_waitqueue_head(&info->io_event_wq);
> +
> +	info->nr_vcpu = xen_get_nr_vcpu(vgt->vm_id);
> +	ASSERT(info->nr_vcpu > 0);
> +	ASSERT(info->nr_vcpu <= MAX_HVM_VCPUS_SUPPORTED);
> +
> +	info->evtchn_irq = kmalloc(info->nr_vcpu * sizeof(int),
> GFP_KERNEL);
> +	if (info->evtchn_irq == NULL){
> +		rc = -ENOMEM;
> +		goto err;
> +	}
> +	for( vcpu = 0; vcpu < info->nr_vcpu; vcpu++ )
> +		info->evtchn_irq[vcpu] = -1;
> +
> +	rc = hvm_map_pcidev_to_ioreq_server(vgt, PCI_BDF2(pci_dev-
> >bus->number, pci_dev->devfn));
> +	if (rc < 0)
> +		goto err;
> +	rc = hvm_toggle_iorequest_server(vgt, 1);
> +	if (rc < 0)
> +		goto err;
> +
> +	for (vcpu = 0; vcpu < info->nr_vcpu; vcpu++){
> +		irq = bind_interdomain_evtchn_to_irqhandler( vgt-
> >vm_id,
> +				info->iopage-
> >vcpu_ioreq[vcpu].vp_eport,
> +				vgt_hvm_io_req_handler, 0,
> +				"vgt", vgt );
> +		if (irq < 0) {
> +			rc = irq;
> +			printk(KERN_ERR "Failed to bind event channel "
> +				"for vgt HVM IO handler, rc=%d\n", rc);
> +			goto err;
> +		}
> +		info->evtchn_irq[vcpu] = irq;
> +	}
> +
> +	thread = kthread_run(vgt_emulation_thread, vgt,
> +			"vgt_emulation:%d", vgt->vm_id);
> +	if(IS_ERR(thread))
> +		goto err;
> +	info->emulation_thread = thread;
> +
> +	return 0;
> +
> +err:
> +	xen_hvm_exit(vgt);
> +	return rc;
> +}
> +
> +static void *xen_gpa_to_va(struct vgt_device *vgt, unsigned long
> gpa)
> +{
> +	unsigned long buck_index, buck_4k_index;
> +	struct gvt_hvm_info *info = vgt->hypervisor_data;
> +
> +	if (!vgt->vm_id)
> +		return (char*)xen_mfn_to_virt(gpa>>PAGE_SHIFT) +
> (gpa & (PAGE_SIZE-1));
> +	/*
> +	 * At the beginning of _hvm_mmio_emulation(), we already
> initialize
> +	 * info->vmem_vma and info->vmem_vma_low_1mb.
> +	 */
> +	ASSERT(info->vmem_vma != NULL && info->vmem_vma_low_1mb !=
> NULL);
> +
> +	/* handle the low 1MB memory */
> +	if (gpa < VMEM_1MB) {
> +		buck_index = gpa >> PAGE_SHIFT;
> +		if (!info->vmem_vma_low_1mb[buck_index])
> +			return NULL;
> +
> +		return (char*)(info->vmem_vma_low_1mb[buck_index]-
> >addr) +
> +			(gpa & ~PAGE_MASK);
> +
> +	}
> +
> +	/* handle the >1MB memory */
> +	buck_index = gpa >> VMEM_BUCK_SHIFT;
> +
> +	if (!info->vmem_vma[buck_index]) {
> +		buck_4k_index = gpa >> PAGE_SHIFT;
> +		if (!info->vmem_vma_4k[buck_4k_index]) {
> +			if (buck_4k_index > vgt->low_mem_max_gpfn)
> +				gvt_err("GVT failed to map
> gpa=0x%lx?\n", gpa);
> +			return NULL;
> +		}
> +
> +		return (char*)(info->vmem_vma_4k[buck_4k_index]-
> >addr) +
> +			(gpa & ~PAGE_MASK);
> +	}
> +
> +	return (char*)(info->vmem_vma[buck_index]->addr) +
> +		(gpa & (VMEM_BUCK_SIZE -1));
> +}
> +
> +static bool xen_read_va(struct vgt_device *vgt, void *va, void *val,
> +		int len, int atomic)
> +{
> +	memcpy(val, va, len);
> +
> +	return true;
> +}
> +
> +static bool xen_write_va(struct vgt_device *vgt, void *va, void
> *val,
> +		int len, int atomic)
> +{
> +	memcpy(va, val, len);
> +	return true;
> +}
> +
> +static struct gvt_kernel_dm xengt_kdm = {
> +	.name = "xengt_kdm",
> +	.g2m_pfn = xen_g2m_pfn,
> +	.pause_domain = xen_pause_domain,
> +	.shutdown_domain = xen_shutdown_domain,
> +	.map_mfn_to_gpfn = xen_map_mfn_to_gpfn,
> +	.set_trap_area = xen_set_trap_area,
> +	.set_wp_pages = xen_set_guest_page_writeprotection,
> +	.unset_wp_pages = xen_clear_guest_page_writeprotection,
> +	.detect_host = xen_detect_host,
> +	.from_virt_to_mfn = xen_virt_to_mfn,
> +	.from_mfn_to_virt = xen_mfn_to_virt,
> +	.inject_msi = xen_inject_msi,
> +	.hvm_init = xen_hvm_init,
> +	.hvm_exit = xen_hvm_exit,
> +	.gpa_to_va = xen_gpa_to_va,
> +	.read_va = xen_read_va,
> +	.write_va = xen_write_va,
> +};
> +EXPORT_SYMBOL(xengt_kdm);
> +
> +static int __init xengt_init(void)
> +{
> +       if (!xen_initial_domain())
> +               return -EINVAL;
> +       printk(KERN_INFO "xengt: loaded\n");
> +       return 0;
> +}
> +
> +static void __exit xengt_exit(void)
> +{
> +	printk(KERN_INFO "xengt: unloaded\n");
> +}
> +
> +module_init(xengt_init);
> +module_exit(xengt_exit);
> diff --git a/include/xen/interface/hvm/hvm_op.h
> b/include/xen/interface/hvm/hvm_op.h
> index 956a046..20577cc 100644
> --- a/include/xen/interface/hvm/hvm_op.h
> +++ b/include/xen/interface/hvm/hvm_op.h
> @@ -21,6 +21,8 @@
>  #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
>  #define __XEN_PUBLIC_HVM_HVM_OP_H__
>  
> +#include <xen/interface/event_channel.h>
> +
>  /* Get/set subcommands: the second argument of the hypercall is a
>   * pointer to a xen_hvm_param struct. */
>  #define HVMOP_set_param           0
> @@ -42,12 +44,41 @@ struct xen_hvm_pagetable_dying {
>  };
>  typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
>  DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
> - 
> +
> +/* MSI injection for emulated devices */
> +#define HVMOP_inject_msi         16
> +struct xen_hvm_inject_msi {
> +    /* Domain to be injected */
> +    domid_t   domid;
> +    /* Data -- lower 32 bits */
> +    uint32_t  data;
> +    /* Address (0xfeexxxxx) */
> +    uint64_t  addr;
> +};
> +typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_inject_msi_t);
> +
>  enum hvmmem_type_t {
>      HVMMEM_ram_rw,             /* Normal read/write guest RAM */
>      HVMMEM_ram_ro,             /* Read-only; writes are discarded */
>      HVMMEM_mmio_dm,            /* Reads and write go to the device
> model */
> +    HVMMEM_mmio_write_dm       /* Read-only; writes go to the device
> model */
> +};
> +
> +#define HVMOP_set_mem_type    8
> +/* Notify that a region of memory is to be treated in a specific
> way. */
> +struct xen_hvm_set_mem_type {
> +        /* Domain to be updated. */
> +        domid_t domid;
> +        /* Memory type */
> +        uint16_t hvmmem_type;
> +        /* Number of pages. */
> +        uint32_t nr;
> +        /* First pfn. */
> +        uint64_t first_pfn;
>  };
> +typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_mem_type_t);
>  
>  #define HVMOP_get_mem_type    15
>  /* Return hvmmem_type_t for the specified pfn. */
> @@ -62,4 +93,148 @@ struct xen_hvm_get_mem_type {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type);
>  
> +#define HVMOP_vgt_wp_pages         27  /* writeprotection to guest
> pages */
> +#define MAX_WP_BATCH_PAGES         128
> +struct xen_hvm_vgt_wp_pages {
> +	uint16_t domid;
> +	uint16_t set;            /* 1: set WP, 0: remove WP */
> +	uint16_t nr_pages;
> +	unsigned long  wp_pages[MAX_WP_BATCH_PAGES];
> +};
> +typedef struct xen_hvm_vgt_wp_pages xen_hvm_vgt_wp_pages_t;
> +
> +/*
> + * IOREQ Servers
> + *
> + * The interface between an I/O emulator and Xen is called an IOREQ Server.
> + * A domain supports a single 'legacy' IOREQ Server which is
> instantiated if
> + * parameter...
> + *
> + * HVM_PARAM_IOREQ_PFN is read (to get the gmfn containing the
> synchronous
> + * ioreq structures), or...
> + * HVM_PARAM_BUFIOREQ_PFN is read (to get the gmfn containing the
> buffered
> + * ioreq ring), or...
> + * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that
> Xen uses
> + * to request buffered I/O emulation).
> + *
> + * The following hypercalls facilitate the creation of IOREQ Servers
> for
> + * 'secondary' emulators which are invoked to implement port I/O,
> memory, or
> + * PCI config space ranges which they explicitly register.
> + */
> +typedef uint16_t ioservid_t;
> +
> +/*
> + * HVMOP_create_ioreq_server: Instantiate a new IOREQ Server for a
> secondary
> + *                            emulator servicing domain <domid>.
> + *
> + * The <id> handed back is unique for <domid>. If <handle_bufioreq>
> is zero
> + * the buffered ioreq ring will not be allocated and hence all emulation
> + * requests to this server will be synchronous.
> + */
> +#define HVMOP_create_ioreq_server 17
> +struct xen_hvm_create_ioreq_server {
> +    domid_t domid;           /* IN - domain to be serviced */
> +    uint8_t handle_bufioreq; /* IN - should server handle buffered
> ioreqs */
> +    ioservid_t id;           /* OUT - server id */
> +};
> +typedef struct xen_hvm_create_ioreq_server
> xen_hvm_create_ioreq_server_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_create_ioreq_server_t);
> +
> +/*
> + * HVMOP_get_ioreq_server_info: Get all the information necessary to
> access
> + *                              IOREQ Server <id>.
> + *
> + * The emulator needs to map the synchronous ioreq structures and
> buffered
> + * ioreq ring (if it exists) that Xen uses to request emulation.
> These are
> + * hosted in domain <domid>'s gmfns <ioreq_pfn> and <bufioreq_pfn>
> + * respectively. In addition, if the IOREQ Server is handling
> buffered
> + * emulation requests, the emulator needs to bind to event channel
> + * <bufioreq_port> to listen for them. (The event channels used for
> + * synchronous emulation requests are specified in the per-CPU ioreq
> + * structures in <ioreq_pfn>).
> + * If the IOREQ Server is not handling buffered emulation requests
> then the
> + * values handed back in <bufioreq_pfn> and <bufioreq_port> will
> both be 0.
> + */
> +#define HVMOP_get_ioreq_server_info 18
> +struct xen_hvm_get_ioreq_server_info {
> +    domid_t domid;                 /* IN - domain to be serviced */
> +    ioservid_t id;                 /* IN - server id */
> +    evtchn_port_t bufioreq_port;   /* OUT - buffered ioreq port */
> +    uint64_t ioreq_pfn;    /* OUT - sync ioreq pfn */
> +    uint64_t bufioreq_pfn; /* OUT - buffered ioreq pfn */
> +};
> +typedef struct xen_hvm_get_ioreq_server_info
> xen_hvm_get_ioreq_server_info_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_ioreq_server_info_t);
> +
> +/*
> + * HVM_map_io_range_to_ioreq_server: Register an I/O range of domain
> <domid>
> + *                                   for emulation by the client of
> IOREQ
> + *                                   Server <id>
> + * HVM_unmap_io_range_from_ioreq_server: Deregister an I/O range of
> <domid>
> + *                                       for emulation by the client
> of IOREQ
> + *                                       Server <id>
> + *
> + * There are three types of I/O that can be emulated: port I/O,
> memory accesses
> + * and PCI config space accesses. The <type> field denotes which
> type of range
> + * the <start> and <end> (inclusive) fields are specifying.
> + * PCI config space ranges are specified by
> segment/bus/device/function values
> + * which should be encoded using the HVMOP_PCI_SBDF helper macro
> below.
> + *
> + * NOTE: unless an emulation request falls entirely within a range
> mapped
> + * by a secondary emulator, it will not be passed to that emulator.
> + */
> +#define HVMOP_map_io_range_to_ioreq_server 19
> +#define HVMOP_unmap_io_range_from_ioreq_server 20
> +struct xen_hvm_io_range {
> +    domid_t domid;               /* IN - domain to be serviced */
> +    ioservid_t id;               /* IN - server id */
> +    uint32_t type;               /* IN - type of range */
> +# define HVMOP_IO_RANGE_PORT   0 /* I/O port range */
> +# define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */
> +# define HVMOP_IO_RANGE_PCI    2 /* PCI segment/bus/dev/func range
> */
> +    uint64_t start, end; /* IN - inclusive start and end of range */
> +};
> +typedef struct xen_hvm_io_range xen_hvm_io_range_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_io_range_t);
> +
> +#define HVMOP_PCI_SBDF(s,b,d,f)                 \
> +       ((((s) & 0xffff) << 16) |                   \
> +        (((b) & 0xff) << 8) |                      \
> +        (((d) & 0x1f) << 3) |                      \
> +        ((f) & 0x07))
> +
> +/*
> + * HVMOP_destroy_ioreq_server: Destroy the IOREQ Server <id>
> servicing domain
> + *                             <domid>.
> + *
> + * Any registered I/O ranges will be automatically deregistered.
> + */
> +#define HVMOP_destroy_ioreq_server 21
> +struct xen_hvm_destroy_ioreq_server {
> +    domid_t domid; /* IN - domain to be serviced */
> +    ioservid_t id; /* IN - server id */
> +};
> +typedef struct xen_hvm_destroy_ioreq_server
> xen_hvm_destroy_ioreq_server_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_destroy_ioreq_server_t);
> +
> +
> +/*
> + * HVMOP_set_ioreq_server_state: Enable or disable the IOREQ Server
> <id> servicing
> + *                               domain <domid>.
> + *
> + * The IOREQ Server will not be passed any emulation requests until
> it is in the
> + * enabled state.
> + * Note that the contents of the ioreq_pfn and bufioreq_pfn (see
> + * HVMOP_get_ioreq_server_info) are not meaningful until the IOREQ
> Server is in
> + * the enabled state.
> + */
> +#define HVMOP_set_ioreq_server_state 22
> +struct xen_hvm_set_ioreq_server_state {
> +    domid_t domid;   /* IN - domain to be serviced */
> +    ioservid_t id;   /* IN - server id */
> +    uint8_t enabled; /* IN - enabled? */
> +};
> +typedef struct xen_hvm_set_ioreq_server_state
> xen_hvm_set_ioreq_server_state_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_ioreq_server_state_t);
> +
>  #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
> diff --git a/include/xen/interface/hvm/ioreq.h
> b/include/xen/interface/hvm/ioreq.h
> new file mode 100644
> index 0000000..6bbf4e4
> --- /dev/null
Zhiyuan Lv Jan. 28, 2016, 12:50 p.m. UTC | #2
Hi Joonas,

On Thu, Jan 28, 2016 at 01:33:33PM +0200, Joonas Lahtinen wrote:
> Hi,
> 
> See the file MAINTAINERS and add Cc: lines according to "XEN HYPERVISOR
> INTERFACE". Also I think it'll be useful to split the i915 changes to a
> separate patch next int he series (as the reviewer will be different).
> 
> We will have to wait for Xen maintainers to take a position on this. Is
> there KVM counterparts for this stuff incoming?

Yeah, we have a KVM part as well. There will be separate mails to discuss
the Mediated Pass-Through (MPT) interface and its implementation with the
Xen/KVM communities. This patch can be ignored right now; it is here just
to keep the whole patchset functional. Thanks!

Regards,
-Zhiyuan

> 
> On to, 2016-01-28 at 18:21 +0800, Zhi Wang wrote:
> > This is the xen hypervisor MPT module which let GVT-g be able to run
> > under
> > Xen hypervisor.
> > 
> 
> Cc: xen-devel@lists.xenproject.org
> ...and so on...
> 
> Regards, Joonas
>
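A note on the MPT split discussed above: gvt.c in this series already declares
both backends ("extern struct gvt_kernel_dm xengt_kdm;" and "extern struct
gvt_kernel_dm kvmgt_kdm;"), so a KVM counterpart would plug into GVT-g through
the same gvt_kernel_dm ops table that xengt.c fills in with xengt_kdm below.
The fragment that follows is only a hypothetical sketch of that shape, assuming
the same callback signatures as xengt_kdm; the kvmgt_* function names and their
bodies are placeholders for illustration, not code from this series or from the
real KVMGT module.

/*
 * Hypothetical sketch: a minimal hypervisor backend filling the same
 * gvt_kernel_dm ops table that xengt_kdm implements for Xen below.
 * The kvmgt_* callbacks are placeholders, not real KVMGT code.
 */
#include <linux/module.h>

#include "gvt.h"

/* Placeholder: a real KVM backend would check that it runs in a KVM host. */
static int kvmgt_detect_host(void)
{
	return 0;
}

/* Placeholder: a real backend would translate the guest pfn via KVM memslots. */
static unsigned long kvmgt_g2m_pfn(int vm_id, unsigned long g_pfn)
{
	return g_pfn;
}

/* External linkage, to match the "extern struct gvt_kernel_dm kvmgt_kdm;" in gvt.c. */
struct gvt_kernel_dm kvmgt_kdm = {
	.name		= "kvmgt_kdm",
	.detect_host	= kvmgt_detect_host,
	.g2m_pfn	= kvmgt_g2m_pfn,
	/* the remaining callbacks (set_trap_area, inject_msi, hvm_init,
	 * gpa_to_va, read_va, write_va, ...) would be filled in the same
	 * way xengt_kdm does for Xen */
};
EXPORT_SYMBOL(kvmgt_kdm);

Whether the KVM module ends up reusing exactly this ops table or a refined MPT
interface is what the follow-up discussion with the Xen/KVM communities is
expected to settle.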
diff mbox

Patch

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3bcdcc8..aea97e3 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -459,6 +459,13 @@  HYPERVISOR_hvm_op(int op, void *arg)
 }
 
 static inline int
+HYPERVISOR_domctl(
+        struct xen_domctl *arg)
+{
+        return _hypercall1(int, domctl, arg);
+}
+
+static inline int
 HYPERVISOR_tmem_op(
 	struct tmem_op *op)
 {
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 6ff4986..a4ee3f4 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -89,6 +89,7 @@  typedef long xen_long_t;
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint,  unsigned int);
+__DEFINE_GUEST_HANDLE(ulong,  unsigned long);
 DEFINE_GUEST_HANDLE(char);
 DEFINE_GUEST_HANDLE(int);
 DEFINE_GUEST_HANDLE(void);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c913ca4..da95d45 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2931,3 +2931,86 @@  int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
 #endif
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
+
+/* Note: here 'mfn' is actually gfn!!! */
+struct vm_struct * xen_remap_domain_mfn_range_in_kernel(unsigned long mfn,
+		int nr, unsigned domid)
+{
+	struct vm_struct *area;
+	struct remap_data rmd;
+	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
+	int batch;
+	unsigned long range, addr;
+	pgprot_t prot;
+	int err;
+
+	WARN_ON(in_interrupt() || irqs_disabled());
+
+	area = alloc_vm_area(nr << PAGE_SHIFT, NULL);
+	if (!area)
+		return NULL;
+
+	addr = (unsigned long)area->addr;
+
+	prot = __pgprot(pgprot_val(PAGE_KERNEL));
+
+	rmd.mfn = &mfn;
+	rmd.prot = prot;
+
+	while (nr) {
+		batch = min(REMAP_BATCH_SIZE, nr);
+		range = (unsigned long)batch << PAGE_SHIFT;
+
+		rmd.mmu_update = mmu_update;
+		err = apply_to_page_range(&init_mm, addr, range,
+				remap_area_mfn_pte_fn, &rmd);
+		if (err || HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
+			goto err;
+
+		nr -= batch;
+		addr += range;
+	}
+
+	xen_flush_tlb_all();
+	return area;
+err:
+	free_vm_area(area);
+	xen_flush_tlb_all();
+	return NULL;
+}
+EXPORT_SYMBOL(xen_remap_domain_mfn_range_in_kernel);
+
+void xen_unmap_domain_mfn_range_in_kernel(struct vm_struct *area, int nr,
+		unsigned domid)
+{
+	struct remap_data rmd;
+	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
+	int batch;
+	unsigned long range, addr = (unsigned long)area->addr;
+#define INVALID_MFN (~0UL)
+	unsigned long invalid_mfn = INVALID_MFN;
+	int err;
+
+	WARN_ON(in_interrupt() || irqs_disabled());
+
+	rmd.mfn = &invalid_mfn;
+	rmd.prot = PAGE_NONE;
+
+	while (nr) {
+		batch = min(REMAP_BATCH_SIZE, nr);
+		range = (unsigned long)batch << PAGE_SHIFT;
+
+		rmd.mmu_update = mmu_update;
+		err = apply_to_page_range(&init_mm, addr, range,
+				remap_area_mfn_pte_fn, &rmd);
+		BUG_ON(err);
+		BUG_ON(HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0);
+
+		nr -= batch;
+		addr += range;
+	}
+
+	free_vm_area(area);
+	xen_flush_tlb_all();
+}
+EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range_in_kernel);
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index a71873c..28a51d9 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -21,12 +21,14 @@ 
  * SOFTWARE.
  */
 
+#include <linux/types.h>
 #include <xen/xen.h>
 #include <linux/kthread.h>
 
 #include "gvt.h"
 
 struct gvt_host gvt_host;
+EXPORT_SYMBOL(gvt_host);
 
 extern struct gvt_kernel_dm xengt_kdm;
 extern struct gvt_kernel_dm kvmgt_kdm;
@@ -36,6 +38,13 @@  static struct gvt_io_emulation_ops default_io_emulation_ops = {
 	.emulate_mmio_write = gvt_emulate_mmio_write,
 };
 
+unsigned int pa_to_mmio_offset(struct vgt_device *vgt,
+               uint64_t pa);
+
+static struct gvt_mpt_ops default_export_mpt_ops = {
+	.pa_to_mmio_offset = pa_to_mmio_offset,
+};
+
 static const char *supported_hypervisors[] = {
 	[GVT_HYPERVISOR_TYPE_XEN] = "Xen Hypervisor",
 	[GVT_HYPERVISOR_TYPE_KVM] = "KVM",
@@ -78,6 +87,7 @@  static bool gvt_init_host(void)
 			supported_hypervisors[host->hypervisor_type]);
 
 	host->emulate_ops = &default_io_emulation_ops;
+	host->mpt_ops = &default_export_mpt_ops;
 	idr_init(&host->device_idr);
 	mutex_init(&host->device_idr_lock);
 
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index eb5fd47..83f90a2 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -58,6 +58,10 @@  struct gvt_io_emulation_ops {
 	bool (*emulate_cfg_write)(struct vgt_device *, unsigned int, void *, int);
 };
 
+struct gvt_mpt_ops {
+	unsigned int (*pa_to_mmio_offset)(struct vgt_device *, u64);
+};
+
 struct gvt_host {
 	bool initialized;
 	int hypervisor_type;
@@ -65,6 +69,7 @@  struct gvt_host {
 	struct idr device_idr;
 	struct gvt_kernel_dm *kdm;
 	struct gvt_io_emulation_ops *emulate_ops;
+	struct gvt_mpt_ops *mpt_ops;
 };
 
 extern struct gvt_host gvt_host;
@@ -123,6 +128,9 @@  struct vgt_device {
 	struct gvt_virtual_device_state state;
 	struct gvt_statistics stat;
 	struct gvt_vgtt_info gtt;
+	void *hypervisor_data;
+	unsigned long low_mem_max_gpfn;
+	atomic_t crashing;
 };
 
 struct gvt_gm_allocator {
@@ -423,6 +431,12 @@  static inline int gvt_pci_mmio_is_enabled(struct vgt_device *vgt)
 		_REGBIT_CFG_COMMAND_MEMORY;
 }
 
+static inline uint64_t gvt_mmio_bar_base(struct vgt_device *vgt)
+{
+        char *cfg_space = &vgt->state.cfg.space[0];
+        return *(u64 *)(cfg_space + GVT_REG_CFG_SPACE_BAR0);
+}
+
 #define __vreg(vgt, off) (*(u32*)(vgt->state.mmio.vreg + off))
 #define __vreg8(vgt, off) (*(u8*)(vgt->state.mmio.vreg + off))
 #define __vreg16(vgt, off) (*(u16*)(vgt->state.mmio.vreg + off))
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 73708ac..9ee2033 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -291,4 +291,9 @@  config XEN_SYMS
 config XEN_HAVE_VPMU
        bool
 
+config XENGT
+        tristate "Xen Dom0 support for i915 gvt device model"
+        depends on XEN_DOM0 && I915_GVT
+        default m
+
 endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 9b7a35c..ff75c36 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,6 +9,10 @@  CFLAGS_features.o			:= $(nostackp)
 
 CFLAGS_efi.o				+= -fshort-wchar
 
+
+I915                     := drivers/gpu/drm/i915
+CFLAGS_xengt.o          += -Wall -Werror -I$(I915) -I$(I915)/gvt
+
 dom0-$(CONFIG_PCI) += pci.o
 dom0-$(CONFIG_USB_SUPPORT) += dbgp.o
 dom0-$(CONFIG_XEN_ACPI) += acpi.o $(xen-pad-y)
@@ -36,6 +40,8 @@  obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-acpi-processor.o
 obj-$(CONFIG_XEN_EFI)			+= efi.o
 obj-$(CONFIG_XEN_SCSI_BACKEND)		+= xen-scsiback.o
 obj-$(CONFIG_XEN_AUTO_XLATE)		+= xlate_mmu.o
+obj-$(CONFIG_XENGT)                     += xengt.o
+
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
diff --git a/drivers/xen/xengt.c b/drivers/xen/xengt.c
new file mode 100644
index 0000000..6c600adc
--- /dev/null
+++ b/drivers/xen/xengt.c
@@ -0,0 +1,1153 @@ 
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of Version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * NOTE:
+ * This file contains the hypervisor-specific interactions needed to
+ * implement the concept of the mediated pass-through framework.
+ * What this file provides is actually a general abstraction
+ * of an in-kernel device model, which is not vgt specific.
+ *
+ * It lives in vgt code temporarily; long-term it should move to a
+ * hypervisor (Xen/KVM) specific directory.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/freezer.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/page.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/hvm/params.h>
+#include <xen/interface/hvm/hvm_op.h>
+#include <xen/interface/hvm/ioreq.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/platform.h>
+#include <xen/interface/vcpu.h>
+
+#include "gvt.h"
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("XenGT mediated passthrough driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.1");
+
+#define MAX_HVM_VCPUS_SUPPORTED 128
+struct gvt_hvm_info {
+	/* iopage_vma->addr is just iopage. We need iopage_vma on VM destroy */
+	shared_iopage_t *iopage;
+	struct vm_struct *iopage_vma;
+	int *evtchn_irq; /* the event channel irqs to handle HVM io requests;
+				index is the vcpu id */
+
+	DECLARE_BITMAP(ioreq_pending, MAX_HVM_VCPUS_SUPPORTED);
+	wait_queue_head_t io_event_wq;
+	struct task_struct *emulation_thread;
+
+	int nr_vcpu;
+
+	ioservid_t iosrv_id;    /* io-request server id */
+
+#define VMEM_1MB		(1ULL << 20)	/* the size of the first 1MB */
+#define VMEM_BUCK_SHIFT		20
+#define VMEM_BUCK_SIZE		(1ULL << VMEM_BUCK_SHIFT)
+#define VMEM_BUCK_MASK		(~(VMEM_BUCK_SIZE - 1))
+	uint64_t vmem_sz;
+	/* for the 1st 1MB memory of HVM: each vm_struct means one 4K-page */
+	struct vm_struct **vmem_vma_low_1mb;
+	/* for >1MB memory of HVM: each vm_struct means 1MB */
+	struct vm_struct **vmem_vma;
+	/* for >1MB memory of HVM: each vm_struct means 4KB */
+	struct vm_struct **vmem_vma_4k;
+};
+
+static int xen_pause_domain(int vm_id);
+static int xen_shutdown_domain(int vm_id);
+static void *xen_gpa_to_va(struct vgt_device *vgt, unsigned long gpa);
+
+#define XEN_ASSERT_VM(x, vgt)						\
+	do {								\
+		if (!(x)) {						\
+			printk("Assert at %s line %d\n",		\
+				__FILE__, __LINE__);			\
+			if (atomic_cmpxchg(&(vgt)->crashing, 0, 1))	\
+				break;					\
+			gvt_err("Killing VM%d\n", (vgt)->vm_id);	\
+			if (!xen_pause_domain((vgt->vm_id)))		\
+				xen_shutdown_domain((vgt->vm_id));	\
+		}							\
+	} while (0)
+
+/* Translate from VM's guest pfn to machine pfn */
+static unsigned long xen_g2m_pfn(int vm_id, unsigned long g_pfn)
+{
+	struct xen_get_mfn_from_pfn pfn_arg;
+	int rc;
+	unsigned long pfn_list[1];
+
+	pfn_list[0] = g_pfn;
+
+	set_xen_guest_handle(pfn_arg.pfn_list, pfn_list);
+	pfn_arg.nr_pfns = 1;
+	pfn_arg.domid = vm_id;
+
+	rc = HYPERVISOR_memory_op(XENMEM_get_mfn_from_pfn, &pfn_arg);
+	if (rc < 0) {
+		printk("failed to get mfn for gpfn(0x%lx), errno=%d\n", g_pfn, rc);
+		return INVALID_MFN;
+	}
+
+	return pfn_list[0];
+}
+
+static int xen_get_max_gpfn(int vm_id)
+{
+	domid_t dom_id = vm_id;
+	int max_gpfn = HYPERVISOR_memory_op(XENMEM_maximum_gpfn, &dom_id);
+	BUG_ON(max_gpfn < 0);
+	return max_gpfn;
+}
+
+static int xen_pause_domain(int vm_id)
+{
+	int rc;
+	struct xen_domctl domctl;
+
+	domctl.domain = vm_id;
+	domctl.cmd = XEN_DOMCTL_pausedomain;
+	domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
+
+	rc = HYPERVISOR_domctl(&domctl);
+	if (rc != 0)
+		printk("HYPERVISOR_domctl pausedomain fail with %d!\n", rc);
+
+	return rc;
+}
+
+static int xen_shutdown_domain(int vm_id)
+{
+	int rc;
+	struct sched_remote_shutdown r;
+
+	r.reason = SHUTDOWN_crash;
+	r.domain_id = vm_id;
+	rc = HYPERVISOR_sched_op(SCHEDOP_remote_shutdown, &r);
+	if (rc != 0)
+		printk("HYPERVISOR_sched_op failed: %d\n", rc);
+	return rc;
+}
+
+static int xen_domain_iomem_perm(uint32_t domain_id, uint64_t first_mfn,
+                               uint64_t nr_mfns, uint8_t allow_access)
+{
+	struct xen_domctl arg;
+	int rc;
+
+	arg.domain = domain_id;
+	arg.cmd = XEN_DOMCTL_iomem_permission;
+	arg.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
+	arg.u.iomem_perm.first_mfn = first_mfn;
+	arg.u.iomem_perm.nr_mfns = nr_mfns;
+	arg.u.iomem_perm.allow_access = allow_access;
+	rc = HYPERVISOR_domctl(&arg);
+
+	return rc;
+}
+
+static int xen_hvm_memory_mapping(int vm_id, uint64_t first_gfn, uint64_t first_mfn,
+				  uint32_t nr_mfns, uint32_t add_mapping)
+{
+	struct xen_domctl arg;
+	int rc;
+
+	if (add_mapping) {
+		rc = xen_domain_iomem_perm(vm_id, first_mfn, nr_mfns, 1);
+	        if (rc < 0) {
+			printk(KERN_ERR "xen_domain_iomem_perm failed: %d\n", rc);
+			return rc;
+		}
+	}
+
+	arg.domain = vm_id;
+	arg.cmd = XEN_DOMCTL_memory_mapping;
+	arg.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
+	arg.u.memory_mapping.first_gfn = first_gfn;
+	arg.u.memory_mapping.first_mfn = first_mfn;
+	arg.u.memory_mapping.nr_mfns = nr_mfns;
+	arg.u.memory_mapping.add_mapping = add_mapping;
+
+	rc = HYPERVISOR_domctl(&arg);
+	if (rc < 0) {
+		printk(KERN_ERR "HYPERVISOR_domctl failed: %d\n", rc);
+		return rc;
+	}
+
+	if (!add_mapping) {
+		rc = xen_domain_iomem_perm(vm_id, first_mfn, nr_mfns, 0);
+	        if (rc < 0) {
+			printk(KERN_ERR "xen_domain_iomem_perm failed: %d\n", rc);
+			return rc;
+		}
+	}
+
+	return rc;
+}
+
+static int xen_map_mfn_to_gpfn(int vm_id, unsigned long gpfn,
+	unsigned long mfn, int nr, int map, enum map_type type)
+{
+	int rc;
+	rc = xen_hvm_memory_mapping(vm_id, gpfn, mfn, nr,
+			map ? DPCI_ADD_MAPPING : DPCI_REMOVE_MAPPING);
+	if (rc != 0)
+		printk("xen_hvm_memory_mapping failed: %d\n", rc);
+	return rc;
+}
+
+static int xen_get_nr_vcpu(int vm_id)
+{
+	struct xen_domctl arg;
+	int rc;
+
+	arg.domain = vm_id;
+	arg.cmd = XEN_DOMCTL_getdomaininfo;
+	arg.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
+
+	rc = HYPERVISOR_domctl(&arg);
+	if (rc < 0) {
+		printk(KERN_ERR "HYPERVISOR_domctl failed: ret=%d\n", rc);
+		/* assume it is UP */
+		return 1;
+	}
+
+	return arg.u.getdomaininfo.max_vcpu_id + 1;
+}
+
+static int hvm_create_iorequest_server(struct vgt_device *vgt)
+{
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+	struct xen_hvm_create_ioreq_server arg;
+	int r;
+
+	arg.domid = vgt->vm_id;
+	arg.handle_bufioreq = 0;
+	r = HYPERVISOR_hvm_op(HVMOP_create_ioreq_server, &arg);
+	if (r < 0) {
+		printk(KERN_ERR "Cannot create io-requset server: %d!\n", r);
+		return r;
+	}
+	info->iosrv_id = arg.id;
+
+	return r;
+}
+
+static int hvm_toggle_iorequest_server(struct vgt_device *vgt, bool enable)
+{
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+	struct xen_hvm_set_ioreq_server_state arg;
+	int r;
+
+	arg.domid = vgt->vm_id;
+	arg.id = info->iosrv_id;
+	arg.enabled = enable;
+	r = HYPERVISOR_hvm_op(HVMOP_set_ioreq_server_state, &arg);
+	if (r < 0) {
+		printk(KERN_ERR "Cannot %s io-request server: %d!\n",
+			enable ? "enable" : "disbale",  r);
+		return r;
+	}
+
+	return r;
+}
+
+static int hvm_get_ioreq_pfn(struct vgt_device *vgt, uint64_t *value)
+{
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+	struct xen_hvm_get_ioreq_server_info arg;
+	int r;
+
+	arg.domid = vgt->vm_id;
+	arg.id = info->iosrv_id;
+	r = HYPERVISOR_hvm_op(HVMOP_get_ioreq_server_info, &arg);
+	if (r < 0) {
+		printk(KERN_ERR "Cannot get ioreq pfn: %d!\n", r);
+		return r;
+	}
+	*value = arg.ioreq_pfn;
+	return r;
+}
+
+static int hvm_destroy_iorequest_server(struct vgt_device *vgt)
+{
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+	struct xen_hvm_destroy_ioreq_server arg;
+	int r;
+
+	arg.domid = vgt->vm_id;
+	arg.id = info->iosrv_id;
+	r = HYPERVISOR_hvm_op(HVMOP_destroy_ioreq_server, &arg);
+	if (r < 0) {
+		printk(KERN_ERR "Cannot destroy io-request server(%d): %d!\n",
+			info->iosrv_id, r);
+		return r;
+	}
+	info->iosrv_id = 0;
+
+	return r;
+}
+
+static int hvm_map_io_range_to_ioreq_server(struct vgt_device *vgt,
+	int is_mmio, uint64_t start, uint64_t end, int map)
+{
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+	xen_hvm_io_range_t arg;
+	int rc;
+
+	arg.domid = vgt->vm_id;
+	arg.id = info->iosrv_id;
+	arg.type = is_mmio ? HVMOP_IO_RANGE_MEMORY : HVMOP_IO_RANGE_PORT;
+	arg.start = start;
+	arg.end = end;
+
+	if (map)
+		rc = HYPERVISOR_hvm_op(HVMOP_map_io_range_to_ioreq_server, &arg);
+	else
+		rc = HYPERVISOR_hvm_op(HVMOP_unmap_io_range_from_ioreq_server, &arg);
+
+	return rc;
+}
+
+static int hvm_map_pcidev_to_ioreq_server(struct vgt_device *vgt, uint64_t sbdf)
+{
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+	xen_hvm_io_range_t arg;
+	int rc;
+
+	arg.domid = vgt->vm_id;
+	arg.id = info->iosrv_id;
+	arg.type = HVMOP_IO_RANGE_PCI;
+	arg.start = arg.end = sbdf;
+	rc = HYPERVISOR_hvm_op(HVMOP_map_io_range_to_ioreq_server, &arg);
+	if (rc < 0) {
+		printk(KERN_ERR "Cannot map pci_dev to ioreq_server: %d!\n", rc);
+		return rc;
+	}
+
+	return rc;
+}
+
+static int hvm_set_mem_type(struct vgt_device *vgt,
+	uint16_t mem_type, uint64_t first_pfn, uint64_t nr)
+{
+	xen_hvm_set_mem_type_t args;
+	int rc;
+
+	args.domid = vgt->vm_id;
+	args.hvmmem_type = mem_type;
+	args.first_pfn = first_pfn;
+	args.nr = nr;
+	rc = HYPERVISOR_hvm_op(HVMOP_set_mem_type, &args);
+
+	return rc;
+}
+
+static int hvm_wp_page_to_ioreq_server(struct vgt_device *vgt, unsigned long page, int set)
+{
+	int rc = 0;
+	uint64_t start, end;
+	uint16_t mem_type;
+
+	start = page << PAGE_SHIFT;
+	end = ((page + 1) << PAGE_SHIFT) - 1;
+
+	rc = hvm_map_io_range_to_ioreq_server(vgt, 1, start, end, set);
+	if (rc < 0) {
+		printk(KERN_ERR "Failed to %s page 0x%lx to ioreq_server: %d!\n",
+			set ? "map":"unmap", page , rc);
+		return rc;
+	}
+
+	mem_type = set ? HVMMEM_mmio_write_dm : HVMMEM_ram_rw;
+	rc = hvm_set_mem_type(vgt, mem_type, page, 1);
+	if (rc < 0) {
+		printk(KERN_ERR "Failed to set mem type of page 0x%lx to %s!\n", page,
+			set ? "HVMMEM_mmio_write_dm":"HVMMEM_ram_rw");
+		return rc;
+	}
+	return rc;
+}
+
+static int xen_set_trap_area(struct vgt_device *vgt, uint64_t start, uint64_t end, bool map)
+{
+	if (!gvt_pci_mmio_is_enabled(vgt))
+		return 0;
+
+	return hvm_map_io_range_to_ioreq_server(vgt, 1, start, end, map);
+}
+
+static struct vm_struct *xen_map_iopage(struct vgt_device *vgt)
+{
+	uint64_t ioreq_pfn;
+	int rc;
+
+	rc = hvm_create_iorequest_server(vgt);
+	if (rc < 0)
+		return NULL;
+	rc = hvm_get_ioreq_pfn(vgt, &ioreq_pfn);
+	if (rc < 0) {
+		hvm_destroy_iorequest_server(vgt);
+		return NULL;
+	}
+
+	return xen_remap_domain_mfn_range_in_kernel(ioreq_pfn, 1, vgt->vm_id);
+}
+
+static bool xen_set_guest_page_writeprotection(struct vgt_device *vgt,
+		guest_page_t *guest_page)
+{
+	int r;
+
+	if (guest_page->writeprotection)
+		return true;
+
+	r = hvm_wp_page_to_ioreq_server(vgt, guest_page->gfn, 1);
+	if (r) {
+		gvt_err("fail to set write protection.\n");
+		return false;
+	}
+
+	guest_page->writeprotection = true;
+
+	atomic_inc(&vgt->gtt.n_write_protected_guest_page);
+
+	return true;
+}
+
+static bool xen_clear_guest_page_writeprotection(struct vgt_device *vgt,
+		guest_page_t *guest_page)
+{
+	int r;
+
+	if (!guest_page->writeprotection)
+		return true;
+
+	r = hvm_wp_page_to_ioreq_server(vgt, guest_page->gfn, 0);
+	if (r) {
+		gvt_err("fail to clear write protection.\n");
+		return false;
+	}
+
+	guest_page->writeprotection = false;
+
+	atomic_dec(&vgt->gtt.n_write_protected_guest_page);
+
+	return true;
+}
+
+static int xen_detect_host(void)
+{
+	return xen_initial_domain();
+}
+
+static int xen_virt_to_mfn(void *addr)
+{
+	return virt_to_mfn(addr);
+}
+
+static void *xen_mfn_to_virt(int mfn)
+{
+	return mfn_to_virt(mfn);
+}
+
+static int xen_inject_msi(int vm_id, u32 addr_lo, u16 data)
+{
+	struct xen_hvm_inject_msi info = {
+		.domid	= vm_id,
+		.addr	= addr_lo, /* only low addr used */
+		.data	= data,
+	};
+
+	return HYPERVISOR_hvm_op(HVMOP_inject_msi, &info);
+}
+
+static int vgt_hvm_vmem_init(struct vgt_device *vgt)
+{
+	unsigned long i, j, gpfn, count;
+	unsigned long nr_low_1mb_bkt, nr_high_bkt, nr_high_4k_bkt;
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+
+	if (!vgt->vm_id)
+		return 0;
+
+	ASSERT(info->vmem_vma == NULL && info->vmem_vma_low_1mb == NULL);
+
+	info->vmem_sz = xen_get_max_gpfn(vgt->vm_id) + 1;
+	info->vmem_sz <<= PAGE_SHIFT;
+
+	/* warn on non-1MB-aligned memory layout of HVM */
+	if (info->vmem_sz & ~VMEM_BUCK_MASK)
+		gvt_err("VM%d: vmem_sz=0x%llx!\n", vgt->vm_id, info->vmem_sz);
+
+	nr_low_1mb_bkt = VMEM_1MB >> PAGE_SHIFT;
+	nr_high_bkt = (info->vmem_sz >> VMEM_BUCK_SHIFT);
+	nr_high_4k_bkt = (info->vmem_sz >> PAGE_SHIFT);
+
+	info->vmem_vma_low_1mb =
+		vzalloc(sizeof(*info->vmem_vma) * nr_low_1mb_bkt);
+	info->vmem_vma =
+		vzalloc(sizeof(*info->vmem_vma) * nr_high_bkt);
+	info->vmem_vma_4k =
+		vzalloc(sizeof(*info->vmem_vma) * nr_high_4k_bkt);
+
+	if (info->vmem_vma_low_1mb == NULL || info->vmem_vma == NULL ||
+		info->vmem_vma_4k == NULL) {
+		gvt_err("Insufficient memory for vmem_vma, vmem_sz=0x%llx\n",
+				info->vmem_sz );
+		goto err;
+	}
+
+	/* map the low 1MB memory */
+	for (i = 0; i < nr_low_1mb_bkt; i++) {
+		info->vmem_vma_low_1mb[i] =
+			xen_remap_domain_mfn_range_in_kernel(i, 1, vgt->vm_id);
+
+		if (info->vmem_vma_low_1mb[i] != NULL)
+			continue;
+
+		/* Don't warn on [0xa0000, 0x100000): a known non-RAM hole */
+		if (i < (0xa0000 >> PAGE_SHIFT))
+			printk(KERN_ERR "GVT: VM%d: can't map GPFN %ld!\n",
+				vgt->vm_id, i);
+	}
+
+	printk("start vmem_map\n");
+	count = 0;
+	/* map the >1MB memory */
+	for (i = 1; i < nr_high_bkt; i++) {
+		gpfn = i << (VMEM_BUCK_SHIFT - PAGE_SHIFT);
+		info->vmem_vma[i] = xen_remap_domain_mfn_range_in_kernel(
+				gpfn, VMEM_BUCK_SIZE >> PAGE_SHIFT, vgt->vm_id);
+
+		if (info->vmem_vma[i] != NULL)
+			continue;
+
+
+		/* for <4G GPFNs: skip the hole after low_mem_max_gpfn */
+		if (gpfn < (1 << (32 - PAGE_SHIFT)) &&
+			vgt->low_mem_max_gpfn != 0 &&
+			gpfn > vgt->low_mem_max_gpfn)
+			continue;
+
+		for (j = gpfn;
+		     j < ((i + 1) << (VMEM_BUCK_SHIFT - PAGE_SHIFT));
+		     j++) {
+			info->vmem_vma_4k[j] = xen_remap_domain_mfn_range_in_kernel(j, 1, vgt->vm_id);
+
+			if (info->vmem_vma_4k[j]) {
+				count++;
+				printk(KERN_ERR "map 4k gpa (%lx)\n", j << PAGE_SHIFT);
+			}
+		}
+
+		/* To reduce the number of error messages (some of them, due to
+		 * the MMIO hole, are spurious and harmless) we only print a
+		 * message at every 64MB boundary or for >4GB memory.
+		 */
+		if ((i % 64 == 0) || (i >= (1ULL << (32 - VMEM_BUCK_SHIFT))))
+			printk(KERN_ERR "GVT: VM%d: can't map %ldMB\n",
+				vgt->vm_id, i);
+	}
+	printk("end vmem_map (%ld 4k mappings)\n", count);
+
+	return 0;
+err:
+	vfree(info->vmem_vma);
+	vfree(info->vmem_vma_low_1mb);
+	vfree(info->vmem_vma_4k);
+	info->vmem_vma = info->vmem_vma_low_1mb = info->vmem_vma_4k = NULL;
+	return -ENOMEM;
+}
+
+static void vgt_vmem_destroy(struct vgt_device *vgt)
+{
+	int i, j;
+	unsigned long nr_low_1mb_bkt, nr_high_bkt, nr_high_bkt_4k;
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+
+	if (vgt->vm_id == 0)
+		return;
+
+	/*
+	 * Maybe the VM hasn't accessed GEN MMIO (e.g., it is still in the
+	 * legacy VGA mode), so no mapping has been created yet.
+	 */
+	if (info->vmem_vma == NULL && info->vmem_vma_low_1mb == NULL)
+		return;
+
+	ASSERT(info->vmem_vma != NULL && info->vmem_vma_low_1mb != NULL);
+
+	nr_low_1mb_bkt = VMEM_1MB >> PAGE_SHIFT;
+	nr_high_bkt = (info->vmem_sz >> VMEM_BUCK_SHIFT);
+	nr_high_bkt_4k = (info->vmem_sz >> PAGE_SHIFT);
+
+	for (i = 0; i < nr_low_1mb_bkt; i++) {
+		if (info->vmem_vma_low_1mb[i] == NULL)
+			continue;
+		xen_unmap_domain_mfn_range_in_kernel(info->vmem_vma_low_1mb[i],
+				1, vgt->vm_id);
+	}
+
+	for (i = 1; i < nr_high_bkt; i++) {
+		if (info->vmem_vma[i] == NULL) {
+			for (j = (i << (VMEM_BUCK_SHIFT - PAGE_SHIFT));
+			     j < ((i + 1) << (VMEM_BUCK_SHIFT - PAGE_SHIFT));
+			     j++) {
+				if (info->vmem_vma_4k[j] == NULL)
+					continue;
+				xen_unmap_domain_mfn_range_in_kernel(
+					info->vmem_vma_4k[j], 1, vgt->vm_id);
+			}
+			continue;
+		}
+		xen_unmap_domain_mfn_range_in_kernel(
+			info->vmem_vma[i], VMEM_BUCK_SIZE >> PAGE_SHIFT,
+			vgt->vm_id);
+	}
+
+	vfree(info->vmem_vma);
+	vfree(info->vmem_vma_low_1mb);
+	vfree(info->vmem_vma_4k);
+}
+
+static int _hvm_mmio_emulation(struct vgt_device *vgt, struct ioreq *req)
+{
+	int i, sign;
+	void *gva;
+	unsigned long gpa;
+	uint64_t base = gvt_mmio_bar_base(vgt);
+	uint64_t tmp;
+	int pvinfo_page;
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+
+	if (info->vmem_vma == NULL) {
+		tmp = gvt_host.mpt_ops->pa_to_mmio_offset(vgt, req->addr);
+		pvinfo_page = (tmp >= VGT_PVINFO_PAGE
+				&& tmp < (VGT_PVINFO_PAGE + VGT_PVINFO_SIZE));
+		/*
+		 * hvmloader will read PVINFO to identify if HVM is in GVT
+		 * or VTD. So we don't trigger HVM mapping logic here.
+		 */
+		if (!pvinfo_page && vgt_hvm_vmem_init(vgt) < 0) {
+			gvt_err("can not map the memory of VM%d!!!\n", vgt->vm_id);
+			XEN_ASSERT_VM(info->vmem_vma != NULL, vgt);
+			return -EINVAL;
+		}
+	}
+
+	sign = req->df ? -1 : 1;
+
+	if (req->dir == IOREQ_READ) {
+		/* MMIO READ */
+		if (!req->data_is_ptr) {
+			if (req->count != 1)
+				goto err_ioreq_count;
+
+			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_read: target register (%lx).\n",
+			//	(unsigned long)req->addr);
+			if (!gvt_host.emulate_ops->emulate_mmio_read(vgt, req->addr, &req->data, req->size))
+				return -EINVAL;
+		}
+		else {
+			if ((req->addr + sign * req->count * req->size < base)
+			   || (req->addr + sign * req->count * req->size >=
+				base + vgt->state.cfg.bar_size[0]))
+				goto err_ioreq_range;
+			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_read: rep %d target memory %lx, slow!\n",
+			//	req->count, (unsigned long)req->addr);
+
+			for (i = 0; i < req->count; i++) {
+				if (!gvt_host.emulate_ops->emulate_mmio_read(vgt, req->addr + sign * i * req->size,
+					&tmp, req->size))
+					return -EINVAL;
+				gpa = req->data + sign * i * req->size;
+				if(!vgt->vm_id)
+					gva = (char *)xen_mfn_to_virt(gpa >> PAGE_SHIFT) + offset_in_page(gpa);
+				else
+					gva = xen_gpa_to_va(vgt, gpa);
+				if (gva) {
+					memcpy(gva, &tmp, req->size);
+				} else
+					gvt_err("VM %d is trying to store mmio data block to invalid gpa: 0x%lx.\n", vgt->vm_id, gpa);
+			}
+		}
+	}
+	else { /* MMIO Write */
+		if (!req->data_is_ptr) {
+			if (req->count != 1)
+				goto err_ioreq_count;
+			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_write: target register (%lx).\n", (unsigned long)req->addr);
+			if (!gvt_host.emulate_ops->emulate_mmio_write(vgt, req->addr, &req->data, req->size))
+				return -EINVAL;
+		}
+		else {
+			if ((req->addr + sign * req->count * req->size < base)
+			    || (req->addr + sign * req->count * req->size >=
+				base + vgt->state.cfg.bar_size[0]))
+				goto err_ioreq_range;
+			//vgt_dbg(GVT_DBG_GENERIC,"HVM_MMIO_write: rep %d target memory %lx, slow!\n",
+			//	req->count, (unsigned long)req->addr);
+
+			for (i = 0; i < req->count; i++) {
+				gpa = req->data + sign * i * req->size;
+				if(!vgt->vm_id)
+					gva = (char *)xen_mfn_to_virt(gpa >> PAGE_SHIFT) + offset_in_page(gpa);
+				else
+					gva = xen_gpa_to_va(vgt, gpa);
+
+				if (gva != NULL)
+					memcpy(&tmp, gva, req->size);
+				else {
+					tmp = 0;
+					printk(KERN_ERR "GVT: can not read gpa = 0x%lx!!!\n", gpa);
+				}
+				if (!gvt_host.emulate_ops->emulate_mmio_write(vgt, req->addr + sign * i * req->size, &tmp, req->size))
+					return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+
+err_ioreq_count:
+	gvt_err("VM(%d): Unexpected %s request count(%d)\n",
+		vgt->vm_id, req->dir == IOREQ_READ ? "read" : "write",
+		req->count);
+	return -EINVAL;
+
+err_ioreq_range:
+	gvt_err("VM(%d): Invalid %s request addr end(%016llx)\n",
+		vgt->vm_id, req->dir == IOREQ_READ ? "read" : "write",
+		req->addr + sign * req->count * req->size);
+	return -ERANGE;
+}
+
+static bool vgt_hvm_write_cfg_space(struct vgt_device *vgt,
+	uint64_t addr, unsigned int bytes, unsigned long val)
+{
+	/* Low 32 bit of addr is real address, high 32 bit is bdf */
+	unsigned int port = addr & 0xffffffff;
+
+	ASSERT(((bytes == 4) && ((port & 3) == 0)) ||
+		((bytes == 2) && ((port & 1) == 0)) || (bytes == 1));
+	gvt_host.emulate_ops->emulate_cfg_write(vgt, port, &val, bytes);
+	return true;
+}
+
+static bool vgt_hvm_read_cfg_space(struct vgt_device *vgt,
+	uint64_t addr, unsigned int bytes, unsigned long *val)
+{
+	unsigned long data;
+	/* Low 32 bit of addr is real address, high 32 bit is bdf */
+	unsigned int port = addr & 0xffffffff;
+
+	ASSERT (((bytes == 4) && ((port & 3) == 0)) ||
+		((bytes == 2) && ((port & 1) == 0)) || (bytes == 1));
+	gvt_host.emulate_ops->emulate_cfg_read(vgt, port, &data, bytes);
+	memcpy(val, &data, bytes);
+	return true;
+}
+
+static int _hvm_pio_emulation(struct vgt_device *vgt, struct ioreq *ioreq)
+{
+	int sign;
+
+	sign = ioreq->df ? -1 : 1;
+
+	if (ioreq->dir == IOREQ_READ) {
+		/* PIO READ */
+		if (!ioreq->data_is_ptr) {
+			if(!vgt_hvm_read_cfg_space(vgt,
+				ioreq->addr,
+				ioreq->size,
+				(unsigned long*)&ioreq->data))
+				return -EINVAL;
+		} else {
+			printk(KERN_ERR "GVT: _hvm_pio_emulation read data_ptr %lx\n",
+			(long)ioreq->data);
+			goto err_data_ptr;
+		}
+	} else {
+		/* PIO WRITE */
+		if (!ioreq->data_is_ptr) {
+			if (!vgt_hvm_write_cfg_space(vgt,
+				ioreq->addr,
+				ioreq->size,
+				(unsigned long)ioreq->data))
+				return -EINVAL;
+		} else {
+			printk(KERN_ERR "GVT: _hvm_pio_emulation write data_ptr %lx\n",
+			(long)ioreq->data);
+			goto err_data_ptr;
+		}
+	}
+	return 0;
+err_data_ptr:
+	/* The data pointer of the emulation request is a guest physical
+	 * address so far, which is fine for Qemu emulation but hard for
+	 * the GVT driver, which doesn't know the gpn_2_mfn translation.
+	 * We may ask the hypervisor to use an mfn for the GVT driver.
+	 * We mark it as unsupported in case a guest really uses it.
+	 */
+	gvt_err("VM(%d): Unsupported %s data_ptr(%lx)\n",
+		vgt->vm_id, ioreq->dir == IOREQ_READ ? "read" : "write",
+		(long)ioreq->data);
+	return -EINVAL;
+}
+
+#define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
+
+static int vgt_hvm_do_ioreq(struct vgt_device *vgt, struct ioreq *ioreq)
+{
+	struct pgt_device *pdev = vgt->pdev;
+	struct pci_dev *pci_dev = pdev->dev_priv->dev->pdev;
+	uint64_t bdf = PCI_BDF2(pci_dev->bus->number, pci_dev->devfn);
+
+	/* When using the ioreq server, sometimes an event channel
+	 * notification is received with an invalid ioreq. The root
+	 * cause is not known yet; work around it here.
+	 */
+	if (ioreq->state == STATE_IOREQ_NONE)
+		return 0;
+
+	if (ioreq->type == IOREQ_TYPE_INVALIDATE)
+		return 0;
+
+	switch (ioreq->type) {
+	case IOREQ_TYPE_PCI_CONFIG:
+		/* High 32 bit of ioreq->addr is bdf */
+		if ((ioreq->addr >> 32) != bdf) {
+			printk(KERN_ERR "GVT: Unexpected PCI Dev %lx emulation\n",
+				(unsigned long)(ioreq->addr >> 32));
+			return -EINVAL;
+		} else
+			return _hvm_pio_emulation(vgt, ioreq);
+		break;
+	case IOREQ_TYPE_COPY:	/* MMIO */
+		return _hvm_mmio_emulation(vgt, ioreq);
+		break;
+	default:
+		printk(KERN_ERR "GVT: Unknown ioreq type %x addr %llx size %u state %u\n",
+			ioreq->type, ioreq->addr, ioreq->size, ioreq->state);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct ioreq *vgt_get_hvm_ioreq(struct vgt_device *vgt, int vcpu)
+{
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+	return &(info->iopage->vcpu_ioreq[vcpu]);
+}
+
+static int vgt_emulation_thread(void *priv)
+{
+	struct vgt_device *vgt = (struct vgt_device *)priv;
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+
+	int vcpu;
+	int nr_vcpus = info->nr_vcpu;
+
+	struct ioreq *ioreq;
+	int irq, ret;
+
+	gvt_info("start kthread for VM%d\n", vgt->vm_id);
+
+	ASSERT(info->nr_vcpu <= MAX_HVM_VCPUS_SUPPORTED);
+
+	set_freezable();
+	while (1) {
+		ret = wait_event_freezable(info->io_event_wq,
+			kthread_should_stop() ||
+			bitmap_weight(info->ioreq_pending, nr_vcpus));
+
+		if (kthread_should_stop())
+			return 0;
+
+		if (ret)
+			gvt_err("Emulation thread(%d) waken up"
+				 "by unexpected signal!\n", vgt->vm_id);
+
+		for (vcpu = 0; vcpu < nr_vcpus; vcpu++) {
+			if (!test_and_clear_bit(vcpu, info->ioreq_pending))
+				continue;
+
+			ioreq = vgt_get_hvm_ioreq(vgt, vcpu);
+
+			if (vgt_hvm_do_ioreq(vgt, ioreq)) {
+				xen_pause_domain(vgt->vm_id);
+				xen_shutdown_domain(vgt->vm_id);
+			}
+
+			ioreq->state = STATE_IORESP_READY;
+
+			irq = info->evtchn_irq[vcpu];
+			notify_remote_via_irq(irq);
+		}
+	}
+
+	BUG(); /* It's actually impossible to reach here */
+	return 0;
+}
+
+static inline void vgt_raise_emulation_request(struct vgt_device *vgt,
+	int vcpu)
+{
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+	set_bit(vcpu, info->ioreq_pending);
+	if (waitqueue_active(&info->io_event_wq))
+		wake_up(&info->io_event_wq);
+}
+
+static irqreturn_t vgt_hvm_io_req_handler(int irq, void* dev)
+{
+	struct vgt_device *vgt;
+	struct gvt_hvm_info *info;
+	int vcpu;
+
+	vgt = (struct vgt_device *)dev;
+	info = vgt->hypervisor_data;
+
+	for (vcpu = 0; vcpu < info->nr_vcpu; vcpu++) {
+		if (info->evtchn_irq[vcpu] == irq)
+			break;
+	}
+	if (vcpu == info->nr_vcpu) {
+		/* oops, the irq is not a registered one */
+		gvt_info("Received an IOREQ w/o vcpu target\n");
+		gvt_info("Possibly a false request from event binding\n");
+		return IRQ_NONE;
+	}
+
+	vgt_raise_emulation_request(vgt, vcpu);
+
+	return IRQ_HANDLED;
+}
+
+static void xen_hvm_exit(struct vgt_device *vgt)
+{
+	struct gvt_hvm_info *info;
+	int vcpu;
+
+	info = vgt->hypervisor_data;
+
+	if (info == NULL)
+		return;
+
+	if (info->emulation_thread != NULL)
+		kthread_stop(info->emulation_thread);
+
+	if (!info->nr_vcpu || info->evtchn_irq == NULL)
+		goto out1;
+
+	if (info->iosrv_id != 0)
+		hvm_destroy_iorequest_server(vgt);
+
+	for (vcpu = 0; vcpu < info->nr_vcpu; vcpu++){
+		if(info->evtchn_irq[vcpu] >= 0)
+			unbind_from_irqhandler(info->evtchn_irq[vcpu], vgt);
+	}
+
+	if (info->iopage_vma != NULL)
+		xen_unmap_domain_mfn_range_in_kernel(info->iopage_vma, 1, vgt->vm_id);
+
+	kfree(info->evtchn_irq);
+
+out1:
+	vgt_vmem_destroy(vgt);
+	kfree(info);
+}
+
+static int xen_hvm_init(struct vgt_device *vgt)
+{
+	struct gvt_hvm_info *info;
+	int vcpu, irq, rc = 0;
+	struct task_struct *thread;
+	struct pgt_device *pdev = vgt->pdev;
+	struct pci_dev *pci_dev = pdev->dev_priv->dev->pdev;
+
+	info = kzalloc(sizeof(struct gvt_hvm_info), GFP_KERNEL);
+	if (info == NULL)
+		return -ENOMEM;
+
+	vgt->hypervisor_data = info;
+
+	info->iopage_vma = xen_map_iopage(vgt);
+	if (info->iopage_vma == NULL) {
+		printk(KERN_ERR "Failed to map HVM I/O page for VM%d\n", vgt->vm_id);
+		rc = -EFAULT;
+		goto err;
+	}
+	info->iopage = info->iopage_vma->addr;
+
+	init_waitqueue_head(&info->io_event_wq);
+
+	info->nr_vcpu = xen_get_nr_vcpu(vgt->vm_id);
+	ASSERT(info->nr_vcpu > 0);
+	ASSERT(info->nr_vcpu <= MAX_HVM_VCPUS_SUPPORTED);
+
+	info->evtchn_irq = kmalloc(info->nr_vcpu * sizeof(int), GFP_KERNEL);
+	if (info->evtchn_irq == NULL){
+		rc = -ENOMEM;
+		goto err;
+	}
+	for( vcpu = 0; vcpu < info->nr_vcpu; vcpu++ )
+		info->evtchn_irq[vcpu] = -1;
+
+	rc = hvm_map_pcidev_to_ioreq_server(vgt, PCI_BDF2(pci_dev->bus->number, pci_dev->devfn));
+	if (rc < 0)
+		goto err;
+	rc = hvm_toggle_iorequest_server(vgt, 1);
+	if (rc < 0)
+		goto err;
+
+	for (vcpu = 0; vcpu < info->nr_vcpu; vcpu++){
+		irq = bind_interdomain_evtchn_to_irqhandler( vgt->vm_id,
+				info->iopage->vcpu_ioreq[vcpu].vp_eport,
+				vgt_hvm_io_req_handler, 0,
+				"vgt", vgt );
+		if (irq < 0) {
+			rc = irq;
+			printk(KERN_ERR "Failed to bind event channel for vgt HVM IO handler, rc=%d\n", rc);
+			goto err;
+		}
+		info->evtchn_irq[vcpu] = irq;
+	}
+
+	thread = kthread_run(vgt_emulation_thread, vgt,
+			"vgt_emulation:%d", vgt->vm_id);
+	if(IS_ERR(thread))
+		goto err;
+	info->emulation_thread = thread;
+
+	return 0;
+
+err:
+	xen_hvm_exit(vgt);
+	return rc;
+}
+
+static void *xen_gpa_to_va(struct vgt_device *vgt, unsigned long gpa)
+{
+	unsigned long buck_index, buck_4k_index;
+	struct gvt_hvm_info *info = vgt->hypervisor_data;
+
+	if (!vgt->vm_id)
+		return (char*)xen_mfn_to_virt(gpa>>PAGE_SHIFT) + (gpa & (PAGE_SIZE-1));
+	/*
+	 * At the beginning of _hvm_mmio_emulation(), we already initialize
+	 * info->vmem_vma and info->vmem_vma_low_1mb.
+	 */
+	ASSERT(info->vmem_vma != NULL && info->vmem_vma_low_1mb != NULL);
+
+	/* handle the low 1MB memory */
+	if (gpa < VMEM_1MB) {
+		buck_index = gpa >> PAGE_SHIFT;
+		if (!info->vmem_vma_low_1mb[buck_index])
+			return NULL;
+
+		return (char*)(info->vmem_vma_low_1mb[buck_index]->addr) +
+			(gpa & ~PAGE_MASK);
+
+	}
+
+	/* handle the >1MB memory */
+	buck_index = gpa >> VMEM_BUCK_SHIFT;
+
+	if (!info->vmem_vma[buck_index]) {
+		buck_4k_index = gpa >> PAGE_SHIFT;
+		if (!info->vmem_vma_4k[buck_4k_index]) {
+			if (buck_4k_index > vgt->low_mem_max_gpfn)
+				gvt_err("GVT failed to map gpa=0x%lx?\n", gpa);
+			return NULL;
+		}
+
+		return (char*)(info->vmem_vma_4k[buck_4k_index]->addr) +
+			(gpa & ~PAGE_MASK);
+	}
+
+	return (char*)(info->vmem_vma[buck_index]->addr) +
+		(gpa & (VMEM_BUCK_SIZE -1));
+}
+
+static bool xen_read_va(struct vgt_device *vgt, void *va, void *val,
+		int len, int atomic)
+{
+	memcpy(val, va, len);
+
+	return true;
+}
+
+static bool xen_write_va(struct vgt_device *vgt, void *va, void *val,
+		int len, int atomic)
+{
+	memcpy(va, val, len);
+	return true;
+}
+
+static struct gvt_kernel_dm xengt_kdm = {
+	.name = "xengt_kdm",
+	.g2m_pfn = xen_g2m_pfn,
+	.pause_domain = xen_pause_domain,
+	.shutdown_domain = xen_shutdown_domain,
+	.map_mfn_to_gpfn = xen_map_mfn_to_gpfn,
+	.set_trap_area = xen_set_trap_area,
+	.set_wp_pages = xen_set_guest_page_writeprotection,
+	.unset_wp_pages = xen_clear_guest_page_writeprotection,
+	.detect_host = xen_detect_host,
+	.from_virt_to_mfn = xen_virt_to_mfn,
+	.from_mfn_to_virt = xen_mfn_to_virt,
+	.inject_msi = xen_inject_msi,
+	.hvm_init = xen_hvm_init,
+	.hvm_exit = xen_hvm_exit,
+	.gpa_to_va = xen_gpa_to_va,
+	.read_va = xen_read_va,
+	.write_va = xen_write_va,
+};
+EXPORT_SYMBOL(xengt_kdm);
+
+static int __init xengt_init(void)
+{
+	if (!xen_initial_domain())
+		return -EINVAL;
+	printk(KERN_INFO "xengt: loaded\n");
+	return 0;
+}
+
+static void __exit xengt_exit(void)
+{
+	printk(KERN_INFO "xengt: unloaded\n");
+}
+
+module_init(xengt_init);
+module_exit(xengt_exit);
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
index 956a046..20577cc 100644
--- a/include/xen/interface/hvm/hvm_op.h
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -21,6 +21,8 @@ 
 #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
 #define __XEN_PUBLIC_HVM_HVM_OP_H__
 
+#include <xen/interface/event_channel.h>
+
 /* Get/set subcommands: the second argument of the hypercall is a
  * pointer to a xen_hvm_param struct. */
 #define HVMOP_set_param           0
@@ -42,12 +44,41 @@  struct xen_hvm_pagetable_dying {
 };
 typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
 DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
- 
+
+/* MSI injection for emulated devices */
+#define HVMOP_inject_msi         16
+struct xen_hvm_inject_msi {
+    /* Domain to be injected */
+    domid_t   domid;
+    /* Data -- lower 32 bits */
+    uint32_t  data;
+    /* Address (0xfeexxxxx) */
+    uint64_t  addr;
+};
+typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_inject_msi_t);
+
 enum hvmmem_type_t {
     HVMMEM_ram_rw,             /* Normal read/write guest RAM */
     HVMMEM_ram_ro,             /* Read-only; writes are discarded */
     HVMMEM_mmio_dm,            /* Reads and write go to the device model */
+    HVMMEM_mmio_write_dm       /* Read-only; writes go to the device model */
+};
+
+#define HVMOP_set_mem_type    8
+/* Notify that a region of memory is to be treated in a specific way. */
+struct xen_hvm_set_mem_type {
+        /* Domain to be updated. */
+        domid_t domid;
+        /* Memory type */
+        uint16_t hvmmem_type;
+        /* Number of pages. */
+        uint32_t nr;
+        /* First pfn. */
+        uint64_t first_pfn;
 };
+typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_mem_type_t);
 
 #define HVMOP_get_mem_type    15
 /* Return hvmmem_type_t for the specified pfn. */
@@ -62,4 +93,148 @@  struct xen_hvm_get_mem_type {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type);
 
+#define HVMOP_vgt_wp_pages         27  /* writeprotection to guest pages */
+#define MAX_WP_BATCH_PAGES         128
+struct xen_hvm_vgt_wp_pages {
+	uint16_t domid;
+	uint16_t set;            /* 1: set WP, 0: remove WP */
+	uint16_t nr_pages;
+	unsigned long  wp_pages[MAX_WP_BATCH_PAGES];
+};
+typedef struct xen_hvm_vgt_wp_pages xen_hvm_vgt_wp_pages_t;
+
+/*
+ * IOREQ Servers
+ *
+ * The interface between an I/O emulator and Xen is called an IOREQ Server.
+ * A domain supports a single 'legacy' IOREQ Server which is instantiated if
+ * parameter...
+ *
+ * HVM_PARAM_IOREQ_PFN is read (to get the gmfn containing the synchronous
+ * ioreq structures), or...
+ * HVM_PARAM_BUFIOREQ_PFN is read (to get the gmfn containing the buffered
+ * ioreq ring), or...
+ * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that Xen uses
+ * to request buffered I/O emulation).
+ *
+ * The following hypercalls facilitate the creation of IOREQ Servers for
+ * 'secondary' emulators which are invoked to implement port I/O, memory, or
+ * PCI config space ranges which they explicitly register.
+ */
+typedef uint16_t ioservid_t;
+
+/*
+ * HVMOP_create_ioreq_server: Instantiate a new IOREQ Server for a secondary
+ *                            emulator servicing domain <domid>.
+ *
+ * The <id> handed back is unique for <domid>. If <handle_bufioreq> is zero
+ * the buffered ioreq ring will not be allocated and hence all emulation
+ * requests to this server will be synchronous.
+ */
+#define HVMOP_create_ioreq_server 17
+struct xen_hvm_create_ioreq_server {
+    domid_t domid;           /* IN - domain to be serviced */
+    uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */
+    ioservid_t id;           /* OUT - server id */
+};
+typedef struct xen_hvm_create_ioreq_server xen_hvm_create_ioreq_server_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_create_ioreq_server_t);
+
+/*
+ * HVMOP_get_ioreq_server_info: Get all the information necessary to access
+ *                              IOREQ Server <id>.
+ *
+ * The emulator needs to map the synchronous ioreq structures and buffered
+ * ioreq ring (if it exists) that Xen uses to request emulation. These are
+ * hosted in domain <domid>'s gmfns <ioreq_pfn> and <bufioreq_pfn>
+ * respectively. In addition, if the IOREQ Server is handling buffered
+ * emulation requests, the emulator needs to bind to event channel
+ * <bufioreq_port> to listen for them. (The event channels used for
+ * synchronous emulation requests are specified in the per-CPU ioreq
+ * structures in <ioreq_pfn>).
+ * If the IOREQ Server is not handling buffered emulation requests then the
+ * values handed back in <bufioreq_pfn> and <bufioreq_port> will both be 0.
+ */
+#define HVMOP_get_ioreq_server_info 18
+struct xen_hvm_get_ioreq_server_info {
+    domid_t domid;                 /* IN - domain to be serviced */
+    ioservid_t id;                 /* IN - server id */
+    evtchn_port_t bufioreq_port;   /* OUT - buffered ioreq port */
+    uint64_t ioreq_pfn;    /* OUT - sync ioreq pfn */
+    uint64_t bufioreq_pfn; /* OUT - buffered ioreq pfn */
+};
+typedef struct xen_hvm_get_ioreq_server_info xen_hvm_get_ioreq_server_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_ioreq_server_info_t);
+
+/*
+ * HVM_map_io_range_to_ioreq_server: Register an I/O range of domain <domid>
+ *                                   for emulation by the client of IOREQ
+ *                                   Server <id>
+ * HVM_unmap_io_range_from_ioreq_server: Deregister an I/O range of <domid>
+ *                                       for emulation by the client of IOREQ
+ *                                       Server <id>
+ *
+ * There are three types of I/O that can be emulated: port I/O, memory accesses
+ * and PCI config space accesses. The <type> field denotes which type of range
+ * the <start> and <end> (inclusive) fields are specifying.
+ * PCI config space ranges are specified by segment/bus/device/function values
+ * which should be encoded using the HVMOP_PCI_SBDF helper macro below.
+ *
+ * NOTE: unless an emulation request falls entirely within a range mapped
+ * by a secondary emulator, it will not be passed to that emulator.
+ */
+#define HVMOP_map_io_range_to_ioreq_server 19
+#define HVMOP_unmap_io_range_from_ioreq_server 20
+struct xen_hvm_io_range {
+    domid_t domid;               /* IN - domain to be serviced */
+    ioservid_t id;               /* IN - server id */
+    uint32_t type;               /* IN - type of range */
+# define HVMOP_IO_RANGE_PORT   0 /* I/O port range */
+# define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */
+# define HVMOP_IO_RANGE_PCI    2 /* PCI segment/bus/dev/func range */
+    uint64_t start, end; /* IN - inclusive start and end of range */
+};
+typedef struct xen_hvm_io_range xen_hvm_io_range_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_io_range_t);
+
+#define HVMOP_PCI_SBDF(s,b,d,f)                 \
+       ((((s) & 0xffff) << 16) |                   \
+        (((b) & 0xff) << 8) |                      \
+        (((d) & 0x1f) << 3) |                      \
+        ((f) & 0x07))
+
+/*
+ * HVMOP_destroy_ioreq_server: Destroy the IOREQ Server <id> servicing domain
+ *                             <domid>.
+ *
+ * Any registered I/O ranges will be automatically deregistered.
+ */
+#define HVMOP_destroy_ioreq_server 21
+struct xen_hvm_destroy_ioreq_server {
+    domid_t domid; /* IN - domain to be serviced */
+    ioservid_t id; /* IN - server id */
+};
+typedef struct xen_hvm_destroy_ioreq_server xen_hvm_destroy_ioreq_server_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_destroy_ioreq_server_t);
+
+
+/*
+ * HVMOP_set_ioreq_server_state: Enable or disable the IOREQ Server <id> servicing
+ *                               domain <domid>.
+ *
+ * The IOREQ Server will not be passed any emulation requests until it is in the
+ * enabled state.
+ * Note that the contents of the ioreq_pfn and bufioreq_pfn (see
+ * HVMOP_get_ioreq_server_info) are not meaningful until the IOREQ Server is in
+ * the enabled state.
+ */
+#define HVMOP_set_ioreq_server_state 22
+struct xen_hvm_set_ioreq_server_state {
+    domid_t domid;   /* IN - domain to be serviced */
+    ioservid_t id;   /* IN - server id */
+    uint8_t enabled; /* IN - enabled? */
+};
+typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_ioreq_server_state_t);
+
 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/include/xen/interface/hvm/ioreq.h b/include/xen/interface/hvm/ioreq.h
new file mode 100644
index 0000000..6bbf4e4
--- /dev/null
+++ b/include/xen/interface/hvm/ioreq.h
@@ -0,0 +1,132 @@ 
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef _IOREQ_H_
+#define _IOREQ_H_
+
+#define IOREQ_READ      1
+#define IOREQ_WRITE     0
+
+#define STATE_IOREQ_NONE        0
+#define STATE_IOREQ_READY       1
+#define STATE_IOREQ_INPROCESS   2
+#define STATE_IORESP_READY      3
+
+#define IOREQ_TYPE_PIO          0 /* pio */
+#define IOREQ_TYPE_COPY         1 /* mmio ops */
+#define IOREQ_TYPE_PCI_CONFIG   2
+#define IOREQ_TYPE_TIMEOFFSET   7
+#define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
+
+/*
+ * The VM-exit dispatcher should cooperate with the instruction decoder to
+ * prepare this structure and notify the service OS and the device model
+ * (DM) by sending a virq.
+ */
+struct ioreq {
+    uint64_t addr;          /* physical address */
+    uint64_t data;          /* data (or paddr of data) */
+    uint32_t count;         /* for rep prefixes */
+    uint32_t size;          /* size in bytes */
+    uint32_t vp_eport;      /* evtchn for notifications to/from device model */
+    uint16_t _pad0;
+    uint8_t state:4;
+    uint8_t data_is_ptr:1;  /* if 1, data above is the guest paddr
+                             * of the real data to use. */
+    uint8_t dir:1;          /* 1=read, 0=write */
+    uint8_t df:1;
+    uint8_t _pad1:1;
+    uint8_t type;           /* I/O type */
+};
+typedef struct ioreq ioreq_t;
+
+struct shared_iopage {
+    struct ioreq vcpu_ioreq[1];
+};
+typedef struct shared_iopage shared_iopage_t;
+
+struct buf_ioreq {
+    uint8_t  type;   /* I/O type                    */
+    uint8_t  pad:1;
+    uint8_t  dir:1;  /* 1=read, 0=write             */
+    uint8_t  size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */
+    uint32_t addr:20;/* physical address            */
+    uint32_t data;   /* data                        */
+};
+typedef struct buf_ioreq buf_ioreq_t;
+
+#define IOREQ_BUFFER_SLOT_NUM     511 /* 8 bytes each, plus 2 4-byte indexes */
+struct buffered_iopage {
+    unsigned int read_pointer;
+    unsigned int write_pointer;
+    buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
+}; /* NB. Size of this structure must be no greater than one page. */
+typedef struct buffered_iopage buffered_iopage_t;
+
+#if defined(__ia64__)
+struct pio_buffer {
+    uint32_t page_offset;
+    uint32_t pointer;
+    uint32_t data_end;
+    uint32_t buf_size;
+    void *opaque;
+};
+
+#define PIO_BUFFER_IDE_PRIMARY   0 /* I/O port = 0x1F0 */
+#define PIO_BUFFER_IDE_SECONDARY 1 /* I/O port = 0x170 */
+#define PIO_BUFFER_ENTRY_NUM     2
+struct buffered_piopage {
+    struct pio_buffer pio[PIO_BUFFER_ENTRY_NUM];
+    uint8_t buffer[1];
+};
+#endif /* defined(__ia64__) */
+
+/*
+ * ACPI Control/Event register locations. Location is controlled by a
+ * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION.
+ */
+
+/* Version 0 (default): Traditional Xen locations. */
+#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40
+#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04)
+#define ACPI_PM_TMR_BLK_ADDRESS_V0   (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08)
+#define ACPI_GPE0_BLK_ADDRESS_V0     (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20)
+#define ACPI_GPE0_BLK_LEN_V0         0x08
+
+/* Version 1: Locations preferred by modern Qemu. */
+#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000
+#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04)
+#define ACPI_PM_TMR_BLK_ADDRESS_V1   (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08)
+#define ACPI_GPE0_BLK_ADDRESS_V1     0xafe0
+#define ACPI_GPE0_BLK_LEN_V1         0x04
+
+/* Compatibility definitions for the default location (version 0). */
+#define ACPI_PM1A_EVT_BLK_ADDRESS    ACPI_PM1A_EVT_BLK_ADDRESS_V0
+#define ACPI_PM1A_CNT_BLK_ADDRESS    ACPI_PM1A_CNT_BLK_ADDRESS_V0
+#define ACPI_PM_TMR_BLK_ADDRESS      ACPI_PM_TMR_BLK_ADDRESS_V0
+#define ACPI_GPE0_BLK_ADDRESS        ACPI_GPE0_BLK_ADDRESS_V0
+#define ACPI_GPE0_BLK_LEN            ACPI_GPE0_BLK_LEN_V0
+
+
+#endif /* _IOREQ_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
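For reference, one plausible way to drain the buffered ioreq ring described
above, assuming buffered_iopage has already been mapped into the emulator.
This is a sketch only: the dispatch on type/dir/addr is elided, and a real
consumer also needs the appropriate memory barriers around the pointer
updates.

static void example_drain_buffered_ioreqs(struct buffered_iopage *iopage)
{
	while (iopage->read_pointer != iopage->write_pointer) {
		unsigned int slot =
			iopage->read_pointer % IOREQ_BUFFER_SLOT_NUM;
		struct buf_ioreq *req = &iopage->buf_ioreq[slot];
		unsigned int qw = (req->size == 3); /* 8-byte op uses 2 slots */
		uint64_t data = req->data;

		if (qw) {
			struct buf_ioreq *next =
				&iopage->buf_ioreq[(slot + 1) %
						   IOREQ_BUFFER_SLOT_NUM];
			data |= (uint64_t)next->data << 32;
		}

		/* dispatch on req->type / req->dir / req->addr here ... */

		iopage->read_pointer += qw ? 2 : 1;
	}
}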
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index 2ecfe4f..92f18c5 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -9,6 +9,7 @@ 
 #ifndef __XEN_PUBLIC_MEMORY_H__
 #define __XEN_PUBLIC_MEMORY_H__
 
+#include <xen/interface/event_channel.h>
 #include <linux/spinlock.h>
 
 /*
@@ -141,6 +142,11 @@  struct xen_machphys_mfn_list {
 DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
 
 /*
+ * Returns the maximum GPFN in use by the guest, or a negative error code on
+ * failure.
+ */
+#define XENMEM_maximum_gpfn         14
+
+/*
  * Returns the location in virtual address space of the machine_to_phys
  * mapping table. Architectures which do not have a m2p table, or which do not
  * map it by default into guest address space, do not implement this command.
@@ -263,4 +269,26 @@  struct xen_remove_from_physmap {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
 
+/*
+ * Translate the given guest PFNs to MFNs
+ */
+#define XENMEM_get_mfn_from_pfn    25
+struct xen_get_mfn_from_pfn {
+    /*
+     * Pointer to a buffer holding the list of PFNs.
+     * IN:  the guest PFNs to be translated.
+     * OUT: the translated MFNs, or INVALID_MFN where no valid translation
+     *      exists.
+     */
+    GUEST_HANDLE(ulong) pfn_list;
+
+    /*
+     * IN: number of entries in pfn_list.
+     */
+    unsigned int nr_pfns;
+
+    /* IN: which domain */
+    domid_t domid;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_get_mfn_from_pfn);
+
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
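For reference, a hedged sketch of how the new XENMEM_get_mfn_from_pfn call
might be used to translate a single guest PFN. It relies on a hypervisor
that implements this (non-upstream) memory op, the function name is
illustrative, and a real caller may prefer a kmalloc()ed buffer over a
stack variable:

static int example_pfn_to_mfn(domid_t domid, unsigned long gpfn,
			      unsigned long *mfn)
{
	struct xen_get_mfn_from_pfn arg = {
		.nr_pfns = 1,
		.domid   = domid,
	};
	unsigned long pfn = gpfn;
	int rc;

	set_xen_guest_handle(arg.pfn_list, &pfn);

	rc = HYPERVISOR_memory_op(XENMEM_get_mfn_from_pfn, &arg);
	if (rc < 0)
		return rc;

	*mfn = pfn;	/* the buffer is overwritten with the translation */
	return 0;
}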
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 78a38f1..c7e0f32 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -756,6 +756,112 @@  struct tmem_op {
 
 DEFINE_GUEST_HANDLE(u64);
 
+/* XEN_DOMCTL_getdomaininfo */
+struct xen_domctl_getdomaininfo {
+        /* OUT variables. */
+        domid_t  domain;              /* Also echoed in domctl.domain */
+        /* Domain is scheduled to die. */
+#define _XEN_DOMINF_dying     0
+#define XEN_DOMINF_dying      (1U<<_XEN_DOMINF_dying)
+        /* Domain is an HVM guest (as opposed to a PV guest). */
+#define _XEN_DOMINF_hvm_guest 1
+#define XEN_DOMINF_hvm_guest  (1U<<_XEN_DOMINF_hvm_guest)
+        /* The guest OS has shut down. */
+#define _XEN_DOMINF_shutdown  2
+#define XEN_DOMINF_shutdown   (1U<<_XEN_DOMINF_shutdown)
+        /* Currently paused by control software. */
+#define _XEN_DOMINF_paused    3
+#define XEN_DOMINF_paused     (1U<<_XEN_DOMINF_paused)
+        /* Currently blocked pending an event.     */
+#define _XEN_DOMINF_blocked   4
+#define XEN_DOMINF_blocked    (1U<<_XEN_DOMINF_blocked)
+        /* Domain is currently running.            */
+#define _XEN_DOMINF_running   5
+#define XEN_DOMINF_running    (1U<<_XEN_DOMINF_running)
+        /* Being debugged.  */
+#define _XEN_DOMINF_debugged  6
+#define XEN_DOMINF_debugged   (1U<<_XEN_DOMINF_debugged)
+        /* XEN_DOMINF_shutdown guest-supplied code.  */
+#define XEN_DOMINF_shutdownmask 255
+#define XEN_DOMINF_shutdownshift 16
+        uint32_t flags;              /* XEN_DOMINF_* */
+        aligned_u64 tot_pages;
+        aligned_u64 max_pages;
+        aligned_u64 outstanding_pages;
+        aligned_u64 shr_pages;
+        aligned_u64 paged_pages;
+        aligned_u64 shared_info_frame; /* GMFN of shared_info struct */
+        aligned_u64 cpu_time;
+        uint32_t nr_online_vcpus;    /* Number of VCPUs currently online. */
+        uint32_t max_vcpu_id;        /* Maximum VCPUID in use by this domain. */
+        uint32_t ssidref;
+        xen_domain_handle_t handle;
+        uint32_t cpupool;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_getdomaininfo);
+
+#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
+#define XEN_DOMCTL_pausedomain                    3
+#define XEN_DOMCTL_getdomaininfo                  5
+#define XEN_DOMCTL_memory_mapping                 39
+#define XEN_DOMCTL_iomem_permission               20
+
+
+#define XEN_DOMCTL_vgt_io_trap                    700
+
+#define MAX_VGT_IO_TRAP_INFO 4
+
+struct vgt_io_trap_info {
+        uint64_t s;
+        uint64_t e;
+};
+
+struct xen_domctl_vgt_io_trap {
+        uint32_t n_pio;
+        struct vgt_io_trap_info pio[MAX_VGT_IO_TRAP_INFO];
+
+        uint32_t n_mmio;
+        struct vgt_io_trap_info mmio[MAX_VGT_IO_TRAP_INFO];
+};
+
+/* Bind machine I/O address range -> HVM address range. */
+/* XEN_DOMCTL_memory_mapping */
+#define DPCI_ADD_MAPPING        1
+#define DPCI_REMOVE_MAPPING     0
+struct xen_domctl_memory_mapping {
+        aligned_u64 first_gfn; /* first page (hvm guest phys page) in range */
+        aligned_u64 first_mfn; /* first page (machine page) in range. */
+        aligned_u64 nr_mfns;   /* number of pages in range (>0) */
+        uint32_t add_mapping;  /* Add or remove mapping */
+        uint32_t padding;      /* padding for 64-bit aligned struct */
+};
+typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_memory_mapping_t);
+
+/* XEN_DOMCTL_iomem_permission */
+struct xen_domctl_iomem_permission {
+    aligned_u64 first_mfn;/* first page (physical page number) in range */
+    aligned_u64 nr_mfns;  /* number of pages in range (>0) */
+    uint8_t  allow_access;     /* allow (!0) or deny (0) access to range? */
+};
+typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_iomem_permission_t);
+
+struct xen_domctl {
+        uint32_t cmd;
+        uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
+        domid_t  domain;
+        union {
+                struct xen_domctl_getdomaininfo     getdomaininfo;
+                struct xen_domctl_vgt_io_trap       vgt_io_trap;
+                struct xen_domctl_memory_mapping    memory_mapping;
+                struct xen_domctl_iomem_permission  iomem_perm;
+                uint8_t                             pad[256];
+        } u;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_domctl);
+
+
 #else /* __ASSEMBLY__ */
 
 /* In assembly code we cannot use C numeric constant suffixes. */
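For reference, a sketch of issuing one of the new domctls through the
HYPERVISOR_domctl hypercall added earlier in this patch. Whether dom0 is
permitted to issue domctls at all depends on the hypervisor's policy
configuration, so treat this purely as illustration (example_* name is
not part of the patch):

static int example_get_domain_info(domid_t domid,
				   struct xen_domctl_getdomaininfo *info)
{
	struct xen_domctl domctl = {
		.cmd               = XEN_DOMCTL_getdomaininfo,
		.interface_version = XEN_DOMCTL_INTERFACE_VERSION,
		.domain            = domid,
	};
	int rc;

	rc = HYPERVISOR_domctl(&domctl);
	if (rc)
		return rc;

	/* The hypervisor copies the OUT fields back into the buffer. */
	*info = domctl.u.getdomaininfo;
	return 0;
}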
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 86abe07..dde9eb0 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -123,4 +123,9 @@  static inline void xen_preemptible_hcall_end(void)
 
 #endif /* CONFIG_PREEMPT */
 
+struct vm_struct *xen_remap_domain_mfn_range_in_kernel(unsigned long mfn,
+		int nr, unsigned int domid);
+void xen_unmap_domain_mfn_range_in_kernel(struct vm_struct *area, int nr,
+		unsigned int domid);
+
 #endif /* INCLUDE_XEN_OPS_H */
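For reference, a sketch based only on the prototypes above; the error
convention (NULL on failure) of xen_remap_domain_mfn_range_in_kernel is
assumed here, and the example_* names are illustrative:

/* Map nr machine pages of a guest into dom0 kernel space. */
static void *example_map_guest_pages(unsigned long mfn, int nr,
				     unsigned int domid,
				     struct vm_struct **area_out)
{
	struct vm_struct *area;

	area = xen_remap_domain_mfn_range_in_kernel(mfn, nr, domid);
	if (!area)
		return NULL;

	*area_out = area;
	return area->addr;	/* kernel virtual address of the mapping */
}

/* Tear the mapping down again when the vGPU instance goes away. */
static void example_unmap_guest_pages(struct vm_struct *area, int nr,
				      unsigned int domid)
{
	xen_unmap_domain_mfn_range_in_kernel(area, nr, domid);
}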