@@ -411,6 +411,10 @@ struct kvm_vcpu_arch {
/* emulate context */
struct x86_emulate_ctxt emulate_ctxt;
+ struct x86_fast_string_pio_ctxt {
+ unsigned long linear_addr;
+ u8 ad_bytes;
+ } fast_string_pio_ctxt;
bool emulate_regs_need_sync_to_vcpu;
bool emulate_regs_need_sync_from_vcpu;
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
@@ -776,6 +780,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
struct x86_emulate_ctxt;
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port,
+ u8 ad_bytes_idx);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1887,21 +1887,31 @@ static int io_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
- int size, in, string;
+ int size, in, string, rep;
unsigned port;
++svm->vcpu.stat.io_exits;
string = (io_info & SVM_IOIO_STR_MASK) != 0;
+ rep = (io_info & SVM_IOIO_REP_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
- if (string || in)
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
svm->next_rip = svm->vmcb->control.exit_info_2;
- skip_emulated_instruction(&svm->vcpu);
- return kvm_fast_pio_out(vcpu, size, port);
+ if (!string && !in) {
+ skip_emulated_instruction(&svm->vcpu);
+ return kvm_fast_pio_out(vcpu, size, port);
+ } else if (string && in && rep) {
+ int addr_size = (io_info & SVM_IOIO_ASIZE_MASK) >>
+ SVM_IOIO_ASIZE_SHIFT;
+ int r = kvm_fast_string_pio_in(vcpu, size, port,
+ ffs(addr_size) - 1);
+ if (r != EMULATE_FAIL)
+ return r == EMULATE_DONE;
+ }
+
+ return emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int nmi_interception(struct vcpu_svm *svm)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -639,6 +639,7 @@ static unsigned long *vmx_msr_bitmap_longmode;
static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;
+static bool cpu_has_ins_outs_inst_info;
static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -2522,6 +2523,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
if (((vmx_msr_high >> 18) & 15) != 6)
return -EIO;
+ cpu_has_ins_outs_inst_info = vmx_msr_high & (1u << 22);
+
vmcs_conf->size = vmx_msr_high & 0x1fff;
vmcs_conf->order = get_order(vmcs_config.size);
vmcs_conf->revision_id = vmx_msr_low;
@@ -4393,23 +4396,31 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
static int handle_io(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
- int size, in, string;
+ int size, in, string, rep;
unsigned port;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- string = (exit_qualification & 16) != 0;
in = (exit_qualification & 8) != 0;
+ string = (exit_qualification & 16) != 0;
+ rep = (exit_qualification & 32) != 0;
++vcpu->stat.io_exits;
- if (string || in)
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
-
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
- skip_emulated_instruction(vcpu);
- return kvm_fast_pio_out(vcpu, size, port);
+ if (!string && !in) {
+ skip_emulated_instruction(vcpu);
+ return kvm_fast_pio_out(vcpu, size, port);
+ } else if (string && in && rep && cpu_has_ins_outs_inst_info) {
+ u32 inst_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ int r = kvm_fast_string_pio_in(vcpu, size, port,
+ (inst_info >> 7) & 7);
+ if (r != EMULATE_FAIL)
+ return r == EMULATE_DONE;
+ }
+
+ return emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static void
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3661,6 +3661,59 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
+static bool get_segment_descriptor(struct kvm_vcpu *vcpu, u16 *selector,
+ struct desc_struct *desc, u32 *base3,
+ int seg)
+{
+ struct kvm_segment var;
+
+ kvm_get_segment(vcpu, &var, seg);
+ *selector = var.selector;
+
+ if (var.unusable)
+ return false;
+
+ if (var.g)
+ var.limit >>= 12;
+ set_desc_limit(desc, var.limit);
+ set_desc_base(desc, (unsigned long)var.base);
+#ifdef CONFIG_X86_64
+ if (base3)
+ *base3 = var.base >> 32;
+#endif
+ desc->type = var.type;
+ desc->s = var.s;
+ desc->dpl = var.dpl;
+ desc->p = var.present;
+ desc->avl = var.avl;
+ desc->l = var.l;
+ desc->d = var.db;
+ desc->g = var.g;
+
+ return true;
+}
+
+static int kvm_linearize_address(struct kvm_vcpu *vcpu, enum x86emul_mode mode,
+ ulong ea, unsigned seg, unsigned size, bool write, bool fetch,
+ u8 ad_bytes, ulong *linear)
+{
+ struct x86_linearize_params param = {
+ .mode = mode,
+ .ea = ea,
+ .size = size,
+ .seg = seg,
+ .write = write,
+ .fetch = fetch,
+ .ad_bytes = ad_bytes,
+ .cpl = kvm_x86_ops->get_cpl(vcpu)
+ };
+
+ param.usable = get_segment_descriptor(vcpu, &param.sel, &param.desc,
+ NULL, seg);
+
+ return x86_linearize(&param, linear);
+}
+
static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
gpa_t *gpa, struct x86_exception *exception,
bool write)
@@ -4197,32 +4250,9 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
struct desc_struct *desc, u32 *base3,
int seg)
{
- struct kvm_segment var;
-
- kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
- *selector = var.selector;
-
- if (var.unusable)
- return false;
-
- if (var.g)
- var.limit >>= 12;
- set_desc_limit(desc, var.limit);
- set_desc_base(desc, (unsigned long)var.base);
-#ifdef CONFIG_X86_64
- if (base3)
- *base3 = var.base >> 32;
-#endif
- desc->type = var.type;
- desc->s = var.s;
- desc->dpl = var.dpl;
- desc->p = var.present;
- desc->avl = var.avl;
- desc->l = var.l;
- desc->d = var.db;
- desc->g = var.g;
- return true;
+ return get_segment_descriptor(emul_to_vcpu(ctxt), selector, desc, base3,
+ seg);
}
static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
@@ -4408,10 +4438,22 @@ static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
ctxt->mem_read.end = 0;
}
+static enum x86emul_mode get_emulation_mode(struct kvm_vcpu *vcpu)
+{
+ int cs_db, cs_l;
+
+ kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+
+ return (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
+ (kvm_get_rflags(vcpu) & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
+ cs_l ? X86EMUL_MODE_PROT64 :
+ cs_db ? X86EMUL_MODE_PROT32 :
+ X86EMUL_MODE_PROT16;
+}
+
static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
- int cs_db, cs_l;
/*
* TODO: fix emulate.c to use guest_read/write_register
@@ -4421,15 +4463,10 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
*/
cache_all_regs(vcpu);
- kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu);
ctxt->eip = kvm_rip_read(vcpu);
- ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
- (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
- cs_l ? X86EMUL_MODE_PROT64 :
- cs_db ? X86EMUL_MODE_PROT32 :
- X86EMUL_MODE_PROT16;
+ ctxt->mode = get_emulation_mode(vcpu);
ctxt->guest_mode = is_guest_mode(vcpu);
init_decode_cache(ctxt, vcpu->arch.regs);
@@ -4665,6 +4702,122 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
}
EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
+static int __kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, unsigned long addr,
+ int count)
+{
+ struct page *page;
+ gpa_t gpa;
+ char *kaddr;
+ int ret;
+
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
+
+ if (gpa == UNMAPPED_GVA ||
+ (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+ return EMULATE_FAIL;
+
+ page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+ if (is_error_page(page)) {
+ kvm_release_page_clean(page);
+ return EMULATE_FAIL;
+ }
+
+ kaddr = kmap_atomic(page);
+ kaddr += offset_in_page(gpa);
+
+ ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
+ kaddr, count);
+
+ kunmap_atomic(kaddr);
+ if (ret) {
+ u8 ad_bytes = vcpu->arch.fast_string_pio_ctxt.ad_bytes;
+ unsigned long reg;
+
+ reg = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ kvm_register_address_increment(ad_bytes, &reg, -count);
+ kvm_register_write(vcpu, VCPU_REGS_RCX, reg);
+
+ reg = kvm_register_read(vcpu, VCPU_REGS_RDI);
+ kvm_register_address_increment(ad_bytes, &reg, count * size);
+ kvm_register_write(vcpu, VCPU_REGS_RDI, reg);
+
+ kvm_release_page_dirty(page);
+ return EMULATE_DONE;
+ }
+ kvm_release_page_clean(page);
+ return EMULATE_DO_MMIO;
+}
+
+static int complete_fast_string_pio(struct kvm_vcpu *vcpu)
+{
+ unsigned long linear_addr = vcpu->arch.fast_string_pio_ctxt.linear_addr;
+ int r;
+
+ BUG_ON(!vcpu->arch.pio.count);
+
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = __kvm_fast_string_pio_in(vcpu, vcpu->arch.pio.size,
+ vcpu->arch.pio.port, linear_addr, vcpu->arch.pio.count);
+ BUG_ON(r == EMULATE_DO_MMIO);
+ if (r == EMULATE_FAIL) /* mem slot gone while we were not looking */
+ vcpu->arch.pio.count = 0; /* drop the pio data */
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ return 1;
+}
+
+int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, u8 ad_bytes_idx)
+{
+ unsigned long rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
+ unsigned long linear_addr;
+ unsigned long rcx = kvm_register_read(vcpu, VCPU_REGS_RCX), count;
+ u8 ad_bytes;
+ int r;
+
+ if (ad_bytes_idx > 2)
+ return EMULATE_FAIL;
+
+ ad_bytes = (u8[]){2, 4, 8}[ad_bytes_idx];
+
+ rcx = kvm_address_mask(ad_bytes, rcx);
+
+ if (rcx == 0) {
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
+ return EMULATE_DONE;
+ }
+
+ if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF)
+ return EMULATE_FAIL;
+
+ rdi = kvm_address_mask(ad_bytes, rdi);
+
+ r = kvm_linearize_address(vcpu, get_emulation_mode(vcpu),
+ rdi, VCPU_SREG_ES, rcx * size, true, false, ad_bytes,
+ &linear_addr);
+
+ if (r != X86EMUL_CONTINUE)
+ return EMULATE_FAIL;
+
+ count = (PAGE_SIZE - offset_in_page(linear_addr)) / size;
+
+ if (count == 0) /* 'in' crosses page boundary */
+ return EMULATE_FAIL;
+
+ count = min(count, rcx);
+
+ r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count);
+
+ if (r != EMULATE_DO_MMIO)
+ return r;
+
+ vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr;
+ vcpu->arch.fast_string_pio_ctxt.ad_bytes = ad_bytes;
+ vcpu->arch.complete_userspace_io = complete_fast_string_pio;
+ return EMULATE_DO_MMIO;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in);
+
static void tsc_bad(void *info)
{
__this_cpu_write(cpu_tsc_khz, 0);
"rep ins" emulation is going through emulator now. This is slow because emulator knows how to write back only one datum at a time. This patch provides fast path for the instruction in certain conditions. The conditions are: DF flag is not set, destination memory is RAM and single datum does not cross page boundary. If fast path code fails it falls back to emulation. Signed-off-by: Gleb Natapov <gleb@redhat.com> --- arch/x86/include/asm/kvm_host.h | 6 ++ arch/x86/kvm/svm.c | 20 +++- arch/x86/kvm/vmx.c | 25 +++-- arch/x86/kvm/x86.c | 217 +++++++++++++++++++++++++++++++++------ 4 files changed, 224 insertions(+), 44 deletions(-)