Message ID | 20200218232953.5724-9-sean.j.christopherson@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: x86: Allow userspace to disable the emulator | expand |
Sean Christopherson <sean.j.christopherson@intel.com> writes: > Allocate the emulation context instead of embedding it in struct > kvm_vcpu_arch. > > Dynamic allocation provides several benefits: > > - Shrinks the size x86 vcpus by ~2.5k bytes, dropping them back below > the PAGE_ALLOC_COSTLY_ORDER threshold. > - Allows for dropping the include of kvm_emulate.h from asm/kvm_host.h > and moving kvm_emulate.h into KVM's private directory. > - Allows a reducing KVM's attack surface by shrinking the amount of > vCPU data that is exposed to usercopy. > - Allows a future patch to disable the emulator entirely, which may or > may not be a realistic endeavor. > > Mark the entire struct as valid for usercopy to maintain existing > behavior with respect to hardened usercopy. Future patches can shrink > the usercopy range to cover only what is necessary. > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> > --- > arch/x86/include/asm/kvm_emulate.h | 1 + > arch/x86/include/asm/kvm_host.h | 2 +- > arch/x86/kvm/x86.c | 61 ++++++++++++++++++++++++++---- > 3 files changed, 55 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h > index 03946eb3e2b9..2f0a600efdff 100644 > --- a/arch/x86/include/asm/kvm_emulate.h > +++ b/arch/x86/include/asm/kvm_emulate.h > @@ -293,6 +293,7 @@ enum x86emul_mode { > #define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) > > struct x86_emulate_ctxt { > + void *vcpu; Why 'void *'? I changed this to 'struct kvm_vcpu *' and it seems to compile just fine... > const struct x86_emulate_ops *ops; > > /* Register state before/after emulation. */ > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index c750cd957558..e069f71667b1 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -678,7 +678,7 @@ struct kvm_vcpu_arch { > > /* emulate context */ > > - struct x86_emulate_ctxt emulate_ctxt; > + struct x86_emulate_ctxt *emulate_ctxt; > bool emulate_regs_need_sync_to_vcpu; > bool emulate_regs_need_sync_from_vcpu; > int (*complete_userspace_io)(struct kvm_vcpu *vcpu); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 0e67f90db9a6..5ab7d4283185 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -81,7 +81,7 @@ u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; > EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); > > #define emul_to_vcpu(ctxt) \ > - container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) > + ((struct kvm_vcpu *)(ctxt)->vcpu) > > /* EFER defaults: > * - enable syscall per default because its emulated by KVM > @@ -230,6 +230,19 @@ u64 __read_mostly host_xcr0; > struct kmem_cache *x86_fpu_cache; > EXPORT_SYMBOL_GPL(x86_fpu_cache); > > +static struct kmem_cache *x86_emulator_cache; > + > +static struct kmem_cache *kvm_alloc_emulator_cache(void) > +{ > + return kmem_cache_create_usercopy("x86_emulator", > + sizeof(struct x86_emulate_ctxt), > + __alignof__(struct x86_emulate_ctxt), > + SLAB_ACCOUNT, > + 0, > + sizeof(struct x86_emulate_ctxt), > + NULL); > +} > + > static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); > > static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) > @@ -6414,6 +6427,23 @@ static bool inject_emulated_exception(struct x86_emulate_ctxt *ctxt) > return false; > } > > +static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu) > +{ > + struct x86_emulate_ctxt *ctxt; > + > + ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT); > + if (!ctxt) { > + pr_err("kvm: failed to allocate vcpu's emulator\n"); > + return NULL; > + } > + > + ctxt->vcpu = vcpu; > + ctxt->ops = &emulate_ops; > + vcpu->arch.emulate_ctxt = ctxt; > + > + return ctxt; > +} > + > static void init_emulate_ctxt(struct x86_emulate_ctxt *ctxt) > { > struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); > @@ -6440,7 +6470,7 @@ static void init_emulate_ctxt(struct x86_emulate_ctxt *ctxt) > > void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) > { > - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; > + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; > int ret; > > init_emulate_ctxt(ctxt); > @@ -6756,7 +6786,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > int emulation_type, void *insn, int insn_len) > { > int r; > - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; > + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; > bool writeback = true; > bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; > > @@ -7339,10 +7369,16 @@ int kvm_arch_init(void *opaque) > goto out; > } > > + x86_emulator_cache = kvm_alloc_emulator_cache(); > + if (!x86_emulator_cache) { > + pr_err("kvm: failed to allocate cache for x86 emulator\n"); > + goto out_free_x86_fpu_cache; > + } > + > shared_msrs = alloc_percpu(struct kvm_shared_msrs); > if (!shared_msrs) { > printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n"); > - goto out_free_x86_fpu_cache; > + goto out_free_x86_emulator_cache; > } > > r = kvm_mmu_module_init(); > @@ -7375,6 +7411,8 @@ int kvm_arch_init(void *opaque) > > out_free_percpu: > free_percpu(shared_msrs); > +out_free_x86_emulator_cache: > + kmem_cache_destroy(x86_emulator_cache); > out_free_x86_fpu_cache: > kmem_cache_destroy(x86_fpu_cache); > out: > @@ -8754,7 +8792,7 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > * that usually, but some bad designed PV devices (vmware > * backdoor interface) need this to work > */ > - emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt); > + emulator_writeback_register_cache(vcpu->arch.emulate_ctxt); > vcpu->arch.emulate_regs_need_sync_to_vcpu = false; > } > regs->rax = kvm_rax_read(vcpu); > @@ -8940,7 +8978,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, > int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, > int reason, bool has_error_code, u32 error_code) > { > - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; > + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; > int ret; > > init_emulate_ctxt(ctxt); > @@ -9273,7 +9311,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) > struct page *page; > int r; > > - vcpu->arch.emulate_ctxt.ops = &emulate_ops; > if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) > vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; > else > @@ -9311,11 +9348,14 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) > GFP_KERNEL_ACCOUNT)) > goto fail_free_mce_banks; > > + if (!alloc_emulate_ctxt(vcpu)) > + goto free_wbinvd_dirty_mask; > + > vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, > GFP_KERNEL_ACCOUNT); > if (!vcpu->arch.user_fpu) { > pr_err("kvm: failed to allocate userspace's fpu\n"); > - goto free_wbinvd_dirty_mask; > + goto free_emulate_ctxt; > } > > vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, > @@ -9357,6 +9397,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) > kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); > free_user_fpu: > kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); > +free_emulate_ctxt: > + kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); > free_wbinvd_dirty_mask: > free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); > fail_free_mce_banks: > @@ -9409,6 +9451,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > > kvm_x86_ops->vcpu_free(vcpu); > > + if (vcpu->arch.emulate_ctxt) > + kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); Checking for NULL here seems superfluous as we create the context in kvm_arch_vcpu_create() unconditionally. I'd suggest we move the check to "[PATCH v2 12/13] KVM: x86: Add variable to control existence of emulator" where 'enable_emulator' global is added. > + > free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); > kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); > kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
On 26/02/20 18:29, Vitaly Kuznetsov wrote: >> struct x86_emulate_ctxt { >> + void *vcpu; > Why 'void *'? I changed this to 'struct kvm_vcpu *' and it seems to > compile just fine... > I guess because it's really just an opaque pointer; using void* ensures that the emulator doesn't break the emulator ops abstraction. Paolo
On Tue, Mar 03, 2020 at 11:26:21AM +0100, Paolo Bonzini wrote: > On 26/02/20 18:29, Vitaly Kuznetsov wrote: > >> struct x86_emulate_ctxt { > >> + void *vcpu; > > Why 'void *'? I changed this to 'struct kvm_vcpu *' and it seems to > > compile just fine... > > > > I guess because it's really just an opaque pointer; using void* ensures > that the emulator doesn't break the emulator ops abstraction. Ya, it prevents the emulator from directly deferencing the vcpu.
Sean Christopherson <sean.j.christopherson@intel.com> writes: > On Tue, Mar 03, 2020 at 11:26:21AM +0100, Paolo Bonzini wrote: >> On 26/02/20 18:29, Vitaly Kuznetsov wrote: >> >> struct x86_emulate_ctxt { >> >> + void *vcpu; >> > Why 'void *'? I changed this to 'struct kvm_vcpu *' and it seems to >> > compile just fine... >> > >> >> I guess because it's really just an opaque pointer; using void* ensures >> that the emulator doesn't break the emulator ops abstraction. > > Ya, it prevents the emulator from directly deferencing the vcpu. > Makes sense, a comment like /* Should never be dereferenced by the emulater */ would've helped)
On Wed, Feb 26, 2020 at 06:29:56PM +0100, Vitaly Kuznetsov wrote: > Sean Christopherson <sean.j.christopherson@intel.com> writes: > > @@ -9409,6 +9451,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > > > > kvm_x86_ops->vcpu_free(vcpu); > > > > + if (vcpu->arch.emulate_ctxt) > > + kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); > > Checking for NULL here seems superfluous as we create the context in > kvm_arch_vcpu_create() unconditionally. I'd suggest we move the check to > "[PATCH v2 12/13] KVM: x86: Add variable to control existence of > emulator" where 'enable_emulator' global is added. Ya, checking here is premature. > > + > > free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); > > kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); > > kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); > > -- > Vitaly >
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 03946eb3e2b9..2f0a600efdff 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -293,6 +293,7 @@ enum x86emul_mode { #define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) struct x86_emulate_ctxt { + void *vcpu; const struct x86_emulate_ops *ops; /* Register state before/after emulation. */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c750cd957558..e069f71667b1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -678,7 +678,7 @@ struct kvm_vcpu_arch { /* emulate context */ - struct x86_emulate_ctxt emulate_ctxt; + struct x86_emulate_ctxt *emulate_ctxt; bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0e67f90db9a6..5ab7d4283185 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -81,7 +81,7 @@ u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); #define emul_to_vcpu(ctxt) \ - container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) + ((struct kvm_vcpu *)(ctxt)->vcpu) /* EFER defaults: * - enable syscall per default because its emulated by KVM @@ -230,6 +230,19 @@ u64 __read_mostly host_xcr0; struct kmem_cache *x86_fpu_cache; EXPORT_SYMBOL_GPL(x86_fpu_cache); +static struct kmem_cache *x86_emulator_cache; + +static struct kmem_cache *kvm_alloc_emulator_cache(void) +{ + return kmem_cache_create_usercopy("x86_emulator", + sizeof(struct x86_emulate_ctxt), + __alignof__(struct x86_emulate_ctxt), + SLAB_ACCOUNT, + 0, + sizeof(struct x86_emulate_ctxt), + NULL); +} + static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) @@ -6414,6 +6427,23 @@ static bool inject_emulated_exception(struct x86_emulate_ctxt *ctxt) return false; } +static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu) +{ + struct x86_emulate_ctxt *ctxt; + + ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT); + if (!ctxt) { + pr_err("kvm: failed to allocate vcpu's emulator\n"); + return NULL; + } + + ctxt->vcpu = vcpu; + ctxt->ops = &emulate_ops; + vcpu->arch.emulate_ctxt = ctxt; + + return ctxt; +} + static void init_emulate_ctxt(struct x86_emulate_ctxt *ctxt) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); @@ -6440,7 +6470,7 @@ static void init_emulate_ctxt(struct x86_emulate_ctxt *ctxt) void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) { - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; int ret; init_emulate_ctxt(ctxt); @@ -6756,7 +6786,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len) { int r; - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; bool writeback = true; bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; @@ -7339,10 +7369,16 @@ int kvm_arch_init(void *opaque) goto out; } + x86_emulator_cache = kvm_alloc_emulator_cache(); + if (!x86_emulator_cache) { + pr_err("kvm: failed to allocate cache for x86 emulator\n"); + goto out_free_x86_fpu_cache; + } + shared_msrs = alloc_percpu(struct kvm_shared_msrs); if (!shared_msrs) { printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n"); - goto out_free_x86_fpu_cache; + goto out_free_x86_emulator_cache; } r = kvm_mmu_module_init(); @@ -7375,6 +7411,8 @@ int kvm_arch_init(void *opaque) out_free_percpu: free_percpu(shared_msrs); +out_free_x86_emulator_cache: + kmem_cache_destroy(x86_emulator_cache); out_free_x86_fpu_cache: kmem_cache_destroy(x86_fpu_cache); out: @@ -8754,7 +8792,7 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) * that usually, but some bad designed PV devices (vmware * backdoor interface) need this to work */ - emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt); + emulator_writeback_register_cache(vcpu->arch.emulate_ctxt); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; } regs->rax = kvm_rax_read(vcpu); @@ -8940,7 +8978,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code) { - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; int ret; init_emulate_ctxt(ctxt); @@ -9273,7 +9311,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) struct page *page; int r; - vcpu->arch.emulate_ctxt.ops = &emulate_ops; if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else @@ -9311,11 +9348,14 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) GFP_KERNEL_ACCOUNT)) goto fail_free_mce_banks; + if (!alloc_emulate_ctxt(vcpu)) + goto free_wbinvd_dirty_mask; + vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL_ACCOUNT); if (!vcpu->arch.user_fpu) { pr_err("kvm: failed to allocate userspace's fpu\n"); - goto free_wbinvd_dirty_mask; + goto free_emulate_ctxt; } vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, @@ -9357,6 +9397,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); free_user_fpu: kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); +free_emulate_ctxt: + kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); free_wbinvd_dirty_mask: free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); fail_free_mce_banks: @@ -9409,6 +9451,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); + if (vcpu->arch.emulate_ctxt) + kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); + free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
Allocate the emulation context instead of embedding it in struct kvm_vcpu_arch. Dynamic allocation provides several benefits: - Shrinks the size x86 vcpus by ~2.5k bytes, dropping them back below the PAGE_ALLOC_COSTLY_ORDER threshold. - Allows for dropping the include of kvm_emulate.h from asm/kvm_host.h and moving kvm_emulate.h into KVM's private directory. - Allows a reducing KVM's attack surface by shrinking the amount of vCPU data that is exposed to usercopy. - Allows a future patch to disable the emulator entirely, which may or may not be a realistic endeavor. Mark the entire struct as valid for usercopy to maintain existing behavior with respect to hardened usercopy. Future patches can shrink the usercopy range to cover only what is necessary. Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/x86.c | 61 ++++++++++++++++++++++++++---- 3 files changed, 55 insertions(+), 9 deletions(-)