diff mbox

KVM: x86: KVM_CAP_SYNC_REGS

Message ID 20170920174204.7215-1-hofsass@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ken Hofsass Sept. 20, 2017, 5:42 p.m. UTC
This commit implements an enhanced x86 version of S390
KVM_CAP_SYNC_REGS functionality. KVM_CAP_SYNC_REGS "allow[s]
userspace to access certain guest registers without having
to call SET/GET_*REGS". This reduces ioctl overhead which
is particularly important when userspace is making synchronous
guest state modifications (e.g. when emulating and/or intercepting
instructions).

Originally implemented upstream for the S390, the x86 differences
follow:
- the capability can be enabled/disabled
- the register sets to be copied out to kvm_run are selectable
by userspace
- vcpu_events and selectable MSRs are available in addition to the
basic register sets (regs, sregs, debug_regs, and fpu).

Signed-off-by: Ken Hofsass <hofsass@google.com>
---
 Documentation/virtual/kvm/api.txt |  49 ++++++++++++++
 arch/x86/include/uapi/asm/kvm.h   |  32 ++++++++-
 arch/x86/kvm/x86.c                | 134 +++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/kvm.h          |   6 +-
 4 files changed, 216 insertions(+), 5 deletions(-)

Comments

Jim Mattson Sept. 25, 2017, 6:50 p.m. UTC | #1
Reviewed-by: Jim Mattson <jmattson@google.com>

On Wed, Sep 20, 2017 at 10:42 AM, Ken Hofsass <hofsass@google.com> wrote:
> This commit implements an enhanced x86 version of S390
> KVM_CAP_SYNC_REGS functionality. KVM_CAP_SYNC_REGS "allow[s]
> userspace to access certain guest registers without having
> to call SET/GET_*REGS”. This reduces ioctl overhead which
> is particularly important when userspace is making synchronous
> guest state modifications (e.g. when emulating and/or intercepting
> instructions).
>
> Originally implemented upstream for the S390, the x86 differences
> follow:
> - the capability can be enabled/disabled
> - the register sets to be copied out out to kvm_run are selectable
> by userspace
> - vcpu_events and selectable MSRs are available in addition to the
> basic register sets (regs, sregs, debug_regs, and fpu).
>
> Signed-off-by: Ken Hofsass <hofsass@google.com>
> ---
>  Documentation/virtual/kvm/api.txt |  49 ++++++++++++++
>  arch/x86/include/uapi/asm/kvm.h   |  32 ++++++++-
>  arch/x86/kvm/x86.c                | 134 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h          |   6 +-
>  4 files changed, 216 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index e63a35fafef0..9536eb310d04 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3951,6 +3951,55 @@ Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
>  accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
>  the guest.
>
> +6.74 KVM_CAP_SYNC_REGS
> +Architectures: s390, x86 (vmx-only)
> +Target: s390: always enabled, x86: vcpu
> +Parameters: s390: none, x86: args[0] takes bitmask selecting which register
> +sets to copy out (bitfields defined in arch/x86/include/uapi/asm/kvm.h).
> +
> +As described above in the kvm_sync_regs struct info in section 5 (kvm_run):
> +KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers
> +without having to call SET/GET_*REGS". This reduces overhead by eliminating
> +repeated ioctl calls for setting and/or getting register values. This is
> +particularly important when userspace is making synchronous guest state
> +modifications, e.g. when emulating and/or intercepting instructions in
> +userspace.
> +
> +For s390 specifics, please the source code.
> +
> +For x86:
> +- the capability can be enabled/disabled (s390 is always enabled).
> +- the register sets to be copied out out to kvm_run are selectable
> +  by userspace (rather that all sets being copied out for every exit).
> +- vcpu_events and selectable MSRs are available in addition to the
> +  basic register sets (regs, sregs, debug_regs, and fpu).
> +
> +In addition to the register sets, the x86 'kvm_sync_regs' struct definition
> +includes the bitarray, 'sync_regs', that toggles whether specific register
> +sets are to be copied out. When enabling SYNC_REGS via KVM_CAP_ENABLE, the
> +value in args[0] is copied into the 'sync_regs' bitarray value by KVM. But
> +like the register sets, the copy-out field can be modified while handling
> +an exit so that additonal KVM_CAP_ENABLE ioctl calls can be avoided.
> +
> +To indicate when userspace has modified values that should be copied into
> +the vCPU, the all architecture bitarray field, 'kvm_dirty_regs' must be set.
> +This is done using the same bitflags as for the 'sync_regs' copy-out field.
> +If the dirty bit is not set, then the register values will not be copied
> +into the vCPU even if they've been modified.
> +
> +Unused bitfields in the bitarrays must be set to zero.
> +
> +struct kvm_sync_regs {
> +        __u64 sync_regs;
> +        struct kvm_regs regs;
> +        struct kvm_sregs sregs;
> +        struct kvm_debugregs debugregs;
> +        struct kvm_fpu fpu;
> +        struct kvm_vcpu_events events;
> +        struct kvm_msrs msrs;
> +        struct kvm_msr_entry msr_entries[SYNC_REGS_NUM_MSRS];
> +};
> +
>  7. Capabilities that can be enabled on VMs
>  ------------------------------------------
>
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index c2824d02ba37..cdee17a4b0ee 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -353,8 +353,38 @@ struct kvm_xcrs {
>         __u64 padding[16];
>  };
>
> -/* definition of registers in kvm_run */
> +#define KVM_SYNC_X86_REGS      (1UL << 0)
> +#define KVM_SYNC_X86_SREGS     (1UL << 1)
> +#define KVM_SYNC_X86_DEBUGREGS (1UL << 2)
> +#define KVM_SYNC_X86_FPU       (1UL << 3)
> +#define KVM_SYNC_X86_EVENTS    (1UL << 4)
> +#define KVM_SYNC_X86_MSRS      (1UL << 5)
> +#define KVM_SYNC_X86_NUM_FIELDS                6
> +
> +#define KVM_SYNC_X86_VALID_BITS \
> +       (KVM_SYNC_X86_REGS| \
> +        KVM_SYNC_X86_SREGS| \
> +        KVM_SYNC_X86_DEBUGREGS| \
> +        KVM_SYNC_X86_FPU| \
> +        KVM_SYNC_X86_EVENTS| \
> +        KVM_SYNC_X86_MSRS)
> +
> +#define SYNC_REGS_NUM_MSRS 8
> +/* kvm_sync_regs struct included by kvm_run struct */
>  struct kvm_sync_regs {
> +       /* Members of this structure are potentially malicious.
> +        * Care must be taken by code reading, esp. interpreting,
> +        * data fields from them inside KVM to prevent TOCTOU and
> +        * double-fetch types of vulnerabilities.
> +        */
> +       __u64 sync_regs;
> +       struct kvm_regs regs;
> +       struct kvm_sregs sregs;
> +       struct kvm_debugregs debugregs;
> +       struct kvm_fpu fpu;
> +       struct kvm_vcpu_events events;
> +       struct kvm_msrs msrs;
> +       struct kvm_msr_entry msr_entries[SYNC_REGS_NUM_MSRS];
>  };
>
>  #define KVM_X86_QUIRK_LINT0_REENABLED  (1 << 0)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index cd17b7d9a107..ad3f84cbf7f3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -100,6 +100,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
>  static void process_nmi(struct kvm_vcpu *vcpu);
>  static void enter_smm(struct kvm_vcpu *vcpu);
>  static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
> +static void sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu);
> +static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu);
>
>  struct kvm_x86_ops *kvm_x86_ops __read_mostly;
>  EXPORT_SYMBOL_GPL(kvm_x86_ops);
> @@ -2587,15 +2589,22 @@ EXPORT_SYMBOL_GPL(kvm_get_msr_common);
>   *
>   * @return number of msrs set successfully.
>   */
> -static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
> +static int __msr_io(struct kvm_vcpu *vcpu, u32 nmsrs, struct kvm_msrs *msrs,
>                     struct kvm_msr_entry *entries,
>                     int (*do_msr)(struct kvm_vcpu *vcpu,
>                                   unsigned index, u64 *data))
>  {
>         int i, idx;
>
> +       /* For SYNC_REGS call into __msr_io, it is possible that userspace
> +        * might have modified the count since it was snapshotted. Could be
> +        * accidental but might be intentional attempt at TOCTOU.
> +        */
> +       if (nmsrs != msrs->nmsrs)
> +               return -EINVAL;
> +
>         idx = srcu_read_lock(&vcpu->kvm->srcu);
> -       for (i = 0; i < msrs->nmsrs; ++i)
> +       for (i = 0; i < nmsrs; ++i)
>                 if (do_msr(vcpu, entries[i].index, &entries[i].data))
>                         break;
>         srcu_read_unlock(&vcpu->kvm->srcu, idx);
> @@ -2633,7 +2642,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
>                 goto out;
>         }
>
> -       r = n = __msr_io(vcpu, &msrs, entries, do_msr);
> +       r = n = __msr_io(vcpu, msrs.nmsrs, &msrs, entries, do_msr);
>         if (r < 0)
>                 goto out_free;
>
> @@ -2665,6 +2674,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>         case KVM_CAP_NOP_IO_DELAY:
>         case KVM_CAP_MP_STATE:
>         case KVM_CAP_SYNC_MMU:
> +       case KVM_CAP_SYNC_REGS:
>         case KVM_CAP_USER_NMI:
>         case KVM_CAP_REINJECT_CONTROL:
>         case KVM_CAP_IRQ_INJECT_STATUS:
> @@ -3435,6 +3445,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
>                         return -EINVAL;
>                 return kvm_hv_activate_synic(vcpu, cap->cap ==
>                                              KVM_CAP_HYPERV_SYNIC2);
> +       case KVM_CAP_SYNC_REGS:
> +               if (cap->args[0] & ~KVM_SYNC_X86_VALID_BITS)
> +                       return -EINVAL;
> +               vcpu->run->s.regs.sync_regs = cap->args[0];
> +               return 0;
>         default:
>                 return -EINVAL;
>         }
> @@ -7247,6 +7262,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>                 goto out;
>         }
>
> +       if (vcpu->run->kvm_dirty_regs) {
> +               r = sync_regs_load_from_kvmrun(vcpu);
> +               if (r != 0)
> +                       goto out;
> +       }
> +
>         /* re-sync apic's tpr */
>         if (!lapic_in_kernel(vcpu)) {
>                 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
> @@ -7270,6 +7291,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>                 r = vcpu_run(vcpu);
>
>  out:
> +       if (vcpu->run->s.regs.sync_regs)
> +               sync_regs_store_to_kvmrun(vcpu);
>         post_kvm_run_save(vcpu);
>         if (vcpu->sigset_active)
>                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> @@ -7648,6 +7671,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>         return 0;
>  }
>
> +static void sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu)
> +{
> +       BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_UNION_SIZE_BYTES);
> +       vcpu->run->kvm_valid_regs = 0;
> +
> +       if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_REGS) {
> +               kvm_arch_vcpu_ioctl_get_regs(vcpu, &vcpu->run->s.regs.regs);
> +               vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_REGS;
> +       }
> +       if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_SREGS) {
> +               kvm_arch_vcpu_ioctl_get_sregs(vcpu, &vcpu->run->s.regs.sregs);
> +               vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_SREGS;
> +       }
> +       if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_DEBUGREGS) {
> +               kvm_vcpu_ioctl_x86_get_debugregs(
> +                               vcpu, &vcpu->run->s.regs.debugregs);
> +               vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_DEBUGREGS;
> +       }
> +       if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_FPU) {
> +               kvm_arch_vcpu_ioctl_get_fpu(vcpu, &vcpu->run->s.regs.fpu);
> +               vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_FPU;
> +       }
> +       if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_EVENTS) {
> +               kvm_vcpu_ioctl_x86_get_vcpu_events(
> +                               vcpu, &vcpu->run->s.regs.events);
> +               vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_EVENTS;
> +       }
> +       if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_MSRS) {
> +               u32 nmsrs = vcpu->run->s.regs.msrs.nmsrs;
> +
> +               if (nmsrs > SYNC_REGS_NUM_MSRS)
> +                       nmsrs = vcpu->run->s.regs.msrs.nmsrs
> +                                       = SYNC_REGS_NUM_MSRS;
> +               __msr_io(vcpu, nmsrs, &vcpu->run->s.regs.msrs,
> +                        vcpu->run->s.regs.msr_entries, do_get_msr);
> +               vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_MSRS;
> +       }
> +}
> +
> +static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu)
> +{
> +       int r = -EINVAL;
> +
> +       if (unlikely(vcpu->run->kvm_dirty_regs == 0))
> +               goto out;
> +
> +       if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_BITS)
> +               goto out;
> +
> +       if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
> +               if (kvm_arch_vcpu_ioctl_set_regs(
> +                               vcpu, &vcpu->run->s.regs.regs))
> +                       goto out;
> +               vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
> +       }
> +       if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
> +               if (kvm_arch_vcpu_ioctl_set_sregs(
> +                               vcpu, &vcpu->run->s.regs.sregs))
> +                       goto out;
> +               vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
> +       }
> +       if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_DEBUGREGS) {
> +               if (kvm_vcpu_ioctl_x86_set_debugregs(
> +                               vcpu, &vcpu->run->s.regs.debugregs))
> +                       goto out;
> +               vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_DEBUGREGS;
> +       }
> +       if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_FPU) {
> +               if (kvm_arch_vcpu_ioctl_set_fpu(
> +                               vcpu, &vcpu->run->s.regs.fpu))
> +                       goto out;
> +               vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_FPU;
> +       }
> +       if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
> +               if (kvm_vcpu_ioctl_x86_set_vcpu_events(
> +                               vcpu, &vcpu->run->s.regs.events))
> +                       goto out;
> +               vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
> +       }
> +       if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_MSRS) {
> +               u32 nmsrs_in = vcpu->run->s.regs.msrs.nmsrs;
> +               u32 nmsrs_out = 0;
> +
> +               if (!nmsrs_in)
> +                       goto out;
> +               else if (nmsrs_in > SYNC_REGS_NUM_MSRS) {
> +                       r = -E2BIG;
> +                       goto out;
> +               }
> +               nmsrs_out = __msr_io(vcpu, nmsrs_in, &vcpu->run->s.regs.msrs,
> +                                    vcpu->run->s.regs.msr_entries, do_set_msr);
> +               if (!nmsrs_out) {
> +                       r = -EINVAL;
> +                       goto out;
> +               } else if (nmsrs_in != nmsrs_out) {
> +                       r = nmsrs_out;
> +                       goto out;
> +               }
> +               vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_MSRS;
> +       }
> +       r = 0;
> +out:
> +       return r;
> +}
> +
>  static void fx_init(struct kvm_vcpu *vcpu)
>  {
>         fpstate_init(&vcpu->arch.guest_fpu.state);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 838887587411..90a6b24df901 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -395,6 +395,10 @@ struct kvm_run {
>                 char padding[256];
>         };
>
> +       /* 2048 is the size of the char array IBM used to bound/pad the size
> +        * of the union that holds sync regs.
> +        */
> +#define SYNC_REGS_UNION_SIZE_BYTES 2048
>         /*
>          * shared registers between kvm and userspace.
>          * kvm_valid_regs specifies the register classes set by the host
> @@ -406,7 +410,7 @@ struct kvm_run {
>         __u64 kvm_dirty_regs;
>         union {
>                 struct kvm_sync_regs regs;
> -               char padding[2048];
> +               char padding[SYNC_REGS_UNION_SIZE_BYTES];
>         } s;
>  };
>
> --
> 2.14.1.690.gbb1197296e-goog
>
David Hildenbrand Sept. 28, 2017, 3:17 p.m. UTC | #2
On 20.09.2017 19:42, Ken Hofsass wrote:
> This commit implements an enhanced x86 version of S390
> KVM_CAP_SYNC_REGS functionality. KVM_CAP_SYNC_REGS "allow[s]
> userspace to access certain guest registers without having
> to call SET/GET_*REGS”. This reduces ioctl overhead which
> is particularly important when userspace is making synchronous
> guest state modifications (e.g. when emulating and/or intercepting
> instructions).
> 
> Originally implemented upstream for the S390, the x86 differences
> follow:
> - the capability can be enabled/disabled
> - the register sets to be copied out out to kvm_run are selectable
> by userspace

Why is this necessary? Why not simply store everything? And mark via
kvm_valid_regs which fields are actually valid?

Also, I wonder if user space could simply modify (reduce)
vcpu->run->kvm_valid_regs to achieve the same behavior (when storing in
the kernel, simply check if the valid bit is set).

> - vcpu_events and selectable MSRs are available in addition to the
> basic register sets (regs, sregs, debug_regs, and fpu).
> 
> Signed-off-by: Ken Hofsass <hofsass@google.com>
> ---
>  Documentation/virtual/kvm/api.txt |  49 ++++++++++++++
>  arch/x86/include/uapi/asm/kvm.h   |  32 ++++++++-
>  arch/x86/kvm/x86.c                | 134 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/kvm.h          |   6 +-
>  4 files changed, 216 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index e63a35fafef0..9536eb310d04 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3951,6 +3951,55 @@ Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
>  accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
>  the guest.
>  
> +6.74 KVM_CAP_SYNC_REGS
> +Architectures: s390, x86 (vmx-only)
> +Target: s390: always enabled, x86: vcpu
> +Parameters: s390: none, x86: args[0] takes bitmask selecting which register
> +sets to copy out (bitfields defined in arch/x86/include/uapi/asm/kvm.h).
> +
> +As described above in the kvm_sync_regs struct info in section 5 (kvm_run):
> +KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers
> +without having to call SET/GET_*REGS". This reduces overhead by eliminating
> +repeated ioctl calls for setting and/or getting register values. This is
> +particularly important when userspace is making synchronous guest state
> +modifications, e.g. when emulating and/or intercepting instructions in
> +userspace.
> +
> +For s390 specifics, please the source code.

"... please have a look at the source code."?

> +
> +For x86:
> +- the capability can be enabled/disabled (s390 is always enabled).

Move that s390 comment to "For s390".

You could simply always store and let user space control via
vcpu->run->kvm_valid_regs which ones to actually store. This would store
for old QEMUs, but do we care? This would get rid of the need for the
capability. User space could simply check vcpu->run->kvm_valid_regs
(assuming it was always set to 0, we would have to verify).

> +- the register sets to be copied out out to kvm_run are selectable
> +  by userspace (rather that all sets being copied out for every exit).
> +- vcpu_events and selectable MSRs are available in addition to the
> +  basic register sets (regs, sregs, debug_regs, and fpu).
> +> +In addition to the register sets, the x86 'kvm_sync_regs' struct
definition
> +includes the bitarray, 'sync_regs', that toggles whether specific register
> +sets are to be copied out. When enabling SYNC_REGS via KVM_CAP_ENABLE, the
> +value in args[0] is copied into the 'sync_regs' bitarray value by KVM. But
> +like the register sets, the copy-out field can be modified while handling
> +an exit so that additonal KVM_CAP_ENABLE ioctl calls can be avoided.
> +
> +To indicate when userspace has modified values that should be copied into
> +the vCPU, the all architecture bitarray field, 'kvm_dirty_regs' must be set.
> +This is done using the same bitflags as for the 'sync_regs' copy-out field.
> +If the dirty bit is not set, then the register values will not be copied
> +into the vCPU even if they've been modified.
> +
> +Unused bitfields in the bitarrays must be set to zero.
> +
> +struct kvm_sync_regs {
> +        __u64 sync_regs;
> +        struct kvm_regs regs;
> +        struct kvm_sregs sregs;
> +        struct kvm_debugregs debugregs;
> +        struct kvm_fpu fpu;
> +        struct kvm_vcpu_events events;
> +        struct kvm_msrs msrs;
> +        struct kvm_msr_entry msr_entries[SYNC_REGS_NUM_MSRS];
> +};
> +
Paolo Bonzini Sept. 28, 2017, 3:34 p.m. UTC | #3
On 28/09/2017 17:17, David Hildenbrand wrote:
>>
>> Originally implemented upstream for the S390, the x86 differences
>> follow:
>> - the capability can be enabled/disabled
>> - the register sets to be copied out out to kvm_run are selectable
>> by userspace
> Why is this necessary? Why not simply store everything? And mark via
> kvm_valid_regs which fields are actually valid?

Storing everything would probably be pretty expensive.  I'm not sure
which MSRs you'd want to store explicitly (e.g. efer is already parts of
sregs) though---Bjorn what are you using?

I'm also not sure about the debug_regs... I like the patch, but I would
like to understand more how it's used (all I can guess is that it's
related to emulation in userspace).

I would also like to have a testcase for kvm-unit-tests api/ in order to
commit it.

Paolo

> Also, I wonder if user space could simply modify (reduce)
> vcpu->run->kvm_valid_regs to achieve the same behavior (when storing in
> the kernel, simply check if the valid bit is set).
>
Ken Hofsass Oct. 3, 2017, 1:40 a.m. UTC | #4
Hi David, Paolo, thanks for your reviews!

On Thu, Sep 28, 2017 at 8:17 AM, David Hildenbrand <david@redhat.com> wrote:
> On 20.09.2017 19:42, Ken Hofsass wrote:
> > - the register sets to be copied out out to kvm_run are selectable
> > by userspace
>
> Why is this necessary? Why not simply store everything? And mark via
> kvm_valid_regs which fields are actually valid?

As Paolo surmised, it becomes expensive to store all register sets for
every exit.

> Also, I wonder if user space could simply modify (reduce)
> vcpu->run->kvm_valid_regs to achieve the same behavior (when storing in
> the kernel, simply check if the valid bit is set).

I agree that we could overload kvm_valid_regs to be input/output
rather than having the separate u64 sync_regs for input only. I will
refactor for the next version of the patch.

> > +For s390 specifics, please the source code.
> "... please have a look at the source code."?
>
> > +
> > +For x86:
> > +- the capability can be enabled/disabled (s390 is always enabled).
> Move that s390 comment to "For s390".

Thanks for catching those, will incorporate.

> You could simply always store and let user space control via
> vcpu->run->kvm_valid_regs which ones to actually store.
> This would get rid of the need for the capability.

Yeah, looking at the code yet again, I saw your point that the ENABLE
section of the capability code doesn't provide any new utility.  I'll
take that part out.

However, I think keeping the QUERY part is needed for userspace to
have positive confirmation of the capability since it is too expensive
to forcibly store all the register sets. (And the capability value is
already defined from the S390 version so there's no additional
namespace pollution.)

On Thu, Sep 28, 2017 at 8:34 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> I'm not sure which MSRs you'd want to store explicitly (e.g. efer is already parts of
> sregs) though---Bjorn what are you using?

I could be swayed to remove this for now (more below).

> I'm also not sure about the debug_regs...

In part for completeness but also to have them available in a
non-interactive debugging/instrumentation scenario, e.g. when using
KVM to support driver/kernel development work.

> I like the patch, but I would
> like to understand more how it's used (all I can guess is that it's
> related to emulation in userspace).

Thanks! And you're right that reducing overhead for userspace
emulation (as presented by Andy Honig, et al at KVM forum) is part of
the rationale.

Similarly, we are also interested in building other low-level
functionality in userspace to avoid expanding kernel-level attack
surface. Toward that end, I expect to send you another patchset in the
next few weeks that builds on SYNC_REGS. (Monitoring MSRs might make
more sense when in that context.)

> I would also like to have a testcase for kvm-unit-tests api/ in order to
> commit it.

Certainly!  I'll get the test and the other fixes done and post a v2 patch.

thanks again,
Ken
Paolo Bonzini Oct. 3, 2017, 7:57 a.m. UTC | #5
On 03/10/2017 03:40, Ken Hofsass wrote:
>> I'm also not sure about the debug_regs...
> 
> In part for completeness but also to have them available in a
> non-interactive debugging/instrumentation scenario, e.g. when using
> KVM to support driver/kernel development work.

Since you are removing the "enable" part, querying the capability with
the "check extension" ioctl can return the supported register sets.  If
So we can extend sync_regs to debug registers in the future!

I agree that regs, sregs and events should be present since the first
revision.

Paolo
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e63a35fafef0..9536eb310d04 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3951,6 +3951,55 @@  Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
 accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
 the guest.
 
+6.74 KVM_CAP_SYNC_REGS
+Architectures: s390, x86 (vmx-only)
+Target: s390: always enabled, x86: vcpu
+Parameters: s390: none, x86: args[0] takes bitmask selecting which register
+sets to copy out (bitfields defined in arch/x86/include/uapi/asm/kvm.h).
+
+As described above in the kvm_sync_regs struct info in section 5 (kvm_run):
+KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers
+without having to call SET/GET_*REGS". This reduces overhead by eliminating
+repeated ioctl calls for setting and/or getting register values. This is
+particularly important when userspace is making synchronous guest state
+modifications, e.g. when emulating and/or intercepting instructions in
+userspace.
+
+For s390 specifics, please the source code.
+
+For x86:
+- the capability can be enabled/disabled (s390 is always enabled).
+- the register sets to be copied out out to kvm_run are selectable
+  by userspace (rather that all sets being copied out for every exit).
+- vcpu_events and selectable MSRs are available in addition to the
+  basic register sets (regs, sregs, debug_regs, and fpu).
+
+In addition to the register sets, the x86 'kvm_sync_regs' struct definition
+includes the bitarray, 'sync_regs', that toggles whether specific register
+sets are to be copied out. When enabling SYNC_REGS via KVM_CAP_ENABLE, the
+value in args[0] is copied into the 'sync_regs' bitarray value by KVM. But
+like the register sets, the copy-out field can be modified while handling
+an exit so that additonal KVM_CAP_ENABLE ioctl calls can be avoided.
+
+To indicate when userspace has modified values that should be copied into
+the vCPU, the all architecture bitarray field, 'kvm_dirty_regs' must be set.
+This is done using the same bitflags as for the 'sync_regs' copy-out field.
+If the dirty bit is not set, then the register values will not be copied
+into the vCPU even if they've been modified.
+
+Unused bitfields in the bitarrays must be set to zero.
+
+struct kvm_sync_regs {
+        __u64 sync_regs;
+        struct kvm_regs regs;
+        struct kvm_sregs sregs;
+        struct kvm_debugregs debugregs;
+        struct kvm_fpu fpu;
+        struct kvm_vcpu_events events;
+        struct kvm_msrs msrs;
+        struct kvm_msr_entry msr_entries[SYNC_REGS_NUM_MSRS];
+};
+
 7. Capabilities that can be enabled on VMs
 ------------------------------------------
 
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index c2824d02ba37..cdee17a4b0ee 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -353,8 +353,38 @@  struct kvm_xcrs {
 	__u64 padding[16];
 };
 
-/* definition of registers in kvm_run */
+#define KVM_SYNC_X86_REGS      (1UL << 0)
+#define KVM_SYNC_X86_SREGS     (1UL << 1)
+#define KVM_SYNC_X86_DEBUGREGS (1UL << 2)
+#define KVM_SYNC_X86_FPU       (1UL << 3)
+#define KVM_SYNC_X86_EVENTS    (1UL << 4)
+#define KVM_SYNC_X86_MSRS      (1UL << 5)
+#define KVM_SYNC_X86_NUM_FIELDS		6
+
+#define KVM_SYNC_X86_VALID_BITS \
+	(KVM_SYNC_X86_REGS| \
+	 KVM_SYNC_X86_SREGS| \
+	 KVM_SYNC_X86_DEBUGREGS| \
+	 KVM_SYNC_X86_FPU| \
+	 KVM_SYNC_X86_EVENTS| \
+	 KVM_SYNC_X86_MSRS)
+
+#define SYNC_REGS_NUM_MSRS 8
+/* kvm_sync_regs struct included by kvm_run struct */
 struct kvm_sync_regs {
+	/* Members of this structure are potentially malicious.
+	 * Care must be taken by code reading, esp. interpreting,
+	 * data fields from them inside KVM to prevent TOCTOU and
+	 * double-fetch types of vulnerabilities.
+	 */
+	__u64 sync_regs;
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	struct kvm_debugregs debugregs;
+	struct kvm_fpu fpu;
+	struct kvm_vcpu_events events;
+	struct kvm_msrs msrs;
+	struct kvm_msr_entry msr_entries[SYNC_REGS_NUM_MSRS];
 };
 
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd17b7d9a107..ad3f84cbf7f3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -100,6 +100,8 @@  static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
 static void enter_smm(struct kvm_vcpu *vcpu);
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+static void sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu);
+static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu);
 
 struct kvm_x86_ops *kvm_x86_ops __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -2587,15 +2589,22 @@  EXPORT_SYMBOL_GPL(kvm_get_msr_common);
  *
  * @return number of msrs set successfully.
  */
-static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
+static int __msr_io(struct kvm_vcpu *vcpu, u32 nmsrs, struct kvm_msrs *msrs,
 		    struct kvm_msr_entry *entries,
 		    int (*do_msr)(struct kvm_vcpu *vcpu,
 				  unsigned index, u64 *data))
 {
 	int i, idx;
 
+	/* For SYNC_REGS call into __msr_io, it is possible that userspace
+	 * might have modified the count since it was snapshotted. Could be
+	 * accidental but might be intentional attempt at TOCTOU.
+	 */
+	if (nmsrs != msrs->nmsrs)
+		return -EINVAL;
+
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
-	for (i = 0; i < msrs->nmsrs; ++i)
+	for (i = 0; i < nmsrs; ++i)
 		if (do_msr(vcpu, entries[i].index, &entries[i].data))
 			break;
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -2633,7 +2642,7 @@  static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
 		goto out;
 	}
 
-	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
+	r = n = __msr_io(vcpu, msrs.nmsrs, &msrs, entries, do_msr);
 	if (r < 0)
 		goto out_free;
 
@@ -2665,6 +2674,7 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_SYNC_MMU:
+	case KVM_CAP_SYNC_REGS:
 	case KVM_CAP_USER_NMI:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
@@ -3435,6 +3445,11 @@  static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 			return -EINVAL;
 		return kvm_hv_activate_synic(vcpu, cap->cap ==
 					     KVM_CAP_HYPERV_SYNIC2);
+	case KVM_CAP_SYNC_REGS:
+		if (cap->args[0] & ~KVM_SYNC_X86_VALID_BITS)
+			return -EINVAL;
+		vcpu->run->s.regs.sync_regs = cap->args[0];
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -7247,6 +7262,12 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
+	if (vcpu->run->kvm_dirty_regs) {
+		r = sync_regs_load_from_kvmrun(vcpu);
+		if (r != 0)
+			goto out;
+	}
+
 	/* re-sync apic's tpr */
 	if (!lapic_in_kernel(vcpu)) {
 		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
@@ -7270,6 +7291,8 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		r = vcpu_run(vcpu);
 
 out:
+	if (vcpu->run->s.regs.sync_regs)
+		sync_regs_store_to_kvmrun(vcpu);
 	post_kvm_run_save(vcpu);
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -7648,6 +7671,111 @@  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return 0;
 }
 
+static void sync_regs_store_to_kvmrun(struct kvm_vcpu *vcpu)
+{
+	BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_UNION_SIZE_BYTES);
+	vcpu->run->kvm_valid_regs = 0;
+
+	if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_REGS) {
+		kvm_arch_vcpu_ioctl_get_regs(vcpu, &vcpu->run->s.regs.regs);
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_REGS;
+	}
+	if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_SREGS) {
+		kvm_arch_vcpu_ioctl_get_sregs(vcpu, &vcpu->run->s.regs.sregs);
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_SREGS;
+	}
+	if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_DEBUGREGS) {
+		kvm_vcpu_ioctl_x86_get_debugregs(
+				vcpu, &vcpu->run->s.regs.debugregs);
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_DEBUGREGS;
+	}
+	if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_FPU) {
+		kvm_arch_vcpu_ioctl_get_fpu(vcpu, &vcpu->run->s.regs.fpu);
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_FPU;
+	}
+	if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_EVENTS) {
+		kvm_vcpu_ioctl_x86_get_vcpu_events(
+				vcpu, &vcpu->run->s.regs.events);
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_EVENTS;
+	}
+	if (vcpu->run->s.regs.sync_regs & KVM_SYNC_X86_MSRS) {
+		u32 nmsrs = vcpu->run->s.regs.msrs.nmsrs;
+
+		if (nmsrs > SYNC_REGS_NUM_MSRS)
+			nmsrs = vcpu->run->s.regs.msrs.nmsrs
+					= SYNC_REGS_NUM_MSRS;
+		__msr_io(vcpu, nmsrs, &vcpu->run->s.regs.msrs,
+			 vcpu->run->s.regs.msr_entries, do_get_msr);
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_X86_MSRS;
+	}
+}
+
+static int sync_regs_load_from_kvmrun(struct kvm_vcpu *vcpu)
+{
+	int r = -EINVAL;
+
+	if (unlikely(vcpu->run->kvm_dirty_regs == 0))
+		goto out;
+
+	if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_BITS)
+		goto out;
+
+	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
+		if (kvm_arch_vcpu_ioctl_set_regs(
+				vcpu, &vcpu->run->s.regs.regs))
+			goto out;
+		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
+	}
+	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
+		if (kvm_arch_vcpu_ioctl_set_sregs(
+				vcpu, &vcpu->run->s.regs.sregs))
+			goto out;
+		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
+	}
+	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_DEBUGREGS) {
+		if (kvm_vcpu_ioctl_x86_set_debugregs(
+				vcpu, &vcpu->run->s.regs.debugregs))
+			goto out;
+		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_DEBUGREGS;
+	}
+	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_FPU) {
+		if (kvm_arch_vcpu_ioctl_set_fpu(
+				vcpu, &vcpu->run->s.regs.fpu))
+			goto out;
+		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_FPU;
+	}
+	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
+		if (kvm_vcpu_ioctl_x86_set_vcpu_events(
+				vcpu, &vcpu->run->s.regs.events))
+			goto out;
+		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
+	}
+	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_MSRS) {
+		u32 nmsrs_in = vcpu->run->s.regs.msrs.nmsrs;
+		u32 nmsrs_out = 0;
+
+		if (!nmsrs_in)
+			goto out;
+		else if (nmsrs_in > SYNC_REGS_NUM_MSRS) {
+			r = -E2BIG;
+			goto out;
+		}
+		nmsrs_out = __msr_io(vcpu, nmsrs_in, &vcpu->run->s.regs.msrs,
+				     vcpu->run->s.regs.msr_entries, do_set_msr);
+		if (!nmsrs_out) {
+			r = -EINVAL;
+			goto out;
+		} else if (nmsrs_in != nmsrs_out) {
+			r = nmsrs_out;
+			goto out;
+		}
+		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_MSRS;
+	}
+	r = 0;
+out:
+	return r;
+}
+
 static void fx_init(struct kvm_vcpu *vcpu)
 {
 	fpstate_init(&vcpu->arch.guest_fpu.state);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 838887587411..90a6b24df901 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -395,6 +395,10 @@  struct kvm_run {
 		char padding[256];
 	};
 
+	/* 2048 is the size of the char array that s390 originally used to
+	 * bound/pad the size of the union that holds the sync regs.
+	 */
+#define SYNC_REGS_UNION_SIZE_BYTES 2048
 	/*
 	 * shared registers between kvm and userspace.
 	 * kvm_valid_regs specifies the register classes set by the host
@@ -406,7 +410,7 @@  struct kvm_run {
 	__u64 kvm_dirty_regs;
 	union {
 		struct kvm_sync_regs regs;
-		char padding[2048];
+		char padding[SYNC_REGS_UNION_SIZE_BYTES];
 	} s;
 };