diff mbox

[1/2] kvm/x86: Hyper-V based guest crash data handling

Message ID 1434028716-6767-2-git-send-email-den@openvz.org (mailing list archive)
State New, archived
Headers show

Commit Message

Denis V. Lunev June 11, 2015, 1:18 p.m. UTC
From: Andrey Smetanin <asmetanin@virtuozzo.com>

Windows 2012 guests can notify hypervisor about occurred guest crash
(Windows bugcheck(BSOD)) by writing specific Hyper-V msrs. This patch does
handling of this MSR's by KVM and sending notification to user space that
allows to gather Windows guest crash dump by QEMU/LIBVIRT.

The idea is to provide functionality equal to pvpanic device without
QEMU guest agent for Windows.

The idea is borrowed from Linux HyperV bus driver and validated against
Windows 2k12.

Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Gleb Natapov <gleb@kernel.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/uapi/asm/hyperv.h | 10 +++++
 arch/x86/kvm/Makefile              |  2 +-
 arch/x86/kvm/mshv.c                | 84 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/mshv.h                | 32 +++++++++++++++
 arch/x86/kvm/x86.c                 | 25 ++++++++++++
 include/linux/kvm_host.h           | 17 ++++++++
 include/uapi/linux/kvm.h           | 11 +++++
 7 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kvm/mshv.c
 create mode 100644 arch/x86/kvm/mshv.h

Comments

Peter Hornyack June 12, 2015, 11:03 p.m. UTC | #1
Hi Denis, Andrey, I have a few comments and questions. (re-sending in
plain-text mode, apologies for sending twice.)

On Thu, Jun 11, 2015 at 6:18 AM, Denis V. Lunev <den@openvz.org> wrote:
> From: Andrey Smetanin <asmetanin@virtuozzo.com>
>
> Windows 2012 guests can notify hypervisor about occurred guest crash
> (Windows bugcheck(BSOD)) by writing specific Hyper-V msrs. This patch does
> handling of this MSR's by KVM and sending notification to user space that
> allows to gather Windows guest crash dump by QEMU/LIBVIRT.
>
> The idea is to provide functionality equal to pvpanic device without
> QEMU guest agent for Windows.
>
> The idea is borrowed from Linux HyperV bus driver and validated against
> Windows 2k12.
>
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/include/uapi/asm/hyperv.h | 10 +++++
>  arch/x86/kvm/Makefile              |  2 +-
>  arch/x86/kvm/mshv.c                | 84 ++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/mshv.h                | 32 +++++++++++++++
>  arch/x86/kvm/x86.c                 | 25 ++++++++++++
>  include/linux/kvm_host.h           | 17 ++++++++
>  include/uapi/linux/kvm.h           | 11 +++++
>  7 files changed, 180 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/kvm/mshv.c
>  create mode 100644 arch/x86/kvm/mshv.h
>
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index ce6068d..25f3064 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -199,6 +199,16 @@
>  #define HV_X64_MSR_STIMER3_CONFIG              0x400000B6
>  #define HV_X64_MSR_STIMER3_COUNT               0x400000B7
>
> +
> +/* Hypev-V guest crash notification MSR's */
> +#define HV_X64_MSR_CRASH_P0                    0x40000100
> +#define HV_X64_MSR_CRASH_P1                    0x40000101
> +#define HV_X64_MSR_CRASH_P2                    0x40000102
> +#define HV_X64_MSR_CRASH_P3                    0x40000103
> +#define HV_X64_MSR_CRASH_P4                    0x40000104
> +#define HV_X64_MSR_CRASH_CTL                   0x40000105
> +#define HV_CRASH_CTL_CRASH_NOTIFY              (1ULL << 63)
> +
>  #define HV_X64_MSR_HYPERCALL_ENABLE            0x00000001
>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT        12
>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index 16e8f96..b1ec24d 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -12,7 +12,7 @@ kvm-y                 += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
>  kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
>
>  kvm-y                  += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
> -                          i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
> +                          i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mshv.o
>  kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)    += assigned-dev.o iommu.o
>  kvm-intel-y            += vmx.o
>  kvm-amd-y              += svm.o
> diff --git a/arch/x86/kvm/mshv.c b/arch/x86/kvm/mshv.c
> new file mode 100644
> index 0000000..ad367c44
> --- /dev/null
> +++ b/arch/x86/kvm/mshv.c
> @@ -0,0 +1,84 @@
> +/*
> + * KVM Microsoft Hyper-V extended paravirtualization
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
> + *
> + * Authors: Andrey Smetanin asmetanin@virtuozzo.com
> + */
> +
> +#include <linux/kvm_host.h>
> +#include "mshv.h"
> +
> +int kvm_mshv_ctx_create(struct kvm *kvm)
> +{
> +       struct kvm_mshv_ctx *ctx;
> +
> +       ctx = kzalloc(sizeof(struct kvm_mshv_ctx), GFP_KERNEL);
> +       if (!ctx)
> +               return -ENOMEM;
> +
> +       ctx->kvm = kvm;
> +       atomic_set(&ctx->crash_pending, 0);
> +       kvm->mshv_ctx = ctx;
> +       return 0;
> +}
> +
> +void kvm_mshv_ctx_destroy(struct kvm *kvm)
> +{
> +       kfree(kvm->mshv_ctx);
> +}
> +
> +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> +{
> +       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +       atomic_set(&ctx->crash_pending, 1);

Can you explain why crash_pending is needed?

Do you know what the Windows guest behavior is here? From my reading
of the Hyper-V TLFS 4.0, the guest will read HV_X64_MSR_CRASH_CTL at
some point (not necessarily at the time of the crash, potentially at
boot time?) to determine the crash actions that the hypervisor
supports. I'm not sure why kvm needs to remember any state on reads
from HV_X64_MSR_CRASH_CTL.

 > +
> +       /* Response that KVM ready to receive crash data */
> +       *pdata = HV_CRASH_CTL_CRASH_NOTIFY;

The TLFS says that CrashNotify is the only current action that is
supported on a guest crash, but other actions may be supported in the
future. I suggest defining something like
HV_X64_MSR_CRASH_CTL_CONTENTS (see below), which will only have
HV_CRASH_CTL_CRASH_NOTIFY set for now but may have other bits set in
the future, and then setting *pdata = HV_X64_MSR_CRASH_CTL_CONTENTS
here.

> +       return 0;
> +}
> +
> +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> +{
> +       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +       if (atomic_dec_and_test(&ctx->crash_pending)) {

The TLFS says: "To invoke a supported hypervisor guest crash action, a
child partition writes to the HV_X64_MSR_CRASH_CTL register,
specifying the desired action." However, this implementation doesn't
actually check the data that's written to this register. The condition
here should check for "data & HV_CRASH_CTL_CRASH_NOTIFY", right?

I'm still not clear on the need for crash_pending here... maybe there
are concurrency reasons that I haven't thought through.

> +               pr_debug("vcpu %p 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx",
> +                        vcpu, ctx->crash_p0, ctx->crash_p1, ctx->crash_p2,
> +                        ctx->crash_p3, ctx->crash_p4);
> +
> +               /* Crash data almost gathered so notify user space */
> +               kvm_make_request(KVM_REQ_MSHV_CRASH, vcpu);
> +       }
> +
> +       return 0;
> +}
> +
> +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> +{
> +       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +       switch (msr) {
> +       case HV_X64_MSR_CRASH_P0:
> +               ctx->crash_p0 = data;
> +               return 0;
> +       case HV_X64_MSR_CRASH_P1:
> +               ctx->crash_p1 = data;
> +               return 0;
> +       case HV_X64_MSR_CRASH_P2:
> +               ctx->crash_p2 = data;
> +               return 0;
> +       case HV_X64_MSR_CRASH_P3:
> +               ctx->crash_p3 = data;
> +               return 0;
> +       case HV_X64_MSR_CRASH_P4:
> +               ctx->crash_p4 = data;
> +               return 0;
> +       default:
> +               return -EINVAL;
> +       }
> +}
> diff --git a/arch/x86/kvm/mshv.h b/arch/x86/kvm/mshv.h
> new file mode 100644
> index 0000000..ce8e7fa
> --- /dev/null
> +++ b/arch/x86/kvm/mshv.h
> @@ -0,0 +1,32 @@
> +/*
> + * KVM Microsoft Hyper-V extended paravirtualization
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
> + *
> + * Authors: Andrey Smetanin asmetanin@virtuozzo.com
> + */
> +
> +#ifndef __ARCH_X86_KVM_MSHV_H__
> +#define __ARCH_X86_KVM_MSHV_H__
> +

I suggest here:

#define HV_X64_MSR_CRASH_CTL_CONTENTS  \
        (HV_CRASH_CTL_CRASH_NOTIFY)

To allow for more crash actions to be added in the future.

> +static inline struct kvm_mshv_ctx *kvm_get_mshv_ctx(struct kvm *vm)
> +{
> +       return vm->mshv_ctx;
> +}
> +
> +static inline struct kvm_mshv_ctx *kvm_vcpu_get_mshv_ctx(struct kvm_vcpu *vcpu)
> +{
> +       return vcpu->kvm->mshv_ctx;
> +}
> +
> +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
> +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> +
> +int kvm_mshv_ctx_create(struct kvm *kvm);
> +void kvm_mshv_ctx_destroy(struct kvm *kvm);
> +
> +#endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ea306ad..388b58f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -28,6 +28,7 @@
>  #include "x86.h"
>  #include "cpuid.h"
>  #include "assigned-dev.h"
> +#include "mshv.h"
>
>  #include <linux/clocksource.h>
>  #include <linux/interrupt.h>
> @@ -2338,6 +2339,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                 } else
>                         return set_msr_hyperv(vcpu, msr, data);
>                 break;
> +       case HV_X64_MSR_CRASH_CTL:
> +               return kvm_mshv_msr_set_crash_ctl(vcpu, msr, data);
> +       case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
> +               return kvm_mshv_msr_set_crash_data(vcpu, msr, data);
>         case MSR_IA32_BBL_CR_CTL3:
>                 /* Drop writes to this legacy MSR -- see rdmsr
>                  * counterpart for further detail.
> @@ -2650,6 +2655,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>                 } else
>                         return get_msr_hyperv(vcpu, msr, pdata);
>                 break;
> +       case HV_X64_MSR_CRASH_CTL:
> +               return kvm_mshv_msr_get_crash_ctl(vcpu, msr, pdata);

Have the Windows guests that you've tested ever tried to read from the
data registers HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4? Is there
any harm in implementing them for reads? The TLFS doesn't seem to say
if reads are permitted or not. Not terribly important, but might be
nice to have.

>         case MSR_IA32_BBL_CR_CTL3:
>                 /* This legacy MSR exists but isn't fully documented in current
>                  * silicon.  It is however accessed by winxp in very narrow
> @@ -6280,6 +6287,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                         vcpu_scan_ioapic(vcpu);
>                 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
>                         kvm_vcpu_reload_apic_access_page(vcpu);
> +               if (kvm_check_request(KVM_REQ_MSHV_CRASH, vcpu)) {
> +                       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +                       vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> +                       vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
> +                       vcpu->run->system_event.flags = 0;
> +                       vcpu->run->system_event.crash.p0 = ctx->crash_p0;
> +                       vcpu->run->system_event.crash.p1 = ctx->crash_p1;
> +                       vcpu->run->system_event.crash.p2 = ctx->crash_p2;
> +                       vcpu->run->system_event.crash.p3 = ctx->crash_p3;
> +                       vcpu->run->system_event.crash.p4 = ctx->crash_p4;
> +                       r = 0;
> +                       goto out;
> +               }
>         }
>
>         if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> @@ -7418,6 +7439,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>         if (type)
>                 return -EINVAL;
>
> +       if (kvm_mshv_ctx_create(kvm))
> +               return -ENOMEM;
> +
>         INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
>         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
>         INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
> @@ -7484,6 +7508,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
>
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +       kvm_mshv_ctx_destroy(kvm);
>         if (current->mm == kvm->mm) {
>                 /*
>                  * Free memory regions allocated on behalf of userspace,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ad45054..83bd7bf 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_ENABLE_IBS        23
>  #define KVM_REQ_DISABLE_IBS       24
>  #define KVM_REQ_APIC_PAGE_RELOAD  25
> +#define KVM_REQ_MSHV_CRASH        26
>
>  #define KVM_USERSPACE_IRQ_SOURCE_ID            0
>  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID       1
> @@ -343,6 +344,21 @@ struct kvm_memslots {
>         int used_slots;
>  };
>
> +/*
> + * Ms hyperv paravirt context
> + */
> +struct kvm_mshv_ctx {
> +       struct kvm      *kvm;
> +       atomic_t        crash_pending;
> +
> +       /* Guest crash related parameters */
> +       u64             crash_p0;
> +       u64             crash_p1;
> +       u64             crash_p2;
> +       u64             crash_p3;
> +       u64             crash_p4;
> +};
> +
>  struct kvm {
>         spinlock_t mmu_lock;
>         struct mutex slots_lock;
> @@ -395,6 +411,7 @@ struct kvm {
>  #endif
>         long tlbs_dirty;
>         struct list_head devices;
> +       struct kvm_mshv_ctx *mshv_ctx;
>  };
>
>  #define kvm_err(fmt, ...) \
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 4b60056..12f481b 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -317,8 +317,19 @@ struct kvm_run {
>                 struct {
>  #define KVM_SYSTEM_EVENT_SHUTDOWN       1
>  #define KVM_SYSTEM_EVENT_RESET          2
> +#define KVM_SYSTEM_EVENT_CRASH          3
>                         __u32 type;
>                         __u64 flags;
> +                       union {
> +                               struct {
> +                                       /* Guest crash related parameters */
> +                                       __u64 p0;
> +                                       __u64 p1;
> +                                       __u64 p2;
> +                                       __u64 p3;
> +                                       __u64 p4;
> +                               } crash;
> +                       };
>                 } system_event;
>                 /* KVM_EXIT_S390_STSI */
>                 struct {
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Thanks,
Pete
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrey Smetanin June 15, 2015, 10:21 a.m. UTC | #2
Hi Peter,
thank you for comments. See answers below.
On Fri, 2015-06-12 at 16:03 -0700, Peter Hornyack wrote:
> Hi Denis, Andrey, I have a few comments and questions. (re-sending in
> plain-text mode, apologies for sending twice.)
> 
> On Thu, Jun 11, 2015 at 6:18 AM, Denis V. Lunev <den@openvz.org> wrote:
> > From: Andrey Smetanin <asmetanin@virtuozzo.com>
> >
> > Windows 2012 guests can notify hypervisor about occurred guest crash
> > (Windows bugcheck(BSOD)) by writing specific Hyper-V msrs. This patch does
> > handling of this MSR's by KVM and sending notification to user space that
> > allows to gather Windows guest crash dump by QEMU/LIBVIRT.
> >
> > The idea is to provide functionality equal to pvpanic device without
> > QEMU guest agent for Windows.
> >
> > The idea is borrowed from Linux HyperV bus driver and validated against
> > Windows 2k12.
> >
> > Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> > Signed-off-by: Denis V. Lunev <den@openvz.org>
> > CC: Gleb Natapov <gleb@kernel.org>
> > CC: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> >  arch/x86/include/uapi/asm/hyperv.h | 10 +++++
> >  arch/x86/kvm/Makefile              |  2 +-
> >  arch/x86/kvm/mshv.c                | 84 ++++++++++++++++++++++++++++++++++++++
> >  arch/x86/kvm/mshv.h                | 32 +++++++++++++++
> >  arch/x86/kvm/x86.c                 | 25 ++++++++++++
> >  include/linux/kvm_host.h           | 17 ++++++++
> >  include/uapi/linux/kvm.h           | 11 +++++
> >  7 files changed, 180 insertions(+), 1 deletion(-)
> >  create mode 100644 arch/x86/kvm/mshv.c
> >  create mode 100644 arch/x86/kvm/mshv.h
> >
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index ce6068d..25f3064 100644
> > --- a/arch/x86/include/uapi/asm/hyp	erv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -199,6 +199,16 @@
> >  #define HV_X64_MSR_STIMER3_CONFIG              0x400000B6
> >  #define HV_X64_MSR_STIMER3_COUNT               0x400000B7
> >
> > +
> > +/* Hypev-V guest crash notification MSR's */
> > +#define HV_X64_MSR_CRASH_P0                    0x40000100
> > +#define HV_X64_MSR_CRASH_P1                    0x40000101
> > +#define HV_X64_MSR_CRASH_P2                    0x40000102
> > +#define HV_X64_MSR_CRASH_P3                    0x40000103
> > +#define HV_X64_MSR_CRASH_P4                    0x40000104
> > +#define HV_X64_MSR_CRASH_CTL                   0x40000105
> > +#define HV_CRASH_CTL_CRASH_NOTIFY              (1ULL << 63)
> > +
> >  #define HV_X64_MSR_HYPERCALL_ENABLE            0x00000001
> >  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT        12
> >  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
> > diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> > index 16e8f96..b1ec24d 100644
> > --- a/arch/x86/kvm/Makefile
> > +++ b/arch/x86/kvm/Makefile
> > @@ -12,7 +12,7 @@ kvm-y                 += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
> >  kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
> >
> >  kvm-y                  += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
> > -                          i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
> > +                          i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mshv.o
> >  kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)    += assigned-dev.o iommu.o
> >  kvm-intel-y            += vmx.o
> >  kvm-amd-y              += svm.o
> > diff --git a/arch/x86/kvm/mshv.c b/arch/x86/kvm/mshv.c
> > new file mode 100644
> > index 0000000..ad367c44
> > --- /dev/null
> > +++ b/arch/x86/kvm/mshv.c
> > @@ -0,0 +1,84 @@
> > +/*
> > + * KVM Microsoft Hyper-V extended paravirtualization
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
> > + *
> > + * Authors: Andrey Smetanin asmetanin@virtuozzo.com
> > + */
> > +
> > +#include <linux/kvm_host.h>
> > +#include "mshv.h"
> > +
> > +int kvm_mshv_ctx_create(struct kvm *kvm)
> > +{
> > +       struct kvm_mshv_ctx *ctx;
> > +
> > +       ctx = kzalloc(sizeof(struct kvm_mshv_ctx), GFP_KERNEL);
> > +       if (!ctx)
> > +               return -ENOMEM;
> > +
> > +       ctx->kvm = kvm;
> > +       atomic_set(&ctx->crash_pending, 0);
> > +       kvm->mshv_ctx = ctx;
> > +       return 0;
> > +}
> > +
> > +void kvm_mshv_ctx_destroy(struct kvm *kvm)
> > +{
> > +       kfree(kvm->mshv_ctx);
> > +}
> > +
> > +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> > +{
> > +       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> > +
> > +       atomic_set(&ctx->crash_pending, 1);
> 
> Can you explain why crash_pending is needed?
> 
> Do you know what the Windows guest behavior is here? From my reading
> of the Hyper-V TLFS 4.0, the guest will read HV_X64_MSR_CRASH_CTL at
> some point (not necessarily at the time of the crash, potentially at
> boot time?) to determine the crash actions that the hypervisor
> supports. I'm not sure why kvm needs to remember any state on reads
> from HV_X64_MSR_CRASH_CTL.
Thank you for notice, I agree that crash_pending is useless. We'll drop
it and will send new version of patch.
> 
>  > +
> > +       /* Response that KVM ready to receive crash data */
> > +       *pdata = HV_CRASH_CTL_CRASH_NOTIFY;
> 
> The TLFS says that CrashNotify is the only current action that is
> supported on a guest crash, but other actions may be supported in the
> future. I suggest defining something like
> HV_X64_MSR_CRASH_CTL_CONTENTS (see below), which will only have
> HV_CRASH_CTL_CRASH_NOTIFY set for now but may have other bits set in
> the future, and then setting *pdata = HV_X64_MSR_CRASH_CTL_CONTENTS
> here.
> 
Ok, will do
> > +       return 0;
> > +}
> > +
> > +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > +{
> > +       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> > +
> > +       if (atomic_dec_and_test(&ctx->crash_pending)) {
> 
> The TLFS says: "To invoke a supported hypervisor guest crash action, a
> child partition writes to the HV_X64_MSR_CRASH_CTL register,
> specifying the desired action." However, this implementation doesn't
> actually check the data that's written to this register. The condition
> here should check for "data & HV_CRASH_CTL_CRASH_NOTIFY", right?
> 
Agree, will be done in new version of patch
> I'm still not clear on the need for crash_pending here... maybe there
> are concurrency reasons that I haven't thought through.
> 
Will drop it
> > +               pr_debug("vcpu %p 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx",
> > +                        vcpu, ctx->crash_p0, ctx->crash_p1, ctx->crash_p2,
> > +                        ctx->crash_p3, ctx->crash_p4);
> > +
> > +               /* Crash data almost gathered so notify user space */
> > +               kvm_make_request(KVM_REQ_MSHV_CRASH, vcpu);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > +{
> > +       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> > +
> > +       switch (msr) {
> > +       case HV_X64_MSR_CRASH_P0:
> > +               ctx->crash_p0 = data;
> > +               return 0;
> > +       case HV_X64_MSR_CRASH_P1:
> > +               ctx->crash_p1 = data;
> > +               return 0;
> > +       case HV_X64_MSR_CRASH_P2:
> > +               ctx->crash_p2 = data;
> > +               return 0;
> > +       case HV_X64_MSR_CRASH_P3:
> > +               ctx->crash_p3 = data;
> > +               return 0;
> > +       case HV_X64_MSR_CRASH_P4:
> > +               ctx->crash_p4 = data;
> > +               return 0;
> > +       default:
> > +               return -EINVAL;
> > +       }
> > +}
> > diff --git a/arch/x86/kvm/mshv.h b/arch/x86/kvm/mshv.h
> > new file mode 100644
> > index 0000000..ce8e7fa
> > --- /dev/null
> > +++ b/arch/x86/kvm/mshv.h
> > @@ -0,0 +1,32 @@
> > +/*
> > + * KVM Microsoft Hyper-V extended paravirtualization
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
> > + *
> > + * Authors: Andrey Smetanin asmetanin@virtuozzo.com
> > + */
> > +
> > +#ifndef __ARCH_X86_KVM_MSHV_H__
> > +#define __ARCH_X86_KVM_MSHV_H__
> > +
> 
> I suggest here:
> 
> #define HV_X64_MSR_CRASH_CTL_CONTENTS  \
>         (HV_CRASH_CTL_CRASH_NOTIFY)
> 
> To allow for more crash actions to be added in the future.
> 
Ok, will do
> > +static inline struct kvm_mshv_ctx *kvm_get_mshv_ctx(struct kvm *vm)
> > +{
> > +       return vm->mshv_ctx;
> > +}
> > +
> > +static inline struct kvm_mshv_ctx *kvm_vcpu_get_mshv_ctx(struct kvm_vcpu *vcpu)
> > +{
> > +       return vcpu->kvm->mshv_ctx;
> > +}
> > +
> > +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
> > +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> > +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> > +
> > +int kvm_mshv_ctx_create(struct kvm *kvm);
> > +void kvm_mshv_ctx_destroy(struct kvm *kvm);
> > +
> > +#endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index ea306ad..388b58f 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -28,6 +28,7 @@
> >  #include "x86.h"
> >  #include "cpuid.h"
> >  #include "assigned-dev.h"
> > +#include "mshv.h"
> >
> >  #include <linux/clocksource.h>
> >  #include <linux/interrupt.h>
> > @@ -2338,6 +2339,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >                 } else
> >                         return set_msr_hyperv(vcpu, msr, data);
> >                 break;
> > +       case HV_X64_MSR_CRASH_CTL:
> > +               return kvm_mshv_msr_set_crash_ctl(vcpu, msr, data);
> > +       case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
> > +               return kvm_mshv_msr_set_crash_data(vcpu, msr, data);
> >         case MSR_IA32_BBL_CR_CTL3:
> >                 /* Drop writes to this legacy MSR -- see rdmsr
> >                  * counterpart for further detail.
> > @@ -2650,6 +2655,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >                 } else
> >                         return get_msr_hyperv(vcpu, msr, pdata);
> >                 break;
> > +       case HV_X64_MSR_CRASH_CTL:
> > +               return kvm_mshv_msr_get_crash_ctl(vcpu, msr, pdata);
> 
> Have the Windows guests that you've tested ever tried to read from the
> data registers HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4? Is there
> any harm in implementing them for reads? The TLFS doesn't seem to say
> if reads are permitted or not. Not terribly important, but might be
> nice to have.
> 
This MSR's are not read by w2k12 guest, but we'll make placeholder
handler that returns error in that case.
> >         case MSR_IA32_BBL_CR_CTL3:
> >                 /* This legacy MSR exists but isn't fully documented in current
> >                  * silicon.  It is however accessed by winxp in very narrow
> > @@ -6280,6 +6287,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >                         vcpu_scan_ioapic(vcpu);
> >                 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
> >                         kvm_vcpu_reload_apic_access_page(vcpu);
> > +               if (kvm_check_request(KVM_REQ_MSHV_CRASH, vcpu)) {
> > +                       struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> > +
> > +                       vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> > +                       vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
> > +                       vcpu->run->system_event.flags = 0;
> > +                       vcpu->run->system_event.crash.p0 = ctx->crash_p0;
> > +                       vcpu->run->system_event.crash.p1 = ctx->crash_p1;
> > +                       vcpu->run->system_event.crash.p2 = ctx->crash_p2;
> > +                       vcpu->run->system_event.crash.p3 = ctx->crash_p3;
> > +                       vcpu->run->system_event.crash.p4 = ctx->crash_p4;
> > +                       r = 0;
> > +                       goto out;
> > +               }
> >         }
> >
> >         if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> > @@ -7418,6 +7439,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> >         if (type)
> >                 return -EINVAL;
> >
> > +       if (kvm_mshv_ctx_create(kvm))
> > +               return -ENOMEM;
> > +
> >         INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
> >         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
> >         INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
> > @@ -7484,6 +7508,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
> >
> >  void kvm_arch_destroy_vm(struct kvm *kvm)
> >  {
> > +       kvm_mshv_ctx_destroy(kvm);
> >         if (current->mm == kvm->mm) {
> >                 /*
> >                  * Free memory regions allocated on behalf of userspace,
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index ad45054..83bd7bf 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
> >  #define KVM_REQ_ENABLE_IBS        23
> >  #define KVM_REQ_DISABLE_IBS       24
> >  #define KVM_REQ_APIC_PAGE_RELOAD  25
> > +#define KVM_REQ_MSHV_CRASH        26
> >
> >  #define KVM_USERSPACE_IRQ_SOURCE_ID            0
> >  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID       1
> > @@ -343,6 +344,21 @@ struct kvm_memslots {
> >         int used_slots;
> >  };
> >
> > +/*
> > + * Ms hyperv paravirt context
> > + */
> > +struct kvm_mshv_ctx {
> > +       struct kvm      *kvm;
> > +       atomic_t        crash_pending;
> > +
> > +       /* Guest crash related parameters */
> > +       u64             crash_p0;
> > +       u64             crash_p1;
> > +       u64             crash_p2;
> > +       u64             crash_p3;
> > +       u64             crash_p4;
> > +};
> > +
> >  struct kvm {
> >         spinlock_t mmu_lock;
> >         struct mutex slots_lock;
> > @@ -395,6 +411,7 @@ struct kvm {
> >  #endif
> >         long tlbs_dirty;
> >         struct list_head devices;
> > +       struct kvm_mshv_ctx *mshv_ctx;
> >  };
> >
> >  #define kvm_err(fmt, ...) \
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 4b60056..12f481b 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -317,8 +317,19 @@ struct kvm_run {
> >                 struct {
> >  #define KVM_SYSTEM_EVENT_SHUTDOWN       1
> >  #define KVM_SYSTEM_EVENT_RESET          2
> > +#define KVM_SYSTEM_EVENT_CRASH          3
> >                         __u32 type;
> >                         __u64 flags;
> > +                       union {
> > +                               struct {
> > +                                       /* Guest crash related parameters */
> > +                                       __u64 p0;
> > +                                       __u64 p1;
> > +                                       __u64 p2;
> > +                                       __u64 p3;
> > +                                       __u64 p4;
> > +                               } crash;
> > +                       };
> >                 } system_event;
> >                 /* KVM_EXIT_S390_STSI */
> >                 struct {
> > --
> > 1.9.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> Thanks,
> Pete
Thanks,
Andrey


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 17, 2015, 12:44 p.m. UTC | #3
On 11/06/2015 15:18, Denis V. Lunev wrote:
> From: Andrey Smetanin <asmetanin@virtuozzo.com>
> 
> Windows 2012 guests can notify hypervisor about occurred guest crash
> (Windows bugcheck(BSOD)) by writing specific Hyper-V msrs. This patch does
> handling of this MSR's by KVM and sending notification to user space that
> allows to gather Windows guest crash dump by QEMU/LIBVIRT.
> 
> The idea is to provide functionality equal to pvpanic device without
> QEMU guest agent for Windows.
> 
> The idea is borrowed from Linux HyperV bus driver and validated against
> Windows 2k12.
> 
> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Gleb Natapov <gleb@kernel.org>
> CC: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/include/uapi/asm/hyperv.h | 10 +++++
>  arch/x86/kvm/Makefile              |  2 +-
>  arch/x86/kvm/mshv.c                | 84 ++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/mshv.h                | 32 +++++++++++++++

Please use hyperv.[ch] or hyper-v.[ch] and name the functions kvm_hv_*.
 We can later move more functions from x86.c to the new file, so it's
better to keep the names consistent.

>  arch/x86/kvm/x86.c                 | 25 ++++++++++++
>  include/linux/kvm_host.h           | 17 ++++++++
>  include/uapi/linux/kvm.h           | 11 +++++
>  7 files changed, 180 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/kvm/mshv.c
>  create mode 100644 arch/x86/kvm/mshv.h
> 
> diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> index ce6068d..25f3064 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -199,6 +199,16 @@
>  #define HV_X64_MSR_STIMER3_CONFIG		0x400000B6
>  #define HV_X64_MSR_STIMER3_COUNT		0x400000B7
>  
> +
> +/* Hypev-V guest crash notification MSR's */
> +#define HV_X64_MSR_CRASH_P0			0x40000100
> +#define HV_X64_MSR_CRASH_P1			0x40000101
> +#define HV_X64_MSR_CRASH_P2			0x40000102
> +#define HV_X64_MSR_CRASH_P3			0x40000103
> +#define HV_X64_MSR_CRASH_P4			0x40000104
> +#define HV_X64_MSR_CRASH_CTL			0x40000105
> +#define HV_CRASH_CTL_CRASH_NOTIFY		(1ULL << 63)
> +
>  #define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001
>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT	12
>  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK	\
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index 16e8f96..b1ec24d 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -12,7 +12,7 @@ kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
>  kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
>  
>  kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
> -			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
> +			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mshv.o
>  kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
>  kvm-intel-y		+= vmx.o
>  kvm-amd-y		+= svm.o
> diff --git a/arch/x86/kvm/mshv.c b/arch/x86/kvm/mshv.c
> new file mode 100644
> index 0000000..ad367c44
> --- /dev/null
> +++ b/arch/x86/kvm/mshv.c
> @@ -0,0 +1,84 @@
> +/*
> + * KVM Microsoft Hyper-V extended paravirtualization
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
> + *
> + * Authors: Andrey Smetanin asmetanin@virtuozzo.com
> + */
> +
> +#include <linux/kvm_host.h>
> +#include "mshv.h"
> +
> +int kvm_mshv_ctx_create(struct kvm *kvm)
> +{
> +	struct kvm_mshv_ctx *ctx;
> +
> +	ctx = kzalloc(sizeof(struct kvm_mshv_ctx), GFP_KERNEL);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ctx->kvm = kvm;
> +	atomic_set(&ctx->crash_pending, 0);
> +	kvm->mshv_ctx = ctx;
> +	return 0;
> +}
> +
> +void kvm_mshv_ctx_destroy(struct kvm *kvm)
> +{
> +	kfree(kvm->mshv_ctx);
> +}
> +
> +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> +{
> +	struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +	atomic_set(&ctx->crash_pending, 1);
> +
> +	/* Response that KVM ready to receive crash data */
> +	*pdata = HV_CRASH_CTL_CRASH_NOTIFY;
> +	return 0;
> +}
> +
> +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> +{
> +	struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +	if (atomic_dec_and_test(&ctx->crash_pending)) {
> +		pr_debug("vcpu %p 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx",
> +			 vcpu, ctx->crash_p0, ctx->crash_p1, ctx->crash_p2,
> +			 ctx->crash_p3, ctx->crash_p4);
> +
> +		/* Crash data almost gathered so notify user space */

Why "almost" gathered?

> +		kvm_make_request(KVM_REQ_MSHV_CRASH, vcpu);
> +	}
> +
> +	return 0;
> +}
> +
> +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> +{
> +	struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +	switch (msr) {
> +	case HV_X64_MSR_CRASH_P0:
> +		ctx->crash_p0 = data;
> +		return 0;
> +	case HV_X64_MSR_CRASH_P1:
> +		ctx->crash_p1 = data;
> +		return 0;
> +	case HV_X64_MSR_CRASH_P2:
> +		ctx->crash_p2 = data;
> +		return 0;
> +	case HV_X64_MSR_CRASH_P3:
> +		ctx->crash_p3 = data;
> +		return 0;
> +	case HV_X64_MSR_CRASH_P4:
> +		ctx->crash_p4 = data;
> +		return 0;

Please use an array (with a WARN_ON_ONCE check that the index is in bounds).

> +	default:
> +		return -EINVAL;
> +	}
> +}
> diff --git a/arch/x86/kvm/mshv.h b/arch/x86/kvm/mshv.h
> new file mode 100644
> index 0000000..ce8e7fa
> --- /dev/null
> +++ b/arch/x86/kvm/mshv.h
> @@ -0,0 +1,32 @@
> +/*
> + * KVM Microsoft Hyper-V extended paravirtualization
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
> + *
> + * Authors: Andrey Smetanin asmetanin@virtuozzo.com
> + */
> +
> +#ifndef __ARCH_X86_KVM_MSHV_H__
> +#define __ARCH_X86_KVM_MSHV_H__
> +
> +static inline struct kvm_mshv_ctx *kvm_get_mshv_ctx(struct kvm *vm)
> +{
> +	return vm->mshv_ctx;
> +}
> +
> +static inline struct kvm_mshv_ctx *kvm_vcpu_get_mshv_ctx(struct kvm_vcpu *vcpu)
> +{
> +	return vcpu->kvm->mshv_ctx;
> +}
> +
> +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
> +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> +
> +int kvm_mshv_ctx_create(struct kvm *kvm);
> +void kvm_mshv_ctx_destroy(struct kvm *kvm);
> +
> +#endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ea306ad..388b58f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -28,6 +28,7 @@
>  #include "x86.h"
>  #include "cpuid.h"
>  #include "assigned-dev.h"
> +#include "mshv.h"
>  
>  #include <linux/clocksource.h>
>  #include <linux/interrupt.h>
> @@ -2338,6 +2339,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		} else
>  			return set_msr_hyperv(vcpu, msr, data);
>  		break;
> +	case HV_X64_MSR_CRASH_CTL:
> +		return kvm_mshv_msr_set_crash_ctl(vcpu, msr, data);
> +	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
> +		return kvm_mshv_msr_set_crash_data(vcpu, msr, data);
>  	case MSR_IA32_BBL_CR_CTL3:
>  		/* Drop writes to this legacy MSR -- see rdmsr
>  		 * counterpart for further detail.
> @@ -2650,6 +2655,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  		} else
>  			return get_msr_hyperv(vcpu, msr, pdata);
>  		break;
> +	case HV_X64_MSR_CRASH_CTL:
> +		return kvm_mshv_msr_get_crash_ctl(vcpu, msr, pdata);

Please implement get_crash_data as well.  Userspace may want to retrieve
this value and stash it somewhere for post-mortem analysis, and
KVM_GET_MSR is very handy for this purpose.

Do not return an error, just return the last written datum.

>  	case MSR_IA32_BBL_CR_CTL3:
>  		/* This legacy MSR exists but isn't fully documented in current
>  		 * silicon.  It is however accessed by winxp in very narrow
> @@ -6280,6 +6287,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  			vcpu_scan_ioapic(vcpu);
>  		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
>  			kvm_vcpu_reload_apic_access_page(vcpu);
> +		if (kvm_check_request(KVM_REQ_MSHV_CRASH, vcpu)) {
> +			struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> +
> +			vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> +			vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
> +			vcpu->run->system_event.flags = 0;
> +			vcpu->run->system_event.crash.p0 = ctx->crash_p0;
> +			vcpu->run->system_event.crash.p1 = ctx->crash_p1;
> +			vcpu->run->system_event.crash.p2 = ctx->crash_p2;
> +			vcpu->run->system_event.crash.p3 = ctx->crash_p3;
> +			vcpu->run->system_event.crash.p4 = ctx->crash_p4;
> +			r = 0;
> +			goto out;
> +		}
>  	}
>  
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> @@ -7418,6 +7439,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	if (type)
>  		return -EINVAL;
>  
> +	if (kvm_mshv_ctx_create(kvm))
> +		return -ENOMEM;
> +
>  	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
>  	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
>  	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
> @@ -7484,6 +7508,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
>  
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +	kvm_mshv_ctx_destroy(kvm);
>  	if (current->mm == kvm->mm) {
>  		/*
>  		 * Free memory regions allocated on behalf of userspace,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index ad45054..83bd7bf 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_ENABLE_IBS        23
>  #define KVM_REQ_DISABLE_IBS       24
>  #define KVM_REQ_APIC_PAGE_RELOAD  25
> +#define KVM_REQ_MSHV_CRASH        26
>  
>  #define KVM_USERSPACE_IRQ_SOURCE_ID		0
>  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
> @@ -343,6 +344,21 @@ struct kvm_memslots {
>  	int used_slots;
>  };
>  
> +/*
> + * Ms hyperv paravirt context
> + */
> +struct kvm_mshv_ctx {

This should be in an x86-specific file.  Please name it "struct
kvm_arch_hyperv hv" and stick it inside struct kvm_arch (so it's
accessed as kvm->arch.hv).  We can also move other fields, e.g.
kvm->arch.hv_hypercall inside this new struct.

> +	struct kvm	*kvm;

Not needed if you avoid the pointer: then you can just use container_of.

> +	atomic_t	crash_pending;
> +
> +	/* Guest crash related parameters */
> +	u64		crash_p0;
> +	u64		crash_p1;
> +	u64		crash_p2;
> +	u64		crash_p3;
> +	u64		crash_p4;
> +};
> +
>  struct kvm {
>  	spinlock_t mmu_lock;
>  	struct mutex slots_lock;
> @@ -395,6 +411,7 @@ struct kvm {
>  #endif
>  	long tlbs_dirty;
>  	struct list_head devices;
> +	struct kvm_mshv_ctx *mshv_ctx;
>  };
>  
>  #define kvm_err(fmt, ...) \
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 4b60056..12f481b 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -317,8 +317,19 @@ struct kvm_run {
>  		struct {
>  #define KVM_SYSTEM_EVENT_SHUTDOWN       1
>  #define KVM_SYSTEM_EVENT_RESET          2
> +#define KVM_SYSTEM_EVENT_CRASH          3
>  			__u32 type;
>  			__u64 flags;
> +			union {
> +				struct {
> +					/* Guest crash related parameters */
> +					__u64 p0;
> +					__u64 p1;
> +					__u64 p2;
> +					__u64 p3;
> +					__u64 p4;
> +				} crash;

No need to return the parameters here.  Userspace can use KVM_GET_MSR to
read them.

Paolo

> +			};
>  		} system_event;
>  		/* KVM_EXIT_S390_STSI */
>  		struct {
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrey Smetanin June 19, 2015, 10:28 a.m. UTC | #4
On Wed, 2015-06-17 at 14:44 +0200, Paolo Bonzini wrote:
> 
> On 11/06/2015 15:18, Denis V. Lunev wrote:
> > From: Andrey Smetanin <asmetanin@virtuozzo.com>
> > 
> > Windows 2012 guests can notify hypervisor about occurred guest crash
> > (Windows bugcheck(BSOD)) by writing specific Hyper-V msrs. This patch does
> > handling of this MSR's by KVM and sending notification to user space that
> > allows to gather Windows guest crash dump by QEMU/LIBVIRT.
> > 
> > The idea is to provide functionality equal to pvpanic device without
> > QEMU guest agent for Windows.
> > 
> > The idea is borrowed from Linux HyperV bus driver and validated against
> > Windows 2k12.
> > 
> > Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
> > Signed-off-by: Denis V. Lunev <den@openvz.org>
> > CC: Gleb Natapov <gleb@kernel.org>
> > CC: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> >  arch/x86/include/uapi/asm/hyperv.h | 10 +++++
> >  arch/x86/kvm/Makefile              |  2 +-
> >  arch/x86/kvm/mshv.c                | 84 ++++++++++++++++++++++++++++++++++++++
> >  arch/x86/kvm/mshv.h                | 32 +++++++++++++++
> 
> Please use hyperv.[ch] or hyper-v.[ch] and name the functions kvm_hv_*.
>  We can later move more functions from x86.c to the new file, so it's
> better to keep the names consistent.
> 
Should we prepare a 1st patch in this series where
we move all hyper-v related code from x86.c into hyperv.c new file with
copyright extension ?
> >  arch/x86/kvm/x86.c                 | 25 ++++++++++++
> >  include/linux/kvm_host.h           | 17 ++++++++
> >  include/uapi/linux/kvm.h           | 11 +++++
> >  7 files changed, 180 insertions(+), 1 deletion(-)
> >  create mode 100644 arch/x86/kvm/mshv.c
> >  create mode 100644 arch/x86/kvm/mshv.h
> > 
> > diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
> > index ce6068d..25f3064 100644
> > --- a/arch/x86/include/uapi/asm/hyperv.h
> > +++ b/arch/x86/include/uapi/asm/hyperv.h
> > @@ -199,6 +199,16 @@
> >  #define HV_X64_MSR_STIMER3_CONFIG		0x400000B6
> >  #define HV_X64_MSR_STIMER3_COUNT		0x400000B7
> >  
> > +
> > +/* Hypev-V guest crash notification MSR's */
> > +#define HV_X64_MSR_CRASH_P0			0x40000100
> > +#define HV_X64_MSR_CRASH_P1			0x40000101
> > +#define HV_X64_MSR_CRASH_P2			0x40000102
> > +#define HV_X64_MSR_CRASH_P3			0x40000103
> > +#define HV_X64_MSR_CRASH_P4			0x40000104
> > +#define HV_X64_MSR_CRASH_CTL			0x40000105
> > +#define HV_CRASH_CTL_CRASH_NOTIFY		(1ULL << 63)
> > +
> >  #define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001
> >  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT	12
> >  #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK	\
> > diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> > index 16e8f96..b1ec24d 100644
> > --- a/arch/x86/kvm/Makefile
> > +++ b/arch/x86/kvm/Makefile
> > @@ -12,7 +12,7 @@ kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
> >  kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
> >  
> >  kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
> > -			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
> > +			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mshv.o
> >  kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
> >  kvm-intel-y		+= vmx.o
> >  kvm-amd-y		+= svm.o
> > diff --git a/arch/x86/kvm/mshv.c b/arch/x86/kvm/mshv.c
> > new file mode 100644
> > index 0000000..ad367c44
> > --- /dev/null
> > +++ b/arch/x86/kvm/mshv.c
> > @@ -0,0 +1,84 @@
> > +/*
> > + * KVM Microsoft Hyper-V extended paravirtualization
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
> > + *
> > + * Authors: Andrey Smetanin asmetanin@virtuozzo.com
> > + */
> > +
> > +#include <linux/kvm_host.h>
> > +#include "mshv.h"
> > +
> > +int kvm_mshv_ctx_create(struct kvm *kvm)
> > +{
> > +	struct kvm_mshv_ctx *ctx;
> > +
> > +	ctx = kzalloc(sizeof(struct kvm_mshv_ctx), GFP_KERNEL);
> > +	if (!ctx)
> > +		return -ENOMEM;
> > +
> > +	ctx->kvm = kvm;
> > +	atomic_set(&ctx->crash_pending, 0);
> > +	kvm->mshv_ctx = ctx;
> > +	return 0;
> > +}
> > +
> > +void kvm_mshv_ctx_destroy(struct kvm *kvm)
> > +{
> > +	kfree(kvm->mshv_ctx);
> > +}
> > +
> > +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> > +{
> > +	struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> > +
> > +	atomic_set(&ctx->crash_pending, 1);
> > +
> > +	/* Response that KVM ready to receive crash data */
> > +	*pdata = HV_CRASH_CTL_CRASH_NOTIFY;
> > +	return 0;
> > +}
> > +
> > +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > +{
> > +	struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> > +
> > +	if (atomic_dec_and_test(&ctx->crash_pending)) {
> > +		pr_debug("vcpu %p 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx",
> > +			 vcpu, ctx->crash_p0, ctx->crash_p1, ctx->crash_p2,
> > +			 ctx->crash_p3, ctx->crash_p4);
> > +
> > +		/* Crash data almost gathered so notify user space */
> 
> Why "almost" gathered?
> 
> > +		kvm_make_request(KVM_REQ_MSHV_CRASH, vcpu);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> > +{
> > +	struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> > +
> > +	switch (msr) {
> > +	case HV_X64_MSR_CRASH_P0:
> > +		ctx->crash_p0 = data;
> > +		return 0;
> > +	case HV_X64_MSR_CRASH_P1:
> > +		ctx->crash_p1 = data;
> > +		return 0;
> > +	case HV_X64_MSR_CRASH_P2:
> > +		ctx->crash_p2 = data;
> > +		return 0;
> > +	case HV_X64_MSR_CRASH_P3:
> > +		ctx->crash_p3 = data;
> > +		return 0;
> > +	case HV_X64_MSR_CRASH_P4:
> > +		ctx->crash_p4 = data;
> > +		return 0;
> 
> Please use an array (with a WARN_ON_ONCE check that the index is in bounds).
> 
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +}
> > diff --git a/arch/x86/kvm/mshv.h b/arch/x86/kvm/mshv.h
> > new file mode 100644
> > index 0000000..ce8e7fa
> > --- /dev/null
> > +++ b/arch/x86/kvm/mshv.h
> > @@ -0,0 +1,32 @@
> > +/*
> > + * KVM Microsoft Hyper-V extended paravirtualization
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
> > + *
> > + * Authors: Andrey Smetanin asmetanin@virtuozzo.com
> > + */
> > +
> > +#ifndef __ARCH_X86_KVM_MSHV_H__
> > +#define __ARCH_X86_KVM_MSHV_H__
> > +
> > +static inline struct kvm_mshv_ctx *kvm_get_mshv_ctx(struct kvm *vm)
> > +{
> > +	return vm->mshv_ctx;
> > +}
> > +
> > +static inline struct kvm_mshv_ctx *kvm_vcpu_get_mshv_ctx(struct kvm_vcpu *vcpu)
> > +{
> > +	return vcpu->kvm->mshv_ctx;
> > +}
> > +
> > +int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
> > +int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> > +int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data);
> > +
> > +int kvm_mshv_ctx_create(struct kvm *kvm);
> > +void kvm_mshv_ctx_destroy(struct kvm *kvm);
> > +
> > +#endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index ea306ad..388b58f 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -28,6 +28,7 @@
> >  #include "x86.h"
> >  #include "cpuid.h"
> >  #include "assigned-dev.h"
> > +#include "mshv.h"
> >  
> >  #include <linux/clocksource.h>
> >  #include <linux/interrupt.h>
> > @@ -2338,6 +2339,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  		} else
> >  			return set_msr_hyperv(vcpu, msr, data);
> >  		break;
> > +	case HV_X64_MSR_CRASH_CTL:
> > +		return kvm_mshv_msr_set_crash_ctl(vcpu, msr, data);
> > +	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
> > +		return kvm_mshv_msr_set_crash_data(vcpu, msr, data);
> >  	case MSR_IA32_BBL_CR_CTL3:
> >  		/* Drop writes to this legacy MSR -- see rdmsr
> >  		 * counterpart for further detail.
> > @@ -2650,6 +2655,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
> >  		} else
> >  			return get_msr_hyperv(vcpu, msr, pdata);
> >  		break;
> > +	case HV_X64_MSR_CRASH_CTL:
> > +		return kvm_mshv_msr_get_crash_ctl(vcpu, msr, pdata);
> 
> Please implement get_crash_data as well.  Userspace may want to retrieve
> this value and stash it somewhere for post-mortem analysis, and
> KVM_GET_MSR is very handy for this purpose.
> 
> Do not return an error, just return the last written datum.
> 
> >  	case MSR_IA32_BBL_CR_CTL3:
> >  		/* This legacy MSR exists but isn't fully documented in current
> >  		 * silicon.  It is however accessed by winxp in very narrow
> > @@ -6280,6 +6287,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  			vcpu_scan_ioapic(vcpu);
> >  		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
> >  			kvm_vcpu_reload_apic_access_page(vcpu);
> > +		if (kvm_check_request(KVM_REQ_MSHV_CRASH, vcpu)) {
> > +			struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
> > +
> > +			vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
> > +			vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
> > +			vcpu->run->system_event.flags = 0;
> > +			vcpu->run->system_event.crash.p0 = ctx->crash_p0;
> > +			vcpu->run->system_event.crash.p1 = ctx->crash_p1;
> > +			vcpu->run->system_event.crash.p2 = ctx->crash_p2;
> > +			vcpu->run->system_event.crash.p3 = ctx->crash_p3;
> > +			vcpu->run->system_event.crash.p4 = ctx->crash_p4;
> > +			r = 0;
> > +			goto out;
> > +		}
> >  	}
> >  
> >  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> > @@ -7418,6 +7439,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> >  	if (type)
> >  		return -EINVAL;
> >  
> > +	if (kvm_mshv_ctx_create(kvm))
> > +		return -ENOMEM;
> > +
> >  	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
> >  	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
> >  	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
> > @@ -7484,6 +7508,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
> >  
> >  void kvm_arch_destroy_vm(struct kvm *kvm)
> >  {
> > +	kvm_mshv_ctx_destroy(kvm);
> >  	if (current->mm == kvm->mm) {
> >  		/*
> >  		 * Free memory regions allocated on behalf of userspace,
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index ad45054..83bd7bf 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
> >  #define KVM_REQ_ENABLE_IBS        23
> >  #define KVM_REQ_DISABLE_IBS       24
> >  #define KVM_REQ_APIC_PAGE_RELOAD  25
> > +#define KVM_REQ_MSHV_CRASH        26
> >  
> >  #define KVM_USERSPACE_IRQ_SOURCE_ID		0
> >  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
> > @@ -343,6 +344,21 @@ struct kvm_memslots {
> >  	int used_slots;
> >  };
> >  
> > +/*
> > + * Ms hyperv paravirt context
> > + */
> > +struct kvm_mshv_ctx {
> 
> This should be in an x86-specific file.  Please name it "struct
> kvm_arch_hyperv hv" and stick it inside struct kvm_arch (so it's
> accessed as kvm->arch.hv).  We can also move other fields, e.g.
> kvm->arch.hv_hypercall inside this new struct.
> 
> > +	struct kvm	*kvm;
> 
> Not needed if you avoid the pointer: then you can just use container_of.
> 
> > +	atomic_t	crash_pending;
> > +
> > +	/* Guest crash related parameters */
> > +	u64		crash_p0;
> > +	u64		crash_p1;
> > +	u64		crash_p2;
> > +	u64		crash_p3;
> > +	u64		crash_p4;
> > +};
> > +
> >  struct kvm {
> >  	spinlock_t mmu_lock;
> >  	struct mutex slots_lock;
> > @@ -395,6 +411,7 @@ struct kvm {
> >  #endif
> >  	long tlbs_dirty;
> >  	struct list_head devices;
> > +	struct kvm_mshv_ctx *mshv_ctx;
> >  };
> >  
> >  #define kvm_err(fmt, ...) \
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 4b60056..12f481b 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -317,8 +317,19 @@ struct kvm_run {
> >  		struct {
> >  #define KVM_SYSTEM_EVENT_SHUTDOWN       1
> >  #define KVM_SYSTEM_EVENT_RESET          2
> > +#define KVM_SYSTEM_EVENT_CRASH          3
> >  			__u32 type;
> >  			__u64 flags;
> > +			union {
> > +				struct {
> > +					/* Guest crash related parameters */
> > +					__u64 p0;
> > +					__u64 p1;
> > +					__u64 p2;
> > +					__u64 p3;
> > +					__u64 p4;
> > +				} crash;
> 
> No need to return the parameters here.  Userspace can use KVM_GET_MSR to
> read them.
> 
> Paolo
> 
> > +			};
> >  		} system_event;
> >  		/* KVM_EXIT_S390_STSI */
> >  		struct {
> > 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 19, 2015, 10:32 a.m. UTC | #5
On 19/06/2015 12:28, Andrey Smetanin wrote:
> On Wed, 2015-06-17 at 14:44 +0200, Paolo Bonzini wrote:
>>
>> On 11/06/2015 15:18, Denis V. Lunev wrote:
>>> From: Andrey Smetanin <asmetanin@virtuozzo.com>
>>>
>>> Windows 2012 guests can notify hypervisor about occurred guest crash
>>> (Windows bugcheck(BSOD)) by writing specific Hyper-V msrs. This patch does
>>> handling of this MSR's by KVM and sending notification to user space that
>>> allows to gather Windows guest crash dump by QEMU/LIBVIRT.
>>>
>>> The idea is to provide functionality equal to pvpanic device without
>>> QEMU guest agent for Windows.
>>>
>>> The idea is borrowed from Linux HyperV bus driver and validated against
>>> Windows 2k12.
>>>
>>> Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
>>> Signed-off-by: Denis V. Lunev <den@openvz.org>
>>> CC: Gleb Natapov <gleb@kernel.org>
>>> CC: Paolo Bonzini <pbonzini@redhat.com>
>>> ---
>>>  arch/x86/include/uapi/asm/hyperv.h | 10 +++++
>>>  arch/x86/kvm/Makefile              |  2 +-
>>>  arch/x86/kvm/mshv.c                | 84 ++++++++++++++++++++++++++++++++++++++
>>>  arch/x86/kvm/mshv.h                | 32 +++++++++++++++
>>
>> Please use hyperv.[ch] or hyper-v.[ch] and name the functions kvm_hv_*.
>>  We can later move more functions from x86.c to the new file, so it's
>> better to keep the names consistent.
>>
> Should we prepare a 1st patch in this series where
> we move all hyper-v related code from x86.c into hyperv.c new file with
> copyright extension ?

If you want to do that, I certainly wouldn't complain.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index ce6068d..25f3064 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -199,6 +199,16 @@ 
 #define HV_X64_MSR_STIMER3_CONFIG		0x400000B6
 #define HV_X64_MSR_STIMER3_COUNT		0x400000B7
 
+
+/* Hypev-V guest crash notification MSR's */
+#define HV_X64_MSR_CRASH_P0			0x40000100
+#define HV_X64_MSR_CRASH_P1			0x40000101
+#define HV_X64_MSR_CRASH_P2			0x40000102
+#define HV_X64_MSR_CRASH_P3			0x40000103
+#define HV_X64_MSR_CRASH_P4			0x40000104
+#define HV_X64_MSR_CRASH_CTL			0x40000105
+#define HV_CRASH_CTL_CRASH_NOTIFY		(1ULL << 63)
+
 #define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001
 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT	12
 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK	\
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 16e8f96..b1ec24d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@  kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
+			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mshv.o
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o
diff --git a/arch/x86/kvm/mshv.c b/arch/x86/kvm/mshv.c
new file mode 100644
index 0000000..ad367c44
--- /dev/null
+++ b/arch/x86/kvm/mshv.c
@@ -0,0 +1,84 @@ 
+/*
+ * KVM Microsoft Hyper-V extended paravirtualization
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors: Andrey Smetanin asmetanin@virtuozzo.com
+ */
+
+#include <linux/kvm_host.h>
+#include "mshv.h"
+
+int kvm_mshv_ctx_create(struct kvm *kvm)
+{
+	struct kvm_mshv_ctx *ctx;
+
+	ctx = kzalloc(sizeof(struct kvm_mshv_ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->kvm = kvm;
+	atomic_set(&ctx->crash_pending, 0);
+	kvm->mshv_ctx = ctx;
+	return 0;
+}
+
+void kvm_mshv_ctx_destroy(struct kvm *kvm)
+{
+	kfree(kvm->mshv_ctx);
+}
+
+int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
+
+	atomic_set(&ctx->crash_pending, 1);
+
+	/* Response that KVM ready to receive crash data */
+	*pdata = HV_CRASH_CTL_CRASH_NOTIFY;
+	return 0;
+}
+
+int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
+
+	if (atomic_dec_and_test(&ctx->crash_pending)) {
+		pr_debug("vcpu %p 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx",
+			 vcpu, ctx->crash_p0, ctx->crash_p1, ctx->crash_p2,
+			 ctx->crash_p3, ctx->crash_p4);
+
+		/* Crash data almost gathered so notify user space */
+		kvm_make_request(KVM_REQ_MSHV_CRASH, vcpu);
+	}
+
+	return 0;
+}
+
+int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
+
+	switch (msr) {
+	case HV_X64_MSR_CRASH_P0:
+		ctx->crash_p0 = data;
+		return 0;
+	case HV_X64_MSR_CRASH_P1:
+		ctx->crash_p1 = data;
+		return 0;
+	case HV_X64_MSR_CRASH_P2:
+		ctx->crash_p2 = data;
+		return 0;
+	case HV_X64_MSR_CRASH_P3:
+		ctx->crash_p3 = data;
+		return 0;
+	case HV_X64_MSR_CRASH_P4:
+		ctx->crash_p4 = data;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/arch/x86/kvm/mshv.h b/arch/x86/kvm/mshv.h
new file mode 100644
index 0000000..ce8e7fa
--- /dev/null
+++ b/arch/x86/kvm/mshv.h
@@ -0,0 +1,32 @@ 
+/*
+ * KVM Microsoft Hyper-V extended paravirtualization
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors: Andrey Smetanin asmetanin@virtuozzo.com
+ */
+
+#ifndef __ARCH_X86_KVM_MSHV_H__
+#define __ARCH_X86_KVM_MSHV_H__
+
+static inline struct kvm_mshv_ctx *kvm_get_mshv_ctx(struct kvm *vm)
+{
+	return vm->mshv_ctx;
+}
+
+static inline struct kvm_mshv_ctx *kvm_vcpu_get_mshv_ctx(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->mshv_ctx;
+}
+
+int kvm_mshv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+int kvm_mshv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_mshv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+
+int kvm_mshv_ctx_create(struct kvm *kvm);
+void kvm_mshv_ctx_destroy(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ea306ad..388b58f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -28,6 +28,7 @@ 
 #include "x86.h"
 #include "cpuid.h"
 #include "assigned-dev.h"
+#include "mshv.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -2338,6 +2339,10 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		} else
 			return set_msr_hyperv(vcpu, msr, data);
 		break;
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_mshv_msr_set_crash_ctl(vcpu, msr, data);
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+		return kvm_mshv_msr_set_crash_data(vcpu, msr, data);
 	case MSR_IA32_BBL_CR_CTL3:
 		/* Drop writes to this legacy MSR -- see rdmsr
 		 * counterpart for further detail.
@@ -2650,6 +2655,8 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		} else
 			return get_msr_hyperv(vcpu, msr, pdata);
 		break;
+	case HV_X64_MSR_CRASH_CTL:
+		return kvm_mshv_msr_get_crash_ctl(vcpu, msr, pdata);
 	case MSR_IA32_BBL_CR_CTL3:
 		/* This legacy MSR exists but isn't fully documented in current
 		 * silicon.  It is however accessed by winxp in very narrow
@@ -6280,6 +6287,20 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			vcpu_scan_ioapic(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
 			kvm_vcpu_reload_apic_access_page(vcpu);
+		if (kvm_check_request(KVM_REQ_MSHV_CRASH, vcpu)) {
+			struct kvm_mshv_ctx *ctx = kvm_vcpu_get_mshv_ctx(vcpu);
+
+			vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+			vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
+			vcpu->run->system_event.flags = 0;
+			vcpu->run->system_event.crash.p0 = ctx->crash_p0;
+			vcpu->run->system_event.crash.p1 = ctx->crash_p1;
+			vcpu->run->system_event.crash.p2 = ctx->crash_p2;
+			vcpu->run->system_event.crash.p3 = ctx->crash_p3;
+			vcpu->run->system_event.crash.p4 = ctx->crash_p4;
+			r = 0;
+			goto out;
+		}
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -7418,6 +7439,9 @@  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (type)
 		return -EINVAL;
 
+	if (kvm_mshv_ctx_create(kvm))
+		return -ENOMEM;
+
 	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
@@ -7484,6 +7508,7 @@  void kvm_arch_sync_events(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_mshv_ctx_destroy(kvm);
 	if (current->mm == kvm->mm) {
 		/*
 		 * Free memory regions allocated on behalf of userspace,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ad45054..83bd7bf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,7 @@  static inline bool is_error_page(struct page *page)
 #define KVM_REQ_ENABLE_IBS        23
 #define KVM_REQ_DISABLE_IBS       24
 #define KVM_REQ_APIC_PAGE_RELOAD  25
+#define KVM_REQ_MSHV_CRASH        26
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -343,6 +344,21 @@  struct kvm_memslots {
 	int used_slots;
 };
 
+/*
+ * Ms hyperv paravirt context
+ */
+struct kvm_mshv_ctx {
+	struct kvm	*kvm;
+	atomic_t	crash_pending;
+
+	/* Guest crash related parameters */
+	u64		crash_p0;
+	u64		crash_p1;
+	u64		crash_p2;
+	u64		crash_p3;
+	u64		crash_p4;
+};
+
 struct kvm {
 	spinlock_t mmu_lock;
 	struct mutex slots_lock;
@@ -395,6 +411,7 @@  struct kvm {
 #endif
 	long tlbs_dirty;
 	struct list_head devices;
+	struct kvm_mshv_ctx *mshv_ctx;
 };
 
 #define kvm_err(fmt, ...) \
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4b60056..12f481b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -317,8 +317,19 @@  struct kvm_run {
 		struct {
 #define KVM_SYSTEM_EVENT_SHUTDOWN       1
 #define KVM_SYSTEM_EVENT_RESET          2
+#define KVM_SYSTEM_EVENT_CRASH          3
 			__u32 type;
 			__u64 flags;
+			union {
+				struct {
+					/* Guest crash related parameters */
+					__u64 p0;
+					__u64 p1;
+					__u64 p2;
+					__u64 p3;
+					__u64 p4;
+				} crash;
+			};
 		} system_event;
 		/* KVM_EXIT_S390_STSI */
 		struct {