diff mbox

RFC: convert KVMTRACE to event traces

Message ID 20090514203016.GA10183@amt.cnet (mailing list archive)
State New, archived
Headers show

Commit Message

Marcelo Tosatti May 14, 2009, 8:30 p.m. UTC
Convert custom marker based KVMTRACE to event trace.

Applies on top of
git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-x86.git

See Documentation/trace/events.txt and commit
7ce7e4249921d5073e764f7ff7ad83cfa9894bd7 if you're interested in playing
with event traces.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Christoph Hellwig May 15, 2009, 5:10 p.m. UTC | #1
On Thu, May 14, 2009 at 05:30:16PM -0300, Marcelo Tosatti wrote:
> +		trace_kvm_cr_write(cr, val);
>  		switch (cr) {
>  		case 0:
> -			kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg));
> +			kvm_set_cr0(vcpu, val);
>  			skip_emulated_instruction(vcpu);

Do we really need one trace point covering all cr writes, _and_ one for
each specific register?

>  	if (!npt_enabled)
> -		KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
> -			    (u32)fault_address, (u32)(fault_address >> 32),
> -			    handler);
> +		trace_kvm_page_fault(fault_address, error_code);
>  	else
> -		KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
> -			    (u32)fault_address, (u32)(fault_address >> 32),
> -			    handler);
> +		trace_kvm_tdp_page_fault(fault_address, error_code);

Again this seems a bit cumbersome.  Why not just one tracepoint for
page faults, with a flag if we're using npt or not?

> +ifeq ($(CONFIG_TRACEPOINTS),y)
> +trace-objs = kvm-traces.o
> +arch-trace-objs = kvm-traces-arch.o
> +endif
> +
>  EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
>  
>  kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
> -	i8254.o
> +	i8254.o $(trace-objs)
>  obj-$(CONFIG_KVM) += kvm.o
> -kvm-intel-objs = vmx.o
> +kvm-intel-objs = vmx.o $(arch-trace-objs)
>  obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
> -kvm-amd-objs = svm.o
> +kvm-amd-objs = svm.o $(arch-trace-objs)
>  obj-$(CONFIG_KVM_AMD) += kvm-amd.o

The option to select event tracing bits is CONFIG_EVENT_TRACING and the
makefile syntax used here (both the original makefile and the additions)
is rather awkward.

A proper arch/x86/kvm/Makefile including tracing bits should look like
the following:

-- snip --
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm

kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
			   coalesced_mmio.o irq_comm.o)
kvm-$(CONFIG_KVM_TRACE)	+= $(addprefix ../../../virt/kvm/, kvm_trace.o)
kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
kvm-y			+= x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
			   i8254.o

kvm-$(CONFIG_EVENT_TRACING) += kvm-traces.o
kvm-arch-trace-$(CONFIG_EVENT_TRACING) += kvm-traces-arch.o

kvm-intel-y		+= vmx.o $(kvm-arch-trace-y)
kvm-amd-y		+= svm.o $(kvm-arch-trace-y)

obj-$(CONFIG_KVM)	+= kvm.o
obj-$(CONFIG_KVM_INTEL)	+= kvm-intel.o
obj-$(CONFIG_KVM_AMD)	+= kvm-amd.o
-- snip --

and do we actually still need kvm_trace.o after this?

Anyway, I'll send the upstream part of the makefile cleanup out ASAP,
then you can rebase later.

> Index: linux-2.6-x86-2/arch/x86/kvm/kvm-traces.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6-x86-2/arch/x86/kvm/kvm-traces.c
> @@ -0,0 +1,5 @@
> +#include <linux/sched.h>
> +
> +
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/kvm/x86.h>

Can't we just put this into some other common .c file?  That would also
reduce the amount of makefile magic required.

> Index: linux-2.6-x86-2/arch/x86/kvm/kvm-traces-arch.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6-x86-2/arch/x86/kvm/kvm-traces-arch.c
> @@ -0,0 +1,5 @@
> +#include <linux/sched.h>
> +
> +
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/kvm/x86-arch.h>

Same for this one, especially as the makefile hackery required for this
one is even worse..

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti May 15, 2009, 6:08 p.m. UTC | #2
On Fri, May 15, 2009 at 01:10:34PM -0400, Christoph Hellwig wrote:
> On Thu, May 14, 2009 at 05:30:16PM -0300, Marcelo Tosatti wrote:
> > +		trace_kvm_cr_write(cr, val);
> >  		switch (cr) {
> >  		case 0:
> > -			kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg));
> > +			kvm_set_cr0(vcpu, val);
> >  			skip_emulated_instruction(vcpu);
> 
> Do we really need one trace point covering all cr writes, _and_ one for
> each specific register?

There is one tracepoint named kvm_cr that covers cr reads and writes.

trace_kvm_cr_read/trace_kvm_cr_write are macros that expand to
trace_kvm_cr() with rw=0 (read) or rw=1 (write). Perhaps that is not a
very good idea.

> 
> >  	if (!npt_enabled)
> > -		KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
> > -			    (u32)fault_address, (u32)(fault_address >> 32),
> > -			    handler);
> > +		trace_kvm_page_fault(fault_address, error_code);
> >  	else
> > -		KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
> > -			    (u32)fault_address, (u32)(fault_address >> 32),
> > -			    handler);
> > +		trace_kvm_tdp_page_fault(fault_address, error_code);
> 
> Again this seems a bit cumbersome.  Why not just one tracepoint for
> page faults, with a flag if we're using npt or not?

Issue is the meaning of these faults is different. With npt disabled the
fault is a guest fault (like a normal pagefault), but with npt enabled
the fault indicates the host pagetables the hardware uses to do the
translation are not set up correctly.

I did unify them as you suggest but reverted back to separate
tracepoints because the unification might be confusing.

Can be unified later if desirable.

> > +ifeq ($(CONFIG_TRACEPOINTS),y)
> > +trace-objs = kvm-traces.o
> > +arch-trace-objs = kvm-traces-arch.o
> > +endif
> > +
> >  EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
> >  
> >  kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
> > -	i8254.o
> > +	i8254.o $(trace-objs)
> >  obj-$(CONFIG_KVM) += kvm.o
> > -kvm-intel-objs = vmx.o
> > +kvm-intel-objs = vmx.o $(arch-trace-objs)
> >  obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
> > -kvm-amd-objs = svm.o
> > +kvm-amd-objs = svm.o $(arch-trace-objs)
> >  obj-$(CONFIG_KVM_AMD) += kvm-amd.o
> 
> The option to select event tracing bits is CONFIG_EVENT_TRACING and the
> makefile syntax used here (both the original makefile and the additions)
> is rather awkward.
> 
> A proper arch/x86/kvm/Makefile including tracing bits should look like
> the following:
> 
> -- snip --
> EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
> 
> kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
> 			   coalesced_mmio.o irq_comm.o)
> kvm-$(CONFIG_KVM_TRACE)	+= $(addprefix ../../../virt/kvm/, kvm_trace.o)
> kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
> kvm-y			+= x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
> 			   i8254.o
> 
> kvm-$(CONFIG_EVENT_TRACING) += kvm-traces.o
> kvm-arch-trace-$(CONFIG_EVENT_TRACING) += kvm-traces-arch.o
> 
> kvm-intel-y		+= vmx.o $(kvm-arch-trace-y)
> kvm-amd-y		+= svm.o $(kvm-arch-trace-y)
> 
> obj-$(CONFIG_KVM)	+= kvm.o
> obj-$(CONFIG_KVM_INTEL)	+= kvm-intel.o
> obj-$(CONFIG_KVM_AMD)	+= kvm-amd.o
> -- snip --
> 
> and do we actually still need kvm_trace.o after this?

Your version looks much nicer. kvm_trace.o can disappear as soon as 
this is in Avi's tree and a decent replacement for user/kvm_trace.c 
is in qemu-kvm.git.

> Anyway, I'll send the upstream part of the makefile cleanup out ASAP,
> then you can rebase later.

OK.

> 
> > Index: linux-2.6-x86-2/arch/x86/kvm/kvm-traces.c
> > ===================================================================
> > --- /dev/null
> > +++ linux-2.6-x86-2/arch/x86/kvm/kvm-traces.c
> > @@ -0,0 +1,5 @@
> > +#include <linux/sched.h>
> > +
> > +
> > +#define CREATE_TRACE_POINTS
> > +#include <trace/events/kvm/x86.h>
> 
> Can't we just put this into some other common .c file?  That would also
> reduce the amount of makefile magic required.
> 
> > Index: linux-2.6-x86-2/arch/x86/kvm/kvm-traces-arch.c
> > ===================================================================
> > --- /dev/null
> > +++ linux-2.6-x86-2/arch/x86/kvm/kvm-traces-arch.c
> > @@ -0,0 +1,5 @@
> > +#include <linux/sched.h>
> > +
> > +
> > +#define CREATE_TRACE_POINTS
> > +#include <trace/events/kvm/x86-arch.h>
> 
> Same for this one, especially as the makefile hackery required for this
> one is even worse..

Probably for both. Now that you mention it, I can't explain the reason
for the separate C files. Will put this up in a git tree in a couple of
hours.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity May 17, 2009, 8:21 p.m. UTC | #3
Marcelo Tosatti wrote:
> On Fri, May 15, 2009 at 01:10:34PM -0400, Christoph Hellwig wrote:
>   
>> On Thu, May 14, 2009 at 05:30:16PM -0300, Marcelo Tosatti wrote:
>>     
>>> +		trace_kvm_cr_write(cr, val);
>>>  		switch (cr) {
>>>  		case 0:
>>> -			kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg));
>>> +			kvm_set_cr0(vcpu, val);
>>>  			skip_emulated_instruction(vcpu);
>>>       
>> Do we really need one trace point covering all cr writes, _and_ one for
>> each specific register?
>>     
>
> There is one tracepoint named kvm_cr that covers cr reads and writes.
>
> trace_kvm_cr_read/trace_kvm_cr_write are macros that expand to
> trace_kvm_cr() with rw=0 (read) or rw=1 (write). Perhaps that is not a
> very good idea.
>   

If the framework allows us to filter on arguments, that's fine (I think 
it does).  We need a way to look at, say, cr4 reads in a sea of cr3 
read/writes.

>> Again this seems a bit cumbersome.  Why not just one tracepoint for
>> page faults, with a flag if we're using npt or not?
>>     
>
> Issue is the meaning of these faults is different. With npt disabled the
> fault is a guest fault (like a normal pagefault), but with npt enabled
> the fault indicates the host pagetables the hardware uses to do the
> translation are not set up correctly.
>   

No, with npt disabled a fault is either a guest fault or a shadow fault 
or a host (minor/major) fault.
With npt enabled it cannot be a guest fault, but may be a shadow fault 
or a host fault.

> I did unify them as you suggest but reverted back to separate
> tracepoints because the unification might be confusing.
>
> Can be unified later if desirable.
>   

I think you can unify them, especially as they will never occur together 
in the same run.  We'll need tracepoints later in the mmu code for 
dealing with the different fault types.  Guest faults are marked by 
injecting a #PF, shadow faults by populating an spte, and host faults 
somewhere in the bowels of get_user_pages().  Isn't it wonderful how 
everything is integrated?
diff mbox

Patch

Index: linux-2.6-x86-2/arch/x86/kvm/vmx.c
===================================================================
--- linux-2.6-x86-2.orig/arch/x86/kvm/vmx.c
+++ linux-2.6-x86-2/arch/x86/kvm/vmx.c
@@ -25,6 +25,7 @@ 
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/moduleparam.h>
+#include <trace/events/kvm/x86-arch.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -2406,7 +2407,7 @@  static void vmx_inject_irq(struct kvm_vc
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
+	trace_kvm_inj_virq(irq);
 
 	++vcpu->stat.irq_injections;
 	if (vcpu->arch.rmode.active) {
@@ -2631,8 +2632,8 @@  static int handle_exception(struct kvm_v
 		if (vm_need_ept())
 			BUG();
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
-		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
-			    (u32)((u64)cr2 >> 32), handler);
+		trace_kvm_page_fault(cr2, error_code);
+
 		if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending)
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
@@ -2679,7 +2680,6 @@  static int handle_external_interrupt(str
 				     struct kvm_run *kvm_run)
 {
 	++vcpu->stat.irq_exits;
-	KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler);
 	return 1;
 }
 
@@ -2727,7 +2727,7 @@  vmx_patch_hypercall(struct kvm_vcpu *vcp
 
 static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	unsigned long exit_qualification;
+	unsigned long exit_qualification, val;
 	int cr;
 	int reg;
 
@@ -2736,25 +2736,23 @@  static int handle_cr(struct kvm_vcpu *vc
 	reg = (exit_qualification >> 8) & 15;
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
-		KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr,
-			    (u32)kvm_register_read(vcpu, reg),
-			    (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
-			    handler);
+		val = kvm_register_read(vcpu, reg);
+		trace_kvm_cr_write(cr, val);
 		switch (cr) {
 		case 0:
-			kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg));
+			kvm_set_cr0(vcpu, val);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 3:
-			kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg));
+			kvm_set_cr3(vcpu, val);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 4:
-			kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg));
+			kvm_set_cr4(vcpu, val);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 8:
-			kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg));
+			kvm_set_cr8(vcpu, val);
 			skip_emulated_instruction(vcpu);
 			if (irqchip_in_kernel(vcpu->kvm))
 				return 1;
@@ -2767,23 +2765,19 @@  static int handle_cr(struct kvm_vcpu *vc
 		vcpu->arch.cr0 &= ~X86_CR0_TS;
 		vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
 		vmx_fpu_activate(vcpu);
-		KVMTRACE_0D(CLTS, vcpu, handler);
 		skip_emulated_instruction(vcpu);
 		return 1;
 	case 1: /*mov from cr*/
 		switch (cr) {
 		case 3:
 			kvm_register_write(vcpu, reg, vcpu->arch.cr3);
-			KVMTRACE_3D(CR_READ, vcpu, (u32)cr,
-				    (u32)kvm_register_read(vcpu, reg),
-				    (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
-				    handler);
+			trace_kvm_cr_read(cr, vcpu->arch.cr3);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 8:
-			kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu));
-			KVMTRACE_2D(CR_READ, vcpu, (u32)cr,
-				    (u32)kvm_register_read(vcpu, reg), handler);
+			val = kvm_get_cr8(vcpu);
+			kvm_register_write(vcpu, reg, val);
+			trace_kvm_cr_read(cr, val);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		}
@@ -2851,7 +2845,6 @@  static int handle_dr(struct kvm_vcpu *vc
 			val = 0;
 		}
 		kvm_register_write(vcpu, reg, val);
-		KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
 	} else {
 		val = vcpu->arch.regs[reg];
 		switch (dr) {
@@ -2884,7 +2877,6 @@  static int handle_dr(struct kvm_vcpu *vc
 			}
 			break;
 		}
-		KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
 	}
 	skip_emulated_instruction(vcpu);
 	return 1;
@@ -2906,8 +2898,7 @@  static int handle_rdmsr(struct kvm_vcpu 
 		return 1;
 	}
 
-	KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32),
-		    handler);
+	trace_kvm_msr_read(ecx, data);
 
 	/* FIXME: handling of bits 32:63 of rax, rdx */
 	vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
@@ -2922,8 +2913,7 @@  static int handle_wrmsr(struct kvm_vcpu 
 	u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-	KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32),
-		    handler);
+	trace_kvm_msr_write(ecx, data);
 
 	if (vmx_set_msr(vcpu, ecx, data) != 0) {
 		kvm_inject_gp(vcpu, 0);
@@ -2950,7 +2940,6 @@  static int handle_interrupt_window(struc
 	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 
-	KVMTRACE_0D(PEND_INTR, vcpu, handler);
 	++vcpu->stat.irq_window_exits;
 
 	/*
@@ -3076,6 +3065,7 @@  static int handle_ept_violation(struct k
 	}
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+	trace_kvm_tdp_page_fault(gpa, exit_qualification);
 	return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
 }
 
@@ -3165,8 +3155,7 @@  static int kvm_handle_exit(struct kvm_ru
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
-	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
-		    (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
+	trace_kvm_exit(exit_reason, kvm_rip_read(vcpu));
 
 	/* If we need to emulate an MMIO from handle_invalid_guest_state
 	 * we just return 0 */
@@ -3514,10 +3503,8 @@  static void vmx_vcpu_run(struct kvm_vcpu
 
 	/* We need to handle NMIs before interrupts are enabled */
 	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-	    (intr_info & INTR_INFO_VALID_MASK)) {
-		KVMTRACE_0D(NMI, vcpu, handler);
+	    (intr_info & INTR_INFO_VALID_MASK))
 		asm("int $2");
-	}
 
 	vmx_complete_interrupts(vmx);
 }
Index: linux-2.6-x86-2/arch/x86/kvm/svm.c
===================================================================
--- linux-2.6-x86-2.orig/arch/x86/kvm/svm.c
+++ linux-2.6-x86-2/arch/x86/kvm/svm.c
@@ -25,6 +25,7 @@ 
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/sched.h>
+#include <trace/events/kvm/x86-arch.h>
 
 #include <asm/desc.h>
 
@@ -1053,7 +1054,6 @@  static unsigned long svm_get_dr(struct k
 		val = 0;
 	}
 
-	KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
 	return val;
 }
 
@@ -1062,8 +1062,6 @@  static void svm_set_dr(struct kvm_vcpu *
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
-
 	*exception = 0;
 
 	switch (dr) {
@@ -1121,13 +1119,9 @@  static int pf_interception(struct vcpu_s
 	error_code = svm->vmcb->control.exit_info_1;
 
 	if (!npt_enabled)
-		KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
-			    (u32)fault_address, (u32)(fault_address >> 32),
-			    handler);
+		trace_kvm_page_fault(fault_address, error_code);
 	else
-		KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
-			    (u32)fault_address, (u32)(fault_address >> 32),
-			    handler);
+		trace_kvm_tdp_page_fault(fault_address, error_code);
 	/*
 	 * FIXME: Tis shouldn't be necessary here, but there is a flush
 	 * missing in the MMU code. Until we find this bug, flush the
@@ -1237,14 +1231,12 @@  static int io_interception(struct vcpu_s
 
 static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	KVMTRACE_0D(NMI, &svm->vcpu, handler);
 	return 1;
 }
 
 static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	++svm->vcpu.stat.irq_exits;
-	KVMTRACE_0D(INTR, &svm->vcpu, handler);
 	return 1;
 }
 
@@ -1963,8 +1955,7 @@  static int rdmsr_interception(struct vcp
 	if (svm_get_msr(&svm->vcpu, ecx, &data))
 		kvm_inject_gp(&svm->vcpu, 0);
 	else {
-		KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data,
-			    (u32)(data >> 32), handler);
+		trace_kvm_msr_read(ecx, data);
 
 		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
 		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
@@ -2058,8 +2049,7 @@  static int wrmsr_interception(struct vcp
 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-	KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32),
-		    handler);
+	trace_kvm_msr_write(ecx, data);
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
 	if (svm_set_msr(&svm->vcpu, ecx, data))
@@ -2080,8 +2070,6 @@  static int msr_interception(struct vcpu_
 static int interrupt_window_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
-	KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
-
 	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	/*
@@ -2158,8 +2146,7 @@  static int handle_exit(struct kvm_run *k
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 exit_code = svm->vmcb->control.exit_code;
 
-	KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
-		    (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
+	trace_kvm_exit(exit_code, svm->vmcb->save.rip);
 
 	if (is_nested(svm)) {
 		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
@@ -2245,7 +2232,7 @@  static inline void svm_inject_irq(struct
 {
 	struct vmcb_control_area *control;
 
-	KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler);
+	trace_kvm_inj_virq(irq);
 
 	++svm->vcpu.stat.irq_injections;
 	control = &svm->vmcb->control;
Index: linux-2.6-x86-2/arch/x86/kvm/lapic.c
===================================================================
--- linux-2.6-x86-2.orig/arch/x86/kvm/lapic.c
+++ linux-2.6-x86-2/arch/x86/kvm/lapic.c
@@ -26,6 +26,7 @@ 
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/math64.h>
+#include <trace/events/kvm/x86.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/page.h>
@@ -557,8 +558,6 @@  static u32 __apic_read(struct kvm_lapic 
 {
 	u32 val = 0;
 
-	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
-
 	if (offset >= LAPIC_MMIO_LENGTH)
 		return 0;
 
@@ -599,6 +598,8 @@  static void apic_mmio_read(struct kvm_io
 	}
 	result = __apic_read(apic, offset & ~0xf);
 
+	trace_kvm_apic_read(offset, result);
+
 	switch (len) {
 	case 1:
 	case 2:
@@ -694,7 +695,7 @@  static void apic_mmio_write(struct kvm_i
 
 	offset &= 0xff0;
 
-	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
+	trace_kvm_apic_write(offset, val);
 
 	switch (offset) {
 	case APIC_ID:		/* Local APIC ID */
Index: linux-2.6-x86-2/arch/x86/kvm/Makefile
===================================================================
--- linux-2.6-x86-2.orig/arch/x86/kvm/Makefile
+++ linux-2.6-x86-2/arch/x86/kvm/Makefile
@@ -11,12 +11,17 @@  ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
+ifeq ($(CONFIG_TRACEPOINTS),y)
+trace-objs = kvm-traces.o
+arch-trace-objs = kvm-traces-arch.o
+endif
+
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-	i8254.o
+	i8254.o $(trace-objs)
 obj-$(CONFIG_KVM) += kvm.o
-kvm-intel-objs = vmx.o
+kvm-intel-objs = vmx.o $(arch-trace-objs)
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
-kvm-amd-objs = svm.o
+kvm-amd-objs = svm.o $(arch-trace-objs)
 obj-$(CONFIG_KVM_AMD) += kvm-amd.o
Index: linux-2.6-x86-2/arch/x86/kvm/kvm-traces.c
===================================================================
--- /dev/null
+++ linux-2.6-x86-2/arch/x86/kvm/kvm-traces.c
@@ -0,0 +1,5 @@ 
+#include <linux/sched.h>
+
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/kvm/x86.h>
Index: linux-2.6-x86-2/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-x86-2.orig/arch/x86/kvm/x86.c
+++ linux-2.6-x86-2/arch/x86/kvm/x86.c
@@ -37,6 +37,7 @@ 
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
+#include <trace/events/kvm/x86.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -330,9 +331,6 @@  EXPORT_SYMBOL_GPL(kvm_set_cr0);
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
 	kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
-	KVMTRACE_1D(LMSW, vcpu,
-		    (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
-		    handler);
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
@@ -2285,7 +2283,6 @@  int emulate_invlpg(struct kvm_vcpu *vcpu
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
-	KVMTRACE_0D(CLTS, vcpu, handler);
 	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
 	return X86EMUL_CONTINUE;
 }
@@ -2563,12 +2560,8 @@  int kvm_emulate_pio(struct kvm_vcpu *vcp
 	vcpu->arch.pio.down = 0;
 	vcpu->arch.pio.rep = 0;
 
-	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-			    handler);
-	else
-		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-			    handler);
+	trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+		      size, 1);
 
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
@@ -2602,12 +2595,8 @@  int kvm_emulate_pio_string(struct kvm_vc
 	vcpu->arch.pio.down = down;
 	vcpu->arch.pio.rep = rep;
 
-	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-			    handler);
-	else
-		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-			    handler);
+	trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+		      size, count);
 
 	if (!count) {
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
@@ -2785,7 +2774,6 @@  void kvm_arch_exit(void)
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.halt_exits;
-	KVMTRACE_0D(HLT, vcpu, handler);
 	if (irqchip_in_kernel(vcpu->kvm)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
 		return 1;
@@ -2816,7 +2804,7 @@  int kvm_emulate_hypercall(struct kvm_vcp
 	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
 	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
 
-	KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
+	trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
 	if (!is_long_mode(vcpu)) {
 		nr &= 0xFFFFFFFF;
@@ -2916,8 +2904,6 @@  unsigned long realmode_get_cr(struct kvm
 		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
 		return 0;
 	}
-	KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
-		    (u32)((u64)value >> 32), handler);
 
 	return value;
 }
@@ -2925,9 +2911,6 @@  unsigned long realmode_get_cr(struct kvm
 void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
 		     unsigned long *rflags)
 {
-	KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
-		    (u32)((u64)val >> 32), handler);
-
 	switch (cr) {
 	case 0:
 		kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
@@ -3027,11 +3010,11 @@  void kvm_emulate_cpuid(struct kvm_vcpu *
 		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
 	}
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	KVMTRACE_5D(CPUID, vcpu, function,
-		    (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
-		    (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
-		    (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
-		    (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
+	trace_kvm_cpuid(function,
+			kvm_register_read(vcpu, VCPU_REGS_RAX),
+			kvm_register_read(vcpu, VCPU_REGS_RBX),
+			kvm_register_read(vcpu, VCPU_REGS_RCX),
+			kvm_register_read(vcpu, VCPU_REGS_RDX));
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
@@ -3176,7 +3159,7 @@  static int vcpu_enter_guest(struct kvm_v
 		set_debugreg(vcpu->arch.eff_db[3], 3);
 	}
 
-	KVMTRACE_0D(VMENTRY, vcpu, entryexit);
+	trace_kvm_entry(vcpu->vcpu_id);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
 	if (unlikely(vcpu->arch.switch_db_regs)) {
Index: linux-2.6-x86-2/arch/x86/kvm/kvm-traces-arch.c
===================================================================
--- /dev/null
+++ linux-2.6-x86-2/arch/x86/kvm/kvm-traces-arch.c
@@ -0,0 +1,5 @@ 
+#include <linux/sched.h>
+
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/kvm/x86-arch.h>
Index: linux-2.6-x86-2/include/trace/events/kvm/x86-arch.h
===================================================================
--- /dev/null
+++ linux-2.6-x86-2/include/trace/events/kvm/x86-arch.h
@@ -0,0 +1,151 @@ 
+#if !defined(_TRACE_KVM_ARCH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_ARCH_H
+
+#include <linux/kvm_host.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_FILE kvm/x86-arch
+
+/*
+ * Tracepoint for kvm guest exit:
+ */
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(unsigned int exit_reason, unsigned long guest_rip),
+	TP_ARGS(exit_reason, guest_rip),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	exit_reason	)
+		__field(	unsigned long,	guest_rip	)
+	),
+
+	TP_fast_assign(
+		__entry->exit_reason	= exit_reason;
+		__entry->guest_rip	= guest_rip;
+	),
+
+	TP_printk("reason %x rip 0x%lx",
+		  __entry->exit_reason, __entry->guest_rip)
+);
+
+/*
+ * Tracepoint for kvm interrupt injection:
+ */
+TRACE_EVENT(kvm_inj_virq,
+	TP_PROTO(unsigned int irq),
+	TP_ARGS(irq),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	irq		)
+	),
+
+	TP_fast_assign(
+		__entry->irq		= irq;
+	),
+
+	TP_printk("irq %u", __entry->irq)
+);
+
+/*
+ * Tracepoint for guest page fault.
+ */
+TRACE_EVENT(kvm_page_fault,
+	TP_PROTO(unsigned long fault_address, unsigned int error_code),
+	TP_ARGS(fault_address, error_code),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	fault_address	)
+		__field(	unsigned int,	error_code	)
+	),
+
+	TP_fast_assign(
+		__entry->fault_address	= fault_address;
+		__entry->error_code	= error_code;
+	),
+
+	TP_printk("address %lx error_code %x",
+		  __entry->fault_address, __entry->error_code)
+);
+
+/*
+ * Tracepoint for two dimensional paging fault.
+ */
+TRACE_EVENT(kvm_tdp_page_fault,
+	TP_PROTO(unsigned long fault_address, unsigned int error_code),
+	TP_ARGS(fault_address, error_code),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	fault_address	)
+		__field(	unsigned int,	error_code	)
+	),
+
+	TP_fast_assign(
+		__entry->fault_address	= fault_address;
+		__entry->error_code	= error_code;
+	),
+
+	TP_printk("address %lx error_code %x",
+		  __entry->fault_address, __entry->error_code)
+);
+
+/*
+ * Tracepoint for guest MSR access; rw is 0 for a read and 1 for a write.
+ */
+TRACE_EVENT(kvm_msr,
+	TP_PROTO(unsigned int rw, unsigned int ecx, u64 data),
+	TP_ARGS(rw, ecx, data),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	rw		)
+		__field(	unsigned int,	ecx		)
+		__field(	u64,		data		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->ecx		= ecx;
+		__entry->data		= data;
+	),
+
+	TP_printk("msr_%s %x = 0x%llx",
+		  __entry->rw ? "write" : "read",
+		  __entry->ecx, __entry->data)
+);
+
+#define trace_kvm_msr_read(ecx, data)			\
+		trace_kvm_msr(0, ecx, data)
+#define trace_kvm_msr_write(ecx, data)			\
+		trace_kvm_msr(1, ecx, data)
+
+/*
+ * Tracepoint for guest CR access.
+ */
+TRACE_EVENT(kvm_cr,
+	TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val),
+	TP_ARGS(rw, cr, val),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	rw		)
+		__field(	unsigned int,	cr		)
+		__field(	unsigned long,	val		)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->cr		= cr;
+		__entry->val		= val;
+	),
+
+	TP_printk("cr_%s %x = 0x%lx",
+		  __entry->rw ? "write" : "read",
+		  __entry->cr, __entry->val)
+);
+
+#define trace_kvm_cr_read(cr, val) trace_kvm_cr(0, cr, val)
+#define trace_kvm_cr_write(cr, val) trace_kvm_cr(1, cr, val)
+
+#endif /* _TRACE_KVM_ARCH_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
Index: linux-2.6-x86-2/include/trace/events/kvm/x86.h
===================================================================
--- /dev/null
+++ linux-2.6-x86-2/include/trace/events/kvm/x86.h
@@ -0,0 +1,146 @@ 
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/kvm_host.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_FILE kvm/x86
+
+/*
+ * Tracepoint for guest mode entry; records the id of the entering vcpu.
+ */
+TRACE_EVENT(kvm_entry,
+	TP_PROTO(unsigned int vcpu_id),
+	TP_ARGS(vcpu_id),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+	),
+
+	TP_printk("vcpu %u", __entry->vcpu_id)
+);
+
+/*
+ * Tracepoint for hypercall.
+ */
+TRACE_EVENT(kvm_hypercall,
+	TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
+		 unsigned long a2, unsigned long a3),
+	TP_ARGS(nr, a0, a1, a2, a3),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long, 	nr	)
+		__field(	unsigned long,	a0	)
+		__field(	unsigned long,	a1	)
+		__field(	unsigned long,	a2	)
+		__field(	unsigned long,	a3	)
+	),
+
+	TP_fast_assign(
+		__entry->nr		= nr;
+		__entry->a0		= a0;
+		__entry->a1		= a1;
+		__entry->a2		= a2;
+		__entry->a3		= a3;
+	),
+
+	TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx",
+		 __entry->nr, __entry->a0, __entry->a1,  __entry->a2,
+		 __entry->a3)
+);
+
+/*
+ * Tracepoint for PIO.
+ */
+TRACE_EVENT(kvm_pio,
+	TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
+		 unsigned int count),
+	TP_ARGS(rw, port, size, count),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int, 	rw	)
+		__field(	unsigned int, 	port	)
+		__field(	unsigned int, 	size	)
+		__field(	unsigned int,	count	)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->port		= port;
+		__entry->size		= size;
+		__entry->count		= count;
+	),
+
+	TP_printk("pio_%s at 0x%x size %d count %d",
+		  __entry->rw ? "write" : "read",
+		  __entry->port, __entry->size, __entry->count)
+);
+
+/*
+ * Tracepoint for cpuid.
+ */
+TRACE_EVENT(kvm_cpuid,
+	TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
+		 unsigned long rcx, unsigned long rdx),
+	TP_ARGS(function, rax, rbx, rcx, rdx),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	function	)
+		__field(	unsigned long,	rax		)
+		__field(	unsigned long,	rbx		)
+		__field(	unsigned long,	rcx		)
+		__field(	unsigned long,	rdx		)
+	),
+
+	TP_fast_assign(
+		__entry->function	= function;
+		__entry->rax		= rax;
+		__entry->rbx		= rbx;
+		__entry->rcx		= rcx;
+		__entry->rdx		= rdx;
+	),
+
+	TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
+		  __entry->function, __entry->rax,
+		  __entry->rbx, __entry->rcx, __entry->rdx)
+);
+
+/*
+ * Tracepoint for apic access.
+ */
+TRACE_EVENT(kvm_apic,
+	TP_PROTO(unsigned int rw, unsigned int reg, unsigned int val),
+	TP_ARGS(rw, reg, val),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	rw	)
+		__field(	unsigned int,	reg	)
+		__field(	unsigned int,	val	)
+	),
+
+	TP_fast_assign(
+		__entry->rw		= rw;
+		__entry->reg		= reg;
+		__entry->val		= val;
+	),
+
+	TP_printk("apic_%s 0x%x = 0x%x",
+		  __entry->rw ? "write" : "read",
+		  __entry->reg, __entry->val)
+);
+
+#define trace_kvm_apic_read(reg, val)		\
+		trace_kvm_apic(0, reg, val)
+#define trace_kvm_apic_write(reg, val)		\
+		trace_kvm_apic(1, reg, val)
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>