diff mbox

KVM: trace the events of mmu_notifier

Message ID 50335A27.2070306@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiao Guangrong Aug. 21, 2012, 9:51 a.m. UTC
mmu_notifier is the interface that broadcasts mm events to KVM. The
tracepoints introduced in this patch trace all of these events, which is
very helpful for noticing and fixing bugs caused by mm.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 include/trace/events/kvm.h |  121 ++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c        |   19 +++++++
 2 files changed, 140 insertions(+), 0 deletions(-)

Comments

Marcelo Tosatti Aug. 23, 2012, 9:24 a.m. UTC | #1
On Tue, Aug 21, 2012 at 05:51:35PM +0800, Xiao Guangrong wrote:
> mmu_notifier is the interface to broadcast the mm events to KVM, the
> tracepoints introduced in this patch can trace all these events, it is
> very helpful for us to notice and fix the bug caused by mm
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> ---
>  include/trace/events/kvm.h |  121 ++++++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/kvm_main.c        |   19 +++++++
>  2 files changed, 140 insertions(+), 0 deletions(-)
> 
> diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
> index 7ef9e75..a855ff9 100644
> --- a/include/trace/events/kvm.h
> +++ b/include/trace/events/kvm.h
> @@ -309,6 +309,127 @@ TRACE_EVENT(
> 
>  #endif
> 
> +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
> +DECLARE_EVENT_CLASS(mmu_notifier_address_class,
> +
> +	TP_PROTO(struct kvm *kvm, unsigned long address),
> +
> +	TP_ARGS(kvm, address),
> +
> +	TP_STRUCT__entry(
> +		__field(struct kvm *, kvm)
> +		__field(unsigned long, address)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->kvm = kvm;
> +		__entry->address = address;
> +	),
> +
> +	TP_printk("kvm %p address %lx", __entry->kvm, __entry->address)


Perhaps the pointer was useful for debugging, but otherwise, I don't
think it should be printed.

> +	TP_ARGS(kvm, address, pte),
> +
> +	TP_STRUCT__entry(
> +		__field(struct kvm *, kvm)
> +		__field(unsigned long, address)
> +		__field(unsigned long, pte)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->kvm = kvm;
> +		__entry->address = address;
> +		__entry->pte = pte.pte;
> +	),
> +
> +	TP_printk("kvm %p address %lx pte %lx", __entry->kvm, __entry->address,
> +		  __entry->pte)

Can the pte bits be spelled out? (See __print_symbolic.)

>  	spin_lock(&kvm->mmu_lock);
> 
> +	trace_kvm_mmu_notifier_clear_flush_young(kvm, address);
> +
>  	young = kvm_age_hva(kvm, address);
>  	if (young)
>  		kvm_flush_remote_tlbs(kvm);
> @@ -394,6 +407,9 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
> 
>  	idx = srcu_read_lock(&kvm->srcu);
>  	spin_lock(&kvm->mmu_lock);
> +
> +	trace_kvm_mmu_notifier_test_young(kvm, address);
> +

Can this tracepoint also print the young information?

>  	young = kvm_test_age_hva(kvm, address);
>  	spin_unlock(&kvm->mmu_lock);
>  	srcu_read_unlock(&kvm->srcu, idx);
> @@ -408,6 +424,9 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
>  	int idx;
> 
>  	idx = srcu_read_lock(&kvm->srcu);
> +
> +	trace_kvm_mmu_notifier_release(kvm);
> +
>  	kvm_arch_flush_shadow(kvm);
>  	srcu_read_unlock(&kvm->srcu, idx);
>  }


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Aug. 23, 2012, 12:30 p.m. UTC | #2
On 08/23/2012 05:24 PM, Marcelo Tosatti wrote:
> On Tue, Aug 21, 2012 at 05:51:35PM +0800, Xiao Guangrong wrote:
>> mmu_notifier is the interface to broadcast the mm events to KVM, the
>> tracepoints introduced in this patch can trace all these events, it is
>> very helpful for us to notice and fix the bug caused by mm
>>
>> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
>> ---
>>  include/trace/events/kvm.h |  121 ++++++++++++++++++++++++++++++++++++++++++++
>>  virt/kvm/kvm_main.c        |   19 +++++++
>>  2 files changed, 140 insertions(+), 0 deletions(-)
>>
>> diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
>> index 7ef9e75..a855ff9 100644
>> --- a/include/trace/events/kvm.h
>> +++ b/include/trace/events/kvm.h
>> @@ -309,6 +309,127 @@ TRACE_EVENT(
>>
>>  #endif
>>
>> +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>> +DECLARE_EVENT_CLASS(mmu_notifier_address_class,
>> +
>> +	TP_PROTO(struct kvm *kvm, unsigned long address),
>> +
>> +	TP_ARGS(kvm, address),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(struct kvm *, kvm)
>> +		__field(unsigned long, address)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->kvm = kvm;
>> +		__entry->address = address;
>> +	),
>> +
>> +	TP_printk("kvm %p address %lx", __entry->kvm, __entry->address)
> 
> 
> Perhaps the pointer was useful for debugging, but otherwise, i don't
> think it should be printed.

But this is the only information that identifies the guest if many guests
are running.

> 
>> +	TP_ARGS(kvm, address, pte),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(struct kvm *, kvm)
>> +		__field(unsigned long, address)
>> +		__field(unsigned long, pte)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->kvm = kvm;
>> +		__entry->address = address;
>> +		__entry->pte = pte.pte;
>> +	),
>> +
>> +	TP_printk("kvm %p address %lx pte %lx", __entry->kvm, __entry->address,
>> +		  __entry->pte)
> 
> The pte bits can be spelled out? (see __print_symbolic).

This tracepoint is placed in kvm.h, which can be used on different architectures
that decode ptes differently. I will try to find a way to show it in a more
readable format.

> 
>>  	spin_lock(&kvm->mmu_lock);
>>
>> +	trace_kvm_mmu_notifier_clear_flush_young(kvm, address);
>> +
>>  	young = kvm_age_hva(kvm, address);
>>  	if (young)
>>  		kvm_flush_remote_tlbs(kvm);
>> @@ -394,6 +407,9 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
>>
>>  	idx = srcu_read_lock(&kvm->srcu);
>>  	spin_lock(&kvm->mmu_lock);
>> +
>> +	trace_kvm_mmu_notifier_test_young(kvm, address);
>> +
> 
> can print young information?

Okay, I will do it in the next version.

Thanks for your review, Marcelo!

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Aug. 23, 2012, 1:08 p.m. UTC | #3
On Thu, Aug 23, 2012 at 08:30:15PM +0800, Xiao Guangrong wrote:
> On 08/23/2012 05:24 PM, Marcelo Tosatti wrote:
> > On Tue, Aug 21, 2012 at 05:51:35PM +0800, Xiao Guangrong wrote:
> >> mmu_notifier is the interface to broadcast the mm events to KVM, the
> >> tracepoints introduced in this patch can trace all these events, it is
> >> very helpful for us to notice and fix the bug caused by mm
> >>
> >> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> >> ---
> >>  include/trace/events/kvm.h |  121 ++++++++++++++++++++++++++++++++++++++++++++
> >>  virt/kvm/kvm_main.c        |   19 +++++++
> >>  2 files changed, 140 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
> >> index 7ef9e75..a855ff9 100644
> >> --- a/include/trace/events/kvm.h
> >> +++ b/include/trace/events/kvm.h
> >> @@ -309,6 +309,127 @@ TRACE_EVENT(
> >>
> >>  #endif
> >>
> >> +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
> >> +DECLARE_EVENT_CLASS(mmu_notifier_address_class,
> >> +
> >> +	TP_PROTO(struct kvm *kvm, unsigned long address),
> >> +
> >> +	TP_ARGS(kvm, address),
> >> +
> >> +	TP_STRUCT__entry(
> >> +		__field(struct kvm *, kvm)
> >> +		__field(unsigned long, address)
> >> +	),
> >> +
> >> +	TP_fast_assign(
> >> +		__entry->kvm = kvm;
> >> +		__entry->address = address;
> >> +	),
> >> +
> >> +	TP_printk("kvm %p address %lx", __entry->kvm, __entry->address)
> > 
> > 
> > Perhaps the pointer was useful for debugging, but otherwise, i don't
> > think it should be printed.
> 
> But this is the only information to identify the guest if may guest
> are running.

Can't you get to task_struct, then pid?

A kernel pointer to identify a task is really weird (and not consistent 
with the other tracepoints).

> >> +	TP_ARGS(kvm, address, pte),
> >> +
> >> +	TP_STRUCT__entry(
> >> +		__field(struct kvm *, kvm)
> >> +		__field(unsigned long, address)
> >> +		__field(unsigned long, pte)
> >> +	),
> >> +
> >> +	TP_fast_assign(
> >> +		__entry->kvm = kvm;
> >> +		__entry->address = address;
> >> +		__entry->pte = pte.pte;
> >> +	),
> >> +
> >> +	TP_printk("kvm %p address %lx pte %lx", __entry->kvm, __entry->address,
> >> +		  __entry->pte)
> > 
> > The pte bits can be spelled out? (see __print_symbolic).
> 
> This tracepoint is put in kvm.h which can be used on different architectures
> which have different pte decode. I will try to find a way to show more
> readable format.
> >>  	spin_lock(&kvm->mmu_lock);
> >>
> >> +	trace_kvm_mmu_notifier_clear_flush_young(kvm, address);
> >> +
> >>  	young = kvm_age_hva(kvm, address);
> >>  	if (young)
> >>  		kvm_flush_remote_tlbs(kvm);
> >> @@ -394,6 +407,9 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
> >>
> >>  	idx = srcu_read_lock(&kvm->srcu);
> >>  	spin_lock(&kvm->mmu_lock);
> >> +
> >> +	trace_kvm_mmu_notifier_test_young(kvm, address);
> >> +
> > 
> > can print young information?
> 
> Okay, will do it in next version.
> 
> Thanks for your review, Marcelo!
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Aug. 24, 2012, 1:36 a.m. UTC | #4
On 08/23/2012 09:08 PM, Marcelo Tosatti wrote:
> On Thu, Aug 23, 2012 at 08:30:15PM +0800, Xiao Guangrong wrote:
>> On 08/23/2012 05:24 PM, Marcelo Tosatti wrote:
>>> On Tue, Aug 21, 2012 at 05:51:35PM +0800, Xiao Guangrong wrote:
>>>> mmu_notifier is the interface to broadcast the mm events to KVM, the
>>>> tracepoints introduced in this patch can trace all these events, it is
>>>> very helpful for us to notice and fix the bug caused by mm
>>>>
>>>> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
>>>> ---
>>>>  include/trace/events/kvm.h |  121 ++++++++++++++++++++++++++++++++++++++++++++
>>>>  virt/kvm/kvm_main.c        |   19 +++++++
>>>>  2 files changed, 140 insertions(+), 0 deletions(-)
>>>>
>>>> diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
>>>> index 7ef9e75..a855ff9 100644
>>>> --- a/include/trace/events/kvm.h
>>>> +++ b/include/trace/events/kvm.h
>>>> @@ -309,6 +309,127 @@ TRACE_EVENT(
>>>>
>>>>  #endif
>>>>
>>>> +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>>>> +DECLARE_EVENT_CLASS(mmu_notifier_address_class,
>>>> +
>>>> +	TP_PROTO(struct kvm *kvm, unsigned long address),
>>>> +
>>>> +	TP_ARGS(kvm, address),
>>>> +
>>>> +	TP_STRUCT__entry(
>>>> +		__field(struct kvm *, kvm)
>>>> +		__field(unsigned long, address)
>>>> +	),
>>>> +
>>>> +	TP_fast_assign(
>>>> +		__entry->kvm = kvm;
>>>> +		__entry->address = address;
>>>> +	),
>>>> +
>>>> +	TP_printk("kvm %p address %lx", __entry->kvm, __entry->address)
>>>
>>>
>>> Perhaps the pointer was useful for debugging, but otherwise, i don't
>>> think it should be printed.
>>
>> But this is the only information to identify the guest if may guest
>> are running.
> 
> Can't you get to task_struct, then pid?

Qemu-kvm has many threads, which means we can hardly tell which pids belong
to the same kvm.

> 
> A kernel pointer to identify a task is really weird (and not consistent 
> with the other tracepoints).

Okay, if you really dislike it, I can use perf record to filter the process and
then perf script to show the result, but it is not so readable because some
tracepoints cannot be fully decoded by perf script.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 7ef9e75..a855ff9 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -309,6 +309,127 @@  TRACE_EVENT(

 #endif

+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+DECLARE_EVENT_CLASS(mmu_notifier_address_class,
+
+	TP_PROTO(struct kvm *kvm, unsigned long address),
+
+	TP_ARGS(kvm, address),
+
+	TP_STRUCT__entry(
+		__field(struct kvm *, kvm)
+		__field(unsigned long, address)
+	),
+
+	TP_fast_assign(
+		__entry->kvm = kvm;
+		__entry->address = address;
+	),
+
+	TP_printk("kvm %p address %lx", __entry->kvm, __entry->address)
+
+);
+
+DEFINE_EVENT(mmu_notifier_address_class, kvm_mmu_notifier_invalidate_page,
+
+	TP_PROTO(struct kvm *kvm, unsigned long address),
+
+	TP_ARGS(kvm, address)
+);
+
+DEFINE_EVENT(mmu_notifier_address_class, kvm_mmu_notifier_clear_flush_young,
+
+	TP_PROTO(struct kvm *kvm, unsigned long address),
+
+	TP_ARGS(kvm, address)
+);
+
+DEFINE_EVENT(mmu_notifier_address_class, kvm_mmu_notifier_test_young,
+
+	TP_PROTO(struct kvm *kvm, unsigned long address),
+
+	TP_ARGS(kvm, address)
+);
+
+DECLARE_EVENT_CLASS(mmu_notifier_range_class,
+
+	TP_PROTO(struct kvm *kvm, unsigned long start, unsigned long end),
+
+	TP_ARGS(kvm, start, end),
+
+	TP_STRUCT__entry(
+		__field(struct kvm *, kvm)
+		__field(unsigned long, start)
+		__field(unsigned long, end)
+	),
+
+	TP_fast_assign(
+		__entry->kvm = kvm;
+		__entry->start = start;
+		__entry->end = end;
+	),
+
+	TP_printk("kvm %p start %lx end %lx", __entry->kvm, __entry->start,
+		  __entry->end)
+
+);
+
+DEFINE_EVENT(mmu_notifier_range_class, kvm_mmu_notifier_invalidate_range_start,
+
+	TP_PROTO(struct kvm *kvm, unsigned long start, unsigned long end),
+
+	TP_ARGS(kvm, start, end)
+);
+
+DEFINE_EVENT(mmu_notifier_range_class, kvm_mmu_notifier_invalidate_range_end,
+
+	TP_PROTO(struct kvm *kvm, unsigned long start, unsigned long end),
+
+	TP_ARGS(kvm, start, end)
+);
+
+TRACE_EVENT(kvm_mmu_notifier_change_pte,
+
+	TP_PROTO(struct kvm *kvm, unsigned long address, pte_t pte),
+
+	TP_ARGS(kvm, address, pte),
+
+	TP_STRUCT__entry(
+		__field(struct kvm *, kvm)
+		__field(unsigned long, address)
+		__field(unsigned long, pte)
+	),
+
+	TP_fast_assign(
+		__entry->kvm = kvm;
+		__entry->address = address;
+		__entry->pte = pte.pte;
+	),
+
+	TP_printk("kvm %p address %lx pte %lx", __entry->kvm, __entry->address,
+		  __entry->pte)
+
+);
+
+TRACE_EVENT(kvm_mmu_notifier_release,
+
+	TP_PROTO(struct kvm *kvm),
+
+	TP_ARGS(kvm),
+
+	TP_STRUCT__entry(
+		__field(struct kvm *, kvm)
+	),
+
+	TP_fast_assign(
+		__entry->kvm = kvm;
+	),
+
+	TP_printk("kvm %p", __entry->kvm)
+
+);
+#endif
+
 #endif /* _TRACE_KVM_MAIN_H */

 /* This part must be outside protection */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ec970f4..3491865 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -287,6 +287,8 @@  static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);

+	trace_kvm_mmu_notifier_invalidate_page(kvm, address);
+
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
 	/* we've to flush the tlb before the pages can be freed */
@@ -307,6 +309,9 @@  static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,

 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
+
+	trace_kvm_mmu_notifier_change_pte(kvm, address, pte);
+
 	kvm->mmu_notifier_seq++;
 	kvm_set_spte_hva(kvm, address, pte);
 	spin_unlock(&kvm->mmu_lock);
@@ -323,6 +328,9 @@  static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,

 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
+
+	trace_kvm_mmu_notifier_invalidate_range_start(kvm, start, end);
+
 	/*
 	 * The count increase must become visible at unlock time as no
 	 * spte can be established without taking the mmu_lock and
@@ -347,6 +355,9 @@  static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);

 	spin_lock(&kvm->mmu_lock);
+
+	trace_kvm_mmu_notifier_invalidate_range_end(kvm, start, end);
+
 	/*
 	 * This sequence increase will notify the kvm page fault that
 	 * the page that is going to be mapped in the spte could have
@@ -375,6 +386,8 @@  static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);

+	trace_kvm_mmu_notifier_clear_flush_young(kvm, address);
+
 	young = kvm_age_hva(kvm, address);
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
@@ -394,6 +407,9 @@  static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,

 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
+
+	trace_kvm_mmu_notifier_test_young(kvm, address);
+
 	young = kvm_test_age_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, idx);
@@ -408,6 +424,9 @@  static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 	int idx;

 	idx = srcu_read_lock(&kvm->srcu);
+
+	trace_kvm_mmu_notifier_release(kvm);
+
 	kvm_arch_flush_shadow(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
 }