
[v1] mm: add tracepoints to ksm

Message ID 20230210214645.2720847-1-shr@devkernel.io (mailing list archive)
State New
Series [v1] mm: add tracepoints to ksm

Commit Message

Stefan Roesch Feb. 10, 2023, 9:46 p.m. UTC
This adds the following tracepoints to ksm:
- start / stop scan
- ksm enter / exit
- merge a page
- merge a page with a ksm page
- remove a ksm page
- remove an rmap item

This patch has been split off from the RFC patch series "mm:
process/cgroup ksm support".

Signed-off-by: Stefan Roesch <shr@devkernel.io>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 MAINTAINERS                |   1 +
 include/trace/events/ksm.h | 251 +++++++++++++++++++++++++++++++++++++
 mm/ksm.c                   |  21 +++-
 3 files changed, 271 insertions(+), 2 deletions(-)
 create mode 100644 include/trace/events/ksm.h


base-commit: 234a68e24b120b98875a8b6e17a9dead277be16a

Comments

Steven Rostedt March 9, 2023, 10:27 p.m. UTC | #1
On Fri, 10 Feb 2023 13:46:45 -0800
Stefan Roesch <shr@devkernel.io> wrote:

Sorry for the late reply, I just noticed this (I had the flu when this was
originally sent).

> +/**
> + * ksm_remove_ksm_page - called after a ksm page has been removed
> + *
> + * @pfn:		page frame number of ksm page
> + *
> + * Allows to trace the removing of stable ksm pages.
> + */
> +TRACE_EVENT(ksm_remove_ksm_page,
> +
> +	TP_PROTO(unsigned long pfn),
> +
> +	TP_ARGS(pfn),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned long, pfn)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pfn = pfn;
> +	),
> +
> +	TP_printk("pfn %lu", __entry->pfn)
> +);
> +
> +/**
> + * ksm_remove_rmap_item - called after a rmap_item has been removed from the
> + *                        stable tree
> + *
> + * @pfn:		page frame number of ksm page
> + * @rmap_item:		address of rmap_item  object
> + * @mm:			address of the process mm struct
> + *
> + * Allows to trace the removal of pages from the stable tree list.
> + */
> +TRACE_EVENT(ksm_remove_rmap_item,
> +
> +	TP_PROTO(unsigned long pfn, void *rmap_item, void *mm),
> +
> +	TP_ARGS(pfn, rmap_item, mm),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned long,	pfn)
> +		__field(void *,		rmap_item)
> +		__field(void *,		mm)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pfn		= pfn;
> +		__entry->rmap_item	= rmap_item;
> +		__entry->mm		= mm;
> +	),
> +
> +	TP_printk("pfn %lu rmap_item %p mm %p",
> +			__entry->pfn, __entry->rmap_item, __entry->mm)
> +);
> +
> +#endif /* _TRACE_KSM_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 56808e3bfd19..4356af760735 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -45,6 +45,9 @@
>  #include "internal.h"
>  #include "mm_slot.h"
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/ksm.h>
> +
>  #ifdef CONFIG_NUMA
>  #define NUMA(x)		(x)
>  #define DO_NUMA(x)	do { (x); } while (0)
> @@ -655,10 +658,12 @@ static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
>  	BUG_ON(stable_node->rmap_hlist_len < 0);
>  
>  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
> -		if (rmap_item->hlist.next)
> +		if (rmap_item->hlist.next) {
>  			ksm_pages_sharing--;
> -		else
> +			trace_ksm_remove_rmap_item(stable_node->kpfn, rmap_item, rmap_item->mm);

Instead of dereferencing the stable_node here, where the work could
possibly happen outside the trace event and in the hot path, could you pass
in the stable_node instead, and then in the TP_fast_assign() do:

		__entry->pfn = stable_node->kpfn;
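
A rough sketch of that shape (untested, and assuming struct ksm_stable_node
is visible wherever the trace header ends up being expanded) could be:

TRACE_EVENT(ksm_remove_rmap_item,

	TP_PROTO(struct ksm_stable_node *stable_node, void *rmap_item, void *mm),

	TP_ARGS(stable_node, rmap_item, mm),

	TP_STRUCT__entry(
		__field(unsigned long,	pfn)
		__field(void *,		rmap_item)
		__field(void *,		mm)
	),

	TP_fast_assign(
		/* the kpfn dereference now only runs when the event is enabled */
		__entry->pfn		= stable_node->kpfn;
		__entry->rmap_item	= rmap_item;
		__entry->mm		= mm;
	),

	TP_printk("pfn %lu rmap_item %p mm %p",
			__entry->pfn, __entry->rmap_item, __entry->mm)
);

with the call site then becoming something like:

	trace_ksm_remove_rmap_item(stable_node, rmap_item, rmap_item->mm);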


> +		} else {
>  			ksm_pages_shared--;
> +		}
>  
>  		rmap_item->mm->ksm_merging_pages--;
>  
> @@ -679,6 +684,7 @@ static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
>  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes);
>  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1);
>  
> +	trace_ksm_remove_ksm_page(stable_node->kpfn);

Here too?

-- Steve

Stefan Roesch March 9, 2023, 10:30 p.m. UTC | #2
Steven Rostedt <rostedt@goodmis.org> writes:

> On Fri, 10 Feb 2023 13:46:45 -0800
> Stefan Roesch <shr@devkernel.io> wrote:
>
> Sorry for the late reply, I just noticed this (I had the flu when this was
> originally sent).
>
>> +/**
>> + * ksm_remove_ksm_page - called after a ksm page has been removed
>> + *
>> + * @pfn:		page frame number of ksm page
>> + *
>> + * Allows to trace the removing of stable ksm pages.
>> + */
>> +TRACE_EVENT(ksm_remove_ksm_page,
>> +
>> +	TP_PROTO(unsigned long pfn),
>> +
>> +	TP_ARGS(pfn),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(unsigned long, pfn)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->pfn = pfn;
>> +	),
>> +
>> +	TP_printk("pfn %lu", __entry->pfn)
>> +);
>> +
>> +/**
>> + * ksm_remove_rmap_item - called after a rmap_item has been removed from the
>> + *                        stable tree
>> + *
>> + * @pfn:		page frame number of ksm page
>> + * @rmap_item:		address of rmap_item  object
>> + * @mm:			address of the process mm struct
>> + *
>> + * Allows to trace the removal of pages from the stable tree list.
>> + */
>> +TRACE_EVENT(ksm_remove_rmap_item,
>> +
>> +	TP_PROTO(unsigned long pfn, void *rmap_item, void *mm),
>> +
>> +	TP_ARGS(pfn, rmap_item, mm),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(unsigned long,	pfn)
>> +		__field(void *,		rmap_item)
>> +		__field(void *,		mm)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->pfn		= pfn;
>> +		__entry->rmap_item	= rmap_item;
>> +		__entry->mm		= mm;
>> +	),
>> +
>> +	TP_printk("pfn %lu rmap_item %p mm %p",
>> +			__entry->pfn, __entry->rmap_item, __entry->mm)
>> +);
>> +
>> +#endif /* _TRACE_KSM_H */
>> +
>> +/* This part must be outside protection */
>> +#include <trace/define_trace.h>
>> diff --git a/mm/ksm.c b/mm/ksm.c
>> index 56808e3bfd19..4356af760735 100644
>> --- a/mm/ksm.c
>> +++ b/mm/ksm.c
>> @@ -45,6 +45,9 @@
>>  #include "internal.h"
>>  #include "mm_slot.h"
>>
>> +#define CREATE_TRACE_POINTS
>> +#include <trace/events/ksm.h>
>> +
>>  #ifdef CONFIG_NUMA
>>  #define NUMA(x)		(x)
>>  #define DO_NUMA(x)	do { (x); } while (0)
>> @@ -655,10 +658,12 @@ static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
>>  	BUG_ON(stable_node->rmap_hlist_len < 0);
>>
>>  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
>> -		if (rmap_item->hlist.next)
>> +		if (rmap_item->hlist.next) {
>>  			ksm_pages_sharing--;
>> -		else
>> +			trace_ksm_remove_rmap_item(stable_node->kpfn, rmap_item, rmap_item->mm);
>
> Instead of dereferencing the stable_node here, where the work could
> possibly happen outside the trace event and in the hot path, could you pass
> in the stable_node instead, and then in the TP_fast_assign() do:
>
> 		__entry->pfn = stable_node->kpfn;
>
>

I'll make the change in the next version.

>> +		} else {
>>  			ksm_pages_shared--;
>> +		}
>>
>>  		rmap_item->mm->ksm_merging_pages--;
>>
>> @@ -679,6 +684,7 @@ static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
>>  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes);
>>  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1);
>>
>> +	trace_ksm_remove_ksm_page(stable_node->kpfn);
>
> Here too?
>
> -- Steve
>

I'll make the change in the next version.

Stefan Roesch March 10, 2023, 7:22 p.m. UTC | #3
Steven Rostedt <rostedt@goodmis.org> writes:

> On Fri, 10 Feb 2023 13:46:45 -0800
> Stefan Roesch <shr@devkernel.io> wrote:
>
> Sorry for the late reply, I just noticed this (I had the flu when this was
> originally sent).
>
>> +/**
>> + * ksm_remove_ksm_page - called after a ksm page has been removed
>> + *
>> + * @pfn:		page frame number of ksm page
>> + *
>> + * Allows to trace the removing of stable ksm pages.
>> + */
>> +TRACE_EVENT(ksm_remove_ksm_page,
>> +
>> +	TP_PROTO(unsigned long pfn),
>> +
>> +	TP_ARGS(pfn),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(unsigned long, pfn)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->pfn = pfn;
>> +	),
>> +
>> +	TP_printk("pfn %lu", __entry->pfn)
>> +);
>> +
>> +/**
>> + * ksm_remove_rmap_item - called after a rmap_item has been removed from the
>> + *                        stable tree
>> + *
>> + * @pfn:		page frame number of ksm page
>> + * @rmap_item:		address of rmap_item  object
>> + * @mm:			address of the process mm struct
>> + *
>> + * Allows to trace the removal of pages from the stable tree list.
>> + */
>> +TRACE_EVENT(ksm_remove_rmap_item,
>> +
>> +	TP_PROTO(unsigned long pfn, void *rmap_item, void *mm),
>> +
>> +	TP_ARGS(pfn, rmap_item, mm),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(unsigned long,	pfn)
>> +		__field(void *,		rmap_item)
>> +		__field(void *,		mm)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->pfn		= pfn;
>> +		__entry->rmap_item	= rmap_item;
>> +		__entry->mm		= mm;
>> +	),
>> +
>> +	TP_printk("pfn %lu rmap_item %p mm %p",
>> +			__entry->pfn, __entry->rmap_item, __entry->mm)
>> +);
>> +
>> +#endif /* _TRACE_KSM_H */
>> +
>> +/* This part must be outside protection */
>> +#include <trace/define_trace.h>
>> diff --git a/mm/ksm.c b/mm/ksm.c
>> index 56808e3bfd19..4356af760735 100644
>> --- a/mm/ksm.c
>> +++ b/mm/ksm.c
>> @@ -45,6 +45,9 @@
>>  #include "internal.h"
>>  #include "mm_slot.h"
>>
>> +#define CREATE_TRACE_POINTS
>> +#include <trace/events/ksm.h>
>> +
>>  #ifdef CONFIG_NUMA
>>  #define NUMA(x)		(x)
>>  #define DO_NUMA(x)	do { (x); } while (0)
>> @@ -655,10 +658,12 @@ static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
>>  	BUG_ON(stable_node->rmap_hlist_len < 0);
>>
>>  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
>> -		if (rmap_item->hlist.next)
>> +		if (rmap_item->hlist.next) {
>>  			ksm_pages_sharing--;
>> -		else
>> +			trace_ksm_remove_rmap_item(stable_node->kpfn, rmap_item, rmap_item->mm);
>
> Instead of dereferencing the stable_node here, where the work could
> possibly happen outside the trace event and in the hot path, could you pass
> in the stable_node instead, and then in the TP_fast_assign() do:
>
> 		__entry->pfn = stable_node->kpfn;
>
>

To do this, the structure would need to be exposed. Currently the
structure is defined in ksm.c. This is an internal structure that we
most likely don't want to expose. We can get by not printing the pfn
and use the rmap_item to refer back to it, but exposing it directly
here is more convenient for debugging.

Any thoughts?

Steven Rostedt March 10, 2023, 8:02 p.m. UTC | #4
On Fri, 10 Mar 2023 11:22:54 -0800
Stefan Roesch <shr@devkernel.io> wrote:

> >> +			trace_ksm_remove_rmap_item(stable_node->kpfn, rmap_item, rmap_item->mm);  
> >
> > Instead of dereferencing the stable_node here, where the work could
> > possibly happen outside the trace event and in the hot path, could you pass
> > in the stable_node instead, and then in the TP_fast_assign() do:
> >
> > 		__entry->pfn = stable_node->kpfn;
> >
> >  
> 
> To do this, the structure would need to be exposed. Currently the
> structure is defined in ksm.c. This is an internal structure that we
> most likely don't want to expose. We can get by not printing the pfn
> and use the rmap_item to refer back to it, but exposing it directly
> here is more convenient for debugging.
> 
> Any thoughts?

Sounds like the include/trace/events/ksm.h should be local too.

See my reply about include/trace/events/thermal.h

  https://lore.kernel.org/all/20230227100715.7d896836@gandalf.local.home/

And their solution.

  https://lore.kernel.org/all/20230307133735.90772-2-daniel.lezcano@linaro.org/

I suggest you do the same.

-- Steve
Stefan Roesch March 10, 2023, 10:34 p.m. UTC | #5
I'll follow that approach.
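
Roughly, following the thermal example, that means a trace header kept next
to mm/ksm.c instead of under include/trace/events/. A sketch (the file name
ksm_trace.h and the Makefile flag are placeholders mirroring the thermal
patch, not the actual follow-up):

/* mm/ksm_trace.h -- local, so TP_fast_assign() can dereference the
 * private struct ksm_stable_node that is defined in mm/ksm.c.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ksm

#if !defined(_KSM_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _KSM_TRACE_H

#include <linux/tracepoint.h>

TRACE_EVENT(ksm_remove_ksm_page,

	TP_PROTO(struct ksm_stable_node *stable_node),

	TP_ARGS(stable_node),

	TP_STRUCT__entry(
		__field(unsigned long, pfn)
	),

	TP_fast_assign(
		__entry->pfn = stable_node->kpfn;
	),

	TP_printk("pfn %lu", __entry->pfn)
);

#endif /* _KSM_TRACE_H */

/* This part must be outside protection; the header is not under
 * include/trace/events/, so tell define_trace.h where to find it.
 */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE ksm_trace
#include <trace/define_trace.h>

and in mm/ksm.c, after the struct definitions:

#define CREATE_TRACE_POINTS
#include "ksm_trace.h"

plus something like CFLAGS_ksm.o := -I$(src) in mm/Makefile so the relative
include path resolves.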


Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index b92a2a0cb36b..827291f1ba97 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13487,6 +13487,7 @@  F:	include/linux/memory_hotplug.h
 F:	include/linux/mm.h
 F:	include/linux/mmzone.h
 F:	include/linux/pagewalk.h
+F:	include/trace/events/ksm.h
 F:	mm/
 F:	tools/mm/
 F:	tools/testing/selftests/mm/
diff --git a/include/trace/events/ksm.h b/include/trace/events/ksm.h
new file mode 100644
index 000000000000..b5ac35c1d0e8
--- /dev/null
+++ b/include/trace/events/ksm.h
@@ -0,0 +1,251 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ksm
+
+#if !defined(_TRACE_KSM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KSM_H
+
+#include <linux/tracepoint.h>
+
+/**
+ * ksm_scan_template - called for start / stop scan
+ *
+ * @seq:		sequence number of scan
+ * @rmap_entries:	actual number of rmap entries
+ *
+ * Allows tracing the start / stop of a ksm scan.
+ */
+DECLARE_EVENT_CLASS(ksm_scan_template,
+
+	TP_PROTO(int seq, u32 rmap_entries),
+
+	TP_ARGS(seq, rmap_entries),
+
+	TP_STRUCT__entry(
+		__field(int,	seq)
+		__field(u32,	rmap_entries)
+	),
+
+	TP_fast_assign(
+		__entry->seq		= seq;
+		__entry->rmap_entries	= rmap_entries;
+	),
+
+	TP_printk("seq %d rmap size %d",
+			__entry->seq, __entry->rmap_entries)
+);
+
+/**
+ * ksm_start_scan - called after a new ksm scan is started
+ *
+ * @seq:		sequence number of scan
+ * @rmap_entries:	actual number of rmap entries
+ *
+ * Allows tracing the start of a ksm scan.
+ */
+DEFINE_EVENT(ksm_scan_template, ksm_start_scan,
+
+	TP_PROTO(int seq, u32 rmap_entries),
+
+	TP_ARGS(seq, rmap_entries)
+);
+
+/**
+ * ksm_stop_scan - called after a new ksm scan has completed
+ *
+ * @seq:		sequence number of scan
+ * @rmap_entries:	actual number of rmap entries
+ *
+ * Allows tracing the completion of a ksm scan.
+ */
+DEFINE_EVENT(ksm_scan_template, ksm_stop_scan,
+
+	TP_PROTO(int seq, u32 rmap_entries),
+
+	TP_ARGS(seq, rmap_entries)
+);
+
+/**
+ * ksm_enter_exit_template - called when a process is added to or removed from ksm
+ *
+ * @mm:			address of the mm object of the process
+ *
+ * Allows tracing when a process has been added to or removed from ksm.
+ */
+DECLARE_EVENT_CLASS(ksm_enter_exit_template,
+
+	TP_PROTO(void *mm),
+
+	TP_ARGS(mm),
+
+	TP_STRUCT__entry(
+		__field(void *,		mm)
+	),
+
+	TP_fast_assign(
+		__entry->mm	= mm;
+	),
+
+	TP_printk("mm %p", __entry->mm)
+);
+
+/**
+ * ksm_enter - called after a new process has been added to ksm
+ *
+ * @mm:			address of the mm object of the process
+ *
+ * Allows tracing when a process has been added to ksm.
+ */
+DEFINE_EVENT(ksm_enter_exit_template, ksm_enter,
+
+	TP_PROTO(void *mm),
+
+	TP_ARGS(mm)
+);
+
+/**
+ * ksm_exit - called after a process has been removed from ksm
+ *
+ * @mm:			address of the mm object of the process
+ *
+ * Allows tracing when a process has been removed from ksm.
+ */
+DEFINE_EVENT(ksm_enter_exit_template, ksm_exit,
+
+	TP_PROTO(void *mm),
+
+	TP_ARGS(mm)
+);
+
+/**
+ * ksm_merge_one_page - called after a page has been merged
+ *
+ * @pfn:		page frame number of ksm page
+ * @rmap_item:		address of the rmap_item object
+ * @mm:			address of the process mm struct
+ * @err:		0 on success, error code otherwise
+ *
+ * Allows tracing the merging of individual pages by ksm.
+ */
+TRACE_EVENT(ksm_merge_one_page,
+
+	TP_PROTO(unsigned long pfn, void *rmap_item, void *mm, int err),
+
+	TP_ARGS(pfn, rmap_item, mm, err),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,	pfn)
+		__field(void *,		rmap_item)
+		__field(void *,		mm)
+		__field(int,		err)
+	),
+
+	TP_fast_assign(
+		__entry->pfn		= pfn;
+		__entry->rmap_item	= rmap_item;
+		__entry->mm		= mm;
+		__entry->err		= err;
+	),
+
+	TP_printk("ksm pfn %lu rmap_item %p mm %p error %d",
+			__entry->pfn, __entry->rmap_item, __entry->mm, __entry->err)
+);
+
+/**
+ * ksm_merge_with_ksm_page - called after a page has been merged with a ksm page
+ *
+ * @ksm_page:		address of the ksm page
+ * @pfn:		page frame number of ksm page
+ * @rmap_item:		address of the rmap_item object
+ * @mm:			address of the mm object of the process
+ * @err:		0 on success, error code otherwise
+ *
+ * Allows tracing the merging of a page with a ksm page.
+ */
+TRACE_EVENT(ksm_merge_with_ksm_page,
+
+	TP_PROTO(void *ksm_page, unsigned long pfn, void *rmap_item, void *mm, int err),
+
+	TP_ARGS(ksm_page, pfn, rmap_item, mm, err),
+
+	TP_STRUCT__entry(
+		__field(void *,		ksm_page)
+		__field(unsigned long,	pfn)
+		__field(void *,		rmap_item)
+		__field(void *,		mm)
+		__field(int,		err)
+	),
+
+	TP_fast_assign(
+		__entry->ksm_page	= ksm_page;
+		__entry->pfn		= pfn;
+		__entry->rmap_item	= rmap_item;
+		__entry->mm		= mm;
+		__entry->err		= err;
+	),
+
+	TP_printk("%spfn %lu rmap_item %p mm %p error %d",
+		  (__entry->ksm_page ? "ksm " : ""),
+		  __entry->pfn, __entry->rmap_item, __entry->mm, __entry->err)
+);
+
+/**
+ * ksm_remove_ksm_page - called after a ksm page has been removed
+ *
+ * @pfn:		page frame number of ksm page
+ *
+ * Allows tracing the removal of stable ksm pages.
+ */
+TRACE_EVENT(ksm_remove_ksm_page,
+
+	TP_PROTO(unsigned long pfn),
+
+	TP_ARGS(pfn),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, pfn)
+	),
+
+	TP_fast_assign(
+		__entry->pfn = pfn;
+	),
+
+	TP_printk("pfn %lu", __entry->pfn)
+);
+
+/**
+ * ksm_remove_rmap_item - called after a rmap_item has been removed from the
+ *                        stable tree
+ *
+ * @pfn:		page frame number of ksm page
+ * @rmap_item:		address of the rmap_item object
+ * @mm:			address of the process mm struct
+ *
+ * Allows tracing the removal of rmap_items from the stable tree.
+ */
+TRACE_EVENT(ksm_remove_rmap_item,
+
+	TP_PROTO(unsigned long pfn, void *rmap_item, void *mm),
+
+	TP_ARGS(pfn, rmap_item, mm),
+
+	TP_STRUCT__entry(
+		__field(unsigned long,	pfn)
+		__field(void *,		rmap_item)
+		__field(void *,		mm)
+	),
+
+	TP_fast_assign(
+		__entry->pfn		= pfn;
+		__entry->rmap_item	= rmap_item;
+		__entry->mm		= mm;
+	),
+
+	TP_printk("pfn %lu rmap_item %p mm %p",
+			__entry->pfn, __entry->rmap_item, __entry->mm)
+);
+
+#endif /* _TRACE_KSM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/ksm.c b/mm/ksm.c
index 56808e3bfd19..4356af760735 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -45,6 +45,9 @@ 
 #include "internal.h"
 #include "mm_slot.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/ksm.h>
+
 #ifdef CONFIG_NUMA
 #define NUMA(x)		(x)
 #define DO_NUMA(x)	do { (x); } while (0)
@@ -655,10 +658,12 @@  static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
 	BUG_ON(stable_node->rmap_hlist_len < 0);
 
 	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
-		if (rmap_item->hlist.next)
+		if (rmap_item->hlist.next) {
 			ksm_pages_sharing--;
-		else
+			trace_ksm_remove_rmap_item(stable_node->kpfn, rmap_item, rmap_item->mm);
+		} else {
 			ksm_pages_shared--;
+		}
 
 		rmap_item->mm->ksm_merging_pages--;
 
@@ -679,6 +684,7 @@  static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
 	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes);
 	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1);
 
+	trace_ksm_remove_ksm_page(stable_node->kpfn);
 	if (stable_node->head == &migrate_nodes)
 		list_del(&stable_node->list);
 	else
@@ -1367,6 +1373,8 @@  static int try_to_merge_with_ksm_page(struct ksm_rmap_item *rmap_item,
 	get_anon_vma(vma->anon_vma);
 out:
 	mmap_read_unlock(mm);
+	trace_ksm_merge_with_ksm_page(kpage, page_to_pfn(kpage ? kpage : page),
+				rmap_item, mm, err);
 	return err;
 }
 
@@ -2114,6 +2122,9 @@  static int try_to_merge_with_kernel_zero_page(struct ksm_rmap_item *rmap_item,
 		if (vma) {
 			err = try_to_merge_one_page(vma, page,
 						ZERO_PAGE(rmap_item->address));
+			trace_ksm_merge_one_page(
+				page_to_pfn(ZERO_PAGE(rmap_item->address)),
+				rmap_item, mm, err);
 			if (!err) {
 				rmap_item->address |= ZERO_PAGE_FLAG;
 				ksm_zero_pages_sharing++;
@@ -2344,6 +2355,8 @@  static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
 
 	mm_slot = ksm_scan.mm_slot;
 	if (mm_slot == &ksm_mm_head) {
+		trace_ksm_start_scan(ksm_scan.seqnr, ksm_rmap_items);
+
 		/*
 		 * A number of pages can hang around indefinitely on per-cpu
 		 * pagevecs, raised page count preventing write_protect_page
@@ -2510,6 +2523,7 @@  static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
 	if (mm_slot != &ksm_mm_head)
 		goto next_mm;
 
+	trace_ksm_stop_scan(ksm_scan.seqnr, ksm_rmap_items);
 	ksm_scan.seqnr++;
 	return NULL;
 }
@@ -2661,6 +2675,7 @@  int __ksm_enter(struct mm_struct *mm)
 	if (needs_wakeup)
 		wake_up_interruptible(&ksm_thread_wait);
 
+	trace_ksm_enter(mm);
 	return 0;
 }
 
@@ -2702,6 +2717,8 @@  void __ksm_exit(struct mm_struct *mm)
 		mmap_write_lock(mm);
 		mmap_write_unlock(mm);
 	}
+
+	trace_ksm_exit(mm);
 }
 
 struct page *ksm_might_need_to_copy(struct page *page,