diff mbox series

[mm-unstable,v7,11/18] mm/madvise: add huge_memory:mm_madvise_collapse tracepoint

Message ID 20220706235936.2197195-12-zokeefe@google.com (mailing list archive)
State New
Headers show
Series mm: userspace hugepage collapse | expand

Commit Message

Zach O'Keefe July 6, 2022, 11:59 p.m. UTC
Add a tracepoint to expose mm, address, and enum scan_result of each
hugepage attempted to be collapsed by call to madvise(MADV_COLLAPSE).

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 include/trace/events/huge_memory.h | 22 ++++++++++++++++++++++
 mm/khugepaged.c                    |  2 ++
 2 files changed, 24 insertions(+)

Comments

Yang Shi July 11, 2022, 9:32 p.m. UTC | #1
On Wed, Jul 6, 2022 at 5:06 PM Zach O'Keefe <zokeefe@google.com> wrote:
>
> Add a tracepoint to expose mm, address, and enum scan_result of each
> hugepage attempted to be collapsed by call to madvise(MADV_COLLAPSE).

Is this necessary? Isn't mm_khugepaged_scan_pmd tracepoint good
enough? It doesn't have "address", but you should be able to calculate
address from it with syscall trace together.


>
> Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> ---
>  include/trace/events/huge_memory.h | 22 ++++++++++++++++++++++
>  mm/khugepaged.c                    |  2 ++
>  2 files changed, 24 insertions(+)
>
> diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
> index 55392bf30a03..38d339ffdb16 100644
> --- a/include/trace/events/huge_memory.h
> +++ b/include/trace/events/huge_memory.h
> @@ -167,5 +167,27 @@ TRACE_EVENT(mm_collapse_huge_page_swapin,
>                 __entry->ret)
>  );
>
> +TRACE_EVENT(mm_madvise_collapse,
> +
> +       TP_PROTO(struct mm_struct *mm, unsigned long addr, int result),
> +
> +       TP_ARGS(mm, addr, result),
> +
> +       TP_STRUCT__entry(__field(struct mm_struct *, mm)
> +                        __field(unsigned long, addr)
> +                        __field(int, result)
> +       ),
> +
> +       TP_fast_assign(__entry->mm = mm;
> +                      __entry->addr = addr;
> +                      __entry->result = result;
> +       ),
> +
> +       TP_printk("mm=%p addr=%#lx result=%s",
> +                 __entry->mm,
> +                 __entry->addr,
> +                 __print_symbolic(__entry->result, SCAN_STATUS))
> +);
> +
>  #endif /* __HUGE_MEMORY_H */
>  #include <trace/define_trace.h>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index e0d00180512c..0207fc0a5b2a 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -2438,6 +2438,8 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
>                 if (!mmap_locked)
>                         *prev = NULL;  /* Tell caller we dropped mmap_lock */
>
> +               trace_mm_madvise_collapse(mm, addr, result);
> +
>                 switch (result) {
>                 case SCAN_SUCCEED:
>                 case SCAN_PMD_MAPPED:
> --
> 2.37.0.rc0.161.g10f37bed90-goog
>
Zach O'Keefe July 12, 2022, 4:21 p.m. UTC | #2
Hey Yang,

Thanks for taking the time to review this series again.

On Jul 11 14:32, Yang Shi wrote:
> On Wed, Jul 6, 2022 at 5:06 PM Zach O'Keefe <zokeefe@google.com> wrote:
> >
> > Add a tracepoint to expose mm, address, and enum scan_result of each
> > hugepage attempted to be collapsed by call to madvise(MADV_COLLAPSE).
> 
> Is this necessary? Isn't mm_khugepaged_scan_pmd tracepoint good
> enough? It doesn't have "address", but you should be able to calculate
> address from it with syscall trace together.
> 

I've also found this useful debugging along the file path. Perhaps the answer to
that is: add tracepoints to the file path - and we should probably do that - but
the other issue is that turning on these tracepoints (for the purposes of
debugging MADV_COLLAPSE) generates a lot of noise from khugepaged that is hard
to separate out. Augmenting existing tracepoints with .is_khugepaged data
incurred the risks associated with altering an existing kernel API. WDYT?

Thanks again,
Zach


> 
> >
> > Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> > ---
> >  include/trace/events/huge_memory.h | 22 ++++++++++++++++++++++
> >  mm/khugepaged.c                    |  2 ++
> >  2 files changed, 24 insertions(+)
> >
> > diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
> > index 55392bf30a03..38d339ffdb16 100644
> > --- a/include/trace/events/huge_memory.h
> > +++ b/include/trace/events/huge_memory.h
> > @@ -167,5 +167,27 @@ TRACE_EVENT(mm_collapse_huge_page_swapin,
> >                 __entry->ret)
> >  );
> >
> > +TRACE_EVENT(mm_madvise_collapse,
> > +
> > +       TP_PROTO(struct mm_struct *mm, unsigned long addr, int result),
> > +
> > +       TP_ARGS(mm, addr, result),
> > +
> > +       TP_STRUCT__entry(__field(struct mm_struct *, mm)
> > +                        __field(unsigned long, addr)
> > +                        __field(int, result)
> > +       ),
> > +
> > +       TP_fast_assign(__entry->mm = mm;
> > +                      __entry->addr = addr;
> > +                      __entry->result = result;
> > +       ),
> > +
> > +       TP_printk("mm=%p addr=%#lx result=%s",
> > +                 __entry->mm,
> > +                 __entry->addr,
> > +                 __print_symbolic(__entry->result, SCAN_STATUS))
> > +);
> > +
> >  #endif /* __HUGE_MEMORY_H */
> >  #include <trace/define_trace.h>
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index e0d00180512c..0207fc0a5b2a 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -2438,6 +2438,8 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
> >                 if (!mmap_locked)
> >                         *prev = NULL;  /* Tell caller we dropped mmap_lock */
> >
> > +               trace_mm_madvise_collapse(mm, addr, result);
> > +
> >                 switch (result) {
> >                 case SCAN_SUCCEED:
> >                 case SCAN_PMD_MAPPED:
> > --
> > 2.37.0.rc0.161.g10f37bed90-goog
> >
Yang Shi July 12, 2022, 5:05 p.m. UTC | #3
On Tue, Jul 12, 2022 at 9:21 AM Zach O'Keefe <zokeefe@google.com> wrote:
>
> Hey Yang,
>
> Thanks for taking the time to review this series again.
>
> On Jul 11 14:32, Yang Shi wrote:
> > On Wed, Jul 6, 2022 at 5:06 PM Zach O'Keefe <zokeefe@google.com> wrote:
> > >
> > > Add a tracepoint to expose mm, address, and enum scan_result of each
> > > hugepage attempted to be collapsed by call to madvise(MADV_COLLAPSE).
> >
> > Is this necessary? Isn't mm_khugepaged_scan_pmd tracepoint good
> > enough? It doesn't have "address", but you should be able to calculate
> > address from it with syscall trace together.
> >
>
> I've also found this useful debugging along the file path. Perhaps the answer to
> that is: add tracepoints to the file path - and we should probably do that - but
> the other issue is that turning on these tracepoints (for the purposes of
> debugging MADV_COLLAPSE) generates a lot of noise from khugepaged that is hard
> to separate out. Augmenting existing tracepoints with .is_khugepaged data
> incurred the risks associated with altering an existing kernel API. WDYT?

Doesn't ftrace show process comm and ID? And I think you also could
trace the specific processes, right?

>
> Thanks again,
> Zach
>
>
> >
> > >
> > > Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> > > ---
> > >  include/trace/events/huge_memory.h | 22 ++++++++++++++++++++++
> > >  mm/khugepaged.c                    |  2 ++
> > >  2 files changed, 24 insertions(+)
> > >
> > > diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
> > > index 55392bf30a03..38d339ffdb16 100644
> > > --- a/include/trace/events/huge_memory.h
> > > +++ b/include/trace/events/huge_memory.h
> > > @@ -167,5 +167,27 @@ TRACE_EVENT(mm_collapse_huge_page_swapin,
> > >                 __entry->ret)
> > >  );
> > >
> > > +TRACE_EVENT(mm_madvise_collapse,
> > > +
> > > +       TP_PROTO(struct mm_struct *mm, unsigned long addr, int result),
> > > +
> > > +       TP_ARGS(mm, addr, result),
> > > +
> > > +       TP_STRUCT__entry(__field(struct mm_struct *, mm)
> > > +                        __field(unsigned long, addr)
> > > +                        __field(int, result)
> > > +       ),
> > > +
> > > +       TP_fast_assign(__entry->mm = mm;
> > > +                      __entry->addr = addr;
> > > +                      __entry->result = result;
> > > +       ),
> > > +
> > > +       TP_printk("mm=%p addr=%#lx result=%s",
> > > +                 __entry->mm,
> > > +                 __entry->addr,
> > > +                 __print_symbolic(__entry->result, SCAN_STATUS))
> > > +);
> > > +
> > >  #endif /* __HUGE_MEMORY_H */
> > >  #include <trace/define_trace.h>
> > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > index e0d00180512c..0207fc0a5b2a 100644
> > > --- a/mm/khugepaged.c
> > > +++ b/mm/khugepaged.c
> > > @@ -2438,6 +2438,8 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
> > >                 if (!mmap_locked)
> > >                         *prev = NULL;  /* Tell caller we dropped mmap_lock */
> > >
> > > +               trace_mm_madvise_collapse(mm, addr, result);
> > > +
> > >                 switch (result) {
> > >                 case SCAN_SUCCEED:
> > >                 case SCAN_PMD_MAPPED:
> > > --
> > > 2.37.0.rc0.161.g10f37bed90-goog
> > >
Zach O'Keefe July 12, 2022, 5:30 p.m. UTC | #4
On Jul 12 10:05, Yang Shi wrote:
> On Tue, Jul 12, 2022 at 9:21 AM Zach O'Keefe <zokeefe@google.com> wrote:
> >
> > Hey Yang,
> >
> > Thanks for taking the time to review this series again.
> >
> > On Jul 11 14:32, Yang Shi wrote:
> > > On Wed, Jul 6, 2022 at 5:06 PM Zach O'Keefe <zokeefe@google.com> wrote:
> > > >
> > > > Add a tracepoint to expose mm, address, and enum scan_result of each
> > > > hugepage attempted to be collapsed by call to madvise(MADV_COLLAPSE).
> > >
> > > Is this necessary? Isn't mm_khugepaged_scan_pmd tracepoint good
> > > enough? It doesn't have "address", but you should be able to calculate
> > > address from it with syscall trace together.
> > >
> >
> > I've also found this useful debugging along the file path. Perhaps the answer to
> > that is: add tracepoints to the file path - and we should probably do that - but
> > the other issue is that turning on these tracepoints (for the purposes of
> > debugging MADV_COLLAPSE) generates a lot of noise from khugepaged that is hard
> > to separate out. Augmenting existing tracepoints with .is_khugepaged data
> > incurred the risks associated with altering an existing kernel API. WDYT?
> 
> Doesn't ftrace show process comm and ID? And I think you also could
> trace the specific processes, right?
>

That's true enough - it had been awhile since I actually did that ;  I've been
carrying some printk's for debugging that eventually I converted into a
tracepoint here. Sorry about that.

I'll drop this and add a relevant tracepoint to file collapse path that will
benefit khugepaged too.

Thanks again,
Zach

> >
> > Thanks again,
> > Zach
> >
> >
> > >
> > > >
> > > > Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> > > > ---
> > > >  include/trace/events/huge_memory.h | 22 ++++++++++++++++++++++
> > > >  mm/khugepaged.c                    |  2 ++
> > > >  2 files changed, 24 insertions(+)
> > > >
> > > > diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
> > > > index 55392bf30a03..38d339ffdb16 100644
> > > > --- a/include/trace/events/huge_memory.h
> > > > +++ b/include/trace/events/huge_memory.h
> > > > @@ -167,5 +167,27 @@ TRACE_EVENT(mm_collapse_huge_page_swapin,
> > > >                 __entry->ret)
> > > >  );
> > > >
> > > > +TRACE_EVENT(mm_madvise_collapse,
> > > > +
> > > > +       TP_PROTO(struct mm_struct *mm, unsigned long addr, int result),
> > > > +
> > > > +       TP_ARGS(mm, addr, result),
> > > > +
> > > > +       TP_STRUCT__entry(__field(struct mm_struct *, mm)
> > > > +                        __field(unsigned long, addr)
> > > > +                        __field(int, result)
> > > > +       ),
> > > > +
> > > > +       TP_fast_assign(__entry->mm = mm;
> > > > +                      __entry->addr = addr;
> > > > +                      __entry->result = result;
> > > > +       ),
> > > > +
> > > > +       TP_printk("mm=%p addr=%#lx result=%s",
> > > > +                 __entry->mm,
> > > > +                 __entry->addr,
> > > > +                 __print_symbolic(__entry->result, SCAN_STATUS))
> > > > +);
> > > > +
> > > >  #endif /* __HUGE_MEMORY_H */
> > > >  #include <trace/define_trace.h>
> > > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > > index e0d00180512c..0207fc0a5b2a 100644
> > > > --- a/mm/khugepaged.c
> > > > +++ b/mm/khugepaged.c
> > > > @@ -2438,6 +2438,8 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
> > > >                 if (!mmap_locked)
> > > >                         *prev = NULL;  /* Tell caller we dropped mmap_lock */
> > > >
> > > > +               trace_mm_madvise_collapse(mm, addr, result);
> > > > +
> > > >                 switch (result) {
> > > >                 case SCAN_SUCCEED:
> > > >                 case SCAN_PMD_MAPPED:
> > > > --
> > > > 2.37.0.rc0.161.g10f37bed90-goog
> > > >
diff mbox series

Patch

diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index 55392bf30a03..38d339ffdb16 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -167,5 +167,27 @@  TRACE_EVENT(mm_collapse_huge_page_swapin,
 		__entry->ret)
 );
 
+TRACE_EVENT(mm_madvise_collapse,
+
+	TP_PROTO(struct mm_struct *mm, unsigned long addr, int result),
+
+	TP_ARGS(mm, addr, result),
+
+	TP_STRUCT__entry(__field(struct mm_struct *, mm)
+			 __field(unsigned long, addr)
+			 __field(int, result)
+	),
+
+	TP_fast_assign(__entry->mm = mm;
+		       __entry->addr = addr;
+		       __entry->result = result;
+	),
+
+	TP_printk("mm=%p addr=%#lx result=%s",
+		  __entry->mm,
+		  __entry->addr,
+		  __print_symbolic(__entry->result, SCAN_STATUS))
+);
+
 #endif /* __HUGE_MEMORY_H */
 #include <trace/define_trace.h>
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index e0d00180512c..0207fc0a5b2a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2438,6 +2438,8 @@  int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		if (!mmap_locked)
 			*prev = NULL;  /* Tell caller we dropped mmap_lock */
 
+		trace_mm_madvise_collapse(mm, addr, result);
+
 		switch (result) {
 		case SCAN_SUCCEED:
 		case SCAN_PMD_MAPPED: