Message ID | 1551341664-13912-1-git-send-email-laoar.shao@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm: vmscan: add tracepoints for node reclaim | expand |
On Thu, Feb 28, 2019 at 1:44 PM Yafang Shao <laoar.shao@gmail.com> wrote: > > In the page alloc fast path, it may do node reclaim, which may cause > latency spike. > We should add tracepoint for this event, and also mesure the latency > it causes. Minor typo : mesure ->measure. > > So bellow two tracepoints are introduced, > mm_vmscan_node_reclaim_begin > mm_vmscan_node_reclaim_end > > Signed-off-by: Yafang Shao <laoar.shao@gmail.com> > --- > include/trace/events/vmscan.h | 48 +++++++++++++++++++++++++++++++++++++++++++ > mm/vmscan.c | 13 +++++++++++- > 2 files changed, 60 insertions(+), 1 deletion(-) > > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h > index a1cb913..9310d5b 100644 > --- a/include/trace/events/vmscan.h > +++ b/include/trace/events/vmscan.h > @@ -465,6 +465,54 @@ > __entry->ratio, > show_reclaim_flags(__entry->reclaim_flags)) > ); > + > +TRACE_EVENT(mm_vmscan_node_reclaim_begin, > + > + TP_PROTO(int nid, int order, int may_writepage, > + gfp_t gfp_flags, int zid), > + > + TP_ARGS(nid, order, may_writepage, gfp_flags, zid), > + > + TP_STRUCT__entry( > + __field(int, nid) > + __field(int, order) > + __field(int, may_writepage) > + __field(gfp_t, gfp_flags) > + __field(int, zid) > + ), > + > + TP_fast_assign( > + __entry->nid = nid; > + __entry->order = order; > + __entry->may_writepage = may_writepage; > + __entry->gfp_flags = gfp_flags; > + __entry->zid = zid; > + ), > + > + TP_printk("nid=%d zid=%d order=%d may_writepage=%d gfp_flags=%s", > + __entry->nid, > + __entry->zid, > + __entry->order, > + __entry->may_writepage, > + show_gfp_flags(__entry->gfp_flags)) > +); > + > +TRACE_EVENT(mm_vmscan_node_reclaim_end, > + > + TP_PROTO(int result), > + > + TP_ARGS(result), > + > + TP_STRUCT__entry( > + __field(int, result) > + ), > + > + TP_fast_assign( > + __entry->result = result; > + ), > + > + TP_printk("result=%d", __entry->result) > +); > #endif /* _TRACE_VMSCAN_H */ > > /* This part must be outside protection */ > diff 
--git a/mm/vmscan.c b/mm/vmscan.c > index ac4806f..01a0401 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -4240,6 +4240,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > .may_swap = 1, > .reclaim_idx = gfp_zone(gfp_mask), > }; > + int result; If it goes to v2, then s/result/ret ? > + > + trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, > + sc.may_writepage, > + sc.gfp_mask, > + sc.reclaim_idx); > > cond_resched(); > fs_reclaim_acquire(sc.gfp_mask); > @@ -4267,7 +4273,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > current->flags &= ~PF_SWAPWRITE; > memalloc_noreclaim_restore(noreclaim_flag); > fs_reclaim_release(sc.gfp_mask); > - return sc.nr_reclaimed >= nr_pages; > + > + result = sc.nr_reclaimed >= nr_pages; > + > + trace_mm_vmscan_node_reclaim_end(result); > + > + return result; > } > > int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) > -- > 1.8.3.1 >
On Thu, Feb 28, 2019 at 4:59 PM Souptick Joarder <jrdr.linux@gmail.com> wrote: > > On Thu, Feb 28, 2019 at 1:44 PM Yafang Shao <laoar.shao@gmail.com> wrote: > > > > In the page alloc fast path, it may do node reclaim, which may cause > > latency spike. > > We should add tracepoint for this event, and also mesure the latency > > it causes. > > Minor typo : mesure ->measure. > Thanks for your correction. > > > > So bellow two tracepoints are introduced, > > mm_vmscan_node_reclaim_begin > > mm_vmscan_node_reclaim_end > > > > Signed-off-by: Yafang Shao <laoar.shao@gmail.com> > > --- > > include/trace/events/vmscan.h | 48 +++++++++++++++++++++++++++++++++++++++++++ > > mm/vmscan.c | 13 +++++++++++- > > 2 files changed, 60 insertions(+), 1 deletion(-) > > > > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h > > index a1cb913..9310d5b 100644 > > --- a/include/trace/events/vmscan.h > > +++ b/include/trace/events/vmscan.h > > @@ -465,6 +465,54 @@ > > __entry->ratio, > > show_reclaim_flags(__entry->reclaim_flags)) > > ); > > + > > +TRACE_EVENT(mm_vmscan_node_reclaim_begin, > > + > > + TP_PROTO(int nid, int order, int may_writepage, > > + gfp_t gfp_flags, int zid), > > + > > + TP_ARGS(nid, order, may_writepage, gfp_flags, zid), > > + > > + TP_STRUCT__entry( > > + __field(int, nid) > > + __field(int, order) > > + __field(int, may_writepage) > > + __field(gfp_t, gfp_flags) > > + __field(int, zid) > > + ), > > + > > + TP_fast_assign( > > + __entry->nid = nid; > > + __entry->order = order; > > + __entry->may_writepage = may_writepage; > > + __entry->gfp_flags = gfp_flags; > > + __entry->zid = zid; > > + ), > > + > > + TP_printk("nid=%d zid=%d order=%d may_writepage=%d gfp_flags=%s", > > + __entry->nid, > > + __entry->zid, > > + __entry->order, > > + __entry->may_writepage, > > + show_gfp_flags(__entry->gfp_flags)) > > +); > > + > > +TRACE_EVENT(mm_vmscan_node_reclaim_end, > > + > > + TP_PROTO(int result), > > + > > + TP_ARGS(result), > > + > > + 
TP_STRUCT__entry( > > + __field(int, result) > > + ), > > + > > + TP_fast_assign( > > + __entry->result = result; > > + ), > > + > > + TP_printk("result=%d", __entry->result) > > +); > > #endif /* _TRACE_VMSCAN_H */ > > > > /* This part must be outside protection */ > > diff --git a/mm/vmscan.c b/mm/vmscan.c > > index ac4806f..01a0401 100644 > > --- a/mm/vmscan.c > > +++ b/mm/vmscan.c > > @@ -4240,6 +4240,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > > .may_swap = 1, > > .reclaim_idx = gfp_zone(gfp_mask), > > }; > > + int result; > > If it goes to v2, then > s/result/ret ? > Sure. Will change it. > > + > > + trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, > > + sc.may_writepage, > > + sc.gfp_mask, > > + sc.reclaim_idx); > > > > cond_resched(); > > fs_reclaim_acquire(sc.gfp_mask); > > @@ -4267,7 +4273,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > > current->flags &= ~PF_SWAPWRITE; > > memalloc_noreclaim_restore(noreclaim_flag); > > fs_reclaim_release(sc.gfp_mask); > > - return sc.nr_reclaimed >= nr_pages; > > + > > + result = sc.nr_reclaimed >= nr_pages; > > + > > + trace_mm_vmscan_node_reclaim_end(result); > > + > > + return result; > > } > > > > int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) > > -- > > 1.8.3.1 > > Thanks Yafang
On Thu 28-02-19 16:14:24, Yafang Shao wrote: > In the page alloc fast path, it may do node reclaim, which may cause > latency spike. > We should add tracepoint for this event, and also mesure the latency > it causes. > > So bellow two tracepoints are introduced, > mm_vmscan_node_reclaim_begin > mm_vmscan_node_reclaim_end This makes some sense to me. Regular direct reclaim already does have similar tracepoints. Is there any reason you haven't used mm_vmscan_direct_reclaim_{begin,end}_template as all other direct reclaim paths? > Signed-off-by: Yafang Shao <laoar.shao@gmail.com> > --- > include/trace/events/vmscan.h | 48 +++++++++++++++++++++++++++++++++++++++++++ > mm/vmscan.c | 13 +++++++++++- > 2 files changed, 60 insertions(+), 1 deletion(-) > > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h > index a1cb913..9310d5b 100644 > --- a/include/trace/events/vmscan.h > +++ b/include/trace/events/vmscan.h > @@ -465,6 +465,54 @@ > __entry->ratio, > show_reclaim_flags(__entry->reclaim_flags)) > ); > + > +TRACE_EVENT(mm_vmscan_node_reclaim_begin, > + > + TP_PROTO(int nid, int order, int may_writepage, > + gfp_t gfp_flags, int zid), > + > + TP_ARGS(nid, order, may_writepage, gfp_flags, zid), > + > + TP_STRUCT__entry( > + __field(int, nid) > + __field(int, order) > + __field(int, may_writepage) > + __field(gfp_t, gfp_flags) > + __field(int, zid) > + ), > + > + TP_fast_assign( > + __entry->nid = nid; > + __entry->order = order; > + __entry->may_writepage = may_writepage; > + __entry->gfp_flags = gfp_flags; > + __entry->zid = zid; > + ), > + > + TP_printk("nid=%d zid=%d order=%d may_writepage=%d gfp_flags=%s", > + __entry->nid, > + __entry->zid, > + __entry->order, > + __entry->may_writepage, > + show_gfp_flags(__entry->gfp_flags)) > +); > + > +TRACE_EVENT(mm_vmscan_node_reclaim_end, > + > + TP_PROTO(int result), > + > + TP_ARGS(result), > + > + TP_STRUCT__entry( > + __field(int, result) > + ), > + > + TP_fast_assign( > + __entry->result = result; > 
+ ), > + > + TP_printk("result=%d", __entry->result) > +); > #endif /* _TRACE_VMSCAN_H */ > > /* This part must be outside protection */ > diff --git a/mm/vmscan.c b/mm/vmscan.c > index ac4806f..01a0401 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -4240,6 +4240,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > .may_swap = 1, > .reclaim_idx = gfp_zone(gfp_mask), > }; > + int result; > + > + trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, > + sc.may_writepage, > + sc.gfp_mask, > + sc.reclaim_idx); > > cond_resched(); > fs_reclaim_acquire(sc.gfp_mask); > @@ -4267,7 +4273,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > current->flags &= ~PF_SWAPWRITE; > memalloc_noreclaim_restore(noreclaim_flag); > fs_reclaim_release(sc.gfp_mask); > - return sc.nr_reclaimed >= nr_pages; > + > + result = sc.nr_reclaimed >= nr_pages; > + > + trace_mm_vmscan_node_reclaim_end(result); > + > + return result; > } > > int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) > -- > 1.8.3.1
On Thu, Feb 28, 2019 at 6:17 PM Michal Hocko <mhocko@kernel.org> wrote: > > On Thu 28-02-19 16:14:24, Yafang Shao wrote: > > In the page alloc fast path, it may do node reclaim, which may cause > > latency spike. > > We should add tracepoint for this event, and also mesure the latency > > it causes. > > > > So bellow two tracepoints are introduced, > > mm_vmscan_node_reclaim_begin > > mm_vmscan_node_reclaim_end > > This makes some sense to me. Regular direct reclaim already does have > similar tracepoints. Is there any reason you haven't used > mm_vmscan_direct_reclaim_{begin,end}_template as all other direct reclaim > paths? > Because I also want to know the node id, which is not show in mm_vmscan_direct_reclaim_{begin,end}_template. Or should we modify mm_vmscan_direct_reclaim_{begin,end}_template to show the node id as well ? Thanks Yafang > > Signed-off-by: Yafang Shao <laoar.shao@gmail.com> > > --- > > include/trace/events/vmscan.h | 48 +++++++++++++++++++++++++++++++++++++++++++ > > mm/vmscan.c | 13 +++++++++++- > > 2 files changed, 60 insertions(+), 1 deletion(-) > > > > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h > > index a1cb913..9310d5b 100644 > > --- a/include/trace/events/vmscan.h > > +++ b/include/trace/events/vmscan.h > > @@ -465,6 +465,54 @@ > > __entry->ratio, > > show_reclaim_flags(__entry->reclaim_flags)) > > ); > > + > > +TRACE_EVENT(mm_vmscan_node_reclaim_begin, > > + > > + TP_PROTO(int nid, int order, int may_writepage, > > + gfp_t gfp_flags, int zid), > > + > > + TP_ARGS(nid, order, may_writepage, gfp_flags, zid), > > + > > + TP_STRUCT__entry( > > + __field(int, nid) > > + __field(int, order) > > + __field(int, may_writepage) > > + __field(gfp_t, gfp_flags) > > + __field(int, zid) > > + ), > > + > > + TP_fast_assign( > > + __entry->nid = nid; > > + __entry->order = order; > > + __entry->may_writepage = may_writepage; > > + __entry->gfp_flags = gfp_flags; > > + __entry->zid = zid; > > + ), > > + > > + 
TP_printk("nid=%d zid=%d order=%d may_writepage=%d gfp_flags=%s", > > + __entry->nid, > > + __entry->zid, > > + __entry->order, > > + __entry->may_writepage, > > + show_gfp_flags(__entry->gfp_flags)) > > +); > > + > > +TRACE_EVENT(mm_vmscan_node_reclaim_end, > > + > > + TP_PROTO(int result), > > + > > + TP_ARGS(result), > > + > > + TP_STRUCT__entry( > > + __field(int, result) > > + ), > > + > > + TP_fast_assign( > > + __entry->result = result; > > + ), > > + > > + TP_printk("result=%d", __entry->result) > > +); > > #endif /* _TRACE_VMSCAN_H */ > > > > /* This part must be outside protection */ > > diff --git a/mm/vmscan.c b/mm/vmscan.c > > index ac4806f..01a0401 100644 > > --- a/mm/vmscan.c > > +++ b/mm/vmscan.c > > @@ -4240,6 +4240,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > > .may_swap = 1, > > .reclaim_idx = gfp_zone(gfp_mask), > > }; > > + int result; > > + > > + trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, > > + sc.may_writepage, > > + sc.gfp_mask, > > + sc.reclaim_idx); > > > > cond_resched(); > > fs_reclaim_acquire(sc.gfp_mask); > > @@ -4267,7 +4273,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > > current->flags &= ~PF_SWAPWRITE; > > memalloc_noreclaim_restore(noreclaim_flag); > > fs_reclaim_release(sc.gfp_mask); > > - return sc.nr_reclaimed >= nr_pages; > > + > > + result = sc.nr_reclaimed >= nr_pages; > > + > > + trace_mm_vmscan_node_reclaim_end(result); > > + > > + return result; > > } > > > > int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) > > -- > > 1.8.3.1 > > -- > Michal Hocko > SUSE Labs
On 2/28/19 9:14 AM, Yafang Shao wrote: > In the page alloc fast path, it may do node reclaim, which may cause > latency spike. > We should add tracepoint for this event, and also mesure the latency > it causes. > > So bellow two tracepoints are introduced, > mm_vmscan_node_reclaim_begin > mm_vmscan_node_reclaim_end > > Signed-off-by: Yafang Shao <laoar.shao@gmail.com> > --- > include/trace/events/vmscan.h | 48 +++++++++++++++++++++++++++++++++++++++++++ > mm/vmscan.c | 13 +++++++++++- > 2 files changed, 60 insertions(+), 1 deletion(-) > > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h > index a1cb913..9310d5b 100644 > --- a/include/trace/events/vmscan.h > +++ b/include/trace/events/vmscan.h > @@ -465,6 +465,54 @@ > __entry->ratio, > show_reclaim_flags(__entry->reclaim_flags)) > ); > + > +TRACE_EVENT(mm_vmscan_node_reclaim_begin, > + > + TP_PROTO(int nid, int order, int may_writepage, > + gfp_t gfp_flags, int zid), > + > + TP_ARGS(nid, order, may_writepage, gfp_flags, zid), > + > + TP_STRUCT__entry( > + __field(int, nid) > + __field(int, order) > + __field(int, may_writepage) For node reclaim may_writepage is statically set in node_reclaim_mode, so I'm not sure it's worth including it. > + __field(gfp_t, gfp_flags) > + __field(int, zid) zid seems wasteful and misleading as it's simply derived by gfp_zone(gfp_mask), so I would drop it. 
> + ), > + > + TP_fast_assign( > + __entry->nid = nid; > + __entry->order = order; > + __entry->may_writepage = may_writepage; > + __entry->gfp_flags = gfp_flags; > + __entry->zid = zid; > + ), > + > + TP_printk("nid=%d zid=%d order=%d may_writepage=%d gfp_flags=%s", > + __entry->nid, > + __entry->zid, > + __entry->order, > + __entry->may_writepage, > + show_gfp_flags(__entry->gfp_flags)) > +); > + > +TRACE_EVENT(mm_vmscan_node_reclaim_end, > + > + TP_PROTO(int result), > + > + TP_ARGS(result), > + > + TP_STRUCT__entry( > + __field(int, result) Reporting sc.nr_reclaimed sounds more useful and in line with other reclaim tracepoints. Result (sc.nr_reclaimed >= nr_pages) can then be derived by postprocessing as the beginning tracepoint contains 'order' thus we know nr_pages? > + ), > + > + TP_fast_assign( > + __entry->result = result; > + ), > + > + TP_printk("result=%d", __entry->result) > +); > #endif /* _TRACE_VMSCAN_H */ > > /* This part must be outside protection */ > diff --git a/mm/vmscan.c b/mm/vmscan.c > index ac4806f..01a0401 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -4240,6 +4240,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > .may_swap = 1, > .reclaim_idx = gfp_zone(gfp_mask), > }; > + int result; > + > + trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, > + sc.may_writepage, > + sc.gfp_mask, > + sc.reclaim_idx); > > cond_resched(); > fs_reclaim_acquire(sc.gfp_mask); > @@ -4267,7 +4273,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > current->flags &= ~PF_SWAPWRITE; > memalloc_noreclaim_restore(noreclaim_flag); > fs_reclaim_release(sc.gfp_mask); > - return sc.nr_reclaimed >= nr_pages; > + > + result = sc.nr_reclaimed >= nr_pages; > + > + trace_mm_vmscan_node_reclaim_end(result); > + > + return result; > } > > int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) >
On Thu 28-02-19 18:20:16, Yafang Shao wrote: > On Thu, Feb 28, 2019 at 6:17 PM Michal Hocko <mhocko@kernel.org> wrote: > > > > On Thu 28-02-19 16:14:24, Yafang Shao wrote: > > > In the page alloc fast path, it may do node reclaim, which may cause > > > latency spike. > > > We should add tracepoint for this event, and also mesure the latency > > > it causes. > > > > > > So bellow two tracepoints are introduced, > > > mm_vmscan_node_reclaim_begin > > > mm_vmscan_node_reclaim_end > > > > This makes some sense to me. Regular direct reclaim already does have > > similar tracepoints. Is there any reason you haven't used > > mm_vmscan_direct_reclaim_{begin,end}_template as all other direct reclaim > > paths? > > > > Because I also want to know the node id, which is not show in > mm_vmscan_direct_reclaim_{begin,end}_template. > > Or should we modify mm_vmscan_direct_reclaim_{begin,end}_template to > show the node id as well ? OK, I see. I thought it was there but it would make much less sense than for the node reclaim for sure. A separate tracepoint makes more sense then.
On Thu, Feb 28, 2019 at 6:21 PM Vlastimil Babka <vbabka@suse.cz> wrote: > > On 2/28/19 9:14 AM, Yafang Shao wrote: > > In the page alloc fast path, it may do node reclaim, which may cause > > latency spike. > > We should add tracepoint for this event, and also mesure the latency > > it causes. > > > > So bellow two tracepoints are introduced, > > mm_vmscan_node_reclaim_begin > > mm_vmscan_node_reclaim_end > > > > Signed-off-by: Yafang Shao <laoar.shao@gmail.com> > > --- > > include/trace/events/vmscan.h | 48 +++++++++++++++++++++++++++++++++++++++++++ > > mm/vmscan.c | 13 +++++++++++- > > 2 files changed, 60 insertions(+), 1 deletion(-) > > > > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h > > index a1cb913..9310d5b 100644 > > --- a/include/trace/events/vmscan.h > > +++ b/include/trace/events/vmscan.h > > @@ -465,6 +465,54 @@ > > __entry->ratio, > > show_reclaim_flags(__entry->reclaim_flags)) > > ); > > + > > +TRACE_EVENT(mm_vmscan_node_reclaim_begin, > > + > > + TP_PROTO(int nid, int order, int may_writepage, > > + gfp_t gfp_flags, int zid), > > + > > + TP_ARGS(nid, order, may_writepage, gfp_flags, zid), > > + > > + TP_STRUCT__entry( > > + __field(int, nid) > > + __field(int, order) > > + __field(int, may_writepage) > > For node reclaim may_writepage is statically set in node_reclaim_mode, > so I'm not sure it's worth including it. > > > + __field(gfp_t, gfp_flags) > > + __field(int, zid) > > zid seems wasteful and misleading as it's simply derived by > gfp_zone(gfp_mask), so I would drop it. > I agree with you that may_writepage and zid is wasteful, but I found they are in other tracepoints in this file, so I place them in this tracepoint as well. Seems we'd better drop them from other tracepoints as well ? 
> > + ), > > + > > + TP_fast_assign( > > + __entry->nid = nid; > > + __entry->order = order; > > + __entry->may_writepage = may_writepage; > > + __entry->gfp_flags = gfp_flags; > > + __entry->zid = zid; > > + ), > > + > > + TP_printk("nid=%d zid=%d order=%d may_writepage=%d gfp_flags=%s", > > + __entry->nid, > > + __entry->zid, > > + __entry->order, > > + __entry->may_writepage, > > + show_gfp_flags(__entry->gfp_flags)) > > +); > > + > > +TRACE_EVENT(mm_vmscan_node_reclaim_end, > > + > > + TP_PROTO(int result), > > + > > + TP_ARGS(result), > > + > > + TP_STRUCT__entry( > > + __field(int, result) > > Reporting sc.nr_reclaimed sounds more useful and in line with other > reclaim tracepoints. Result (sc.nr_reclaimed >= nr_pages) can then be > derived by postprocessing as the beginning tracepoint contains 'order' > thus we know nr_pages? > Seems reasonable. Will change it. > > + ), > > + > > + TP_fast_assign( > > + __entry->result = result; > > + ), > > + > > + TP_printk("result=%d", __entry->result) > > +); > > #endif /* _TRACE_VMSCAN_H */ > > > > /* This part must be outside protection */ > > diff --git a/mm/vmscan.c b/mm/vmscan.c > > index ac4806f..01a0401 100644 > > --- a/mm/vmscan.c > > +++ b/mm/vmscan.c > > @@ -4240,6 +4240,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > > .may_swap = 1, > > .reclaim_idx = gfp_zone(gfp_mask), > > }; > > + int result; > > + > > + trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, > > + sc.may_writepage, > > + sc.gfp_mask, > > + sc.reclaim_idx); > > > > cond_resched(); > > fs_reclaim_acquire(sc.gfp_mask); > > @@ -4267,7 +4273,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in > > current->flags &= ~PF_SWAPWRITE; > > memalloc_noreclaim_restore(noreclaim_flag); > > fs_reclaim_release(sc.gfp_mask); > > - return sc.nr_reclaimed >= nr_pages; > > + > > + result = sc.nr_reclaimed >= nr_pages; > > + > > + trace_mm_vmscan_node_reclaim_end(result); > > + > > + 
return result; > > } > > > > int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) > > > Thanks Yafang
On 2/28/19 11:34 AM, Yafang Shao wrote: > On Thu, Feb 28, 2019 at 6:21 PM Vlastimil Babka <vbabka@suse.cz> wrote: >> >> On 2/28/19 9:14 AM, Yafang Shao wrote: >>> In the page alloc fast path, it may do node reclaim, which may cause >>> latency spike. >>> We should add tracepoint for this event, and also mesure the latency >>> it causes. >>> >>> So bellow two tracepoints are introduced, >>> mm_vmscan_node_reclaim_begin >>> mm_vmscan_node_reclaim_end >>> >>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com> >>> --- >>> include/trace/events/vmscan.h | 48 +++++++++++++++++++++++++++++++++++++++++++ >>> mm/vmscan.c | 13 +++++++++++- >>> 2 files changed, 60 insertions(+), 1 deletion(-) >>> >>> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h >>> index a1cb913..9310d5b 100644 >>> --- a/include/trace/events/vmscan.h >>> +++ b/include/trace/events/vmscan.h >>> @@ -465,6 +465,54 @@ >>> __entry->ratio, >>> show_reclaim_flags(__entry->reclaim_flags)) >>> ); >>> + >>> +TRACE_EVENT(mm_vmscan_node_reclaim_begin, >>> + >>> + TP_PROTO(int nid, int order, int may_writepage, >>> + gfp_t gfp_flags, int zid), >>> + >>> + TP_ARGS(nid, order, may_writepage, gfp_flags, zid), >>> + >>> + TP_STRUCT__entry( >>> + __field(int, nid) >>> + __field(int, order) >>> + __field(int, may_writepage) >> >> For node reclaim may_writepage is statically set in node_reclaim_mode, >> so I'm not sure it's worth including it. >> >>> + __field(gfp_t, gfp_flags) >>> + __field(int, zid) >> >> zid seems wasteful and misleading as it's simply derived by >> gfp_zone(gfp_mask), so I would drop it. >> > > I agree with you that may_writepage and zid is wasteful, but I found > they are in other tracepoints in this file, > so I place them in this tracepoint as well. I see zid only in kswapd waking tracepoints? That's different kind of event. > Seems we'd better drop them from other tracepoints as well ? 
Hmm seems may_writepage in other tracepoints depends on laptop_mode which is also a static setting. do_try_to_free_pages() can override it due to priority, but that doesn't affect the tracepoints. If they are to be dropped, it would be a separate patch though.
On Thu, Feb 28, 2019 at 6:44 PM Vlastimil Babka <vbabka@suse.cz> wrote: > > On 2/28/19 11:34 AM, Yafang Shao wrote: > > On Thu, Feb 28, 2019 at 6:21 PM Vlastimil Babka <vbabka@suse.cz> wrote: > >> > >> On 2/28/19 9:14 AM, Yafang Shao wrote: > >>> In the page alloc fast path, it may do node reclaim, which may cause > >>> latency spike. > >>> We should add tracepoint for this event, and also mesure the latency > >>> it causes. > >>> > >>> So bellow two tracepoints are introduced, > >>> mm_vmscan_node_reclaim_begin > >>> mm_vmscan_node_reclaim_end > >>> > >>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com> > >>> --- > >>> include/trace/events/vmscan.h | 48 +++++++++++++++++++++++++++++++++++++++++++ > >>> mm/vmscan.c | 13 +++++++++++- > >>> 2 files changed, 60 insertions(+), 1 deletion(-) > >>> > >>> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h > >>> index a1cb913..9310d5b 100644 > >>> --- a/include/trace/events/vmscan.h > >>> +++ b/include/trace/events/vmscan.h > >>> @@ -465,6 +465,54 @@ > >>> __entry->ratio, > >>> show_reclaim_flags(__entry->reclaim_flags)) > >>> ); > >>> + > >>> +TRACE_EVENT(mm_vmscan_node_reclaim_begin, > >>> + > >>> + TP_PROTO(int nid, int order, int may_writepage, > >>> + gfp_t gfp_flags, int zid), > >>> + > >>> + TP_ARGS(nid, order, may_writepage, gfp_flags, zid), > >>> + > >>> + TP_STRUCT__entry( > >>> + __field(int, nid) > >>> + __field(int, order) > >>> + __field(int, may_writepage) > >> > >> For node reclaim may_writepage is statically set in node_reclaim_mode, > >> so I'm not sure it's worth including it. > >> > >>> + __field(gfp_t, gfp_flags) > >>> + __field(int, zid) > >> > >> zid seems wasteful and misleading as it's simply derived by > >> gfp_zone(gfp_mask), so I would drop it. > >> > > > > I agree with you that may_writepage and zid is wasteful, but I found > > they are in other tracepoints in this file, > > so I place them in this tracepoint as well. > > I see zid only in kswapd waking tracepoints? 
That's different kind of > event. > Pls. see mm_vmscan_wakeup_kswapd and classzone_idx in mm_vmscan_direct_reclaim_begin_template. mm_vmscan_direct_reclaim_begin_template: "order=%d may_writepage=%d gfp_flags=%s classzone_idx=%d" mm_vmscan_wakeup_kswapd: "nid=%d zid=%d order=%d gfp_flags=%s" > > Seems we'd better drop them from other tracepoints as well ? > > Hmm seems may_writepage in other tracepoints depends on laptop_mode > which is also a static setting. do_try_to_free_pages() can override it > due to priority, but that doesn't affect the tracepoints. If they are to > be dropped, it would be a separate patch though. OK. Thanks Yafang
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index a1cb913..9310d5b 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -465,6 +465,54 @@ __entry->ratio, show_reclaim_flags(__entry->reclaim_flags)) ); + +TRACE_EVENT(mm_vmscan_node_reclaim_begin, + + TP_PROTO(int nid, int order, int may_writepage, + gfp_t gfp_flags, int zid), + + TP_ARGS(nid, order, may_writepage, gfp_flags, zid), + + TP_STRUCT__entry( + __field(int, nid) + __field(int, order) + __field(int, may_writepage) + __field(gfp_t, gfp_flags) + __field(int, zid) + ), + + TP_fast_assign( + __entry->nid = nid; + __entry->order = order; + __entry->may_writepage = may_writepage; + __entry->gfp_flags = gfp_flags; + __entry->zid = zid; + ), + + TP_printk("nid=%d zid=%d order=%d may_writepage=%d gfp_flags=%s", + __entry->nid, + __entry->zid, + __entry->order, + __entry->may_writepage, + show_gfp_flags(__entry->gfp_flags)) +); + +TRACE_EVENT(mm_vmscan_node_reclaim_end, + + TP_PROTO(int result), + + TP_ARGS(result), + + TP_STRUCT__entry( + __field(int, result) + ), + + TP_fast_assign( + __entry->result = result; + ), + + TP_printk("result=%d", __entry->result) +); #endif /* _TRACE_VMSCAN_H */ /* This part must be outside protection */ diff --git a/mm/vmscan.c b/mm/vmscan.c index ac4806f..01a0401 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4240,6 +4240,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in .may_swap = 1, .reclaim_idx = gfp_zone(gfp_mask), }; + int result; + + trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, + sc.may_writepage, + sc.gfp_mask, + sc.reclaim_idx); cond_resched(); fs_reclaim_acquire(sc.gfp_mask); @@ -4267,7 +4273,12 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in current->flags &= ~PF_SWAPWRITE; memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(sc.gfp_mask); - return sc.nr_reclaimed >= nr_pages; + + result = sc.nr_reclaimed >= nr_pages; + 
+ trace_mm_vmscan_node_reclaim_end(result); + + return result; } int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
In the page alloc fast path, it may do node reclaim, which may cause a latency spike. We should add a tracepoint for this event, and also measure the latency it causes. So the two tracepoints below are introduced, mm_vmscan_node_reclaim_begin mm_vmscan_node_reclaim_end Signed-off-by: Yafang Shao <laoar.shao@gmail.com> --- include/trace/events/vmscan.h | 48 +++++++++++++++++++++++++++++++++++++++++++ mm/vmscan.c | 13 +++++++++++- 2 files changed, 60 insertions(+), 1 deletion(-)