diff mbox series

vmscan: add trace events for lru_gen

Message ID 20230919025216.1878-1-jaewon31.kim@samsung.com (mailing list archive)
State Superseded
Headers show
Series vmscan: add trace events for lru_gen | expand

Commit Message

Jaewon Kim Sept. 19, 2023, 2:52 a.m. UTC
As the legacy lru provides, the lru_gen needs some trace events for
debugging.

This commit introduces 2 trace events.
  trace_mm_vmscan_lru_gen_scan
  trace_mm_vmscan_lru_gen_evict

Each event is similar to the following legacy events.
  trace_mm_vmscan_lru_isolate,
  trace_mm_vmscan_lru_shrink_[in]active

Here's an example
  mm_vmscan_lru_gen_scan: isolate_mode=0 classzone=1 order=9 nr_requested=4096 nr_scanned=431 nr_skipped=0 nr_taken=55 lru=anon
  mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=42 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=13 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_ANON|RECLAIM_WB_ASYNC
  mm_vmscan_lru_gen_scan: isolate_mode=0 classzone=1 order=9 nr_requested=4096 nr_scanned=66 nr_skipped=0 nr_taken=64 lru=file
  mm_vmscan_lru_gen_evict: nid=0 nr_reclaimed=62 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=2 nr_ref_keep=0 nr_unmap_fail=0 priority=2 flags=RECLAIM_WB_FILE|RECLAIM_WB_ASYNC

Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
---
 include/trace/events/mmflags.h |  5 ++
 include/trace/events/vmscan.h  | 96 ++++++++++++++++++++++++++++++++++
 mm/vmscan.c                    | 18 +++++--
 3 files changed, 114 insertions(+), 5 deletions(-)

Comments

Steven Rostedt Sept. 19, 2023, 2:09 p.m. UTC | #1
On Tue, 19 Sep 2023 11:52:16 +0900
Jaewon Kim <jaewon31.kim@samsung.com> wrote:

>  /*
>   * Now redefine the EM() and EMe() macros to map the enums to the strings
> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> index d2123dd960d5..e8f9d0452e89 100644
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -327,6 +327,55 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
>  		__print_symbolic(__entry->lru, LRU_NAMES))
>  );
>  
> +TRACE_EVENT(mm_vmscan_lru_gen_scan,
> +	TP_PROTO(int highest_zoneidx,
> +		int order,
> +		unsigned long nr_requested,
> +		unsigned long nr_scanned,
> +		unsigned long nr_skipped,
> +		unsigned long nr_taken,
> +		isolate_mode_t isolate_mode,
> +		int lru),

This is a lot of parameter passing, can you consolidate it?

(see below to where you call this)

> +
> +	TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
> +
> +	TP_STRUCT__entry(
> +		__field(int, highest_zoneidx)
> +		__field(int, order)
> +		__field(unsigned long, nr_requested)
> +		__field(unsigned long, nr_scanned)
> +		__field(unsigned long, nr_skipped)
> +		__field(unsigned long, nr_taken)
> +		__field(unsigned int, isolate_mode)
> +		__field(int, lru)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->highest_zoneidx = highest_zoneidx;
> +		__entry->order = order;
> +		__entry->nr_requested = nr_requested;
> +		__entry->nr_scanned = nr_scanned;
> +		__entry->nr_skipped = nr_skipped;
> +		__entry->nr_taken = nr_taken;
> +		__entry->isolate_mode = (__force unsigned int)isolate_mode;
> +		__entry->lru = lru;
> +	),
> +
> +	/*
> +	 * classzone is previous name of the highest_zoneidx.
> +	 * Reason not to change it is the ABI requirement of the tracepoint.
> +	 */
> +	TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
> +		__entry->isolate_mode,
> +		__entry->highest_zoneidx,
> +		__entry->order,
> +		__entry->nr_requested,
> +		__entry->nr_scanned,
> +		__entry->nr_skipped,
> +		__entry->nr_taken,
> +		__print_symbolic(__entry->lru, LRU_GEN_NAMES))
> +);
> +
>  TRACE_EVENT(mm_vmscan_write_folio,
>  
>  	TP_PROTO(struct folio *folio),
> @@ -437,6 +486,53 @@ TRACE_EVENT(mm_vmscan_lru_shrink_active,
>  		show_reclaim_flags(__entry->reclaim_flags))
>  );
>  
> +TRACE_EVENT(mm_vmscan_lru_gen_evict,
> +
> +	TP_PROTO(int nid, unsigned long nr_reclaimed,
> +		struct reclaim_stat *stat, int priority, int file),
> +
> +	TP_ARGS(nid, nr_reclaimed, stat, priority, file),
> +
> +	TP_STRUCT__entry(
> +		__field(int, nid)

On 64 bit architectures, this causes a 4 byte hole in the ring buffer
layout. Please keep 32 bit size fields paired with other 32 bit size if
possible. That is, move the above "int nid" down where it doesn't cause a
long field to be 4 bytes away.

> +		__field(unsigned long, nr_reclaimed)
> +		__field(unsigned long, nr_dirty)
> +		__field(unsigned long, nr_writeback)
> +		__field(unsigned long, nr_congested)
> +		__field(unsigned long, nr_immediate)
> +		__field(unsigned int, nr_activate0)
> +		__field(unsigned int, nr_activate1)
> +		__field(unsigned long, nr_ref_keep)
> +		__field(unsigned long, nr_unmap_fail)

		__field(int, nid)

here!

> +		__field(int, priority)
> +		__field(int, reclaim_flags)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->nid = nid;
> +		__entry->nr_reclaimed = nr_reclaimed;
> +		__entry->nr_dirty = stat->nr_dirty;
> +		__entry->nr_writeback = stat->nr_writeback;
> +		__entry->nr_congested = stat->nr_congested;
> +		__entry->nr_immediate = stat->nr_immediate;
> +		__entry->nr_activate0 = stat->nr_activate[0];
> +		__entry->nr_activate1 = stat->nr_activate[1];
> +		__entry->nr_ref_keep = stat->nr_ref_keep;
> +		__entry->nr_unmap_fail = stat->nr_unmap_fail;
> +		__entry->priority = priority;
> +		__entry->reclaim_flags = trace_reclaim_flags(file);
> +	),
> +
> +	TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
> +		__entry->nid, __entry->nr_reclaimed,
> +		__entry->nr_dirty, __entry->nr_writeback,
> +		__entry->nr_congested, __entry->nr_immediate,
> +		__entry->nr_activate0, __entry->nr_activate1,
> +		__entry->nr_ref_keep, __entry->nr_unmap_fail,
> +		__entry->priority,
> +		show_reclaim_flags(__entry->reclaim_flags))
> +);
> +
>  TRACE_EVENT(mm_vmscan_node_reclaim_begin,
>  
>  	TP_PROTO(int nid, int order, gfp_t gfp_flags),
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 6f13394b112e..cc10e3fb8fa2 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -5005,6 +5005,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>  	int sorted = 0;
>  	int scanned = 0;
>  	int isolated = 0;
> +	int skipped = 0;
>  	int remaining = MAX_LRU_BATCH;
>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
>  	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> @@ -5018,7 +5019,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>  
>  	for (i = MAX_NR_ZONES; i > 0; i--) {
>  		LIST_HEAD(moved);
> -		int skipped = 0;
> +		int skipped_zone = 0;
>  		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
>  		struct list_head *head = &lrugen->folios[gen][type][zone];
>  
> @@ -5040,16 +5041,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>  				isolated += delta;
>  			} else {
>  				list_move(&folio->lru, &moved);
> -				skipped += delta;
> +				skipped_zone += delta;
>  			}
>  
> -			if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
> +			if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
>  				break;
>  		}
>  
> -		if (skipped) {
> +		if (skipped_zone) {
>  			list_splice(&moved, head);
> -			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
> +			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
> +			skipped += skipped_zone;
>  		}
>  
>  		if (!remaining || isolated >= MIN_LRU_BATCH)
> @@ -5065,6 +5067,10 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>  	__count_memcg_events(memcg, PGREFILL, sorted);
>  	__count_vm_events(PGSCAN_ANON + type, isolated);
>  
> +	if (scanned)

BTW, you can make this branch conditional with the trace event logic, so
that it isn't tested when tracing is enabled. That is, remove the
 "if (scanned)" test and use TRACE_EVENT_CONDITION() as I show below.

> +		trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order,
> +				MAX_LRU_BATCH, scanned, skipped, isolated,
> +				sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);

Why not pass the sc in to the trace event, and then do the assigning there?

// use CONDITION to test scanned

TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,

	TP_PROTO(struct scan_control *sc,
		unsigned long nr_requested,
		unsigned long nr_scanned,
		unsigned long nr_skipped,
		unsigned long nr_taken,
		int lru),

	TP_ARGS(...)

	TP_CONDITION(nr_scanned)

	TP_fast_assign(
		__entry->highest_zoneidx = sc->reclaim_idx;
		__entry->order = sc->order;
		__entry->nr_requested = nr_requested;
		__entry->nr_scanned = nr_scanned;
		__entry->nr_skipped = nr_skipped;
		__entry->nr_taken = nr_taken;
		__entry->isolate_mode = (__force unsigned int)(sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
		__entry->lru = lru;
	),

Lots of parameters can be expensive to pass, as it requires more copying.

-- Steve


>  	/*
>  	 * There might not be eligible folios due to reclaim_idx. Check the
>  	 * remaining to prevent livelock if it's not making progress.
> @@ -5194,6 +5200,8 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
>  retry:
>  	reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
>  	sc->nr_reclaimed += reclaimed;
> +	trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
> +				      sc->priority, type);
>  
>  	list_for_each_entry_safe_reverse(folio, next, &list, lru) {
>  		if (!folio_evictable(folio)) {
Jaewon Kim Sept. 20, 2023, 7:49 a.m. UTC | #2
>On Tue, 19 Sep 2023 11:52:16 +0900
>Jaewon Kim <jaewon31.kim@samsung.com> wrote:
>
>>  /*
>>   * Now redefine the EM() and EMe() macros to map the enums to the strings
>> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
>> index d2123dd960d5..e8f9d0452e89 100644
>> --- a/include/trace/events/vmscan.h
>> +++ b/include/trace/events/vmscan.h
>> @@ -327,6 +327,55 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
>>  		__print_symbolic(__entry->lru, LRU_NAMES))
>>  );
>>  
>> +TRACE_EVENT(mm_vmscan_lru_gen_scan,
>> +	TP_PROTO(int highest_zoneidx,
>> +		int order,
>> +		unsigned long nr_requested,
>> +		unsigned long nr_scanned,
>> +		unsigned long nr_skipped,
>> +		unsigned long nr_taken,
>> +		isolate_mode_t isolate_mode,
>> +		int lru),
>
>This is a lot of parameter passing, can you consolidate it?
>
>(see below to where you call this)
>
>> +
>> +	TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(int, highest_zoneidx)
>> +		__field(int, order)
>> +		__field(unsigned long, nr_requested)
>> +		__field(unsigned long, nr_scanned)
>> +		__field(unsigned long, nr_skipped)
>> +		__field(unsigned long, nr_taken)
>> +		__field(unsigned int, isolate_mode)
>> +		__field(int, lru)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->highest_zoneidx = highest_zoneidx;
>> +		__entry->order = order;
>> +		__entry->nr_requested = nr_requested;
>> +		__entry->nr_scanned = nr_scanned;
>> +		__entry->nr_skipped = nr_skipped;
>> +		__entry->nr_taken = nr_taken;
>> +		__entry->isolate_mode = (__force unsigned int)isolate_mode;
>> +		__entry->lru = lru;
>> +	),
>> +
>> +	/*
>> +	 * classzone is previous name of the highest_zoneidx.
>> +	 * Reason not to change it is the ABI requirement of the tracepoint.
>> +	 */
>> +	TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
>> +		__entry->isolate_mode,
>> +		__entry->highest_zoneidx,
>> +		__entry->order,
>> +		__entry->nr_requested,
>> +		__entry->nr_scanned,
>> +		__entry->nr_skipped,
>> +		__entry->nr_taken,
>> +		__print_symbolic(__entry->lru, LRU_GEN_NAMES))
>> +);
>> +
>>  TRACE_EVENT(mm_vmscan_write_folio,
>>  
>>  	TP_PROTO(struct folio *folio),
>> @@ -437,6 +486,53 @@ TRACE_EVENT(mm_vmscan_lru_shrink_active,
>>  		show_reclaim_flags(__entry->reclaim_flags))
>>  );
>>  
>> +TRACE_EVENT(mm_vmscan_lru_gen_evict,
>> +
>> +	TP_PROTO(int nid, unsigned long nr_reclaimed,
>> +		struct reclaim_stat *stat, int priority, int file),
>> +
>> +	TP_ARGS(nid, nr_reclaimed, stat, priority, file),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(int, nid)
>
>On 64 bit architectures, this causes a 4 byte hole in the ring buffer
>layout. Please keep 32 bit size fields paired with other 32 bit size if
>possible. That is, move the above "int nid" down where it doesn't cause a
>long field to be 4 bytes away.
>
>> +		__field(unsigned long, nr_reclaimed)
>> +		__field(unsigned long, nr_dirty)
>> +		__field(unsigned long, nr_writeback)
>> +		__field(unsigned long, nr_congested)
>> +		__field(unsigned long, nr_immediate)
>> +		__field(unsigned int, nr_activate0)
>> +		__field(unsigned int, nr_activate1)
>> +		__field(unsigned long, nr_ref_keep)
>> +		__field(unsigned long, nr_unmap_fail)
>
>		__field(int, nid)
>
>here!
>
>> +		__field(int, priority)
>> +		__field(int, reclaim_flags)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->nid = nid;
>> +		__entry->nr_reclaimed = nr_reclaimed;
>> +		__entry->nr_dirty = stat->nr_dirty;
>> +		__entry->nr_writeback = stat->nr_writeback;
>> +		__entry->nr_congested = stat->nr_congested;
>> +		__entry->nr_immediate = stat->nr_immediate;
>> +		__entry->nr_activate0 = stat->nr_activate[0];
>> +		__entry->nr_activate1 = stat->nr_activate[1];
>> +		__entry->nr_ref_keep = stat->nr_ref_keep;
>> +		__entry->nr_unmap_fail = stat->nr_unmap_fail;
>> +		__entry->priority = priority;
>> +		__entry->reclaim_flags = trace_reclaim_flags(file);
>> +	),
>> +
>> +	TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
>> +		__entry->nid, __entry->nr_reclaimed,
>> +		__entry->nr_dirty, __entry->nr_writeback,
>> +		__entry->nr_congested, __entry->nr_immediate,
>> +		__entry->nr_activate0, __entry->nr_activate1,
>> +		__entry->nr_ref_keep, __entry->nr_unmap_fail,
>> +		__entry->priority,
>> +		show_reclaim_flags(__entry->reclaim_flags))
>> +);
>> +
>>  TRACE_EVENT(mm_vmscan_node_reclaim_begin,
>>  
>>  	TP_PROTO(int nid, int order, gfp_t gfp_flags),
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index 6f13394b112e..cc10e3fb8fa2 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -5005,6 +5005,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>  	int sorted = 0;
>>  	int scanned = 0;
>>  	int isolated = 0;
>> +	int skipped = 0;
>>  	int remaining = MAX_LRU_BATCH;
>>  	struct lru_gen_folio *lrugen = &lruvec->lrugen;
>>  	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>> @@ -5018,7 +5019,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>  
>>  	for (i = MAX_NR_ZONES; i > 0; i--) {
>>  		LIST_HEAD(moved);
>> -		int skipped = 0;
>> +		int skipped_zone = 0;
>>  		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
>>  		struct list_head *head = &lrugen->folios[gen][type][zone];
>>  
>> @@ -5040,16 +5041,17 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>  				isolated += delta;
>>  			} else {
>>  				list_move(&folio->lru, &moved);
>> -				skipped += delta;
>> +				skipped_zone += delta;
>>  			}
>>  
>> -			if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
>> +			if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
>>  				break;
>>  		}
>>  
>> -		if (skipped) {
>> +		if (skipped_zone) {
>>  			list_splice(&moved, head);
>> -			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
>> +			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
>> +			skipped += skipped_zone;
>>  		}
>>  
>>  		if (!remaining || isolated >= MIN_LRU_BATCH)
>> @@ -5065,6 +5067,10 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>  	__count_memcg_events(memcg, PGREFILL, sorted);
>>  	__count_vm_events(PGSCAN_ANON + type, isolated);
>>  
>> +	if (scanned)
>
>BTW, you can make this branch conditional with the trace event logic, so
>that it isn't tested when tracing is enabled. That is, remove the
> "if (scanned)" test and use TRACE_EVENT_CONDITION() as I show below.
>
>> +		trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order,
>> +				MAX_LRU_BATCH, scanned, skipped, isolated,
>> +				sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
>
>Why not pass the sc in to the trace event, and then do the assigning there?
>
>// use CONDITION to test scanned
>
>TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
>
>	TP_PROTO(struct scan_control *sc,
>		unsigned long nr_requested,
>		unsigned long nr_scanned,
>		unsigned long nr_skipped,
>		unsigned long nr_taken,
>		int lru),
>
>	TP_ARGS(...)
>
>	TP_CONDITION(nr_scanned)
>
>	TP_fast_assign(
>		__entry->highest_zoneidx = sc->reclaim_idx;
>		__entry->order = sc->order;
>		__entry->nr_requested = nr_requested;
>		__entry->nr_scanned = nr_scanned;
>		__entry->nr_skipped = nr_skipped;
>		__entry->nr_taken = nr_taken;
>		__entry->isolate_mode = (__force unsigned int)(sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
>		__entry->lru = lru;
>	),
>
>Lots of parameters can be expensive to pass, as it requires more copying.
>
>-- Steve

Great. Thank you for your comment.

For the putting the struct scan_control *sc inside the trace,
I couldn't do that because struct scan_control is defined in mm/vmscan.c.
I think I should not move it to a seperate header file.

As you may expect, I just made this by copying the existing
trace_mm_vmscan_lru_isolate and trace_mm_vmscan_lru_shrink_inactive

I've tried to change like this.
Would this be good for you?


--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -327,7 +327,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
                __print_symbolic(__entry->lru, LRU_NAMES))
 );
 
-TRACE_EVENT(mm_vmscan_lru_gen_scan,
+TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
        TP_PROTO(int highest_zoneidx,
                int order,
                unsigned long nr_requested,
@@ -339,6 +339,8 @@ TRACE_EVENT(mm_vmscan_lru_gen_scan,
 
        TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
 
+       TP_CONDITION(nr_scanned),
+
        TP_STRUCT__entry(
                __field(int, highest_zoneidx)
                __field(int, order)
@@ -494,7 +496,6 @@ TRACE_EVENT(mm_vmscan_lru_gen_evict,
        TP_ARGS(nid, nr_reclaimed, stat, priority, file),
 
        TP_STRUCT__entry(
-               __field(int, nid)
                __field(unsigned long, nr_reclaimed)
                __field(unsigned long, nr_dirty)
                __field(unsigned long, nr_writeback)
@@ -504,6 +505,7 @@ TRACE_EVENT(mm_vmscan_lru_gen_evict,
                __field(unsigned int, nr_activate1)
                __field(unsigned long, nr_ref_keep)
                __field(unsigned long, nr_unmap_fail)
+               __field(int, nid)
                __field(int, priority)
                __field(int, reclaim_flags)
        ),

--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5131,10 +5131,9 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
        __count_memcg_events(memcg, PGREFILL, sorted);
        __count_vm_events(PGSCAN_ANON + type, isolated);
 
-       if (scanned)
-               trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order,
-                               MAX_LRU_BATCH, scanned, skipped, isolated,
-                               sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
+       trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
+                       scanned, skipped, isolated,
+                       sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);



>
>
>>  	/*
>>  	 * There might not be eligible folios due to reclaim_idx. Check the
>>  	 * remaining to prevent livelock if it's not making progress.
>> @@ -5194,6 +5200,8 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
>>  retry:
>>  	reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
>>  	sc->nr_reclaimed += reclaimed;
>> +	trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
>> +				      sc->priority, type);
>>  
>>  	list_for_each_entry_safe_reverse(folio, next, &list, lru) {
>>  		if (!folio_evictable(folio)) {
>
Steven Rostedt Sept. 20, 2023, 2:32 p.m. UTC | #3
On Wed, 20 Sep 2023 16:49:48 +0900
김재원 <jaewon31.kim@samsung.com> wrote:

> Great. Thank you for your comment.
> 
> For the putting the struct scan_control *sc inside the trace,
> I couldn't do that because struct scan_control is defined in mm/vmscan.c.
> I think I should not move it to a seperate header file.

Well if you ever decide to do so, one thing to do is to move the
trace/events/vmscan.h into mm/ as trace_vmscan.h so that it would have
access to local header files. Then all you need to do is to move the
struct scan_control into a local mm/X.h header file.

> 
> As you may expect, I just made this by copying the existing
> trace_mm_vmscan_lru_isolate and trace_mm_vmscan_lru_shrink_inactive
> 
> I've tried to change like this.
> Would this be good for you?

The below looks fine to me. Thanks.

-- Steve

> 
> 
> --- a/include/trace/events/vmscan.h
> +++ b/include/trace/events/vmscan.h
> @@ -327,7 +327,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
>                 __print_symbolic(__entry->lru, LRU_NAMES))
>  );
>  
> -TRACE_EVENT(mm_vmscan_lru_gen_scan,
> +TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
>         TP_PROTO(int highest_zoneidx,
>                 int order,
>                 unsigned long nr_requested,
> @@ -339,6 +339,8 @@ TRACE_EVENT(mm_vmscan_lru_gen_scan,
>  
>         TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
>  
> +       TP_CONDITION(nr_scanned),
> +
>         TP_STRUCT__entry(
>                 __field(int, highest_zoneidx)
>                 __field(int, order)
> @@ -494,7 +496,6 @@ TRACE_EVENT(mm_vmscan_lru_gen_evict,
>         TP_ARGS(nid, nr_reclaimed, stat, priority, file),
>  
>         TP_STRUCT__entry(
> -               __field(int, nid)
>                 __field(unsigned long, nr_reclaimed)
>                 __field(unsigned long, nr_dirty)
>                 __field(unsigned long, nr_writeback)
> @@ -504,6 +505,7 @@ TRACE_EVENT(mm_vmscan_lru_gen_evict,
>                 __field(unsigned int, nr_activate1)
>                 __field(unsigned long, nr_ref_keep)
>                 __field(unsigned long, nr_unmap_fail)
> +               __field(int, nid)
>                 __field(int, priority)
>                 __field(int, reclaim_flags)
>         ),
> 
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -5131,10 +5131,9 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>         __count_memcg_events(memcg, PGREFILL, sorted);
>         __count_vm_events(PGSCAN_ANON + type, isolated);
>  
> -       if (scanned)
> -               trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order,
> -                               MAX_LRU_BATCH, scanned, skipped, isolated,
> -                               sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
> +       trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
> +                       scanned, skipped, isolated,
> +                       sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
> 
> 
> 
> >
Jaewon Kim Sept. 21, 2023, 6:09 a.m. UTC | #4
>> Great. Thank you for your comment.
>> 
>> For the putting the struct scan_control *sc inside the trace,
>> I couldn't do that because struct scan_control is defined in mm/vmscan.c.
>> I think I should not move it to a seperate header file.
>
>Well if you ever decide to do so, one thing to do is to move the
>trace/events/vmscan.h into mm/ as trace_vmscan.h so that it would have
>access to local header files. Then all you need to do is to move the
>struct scan_control into a local mm/X.h header file.
>
>> 
>> As you may expect, I just made this by copying the existing
>> trace_mm_vmscan_lru_isolate and trace_mm_vmscan_lru_shrink_inactive
>> 
>> I've tried to change like this.
>> Would this be good for you?
>
>The below looks fine to me. Thanks.
>
>-- Steve

Thank you for your comment.
But I still don't know if I can move the strut scan_control into a
new separate header file, mm/X.h. I guess it was meant to be located
in vmscan.c only.

Let me just send v2 patch with only this change, and wait for other
mm guys comments regarding the moving the struct scan_control thing.

Thank you very much.

>
>> 
>> 
>> --- a/include/trace/events/vmscan.h
>> +++ b/include/trace/events/vmscan.h
>> @@ -327,7 +327,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
>>                 __print_symbolic(__entry->lru, LRU_NAMES))
>>  );
>>  
>> -TRACE_EVENT(mm_vmscan_lru_gen_scan,
>> +TRACE_EVENT_CONDITION(mm_vmscan_lru_gen_scan,
>>         TP_PROTO(int highest_zoneidx,
>>                 int order,
>>                 unsigned long nr_requested,
>> @@ -339,6 +339,8 @@ TRACE_EVENT(mm_vmscan_lru_gen_scan,
>>  
>>         TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
>>  
>> +       TP_CONDITION(nr_scanned),
>> +
>>         TP_STRUCT__entry(
>>                 __field(int, highest_zoneidx)
>>                 __field(int, order)
>> @@ -494,7 +496,6 @@ TRACE_EVENT(mm_vmscan_lru_gen_evict,
>>         TP_ARGS(nid, nr_reclaimed, stat, priority, file),
>>  
>>         TP_STRUCT__entry(
>> -               __field(int, nid)
>>                 __field(unsigned long, nr_reclaimed)
>>                 __field(unsigned long, nr_dirty)
>>                 __field(unsigned long, nr_writeback)
>> @@ -504,6 +505,7 @@ TRACE_EVENT(mm_vmscan_lru_gen_evict,
>>                 __field(unsigned int, nr_activate1)
>>                 __field(unsigned long, nr_ref_keep)
>>                 __field(unsigned long, nr_unmap_fail)
>> +               __field(int, nid)
>>                 __field(int, priority)
>>                 __field(int, reclaim_flags)
>>         ),
>> 
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -5131,10 +5131,9 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
>>         __count_memcg_events(memcg, PGREFILL, sorted);
>>         __count_vm_events(PGSCAN_ANON + type, isolated);
>>  
>> -       if (scanned)
>> -               trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order,
>> -                               MAX_LRU_BATCH, scanned, skipped, isolated,
>> -                               sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
>> +       trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order, MAX_LRU_BATCH,
>> +                       scanned, skipped, isolated,
>> +                       sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
>> 
>> 
>> 
>> >
diff mbox series

Patch

diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 1478b9dd05fa..44e9b38f83e7 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -274,6 +274,10 @@  IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 		EM (LRU_ACTIVE_FILE, "active_file") \
 		EMe(LRU_UNEVICTABLE, "unevictable")
 
+#define LRU_GEN_NAMES		\
+		EM (LRU_GEN_ANON, "anon") \
+		EMe(LRU_GEN_FILE, "file")
+
 /*
  * First define the enums in the above macros to be exported to userspace
  * via TRACE_DEFINE_ENUM().
@@ -288,6 +292,7 @@  COMPACTION_PRIORITY
 /* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
 ZONE_TYPE
 LRU_NAMES
+LRU_GEN_NAMES
 
 /*
  * Now redefine the EM() and EMe() macros to map the enums to the strings
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index d2123dd960d5..e8f9d0452e89 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -327,6 +327,55 @@  TRACE_EVENT(mm_vmscan_lru_isolate,
 		__print_symbolic(__entry->lru, LRU_NAMES))
 );
 
+TRACE_EVENT(mm_vmscan_lru_gen_scan,
+	TP_PROTO(int highest_zoneidx,
+		int order,
+		unsigned long nr_requested,
+		unsigned long nr_scanned,
+		unsigned long nr_skipped,
+		unsigned long nr_taken,
+		isolate_mode_t isolate_mode,
+		int lru),
+
+	TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru),
+
+	TP_STRUCT__entry(
+		__field(int, highest_zoneidx)
+		__field(int, order)
+		__field(unsigned long, nr_requested)
+		__field(unsigned long, nr_scanned)
+		__field(unsigned long, nr_skipped)
+		__field(unsigned long, nr_taken)
+		__field(unsigned int, isolate_mode)
+		__field(int, lru)
+	),
+
+	TP_fast_assign(
+		__entry->highest_zoneidx = highest_zoneidx;
+		__entry->order = order;
+		__entry->nr_requested = nr_requested;
+		__entry->nr_scanned = nr_scanned;
+		__entry->nr_skipped = nr_skipped;
+		__entry->nr_taken = nr_taken;
+		__entry->isolate_mode = (__force unsigned int)isolate_mode;
+		__entry->lru = lru;
+	),
+
+	/*
+	 * classzone is previous name of the highest_zoneidx.
+	 * Reason not to change it is the ABI requirement of the tracepoint.
+	 */
+	TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s",
+		__entry->isolate_mode,
+		__entry->highest_zoneidx,
+		__entry->order,
+		__entry->nr_requested,
+		__entry->nr_scanned,
+		__entry->nr_skipped,
+		__entry->nr_taken,
+		__print_symbolic(__entry->lru, LRU_GEN_NAMES))
+);
+
 TRACE_EVENT(mm_vmscan_write_folio,
 
 	TP_PROTO(struct folio *folio),
@@ -437,6 +486,53 @@  TRACE_EVENT(mm_vmscan_lru_shrink_active,
 		show_reclaim_flags(__entry->reclaim_flags))
 );
 
+TRACE_EVENT(mm_vmscan_lru_gen_evict,
+
+	TP_PROTO(int nid, unsigned long nr_reclaimed,
+		struct reclaim_stat *stat, int priority, int file),
+
+	TP_ARGS(nid, nr_reclaimed, stat, priority, file),
+
+	TP_STRUCT__entry(
+		__field(int, nid)
+		__field(unsigned long, nr_reclaimed)
+		__field(unsigned long, nr_dirty)
+		__field(unsigned long, nr_writeback)
+		__field(unsigned long, nr_congested)
+		__field(unsigned long, nr_immediate)
+		__field(unsigned int, nr_activate0)
+		__field(unsigned int, nr_activate1)
+		__field(unsigned long, nr_ref_keep)
+		__field(unsigned long, nr_unmap_fail)
+		__field(int, priority)
+		__field(int, reclaim_flags)
+	),
+
+	TP_fast_assign(
+		__entry->nid = nid;
+		__entry->nr_reclaimed = nr_reclaimed;
+		__entry->nr_dirty = stat->nr_dirty;
+		__entry->nr_writeback = stat->nr_writeback;
+		__entry->nr_congested = stat->nr_congested;
+		__entry->nr_immediate = stat->nr_immediate;
+		__entry->nr_activate0 = stat->nr_activate[0];
+		__entry->nr_activate1 = stat->nr_activate[1];
+		__entry->nr_ref_keep = stat->nr_ref_keep;
+		__entry->nr_unmap_fail = stat->nr_unmap_fail;
+		__entry->priority = priority;
+		__entry->reclaim_flags = trace_reclaim_flags(file);
+	),
+
+	TP_printk("nid=%d nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld priority=%d flags=%s",
+		__entry->nid, __entry->nr_reclaimed,
+		__entry->nr_dirty, __entry->nr_writeback,
+		__entry->nr_congested, __entry->nr_immediate,
+		__entry->nr_activate0, __entry->nr_activate1,
+		__entry->nr_ref_keep, __entry->nr_unmap_fail,
+		__entry->priority,
+		show_reclaim_flags(__entry->reclaim_flags))
+);
+
 TRACE_EVENT(mm_vmscan_node_reclaim_begin,
 
 	TP_PROTO(int nid, int order, gfp_t gfp_flags),
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6f13394b112e..cc10e3fb8fa2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5005,6 +5005,7 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 	int sorted = 0;
 	int scanned = 0;
 	int isolated = 0;
+	int skipped = 0;
 	int remaining = MAX_LRU_BATCH;
 	struct lru_gen_folio *lrugen = &lruvec->lrugen;
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
@@ -5018,7 +5019,7 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 
 	for (i = MAX_NR_ZONES; i > 0; i--) {
 		LIST_HEAD(moved);
-		int skipped = 0;
+		int skipped_zone = 0;
 		int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES;
 		struct list_head *head = &lrugen->folios[gen][type][zone];
 
@@ -5040,16 +5041,17 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 				isolated += delta;
 			} else {
 				list_move(&folio->lru, &moved);
-				skipped += delta;
+				skipped_zone += delta;
 			}
 
-			if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH)
+			if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH)
 				break;
 		}
 
-		if (skipped) {
+		if (skipped_zone) {
 			list_splice(&moved, head);
-			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
+			__count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone);
+			skipped += skipped_zone;
 		}
 
 		if (!remaining || isolated >= MIN_LRU_BATCH)
@@ -5065,6 +5067,10 @@  static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 	__count_memcg_events(memcg, PGREFILL, sorted);
 	__count_vm_events(PGSCAN_ANON + type, isolated);
 
+	if (scanned)
+		trace_mm_vmscan_lru_gen_scan(sc->reclaim_idx, sc->order,
+				MAX_LRU_BATCH, scanned, skipped, isolated,
+				sc->may_unmap ? 0 : ISOLATE_UNMAPPED, type);
 	/*
 	 * There might not be eligible folios due to reclaim_idx. Check the
 	 * remaining to prevent livelock if it's not making progress.
@@ -5194,6 +5200,8 @@  static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
 retry:
 	reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
 	sc->nr_reclaimed += reclaimed;
+	trace_mm_vmscan_lru_gen_evict(pgdat->node_id, reclaimed, &stat,
+				      sc->priority, type);
 
 	list_for_each_entry_safe_reverse(folio, next, &list, lru) {
 		if (!folio_evictable(folio)) {