From 2d00946bd7f4e8c17cbd2fce5fd7c3ab58046dff Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 7 Sep 2023 02:29:29 +0000 Subject: mm/damon/core: remove 'struct target *' parameter from damon_aggregated tracepoint damon_aggregateed tracepoint is receiving 'struct target *', but doesn't use it. Remove it from the prototype. Link: https://lkml.kernel.org/r/20230907022929.91361-12-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/trace/events/damon.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index c79f1d4c39af..0b8d13bde17a 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -11,10 +11,10 @@ TRACE_EVENT(damon_aggregated, - TP_PROTO(struct damon_target *t, unsigned int target_id, - struct damon_region *r, unsigned int nr_regions), + TP_PROTO(unsigned int target_id, struct damon_region *r, + unsigned int nr_regions), - TP_ARGS(t, target_id, r, nr_regions), + TP_ARGS(target_id, r, nr_regions), TP_STRUCT__entry( __field(unsigned long, target_id) -- cgit v1.2.3 From c603c630b509d690d470b9b49eb96f40482f47d5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 13 Sep 2023 02:20:49 +0000 Subject: mm/damon/core: add a tracepoint for damos apply target regions Patch series "mm/damon: add a tracepoint for damos apply target regions", v2. DAMON provides damon_aggregated tracepoint to let users record full monitoring results. Sometimes, users need to record monitoring results of specific pattern. DAMOS tried regions directory of DAMON sysfs interface allows it, but the interface is mainly designed for snapshots and therefore would be inefficient for such recording. Implement yet another tracepoint for efficient support of the usecase. This patch (of 2): DAMON provides damon_aggregated tracepoint, which exposes details of each region and its access monitoring results. It is useful for getting whole monitoring results, e.g., for recording purposes. For investigations of DAMOS, DAMON Sysfs interface provides DAMOS statistics and tried_regions directory. But, those provides only statistics and snapshots. If the scheme is frequently applied and if the user needs to know every detail of DAMOS behavior, the snapshot-based interface could be insufficient and expensive. As a last resort, userspace users need to record the all monitoring results via damon_aggregated tracepoint and simulate how DAMOS would worked. It is unnecessarily complicated. DAMON kernel API users, meanwhile, can do that easily via before_damos_apply() callback field of 'struct damon_callback', though. Add a tracepoint that will be called just after before_damos_apply() callback for more convenient investigations of DAMOS. The tracepoint exposes all details about each regions, similar to damon_aggregated tracepoint. Please note that DAMOS is currently not only for memory management but also for query-like efficient monitoring results retrievals (when 'stat' action is used). Until now, only statistics or snapshots were supported. Addition of this tracepoint allows efficient full recording of DAMOS-based filtered monitoring results. Link: https://lkml.kernel.org/r/20230913022050.2109-1-sj@kernel.org Link: https://lkml.kernel.org/r/20230913022050.2109-2-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Steven Rostedt (Google) [tracing] Cc: Jonathan Corbet Signed-off-by: Andrew Morton --- include/trace/events/damon.h | 39 +++++++++++++++++++++++++++++++++++++++ mm/damon/core.c | 32 +++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index 0b8d13bde17a..19930bb7af9a 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -9,6 +9,45 @@ #include #include +TRACE_EVENT_CONDITION(damos_before_apply, + + TP_PROTO(unsigned int context_idx, unsigned int scheme_idx, + unsigned int target_idx, struct damon_region *r, + unsigned int nr_regions, bool do_trace), + + TP_ARGS(context_idx, target_idx, scheme_idx, r, nr_regions, do_trace), + + TP_CONDITION(do_trace), + + TP_STRUCT__entry( + __field(unsigned int, context_idx) + __field(unsigned int, scheme_idx) + __field(unsigned long, target_idx) + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned int, nr_accesses) + __field(unsigned int, age) + __field(unsigned int, nr_regions) + ), + + TP_fast_assign( + __entry->context_idx = context_idx; + __entry->scheme_idx = scheme_idx; + __entry->target_idx = target_idx; + __entry->start = r->ar.start; + __entry->end = r->ar.end; + __entry->nr_accesses = r->nr_accesses; + __entry->age = r->age; + __entry->nr_regions = nr_regions; + ), + + TP_printk("ctx_idx=%u scheme_idx=%u target_idx=%lu nr_regions=%u %lu-%lu: %u %u", + __entry->context_idx, __entry->scheme_idx, + __entry->target_idx, __entry->nr_regions, + __entry->start, __entry->end, + __entry->nr_accesses, __entry->age) +); + TRACE_EVENT(damon_aggregated, TP_PROTO(unsigned int target_id, struct damon_region *r, diff --git a/mm/damon/core.c b/mm/damon/core.c index ca631dd88b33..3ca34a252a3c 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -950,6 +950,33 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, struct timespec64 begin, end; unsigned long sz_applied = 0; int err = 0; + /* + * We plan to support multiple context per kdamond, as DAMON sysfs + * implies with 'nr_contexts' file. Nevertheless, only single context + * per kdamond is supported for now. So, we can simply use '0' context + * index here. + */ + unsigned int cidx = 0; + struct damos *siter; /* schemes iterator */ + unsigned int sidx = 0; + struct damon_target *titer; /* targets iterator */ + unsigned int tidx = 0; + bool do_trace = false; + + /* get indices for trace_damos_before_apply() */ + if (trace_damos_before_apply_enabled()) { + damon_for_each_scheme(siter, c) { + if (siter == s) + break; + sidx++; + } + damon_for_each_target(titer, c) { + if (titer == t) + break; + tidx++; + } + do_trace = true; + } if (c->ops.apply_scheme) { if (quota->esz && quota->charged_sz + sz > quota->esz) { @@ -964,8 +991,11 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, ktime_get_coarse_ts64(&begin); if (c->callback.before_damos_apply) err = c->callback.before_damos_apply(c, t, r, s); - if (!err) + if (!err) { + trace_damos_before_apply(cidx, sidx, tidx, r, + damon_nr_regions(t), do_trace); sz_applied = c->ops.apply_scheme(c, t, r, s); + } ktime_get_coarse_ts64(&end); quota->total_charged_ns += timespec64_to_ns(&end) - timespec64_to_ns(&begin); -- cgit v1.2.3 From 3dfbb555c98ac55b9d911f9af0e35014b445fb41 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 14 Sep 2023 15:16:39 +0200 Subject: mm, vmscan: remove ISOLATE_UNMAPPED This isolate_mode_t flag is effectively unused since 89f6c88a6ab4 ("mm: __isolate_lru_page_prepare() in isolate_migratepages_block()") as sc->may_unmap is now checked directly (and only node_reclaim has a mode that sets it to 0). The last remaining place is mm_vmscan_lru_isolate tracepoint for the isolate_mode parameter. That one was mainly used to indicate the active/inactive mode, which the trace-vmscan-postprocess.pl script consumed, but that got silently broken. After fixing the script by the previous patch, it does not need the isolate_mode anymore. So just remove the parameter and with that the whole ISOLATE_UNMAPPED flag. Link: https://lkml.kernel.org/r/20230914131637.12204-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Cc: Hugh Dickins Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 8 ++++---- include/linux/mmzone.h | 2 -- include/trace/events/vmscan.h | 8 ++------ mm/vmscan.c | 3 +-- 4 files changed, 7 insertions(+), 14 deletions(-) (limited to 'include/trace') diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 725d41a8d4ef..048dc0dbce64 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -112,7 +112,7 @@ my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; my $regex_wakeup_kswapd_default = 'nid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)'; -my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; +my $regex_lru_isolate_default = 'classzone=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate_anon=([0-9]*) nr_activate_file=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_taken=([0-9]*) nr_active=([0-9]*) nr_deactivated=([0-9]*) nr_referenced=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)' ; my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) flags=([A-Z_|]*)'; @@ -204,7 +204,7 @@ $regex_wakeup_kswapd = generate_traceevent_regex( $regex_lru_isolate = generate_traceevent_regex( "vmscan/mm_vmscan_lru_isolate", $regex_lru_isolate_default, - "isolate_mode", classzone", "order", + "classzone", "order", "nr_requested", "nr_scanned", "nr_skipped", "nr_taken", "lru"); $regex_lru_shrink_inactive = generate_traceevent_regex( @@ -379,8 +379,8 @@ EVENT_PROCESS: print " $regex_lru_isolate/o\n"; next; } - my $nr_scanned = $5; - my $lru = $8; + my $nr_scanned = $4; + my $lru = $7; # To closer match vmstat scanning statistics, only count # inactive lru as scanning diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4106fbc5b4b3..486587fcd27f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -639,8 +639,6 @@ struct lruvec { #endif }; -/* Isolate unmapped pages */ -#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* Isolate unevictable pages */ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index d2123dd960d5..1a488c30afa5 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -285,10 +285,9 @@ TRACE_EVENT(mm_vmscan_lru_isolate, unsigned long nr_scanned, unsigned long nr_skipped, unsigned long nr_taken, - isolate_mode_t isolate_mode, int lru), - TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, isolate_mode, lru), + TP_ARGS(highest_zoneidx, order, nr_requested, nr_scanned, nr_skipped, nr_taken, lru), TP_STRUCT__entry( __field(int, highest_zoneidx) @@ -297,7 +296,6 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __field(unsigned long, nr_scanned) __field(unsigned long, nr_skipped) __field(unsigned long, nr_taken) - __field(unsigned int, isolate_mode) __field(int, lru) ), @@ -308,7 +306,6 @@ TRACE_EVENT(mm_vmscan_lru_isolate, __entry->nr_scanned = nr_scanned; __entry->nr_skipped = nr_skipped; __entry->nr_taken = nr_taken; - __entry->isolate_mode = (__force unsigned int)isolate_mode; __entry->lru = lru; ), @@ -316,8 +313,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, * classzone is previous name of the highest_zoneidx. * Reason not to change it is the ABI requirement of the tracepoint. */ - TP_printk("isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", - __entry->isolate_mode, + TP_printk("classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", __entry->highest_zoneidx, __entry->order, __entry->nr_requested, diff --git a/mm/vmscan.c b/mm/vmscan.c index acf115468bf8..3df0e2a59052 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1689,8 +1689,7 @@ move: } *nr_scanned = total_scan; trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, - total_scan, skipped, nr_taken, - sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru); + total_scan, skipped, nr_taken, lru); update_lru_sizes(lruvec, lru, nr_zone_taken); return nr_taken; } -- cgit v1.2.3 From a72217ad596ec8a957e1f73e45817c3664d99f1e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 16 Sep 2023 02:09:39 +0000 Subject: mm/damon/core: use nr_accesses_bp as a source of damos_before_apply tracepoint damos_before_apply tracepoint is exposing access rate of DAMON regions using nr_accesses field of regions, which was actually used by DAMOS in the past. However, it has changed to use nr_accesses_bp instead. Update the tracepoint to expose the value that DAMOS is really using. Note that it doesn't expose the value as is in the basis point, but after converting it to the natural number by dividing it by 10,000. Therefore this change doesn't make user-visible behavioral differences. Link: https://lkml.kernel.org/r/20230916020945.47296-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/trace/events/damon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h index 19930bb7af9a..23200aabccac 100644 --- a/include/trace/events/damon.h +++ b/include/trace/events/damon.h @@ -36,7 +36,7 @@ TRACE_EVENT_CONDITION(damos_before_apply, __entry->target_idx = target_idx; __entry->start = r->ar.start; __entry->end = r->ar.end; - __entry->nr_accesses = r->nr_accesses; + __entry->nr_accesses = r->nr_accesses_bp / 10000; __entry->age = r->age; __entry->nr_regions = nr_regions; ), -- cgit v1.2.3 From 49cac03a8f0a56cafa5329911564c97c130ced43 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Tue, 17 Oct 2023 12:31:29 -0400 Subject: mm/migrate: add nr_split to trace_mm_migrate_pages stats. Add nr_split to trace_mm_migrate_pages for large folio (including THP) split events. [akpm@linux-foundation.org: cleanup per Huang, Ying] Link: https://lkml.kernel.org/r/20231017163129.2025214-2-zi.yan@sent.com Signed-off-by: Zi Yan Reviewed-by: "Huang, Ying" Reviewed-by: Baolin Wang Cc: David Hildenbrand Cc: Huang Ying Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/trace/events/migrate.h | 24 ++++++++++++++---------- mm/migrate.c | 5 +++-- 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 061b5128f335..0190ef725b43 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -49,10 +49,11 @@ TRACE_EVENT(mm_migrate_pages, TP_PROTO(unsigned long succeeded, unsigned long failed, unsigned long thp_succeeded, unsigned long thp_failed, - unsigned long thp_split, enum migrate_mode mode, int reason), + unsigned long thp_split, unsigned long large_folio_split, + enum migrate_mode mode, int reason), TP_ARGS(succeeded, failed, thp_succeeded, thp_failed, - thp_split, mode, reason), + thp_split, large_folio_split, mode, reason), TP_STRUCT__entry( __field( unsigned long, succeeded) @@ -60,26 +61,29 @@ TRACE_EVENT(mm_migrate_pages, __field( unsigned long, thp_succeeded) __field( unsigned long, thp_failed) __field( unsigned long, thp_split) + __field( unsigned long, large_folio_split) __field( enum migrate_mode, mode) __field( int, reason) ), TP_fast_assign( - __entry->succeeded = succeeded; - __entry->failed = failed; - __entry->thp_succeeded = thp_succeeded; - __entry->thp_failed = thp_failed; - __entry->thp_split = thp_split; - __entry->mode = mode; - __entry->reason = reason; + __entry->succeeded = succeeded; + __entry->failed = failed; + __entry->thp_succeeded = thp_succeeded; + __entry->thp_failed = thp_failed; + __entry->thp_split = thp_split; + __entry->large_folio_split = large_folio_split; + __entry->mode = mode; + __entry->reason = reason; ), - TP_printk("nr_succeeded=%lu nr_failed=%lu nr_thp_succeeded=%lu nr_thp_failed=%lu nr_thp_split=%lu mode=%s reason=%s", + TP_printk("nr_succeeded=%lu nr_failed=%lu nr_thp_succeeded=%lu nr_thp_failed=%lu nr_thp_split=%lu nr_split=%lu mode=%s reason=%s", __entry->succeeded, __entry->failed, __entry->thp_succeeded, __entry->thp_failed, __entry->thp_split, + __entry->large_folio_split, __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); diff --git a/mm/migrate.c b/mm/migrate.c index 53244d283c3e..125194f5af0f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1686,7 +1686,7 @@ static int migrate_pages_batch(struct list_head *from, if (!ret) { stats->nr_thp_split += is_thp; - stats->nr_split += is_large; + stats->nr_split++; break; } else if (reason == MR_LONGTERM_PIN && ret == -EAGAIN) { @@ -1979,7 +1979,8 @@ out: count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split); trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages, stats.nr_thp_succeeded, stats.nr_thp_failed, - stats.nr_thp_split, mode, reason); + stats.nr_thp_split, stats.nr_split, mode, + reason); if (ret_succeeded) *ret_succeeded = stats.nr_succeeded; -- cgit v1.2.3