From ab755bf4249b992fc2140d615ab0a686d50765b4 Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Tue, 20 Feb 2024 14:16:31 +0800
Subject: mm: compaction: update the cc->nr_migratepages when allocating or freeing the freepages

Currently the 'cc->nr_freepages >= cc->nr_migratepages' comparison is used
to ensure that enough freepages have been isolated in isolate_freepages();
however, compaction_alloc() only decreases cc->nr_freepages without
updating cc->nr_migratepages, which wastes CPU cycles and causes too many
freepages to be isolated.

So also update cc->nr_migratepages when allocating or freeing the
freepages, to avoid isolating excess freepages.  With this change I can
see fewer free pages being scanned and isolated when running thpcompact on
my Arm64 server:

                                           k6.7      k6.7_patched
Ops Compaction pages isolated     120692036.00      118160797.00
Ops Compaction migrate scanned    131210329.00      154093268.00
Ops Compaction free scanned      1090587971.00     1080632536.00
Ops Compact scan efficiency               12.03             14.26

Moreover, I did not see an obvious latency improvement; this is likely
because isolating freepages is not the bottleneck in the thpcompact test
case.

                              k6.7                  k6.7_patched
Amean fault-both-1        1089.76 (   0.00%)     1080.16 *   0.88%*
Amean fault-both-3        1616.48 (   0.00%)     1636.65 *  -1.25%*
Amean fault-both-5        2266.66 (   0.00%)     2219.20 *   2.09%*
Amean fault-both-7        2909.84 (   0.00%)     2801.90 *   3.71%*
Amean fault-both-12       4861.26 (   0.00%)     4733.25 *   2.63%*
Amean fault-both-18       7351.11 (   0.00%)     6950.51 *   5.45%*
Amean fault-both-24       9059.30 (   0.00%)     9159.99 *  -1.11%*
Amean fault-both-30      10685.68 (   0.00%)    11399.02 *  -6.68%*

Link: https://lkml.kernel.org/r/6440493f18da82298152b6305d6b41c2962a3ce6.1708409245.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang
Acked-by: Mel Gorman
Reviewed-by: Vlastimil Babka
Cc: Masami Hiramatsu
Cc: Mathieu Desnoyers
Cc: Steven Rostedt
Cc: Zi Yan
Signed-off-by: Andrew Morton
---
 include/trace/events/compaction.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/trace')

diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 2b2a975efd20..d05759d18538 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -78,10 +78,10 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_fast_isolate_freepage
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(mm_compaction_migratepages,
 
-	TP_PROTO(struct compact_control *cc,
+	TP_PROTO(unsigned int nr_migratepages,
 		unsigned int nr_succeeded),
 
-	TP_ARGS(cc, nr_succeeded),
+	TP_ARGS(nr_migratepages, nr_succeeded),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, nr_migrated)
@@ -90,7 +90,7 @@ TRACE_EVENT(mm_compaction_migratepages,
 
 	TP_fast_assign(
 		__entry->nr_migrated = nr_succeeded;
-		__entry->nr_failed = cc->nr_migratepages - nr_succeeded;
+		__entry->nr_failed = nr_migratepages - nr_succeeded;
 	),
 
 	TP_printk("nr_migrated=%lu nr_failed=%lu",
-- 
cgit v1.2.3
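Note: the mm/compaction.c side of this change is filtered out of the view
above ("limited to 'include/trace'"), so only the tracepoint signature
change is visible.  A rough sketch of the accounting idea the changelog
describes, assuming the v6.7 shape of these helpers -- illustrative only,
not the actual mm/compaction.c hunk from this commit:

static struct folio *compaction_alloc(struct folio *src, unsigned long data)
{
	struct compact_control *cc = (struct compact_control *)data;
	struct folio *dst;

	if (list_empty(&cc->freepages)) {
		isolate_freepages(cc);
		if (list_empty(&cc->freepages))
			return NULL;
	}

	dst = list_entry(cc->freepages.next, struct folio, lru);
	list_del(&dst->lru);
	cc->nr_freepages--;
	cc->nr_migratepages--;	/* this free page now backs one pending migration */

	return dst;
}

static void compaction_free(struct folio *dst, unsigned long data)
{
	struct compact_control *cc = (struct compact_control *)data;

	list_add(&dst->lru, &cc->freepages);
	cc->nr_freepages++;
	cc->nr_migratepages++;	/* its pending migration lost its target again */
}

With the two counters kept in sync, the 'cc->nr_freepages >=
cc->nr_migratepages' check in isolate_freepages() no longer over-isolates
free pages.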
From 72ba14deb40a9e9668ec5e66a341ed657e5215c2 Mon Sep 17 00:00:00 2001
From: Carlos Galo
Date: Fri, 23 Feb 2024 17:32:49 +0000
Subject: mm: update mark_victim tracepoints fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current implementation of the mark_victim tracepoint provides only the
process ID (pid) of the victim process.  This limitation poses challenges
for userspace tools that require real-time OOM analysis and intervention.
Although this information is available in the kernel logs, it is not in an
appropriate format for delivering OOM notifications.

In Android, BPF programs attached to the mark_victim trace event are used
to notify userspace of an OOM kill.  For consistency, update the trace
event to include the same information about the OOM victim as the kernel
logs:

- UID
  In Android each installed application has a unique UID.  Including the
  `uid` assists in correlating OOM events with specific apps.

- Process name (comm)
  Enables identification of the affected process.

- OOM score
  Allows userspace to gain additional insight into the relative kill
  priority of the OOM victim.  In Android, the oom_score_adj is used to
  categorize app state (foreground, background, etc.), which aids in
  analyzing user-perceptible impacts of OOM events [1].

- Total VM, RSS stats, and pgtables
  The amount of memory used by the victim that will, potentially, be freed
  up by killing it.

[1] https://cs.android.com/android/platform/superproject/main/+/246dc8fc95b6d93afcba5c6d6c133307abb3ac2e:frameworks/base/services/core/java/com/android/server/am/ProcessList.java;l=188-283

Signed-off-by: Carlos Galo
Reviewed-by: Steven Rostedt
Cc: Suren Baghdasaryan
Cc: Michal Hocko
Cc: "Masami Hiramatsu (Google)"
Cc: Mathieu Desnoyers
Signed-off-by: Andrew Morton
---
 include/trace/events/oom.h | 36 ++++++++++++++++++++++++++++++++----
 mm/oom_kill.c              |  6 +++++-
 2 files changed, 37 insertions(+), 5 deletions(-)

(limited to 'include/trace')

diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 26a11e4a2c36..b799f3bcba82 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -7,6 +7,8 @@
 #include <linux/tracepoint.h>
 #include <trace/events/mmflags.h>
 
+#define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10))
+
 TRACE_EVENT(oom_score_adj_update,
 
 	TP_PROTO(struct task_struct *task),
@@ -72,19 +74,45 @@ TRACE_EVENT(reclaim_retry_zone,
 );
 
 TRACE_EVENT(mark_victim,
-	TP_PROTO(int pid),
+	TP_PROTO(struct task_struct *task, uid_t uid),
 
-	TP_ARGS(pid),
+	TP_ARGS(task, uid),
 
 	TP_STRUCT__entry(
 		__field(int, pid)
+		__string(comm, task->comm)
+		__field(unsigned long, total_vm)
+		__field(unsigned long, anon_rss)
+		__field(unsigned long, file_rss)
+		__field(unsigned long, shmem_rss)
+		__field(uid_t, uid)
+		__field(unsigned long, pgtables)
+		__field(short, oom_score_adj)
 	),
 
 	TP_fast_assign(
-		__entry->pid = pid;
+		__entry->pid = task->pid;
+		__assign_str(comm, task->comm);
+		__entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm);
+		__entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES));
+		__entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES));
+		__entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES));
+		__entry->uid = uid;
+		__entry->pgtables = mm_pgtables_bytes(task->mm) >> 10;
+		__entry->oom_score_adj = task->signal->oom_score_adj;
 	),
 
-	TP_printk("pid=%d", __entry->pid)
+	TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss:%lukB shmem-rss:%lukB uid=%u pgtables=%lukB oom_score_adj=%hd",
+		__entry->pid,
+		__get_str(comm),
+		__entry->total_vm,
+		__entry->anon_rss,
+		__entry->file_rss,
+		__entry->shmem_rss,
+		__entry->uid,
+		__entry->pgtables,
+		__entry->oom_score_adj
+	)
 );
 
 TRACE_EVENT(wake_reaper,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 91ccd82097c2..8d6a207c3c59 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -44,6 +44,7 @@
 #include <linux/kthread.h>
 #include <linux/init.h>
 #include <linux/mmu_notifier.h>
+#include <linux/cred.h>
 
 #include <asm/tlb.h>
 #include "internal.h"
@@ -754,6 +755,7 @@ static inline void queue_oom_reaper(struct task_struct *tsk)
  */
 static void mark_oom_victim(struct task_struct *tsk)
 {
+	const struct cred *cred;
 	struct mm_struct *mm = tsk->mm;
 
 	WARN_ON(oom_killer_disabled);
@@ -773,7 +775,9 @@ static void mark_oom_victim(struct task_struct *tsk)
 	 */
 	__thaw_task(tsk);
 	atomic_inc(&oom_victims);
-	trace_mark_victim(tsk->pid);
+	cred = get_task_cred(tsk);
+	trace_mark_victim(tsk, cred->uid.val);
+	put_cred(cred);
 }
 
 /**
-- 
cgit v1.2.3
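The enriched event can also be consumed without BPF.  A minimal userspace
sketch, assuming tracefs is mounted at /sys/kernel/tracing and the program
runs as root: it enables oom/mark_victim and streams the formatted records,
which with this patch carry comm, uid, the RSS breakdown, pgtables and
oom_score_adj in addition to the pid.

#include <stdio.h>

int main(void)
{
	FILE *f;
	char line[4096];

	/* enable the oom/mark_victim trace event */
	f = fopen("/sys/kernel/tracing/events/oom/mark_victim/enable", "w");
	if (!f) {
		perror("enable mark_victim");
		return 1;
	}
	fputs("1", f);
	fclose(f);

	/* block on trace_pipe and print each record as it arrives */
	f = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!f) {
		perror("trace_pipe");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}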
From c8b36003121834cb77fcaf8a1ce0a454d7a97891 Mon Sep 17 00:00:00 2001
From: Richard Chang
Date: Wed, 28 Feb 2024 05:11:17 +0000
Subject: mm: add alloc_contig_migrate_range allocation statistics

alloc_contig_migrate_range has all the information needed to understand
the latency of a big contiguous allocation: for example, how many pages
were migrated and how many times pages had to be unmapped from the page
tables.  This patch adds a trace event to collect those allocation
statistics.  In the field, it has been quite useful for understanding CMA
allocation latency.

[akpm@linux-foundation.org: a/trace_mm_alloc_config_migrate_range_info_enabled/trace_mm_alloc_contig_migrate_range_info_enabled]
Link: https://lkml.kernel.org/r/20240228051127.2859472-1-richardycc@google.com
Signed-off-by: Richard Chang
Reviewed-by: Steven Rostedt (Google)
Cc: "Masami Hiramatsu (Google)"
Cc: Mathieu Desnoyers
Cc: Minchan Kim
Cc: Suren Baghdasaryan
Signed-off-by: Andrew Morton
---
 include/trace/events/kmem.h | 38 ++++++++++++++++++++++++++++++++++++++
 mm/internal.h               |  3 ++-
 mm/page_alloc.c             | 32 +++++++++++++++++++++++++++-----
 mm/page_isolation.c         |  2 +-
 4 files changed, 68 insertions(+), 7 deletions(-)

(limited to 'include/trace')

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 58688768ef0f..6e62cc64cd92 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -304,6 +304,44 @@ TRACE_EVENT(mm_page_alloc_extfrag,
 		__entry->change_ownership)
 );
 
+TRACE_EVENT(mm_alloc_contig_migrate_range_info,
+
+	TP_PROTO(unsigned long start,
+		 unsigned long end,
+		 unsigned long nr_migrated,
+		 unsigned long nr_reclaimed,
+		 unsigned long nr_mapped,
+		 int migratetype),
+
+	TP_ARGS(start, end, nr_migrated, nr_reclaimed, nr_mapped, migratetype),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, start)
+		__field(unsigned long, end)
+		__field(unsigned long, nr_migrated)
+		__field(unsigned long, nr_reclaimed)
+		__field(unsigned long, nr_mapped)
+		__field(int, migratetype)
+	),
+
+	TP_fast_assign(
+		__entry->start = start;
+		__entry->end = end;
+		__entry->nr_migrated = nr_migrated;
+		__entry->nr_reclaimed = nr_reclaimed;
+		__entry->nr_mapped = nr_mapped;
+		__entry->migratetype = migratetype;
+	),
+
+	TP_printk("start=0x%lx end=0x%lx migratetype=%d nr_migrated=%lu nr_reclaimed=%lu nr_mapped=%lu",
+		  __entry->start,
+		  __entry->end,
+		  __entry->migratetype,
+		  __entry->nr_migrated,
+		  __entry->nr_reclaimed,
+		  __entry->nr_mapped)
+);
+
 /*
  * Required for uniquely and securely identifying mm in rss_stat tracepoint.
  */
diff --git a/mm/internal.h b/mm/internal.h
index b1d806125e8d..5be91e2f9943 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -539,7 +539,8 @@ isolate_migratepages_range(struct compact_control *cc,
 			   unsigned long low_pfn, unsigned long end_pfn);
 
 int __alloc_contig_migrate_range(struct compact_control *cc,
-					unsigned long start, unsigned long end);
+					unsigned long start, unsigned long end,
+					int migratetype);
 
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void init_cma_reserved_pageblock(struct page *page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7873e9375802..29c982542dcc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6222,9 +6222,14 @@ static void alloc_contig_dump_pages(struct list_head *page_list)
 	}
 }
 
-/* [start, end) must belong to a single zone. */
+/*
+ * [start, end) must belong to a single zone.
+ * @migratetype: using migratetype to filter the type of migration in
+ *		trace_mm_alloc_contig_migrate_range_info.
+ */
 int __alloc_contig_migrate_range(struct compact_control *cc,
-					unsigned long start, unsigned long end)
+					unsigned long start, unsigned long end,
+					int migratetype)
 {
 	/* This function is based on compact_zone() from compaction.c. */
 	unsigned int nr_reclaimed;
@@ -6235,6 +6240,10 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
 		.nid = zone_to_nid(cc->zone),
 		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
 	};
+	struct page *page;
+	unsigned long total_mapped = 0;
+	unsigned long total_migrated = 0;
+	unsigned long total_reclaimed = 0;
 
 	lru_cache_disable();
 
@@ -6260,9 +6269,18 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
 							&cc->migratepages);
 		cc->nr_migratepages -= nr_reclaimed;
 
+		if (trace_mm_alloc_contig_migrate_range_info_enabled()) {
+			total_reclaimed += nr_reclaimed;
+			list_for_each_entry(page, &cc->migratepages, lru)
+				total_mapped += page_mapcount(page);
+		}
+
 		ret = migrate_pages(&cc->migratepages, alloc_migration_target,
 			NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
 
+		if (trace_mm_alloc_contig_migrate_range_info_enabled() && !ret)
+			total_migrated += cc->nr_migratepages;
+
 		/*
 		 * On -ENOMEM, migrate_pages() bails out right away. It is pointless
 		 * to retry again over this error, so do the same here.
@@ -6276,9 +6294,13 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
 		if (!(cc->gfp_mask & __GFP_NOWARN) && ret == -EBUSY)
 			alloc_contig_dump_pages(&cc->migratepages);
 		putback_movable_pages(&cc->migratepages);
-		return ret;
 	}
-	return 0;
+
+	trace_mm_alloc_contig_migrate_range_info(start, end, migratetype,
+						 total_migrated,
+						 total_reclaimed,
+						 total_mapped);
+	return (ret < 0) ? ret : 0;
 }
 
 /**
@@ -6358,7 +6380,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	 * allocated. So, if we fall through be sure to clear ret so that
 	 * -EBUSY is not accidentally used or returned to caller.
 	 */
-	ret = __alloc_contig_migrate_range(&cc, start, end);
+	ret = __alloc_contig_migrate_range(&cc, start, end, migratetype);
 	if (ret && ret != -EBUSY)
 		goto done;
 	ret = 0;
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index cd0ea3668253..a5c8fa4c2a75 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -434,7 +434,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 			}
 
 			ret = __alloc_contig_migrate_range(&cc, head_pfn,
-						head_pfn + nr_pages);
+						head_pfn + nr_pages, page_mt);
 
 			/*
 			 * restore the page's migratetype so that it can
-- 
cgit v1.2.3
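The new kmem:mm_alloc_contig_migrate_range_info event can be inspected
through tracefs.  A small sketch, assuming tracefs is mounted at
/sys/kernel/tracing and root privileges: it dumps the event's format file
(the kernel's own description of the fields) and then enables the event,
so records appear in the trace buffer the next time something calls
alloc_contig_range(), e.g. a CMA allocation.  Note that the call site in
__alloc_contig_migrate_range() passes migratetype as the third argument
while TP_PROTO declares it last, which is worth keeping in mind when
interpreting the reported numbers.

#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	const char *dir =
		"/sys/kernel/tracing/events/kmem/mm_alloc_contig_migrate_range_info";
	char path[512], line[4096];
	FILE *f;

	/* show the field layout as the kernel describes it */
	snprintf(path, sizeof(path), "%s/format", dir);
	f = fopen(path, "r");
	if (f) {
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
	}

	/* enable the event; records land in /sys/kernel/tracing/trace */
	snprintf(path, sizeof(path), "%s/enable", dir);
	if (write_str(path, "1")) {
		perror("enable mm_alloc_contig_migrate_range_info");
		return 1;
	}
	return 0;
}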