From 9bbdc0f324097f72b2354c2f8be4cdffd32679b6 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 22 Mar 2022 14:41:12 -0700 Subject: xarray: use kmem_cache_alloc_lru to allocate xa_node The workingset will add the xa_node to the shadow_nodes list. So the allocation of xa_node should be done by kmem_cache_alloc_lru(). Using xas_set_lru() to pass the list_lru which we want to insert xa_node into to set up the xa_node reclaim context correctly. Link: https://lkml.kernel.org/r/20220228122126.37293-9-songmuchun@bytedance.com Signed-off-by: Muchun Song Acked-by: Johannes Weiner Acked-by: Roman Gushchin Cc: Alex Shi Cc: Anna Schumaker Cc: Chao Yu Cc: Dave Chinner Cc: Fam Zheng Cc: Jaegeuk Kim Cc: Kari Argillander Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Qi Zheng Cc: Shakeel Butt Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Wei Yang Cc: Xiongchun Duan Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/xarray.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/xarray.c b/lib/xarray.c index 6f47f6375808..b95e92598b9c 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -302,7 +302,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp) } if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) gfp |= __GFP_ACCOUNT; - xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); if (!xas->xa_alloc) return false; xas->xa_alloc->parent = NULL; @@ -334,10 +334,10 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) gfp |= __GFP_ACCOUNT; if (gfpflags_allow_blocking(gfp)) { xas_unlock_type(xas, lock_type); - xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); xas_lock_type(xas, lock_type); } else { - xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + xas->xa_alloc = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); } if (!xas->xa_alloc) return false; @@ -371,7 +371,7 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift) if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) gfp |= __GFP_ACCOUNT; - node = kmem_cache_alloc(radix_tree_node_cachep, gfp); + node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); if (!node) { xas_set_err(xas, -ENOMEM); return NULL; @@ -1014,7 +1014,7 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, void *sibling = NULL; struct xa_node *node; - node = kmem_cache_alloc(radix_tree_node_cachep, gfp); + node = kmem_cache_alloc_lru(radix_tree_node_cachep, xas->xa_lru, gfp); if (!node) goto nomem; node->array = xas->xa; -- cgit v1.2.3 From adf505457032c11b79b5a7c277c62ff5d61b17c2 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 22 Mar 2022 14:48:16 -0700 Subject: kunit: fix UAF when run kfence test case test_gfpzero Patch series "kunit: fix a UAF bug and do some optimization", v2. This series is to fix UAF (use after free) when running kfence test case test_gfpzero, which is time costly. This UAF bug can be easily triggered by setting CONFIG_KFENCE_NUM_OBJECTS = 65535. Furthermore, some optimization for kunit tests has been done. This patch (of 3): Kunit will create a new thread to run an actual test case, and the main process will wait for the completion of the actual test thread until overtime. The variable "struct kunit test" has local property in function kunit_try_catch_run, and will be used in the test case thread. Task kunit_try_catch_run will free "struct kunit test" when kunit runs overtime, but the actual test case is still run and an UAF bug will be triggered. The above problem has been both observed in a physical machine and qemu platform when running kfence kunit tests. The problem can be triggered when setting CONFIG_KFENCE_NUM_OBJECTS = 65535. Under this setting, the test case test_gfpzero will cost hours and kunit will run to overtime. The follows show the panic log. BUG: unable to handle page fault for address: ffffffff82d882e9 Call Trace: kunit_log_append+0x58/0xd0 ... test_alloc.constprop.0.cold+0x6b/0x8a [kfence_test] test_gfpzero.cold+0x61/0x8ab [kfence_test] kunit_try_run_case+0x4c/0x70 kunit_generic_run_threadfn_adapter+0x11/0x20 kthread+0x166/0x190 ret_from_fork+0x22/0x30 Kernel panic - not syncing: Fatal exception Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 To solve this problem, the test case thread should be stopped when the kunit frame runs overtime. The stop signal will send in function kunit_try_catch_run, and test_gfpzero will handle it. Link: https://lkml.kernel.org/r/20220309083753.1561921-1-liupeng256@huawei.com Link: https://lkml.kernel.org/r/20220309083753.1561921-2-liupeng256@huawei.com Signed-off-by: Peng Liu Reviewed-by: Marco Elver Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Wang Kefeng Cc: Daniel Latypov Cc: David Gow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kunit/try-catch.c | 1 + mm/kfence/kfence_test.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c index be38a2c5ecc2..6b3d4db94077 100644 --- a/lib/kunit/try-catch.c +++ b/lib/kunit/try-catch.c @@ -78,6 +78,7 @@ void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context) if (time_remaining == 0) { kunit_err(test, "try timed out\n"); try_catch->try_result = -ETIMEDOUT; + kthread_stop(task_struct); } exit_code = try_catch->try_result; diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 50dbb815a2a8..caed6b4eba94 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -623,7 +623,7 @@ static void test_gfpzero(struct kunit *test) break; test_free(buf2); - if (i == CONFIG_KFENCE_NUM_OBJECTS) { + if (kthread_should_stop() || (i == CONFIG_KFENCE_NUM_OBJECTS)) { kunit_warn(test, "giving up ... cannot get same object back\n"); return; } -- cgit v1.2.3 From bdd015f7b71b92c2e4ecabac689642cc72553e04 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 22 Mar 2022 14:48:19 -0700 Subject: kunit: make kunit_test_timeout compatible with comment In function kunit_test_timeout, it is declared "300 * MSEC_PER_SEC" represent 5min. However, it is wrong when dealing with arm64 whose default HZ = 250, or some other situations. Use msecs_to_jiffies to fix this, and kunit_test_timeout will work as desired. Link: https://lkml.kernel.org/r/20220309083753.1561921-3-liupeng256@huawei.com Fixes: 5f3e06208920 ("kunit: test: add support for test abort") Signed-off-by: Peng Liu Reviewed-by: Marco Elver Reviewed-by: Daniel Latypov Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Wang Kefeng Cc: David Gow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kunit/try-catch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c index 6b3d4db94077..f7825991d576 100644 --- a/lib/kunit/try-catch.c +++ b/lib/kunit/try-catch.c @@ -52,7 +52,7 @@ static unsigned long kunit_test_timeout(void) * If tests timeout due to exceeding sysctl_hung_task_timeout_secs, * the task will be killed and an oops generated. */ - return 300 * MSEC_PER_SEC; /* 5 min */ + return 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */ } void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context) -- cgit v1.2.3 From 737b6a10ac19e41877aa1911bf6b361a72a88ad9 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 22 Mar 2022 14:48:25 -0700 Subject: kfence: allow use of a deferrable timer Allow the use of a deferrable timer, which does not force CPU wake-ups when the system is idle. A consequence is that the sample interval becomes very unpredictable, to the point that it is not guaranteed that the KFENCE KUnit test still passes. Nevertheless, on power-constrained systems this may be preferable, so let's give the user the option should they accept the above trade-off. Link: https://lkml.kernel.org/r/20220308141415.3168078-1-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kfence.rst | 12 ++++++++++++ lib/Kconfig.kfence | 12 ++++++++++++ mm/kfence/core.c | 15 +++++++++++++-- 3 files changed, 37 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst index ac6b89d1a8c3..936f6aaa75c8 100644 --- a/Documentation/dev-tools/kfence.rst +++ b/Documentation/dev-tools/kfence.rst @@ -41,6 +41,18 @@ guarded by KFENCE. The default is configurable via the Kconfig option ``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0`` disables KFENCE. +The sample interval controls a timer that sets up KFENCE allocations. By +default, to keep the real sample interval predictable, the normal timer also +causes CPU wake-ups when the system is completely idle. This may be undesirable +on power-constrained systems. The boot parameter ``kfence.deferrable=1`` +instead switches to a "deferrable" timer which does not force CPU wake-ups on +idle systems, at the risk of unpredictable sample intervals. The default is +configurable via the Kconfig option ``CONFIG_KFENCE_DEFERRABLE``. + +.. warning:: + The KUnit test suite is very likely to fail when using a deferrable timer + since it currently causes very unpredictable sample intervals. + The KFENCE memory pool is of fixed size, and if the pool is exhausted, no further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default 255), the number of available guarded objects can be controlled. Each object diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence index 912f252a41fc..459dda9ef619 100644 --- a/lib/Kconfig.kfence +++ b/lib/Kconfig.kfence @@ -45,6 +45,18 @@ config KFENCE_NUM_OBJECTS pages are required; with one containing the object and two adjacent ones used as guard pages. +config KFENCE_DEFERRABLE + bool "Use a deferrable timer to trigger allocations" + help + Use a deferrable timer to trigger allocations. This avoids forcing + CPU wake-ups if the system is idle, at the risk of a less predictable + sample interval. + + Warning: The KUnit test suite fails with this option enabled - due to + the unpredictability of the sample interval! + + Say N if you are unsure. + config KFENCE_STATIC_KEYS bool "Use static keys to set up allocations" if EXPERT depends on JUMP_LABEL diff --git a/mm/kfence/core.c b/mm/kfence/core.c index f126b53b9b85..2f9fdfde1941 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -95,6 +95,10 @@ module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_inte static unsigned long kfence_skip_covered_thresh __read_mostly = 75; module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644); +/* If true, use a deferrable timer. */ +static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE); +module_param_named(deferrable, kfence_deferrable, bool, 0444); + /* The pool of pages used for guard pages and objects. */ char *__kfence_pool __read_mostly; EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */ @@ -740,6 +744,8 @@ late_initcall(kfence_debugfs_init); /* === Allocation Gate Timer ================================================ */ +static struct delayed_work kfence_timer; + #ifdef CONFIG_KFENCE_STATIC_KEYS /* Wait queue to wake up allocation-gate timer task. */ static DECLARE_WAIT_QUEUE_HEAD(allocation_wait); @@ -762,7 +768,6 @@ static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer); * avoids IPIs, at the cost of not immediately capturing allocations if the * instructions remain cached. */ -static struct delayed_work kfence_timer; static void toggle_allocation_gate(struct work_struct *work) { if (!READ_ONCE(kfence_enabled)) @@ -790,7 +795,6 @@ static void toggle_allocation_gate(struct work_struct *work) queue_delayed_work(system_unbound_wq, &kfence_timer, msecs_to_jiffies(kfence_sample_interval)); } -static DECLARE_DELAYED_WORK(kfence_timer, toggle_allocation_gate); /* === Public interface ===================================================== */ @@ -809,8 +813,15 @@ static void kfence_init_enable(void) { if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS)) static_branch_enable(&kfence_allocation_key); + + if (kfence_deferrable) + INIT_DEFERRABLE_WORK(&kfence_timer, toggle_allocation_gate); + else + INIT_DELAYED_WORK(&kfence_timer, toggle_allocation_gate); + WRITE_ONCE(kfence_enabled, true); queue_delayed_work(system_unbound_wq, &kfence_timer, 0); + pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE, CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool, (void *)(__kfence_pool + KFENCE_POOL_SIZE)); -- cgit v1.2.3