From 3db978d480e2843979a2b56f2f7da726f2b295b2 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Sun, 7 Jun 2020 21:40:24 -0700 Subject: kernel/sysctl: support setting sysctl parameters from kernel command line Patch series "support setting sysctl parameters from kernel command line", v3. This series adds support for something that seems like many people always wanted but nobody added it yet, so here's the ability to set sysctl parameters via kernel command line options in the form of sysctl.vm.something=1 The important part is Patch 1. The second, not so important part is an attempt to clean up legacy one-off parameters that do the same thing as a sysctl. I don't want to remove them completely for compatibility reasons, but with generic sysctl support the idea is to remove the one-off param handlers and treat the parameters as aliases for the sysctl variants. I have identified several parameters that mention sysctl counterparts in Documentation/admin-guide/kernel-parameters.txt but there might be more. The conversion also has varying level of success: - numa_zonelist_order is converted in Patch 2 together with adding the necessary infrastructure. It's easy as it doesn't really do anything but warn on deprecated value these days. - hung_task_panic is converted in Patch 3, but there's a downside that now it only accepts 0 and 1, while previously it was any integer value - nmi_watchdog maps to two sysctls nmi_watchdog and hardlockup_panic, so there's no straighforward conversion possible - traceoff_on_warning is a flag without value and it would be required to handle that somehow in the conversion infractructure, which seems pointless for a single flag This patch (of 5): A recently proposed patch to add vm_swappiness command line parameter in addition to existing sysctl [1] made me wonder why we don't have a general support for passing sysctl parameters via command line. Googling found only somebody else wondering the same [2], but I haven't found any prior discussion with reasons why not to do this. Settings the vm_swappiness issue aside (the underlying issue might be solved in a different way), quick search of kernel-parameters.txt shows there are already some that exist as both sysctl and kernel parameter - hung_task_panic, nmi_watchdog, numa_zonelist_order, traceoff_on_warning. A general mechanism would remove the need to add more of those one-offs and might be handy in situations where configuration by e.g. /etc/sysctl.d/ is impractical. Hence, this patch adds a new parse_args() pass that looks for parameters prefixed by 'sysctl.' and tries to interpret them as writes to the corresponding sys/ files using an temporary in-kernel procfs mount. This mechanism was suggested by Eric W. Biederman [3], as it handles all dynamically registered sysctl tables, even though we don't handle modular sysctls. Errors due to e.g. invalid parameter name or value are reported in the kernel log. The processing is hooked right before the init process is loaded, as some handlers might be more complicated than simple setters and might need some subsystems to be initialized. At the moment the init process can be started and eventually execute a process writing to /proc/sys/ then it should be also fine to do that from the kernel. Sysctls registered later on module load time are not set by this mechanism - it's expected that in such scenarios, setting sysctl values from userspace is practical enough. [1] https://lore.kernel.org/r/BL0PR02MB560167492CA4094C91589930E9FC0@BL0PR02MB5601.namprd02.prod.outlook.com/ [2] https://unix.stackexchange.com/questions/558802/how-to-set-sysctl-using-kernel-command-line-parameter [3] https://lore.kernel.org/r/87bloj2skm.fsf@x220.int.ebiederm.org/ Signed-off-by: Vlastimil Babka Signed-off-by: Andrew Morton Reviewed-by: Luis Chamberlain Reviewed-by: Masami Hiramatsu Acked-by: Kees Cook Acked-by: Michal Hocko Cc: Iurii Zaikin Cc: Ivan Teterevkov Cc: Michal Hocko Cc: David Rientjes Cc: Matthew Wilcox Cc: "Eric W . Biederman" Cc: "Guilherme G . Piccoli" Cc: Alexey Dobriyan Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Christian Brauner Link: http://lkml.kernel.org/r/20200427180433.7029-1-vbabka@suse.cz Link: http://lkml.kernel.org/r/20200427180433.7029-2-vbabka@suse.cz Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'fs/proc') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index df2143e05c57..973acf96f37c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -1703,3 +1704,109 @@ int __init proc_sys_init(void) return sysctl_init(); } + +/* Set sysctl value passed on kernel command line. */ +static int process_sysctl_arg(char *param, char *val, + const char *unused, void *arg) +{ + char *path; + struct vfsmount **proc_mnt = arg; + struct file_system_type *proc_fs_type; + struct file *file; + int len; + int err; + loff_t pos = 0; + ssize_t wret; + + if (strncmp(param, "sysctl", sizeof("sysctl") - 1)) + return 0; + + param += sizeof("sysctl") - 1; + + if (param[0] != '/' && param[0] != '.') + return 0; + + param++; + + /* + * To set sysctl options, we use a temporary mount of proc, look up the + * respective sys/ file and write to it. To avoid mounting it when no + * options were given, we mount it only when the first sysctl option is + * found. Why not a persistent mount? There are problems with a + * persistent mount of proc in that it forces userspace not to use any + * proc mount options. + */ + if (!*proc_mnt) { + proc_fs_type = get_fs_type("proc"); + if (!proc_fs_type) { + pr_err("Failed to find procfs to set sysctl from command line\n"); + return 0; + } + *proc_mnt = kern_mount(proc_fs_type); + put_filesystem(proc_fs_type); + if (IS_ERR(*proc_mnt)) { + pr_err("Failed to mount procfs to set sysctl from command line\n"); + return 0; + } + } + + path = kasprintf(GFP_KERNEL, "sys/%s", param); + if (!path) + panic("%s: Failed to allocate path for %s\n", __func__, param); + strreplace(path, '.', '/'); + + file = file_open_root((*proc_mnt)->mnt_root, *proc_mnt, path, O_WRONLY, 0); + if (IS_ERR(file)) { + err = PTR_ERR(file); + if (err == -ENOENT) + pr_err("Failed to set sysctl parameter '%s=%s': parameter not found\n", + param, val); + else if (err == -EACCES) + pr_err("Failed to set sysctl parameter '%s=%s': permission denied (read-only?)\n", + param, val); + else + pr_err("Error %pe opening proc file to set sysctl parameter '%s=%s'\n", + file, param, val); + goto out; + } + len = strlen(val); + wret = kernel_write(file, val, len, &pos); + if (wret < 0) { + err = wret; + if (err == -EINVAL) + pr_err("Failed to set sysctl parameter '%s=%s': invalid value\n", + param, val); + else + pr_err("Error %pe writing to proc file to set sysctl parameter '%s=%s'\n", + ERR_PTR(err), param, val); + } else if (wret != len) { + pr_err("Wrote only %zd bytes of %d writing to proc file %s to set sysctl parameter '%s=%s\n", + wret, len, path, param, val); + } + + err = filp_close(file, NULL); + if (err) + pr_err("Error %pe closing proc file to set sysctl parameter '%s=%s\n", + ERR_PTR(err), param, val); +out: + kfree(path); + return 0; +} + +void do_sysctl_args(void) +{ + char *command_line; + struct vfsmount *proc_mnt = NULL; + + command_line = kstrdup(saved_command_line, GFP_KERNEL); + if (!command_line) + panic("%s: Failed to allocate copy of command line\n", __func__); + + parse_args("Setting sysctl args", command_line, + NULL, 0, -1, -1, &proc_mnt, process_sysctl_arg); + + if (proc_mnt) + kern_unmount(proc_mnt); + + kfree(command_line); +} -- cgit v1.2.3 From 0a477e1ae21b28267b9bd8599f75c115291b1666 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Sun, 7 Jun 2020 21:40:27 -0700 Subject: kernel/sysctl: support handling command line aliases We can now handle sysctl parameters on kernel command line, but historically some parameters introduced their own command line equivalent, which we don't want to remove for compatibility reasons. We can, however, convert them to the generic infrastructure with a table translating the legacy command line parameters to their sysctl names, and removing the one-off param handlers. This patch adds the support and makes the first conversion to demonstrate it, on the (deprecated) numa_zonelist_order parameter. Signed-off-by: Vlastimil Babka Signed-off-by: Andrew Morton Reviewed-by: Luis Chamberlain Acked-by: Kees Cook Acked-by: Michal Hocko Cc: Alexey Dobriyan Cc: Christian Brauner Cc: David Rientjes Cc: "Eric W . Biederman" Cc: Greg Kroah-Hartman Cc: "Guilherme G . Piccoli" Cc: Iurii Zaikin Cc: Ivan Teterevkov Cc: Masami Hiramatsu Cc: Matthew Wilcox Cc: Michal Hocko Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20200427180433.7029-3-vbabka@suse.cz Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 48 +++++++++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 9 --------- 2 files changed, 41 insertions(+), 16 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 973acf96f37c..124298168f8b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1705,6 +1705,37 @@ int __init proc_sys_init(void) return sysctl_init(); } +struct sysctl_alias { + const char *kernel_param; + const char *sysctl_param; +}; + +/* + * Historically some settings had both sysctl and a command line parameter. + * With the generic sysctl. parameter support, we can handle them at a single + * place and only keep the historical name for compatibility. This is not meant + * to add brand new aliases. When adding existing aliases, consider whether + * the possibly different moment of changing the value (e.g. from early_param + * to the moment do_sysctl_args() is called) is an issue for the specific + * parameter. + */ +static const struct sysctl_alias sysctl_aliases[] = { + {"numa_zonelist_order", "vm.numa_zonelist_order" }, + { } +}; + +static const char *sysctl_find_alias(char *param) +{ + const struct sysctl_alias *alias; + + for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) { + if (strcmp(alias->kernel_param, param) == 0) + return alias->sysctl_param; + } + + return NULL; +} + /* Set sysctl value passed on kernel command line. */ static int process_sysctl_arg(char *param, char *val, const char *unused, void *arg) @@ -1718,15 +1749,18 @@ static int process_sysctl_arg(char *param, char *val, loff_t pos = 0; ssize_t wret; - if (strncmp(param, "sysctl", sizeof("sysctl") - 1)) - return 0; - - param += sizeof("sysctl") - 1; + if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) { + param += sizeof("sysctl") - 1; - if (param[0] != '/' && param[0] != '.') - return 0; + if (param[0] != '/' && param[0] != '.') + return 0; - param++; + param++; + } else { + param = (char *) sysctl_find_alias(param); + if (!param) + return 0; + } /* * To set sysctl options, we use a temporary mount of proc, look up the diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07ae77d97952..727751219003 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5575,15 +5575,6 @@ static int __parse_numa_zonelist_order(char *s) return 0; } -static __init int setup_numa_zonelist_order(char *s) -{ - if (!s) - return 0; - - return __parse_numa_zonelist_order(s); -} -early_param("numa_zonelist_order", setup_numa_zonelist_order); - char numa_zonelist_order[] = "Node"; /* -- cgit v1.2.3 From b467f3ef3c50c4fa8926ca07f7db9a33a645e13a Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Sun, 7 Jun 2020 21:40:31 -0700 Subject: kernel/hung_task convert hung_task_panic boot parameter to sysctl We can now handle sysctl parameters on kernel command line and have infrastructure to convert legacy command line options that duplicate sysctl to become a sysctl alias. This patch converts the hung_task_panic parameter. Note that the sysctl handler is more strict and allows only 0 and 1, while the legacy parameter allowed any non-zero value. But there is little reason anyone would not be using 1. Signed-off-by: Vlastimil Babka Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Acked-by: Michal Hocko Cc: Alexey Dobriyan Cc: Christian Brauner Cc: David Rientjes Cc: "Eric W . Biederman" Cc: Greg Kroah-Hartman Cc: "Guilherme G . Piccoli" Cc: Iurii Zaikin Cc: Ivan Teterevkov Cc: Luis Chamberlain Cc: Masami Hiramatsu Cc: Matthew Wilcox Cc: Michal Hocko Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20200427180433.7029-4-vbabka@suse.cz Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 2 +- fs/proc/proc_sysctl.c | 1 + kernel/hung_task.c | 10 ---------- 3 files changed, 2 insertions(+), 11 deletions(-) (limited to 'fs/proc') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d02b9d023660..0c72b5c22416 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1515,7 +1515,7 @@ [KNL] Should the hung task detector generate panics. Format: - A nonzero value instructs the kernel to panic when a + A value of 1 instructs the kernel to panic when a hung task is detected. The default value is controlled by the CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time option. The value selected by this boot parameter can diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 124298168f8b..15030784566c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1721,6 +1721,7 @@ struct sysctl_alias { */ static const struct sysctl_alias sysctl_aliases[] = { {"numa_zonelist_order", "vm.numa_zonelist_order" }, + {"hung_task_panic", "kernel.hung_task_panic" }, { } }; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 14a625c16cb3..b22b5eeab3cb 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -63,16 +63,6 @@ static struct task_struct *watchdog_task; unsigned int __read_mostly sysctl_hung_task_panic = CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE; -static int __init hung_task_panic_setup(char *str) -{ - int rc = kstrtouint(str, 0, &sysctl_hung_task_panic); - - if (rc) - return rc; - return 1; -} -__setup("hung_task_panic=", hung_task_panic_setup); - static int hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) { -- cgit v1.2.3 From f117955a2255721a6a0e9cecf6cad3a6eb43cbc3 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Sun, 7 Jun 2020 21:40:42 -0700 Subject: kernel/watchdog.c: convert {soft/hard}lockup boot parameters to sysctl aliases After a recent change introduced by Vlastimil's series [0], kernel is able now to handle sysctl parameters on kernel command line; also, the series introduced a simple infrastructure to convert legacy boot parameters (that duplicate sysctls) into sysctl aliases. This patch converts the watchdog parameters softlockup_panic and {hard,soft}lockup_all_cpu_backtrace to use the new alias infrastructure. It fixes the documentation too, since the alias only accepts values 0 or 1, not the full range of integers. We also took the opportunity here to improve the documentation of the previously converted hung_task_panic (see the patch series [0]) and put the alias table in alphabetical order. [0] http://lkml.kernel.org/r/20200427180433.7029-1-vbabka@suse.cz Signed-off-by: Guilherme G. Piccoli Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Kees Cook Cc: Iurii Zaikin Cc: Luis Chamberlain Link: http://lkml.kernel.org/r/20200507214624.21911-1-gpiccoli@canonical.com Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 10 +++---- fs/proc/proc_sysctl.c | 7 +++-- kernel/watchdog.c | 37 ++++++------------------- 3 files changed, 19 insertions(+), 35 deletions(-) (limited to 'fs/proc') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0c72b5c22416..ed9597e37415 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1445,7 +1445,7 @@ hardlockup_all_cpu_backtrace= [KNL] Should the hard-lockup detector generate backtraces on all cpus. - Format: + Format: 0 | 1 hashdist= [KNL,NUMA] Large hashes allocated during boot are distributed across NUMA nodes. Defaults on @@ -1513,7 +1513,7 @@ hung_task_panic= [KNL] Should the hung task detector generate panics. - Format: + Format: 0 | 1 A value of 1 instructs the kernel to panic when a hung task is detected. The default value is controlled @@ -4665,9 +4665,9 @@ softlockup_panic= [KNL] Should the soft-lockup detector generate panics. - Format: + Format: 0 | 1 - A nonzero value instructs the soft-lockup detector + A value of 1 instructs the soft-lockup detector to panic the machine when a soft-lockup occurs. It is also controlled by the kernel.softlockup_panic sysctl and CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC, which is the @@ -4676,7 +4676,7 @@ softlockup_all_cpu_backtrace= [KNL] Should the soft-lockup detector generate backtraces on all cpus. - Format: + Format: 0 | 1 sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/admin-guide/laptops/sonypi.rst diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 15030784566c..5b405f32971d 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1720,8 +1720,11 @@ struct sysctl_alias { * parameter. */ static const struct sysctl_alias sysctl_aliases[] = { - {"numa_zonelist_order", "vm.numa_zonelist_order" }, - {"hung_task_panic", "kernel.hung_task_panic" }, + {"hardlockup_all_cpu_backtrace", "kernel.hardlockup_all_cpu_backtrace" }, + {"hung_task_panic", "kernel.hung_task_panic" }, + {"numa_zonelist_order", "vm.numa_zonelist_order" }, + {"softlockup_all_cpu_backtrace", "kernel.softlockup_all_cpu_backtrace" }, + {"softlockup_panic", "kernel.softlockup_panic" }, { } }; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 53ff2c81b084..5abb5b22ad13 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -50,6 +50,11 @@ struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); #ifdef CONFIG_HARDLOCKUP_DETECTOR + +# ifdef CONFIG_SMP +int __read_mostly sysctl_hardlockup_all_cpu_backtrace; +# endif /* CONFIG_SMP */ + /* * Should we panic when a soft-lockup or hard-lockup occurs: */ @@ -82,16 +87,6 @@ static int __init hardlockup_panic_setup(char *str) } __setup("nmi_watchdog=", hardlockup_panic_setup); -# ifdef CONFIG_SMP -int __read_mostly sysctl_hardlockup_all_cpu_backtrace; - -static int __init hardlockup_all_cpu_backtrace_setup(char *str) -{ - sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); - return 1; -} -__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); -# endif /* CONFIG_SMP */ #endif /* CONFIG_HARDLOCKUP_DETECTOR */ /* @@ -163,6 +158,10 @@ static void lockup_detector_update_enable(void) #define SOFTLOCKUP_RESET ULONG_MAX +#ifdef CONFIG_SMP +int __read_mostly sysctl_softlockup_all_cpu_backtrace; +#endif + /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -178,13 +177,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static unsigned long soft_lockup_nmi_warn; -static int __init softlockup_panic_setup(char *str) -{ - softlockup_panic = simple_strtoul(str, NULL, 0); - return 1; -} -__setup("softlockup_panic=", softlockup_panic_setup); - static int __init nowatchdog_setup(char *str) { watchdog_user_enabled = 0; @@ -206,17 +198,6 @@ static int __init watchdog_thresh_setup(char *str) } __setup("watchdog_thresh=", watchdog_thresh_setup); -#ifdef CONFIG_SMP -int __read_mostly sysctl_softlockup_all_cpu_backtrace; - -static int __init softlockup_all_cpu_backtrace_setup(char *str) -{ - sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); - return 1; -} -__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); -#endif - static void __lockup_detector_cleanup(void); /* -- cgit v1.2.3