summaryrefslogtreecommitdiff
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig169
-rw-r--r--init/do_mounts.c15
-rw-r--r--init/do_mounts.h9
-rw-r--r--init/init_task.c13
-rw-r--r--init/initramfs.c32
-rw-r--r--init/main.c53
6 files changed, 166 insertions, 125 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 9ffb103fc927..25f6caeb1491 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT
# Detect buggy gcc and clang, fixed in gcc-11 clang-14.
def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null)
+config GCC_ASM_GOTO_OUTPUT_WORKAROUND
+ bool
+ depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT
+ # Fixed in GCC 14, 13.3, 12.4 and 11.5
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
+ default y if GCC_VERSION < 110500
+ default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400
+ default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300
+
config TOOLS_SUPPORT_RELR
def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
@@ -106,7 +115,7 @@ config CONSTRUCTORS
bool
config IRQ_WORK
- bool
+ def_bool y if SMP
config BUILDTIME_TABLE_SORT
bool
@@ -538,24 +547,24 @@ config HAVE_SCHED_AVG_IRQ
depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING
depends on SMP
-config SCHED_THERMAL_PRESSURE
+config SCHED_HW_PRESSURE
bool
default y if ARM && ARM_CPU_TOPOLOGY
default y if ARM64
depends on SMP
depends on CPU_FREQ_THERMAL
help
- Select this option to enable thermal pressure accounting in the
- scheduler. Thermal pressure is the value conveyed to the scheduler
+ Select this option to enable HW pressure accounting in the
+ scheduler. HW pressure is the value conveyed to the scheduler
that reflects the reduction in CPU compute capacity resulted from
- thermal throttling. Thermal throttling occurs when the performance of
- a CPU is capped due to high operating temperatures.
+ HW throttling. HW throttling occurs when the performance of
+ a CPU is capped, for example due to high operating temperatures.
If selected, the scheduler will be able to balance tasks accordingly,
i.e. put less load on throttled CPUs than on non/less throttled ones.
This requires the architecture to implement
- arch_update_thermal_pressure() and arch_scale_thermal_pressure().
+ arch_update_hw_pressure() and arch_scale_hw_pressure().
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
@@ -734,8 +743,8 @@ config LOG_CPU_MAX_BUF_SHIFT
int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
depends on SMP
range 0 21
- default 12 if !BASE_SMALL
default 0 if BASE_SMALL
+ default 12
depends on PRINTK
help
This option allows to increase the default ring buffer size
@@ -867,14 +876,26 @@ config CC_IMPLICIT_FALLTHROUGH
default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
-# Currently, disable gcc-11+ array-bounds globally.
+# Currently, disable gcc-10+ array-bounds globally.
# It's still broken in gcc-13, so no upper bound yet.
-config GCC11_NO_ARRAY_BOUNDS
+config GCC10_NO_ARRAY_BOUNDS
def_bool y
config CC_NO_ARRAY_BOUNDS
bool
- default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS
+ default y if CC_IS_GCC && GCC_VERSION >= 100000 && GCC10_NO_ARRAY_BOUNDS
+
+# Currently, disable -Wstringop-overflow for GCC globally.
+config GCC_NO_STRINGOP_OVERFLOW
+ def_bool y
+
+config CC_NO_STRINGOP_OVERFLOW
+ bool
+ default y if CC_IS_GCC && GCC_NO_STRINGOP_OVERFLOW
+
+config CC_STRINGOP_OVERFLOW
+ bool
+ default y if CC_IS_GCC && !CC_NO_STRINGOP_OVERFLOW
#
# For architectures that know their GCC __int128 support is sound
@@ -1445,11 +1466,6 @@ config SYSCTL_ARCH_UNALIGN_ALLOW
config HAVE_PCSPKR_PLATFORM
bool
-# interpreter that classic socket filters depend on
-config BPF
- bool
- select CRYPTO_LIB_SHA1
-
menuconfig EXPERT
bool "Configure standard kernel features (expert users)"
# Unhide debug options, to make the on-by-default options visible
@@ -1483,7 +1499,7 @@ config MULTIUSER
config SGETMASK_SYSCALL
bool "sgetmask/ssetmask syscalls support" if EXPERT
- def_bool PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH
+ default PARISC || M68K || PPC || MIPS || X86 || SPARC || MICROBLAZE || SUPERH
help
sys_sgetmask and sys_ssetmask are obsolete system calls
no longer supported in libc but still enabled by default in some
@@ -1569,11 +1585,10 @@ config PCSPKR_PLATFORM
This option allows to disable the internal PC-Speaker
support, saving some memory.
-config BASE_FULL
- default y
- bool "Enable full-sized data structures for core" if EXPERT
+config BASE_SMALL
+ bool "Enable smaller-sized data structures for core" if EXPERT
help
- Disabling this option reduces the size of miscellaneous core
+ Enabling this option reduces the size of miscellaneous core
kernel data structures. This saves memory on small machines,
but may reduce performance.
@@ -1676,6 +1691,56 @@ config MEMBARRIER
If unsure, say Y.
+config KCMP
+ bool "Enable kcmp() system call" if EXPERT
+ help
+ Enable the kernel resource comparison system call. It provides
+ user-space with the ability to compare two processes to see if they
+ share a common resource, such as a file descriptor or even virtual
+ memory space.
+
+ If unsure, say N.
+
+config RSEQ
+ bool "Enable rseq() system call" if EXPERT
+ default y
+ depends on HAVE_RSEQ
+ select MEMBARRIER
+ help
+ Enable the restartable sequences system call. It provides a
+ user-space cache for the current CPU number value, which
+ speeds up getting the current CPU number from user-space,
+ as well as an ABI to speed up user-space operations on
+ per-CPU data.
+
+ If unsure, say Y.
+
+config DEBUG_RSEQ
+ default n
+ bool "Enable debugging of rseq() system call" if EXPERT
+ depends on RSEQ && DEBUG_KERNEL
+ help
+ Enable extra debugging checks for the rseq system call.
+
+ If unsure, say N.
+
+config CACHESTAT_SYSCALL
+ bool "Enable cachestat() system call" if EXPERT
+ default y
+ help
+ Enable the cachestat system call, which queries the page cache
+ statistics of a file (number of cached pages, dirty pages,
+ pages marked for writeback, (recently) evicted pages).
+
+ If unsure say Y here.
+
+config PC104
+ bool "PC/104 support" if EXPERT
+ help
+ Expose PC/104 form factor device drivers and options available for
+ selection and configuration. Enable this option if your target
+ machine has a PC/104 bus.
+
config KALLSYMS
bool "Load all symbols for debugging/ksymoops" if EXPERT
default y
@@ -1740,57 +1805,12 @@ config KALLSYMS_BASE_RELATIVE
# end of the "standard kernel features (expert users)" menu
-# syscall, maps, verifier
-
config ARCH_HAS_MEMBARRIER_CALLBACKS
bool
config ARCH_HAS_MEMBARRIER_SYNC_CORE
bool
-config KCMP
- bool "Enable kcmp() system call" if EXPERT
- help
- Enable the kernel resource comparison system call. It provides
- user-space with the ability to compare two processes to see if they
- share a common resource, such as a file descriptor or even virtual
- memory space.
-
- If unsure, say N.
-
-config RSEQ
- bool "Enable rseq() system call" if EXPERT
- default y
- depends on HAVE_RSEQ
- select MEMBARRIER
- help
- Enable the restartable sequences system call. It provides a
- user-space cache for the current CPU number value, which
- speeds up getting the current CPU number from user-space,
- as well as an ABI to speed up user-space operations on
- per-CPU data.
-
- If unsure, say Y.
-
-config CACHESTAT_SYSCALL
- bool "Enable cachestat() system call" if EXPERT
- default y
- help
- Enable the cachestat system call, which queries the page cache
- statistics of a file (number of cached pages, dirty pages,
- pages marked for writeback, (recently) evicted pages).
-
- If unsure say Y here.
-
-config DEBUG_RSEQ
- default n
- bool "Enabled debugging of rseq() system call" if EXPERT
- depends on RSEQ && DEBUG_KERNEL
- help
- Enable extra debugging checks for the rseq system call.
-
- If unsure, say N.
-
config HAVE_PERF_EVENTS
bool
help
@@ -1805,13 +1825,6 @@ config PERF_USE_VMALLOC
help
See tools/perf/design.txt for details
-config PC104
- bool "PC/104 support" if EXPERT
- help
- Expose PC/104 form factor device drivers and options available for
- selection and configuration. Enable this option if your target
- machine has a PC/104 bus.
-
menu "Kernel Performance Events And Counters"
config PERF_EVENTS
@@ -1885,11 +1898,11 @@ config RUST
bool "Rust support"
depends on HAVE_RUST
depends on RUST_IS_AVAILABLE
+ depends on !CFI_CLANG
depends on !MODVERSIONS
depends on !GCC_PLUGINS
depends on !RANDSTRUCT
depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE
- select CONSTRUCTORS
help
Enables Rust support in the kernel.
@@ -1930,11 +1943,6 @@ config RT_MUTEXES
bool
default y if PREEMPT_RT
-config BASE_SMALL
- int
- default 0 if BASE_FULL
- default 1 if !BASE_FULL
-
config MODULE_SIG_FORMAT
def_bool n
select SYSTEM_DATA_VERIFICATION
@@ -1972,6 +1980,9 @@ source "kernel/Kconfig.locks"
config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
bool
+config ARCH_HAS_PREPARE_SYNC_CORE_CMD
+ bool
+
config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
bool
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 5fdef94f0864..6af29da8889e 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -159,8 +159,7 @@ static int __init do_mount_root(const char *name, const char *fs,
if (!p)
return -ENOMEM;
data_page = page_address(p);
- /* zero-pad. init_mount() will make sure it's terminated */
- strncpy(data_page, data, PAGE_SIZE);
+ strscpy_pad(data_page, data, PAGE_SIZE);
}
ret = init_mount(name, "/root", fs, flags, data_page);
@@ -208,6 +207,9 @@ retry:
goto out;
case -EACCES:
case -EINVAL:
+#ifdef CONFIG_BLOCK
+ init_flush_fput();
+#endif
continue;
}
/*
@@ -510,7 +512,10 @@ struct file_system_type rootfs_fs_type = {
void __init init_rootfs(void)
{
- if (IS_ENABLED(CONFIG_TMPFS) && !saved_root_name[0] &&
- (!root_fs_names || strstr(root_fs_names, "tmpfs")))
- is_tmpfs = true;
+ if (IS_ENABLED(CONFIG_TMPFS)) {
+ if (!saved_root_name[0] && !root_fs_names)
+ is_tmpfs = true;
+ else if (root_fs_names && !!strstr(root_fs_names, "tmpfs"))
+ is_tmpfs = true;
+ }
}
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 15e372b00ce7..6069ea3eb80d 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -9,6 +9,8 @@
#include <linux/major.h>
#include <linux/root_dev.h>
#include <linux/init_syscalls.h>
+#include <linux/task_work.h>
+#include <linux/file.h>
void mount_root_generic(char *name, char *pretty_name, int flags);
void mount_root(char *root_device_name);
@@ -41,3 +43,10 @@ static inline bool initrd_load(char *root_device_name)
}
#endif
+
+/* Ensure that async file closing has finished to prevent spurious errors. */
+static inline void init_flush_fput(void)
+{
+ flush_delayed_fput();
+ task_work_run();
+}
diff --git a/init/init_task.c b/init/init_task.c
index 9b41f00d30e2..eeb110c65fe2 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -12,6 +12,7 @@
#include <linux/audit.h>
#include <linux/numa.h>
#include <linux/scs.h>
+#include <linux/plist.h>
#include <linux/uaccess.h>
@@ -51,8 +52,7 @@ static struct sighand_struct init_sighand = {
};
#ifdef CONFIG_SHADOW_CALL_STACK
-unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
- __init_task_data = {
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] = {
[(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
};
#endif
@@ -61,12 +61,7 @@ unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
* Set up the first task table, touch at your own risk!. Base=0,
* limit=0x1fffff (=2MB)
*/
-struct task_struct init_task
-#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
- __init_task_data
-#endif
- __aligned(L1_CACHE_BYTES)
-= {
+struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
#ifdef CONFIG_THREAD_INFO_IN_TASK
.thread_info = INIT_THREAD_INFO(init_task),
.stack_refcount = REFCOUNT_INIT(1),
@@ -82,6 +77,7 @@ struct task_struct init_task
.cpus_ptr = &init_task.cpus_mask,
.user_cpus_ptr = NULL,
.cpus_mask = CPU_MASK_ALL,
+ .max_allowed_capacity = SCHED_CAPACITY_SCALE,
.nr_cpus_allowed= NR_CPUS,
.mm = NULL,
.active_mm = &init_mm,
@@ -152,6 +148,7 @@ struct task_struct init_task
.rcu_tasks_holdout = false,
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
.rcu_tasks_idle_cpu = -1,
+ .rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list),
#endif
#ifdef CONFIG_TASKS_TRACE_RCU
.trc_reader_nesting = 0,
diff --git a/init/initramfs.c b/init/initramfs.c
index 8d0fd946cdd2..a298a3854a80 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -16,9 +16,10 @@
#include <linux/mm.h>
#include <linux/namei.h>
#include <linux/init_syscalls.h>
-#include <linux/task_work.h>
#include <linux/umh.h>
+#include "do_mounts.h"
+
static __initdata bool csum_present;
static __initdata u32 io_csum;
@@ -366,7 +367,7 @@ static int __init do_name(void)
if (S_ISREG(mode)) {
int ml = maybe_link();
if (ml >= 0) {
- int openflags = O_WRONLY|O_CREAT;
+ int openflags = O_WRONLY|O_CREAT|O_LARGEFILE;
if (ml != 1)
openflags |= O_TRUNC;
wfile = filp_open(collected, openflags, mode);
@@ -574,6 +575,16 @@ extern unsigned long __initramfs_size;
#include <linux/initrd.h>
#include <linux/kexec.h>
+static ssize_t raw_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t pos, size_t count)
+{
+ memcpy(buf, attr->private + pos, count);
+ return count;
+}
+
+static BIN_ATTR(initrd, 0440, raw_read, NULL, 0);
+
void __init reserve_initrd_mem(void)
{
phys_addr_t start;
@@ -632,7 +643,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
"initrd");
}
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_RESERVE
static bool __init kexec_free_initrd(void)
{
unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
@@ -669,11 +680,9 @@ static void __init populate_initrd_image(char *err)
struct file *file;
loff_t pos = 0;
- unpack_to_rootfs(__initramfs_start, __initramfs_size);
-
printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
err);
- file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
+ file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700);
if (IS_ERR(file))
return;
@@ -715,13 +724,18 @@ done:
* If the initrd region is overlapped with crashkernel reserved region,
* free only memory that is not part of crashkernel region.
*/
- if (!do_retain_initrd && initrd_start && !kexec_free_initrd())
+ if (!do_retain_initrd && initrd_start && !kexec_free_initrd()) {
free_initrd_mem(initrd_start, initrd_end);
+ } else if (do_retain_initrd && initrd_start) {
+ bin_attr_initrd.size = initrd_end - initrd_start;
+ bin_attr_initrd.private = (void *)initrd_start;
+ if (sysfs_create_bin_file(firmware_kobj, &bin_attr_initrd))
+ pr_err("Failed to create initrd sysfs file");
+ }
initrd_start = 0;
initrd_end = 0;
- flush_delayed_fput();
- task_work_run();
+ init_flush_fput();
}
static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain);
diff --git a/init/main.c b/init/main.c
index e24b0780fdff..f4e6001ebe79 100644
--- a/init/main.c
+++ b/init/main.c
@@ -88,6 +88,7 @@
#include <linux/sched/task_stack.h>
#include <linux/context_tracking.h>
#include <linux/random.h>
+#include <linux/moduleloader.h>
#include <linux/list.h>
#include <linux/integrity.h>
#include <linux/proc_ns.h>
@@ -99,6 +100,8 @@
#include <linux/init_syscalls.h>
#include <linux/stackdepot.h>
#include <linux/randomize_kstack.h>
+#include <linux/pidfs.h>
+#include <linux/ptdump.h>
#include <net/net_namespace.h>
#include <asm/io.h>
@@ -484,6 +487,11 @@ static int __init warn_bootconfig(char *str)
early_param("bootconfig", warn_bootconfig);
+bool __init cmdline_has_extra_options(void)
+{
+ return extra_command_line || extra_init_args;
+}
+
/* Change NUL term back to "=", to make "param" the whole string. */
static void __init repair_env_string(char *param, char *val)
{
@@ -603,7 +611,6 @@ static int __init rdinit_setup(char *str)
__setup("rdinit=", rdinit_setup);
#ifndef CONFIG_SMP
-static const unsigned int setup_max_cpus = NR_CPUS;
static inline void setup_nr_cpu_ids(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { }
#endif
@@ -629,6 +636,8 @@ static void __init setup_command_line(char *command_line)
if (!saved_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
+ len = xlen + strlen(command_line) + 1;
+
static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
if (!static_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len);
@@ -681,7 +690,7 @@ static void __init setup_command_line(char *command_line)
static __initdata DECLARE_COMPLETION(kthreadd_done);
-noinline void __ref __noreturn rest_init(void)
+static noinline void __ref __noreturn rest_init(void)
{
struct task_struct *tsk;
int pid;
@@ -776,6 +785,10 @@ void __init __weak smp_setup_processor_id(void)
{
}
+void __init __weak smp_prepare_boot_cpu(void)
+{
+}
+
# if THREAD_SIZE >= PAGE_SIZE
void __init __weak thread_stack_cache_init(void)
{
@@ -822,11 +835,6 @@ static int __init early_randomize_kstack_offset(char *buf)
early_param("randomize_kstack_offset", early_randomize_kstack_offset);
#endif
-void __init __weak __noreturn arch_call_rest_init(void)
-{
- rest_init();
-}
-
static void __init print_unknown_bootoptions(void)
{
char *unknown_options;
@@ -1059,6 +1067,7 @@ void start_kernel(void)
seq_file_init();
proc_root_init();
nsfs_init();
+ pidfs_init();
cpuset_init();
cgroup_init();
taskstats_init_early();
@@ -1069,7 +1078,7 @@ void start_kernel(void)
kcsan_init();
/* Do the rest non-__init'ed, we're now alive */
- arch_call_rest_init();
+ rest_init();
/*
* Avoid stack canaries in callers of boot_init_stack_canary for gcc-10
@@ -1396,33 +1405,28 @@ static int __init set_debug_rodata(char *str)
early_param("rodata", set_debug_rodata);
#endif
-#ifdef CONFIG_STRICT_KERNEL_RWX
static void mark_readonly(void)
{
- if (rodata_enabled) {
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && rodata_enabled) {
/*
* load_module() results in W+X mappings, which are cleaned
- * up with call_rcu(). Let's make sure that queued work is
+ * up with init_free_wq. Let's make sure that queued work is
* flushed so that we don't hit false positives looking for
* insecure pages which are W+X.
*/
- rcu_barrier();
+ flush_module_init_free_work();
+ jump_label_init_ro();
mark_rodata_ro();
+ debug_checkwx();
rodata_test();
- } else
+ } else if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
pr_info("Kernel memory protection disabled.\n");
+ } else if (IS_ENABLED(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)) {
+ pr_warn("Kernel memory protection not selected by kernel config.\n");
+ } else {
+ pr_warn("This architecture does not have kernel memory protection.\n");
+ }
}
-#elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)
-static inline void mark_readonly(void)
-{
- pr_warn("Kernel memory protection not selected by kernel config.\n");
-}
-#else
-static inline void mark_readonly(void)
-{
- pr_warn("This architecture does not have kernel memory protection.\n");
-}
-#endif
void __weak free_initmem(void)
{
@@ -1545,6 +1549,7 @@ static noinline void __init kernel_init_freeable(void)
sched_init_smp();
workqueue_init_topology();
+ async_init();
padata_init();
page_alloc_init_late();