author    Thomas Gleixner <tglx@linutronix.de>  2024-02-14 00:05:53 +0300
committer Thomas Gleixner <tglx@linutronix.de>  2024-02-16 00:07:43 +0300
commit    7c0edad3643f4493c4dafa6f5dfcfb1a86432156 (patch)
tree      b6e88ee7e910a8933a16cd10e850c04e973e7743 /arch/x86/kernel
parent    0e53e7b656cf5aa67c08eca381cec858478195a7 (diff)
download  linux-7c0edad3643f4493c4dafa6f5dfcfb1a86432156.tar.xz
x86/cpu/topology: Rework possible CPU management
Managing possible CPUs is an unreadable and incomprehensible maze. Aside from
that, it is backwards because it applies the command line limits only after
registering all APICs.

Rewrite it so that it:

  - Applies the command line limits upfront so that only the allowed amount
    of APIC IDs can be registered.

  - Applies eventual late restrictions in an understandable way.

  - Uses simple min_t() calculations which are trivial to follow.

  - Provides a separate function for resetting to UP mode late in the
    bringup process.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Tested-by: Sohil Mehta <sohil.mehta@intel.com>
Link: https://lore.kernel.org/r/20240213210252.290098853@linutronix.de
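As a rough illustration of the min_t() based sizing the rework ends up with, the standalone
userspace sketch below clamps a firmware-registered APIC count to nr_cpu_ids and treats the
remaining slots as hotplug CPUs. The min_u() helper and all numbers are made up for the
example; the kernel code itself uses min_t() and the real topo_info counters shown in the
diff further down.

	#include <stdio.h>

	/* Userspace stand-in for the kernel's min_t(unsigned int, ...) */
	static unsigned int min_u(unsigned int a, unsigned int b)
	{
		return a < b ? a : b;
	}

	int main(void)
	{
		/* Hypothetical example inputs */
		unsigned int nr_cpu_ids = 8;	/* limit from NR_CPUS / 'nr_cpus=' / 'possible_cpus=' */
		unsigned int assigned   = 6;	/* enabled APIC IDs registered by the firmware */
		unsigned int disabled   = 4;	/* disabled (hot-pluggable) APIC IDs */

		unsigned int total   = assigned + disabled;
		unsigned int allowed = min_u(total, nr_cpu_ids);

		/* Trivial clamping: present CPUs first, the rest become hotplug slots */
		assigned = min_u(allowed, assigned);
		disabled = allowed - assigned;

		/* Prints: Allowing 6 present CPUs plus 2 hotplug CPUs */
		printf("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
		return 0;
	}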
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/topology.c  176
-rw-r--r--  arch/x86/kernel/setup.c           9
-rw-r--r--  arch/x86/kernel/smpboot.c         6
3 files changed, 111 insertions(+), 80 deletions(-)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 80625f4194de..fc47f5216f9a 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -5,6 +5,7 @@
#include <xen/xen.h>
#include <asm/apic.h>
+#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/smp.h>
@@ -85,73 +86,6 @@ early_initcall(smp_init_primary_thread_mask);
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif
-static int __initdata setup_possible_cpus = -1;
-
-/*
- * cpu_possible_mask should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and don't expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_mask on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * - Ashok Raj
- *
- * Three ways to find out the number of additional hotplug CPUs:
- * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with possible_cpus=NUM
- * - Otherwise don't reserve additional CPUs.
- * We do this because additional CPUs waste a lot of memory.
- * -AK
- */
-__init void prefill_possible_map(void)
-{
-	unsigned int num_processors = topo_info.nr_assigned_cpus;
-	unsigned int disabled_cpus = topo_info.nr_disabled_cpus;
-	int i, possible;
-
-	i = setup_max_cpus ?: 1;
-	if (setup_possible_cpus == -1) {
-		possible = topo_info.nr_assigned_cpus;
-#ifdef CONFIG_HOTPLUG_CPU
-		if (setup_max_cpus)
-			possible += num_processors;
-#else
-		if (possible > i)
-			possible = i;
-#endif
-	} else
-		possible = setup_possible_cpus;
-
-	total_cpus = max_t(int, possible, num_processors + disabled_cpus);
-
-	/* nr_cpu_ids could be reduced via nr_cpus= */
-	if (possible > nr_cpu_ids) {
-		pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
-			possible, nr_cpu_ids);
-		possible = nr_cpu_ids;
-	}
-
-#ifdef CONFIG_HOTPLUG_CPU
-	if (!setup_max_cpus)
-#endif
-	if (possible > i) {
-		pr_warn("%d Processors exceeds max_cpus limit of %u\n",
-			possible, setup_max_cpus);
-		possible = i;
-	}
-
-	set_nr_cpu_ids(possible);
-
-	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
-		possible, max_t(int, possible - num_processors, 0));
-
-	reset_cpu_possible_mask();
-
-	for (i = 0; i < possible; i++)
-		set_cpu_possible(i, true);
-}
-
static int topo_lookup_cpuid(u32 apic_id)
{
	int i;
@@ -293,12 +227,114 @@ void topology_hotunplug_apic(unsigned int cpu)
}
#endif
-static int __init _setup_possible_cpus(char *str)
+#ifdef CONFIG_SMP
+static unsigned int max_possible_cpus __initdata = NR_CPUS;
+
+/**
+ * topology_apply_cmdline_limits_early - Apply topology command line limits early
+ *
+ * Ensure that command line limits are in effect before firmware parsing
+ * takes place.
+ */
+void __init topology_apply_cmdline_limits_early(void)
{
-	get_option(&str, &setup_possible_cpus);
+	unsigned int possible = nr_cpu_ids;
+
+	/* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
+	if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
+		possible = 1;
+
+	/* 'possible_cpus=N' */
+	possible = min_t(unsigned int, max_possible_cpus, possible);
+
+	if (possible < nr_cpu_ids) {
+		pr_info("Limiting to %u possible CPUs\n", possible);
+		set_nr_cpu_ids(possible);
+	}
+}
+
+static __init bool restrict_to_up(void)
+{
+	if (!smp_found_config || ioapic_is_disabled)
+		return true;
+	/*
+	 * XEN PV is special as it does not advertise the local APIC
+	 * properly, but provides a fake topology for it so that the
+	 * infrastructure works. So don't apply the restrictions vs. APIC
+	 * here.
+	 */
+	if (xen_pv_domain())
+		return false;
+
+	return apic_is_disabled;
+}
+
+void __init topology_init_possible_cpus(void)
+{
+	unsigned int assigned = topo_info.nr_assigned_cpus;
+	unsigned int disabled = topo_info.nr_disabled_cpus;
+	unsigned int total = assigned + disabled;
+	unsigned int cpu, allowed = 1;
+
+	if (!restrict_to_up()) {
+		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
+			disabled += assigned - nr_cpu_ids;
+			assigned = nr_cpu_ids;
+		}
+		allowed = min_t(unsigned int, total, nr_cpu_ids);
+	}
+
+	if (total > allowed)
+		pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);
+
+	assigned = min_t(unsigned int, allowed, assigned);
+	disabled = allowed - assigned;
+
+	topo_info.nr_assigned_cpus = assigned;
+	topo_info.nr_disabled_cpus = disabled;
+
+	total_cpus = allowed;
+	set_nr_cpu_ids(allowed);
+
+	pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
+	if (topo_info.nr_rejected_cpus)
+		pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);
+
+	init_cpu_present(cpumask_of(0));
+	init_cpu_possible(cpumask_of(0));
+
+	for (cpu = 0; cpu < allowed; cpu++) {
+		u32 apicid = cpuid_to_apicid[cpu];
+
+		set_cpu_possible(cpu, true);
+
+		if (apicid == BAD_APICID)
+			continue;
+
+		set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
+	}
+}
+
+/*
+ * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
+ */
+void __init topology_reset_possible_cpus_up(void)
+{
+	init_cpu_present(cpumask_of(0));
+	init_cpu_possible(cpumask_of(0));
+
+	bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
+	if (topo_info.boot_cpu_apic_id != BAD_APICID)
+		set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
+}
+
+static int __init setup_possible_cpus(char *str)
+{
+	get_option(&str, &max_possible_cpus);
	return 0;
}
-early_param("possible_cpus", _setup_possible_cpus);
+early_param("possible_cpus", setup_possible_cpus);
+#endif
static int __init apic_set_disabled_cpu_apicid(char *arg)
{
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b1e52ac164b1..4e320d4d3898 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1131,6 +1131,8 @@ void __init setup_arch(char **cmdline_p)
	early_quirks();
+	topology_apply_cmdline_limits_early();
+
	/*
	 * Parse SMP configuration. Try ACPI first and then the platform
	 * specific parser.
@@ -1138,13 +1140,10 @@ void __init setup_arch(char **cmdline_p)
	acpi_boot_init();
	x86_init.mpparse.parse_smp_cfg();
-	/*
-	 * Systems w/o ACPI and mptables might not have it mapped the local
-	 * APIC yet, but prefill_possible_map() might need to access it.
-	 */
+	/* Last opportunity to detect and map the local APIC */
	init_apic_mappings();
-	prefill_possible_map();
+	topology_init_possible_cpus();
	init_cpu_to_node();
	init_gi_nodes();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d850faca946d..7f85f174690f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1147,11 +1147,7 @@ static __init void disable_smp(void)
pr_info("SMP disabled\n");
disable_ioapic_support();
-
- init_cpu_present(cpumask_of(0));
- init_cpu_possible(cpumask_of(0));
-
- reset_phys_cpu_present_map(smp_found_config ? boot_cpu_physical_apicid : 0);
+ topology_reset_possible_cpus_up();
cpumask_set_cpu(0, topology_sibling_cpumask(0));
cpumask_set_cpu(0, topology_core_cpumask(0));