From ea3752ba9685b47db4571ddaee39344cf2b0bf45 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 May 2023 12:03:27 +0100 Subject: arm64: module: mandate MODULE_PLTS Contemporary kernels and modules can be relatively large, especially when common debug options are enabled. Using GCC 12.1.0, a v6.3-rc7 defconfig kernel is ~38M, and with PROVE_LOCKING + KASAN_INLINE enabled this expands to ~117M. Shanker reports [1] that the NVIDIA GPU driver alone can consume 110M of module space in some configurations. Both KASLR and ARM64_ERRATUM_843419 select MODULE_PLTS, so anyone wanting a kernel to have KASLR or run on Cortex-A53 will have MODULE_PLTS selected. This is the case in defconfig and distribution kernels (e.g. Debian, Android, etc). Practically speaking, this means we're very likely to need MODULE_PLTS and while it's almost guaranteed that MODULE_PLTS will be selected, it is possible to disable support, and we have to maintain some awkward special cases for such unusual configurations. This patch removes the MODULE_PLTS config option, with the support code always enabled if MODULES is selected. This results in a slight simplification, and will allow for further improvement in subsequent patches. For any config which currently selects MODULE_PLTS, there will be no functional change as a result of this patch. [1] https://lore.kernel.org/linux-arm-kernel/159ceeab-09af-3174-5058-445bc8dcf85b@nvidia.com/ Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Shanker Donthineni Cc: Will Deacon Tested-by: Shanker Donthineni Link: https://lore.kernel.org/r/20230530110328.2213762-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) (limited to 'arch/arm64/Kconfig') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1201d25a8a4..a6002084f09c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -207,6 +207,7 @@ config ARM64 select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KVM + select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -577,7 +578,6 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" default y - select ARM64_MODULE_PLTS if MODULES help This option links the kernel with '--fix-cortex-a53-843419' and enables PLT support to replace certain ADRP instructions, which can @@ -2107,26 +2107,6 @@ config ARM64_SME register state capable of holding two dimensional matrix tiles to enable various matrix operations. -config ARM64_MODULE_PLTS - bool "Use PLTs to allow module memory to spill over into vmalloc area" - depends on MODULES - select HAVE_MOD_ARCH_SPECIFIC - help - Allocate PLTs when loading modules so that jumps and calls whose - targets are too far away for their relative offsets to be encoded - in the instructions themselves can be bounced via veneers in the - module's PLT. This allows modules to be allocated in the generic - vmalloc area after the dedicated module memory area has been - exhausted. - - When running with address space randomization (KASLR), the module - region itself may be too far away for ordinary relative jumps and - calls, and so in that case, module PLTs are required and cannot be - disabled. - - Specific errata workaround(s) might also force module PLTs to be - enabled (ARM64_ERRATUM_843419). - config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" select ARM_GIC_V3 @@ -2167,7 +2147,6 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is @@ -2198,9 +2177,8 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the core kernel, so branch relocations are almost always in range unless - ARM64_MODULE_PLTS is enabled and the region is exhausted. In this - particular case of region exhaustion, modules might be able to fall - back to a larger 2GB area. + the region is exhausted. In this particular case of region + exhaustion, modules might be able to fall back to a larger 2GB area. config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) -- cgit v1.2.3 From 6df696cd9bc1ceed0e92e36908f88bbd16d18255 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 9 Jun 2023 22:01:02 +0000 Subject: arm64: errata: Mitigate Ampere1 erratum AC03_CPU_38 at stage-2 AmpereOne has an erratum in its implementation of FEAT_HAFDBS that required disabling the feature on the design. This was done by reporting the feature as not implemented in the ID register, although the corresponding control bits were not actually RES0. This does not align well with the requirements of the architecture, which mandates these bits be RES0 if HAFDBS isn't implemented. The kernel's use of stage-1 is unaffected, as the HA and HD bits are only set if HAFDBS is detected in the ID register. KVM, on the other hand, relies on the RES0 behavior at stage-2 to use the same value for VTCR_EL2 on any cpu in the system. Mitigate the non-RES0 behavior by leaving VTCR_EL2.HA clear on affected systems. Cc: stable@vger.kernel.org Cc: D Scott Phillips Cc: Darren Hart Acked-by: D Scott Phillips Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20230609220104.1836988-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- Documentation/arm64/silicon-errata.rst | 3 +++ arch/arm64/Kconfig | 19 +++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 7 +++++++ arch/arm64/kvm/hyp/pgtable.c | 14 +++++++++++--- arch/arm64/tools/cpucaps | 1 + 5 files changed, 41 insertions(+), 3 deletions(-) (limited to 'arch/arm64/Kconfig') diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 9e311bc43e05..cd46e2b20a81 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -52,6 +52,9 @@ stable kernels. | Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| Ampere | AmpereOne | AC03_CPU_38 | AMPERE_ERRATUM_AC03_CPU_38 | ++----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1201d25a8a4..0987c637fbf2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -406,6 +406,25 @@ menu "Kernel Features" menu "ARM errata workarounds via the alternatives framework" +config AMPERE_ERRATUM_AC03_CPU_38 + bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics" + default y + help + This option adds an alternative code sequence to work around Ampere + erratum AC03_CPU_38 on AmpereOne. + + The affected design reports FEAT_HAFDBS as not implemented in + ID_AA64MMFR1_EL1.HAFDBS, but (V)TCR_ELx.{HA,HD} are not RES0 + as required by the architecture. The unadvertised HAFDBS + implementation suffers from an additional erratum where hardware + A/D updates can occur after a PTE has been marked invalid. + + The workaround forces KVM to explicitly set VTCR_EL2.HA to 0, + which avoids enabling unadvertised hardware Access Flag management + at stage-2. + + If unsure, say Y. + config ARM64_WORKAROUND_CLEAN_CACHE bool diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 307faa2b4395..be66e94a21bd 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -729,6 +729,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)), .cpu_enable = cpu_clear_bf16_from_user_emulation, }, +#endif +#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 + { + .desc = "AmpereOne erratum AC03_CPU_38", + .capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38, + ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1), + }, #endif { } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 5282cb9ca4cf..32d92ce4bae5 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -611,10 +611,18 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) #ifdef CONFIG_ARM64_HW_AFDBM /* * Enable the Hardware Access Flag management, unconditionally - * on all CPUs. The features is RES0 on CPUs without the support - * and must be ignored by the CPUs. + * on all CPUs. In systems that have asymmetric support for the feature + * this allows KVM to leverage hardware support on the subset of cores + * that implement the feature. + * + * The architecture requires VTCR_EL2.HA to be RES0 (thus ignored by + * hardware) on implementations that do not advertise support for the + * feature. As such, setting HA unconditionally is safe, unless you + * happen to be running on a design that has unadvertised support for + * HAFDBS. Here be dragons. */ - vtcr |= VTCR_EL2_HA; + if (!cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) + vtcr |= VTCR_EL2_HA; #endif /* CONFIG_ARM64_HW_AFDBM */ /* Set the vmid bits */ diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 40ba95472594..9f9a2d6652eb 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -77,6 +77,7 @@ WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2645198 WORKAROUND_2658417 +WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE -- cgit v1.2.3