From b11b998e983119e27b33210a0128b27df6ae5f78 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:24 -0700 Subject: x86/fpu: fix asm/fpu/types.h include guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Unified cross-architecture kernel-mode FPU API", v4. This series unifies the kernel-mode FPU API across several architectures by wrapping the existing functions (where needed) in consistently-named functions placed in a consistent header location, with mostly the same semantics: they can be called from preemptible or non-preemptible task context, and are not assumed to be reentrant. Architectures are also expected to provide CFLAGS adjustments for compiling FPU-dependent code. For the moment, SIMD/vector units are out of scope for this common API. This allows us to remove the ifdeffery and duplicated Makefile logic at each FPU user. It then implements the common API on RISC-V, and converts a couple of users to the new API: the AMDGPU DRM driver, and the FPU self test. The underlying goal of this series is to allow using newer AMD GPUs (e.g. Navi) on RISC-V boards such as SiFive's HiFive Unmatched. Those GPUs need CONFIG_DRM_AMD_DC_FP to initialize, which requires kernel-mode FPU support. This patch (of 15): The include guard should match the filename, or it will conflict with the newly-added asm/fpu.h. Link: https://lkml.kernel.org/r/20240329072441.591471-1-samuel.holland@sifive.com Link: https://lkml.kernel.org/r/20240329072441.591471-10-samuel.holland@sifive.com Signed-off-by: Samuel Holland Acked-by: Dave Hansen Acked-by: Christian König Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: Alex Deucher Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: WANG Xuerui Signed-off-by: Andrew Morton --- arch/x86/include/asm/fpu/types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index ace9aa3b78a3..eb17f31b06d2 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -2,8 +2,8 @@ /* * FPU data structures: */ -#ifndef _ASM_X86_FPU_H -#define _ASM_X86_FPU_H +#ifndef _ASM_X86_FPU_TYPES_H +#define _ASM_X86_FPU_TYPES_H #include @@ -596,4 +596,4 @@ struct fpu_state_config { /* FPU state configuration information */ extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg; -#endif /* _ASM_X86_FPU_H */ +#endif /* _ASM_X86_FPU_TYPES_H */ -- cgit v1.2.3 From 6cbd1d6d36c5d8312de99d1dfa3bec40ac840ce0 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:16 -0700 Subject: arch: add ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several architectures provide an API to enable the FPU and run floating-point SIMD code in kernel space. However, the function names, header locations, and semantics are inconsistent across architectures, and FPU support may be gated behind other Kconfig options. provide a standard way for architectures to declare that kernel space FPU support is available. Architectures selecting this option must implement what is currently the most common API (kernel_fpu_begin() and kernel_fpu_end(), plus a new function kernel_fpu_available()) and provide the appropriate CFLAGS for compiling floating-point C code. Link: https://lkml.kernel.org/r/20240329072441.591471-2-samuel.holland@sifive.com Signed-off-by: Samuel Holland Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- Documentation/core-api/floating-point.rst | 78 +++++++++++++++++++++++++++++++ Documentation/core-api/index.rst | 1 + Makefile | 5 ++ arch/Kconfig | 6 +++ include/linux/fpu.h | 12 +++++ 5 files changed, 102 insertions(+) create mode 100644 Documentation/core-api/floating-point.rst create mode 100644 include/linux/fpu.h (limited to 'arch') diff --git a/Documentation/core-api/floating-point.rst b/Documentation/core-api/floating-point.rst new file mode 100644 index 000000000000..a8d0d4b05052 --- /dev/null +++ b/Documentation/core-api/floating-point.rst @@ -0,0 +1,78 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Floating-point API +================== + +Kernel code is normally prohibited from using floating-point (FP) registers or +instructions, including the C float and double data types. This rule reduces +system call overhead, because the kernel does not need to save and restore the +userspace floating-point register state. + +However, occasionally drivers or library functions may need to include FP code. +This is supported by isolating the functions containing FP code to a separate +translation unit (a separate source file), and saving/restoring the FP register +state around calls to those functions. This creates "critical sections" of +floating-point usage. + +The reason for this isolation is to prevent the compiler from generating code +touching the FP registers outside these critical sections. Compilers sometimes +use FP registers to optimize inlined ``memcpy`` or variable assignment, as +floating-point registers may be wider than general-purpose registers. + +Usability of floating-point code within the kernel is architecture-specific. +Additionally, because a single kernel may be configured to support platforms +both with and without a floating-point unit, FPU availability must be checked +both at build time and at run time. + +Several architectures implement the generic kernel floating-point API from +``linux/fpu.h``, as described below. Some other architectures implement their +own unique APIs, which are documented separately. + +Build-time API +-------------- + +Floating-point code may be built if the option ``ARCH_HAS_KERNEL_FPU_SUPPORT`` +is enabled. For C code, such code must be placed in a separate file, and that +file must have its compilation flags adjusted using the following pattern:: + + CFLAGS_foo.o += $(CC_FLAGS_FPU) + CFLAGS_REMOVE_foo.o += $(CC_FLAGS_NO_FPU) + +Architectures are expected to define one or both of these variables in their +top-level Makefile as needed. For example:: + + CC_FLAGS_FPU := -mhard-float + +or:: + + CC_FLAGS_NO_FPU := -msoft-float + +Normal kernel code is assumed to use the equivalent of ``CC_FLAGS_NO_FPU``. + +Runtime API +----------- + +The runtime API is provided in ``linux/fpu.h``. This header cannot be included +from files implementing FP code (those with their compilation flags adjusted as +above). Instead, it must be included when defining the FP critical sections. + +.. c:function:: bool kernel_fpu_available( void ) + + This function reports if floating-point code can be used on this CPU or + platform. The value returned by this function is not expected to change + at runtime, so it only needs to be called once, not before every + critical section. + +.. c:function:: void kernel_fpu_begin( void ) + void kernel_fpu_end( void ) + + These functions create a floating-point critical section. It is only + valid to call ``kernel_fpu_begin()`` after a previous call to + ``kernel_fpu_available()`` returned ``true``. These functions are only + guaranteed to be callable from (preemptible or non-preemptible) process + context. + + Preemption may be disabled inside critical sections, so their size + should be minimized. They are *not* required to be reentrant. If the + caller expects to nest critical sections, it must implement its own + reference counting. diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 7a3a08d81f11..974beccd671f 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -48,6 +48,7 @@ Library functionality that is used throughout the kernel. errseq wrappers/atomic_t wrappers/atomic_bitops + floating-point Low level entry and exit ======================== diff --git a/Makefile b/Makefile index 77b283d16e73..452913ce42c8 100644 --- a/Makefile +++ b/Makefile @@ -970,6 +970,11 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI) export CC_FLAGS_CFI endif +# Architectures can define flags to add/remove for floating-point support +CC_FLAGS_FPU += -D_LINUX_FPU_COMPILATION_UNIT +export CC_FLAGS_FPU +export CC_FLAGS_NO_FPU + ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0) # Set the minimal function alignment. Use the newer GCC option # -fmin-function-alignment if it is available, or fall back to -falign-funtions. diff --git a/arch/Kconfig b/arch/Kconfig index b34946d90e4b..975dd22a2dbd 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1594,6 +1594,12 @@ config ARCH_HAS_NONLEAF_PMD_YOUNG address translations. Page table walkers that clear the accessed bit may use this capability to reduce their search space. +config ARCH_HAS_KERNEL_FPU_SUPPORT + bool + help + Architectures that select this option can run floating-point code in + the kernel, as described in Documentation/core-api/floating-point.rst. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/include/linux/fpu.h b/include/linux/fpu.h new file mode 100644 index 000000000000..2fb63e22913b --- /dev/null +++ b/include/linux/fpu.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_FPU_H +#define _LINUX_FPU_H + +#ifdef _LINUX_FPU_COMPILATION_UNIT +#error FP code must be compiled separately. See Documentation/core-api/floating-point.rst. +#endif + +#include + +#endif -- cgit v1.2.3 From cb2b7b7de805cd649851fb8f2f36df3b8425682e Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:17 -0700 Subject: ARM: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ARM provides an equivalent to the common kernel-mode FPU API, but in a different header and using different function names. Add a wrapper header, and export CFLAGS adjustments as found in lib/raid6/Makefile. [samuel.holland@sifive.com: ARM: do not select ARCH_HAS_KERNEL_FPU_SUPPORT] Link: https://lkml.kernel.org/r/20240509013727.648600-1-samuel.holland@sifive.com Link: https://lkml.kernel.org/r/20240329072441.591471-3-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Cc: Thiago Jung Bauermann Cc: Ard Biesheuvel Signed-off-by: Andrew Morton --- arch/arm/Makefile | 7 +++++++ arch/arm/include/asm/fpu.h | 15 +++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 arch/arm/include/asm/fpu.h (limited to 'arch') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index d82908b1b1bb..71afdd98ddf2 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -130,6 +130,13 @@ endif # Accept old syntax despite ".syntax unified" AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) +# The GCC option -ffreestanding is required in order to compile code containing +# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) +CC_FLAGS_FPU := -ffreestanding +# Enable +CC_FLAGS_FPU += -isystem $(shell $(CC) -print-file-name=include) +CC_FLAGS_FPU += -march=armv7-a -mfloat-abi=softfp -mfpu=neon + ifeq ($(CONFIG_THUMB2_KERNEL),y) CFLAGS_ISA :=-Wa,-mimplicit-it=always $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb diff --git a/arch/arm/include/asm/fpu.h b/arch/arm/include/asm/fpu.h new file mode 100644 index 000000000000..2ae50bdce59b --- /dev/null +++ b/arch/arm/include/asm/fpu.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef __ASM_FPU_H +#define __ASM_FPU_H + +#include + +#define kernel_fpu_available() cpu_has_neon() +#define kernel_fpu_begin() kernel_neon_begin() +#define kernel_fpu_end() kernel_neon_end() + +#endif /* ! __ASM_FPU_H */ -- cgit v1.2.3 From c41624315b602da32f59e70baa825c5f11fea892 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:18 -0700 Subject: ARM: crypto: use CC_FLAGS_FPU for NEON CFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that CC_FLAGS_FPU is exported and can be used anywhere in the source tree, use it instead of duplicating the flags here. Link: https://lkml.kernel.org/r/20240329072441.591471-4-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm/lib/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 650404be6768..0ca5aae1bcc3 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -40,8 +40,7 @@ $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S ifeq ($(CONFIG_KERNEL_MODE_NEON),y) - NEON_FLAGS := -march=armv7-a -mfloat-abi=softfp -mfpu=neon - CFLAGS_xor-neon.o += $(NEON_FLAGS) + CFLAGS_xor-neon.o += $(CC_FLAGS_FPU) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o endif -- cgit v1.2.3 From 71883ae3527808d445c019870512d4b9fea2332b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:19 -0700 Subject: arm64: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arm64 provides an equivalent to the common kernel-mode FPU API, but in a different header and using different function names. Add a wrapper header, and export CFLAGS adjustments as found in lib/raid6/Makefile. Link: https://lkml.kernel.org/r/20240329072441.591471-5-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm64/Kconfig | 1 + arch/arm64/Makefile | 9 ++++++++- arch/arm64/include/asm/fpu.h | 15 +++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/fpu.h (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 00cbb794aeda..2f31376e85aa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -30,6 +30,7 @@ config ARM64 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_KERNEL_FPU_SUPPORT if KERNEL_MODE_NEON select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b8b1d4f4a572..3f0f35fd5bb7 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -36,7 +36,14 @@ ifeq ($(CONFIG_BROKEN_GAS_INST),y) $(warning Detected assembler with broken .inst; disassembly will be unreliable) endif -KBUILD_CFLAGS += -mgeneral-regs-only \ +# The GCC option -ffreestanding is required in order to compile code containing +# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) +CC_FLAGS_FPU := -ffreestanding +# Enable +CC_FLAGS_FPU += -isystem $(shell $(CC) -print-file-name=include) +CC_FLAGS_NO_FPU := -mgeneral-regs-only + +KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) \ $(compat_vdso) $(cc_has_k_constraint) KBUILD_CFLAGS += $(call cc-disable-warning, psabi) KBUILD_AFLAGS += $(compat_vdso) diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h new file mode 100644 index 000000000000..2ae50bdce59b --- /dev/null +++ b/arch/arm64/include/asm/fpu.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef __ASM_FPU_H +#define __ASM_FPU_H + +#include + +#define kernel_fpu_available() cpu_has_neon() +#define kernel_fpu_begin() kernel_neon_begin() +#define kernel_fpu_end() kernel_neon_end() + +#endif /* ! __ASM_FPU_H */ -- cgit v1.2.3 From 7177089525d97bd8d7fefd6a9406f45d818410e0 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:20 -0700 Subject: arm64: crypto: use CC_FLAGS_FPU for NEON CFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that CC_FLAGS_FPU is exported and can be used anywhere in the source tree, use it instead of duplicating the flags here. Link: https://lkml.kernel.org/r/20240329072441.591471-6-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/arm64/lib/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 29490be2546b..13e6a2829116 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -7,10 +7,8 @@ lib-y := clear_user.o delay.o copy_from_user.o \ ifeq ($(CONFIG_KERNEL_MODE_NEON), y) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o -CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only -CFLAGS_xor-neon.o += -ffreestanding -# Enable -CFLAGS_xor-neon.o += -isystem $(shell $(CC) -print-file-name=include) +CFLAGS_xor-neon.o += $(CC_FLAGS_FPU) +CFLAGS_REMOVE_xor-neon.o += $(CC_FLAGS_NO_FPU) endif lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o -- cgit v1.2.3 From 372f662345d603c6e4e26864a8908826bf7a3e45 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:22 -0700 Subject: LoongArch: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LoongArch already provides kernel_fpu_begin() and kernel_fpu_end() in asm/fpu.h, so it only needs to add kernel_fpu_available() and export the CFLAGS adjustments. Link: https://lkml.kernel.org/r/20240329072441.591471-8-samuel.holland@sifive.com Signed-off-by: Samuel Holland Acked-by: WANG Xuerui Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/loongarch/Kconfig | 1 + arch/loongarch/Makefile | 5 ++++- arch/loongarch/include/asm/fpu.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 4bda59525a13..b94223f58908 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -18,6 +18,7 @@ config LOONGARCH select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV + select ARCH_HAS_KERNEL_FPU_SUPPORT if CPU_HAS_FPU select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index df6caf79537a..efb5440a43ec 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -26,6 +26,9 @@ endif 32bit-emul = elf32loongarch 64bit-emul = elf64loongarch +CC_FLAGS_FPU := -mfpu=64 +CC_FLAGS_NO_FPU := -msoft-float + ifdef CONFIG_UNWINDER_ORC orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h orc_hash_sh := $(srctree)/scripts/orc_hash.sh @@ -59,7 +62,7 @@ ld-emul = $(64bit-emul) cflags-y += -mabi=lp64s endif -cflags-y += -pipe -msoft-float +cflags-y += -pipe $(CC_FLAGS_NO_FPU) LDFLAGS_vmlinux += -static -n -nostdlib # When the assembler supports explicit relocation hint, we must use it. diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index c2d8962fda00..3177674228f8 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -21,6 +21,7 @@ struct sigcontext; +#define kernel_fpu_available() cpu_has_fpu extern void kernel_fpu_begin(void); extern void kernel_fpu_end(void); -- cgit v1.2.3 From 01db473e1aa32d651477be80f79b309313636203 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:23 -0700 Subject: powerpc: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PowerPC provides an equivalent to the common kernel-mode FPU API, but in a different header and using different function names. The PowerPC API also requires a non-preemptible context. Add a wrapper header, and export the CFLAGS adjustments. Link: https://lkml.kernel.org/r/20240329072441.591471-9-samuel.holland@sifive.com Signed-off-by: Samuel Holland Acked-by: Michael Ellerman (powerpc) Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/powerpc/Kconfig | 1 + arch/powerpc/Makefile | 5 ++++- arch/powerpc/include/asm/fpu.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/fpu.h (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 56dcafbb83b3..3c968f2f4ac4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_HUGEPD if HUGETLB_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC_FPU select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 0a0c57aee1ae..a8479c881cac 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -149,6 +149,9 @@ CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD)) CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata) +CC_FLAGS_FPU := $(call cc-option,-mhard-float) +CC_FLAGS_NO_FPU := $(call cc-option,-msoft-float) + ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY @@ -170,7 +173,7 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) KBUILD_CPPFLAGS += -I $(srctree)/arch/powerpc $(asinstr) KBUILD_AFLAGS += $(AFLAGS-y) -KBUILD_CFLAGS += $(call cc-option,-msoft-float) +KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) KBUILD_CFLAGS += $(CFLAGS-y) CPP = $(CC) -E $(KBUILD_CFLAGS) diff --git a/arch/powerpc/include/asm/fpu.h b/arch/powerpc/include/asm/fpu.h new file mode 100644 index 000000000000..ca584e4bc40f --- /dev/null +++ b/arch/powerpc/include/asm/fpu.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_POWERPC_FPU_H +#define _ASM_POWERPC_FPU_H + +#include + +#include +#include + +#define kernel_fpu_available() (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) + +static inline void kernel_fpu_begin(void) +{ + preempt_disable(); + enable_kernel_fp(); +} + +static inline void kernel_fpu_end(void) +{ + disable_kernel_fp(); + preempt_enable(); +} + +#endif /* ! _ASM_POWERPC_FPU_H */ -- cgit v1.2.3 From b0b8a15bb89e09e12aa6be8ae28128bb656338f1 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:25 -0700 Subject: x86: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit x86 already provides kernel_fpu_begin() and kernel_fpu_end(), but in a different header. Add a wrapper header, and export the CFLAGS adjustments as found in lib/Makefile. Link: https://lkml.kernel.org/r/20240329072441.591471-11-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/x86/Kconfig | 1 + arch/x86/Makefile | 20 ++++++++++++++++++++ arch/x86/include/asm/fpu.h | 13 +++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 arch/x86/include/asm/fpu.h (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 671bc6fd557c..1d7122a1883e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -85,6 +85,7 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_KERNEL_FPU_SUPPORT select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5ab93fcdd691..164498b3382a 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -74,6 +74,26 @@ KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 +# +# CFLAGS for compiling floating point code inside the kernel. +# +CC_FLAGS_FPU := -msse -msse2 +ifdef CONFIG_CC_IS_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 +# +# The "-msse" in the first argument is there so that the +# -mpreferred-stack-boundary=3 build error: +# +# -mpreferred-stack-boundary=3 is not between 4 and 12 +# +# can be triggered. Otherwise gcc doesn't complain. +CC_FLAGS_FPU += -mhard-float +CC_FLAGS_FPU += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4) +endif + ifeq ($(CONFIG_X86_KERNEL_IBT),y) # # Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h new file mode 100644 index 000000000000..b2743fe19339 --- /dev/null +++ b/arch/x86/include/asm/fpu.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_X86_FPU_H +#define _ASM_X86_FPU_H + +#include + +#define kernel_fpu_available() true + +#endif /* ! _ASM_X86_FPU_H */ -- cgit v1.2.3 From 77acc6b55ae46f52bfa4eca52c9fe627f5c3ba3f Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:26 -0700 Subject: riscv: add support for kernel-mode FPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is motivated by the amdgpu DRM driver, which needs floating-point code to support recent hardware. That code is not performance-critical, so only provide a minimal non-preemptible implementation for now. Support is limited to riscv64 because riscv32 requires runtime (libgcc) assistance to convert between doubles and 64-bit integers. Link: https://lkml.kernel.org/r/20240329072441.591471-12-samuel.holland@sifive.com Signed-off-by: Samuel Holland Acked-by: Palmer Dabbelt Reviewed-by: Palmer Dabbelt Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- arch/riscv/Kconfig | 1 + arch/riscv/Makefile | 3 +++ arch/riscv/include/asm/fpu.h | 16 ++++++++++++++++ arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/kernel_mode_fpu.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 49 insertions(+) create mode 100644 arch/riscv/include/asm/fpu.h create mode 100644 arch/riscv/kernel/kernel_mode_fpu.c (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 410d1d6e1b6c..8f10a2fb5f86 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -27,6 +27,7 @@ config RISCV select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_KERNEL_FPU_SUPPORT if 64BIT && FPU select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MMIOWB diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 1e002d8003c5..0577d34fb1a7 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -84,6 +84,9 @@ KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64i KBUILD_AFLAGS += -march=$(riscv-march-y) +# For C code built with floating-point support, exclude V but keep F and D. +CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/') + KBUILD_CFLAGS += -mno-save-restore KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET) diff --git a/arch/riscv/include/asm/fpu.h b/arch/riscv/include/asm/fpu.h new file mode 100644 index 000000000000..91c04c244e12 --- /dev/null +++ b/arch/riscv/include/asm/fpu.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_RISCV_FPU_H +#define _ASM_RISCV_FPU_H + +#include + +#define kernel_fpu_available() has_fpu() + +void kernel_fpu_begin(void); +void kernel_fpu_end(void); + +#endif /* ! _ASM_RISCV_FPU_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 81d94a8ee10f..5b243d46f4b1 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o obj-$(CONFIG_FPU) += fpu.o +obj-$(CONFIG_FPU) += kernel_mode_fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o obj-$(CONFIG_SMP) += smpboot.o diff --git a/arch/riscv/kernel/kernel_mode_fpu.c b/arch/riscv/kernel/kernel_mode_fpu.c new file mode 100644 index 000000000000..0ac8348876c4 --- /dev/null +++ b/arch/riscv/kernel/kernel_mode_fpu.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 SiFive + */ + +#include +#include + +#include +#include +#include +#include + +void kernel_fpu_begin(void) +{ + preempt_disable(); + fstate_save(current, task_pt_regs(current)); + csr_set(CSR_SSTATUS, SR_FS); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + csr_clear(CSR_SSTATUS, SR_FS); + fstate_restore(current, task_pt_regs(current)); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); -- cgit v1.2.3