From 03b9a6e18d325ede28c6dc218cedda53969eb41b Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Thu, 20 Oct 2022 16:38:20 +0800 Subject: x86/hyperv: Remove BUG_ON() for kmap_local_page() The commit 154fb14df7a3c ("x86/hyperv: Replace kmap() with kmap_local_page()") keeps the BUG_ON() to check if kmap_local_page() fails. But in fact, kmap_local_page() always returns a valid kernel address and won't return NULL here. It will BUG on its own if it fails. [1] So directly use memcpy_to_page() which creates local mapping to copy. [1]: https://lore.kernel.org/lkml/YztFEyUA48et0yTt@iweiny-mobl/ Suggested-by: Fabio M. De Francesco Suggested-by: Ira Weiny Reviewed-by: Ira Weiny Signed-off-by: Zhao Liu Link: https://lore.kernel.org/r/20221020083820.2341088-1-zhao1.liu@linux.intel.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_init.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 29774126e931..d46f70522ddf 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -444,7 +444,7 @@ void __init hyperv_init(void) if (hv_root_partition) { struct page *pg; - void *src, *dst; + void *src; /* * For the root partition, the hypervisor will set up its @@ -459,13 +459,11 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); pg = vmalloc_to_page(hv_hypercall_pg); - dst = kmap_local_page(pg); src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, MEMREMAP_WB); - BUG_ON(!(src && dst)); - memcpy(dst, src, HV_HYP_PAGE_SIZE); + BUG_ON(!src); + memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE); memunmap(src); - kunmap_local(dst); } else { hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); -- cgit v1.2.3 From 8bc8824d30193eb7755043d5bb65fa7f0d11a595 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Wed, 12 Oct 2022 13:09:28 +0200 Subject: riscv: dts: sifive unleashed: Add PWM controlled LEDs This adds the 4 PWM controlled green LEDs to the HiFive Unleashed device tree. The schematic doesn't specify any special function for the LEDs, so they're added here without any default triggers and named d1, d2, d3 and d4 just like in the schematic. 
Signed-off-by: Emil Renner Berthing Reviewed-by: Conor Dooley Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221012110928.352910-1-emil.renner.berthing@canonical.com Signed-off-by: Palmer Dabbelt --- .../riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 38 ++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'arch') diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index ced0d4e47938..900a50526d77 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -3,6 +3,8 @@ #include "fu540-c000.dtsi" #include +#include +#include /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ #define RTCCLK_FREQ 1000000 @@ -42,6 +44,42 @@ compatible = "gpio-restart"; gpios = <&gpio 10 GPIO_ACTIVE_LOW>; }; + + led-controller { + compatible = "pwm-leds"; + + led-d1 { + pwms = <&pwm0 0 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = ; + max-brightness = <255>; + label = "d1"; + }; + + led-d2 { + pwms = <&pwm0 1 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = ; + max-brightness = <255>; + label = "d2"; + }; + + led-d3 { + pwms = <&pwm0 2 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = ; + max-brightness = <255>; + label = "d3"; + }; + + led-d4 { + pwms = <&pwm0 3 7812500 PWM_POLARITY_INVERTED>; + active-low; + color = ; + max-brightness = <255>; + label = "d4"; + }; + }; }; &uart0 { -- cgit v1.2.3 From bb8738876b24c3bbfccbd2ccbdc57c27402516ab Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Oct 2022 18:29:04 +0100 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 64 ++++++++++++++++++------------------- arch/s390/configs/defconfig | 66 +++++++++++++++++++-------------------- 2 files changed, 65 insertions(+), 65 deletions(-) (limited to 'arch') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 2a827002934b..e33dc2d6fd04 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -723,52 +723,42 @@ CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_HCTR2=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_LRW=m 
+CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3_GENERIC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@ -779,6 +769,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index fb780e80e4c8..9cc8f024f3df 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -707,53 +707,43 @@ CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m -CONFIG_CRYPTO_GCM=y -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_SEQIV=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_ADIANTUM=m -CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SHA3_256_S390=m -CONFIG_CRYPTO_SHA3_512_S390=m -CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m -CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_SM4_GENERIC=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ADIANTUM=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_CFB=m +CONFIG_CRYPTO_HCTR2=m +CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_OFB=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3_GENERIC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@ -764,6 +754,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_SHA3_256_S390=m +CONFIG_CRYPTO_SHA3_512_S390=m +CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_CHACHA_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m -- cgit v1.2.3 From 2982635a0b3d08d6fee2ff05632206286df0e703 Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Thu, 27 Oct 
2022 15:27:29 +0530 Subject: x86/hyperv: fix invalid writes to MSRs during root partition kexec hyperv_cleanup resets the hypercall page by setting the MSR to 0. However, the root partition is not allowed to write to the GPA bits of the MSR. Instead, it uses the hypercall page provided by the MSR. Similar is the case with the reference TSC MSR. Clear only the enable bit instead of zeroing the entire MSR to make the code valid for root partition too. Signed-off-by: Anirudh Rayabharam Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20221027095729.1676394-3-anrayabh@linux.microsoft.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_init.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index d46f70522ddf..f49bc3ec76e6 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -535,6 +535,7 @@ common_free: void hyperv_cleanup(void) { union hv_x64_msr_hypercall_contents hypercall_msr; + union hv_reference_tsc_msr tsc_msr; unregister_syscore_ops(&hv_syscore_ops); @@ -550,12 +551,14 @@ void hyperv_cleanup(void) hv_hypercall_pg = NULL; /* Reset the hypercall page */ - hypercall_msr.as_uint64 = 0; - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.as_uint64 = hv_get_register(HV_X64_MSR_HYPERCALL); + hypercall_msr.enable = 0; + hv_set_register(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); /* Reset the TSC page */ - hypercall_msr.as_uint64 = 0; - wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); + tsc_msr.as_uint64 = hv_get_register(HV_X64_MSR_REFERENCE_TSC); + tsc_msr.enable = 0; + hv_set_register(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); } void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) -- cgit v1.2.3 From 18acb7fac22ff7b36c7ea5a76b12996e7b7dbaba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 13:00:13 +0100 Subject: bpf: Revert ("Fix dispatcher patchable function entry to 5 bytes nop") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because __attribute__((patchable_function_entry)) is only available since GCC-8 this solution fails to build on the minimum required GCC version. Undo these changes so we might try again -- without cluttering up the patches with too many changes. This is an almost complete revert of: dbe69b299884 ("bpf: Fix dispatcher patchable function entry to 5 bytes nop") ceea991a019c ("bpf: Move bpf_dispatcher function out of ftrace locations") (notably the arch/x86/Kconfig hunk is kept). 
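As a side note, a minimal sketch (not kernel code; PATCHABLE_ENTRY and demo_dispatcher are invented names for illustration) of the compiler feature the reverted approach depended on: the attribute asks the compiler to emit five NOP instructions at the function entry, which the reverted code then rewrote into a single 5-byte NOP. GCC only accepts the attribute from version 8 onward, hence the build failure on the minimum supported compiler.

/* Illustration only, not kernel code.  Build with GCC >= 8 (or a recent
 * Clang) and inspect "gcc -O2 -S demo.c": the compiler pads the entry of
 * demo_dispatcher() with 5 NOP instructions that can later be live-patched.
 * An older GCC rejects the attribute, which is what broke the build.
 */
#define PATCHABLE_ENTRY __attribute__((patchable_function_entry(5), noinline))

static unsigned int PATCHABLE_ENTRY demo_dispatcher(unsigned int x)
{
	return x * 2;
}

int main(void)
{
	return demo_dispatcher(21) == 42 ? 0 : 1;
}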
Reported-by: David Laight Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Tested-by: Jiri Olsa Acked-by: Björn Töpel Acked-by: Jiri Olsa Link: https://lkml.kernel.org/r/439d8dc735bb4858875377df67f1b29a@AcuMS.aculab.com Link: https://lore.kernel.org/bpf/20221103120647.728830733@infradead.org --- arch/x86/net/bpf_jit_comp.c | 13 ------------- include/linux/bpf.h | 21 +-------------------- kernel/bpf/dispatcher.c | 6 ------ 3 files changed, 1 insertion(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 00127abd89ee..99620428ad78 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -389,18 +388,6 @@ out: return ret; } -int __init bpf_arch_init_dispatcher_early(void *ip) -{ - const u8 *nop_insn = x86_nops[5]; - - if (is_endbr(*(u32 *)ip)) - ip += ENDBR_INSN_SIZE; - - if (memcmp(ip, nop_insn, X86_PATCH_SIZE)) - text_poke_early(ip, nop_insn, X86_PATCH_SIZE); - return 0; -} - int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0566705c1d4e..5cd95716b441 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,7 +27,6 @@ #include #include #include -#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -971,8 +970,6 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); -int __init bpf_arch_init_dispatcher_early(void *ip); - #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -986,21 +983,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); }, \ } -#define BPF_DISPATCHER_INIT_CALL(_name) \ - static int __init _name##_init(void) \ - { \ - return bpf_arch_init_dispatcher_early(_name##_func); \ - } \ - early_initcall(_name##_init) - -#ifdef CONFIG_X86_64 -#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) -#else -#define BPF_DISPATCHER_ATTRIBUTES -#endif - #define DEFINE_BPF_DISPATCHER(name) \ - notrace BPF_DISPATCHER_ATTRIBUTES \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ @@ -1010,9 +993,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ - BPF_DISPATCHER_INIT(bpf_dispatcher_##name); \ - BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name); - + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 04f0a045dcaa..fa64b80b8bca 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,7 +4,6 @@ #include #include #include -#include /* The BPF dispatcher is a multiway branch code generator. 
The * dispatcher is a mechanism to avoid the performance penalty of an @@ -91,11 +90,6 @@ int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int n return -ENOTSUPP; } -int __weak __init bpf_arch_init_dispatcher_early(void *ip) -{ - return -ENOTSUPP; -} - static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf) { s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; -- cgit v1.2.3 From 6973091d1b50ab4042f6a2d495f59e9db3662ab8 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Tue, 11 Oct 2022 18:07:12 +0200 Subject: KVM: s390: pv: don't allow userspace to set the clock under PV When running under PV, the guest's TOD clock is under control of the ultravisor and the hypervisor isn't allowed to change it. Hence, don't allow userspace to change the guest's TOD clock by returning -EOPNOTSUPP. When userspace changes the guest's TOD clock, KVM updates its kvm.arch.epoch field and, in addition, the epoch field in all state descriptions of all VCPUs. But, under PV, the ultravisor will ignore the epoch field in the state description and simply overwrite it on next SIE exit with the actual guest epoch. This leads to KVM having an incorrect view of the guest's TOD clock: it has updated its internal kvm.arch.epoch field, but the ultravisor ignores the field in the state description. Whenever a guest is now waiting for a clock comparator, KVM will incorrectly calculate the time when the guest should wake up, possibly causing the guest to sleep for much longer than expected. With this change, kvm_s390_set_tod() will now take the kvm->lock to be able to call kvm_s390_pv_is_protected(). Since kvm_s390_set_tod_clock() also takes kvm->lock, use __kvm_s390_set_tod_clock() instead. The function kvm_s390_set_tod_clock is now unused, hence remove it. Update the documentation to indicate the TOD clock attr calls can now return -EOPNOTSUPP. Fixes: 0f3035047140 ("KVM: s390: protvirt: Do only reset registers that are accessible") Reported-by: Marc Hartmayer Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20221011160712.928239-2-nrb@linux.ibm.com Message-Id: <20221011160712.928239-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- Documentation/virt/kvm/devices/vm.rst | 3 +++ arch/s390/kvm/kvm-s390.c | 26 +++++++++++++++++--------- arch/s390/kvm/kvm-s390.h | 1 - 3 files changed, 20 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst index 0aa5b1cfd700..60acc39e0e93 100644 --- a/Documentation/virt/kvm/devices/vm.rst +++ b/Documentation/virt/kvm/devices/vm.rst @@ -215,6 +215,7 @@ KVM_S390_VM_TOD_EXT). :Parameters: address of a buffer in user space to store the data (u8) to :Returns: -EFAULT if the given address is not accessible from kernel space; -EINVAL if setting the TOD clock extension to != 0 is not supported + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW ----------------------------------- @@ -224,6 +225,7 @@ the POP (u64). :Parameters: address of a buffer in user space to store the data (u64) to :Returns: -EFAULT if the given address is not accessible from kernel space + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT ----------------------------------- @@ -237,6 +239,7 @@ it, it is stored as 0 and not allowed to be set to a value != 0. 
(kvm_s390_vm_tod_clock) to :Returns: -EFAULT if the given address is not accessible from kernel space; -EINVAL if setting the TOD clock extension to != 0 is not supported + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 4. GROUP: KVM_S390_VM_CRYPTO ============================ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 45d4b8182b07..bc491a73815c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1207,6 +1207,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, return 0; } +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); + static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) { struct kvm_s390_vm_tod_clock gtod; @@ -1216,7 +1218,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) return -EINVAL; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", gtod.epoch_idx, gtod.tod); @@ -1247,7 +1249,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) sizeof(gtod.tod))) return -EFAULT; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); return 0; } @@ -1259,6 +1261,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) if (attr->flags) return -EINVAL; + mutex_lock(&kvm->lock); + /* + * For protected guests, the TOD is managed by the ultravisor, so trying + * to change it will never bring the expected results. + */ + if (kvm_s390_pv_is_protected(kvm)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + switch (attr->attr) { case KVM_S390_VM_TOD_EXT: ret = kvm_s390_set_tod_ext(kvm, attr); @@ -1273,6 +1285,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) ret = -ENXIO; break; } + +out_unlock: + mutex_unlock(&kvm->lock); return ret; } @@ -4377,13 +4392,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_t preempt_enable(); } -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) -{ - mutex_lock(&kvm->lock); - __kvm_s390_set_tod_clock(kvm, gtod); - mutex_unlock(&kvm->lock); -} - int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { if (!mutex_trylock(&kvm->lock)) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index f6fd668f887e..4755492dfabc 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -363,7 +363,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); -- cgit v1.2.3 From b6662e37772715447aeff2538444ff291e02ea31 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Tue, 25 Oct 2022 22:32:33 -0300 Subject: KVM: s390: pci: Fix allocation size of aift kzdev elements The 'kzdev' field of struct 'zpci_aift' is an array of pointers to 'kvm_zdev' structs. Allocate the proper size accordingly. 
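For illustration, a small userspace analogue of the bug being fixed (the struct body and its size are stand-ins, not the real kvm_zdev): the array holds pointers, so each element needs the size of a pointer, which is what sizeof(*ptr) yields (the fix uses the equivalent sizeof(struct kvm_zdev *)). The buggy form still runs; it just over-allocates.

/* Userspace sketch of the allocation-size bug (the struct body is a
 * stand-in for the real kvm_zdev).  Both calls "work", but the first
 * over-allocates by a factor of sizeof(struct)/sizeof(pointer).
 */
#include <stdio.h>
#include <stdlib.h>

struct kvm_zdev { char payload[256]; };	/* assumed size, for demonstration */

int main(void)
{
	size_t n = 64;
	struct kvm_zdev **bad  = calloc(n, sizeof(struct kvm_zdev)); /* n * 256 bytes    */
	struct kvm_zdev **good = calloc(n, sizeof(*good));           /* n * 8 on 64-bit  */

	printf("buggy: %zu bytes, correct: %zu bytes\n",
	       n * sizeof(struct kvm_zdev), n * sizeof(*good));
	free(bad);
	free(good);
	return 0;
}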
Reported by Coccinelle: WARNING: Use correct pointer type argument for sizeof Fixes: 98b1d33dac5f ("KVM: s390: pci: do initial setup for AEN interpretation") Signed-off-by: Rafael Mendonca Reviewed-by: Christian Borntraeger Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20221026013234.960859-1-rafaelmendsr@gmail.com Message-Id: <20221026013234.960859-1-rafaelmendsr@gmail.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index c50c1645c0ae..ded1af2ddae9 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -126,7 +126,7 @@ int kvm_s390_pci_aen_init(u8 nisc) return -EPERM; mutex_lock(&aift->aift_lock); - aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev), + aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *), GFP_KERNEL); if (!aift->kzdev) { rc = -ENOMEM; -- cgit v1.2.3 From 2081b3bd0c11757725dcab9ba5d38e1bddb03459 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Nov 2022 18:00:15 +0100 Subject: arm64: fix rodata=full again Commit 2e8cff0a0eee87b2 ("arm64: fix rodata=full") addressed a couple of issues with the rodata= kernel command line option, which is not a simple boolean on arm64, and inadvertently got broken due to changes in the generic bool handling. Unfortunately, the resulting code never clears the rodata_full boolean variable if it defaults to true and rodata=on or rodata=off is passed, as the generic code is not aware of the existence of this variable. Given the way this code is plumbed together, clearing rodata_full when returning false from arch_parse_debug_rodata() may result in inconsistencies if the generic code decides that it cannot parse the right hand side, so the best way to deal with this is to only take rodata_full in account if rodata_enabled is also true. Fixes: 2e8cff0a0eee ("arm64: fix rodata=full") Cc: # 6.0.x Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Link: https://lore.kernel.org/r/20221103170015.4124426-1-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/mm/pageattr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index d107c3d434e2..5922178d7a06 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -26,7 +26,7 @@ bool can_set_direct_map(void) * mapped at page granularity, so that it is possible to * protect/unprotect single pages. */ - return rodata_full || debug_pagealloc_enabled() || + return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() || IS_ENABLED(CONFIG_KFENCE); } @@ -102,7 +102,8 @@ static int change_memory_common(unsigned long addr, int numpages, * If we are manipulating read-only permissions, apply the same * change to the linear mapping of the pages that back this VM area. */ - if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || + if (rodata_enabled && + rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || pgprot_val(clear_mask) == PTE_RDONLY)) { for (i = 0; i < area->nr_pages; i++) { __change_memory_common((u64)page_address(area->pages[i]), -- cgit v1.2.3 From 6191de8b175fad1a877ae8ed4be699a5022b9c03 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Oct 2022 19:11:49 +0100 Subject: s390/configs: move CONFIG_DEBUG_INFO_BTF into btf.config addon config CONFIG_DEBUG_INFO_BTF significantly increases compile time for the kernel. E.g. 
when changing a single C file compile time for a new bzImage is increased by ~50% if BTF debug info is generated. Therefore remove CONFIG_DEBUG_INFO_BTF from all defconfigs and introduce a btf.config addon config file. Quickly enabling CONFIG_DEBUG_INFO_BTF into the current kernel config can be done by simply invoking make btf.config Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/configs/btf.config | 1 + arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - arch/s390/configs/zfcpdump_defconfig | 1 - 4 files changed, 1 insertion(+), 3 deletions(-) create mode 100644 arch/s390/configs/btf.config (limited to 'arch') diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config new file mode 100644 index 000000000000..39227b4511af --- /dev/null +++ b/arch/s390/configs/btf.config @@ -0,0 +1 @@ +CONFIG_DEBUG_INFO_BTF=y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index e33dc2d6fd04..63807bd0b536 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -797,7 +797,6 @@ CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_HEADERS_INSTALL=y CONFIG_DEBUG_SECTION_MISMATCH=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 9cc8f024f3df..4f9a98247442 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -781,7 +781,6 @@ CONFIG_CMA_SIZE_MBYTES=0 CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_GDB_SCRIPTS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index a5576b8d4081..5fe9948be644 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -74,7 +74,6 @@ CONFIG_PRINTK_TIME=y # CONFIG_SYMBOLIC_ERRNAME is not set CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set -- cgit v1.2.3 From 9afea696a04af29b114b38aec734e28b03e93dae Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Oct 2022 19:40:31 +0100 Subject: s390/configs: add kasan.config addon config file Add kasan.config addon config file which allows to easily enable KASAN into the current kernel config. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/configs/kasan.config | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 arch/s390/configs/kasan.config (limited to 'arch') diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config new file mode 100644 index 000000000000..700a8b25c3ff --- /dev/null +++ b/arch/s390/configs/kasan.config @@ -0,0 +1,3 @@ +CONFIG_KASAN=y +CONFIG_KASAN_INLINE=y +CONFIG_KASAN_VMALLOC=y -- cgit v1.2.3 From 80ddf5ce1c9291cb175d52ed1227134ad48c47ee Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 30 Oct 2022 19:22:02 +0100 Subject: s390: always build relocatable kernel Nathan Chancellor reported several link errors on s390 with CONFIG_RELOCATABLE disabled, after binutils commit 906f69cf65da ("IBM zSystems: Issue error for *DBL relocs on misaligned symbols"). The binutils commit reveals potential miscompiles that might have happened already before with linker script defined symbols at odd addresses. 
A similar bug was recently fixed in the kernel with commit c9305b6c1f52 ("s390: fix nospec table alignments"). See https://github.com/ClangBuiltLinux/linux/issues/1747 for an analysis from Ulich Weigand. Therefore always build a relocatable kernel to avoid this problem. There is hardly any use-case for non-relocatable kernels, so this shouldn't be controversial. Link: https://github.com/ClangBuiltLinux/linux/issues/1747 Signed-off-by: Heiko Carstens Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20221030182202.2062705-1-hca@linux.ibm.com Signed-off-by: Alexander Gordeev --- arch/s390/Kconfig | 6 +++--- arch/s390/Makefile | 2 -- arch/s390/boot/Makefile | 3 +-- arch/s390/boot/startup.c | 3 +-- 4 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 318fce77601d..de575af02ffe 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -568,8 +568,7 @@ config EXPOLINE_FULL endchoice config RELOCATABLE - bool "Build a relocatable kernel" - default y + def_bool y help This builds a kernel image that retains relocation information so it can be loaded at an arbitrary address. @@ -578,10 +577,11 @@ config RELOCATABLE bootup process. The relocations make the kernel image about 15% larger (compressed 10%), but are discarded at runtime. + Note: this option exists only for documentation purposes, please do + not remove it. config RANDOMIZE_BASE bool "Randomize the address of the kernel image (KASLR)" - depends on RELOCATABLE default y help In support of Kernel Address Space Layout Randomization (KASLR), diff --git a/arch/s390/Makefile b/arch/s390/Makefile index de6d8b2ea4d8..b3235ab0ace8 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -14,10 +14,8 @@ KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 -ifeq ($(CONFIG_RELOCATABLE),y) KBUILD_CFLAGS += -fPIE LDFLAGS_vmlinux := -pie -endif aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 883357a211a3..d52c3e2e16bc 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -37,9 +37,8 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o +obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o -obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 6e7f01ca53e6..47ca3264c023 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -291,8 +291,7 @@ void startup_kernel(void) clear_bss_section(); copy_bootdata(); - if (IS_ENABLED(CONFIG_RELOCATABLE)) - handle_relocs(__kaslr_offset); + handle_relocs(__kaslr_offset); if (__kaslr_offset) { /* -- cgit v1.2.3 From 11385b2612004298ac2fbc9877e73f1410cfd3c0 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 2 Nov 2022 12:06:08 +0100 Subject: x86/uaccess: instrument copy_from_user_nmi() Make sure usercopy hooks from linux/instrumented.h are invoked for 
copy_from_user_nmi(). This fixes KMSAN false positives reported when dumping opcodes for a stack trace. Link: https://lkml.kernel.org/r/20221102110611.1085175-2-glider@google.com Signed-off-by: Alexander Potapenko Acked-by: Peter Zijlstra (Intel) Cc: Dave Hansen Cc: Kees Cook Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Marco Elver Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/lib/usercopy.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index f1bb18617156..24b48af27417 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -6,6 +6,7 @@ #include #include +#include #include @@ -44,7 +45,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) * called from other contexts. */ pagefault_disable(); + instrument_copy_from_user_before(to, from, n); ret = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, ret); pagefault_enable(); return ret; -- cgit v1.2.3 From ba54d194f8daad8943802d6dfe06e205f882c391 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 2 Nov 2022 12:06:11 +0100 Subject: x86/traps: avoid KMSAN bugs originating from handle_bug() There is a case in exc_invalid_op handler that is executed outside the irqentry_enter()/irqentry_exit() region when an UD2 instruction is used to encode a call to __warn(). In that case the `struct pt_regs` passed to the interrupt handler is never unpoisoned by KMSAN (this is normally done in irqentry_enter()), which leads to false positives inside handle_bug(). Use kmsan_unpoison_entry_regs() to explicitly unpoison those registers before using them. Link: https://lkml.kernel.org/r/20221102110611.1085175-5-glider@google.com Signed-off-by: Alexander Potapenko Cc: Borislav Petkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Dmitry Vyukov Cc: Kees Cook Cc: Marco Elver Cc: Masahiro Yamada Cc: Nick Desaulniers Cc: Peter Zijlstra (Intel) Signed-off-by: Andrew Morton --- arch/x86/kernel/traps.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 178015a820f0..d3fdec706f1d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -301,6 +302,12 @@ static noinstr bool handle_bug(struct pt_regs *regs) { bool handled = false; + /* + * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() + * is a rare case that uses @regs without passing them to + * irqentry_enter(). + */ + kmsan_unpoison_entry_regs(regs); if (!is_valid_bugaddr(regs->ip)) return handled; -- cgit v1.2.3 From 1fdbed657a4726639c4f17841fd2a0fb646c746e Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 7 Nov 2022 11:10:10 +0900 Subject: arch/x86/mm/hugetlbpage.c: pud_huge() returns 0 when using 2-level paging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following bug is reported to be triggered when starting X on x86-32 system with i915: [ 225.777375] kernel BUG at mm/memory.c:2664! 
[ 225.777391] invalid opcode: 0000 [#1] PREEMPT SMP [ 225.777405] CPU: 0 PID: 2402 Comm: Xorg Not tainted 6.1.0-rc3-bdg+ #86 [ 225.777415] Hardware name: /8I865G775-G, BIOS F1 08/29/2006 [ 225.777421] EIP: __apply_to_page_range+0x24d/0x31c [ 225.777437] Code: ff ff 8b 55 e8 8b 45 cc e8 0a 11 ec ff 89 d8 83 c4 28 5b 5e 5f 5d c3 81 7d e0 a0 ef 96 c1 74 ad 8b 45 d0 e8 2d 83 49 00 eb a3 <0f> 0b 25 00 f0 ff ff 81 eb 00 00 00 40 01 c3 8b 45 ec 8b 00 e8 76 [ 225.777446] EAX: 00000001 EBX: c53a3b58 ECX: b5c00000 EDX: c258aa00 [ 225.777454] ESI: b5c00000 EDI: b5900000 EBP: c4b0fdb4 ESP: c4b0fd80 [ 225.777462] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 EFLAGS: 00010202 [ 225.777470] CR0: 80050033 CR2: b5900000 CR3: 053a3000 CR4: 000006d0 [ 225.777479] Call Trace: [ 225.777486] ? i915_memcpy_init_early+0x63/0x63 [i915] [ 225.777684] apply_to_page_range+0x21/0x27 [ 225.777694] ? i915_memcpy_init_early+0x63/0x63 [i915] [ 225.777870] remap_io_mapping+0x49/0x75 [i915] [ 225.778046] ? i915_memcpy_init_early+0x63/0x63 [i915] [ 225.778220] ? mutex_unlock+0xb/0xd [ 225.778231] ? i915_vma_pin_fence+0x6d/0xf7 [i915] [ 225.778420] vm_fault_gtt+0x2a9/0x8f1 [i915] [ 225.778644] ? lock_is_held_type+0x56/0xe7 [ 225.778655] ? lock_is_held_type+0x7a/0xe7 [ 225.778663] ? 0xc1000000 [ 225.778670] __do_fault+0x21/0x6a [ 225.778679] handle_mm_fault+0x708/0xb21 [ 225.778686] ? mt_find+0x21e/0x5ae [ 225.778696] exc_page_fault+0x185/0x705 [ 225.778704] ? doublefault_shim+0x127/0x127 [ 225.778715] handle_exception+0x130/0x130 [ 225.778723] EIP: 0xb700468a Recently pud_huge() got aware of non-present entry by commit 3a194f3f8ad0 ("mm/hugetlb: make pud_huge() and follow_huge_pud() aware of non-present pud entry") to handle some special states of gigantic page. However, it's overlooked that pud_none() always returns false when running with 2-level paging, and as a result pud_huge() can return true pointlessly. Introduce "#if CONFIG_PGTABLE_LEVELS > 2" to pud_huge() to deal with this. Link: https://lkml.kernel.org/r/20221107021010.2449306-1-naoya.horiguchi@linux.dev Fixes: 3a194f3f8ad0 ("mm/hugetlb: make pud_huge() and follow_huge_pud() aware of non-present pud entry") Signed-off-by: Naoya Horiguchi Reported-by: Ville Syrjälä Tested-by: Ville Syrjälä Reviewed-by: Miaohe Lin Cc: David Hildenbrand Cc: Liu Shixin Cc: Mike Kravetz Cc: Muchun Song Cc: Oscar Salvador Cc: Yang Shi Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton --- arch/x86/mm/hugetlbpage.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 6b3033845c6d..5804bbae4f01 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -37,8 +37,12 @@ int pmd_huge(pmd_t pmd) */ int pud_huge(pud_t pud) { +#if CONFIG_PGTABLE_LEVELS > 2 return !pud_none(pud) && (pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT; +#else + return 0; +#endif } #ifdef CONFIG_HUGETLB_PAGE -- cgit v1.2.3 From 8ec8490a1950efeccb00967698cf7cb2fcd25ca7 Mon Sep 17 00:00:00 2001 From: D Scott Phillips Date: Wed, 2 Nov 2022 09:01:06 -0700 Subject: arm64: Fix bit-shifting UB in the MIDR_CPU_MODEL() macro CONFIG_UBSAN_SHIFT with gcc-5 complains that the shifting of ARM_CPU_IMP_AMPERE (0xC0) into bits [31:24] by MIDR_CPU_MODEL() is undefined behavior. 
Well, sort of, it actually spells the error as: arch/arm64/kernel/proton-pack.c: In function 'spectre_bhb_loop_affected': arch/arm64/include/asm/cputype.h:44:2: error: initializer element is not constant (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ ^ This isn't an issue for other Implementor codes, as all the other codes have zero in the top bit and so are representable as a signed int. Cast the implementor code to unsigned in MIDR_CPU_MODEL to remove the undefined behavior. Fixes: 0e5d5ae837c8 ("arm64: Add AMPERE1 to the Spectre-BHB affected list") Reported-by: Geert Uytterhoeven Signed-off-by: D Scott Phillips Link: https://lore.kernel.org/r/20221102160106.1096948-1-scott@os.amperecomputing.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index abc418650fec..65e53ef5a396 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -41,7 +41,7 @@ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) #define MIDR_CPU_MODEL(imp, partnum) \ - (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) -- cgit v1.2.3 From acfc35cfcee5df419391671ef1a631f43feee4e3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 31 Oct 2022 14:57:28 -0700 Subject: arm64/syscall: Include asm/ptrace.h in syscall_wrapper header. Add the same change for ARM64 as done in the commit 9440c4294160 ("x86/syscall: Include asm/ptrace.h in syscall_wrapper header") to make sure all syscalls see 'struct pt_regs' definition and resulted BTF for '__arm64_sys_*(struct pt_regs *regs)' functions point to actual struct. Without this patch, the BPF verifier refuses to load a tracing prog which accesses pt_regs. bpf(BPF_PROG_LOAD, {prog_type=0x1a, ...}, 128) = -1 EACCES With this patch, we can see the correct error, which saves us time in debugging the prog. bpf(BPF_PROG_LOAD, {prog_type=0x1a, ...}, 128) = 4 bpf(BPF_RAW_TRACEPOINT_OPEN, {raw_tracepoint={name=NULL, prog_fd=4}}, 128) = -1 ENOTSUPP Signed-off-by: Kuniyuki Iwashima Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20221031215728.50389-1-kuniyu@amazon.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/syscall_wrapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index b383b4802a7b..d30217c21eff 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -8,7 +8,7 @@ #ifndef __ASM_SYSCALL_WRAPPER_H #define __ASM_SYSCALL_WRAPPER_H -struct pt_regs; +#include #define SC_ARM64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ -- cgit v1.2.3 From debc5a1ec0d195ffea70d11efeffb713de9fdbc7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 8 Nov 2022 09:44:53 +0100 Subject: KVM: x86: use a separate asm-offsets.c file This already removes an ugly #include "" from asm-offsets.c, but especially it avoids a future error when trying to define asm-offsets for KVM's svm/svm.h header. This would not work for kernel/asm-offsets.c, because svm/svm.h includes kvm_cache_regs.h which is not in the include path when compiling asm-offsets.c. The problem is not there if the .c file is in arch/x86/kvm. 
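As background for the change below, a rough sketch of how the kbuild asm-offsets trick works (the macro bodies approximate include/linux/kbuild.h and the struct is a stand-in): offsetof() needs the complete struct definition, so the .c file that generates the offsets must be compiled where KVM's private headers such as vmx/vmx.h are on the include path, which kernel/asm-offsets.c cannot guarantee.

/* Sketch of the asm-offsets mechanism; DEFINE/OFFSET approximate
 * include/linux/kbuild.h and the struct is a stand-in.  Compiling this
 * with "gcc -S" leaves "->SYM value" markers in the assembly output,
 * which a kbuild sed script converts into #define lines usable from .S
 * files.
 */
#include <stddef.h>

#define DEFINE(sym, val) \
	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
#define OFFSET(sym, str, mem) DEFINE(sym, offsetof(struct str, mem))

struct vcpu_vmx_demo {			/* stand-in for struct vcpu_vmx */
	unsigned long regs[16];
	unsigned long spec_ctrl;
};

static void __attribute__((used)) common(void)
{
	/* Only compiles if the full struct definition is visible here. */
	OFFSET(VMX_spec_ctrl_demo, vcpu_vmx_demo, spec_ctrl);
}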
Suggested-by: Sean Christopherson Cc: stable@vger.kernel.org Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk") Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kernel/asm-offsets.c | 6 ------ arch/x86/kvm/.gitignore | 2 ++ arch/x86/kvm/Makefile | 9 +++++++++ arch/x86/kvm/kvm-asm-offsets.c | 18 ++++++++++++++++++ arch/x86/kvm/vmx/vmenter.S | 2 +- 5 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 arch/x86/kvm/.gitignore create mode 100644 arch/x86/kvm/kvm-asm-offsets.c (limited to 'arch') diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index cb50589a7102..437308004ef2 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -19,7 +19,6 @@ #include #include #include -#include "../kvm/vmx/vmx.h" #ifdef CONFIG_XEN #include @@ -108,9 +107,4 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); - - if (IS_ENABLED(CONFIG_KVM_INTEL)) { - BLANK(); - OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); - } } diff --git a/arch/x86/kvm/.gitignore b/arch/x86/kvm/.gitignore new file mode 100644 index 000000000000..615d6ff35c00 --- /dev/null +++ b/arch/x86/kvm/.gitignore @@ -0,0 +1,2 @@ +/kvm-asm-offsets.s +/kvm-asm-offsets.h diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 30f244b64523..a02cf9baacc8 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -34,3 +34,12 @@ endif obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o obj-$(CONFIG_KVM_AMD) += kvm-amd.o + +AFLAGS_vmx/vmenter.o := -iquote $(obj) +$(obj)/vmx/vmenter.o: $(obj)/kvm-asm-offsets.h + +$(obj)/kvm-asm-offsets.h: $(obj)/kvm-asm-offsets.s FORCE + $(call filechk,offsets,__KVM_ASM_OFFSETS_H__) + +targets += kvm-asm-offsets.s +clean-files += kvm-asm-offsets.h diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c new file mode 100644 index 000000000000..9d84f2b32d7f --- /dev/null +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. + */ +#define COMPILE_OFFSETS + +#include +#include "vmx/vmx.h" + +static void __used common(void) +{ + if (IS_ENABLED(CONFIG_KVM_INTEL)) { + BLANK(); + OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); + } +} diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 8477d8bdd69c..0b5db4de4d09 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include -#include #include #include #include #include #include +#include "kvm-asm-offsets.h" #include "run_flags.h" #define WORD_SIZE (BITS_PER_LONG / 8) -- cgit v1.2.3 From 16fdc1de169ee0a4e59a8c02244414ec7acd55c3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 30 Sep 2022 14:14:44 -0400 Subject: KVM: SVM: replace regs argument of __svm_vcpu_run() with vcpu_svm Since registers are reachable through vcpu_svm, and we will need to access more fields of that struct, pass it instead of the regs[] array. No functional change intended. 
Cc: stable@vger.kernel.org Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk") Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Makefile | 3 +++ arch/x86/kvm/kvm-asm-offsets.c | 6 ++++++ arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/svm/svm.h | 2 +- arch/x86/kvm/svm/vmenter.S | 37 +++++++++++++++++++------------------ 5 files changed, 30 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index a02cf9baacc8..f453a0f96e24 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -35,6 +35,9 @@ obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o obj-$(CONFIG_KVM_AMD) += kvm-amd.o +AFLAGS_svm/vmenter.o := -iquote $(obj) +$(obj)/svm/vmenter.o: $(obj)/kvm-asm-offsets.h + AFLAGS_vmx/vmenter.o := -iquote $(obj) $(obj)/vmx/vmenter.o: $(obj)/kvm-asm-offsets.h diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c index 9d84f2b32d7f..30db96852e2d 100644 --- a/arch/x86/kvm/kvm-asm-offsets.c +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -8,9 +8,15 @@ #include #include "vmx/vmx.h" +#include "svm/svm.h" static void __used common(void) { + if (IS_ENABLED(CONFIG_KVM_AMD)) { + BLANK(); + OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); + } + if (IS_ENABLED(CONFIG_KVM_INTEL)) { BLANK(); OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 58f0077d9357..b412bc5773c5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3930,7 +3930,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) * vmcb02 when switching vmcbs for nested virtualization. */ vmload(svm->vmcb01.pa); - __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs); + __svm_vcpu_run(vmcb_pa, svm); vmsave(svm->vmcb01.pa); vmload(__sme_page_pa(sd->save_area)); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 6a7686bf6900..447e25c9101a 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -684,6 +684,6 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ void __svm_sev_es_vcpu_run(unsigned long vmcb_pa); -void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); +void __svm_vcpu_run(unsigned long vmcb_pa, struct vcpu_svm *svm); #endif diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 723f8534986c..f0ff41103e4c 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -4,27 +4,28 @@ #include #include #include +#include "kvm-asm-offsets.h" #define WORD_SIZE (BITS_PER_LONG / 8) /* Intentionally omit RAX as it's context switched by hardware */ -#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE -#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE -#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE +#define VCPU_RCX (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE) +#define VCPU_RDX (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE) +#define VCPU_RBX (SVM_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE) /* Intentionally omit RSP as it's context switched by hardware */ -#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE -#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE -#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE +#define VCPU_RBP (SVM_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE) +#define VCPU_RSI (SVM_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE) +#define VCPU_RDI (SVM_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE) #ifdef CONFIG_X86_64 -#define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE -#define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE -#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE -#define 
VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE -#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE -#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE -#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE -#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE +#define VCPU_R8 (SVM_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE) +#define VCPU_R9 (SVM_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE) +#define VCPU_R10 (SVM_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE) +#define VCPU_R11 (SVM_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE) +#define VCPU_R12 (SVM_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE) +#define VCPU_R13 (SVM_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE) +#define VCPU_R14 (SVM_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE) +#define VCPU_R15 (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE) #endif .section .noinstr.text, "ax" @@ -32,7 +33,7 @@ /** * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode * @vmcb_pa: unsigned long - * @regs: unsigned long * (to guest registers) + * @svm: struct vcpu_svm * */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP @@ -47,13 +48,13 @@ SYM_FUNC_START(__svm_vcpu_run) #endif push %_ASM_BX - /* Save @regs. */ + /* Save @svm. */ push %_ASM_ARG2 /* Save @vmcb. */ push %_ASM_ARG1 - /* Move @regs to RAX. */ + /* Move @svm to RAX. */ mov %_ASM_ARG2, %_ASM_AX /* Load guest registers. */ @@ -89,7 +90,7 @@ SYM_FUNC_START(__svm_vcpu_run) FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif - /* "POP" @regs to RAX. */ + /* "POP" @svm to RAX. */ pop %_ASM_AX /* Save all guest registers. */ -- cgit v1.2.3 From f7ef280132f9bf6f82acf5aa5c3c837206eef501 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Oct 2022 17:30:07 -0400 Subject: KVM: SVM: adjust register allocation for __svm_vcpu_run() 32-bit ABI uses RAX/RCX/RDX as its argument registers, so they are in the way of instructions that hardcode their operands such as RDMSR/WRMSR or VMLOAD/VMRUN/VMSAVE. In preparation for moving vmload/vmsave to __svm_vcpu_run(), keep the pointer to the struct vcpu_svm in %rdi. In particular, it is now possible to load svm->vmcb01.pa in %rax without clobbering the struct vcpu_svm pointer. No functional change intended. Cc: stable@vger.kernel.org Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk") Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/vmenter.S | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index f0ff41103e4c..531510ab6072 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -54,29 +54,29 @@ SYM_FUNC_START(__svm_vcpu_run) /* Save @vmcb. */ push %_ASM_ARG1 - /* Move @svm to RAX. */ - mov %_ASM_ARG2, %_ASM_AX + /* Move @svm to RDI. */ + mov %_ASM_ARG2, %_ASM_DI + + /* "POP" @vmcb to RAX. */ + pop %_ASM_AX /* Load guest registers. 
*/ - mov VCPU_RCX(%_ASM_AX), %_ASM_CX - mov VCPU_RDX(%_ASM_AX), %_ASM_DX - mov VCPU_RBX(%_ASM_AX), %_ASM_BX - mov VCPU_RBP(%_ASM_AX), %_ASM_BP - mov VCPU_RSI(%_ASM_AX), %_ASM_SI - mov VCPU_RDI(%_ASM_AX), %_ASM_DI + mov VCPU_RCX(%_ASM_DI), %_ASM_CX + mov VCPU_RDX(%_ASM_DI), %_ASM_DX + mov VCPU_RBX(%_ASM_DI), %_ASM_BX + mov VCPU_RBP(%_ASM_DI), %_ASM_BP + mov VCPU_RSI(%_ASM_DI), %_ASM_SI #ifdef CONFIG_X86_64 - mov VCPU_R8 (%_ASM_AX), %r8 - mov VCPU_R9 (%_ASM_AX), %r9 - mov VCPU_R10(%_ASM_AX), %r10 - mov VCPU_R11(%_ASM_AX), %r11 - mov VCPU_R12(%_ASM_AX), %r12 - mov VCPU_R13(%_ASM_AX), %r13 - mov VCPU_R14(%_ASM_AX), %r14 - mov VCPU_R15(%_ASM_AX), %r15 + mov VCPU_R8 (%_ASM_DI), %r8 + mov VCPU_R9 (%_ASM_DI), %r9 + mov VCPU_R10(%_ASM_DI), %r10 + mov VCPU_R11(%_ASM_DI), %r11 + mov VCPU_R12(%_ASM_DI), %r12 + mov VCPU_R13(%_ASM_DI), %r13 + mov VCPU_R14(%_ASM_DI), %r14 + mov VCPU_R15(%_ASM_DI), %r15 #endif - - /* "POP" @vmcb to RAX. */ - pop %_ASM_AX + mov VCPU_RDI(%_ASM_DI), %_ASM_DI /* Enter guest mode */ sti -- cgit v1.2.3 From f6d58266d731fd7e63163790aad21e0dbb1d5264 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Nov 2022 04:17:29 -0500 Subject: KVM: SVM: retrieve VMCB from assembly Continue moving accesses to struct vcpu_svm to vmenter.S. Reducing the number of arguments limits the chance of mistakes due to different registers used for argument passing in 32- and 64-bit ABIs; pushing the VMCB argument and almost immediately popping it into a different register looks pretty weird. 32-bit ABI is not a concern for __svm_sev_es_vcpu_run() which is 64-bit only; however, it will soon need @svm to save/restore SPEC_CTRL so stay consistent with __svm_vcpu_run() and let them share the same prototype. No functional change intended. Cc: stable@vger.kernel.org Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk") Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm-asm-offsets.c | 2 ++ arch/x86/kvm/svm/svm.c | 5 ++--- arch/x86/kvm/svm/svm.h | 4 ++-- arch/x86/kvm/svm/vmenter.S | 20 ++++++++++---------- 4 files changed, 16 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c index 30db96852e2d..f1b694e431ae 100644 --- a/arch/x86/kvm/kvm-asm-offsets.c +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -15,6 +15,8 @@ static void __used common(void) if (IS_ENABLED(CONFIG_KVM_AMD)) { BLANK(); OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); + OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); + OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); } if (IS_ENABLED(CONFIG_KVM_INTEL)) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b412bc5773c5..0c86c435c51f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3914,12 +3914,11 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - unsigned long vmcb_pa = svm->current_vmcb->pa; guest_state_enter_irqoff(); if (sev_es_guest(vcpu->kvm)) { - __svm_sev_es_vcpu_run(vmcb_pa); + __svm_sev_es_vcpu_run(svm); } else { struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); @@ -3930,7 +3929,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) * vmcb02 when switching vmcbs for nested virtualization. 
*/ vmload(svm->vmcb01.pa); - __svm_vcpu_run(vmcb_pa, svm); + __svm_vcpu_run(svm); vmsave(svm->vmcb01.pa); vmload(__sme_page_pa(sd->save_area)); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 447e25c9101a..7ff1879e73c5 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -683,7 +683,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ -void __svm_sev_es_vcpu_run(unsigned long vmcb_pa); -void __svm_vcpu_run(unsigned long vmcb_pa, struct vcpu_svm *svm); +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm); +void __svm_vcpu_run(struct vcpu_svm *svm); #endif diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 531510ab6072..d07bac1952c5 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -32,7 +32,6 @@ /** * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode - * @vmcb_pa: unsigned long * @svm: struct vcpu_svm * */ SYM_FUNC_START(__svm_vcpu_run) @@ -49,16 +48,16 @@ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BX /* Save @svm. */ - push %_ASM_ARG2 - - /* Save @vmcb. */ push %_ASM_ARG1 +.ifnc _ASM_ARG1, _ASM_DI /* Move @svm to RDI. */ - mov %_ASM_ARG2, %_ASM_DI + mov %_ASM_ARG1, %_ASM_DI +.endif - /* "POP" @vmcb to RAX. */ - pop %_ASM_AX + /* Get svm->current_vmcb->pa into RAX. */ + mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX + mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX /* Load guest registers. */ mov VCPU_RCX(%_ASM_DI), %_ASM_CX @@ -170,7 +169,7 @@ SYM_FUNC_END(__svm_vcpu_run) /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode - * @vmcb_pa: unsigned long + * @svm: struct vcpu_svm * */ SYM_FUNC_START(__svm_sev_es_vcpu_run) push %_ASM_BP @@ -185,8 +184,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) #endif push %_ASM_BX - /* Move @vmcb to RAX. */ - mov %_ASM_ARG1, %_ASM_AX + /* Get svm->current_vmcb->pa into RAX. */ + mov SVM_current_vmcb(%_ASM_ARG1), %_ASM_AX + mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX /* Enter guest mode */ sti -- cgit v1.2.3 From 001459787158280d84fce1bc94cec49f34f48b9f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Nov 2022 04:54:40 -0500 Subject: KVM: SVM: remove unused field from struct vcpu_svm The pointer to svm_cpu_data in struct vcpu_svm looks interesting from the point of view of accessing it after vmexit, when the GSBASE is still containing the guest value. However, despite existing since the very first commit of drivers/kvm/svm.c (commit 6aa8b732ca01, "[PATCH] kvm: userspace interface", 2006-12-10), it was never set to anything. Ignore the opportunity to fix a 16 year old "bug" and delete it; doing things the "harder" way makes it possible to remove more old cruft. Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7ff1879e73c5..626240707ba9 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -209,7 +209,6 @@ struct vcpu_svm { struct vmcb *vmcb; struct kvm_vmcb_info vmcb01; struct kvm_vmcb_info *current_vmcb; - struct svm_cpu_data *svm_data; u32 asid; u32 sysenter_esp_hi; u32 sysenter_eip_hi; -- cgit v1.2.3 From 181d0fb0bb023e8996b1cf7970e3708d72442b0b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Nov 2022 08:54:20 -0500 Subject: KVM: SVM: remove dead field from struct svm_cpu_data The "cpu" field of struct svm_cpu_data has been write-only since commit 4b656b120249 ("KVM: SVM: force new asid on vcpu migration", 2009-08-05). Remove it. 
Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 1 - arch/x86/kvm/svm/svm.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0c86c435c51f..0f873b298931 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -665,7 +665,6 @@ static int svm_cpu_init(int cpu) sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); if (!sd) return ret; - sd->cpu = cpu; sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!sd->save_area) goto free_cpu_data; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 626240707ba9..7540db9902a6 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -280,8 +280,6 @@ struct vcpu_svm { }; struct svm_cpu_data { - int cpu; - u64 asid_generation; u32 max_asid; u32 next_asid; -- cgit v1.2.3 From 73412dfeea724e6bd775ba64d21157ff322eac9a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Nov 2022 09:07:55 -0500 Subject: KVM: SVM: do not allocate struct svm_cpu_data dynamically The svm_data percpu variable is a pointer, but it is allocated via svm_hardware_setup() when KVM is loaded. Unlike hardware_enable() this means that it is never NULL for the whole lifetime of KVM, and static allocation does not waste any memory compared to the status quo. It is also more efficient and more easily handled from assembly code, so do it and don't look back. Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 4 ++-- arch/x86/kvm/svm/svm.c | 41 +++++++++++++++-------------------------- arch/x86/kvm/svm/svm.h | 2 +- 3 files changed, 18 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 28064060413a..9b66ee34e264 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -196,7 +196,7 @@ static void sev_asid_free(struct kvm_sev_info *sev) __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { - sd = per_cpu(svm_data, cpu); + sd = per_cpu_ptr(&svm_data, cpu); sd->sev_vmcbs[sev->asid] = NULL; } @@ -2600,7 +2600,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void pre_sev_run(struct vcpu_svm *svm, int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0f873b298931..48274c93d78b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -245,7 +245,7 @@ struct kvm_ldttss_desc { u32 zero1; } __attribute__((packed)); -DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); +DEFINE_PER_CPU(struct svm_cpu_data, svm_data); /* * Only MSR_TSC_AUX is switched via the user return hook. 
EFER is switched via @@ -581,12 +581,7 @@ static int svm_hardware_enable(void) pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); return -EINVAL; } - sd = per_cpu(svm_data, me); - if (!sd) { - pr_err("%s: svm_data is NULL on %d\n", __func__, me); - return -EINVAL; - } - + sd = per_cpu_ptr(&svm_data, me); sd->asid_generation = 1; sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; sd->next_asid = sd->max_asid + 1; @@ -646,41 +641,35 @@ static int svm_hardware_enable(void) static void svm_cpu_uninit(int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); - if (!sd) + if (!sd->save_area) return; - per_cpu(svm_data, cpu) = NULL; kfree(sd->sev_vmcbs); __free_page(sd->save_area); - kfree(sd); + sd->save_area = NULL; } static int svm_cpu_init(int cpu) { - struct svm_cpu_data *sd; + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); int ret = -ENOMEM; - sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); - if (!sd) - return ret; + memset(sd, 0, sizeof(struct svm_cpu_data)); sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!sd->save_area) - goto free_cpu_data; + return ret; ret = sev_cpu_init(sd); if (ret) goto free_save_area; - per_cpu(svm_data, cpu) = sd; - return 0; free_save_area: __free_page(sd->save_area); -free_cpu_data: - kfree(sd); + sd->save_area = NULL; return ret; } @@ -1424,7 +1413,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb) int i; for_each_online_cpu(i) - cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); + cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL); } static void svm_vcpu_free(struct kvm_vcpu *vcpu) @@ -1449,7 +1438,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); if (sev_es_guest(vcpu->kvm)) sev_es_unmap_ghcb(svm); @@ -1486,7 +1475,7 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu) static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); if (sd->current_vmcb != svm->vmcb) { sd->current_vmcb = svm->vmcb; @@ -3442,7 +3431,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) static void reload_tss(struct kvm_vcpu *vcpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); sd->tss_desc->type = 9; /* available 32/64-bit TSS */ load_TR_desc(); @@ -3450,7 +3439,7 @@ static void reload_tss(struct kvm_vcpu *vcpu) static void pre_svm_run(struct kvm_vcpu *vcpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); /* @@ -3919,7 +3908,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) if (sev_es_guest(vcpu->kvm)) { __svm_sev_es_vcpu_run(svm); } else { - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); /* * Use a single vmcb (vmcb01 because it's always valid) for diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7540db9902a6..2af6a71126c1 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -293,7 +293,7 @@ struct svm_cpu_data { struct vmcb **sev_vmcbs; }; -DECLARE_PER_CPU(struct 
svm_cpu_data *, svm_data); +DECLARE_PER_CPU(struct svm_cpu_data, svm_data); void recalc_intercepts(struct vcpu_svm *svm); -- cgit v1.2.3 From e61ab42de874c5af8c5d98b327c77a374d9e7da1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Nov 2022 05:14:27 -0500 Subject: KVM: SVM: move guest vmsave/vmload back to assembly It is error-prone that code after vmexit cannot access percpu data because GSBASE has not been restored yet. It forces MSR_IA32_SPEC_CTRL save/restore to happen very late, after the predictor untraining sequence, and it gets in the way of return stack depth tracking (a retbleed mitigation that is in linux-next as of 2022-11-09). As a first step towards fixing that, move the VMCB VMSAVE/VMLOAD to assembly, essentially undoing commit fb0c4a4fee5a ("KVM: SVM: move VMLOAD/VMSAVE to C code", 2021-03-15). The reason for that commit was that it made it simpler to use a different VMCB for VMLOAD/VMSAVE versus VMRUN; but that is not a big hassle anymore thanks to the kvm-asm-offsets machinery and other related cleanups. The idea on how to number the exception tables is stolen from a prototype patch by Peter Zijlstra. Cc: stable@vger.kernel.org Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk") Link: Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm-asm-offsets.c | 1 + arch/x86/kvm/svm/svm.c | 9 -------- arch/x86/kvm/svm/vmenter.S | 49 ++++++++++++++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c index f1b694e431ae..f83e88b85bf2 100644 --- a/arch/x86/kvm/kvm-asm-offsets.c +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -16,6 +16,7 @@ static void __used common(void) BLANK(); OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); + OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 48274c93d78b..4e3a47eb5002 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3910,16 +3910,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) } else { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); - /* - * Use a single vmcb (vmcb01 because it's always valid) for - * context switching guest state via VMLOAD/VMSAVE, that way - * the state doesn't need to be copied between vmcb01 and - * vmcb02 when switching vmcbs for nested virtualization. - */ - vmload(svm->vmcb01.pa); __svm_vcpu_run(svm); - vmsave(svm->vmcb01.pa); - vmload(__sme_page_pa(sd->save_area)); } diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index d07bac1952c5..5bc2ed7d79c0 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -28,6 +28,8 @@ #define VCPU_R15 (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE) #endif +#define SVM_vmcb01_pa (SVM_vmcb01 + KVM_VMCB_pa) + .section .noinstr.text, "ax" /** @@ -55,6 +57,16 @@ SYM_FUNC_START(__svm_vcpu_run) mov %_ASM_ARG1, %_ASM_DI .endif + /* + * Use a single vmcb (vmcb01 because it's always valid) for + * context switching guest state via VMLOAD/VMSAVE, that way + * the state doesn't need to be copied between vmcb01 and + * vmcb02 when switching vmcbs for nested virtualization. + */ + mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX +1: vmload %_ASM_AX +2: + /* Get svm->current_vmcb->pa into RAX. 
*/ mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX @@ -80,16 +92,11 @@ SYM_FUNC_START(__svm_vcpu_run) /* Enter guest mode */ sti -1: vmrun %_ASM_AX - -2: cli - -#ifdef CONFIG_RETPOLINE - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE -#endif +3: vmrun %_ASM_AX +4: + cli - /* "POP" @svm to RAX. */ + /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX /* Save all guest registers. */ @@ -110,6 +117,18 @@ SYM_FUNC_START(__svm_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif + /* @svm can stay in RDI from now on. */ + mov %_ASM_AX, %_ASM_DI + + mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX +5: vmsave %_ASM_AX +6: + +#ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +#endif + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -159,11 +178,19 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP RET -3: cmpb $0, kvm_rebooting +10: cmpb $0, kvm_rebooting jne 2b ud2 +30: cmpb $0, kvm_rebooting + jne 4b + ud2 +50: cmpb $0, kvm_rebooting + jne 6b + ud2 - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE(1b, 10b) + _ASM_EXTABLE(3b, 30b) + _ASM_EXTABLE(5b, 50b) SYM_FUNC_END(__svm_vcpu_run) -- cgit v1.2.3 From e287bd005ad9d85dd6271dd795d3ecfb6bca46ad Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Nov 2022 03:49:59 -0500 Subject: KVM: SVM: restore host save area from assembly Allow access to the percpu area via the GS segment base, which is needed in order to access the saved host spec_ctrl value. In linux-next FILL_RETURN_BUFFER also needs to access percpu data. For simplicity, the physical address of the save area is added to struct svm_cpu_data. 
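
The save_area_pa field added here is the usual "compute once, reuse from awkward contexts" pattern: the physical address is derived when the per-CPU save area is set up, so the post-vmexit assembly only has to read a scalar. A rough sketch, with sme_page_pa() standing in for the real __sme_page_pa() translation (placeholder only):

	struct svm_cpu_data_sketch {
		void *save_area;		/* page allocated at CPU init */
		unsigned long save_area_pa;	/* cached for the assembly path */
	};

	/* placeholder for __sme_page_pa(); the real helper also applies the SME mask */
	static unsigned long sme_page_pa(void *page)
	{
		return (unsigned long)page;
	}

	static void svm_cpu_init_sketch(struct svm_cpu_data_sketch *sd, void *page)
	{
		sd->save_area = page;
		sd->save_area_pa = sme_page_pa(page);	/* one translation, reused on every vmexit */
	}
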
Cc: stable@vger.kernel.org Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk") Reported-by: Nathan Chancellor Analyzed-by: Andrew Cooper Tested-by: Nathan Chancellor Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm-asm-offsets.c | 1 + arch/x86/kvm/svm/svm.c | 14 ++++++-------- arch/x86/kvm/svm/svm.h | 2 ++ arch/x86/kvm/svm/svm_ops.h | 5 ----- arch/x86/kvm/svm/vmenter.S | 17 +++++++++++++++++ 5 files changed, 26 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c index f83e88b85bf2..1b805cd24d66 100644 --- a/arch/x86/kvm/kvm-asm-offsets.c +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -18,6 +18,7 @@ static void __used common(void) OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); + OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa); } if (IS_ENABLED(CONFIG_KVM_INTEL)) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4e3a47eb5002..469c1b5617af 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -592,7 +592,7 @@ static int svm_hardware_enable(void) wrmsrl(MSR_EFER, efer | EFER_SVME); - wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area)); + wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa); if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { /* @@ -648,6 +648,7 @@ static void svm_cpu_uninit(int cpu) kfree(sd->sev_vmcbs); __free_page(sd->save_area); + sd->save_area_pa = 0; sd->save_area = NULL; } @@ -665,6 +666,7 @@ static int svm_cpu_init(int cpu) if (ret) goto free_save_area; + sd->save_area_pa = __sme_page_pa(sd->save_area); return 0; free_save_area: @@ -1450,7 +1452,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) * Save additional host state that will be restored on VMEXIT (sev-es) * or subsequent vmload of host save area. 
*/ - vmsave(__sme_page_pa(sd->save_area)); + vmsave(sd->save_area_pa); if (sev_es_guest(vcpu->kvm)) { struct sev_es_save_area *hostsa; hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); @@ -3905,14 +3907,10 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) guest_state_enter_irqoff(); - if (sev_es_guest(vcpu->kvm)) { + if (sev_es_guest(vcpu->kvm)) __svm_sev_es_vcpu_run(svm); - } else { - struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); - + else __svm_vcpu_run(svm); - vmload(__sme_page_pa(sd->save_area)); - } guest_state_exit_irqoff(); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 2af6a71126c1..83955a4e520e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -287,6 +287,8 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + unsigned long save_area_pa; + struct vmcb *current_vmcb; /* index = sev_asid, value = vmcb pointer */ diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 9430d6437c9f..36c8af87a707 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -61,9 +61,4 @@ static __always_inline void vmsave(unsigned long pa) svm_asm1(vmsave, "a" (pa), "memory"); } -static __always_inline void vmload(unsigned long pa) -{ - svm_asm1(vmload, "a" (pa), "memory"); -} - #endif /* __KVM_X86_SVM_OPS_H */ diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 5bc2ed7d79c0..57440acfc73e 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -49,6 +49,14 @@ SYM_FUNC_START(__svm_vcpu_run) #endif push %_ASM_BX + /* + * Save variables needed after vmexit on the stack, in inverse + * order compared to when they are needed. + */ + + /* Needed to restore access to percpu variables. */ + __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa) + /* Save @svm. */ push %_ASM_ARG1 @@ -124,6 +132,11 @@ SYM_FUNC_START(__svm_vcpu_run) 5: vmsave %_ASM_AX 6: + /* Restores GSBASE among other things, allowing access to percpu data. */ + pop %_ASM_AX +7: vmload %_ASM_AX +8: + #ifdef CONFIG_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE @@ -187,10 +200,14 @@ SYM_FUNC_START(__svm_vcpu_run) 50: cmpb $0, kvm_rebooting jne 6b ud2 +70: cmpb $0, kvm_rebooting + jne 8b + ud2 _ASM_EXTABLE(1b, 10b) _ASM_EXTABLE(3b, 30b) _ASM_EXTABLE(5b, 50b) + _ASM_EXTABLE(7b, 70b) SYM_FUNC_END(__svm_vcpu_run) -- cgit v1.2.3 From 9f2febf3f04daebdaaa5a43cfa20e3844905c0f9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 30 Sep 2022 14:24:40 -0400 Subject: KVM: SVM: move MSR_IA32_SPEC_CTRL save/restore to assembly Restoration of the host IA32_SPEC_CTRL value is probably too late with respect to the return thunk training sequence. With respect to the user/kernel boundary, AMD says, "If software chooses to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel exit), software should set STIBP to 1 before executing the return thunk training sequence." I assume the same requirements apply to the guest/host boundary. The return thunk training sequence is in vmenter.S, quite close to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's IA32_SPEC_CTRL value is not restored until much later. To avoid this, move the restoration of host SPEC_CTRL to assembly and, for consistency, move the restoration of the guest SPEC_CTRL as well. 
This is not particularly difficult, apart from some care to cover both 32- and 64-bit, and to share code between SEV-ES and normal vmentry. Cc: stable@vger.kernel.org Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk") Suggested-by: Jim Mattson Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kernel/cpu/bugs.c | 13 ++--- arch/x86/kvm/kvm-asm-offsets.c | 1 + arch/x86/kvm/svm/svm.c | 37 +++++-------- arch/x86/kvm/svm/svm.h | 4 +- arch/x86/kvm/svm/vmenter.S | 119 +++++++++++++++++++++++++++++++++++++++-- 5 files changed, 136 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index da7c361f47e0..6ec0b7ce7453 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -196,22 +196,15 @@ void __init check_bugs(void) } /* - * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is - * done in vmenter.S. + * NOTE: This function is *only* called for SVM, since Intel uses + * MSR_IA32_SPEC_CTRL for SSBD. */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); + u64 guestval, hostval; struct thread_info *ti = current_thread_info(); - if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - if (hostval != guestval) { - msrval = setguest ? guestval : hostval; - wrmsrl(MSR_IA32_SPEC_CTRL, msrval); - } - } - /* * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c index 1b805cd24d66..24a710d37323 100644 --- a/arch/x86/kvm/kvm-asm-offsets.c +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -16,6 +16,7 @@ static void __used common(void) BLANK(); OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); + OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl); OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 469c1b5617af..cf1aed25f4ab 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -720,6 +720,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) u32 offset; u32 *msrpm; + /* + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ msrpm = is_guest_mode(vcpu) ? 
to_svm(vcpu)->nested.msrpm: to_svm(vcpu)->msrpm; @@ -3901,16 +3910,16 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; } -static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) +static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) { struct vcpu_svm *svm = to_svm(vcpu); guest_state_enter_irqoff(); if (sev_es_guest(vcpu->kvm)) - __svm_sev_es_vcpu_run(svm); + __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); else - __svm_vcpu_run(svm); + __svm_vcpu_run(svm, spec_ctrl_intercepted); guest_state_exit_irqoff(); } @@ -3918,6 +3927,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL); trace_kvm_entry(vcpu); @@ -3976,26 +3986,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); - svm_vcpu_enter_exit(vcpu); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) && - unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted); if (!sev_es_guest(vcpu->kvm)) reload_tss(vcpu); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 83955a4e520e..199a2ecef1ce 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -682,7 +682,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ -void __svm_sev_es_vcpu_run(struct vcpu_svm *svm); -void __svm_vcpu_run(struct vcpu_svm *svm); +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); #endif diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 57440acfc73e..34367dc203f2 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -32,9 +32,69 @@ .section .noinstr.text, "ax" +.macro RESTORE_GUEST_SPEC_CTRL + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ + ALTERNATIVE_2 "", \ + "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_V_SPEC_CTRL +801: +.endm +.macro RESTORE_GUEST_SPEC_CTRL_BODY +800: + /* + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the + * host's, write the MSR. This is kept out-of-line so that the common + * case does not have to jump. + * + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, + * there must not be any returns or indirect branches between this code + * and vmentry. 
+ */ + movl SVM_spec_ctrl(%_ASM_DI), %eax + cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax + je 801b + mov $MSR_IA32_SPEC_CTRL, %ecx + xor %edx, %edx + wrmsr + jmp 801b +.endm + +.macro RESTORE_HOST_SPEC_CTRL + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ + ALTERNATIVE_2 "", \ + "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_V_SPEC_CTRL +901: +.endm +.macro RESTORE_HOST_SPEC_CTRL_BODY +900: + /* Same for after vmexit. */ + mov $MSR_IA32_SPEC_CTRL, %ecx + + /* + * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, + * if it was not intercepted during guest execution. + */ + cmpb $0, (%_ASM_SP) + jnz 998f + rdmsr + movl %eax, SVM_spec_ctrl(%_ASM_DI) +998: + + /* Now restore the host value of the MSR if different from the guest's. */ + movl PER_CPU_VAR(x86_spec_ctrl_current), %eax + cmp SVM_spec_ctrl(%_ASM_DI), %eax + je 901b + xor %edx, %edx + wrmsr + jmp 901b +.endm + + /** * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode * @svm: struct vcpu_svm * + * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP @@ -54,17 +114,26 @@ SYM_FUNC_START(__svm_vcpu_run) * order compared to when they are needed. */ + /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ + push %_ASM_ARG2 + /* Needed to restore access to percpu variables. */ __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa) - /* Save @svm. */ + /* Finally save @svm. */ push %_ASM_ARG1 .ifnc _ASM_ARG1, _ASM_DI - /* Move @svm to RDI. */ + /* + * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX + * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. + */ mov %_ASM_ARG1, %_ASM_DI .endif + /* Clobbers RAX, RCX, RDX. */ + RESTORE_GUEST_SPEC_CTRL + /* * Use a single vmcb (vmcb01 because it's always valid) for * context switching guest state via VMLOAD/VMSAVE, that way @@ -142,6 +211,9 @@ SYM_FUNC_START(__svm_vcpu_run) FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif + /* Clobbers RAX, RCX, RDX. */ + RESTORE_HOST_SPEC_CTRL + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -177,6 +249,9 @@ SYM_FUNC_START(__svm_vcpu_run) xor %r15d, %r15d #endif + /* "Pop" @spec_ctrl_intercepted. */ + pop %_ASM_BX + pop %_ASM_BX #ifdef CONFIG_X86_64 @@ -191,6 +266,9 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP RET + RESTORE_GUEST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY + 10: cmpb $0, kvm_rebooting jne 2b ud2 @@ -214,6 +292,7 @@ SYM_FUNC_END(__svm_vcpu_run) /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode * @svm: struct vcpu_svm * + * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_sev_es_vcpu_run) push %_ASM_BP @@ -228,8 +307,30 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) #endif push %_ASM_BX + /* + * Save variables needed after vmexit on the stack, in inverse + * order compared to when they are needed. + */ + + /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ + push %_ASM_ARG2 + + /* Save @svm. */ + push %_ASM_ARG1 + +.ifnc _ASM_ARG1, _ASM_DI + /* + * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX + * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. + */ + mov %_ASM_ARG1, %_ASM_DI +.endif + + /* Clobbers RAX, RCX, RDX. */ + RESTORE_GUEST_SPEC_CTRL + /* Get svm->current_vmcb->pa into RAX. 
*/ - mov SVM_current_vmcb(%_ASM_ARG1), %_ASM_AX + mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX /* Enter guest mode */ @@ -239,11 +340,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) 2: cli + /* Pop @svm to RDI, guest registers have been saved already. */ + pop %_ASM_DI + #ifdef CONFIG_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif + /* Clobbers RAX, RCX, RDX. */ + RESTORE_HOST_SPEC_CTRL + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -253,6 +360,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) */ UNTRAIN_RET + /* "Pop" @spec_ctrl_intercepted. */ + pop %_ASM_BX + pop %_ASM_BX #ifdef CONFIG_X86_64 @@ -267,6 +377,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) pop %_ASM_BP RET + RESTORE_GUEST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY + 3: cmpb $0, kvm_rebooting jne 2b ud2 -- cgit v1.2.3 From bd3d394e367e66e773a6cb25a82c29b04464230b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 30 Sep 2022 14:48:24 -0400 Subject: x86, KVM: remove unnecessary argument to x86_virt_spec_ctrl and callers x86_virt_spec_ctrl only deals with the paravirtualized MSR_IA32_VIRT_SPEC_CTRL now and does not handle MSR_IA32_SPEC_CTRL anymore; remove the corresponding, unused argument. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/spec-ctrl.h | 10 +++++----- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/svm/svm.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 5393babc0598..cb0386fc4dc3 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -13,7 +13,7 @@ * Takes the guest view of SPEC_CTRL MSR as a parameter and also * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); +extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest); /** * x86_spec_ctrl_set_guest - Set speculation control registers for the guest @@ -24,9 +24,9 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bo * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, true); } /** @@ -38,9 +38,9 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, false); } /* AMD specific Speculative Store Bypass MSR data */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6ec0b7ce7453..3e3230cccaa7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -200,7 +200,7 @@ void __init check_bugs(void) * MSR_IA32_SPEC_CTRL for SSBD. 
*/ void -x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) +x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool setguest) { u64 guestval, hostval; struct thread_info *ti = current_thread_info(); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index cf1aed25f4ab..9f88c8e6766e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3984,7 +3984,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) - x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + x86_spec_ctrl_set_guest(svm->virt_spec_ctrl); svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted); @@ -3992,7 +3992,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) reload_tss(vcpu); if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) - x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + x86_spec_ctrl_restore_host(svm->virt_spec_ctrl); if (!sev_es_guest(vcpu->kvm)) { vcpu->arch.cr2 = svm->vmcb->save.cr2; -- cgit v1.2.3 From 0bd8bd2f7a789fe1dcb21ad148199d2f62d79873 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Fri, 4 Nov 2022 07:22:20 -0700 Subject: KVM: SVM: Only dump VMSA to klog at KERN_DEBUG level Explicitly print the VMSA dump at KERN_DEBUG log level, KERN_CONT uses KERNEL_DEFAULT if the previous log line has a newline, i.e. if there's nothing to continuing, and as a result the VMSA gets dumped when it shouldn't. The KERN_CONT documentation says it defaults back to KERNL_DEFAULT if the previous log line has a newline. So switch from KERN_CONT to print_hex_dump_debug(). Jarkko pointed this out in reference to the original patch. See: https://lore.kernel.org/all/YuPMeWX4uuR1Tz3M@kernel.org/ print_hex_dump(KERN_DEBUG, ...) was pointed out there, but print_hex_dump_debug() should similar. Fixes: 6fac42f127b8 ("KVM: SVM: Dump Virtual Machine Save Area (VMSA) to klog") Signed-off-by: Peter Gonda Reviewed-by: Sean Christopherson Cc: Jarkko Sakkinen Cc: Harald Hoyer Cc: Paolo Bonzini Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Message-Id: <20221104142220.469452-1-pgonda@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 9b66ee34e264..efaaef2b7ae1 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -605,7 +605,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->dr6 = svm->vcpu.arch.dr6; pr_debug("Virtual Machine Save Area (VMSA):\n"); - print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); return 0; } -- cgit v1.2.3 From 8631ef59b62290c7d88e7209e35dfb47f33f4902 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 19 Sep 2022 17:10:06 +0800 Subject: KVM: x86/pmu: Do not speculatively query Intel GP PMCs that don't exist yet The SDM lists an architectural MSR IA32_CORE_CAPABILITIES (0xCF) that limits the theoretical maximum value of the Intel GP PMC MSRs allocated at 0xC1 to 14; likewise the Intel April 2022 SDM adds IA32_OVERCLOCKING_STATUS at 0x195 which limits the number of event selection MSRs to 15 (0x186-0x194). 
Limiting the maximum number of counters to 14 or 18 based on the currently allocated MSRs is clearly fragile, and it seems likely that Intel will even place PMCs 8-15 at a completely different range of MSR indices. So stop at the maximum number of GP PMCs supported today on Intel processors. There are some machines, like Intel P4 with non Architectural PMU, that may indeed have 18 counters, but those counters are in a completely different MSR address range and are not supported by KVM. Cc: Vitaly Kuznetsov Cc: stable@vger.kernel.org Fixes: cf05a67b68b8 ("KVM: x86: omit "impossible" pmu MSRs from MSR list") Suggested-by: Jim Mattson Signed-off-by: Like Xu Reviewed-by: Jim Mattson Message-Id: <20220919091008.60695-1-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f5eb577d583..73716fab120f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1442,20 +1442,10 @@ static const u32 msrs_to_save_all[] = { MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, - MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9, - MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11, - MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, - MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, - MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, - MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9, - MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11, - MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, - MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, - MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, @@ -7041,12 +7031,12 @@ static void kvm_init_msr_list(void) intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) continue; break; - case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: + case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 7: if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >= min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) continue; break; - case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17: + case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 7: if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) continue; -- cgit v1.2.3 From 4f1fa2a1bbeb2feca436d2c86bf6f78dc4e5e4c4 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 19 Sep 2022 17:10:07 +0800 Subject: KVM: x86/pmu: Limit the maximum number of supported Intel GP counters The Intel Architectural IA32_PMCx MSRs addresses range allows for a maximum of 8 GP counters, and KVM cannot address any more. Introduce a local macro (named KVM_INTEL_PMC_MAX_GENERIC) and use it consistently to refer to the number of counters supported by KVM, thus avoiding possible out-of-bound accesses. 
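
The resulting filter in kvm_init_msr_list() is plain index arithmetic. A small standalone sketch of the check (constants written out for illustration; MSR_ARCH_PERFMON_PERFCTR0 really is 0xc1, the cap mirrors this patch):

	#define MSR_ARCH_PERFMON_PERFCTR0	0xc1
	#define KVM_INTEL_PMC_MAX_GENERIC	8

	static int perfctr_msr_is_listed(unsigned int msr, unsigned int num_counters_gp)
	{
		unsigned int limit = KVM_INTEL_PMC_MAX_GENERIC < num_counters_gp ?
				     KVM_INTEL_PMC_MAX_GENERIC : num_counters_gp;

		/* e.g. with 8 reported GP counters, 0xc1..0xc8 pass and 0xc9 does not */
		return msr - MSR_ARCH_PERFMON_PERFCTR0 < limit;
	}
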
Suggested-by: Jim Mattson Signed-off-by: Like Xu Reviewed-by: Jim Mattson Message-Id: <20220919091008.60695-2-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 +++++- arch/x86/kvm/pmu.c | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 4 ++-- arch/x86/kvm/x86.c | 12 +++++++----- 4 files changed, 15 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7551b6f9c31c..286f6130dcb4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -501,6 +501,10 @@ struct kvm_pmc { bool intr; }; +/* More counters may conflict with other existing Architectural MSRs */ +#define KVM_INTEL_PMC_MAX_GENERIC 8 +#define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1) +#define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1) #define KVM_PMC_MAX_FIXED 3 struct kvm_pmu { unsigned nr_arch_gp_counters; @@ -516,7 +520,7 @@ struct kvm_pmu { u64 reserved_bits; u64 raw_event_mask; u8 version; - struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; + struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; struct irq_work irq_work; DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index d9b9a0f0db17..de1fd7369736 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -56,7 +56,7 @@ static const struct x86_cpu_id vmx_icl_pebs_cpu[] = { * code. Each pmc, stored in kvm_pmc.idx field, is unique across * all perf counters (both gp and fixed). The mapping relationship * between pmc and perf counters is as the following: - * * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters + * * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters * [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed * * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H * and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 25b70a85bef5..10b33da9bd05 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -617,7 +617,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu); - for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { + for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; @@ -643,7 +643,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmc *pmc = NULL; int i; - for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { + for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) { pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 73716fab120f..1e25f410d1fe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1438,6 +1438,9 @@ static const u32 msrs_to_save_all[] = { MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, + MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, + + /* This part of MSRs should match KVM_INTEL_PMC_MAX_GENERIC. 
*/ MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, @@ -1446,7 +1449,6 @@ static const u32 msrs_to_save_all[] = { MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, - MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, @@ -7031,14 +7033,14 @@ static void kvm_init_msr_list(void) intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) continue; break; - case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 7: + case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR_MAX: if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >= - min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) + min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) continue; break; - case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 7: + case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL_MAX: if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= - min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) + min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) continue; break; case MSR_IA32_XFD: -- cgit v1.2.3 From 556f3c9ad7c101aa16a43ef4539f3aabc1d7b32e Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 19 Sep 2022 17:10:08 +0800 Subject: KVM: x86/pmu: Limit the maximum number of supported AMD GP counters The AMD PerfMonV2 specification allows for a maximum of 16 GP counters, but currently only 6 pairs of MSRs are accepted by KVM. While AMD64_NUM_COUNTERS_CORE is already equal to 6, increasing without adjusting msrs_to_save_all[] could result in out-of-bounds accesses. Therefore introduce a macro (named KVM_AMD_PMC_MAX_GENERIC) to refer to the number of counters supported by KVM. 
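
The compile-time guards added in the hunk below amount to static assertions. In plain C11 terms (the INTEL_PMC_MAX_GENERIC value is assumed here, not taken from this log):

	#define KVM_AMD_PMC_MAX_GENERIC		6
	#define AMD64_NUM_COUNTERS_CORE		6
	#define INTEL_PMC_MAX_GENERIC		32	/* assumed value for illustration */

	_Static_assert(KVM_AMD_PMC_MAX_GENERIC <= AMD64_NUM_COUNTERS_CORE,
		       "KVM's AMD GP counter cap must not exceed the hardware counter pairs");
	_Static_assert(KVM_AMD_PMC_MAX_GENERIC <= INTEL_PMC_MAX_GENERIC,
		       "KVM's AMD GP counter cap must fit the Intel-sized bookkeeping");
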
Signed-off-by: Like Xu Reviewed-by: Jim Mattson Message-Id: <20220919091008.60695-3-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/pmu.c | 7 ++++--- arch/x86/kvm/x86.c | 3 +++ 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 286f6130dcb4..f05ebaa26f0f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -506,6 +506,7 @@ struct kvm_pmc { #define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1) #define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1) #define KVM_PMC_MAX_FIXED 3 +#define KVM_AMD_PMC_MAX_GENERIC 6 struct kvm_pmu { unsigned nr_arch_gp_counters; unsigned nr_arch_fixed_counters; diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index b68956299fa8..9d65cd095691 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC); + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > AMD64_NUM_COUNTERS_CORE); + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC); - for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) { + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; @@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) { + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) { struct kvm_pmc *pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1e25f410d1fe..ecea83f0da49 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1452,10 +1452,13 @@ static const u32 msrs_to_save_all[] = { MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, + + /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */ MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2, MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, }; -- cgit v1.2.3 From 6510c78490c490a6636e48b61eeaa6fb65981f4b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 29 Oct 2022 19:34:50 +0800 Subject: riscv: process: fix kernel info leakage thread_struct's s[12] may contain random kernel memory content, which may be finally leaked to userspace. This is a security hole. Fix it by clearing the s[12] array in thread_struct when fork. As for kthread case, it's better to clear the s[12] array as well. 
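
The shape of the fix is simply "scrub the callee-saved slots after the structure copy". A minimal userspace sketch, with field names that only loosely mirror the riscv thread_struct:

	#include <string.h>

	struct thread_sketch {
		unsigned long ra, sp;
		unsigned long s[12];	/* callee-saved: may hold stale kernel data */
	};

	static void copy_thread_sketch(struct thread_sketch *child,
				       const struct thread_sketch *parent)
	{
		*child = *parent;			/* structure copy, as fork does */
		memset(child->s, 0, sizeof(child->s));	/* don't leak the parent's s[0..11] */
	}
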
Fixes: 7db91e57a0ac ("RISC-V: Task implementation") Signed-off-by: Jisheng Zhang Tested-by: Guo Ren Link: https://lore.kernel.org/r/20221029113450.4027-1-jszhang@kernel.org Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/CAJF2gTSdVyAaM12T%2B7kXAdRPGS4VyuO08X1c7paE-n4Fr8OtRA@mail.gmail.com/ Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index b0c63e8e867e..8955f2432c2d 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -164,6 +164,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + memset(&p->thread.s, 0, sizeof(p->thread.s)); + /* p->thread holds context to be restored by __switch_to() */ if (unlikely(args->fn)) { /* Kernel thread */ -- cgit v1.2.3 From 50f4dd657a0fcf90aa8da8dc2794a8100ff4c37c Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 1 Nov 2022 02:29:43 +0800 Subject: riscv: vdso: fix build with llvm Even after commit 89fd4a1df829 ("riscv: jump_label: mark arguments as const to satisfy asm constraints"), building with CC_OPTIMIZE_FOR_SIZE + LLVM=1 can reproduce below build error: CC arch/riscv/kernel/vdso/vgettimeofday.o In file included from :4: In file included from lib/vdso/gettimeofday.c:5: In file included from include/vdso/datapage.h:17: In file included from include/vdso/processor.h:10: In file included from arch/riscv/include/asm/vdso/processor.h:7: In file included from include/linux/jump_label.h:112: arch/riscv/include/asm/jump_label.h:42:3: error: invalid operand for inline asm constraint 'i' " .option push \n\t" ^ 1 error generated. I think the problem is when "-Os" is passed as CFLAGS, it's removed by "CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os" which is introduced in commit e05d57dcb8c7 ("riscv: Fixup __vdso_gettimeofday broke dynamic ftrace"), thus no optimization at all for vgettimeofday.c arm64 does remove "-Os" as well, but it forces "-O2" after removing "-Os". I compared the generated vgettimeofday.o with "-O2" and "-Os", I think no big performance difference. So let's tell the kbuild not to remove "-Os" rather than follow arm64 style. 
vdso related performance can be improved a lot when building kernel with CC_OPTIMIZE_FOR_SIZE after this commit, ("-Os" VS no optimization) Fixes: e05d57dcb8c7 ("riscv: Fixup __vdso_gettimeofday broke dynamic ftrace") Signed-off-by: Jisheng Zhang Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221031182943.2453-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index f2e065671e4d..84ac0fe612e7 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -30,7 +30,7 @@ obj-y += vdso.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Disable -pg to prevent insert call site -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n -- cgit v1.2.3 From 9b9eaee9828fe98b030cf43ac50065a54a2f5d52 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 6 Nov 2022 15:53:54 +0100 Subject: arm64: efi: Fix handling of misaligned runtime regions and drop warning Currently, when mapping the EFI runtime regions in the EFI page tables, we complain about misaligned regions in a rather noisy way, using WARN(). Not only does this produce a lot of irrelevant clutter in the log, it is factually incorrect, as misaligned runtime regions are actually allowed by the EFI spec as long as they don't require conflicting memory types within the same 64k page. So let's drop the warning, and tweak the code so that we - take both the start and end of the region into account when checking for misalignment - only revert to RWX mappings for non-code regions if misaligned code regions are also known to exist. Cc: Acked-by: Linus Torvalds Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi.c | 52 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 8d36e66a6e64..ee53f2a0aa03 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -13,6 +13,14 @@ #include +static bool region_is_misaligned(const efi_memory_desc_t *md) +{ + if (PAGE_SIZE == EFI_PAGE_SIZE) + return false; + return !PAGE_ALIGNED(md->phys_addr) || + !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT); +} + /* * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be * executable, everything else can be mapped with the XN bits @@ -26,14 +34,22 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) if (type == EFI_MEMORY_MAPPED_IO) return PROT_DEVICE_nGnRE; - if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr), - "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?")) + if (region_is_misaligned(md)) { + static bool __initdata code_is_misaligned; + /* - * If the region is not aligned to the page size of the OS, we - * can not use strict permissions, since that would also affect - * the mapping attributes of the adjacent regions. + * Regions that are not aligned to the OS page size cannot be + * mapped with strict permissions, as those might interfere + * with the permissions that are needed by the adjacent + * region's mapping. However, if we haven't encountered any + * misaligned runtime code regions so far, we can safely use + * non-executable permissions for non-code regions. 
*/ - return pgprot_val(PAGE_KERNEL_EXEC); + code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE); + + return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC) + : pgprot_val(PAGE_KERNEL); + } /* R-- */ if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) == @@ -64,19 +80,16 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE || md->type == EFI_RUNTIME_SERVICES_DATA); - if (!PAGE_ALIGNED(md->phys_addr) || - !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { - /* - * If the end address of this region is not aligned to page - * size, the mapping is rounded up, and may end up sharing a - * page frame with the next UEFI memory region. If we create - * a block entry now, we may need to split it again when mapping - * the next region, and support for that is going to be removed - * from the MMU routines. So avoid block mappings altogether in - * that case. - */ + /* + * If this region is not aligned to the page size used by the OS, the + * mapping will be rounded outwards, and may end up sharing a page + * frame with an adjacent runtime memory region. Given that the page + * table descriptor covering the shared page will be rewritten when the + * adjacent region gets mapped, we must avoid block mappings here so we + * don't have to worry about splitting them when that happens. + */ + if (region_is_misaligned(md)) page_mappings_only = true; - } create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, @@ -103,6 +116,9 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm, BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && md->type != EFI_RUNTIME_SERVICES_DATA); + if (region_is_misaligned(md)) + return 0; + /* * Calling apply_to_page_range() is only safe on regions that are * guaranteed to be mapped down to pages. Since we are only called -- cgit v1.2.3 From 50e63dd8ed92045eb70a72d7ec725488320fb68b Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 7 Nov 2022 15:15:25 +0000 Subject: riscv: fix reserved memory setup Currently, RISC-V sets up reserved memory using the "early" copy of the device tree. 
As a result, when trying to get a reserved memory region using of_reserved_mem_lookup(), the pointer to reserved memory regions is using the early, pre-virtual-memory address which causes a kernel panic when trying to use the buffer's name: Unable to handle kernel paging request at virtual address 00000000401c31ac Oops [#1] Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 6.0.0-rc1-00001-g0d9d6953d834 #1 Hardware name: Microchip PolarFire-SoC Icicle Kit (DT) epc : string+0x4a/0xea ra : vsnprintf+0x1e4/0x336 epc : ffffffff80335ea0 ra : ffffffff80338936 sp : ffffffff81203be0 gp : ffffffff812e0a98 tp : ffffffff8120de40 t0 : 0000000000000000 t1 : ffffffff81203e28 t2 : 7265736572203a46 s0 : ffffffff81203c20 s1 : ffffffff81203e28 a0 : ffffffff81203d22 a1 : 0000000000000000 a2 : ffffffff81203d08 a3 : 0000000081203d21 a4 : ffffffffffffffff a5 : 00000000401c31ac a6 : ffff0a00ffffff04 a7 : ffffffffffffffff s2 : ffffffff81203d08 s3 : ffffffff81203d00 s4 : 0000000000000008 s5 : ffffffff000000ff s6 : 0000000000ffffff s7 : 00000000ffffff00 s8 : ffffffff80d9821a s9 : ffffffff81203d22 s10: 0000000000000002 s11: ffffffff80d9821c t3 : ffffffff812f3617 t4 : ffffffff812f3617 t5 : ffffffff812f3618 t6 : ffffffff81203d08 status: 0000000200000100 badaddr: 00000000401c31ac cause: 000000000000000d [] vsnprintf+0x1e4/0x336 [] vprintk_store+0xf6/0x344 [] vprintk_emit+0x56/0x192 [] vprintk_default+0x16/0x1e [] vprintk+0x72/0x80 [] _printk+0x36/0x50 [] print_reserved_mem+0x1c/0x24 [] paging_init+0x528/0x5bc [] setup_arch+0xd0/0x592 [] start_kernel+0x82/0x73c early_init_fdt_scan_reserved_mem() takes no arguments as it operates on initial_boot_params, which is populated by early_init_dt_verify(). On RISC-V, early_init_dt_verify() is called twice. Once, directly, in setup_arch() if CONFIG_BUILTIN_DTB is not enabled and once indirectly, very early in the boot process, by parse_dtb() when it calls early_init_dt_scan_nodes(). This first call uses dtb_early_va to set initial_boot_params, which is not usable later in the boot process when early_init_fdt_scan_reserved_mem() is called. On arm64 for example, the corresponding call to early_init_dt_scan_nodes() uses fixmap addresses and doesn't suffer the same fate. Move early_init_fdt_scan_reserved_mem() further along the boot sequence, after the direct call to early_init_dt_verify() in setup_arch() so that the names use the correct virtual memory addresses. The above supposed that CONFIG_BUILTIN_DTB was not set, but should work equally in the case where it is - unflatted_and_copy_device_tree() also updates initial_boot_params. 
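
The underlying rule is the general one about early boot pointers: anything captured from the pre-MMU mapping must not be dereferenced once only the properly mapped copy is valid. A schematic of the fixed ordering as a userspace analogy (no kernel APIs, names invented):

	#include <stdio.h>

	static const char *initial_boot_params;	/* stands in for the kernel global */

	static void scan_reserved_mem(void)
	{
		/* in the kernel this walks the FDT and records region names */
		printf("scanning reserved memory via %s\n", initial_boot_params);
	}

	int main(void)
	{
		const char *early_copy  = "early pre-MMU dtb mapping";	/* dtb_early_va analogue */
		const char *mapped_copy = "virtually mapped dtb";	/* post-remap analogue */

		initial_boot_params = early_copy;	/* parse_dtb() path, very early */
		initial_boot_params = mapped_copy;	/* early_init_dt_verify() in setup_arch() */
		scan_reserved_mem();			/* moved to this point by the patch */
		return 0;
	}
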
Reported-by: Valentina Fernandez Reported-by: Evgenii Shatokhin Link: https://lore.kernel.org/linux-riscv/f8e67f82-103d-156c-deb0-d6d6e2756f5e@microchip.com/ Fixes: 922b0375fc93 ("riscv: Fix memblock reservation for device tree blob") Signed-off-by: Conor Dooley Tested-by: Evgenii Shatokhin Link: https://lore.kernel.org/r/20221107151524.3941467-1-conor.dooley@microchip.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 1 + arch/riscv/mm/init.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index ad76bb59b059..67ec1fadcfe2 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -283,6 +283,7 @@ void __init setup_arch(char **cmdline_p) else pr_err("No DTB found in kernel mappings\n"); #endif + early_init_fdt_scan_reserved_mem(); misc_mem_init(); init_resources(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b56a0a75533f..50a1b6edd491 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -262,7 +262,6 @@ static void __init setup_bootmem(void) memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); } - early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); -- cgit v1.2.3 From fcae44fd36d052e956e69a64642fc03820968d78 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 8 Nov 2022 10:13:23 -0700 Subject: RISC-V: vdso: Do not add missing symbols to version section in linker script Recently, ld.lld moved from '--undefined-version' to '--no-undefined-version' as the default, which breaks the compat vDSO build: ld.lld: error: version script assignment of 'LINUX_4.15' to symbol '__vdso_gettimeofday' failed: symbol not defined ld.lld: error: version script assignment of 'LINUX_4.15' to symbol '__vdso_clock_gettime' failed: symbol not defined ld.lld: error: version script assignment of 'LINUX_4.15' to symbol '__vdso_clock_getres' failed: symbol not defined These symbols are not present in the compat vDSO or the regular vDSO for 32-bit but they are unconditionally included in the version section of the linker script, which is prohibited with '--no-undefined-version'. Fix this issue by only including the symbols that are actually exported in the version section of the linker script. 
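
Because the vDSO linker script is fed through the C preprocessor (see the CPPFLAGS_vdso.lds additions in the hunk below), the gating works exactly like an ordinary #ifdef. A plain C illustration of the same mechanism, reusing the macro name from the patch:

	/* cc -E -P gate.c                     -> only __vdso_rt_sigreturn is listed
	 * cc -E -P -DHAS_VGETTIMEOFDAY gate.c -> the time symbols reappear        */
	#ifdef HAS_VGETTIMEOFDAY
	const char *versioned_syms[] = {
		"__vdso_gettimeofday", "__vdso_clock_gettime", "__vdso_clock_getres",
		"__vdso_rt_sigreturn",
	};
	#else
	const char *versioned_syms[] = { "__vdso_rt_sigreturn" };
	#endif
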
Link: https://github.com/ClangBuiltLinux/linux/issues/1756 Signed-off-by: Nathan Chancellor Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221108171324.3377226-1-nathan@kernel.org/ Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 3 +++ arch/riscv/kernel/vdso/vdso.lds.S | 2 ++ 2 files changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 84ac0fe612e7..db6548509bb3 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -28,6 +28,9 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) obj-y += vdso.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) +ifneq ($(filter vgettimeofday, $(vdso-syms)),) +CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY +endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 01d94aae5bf5..150b1a572e61 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -68,9 +68,11 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; +#ifdef HAS_VGETTIMEOFDAY __vdso_gettimeofday; __vdso_clock_gettime; __vdso_clock_getres; +#endif __vdso_getcpu; __vdso_flush_icache; local: *; -- cgit v1.2.3 From 6d3085e4d89ad7e6c7f1c6cf929d903393565861 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 11 Nov 2022 00:18:41 +0000 Subject: KVM: x86/mmu: Block all page faults during kvm_zap_gfn_range() When zapping a GFN range, pass 0 => ALL_ONES for the to-be-invalidated range to effectively block all page faults while the zap is in-progress. The invalidation helpers take a host virtual address, whereas zapping a GFN obviously provides a guest physical address and with the wrong unit of measurement (frame vs. byte). Alternatively, KVM could walk all memslots to get the associated HVAs, but thanks to SMM, that would require multiple lookups. And practically speaking, kvm_zap_gfn_range() usage is quite rare and not a hot path, e.g. MTRR and CR0.CD are almost guaranteed to be done only on vCPU0 during boot, and APICv inhibits are similarly infrequent operations. 
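
To spell out the unit mismatch (an illustrative sketch, not code from this patch): kvm_mmu_invalidate_begin()/end() describe the in-flux range in host virtual addresses, while this path only has guest frame numbers, so the conservative choice is to declare everything in-flux for the short time the zap holds mmu_lock.

  /*
   * Wrong units -- gfn_start/gfn_end are guest frame numbers, not host
   * virtual addresses, so passing them through would block faults on an
   * unrelated range:
   *
   *     kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end);
   *
   * What the patch does instead: block all faults while zapping.
   */
  kvm_mmu_invalidate_begin(kvm, 0, -1ul);
  /* ... zap ... */
  kvm_mmu_invalidate_end(kvm, 0, -1ul);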
Fixes: edb298c663fc ("KVM: x86/mmu: bump mmu notifier count in kvm_zap_gfn_range") Reported-by: Chao Peng Cc: stable@vger.kernel.org Cc: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20221111001841.2412598-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f81539061d6..1ccb769f62af 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6056,7 +6056,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) write_lock(&kvm->mmu_lock); - kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_begin(kvm, 0, -1ul); flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); @@ -6070,7 +6070,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end - gfn_start); - kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_end(kvm, 0, -1ul); write_unlock(&kvm->mmu_lock); } -- cgit v1.2.3 From 62776e4378ae9086115ffd6f8bd9b9c0fe6e6809 Mon Sep 17 00:00:00 2001 From: John Thomson Date: Tue, 1 Nov 2022 03:07:49 +1000 Subject: mips: boot/compressed: use __NO_FORTIFY In the mips CONFIG_SYS_SUPPORTS_ZBOOT kernel, fix the compile error when using CONFIG_FORTIFY_SOURCE=y LD vmlinuz mipsel-openwrt-linux-musl-ld: arch/mips/boot/compressed/decompress.o: in function `decompress_kernel': ./include/linux/decompress/mm.h:(.text.decompress_kernel+0x177c): undefined reference to `warn_slowpath_fmt' kernel test robot helped identify this as related to fortify. The error appeared with commit 54d9469bc515 ("fortify: Add run-time WARN for cross-field memcpy()") Link: https://lore.kernel.org/r/202209161144.x9xSqNQZ-lkp@intel.com/ Resolve this in the same style as commit cfecea6ead5f ("lib/string: Move helper functions out of string.c") Reported-by: kernel test robot Fixes: 54d9469bc515 ("fortify: Add run-time WARN for cross-field memcpy()") Reviewed-by: Kees Cook Signed-off-by: John Thomson Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/compressed/decompress.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index 5b38a802e101..c5dd415254d3 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -9,6 +9,7 @@ #define DISABLE_BRANCH_PROFILING +#define __NO_FORTIFY #include #include #include -- cgit v1.2.3 From 612d80784fdc0c2e2ee2e2d901a55ef2f72ebf4b Mon Sep 17 00:00:00 2001 From: Rongwei Zhang Date: Wed, 2 Nov 2022 20:27:39 +0800 Subject: MIPS: fix duplicate definitions for exported symbols Building with clang-14 fails with: AS arch/mips/kernel/relocate_kernel.o :0: error: symbol 'kexec_args' is already defined :0: error: symbol 'secondary_kexec_args' is already defined :0: error: symbol 'kexec_start_address' is already defined :0: error: symbol 'kexec_indirection_page' is already defined :0: error: symbol 'relocate_new_kernel_size' is already defined It turns out EXPORT defined in asm/asm.h expands to a symbol definition, so there is no need to define these symbols again. Remove duplicated symbol definitions. 
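
For context, EXPORT() in arch/mips/include/asm/asm.h expands to roughly the following (paraphrased here; see the header for the authoritative definition), which is why pairing it with an explicit label of the same name defines the symbol twice:

  /* paraphrased from arch/mips/include/asm/asm.h */
  #define EXPORT(symbol)          \
                  .globl  symbol; \
  symbol:

  /*
   * So the old pattern
   *     kexec_args:
   *             EXPORT(kexec_args)
   * emitted the "kexec_args" label twice.  Older GNU toolchains happened
   * to tolerate this; clang's integrated assembler rejects the duplicate,
   * producing the errors quoted above.
   */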
Fixes: 7aa1c8f47e7e ("MIPS: kdump: Add support") Signed-off-by: Rongwei Zhang Reviewed-by: Nathan Chancellor Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/relocate_kernel.S | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index cfde14b48fd8..f5b2ef979b43 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -145,8 +145,7 @@ LEAF(kexec_smp_wait) * kexec_args[0..3] are used to prepare register values. */ -kexec_args: - EXPORT(kexec_args) +EXPORT(kexec_args) arg0: PTR_WD 0x0 arg1: PTR_WD 0x0 arg2: PTR_WD 0x0 @@ -159,8 +158,7 @@ arg3: PTR_WD 0x0 * their registers a0-a3. secondary_kexec_args[0..3] are used * to prepare register values. */ -secondary_kexec_args: - EXPORT(secondary_kexec_args) +EXPORT(secondary_kexec_args) s_arg0: PTR_WD 0x0 s_arg1: PTR_WD 0x0 s_arg2: PTR_WD 0x0 @@ -171,19 +169,16 @@ kexec_flag: #endif -kexec_start_address: - EXPORT(kexec_start_address) +EXPORT(kexec_start_address) PTR_WD 0x0 .size kexec_start_address, PTRSIZE -kexec_indirection_page: - EXPORT(kexec_indirection_page) +EXPORT(kexec_indirection_page) PTR_WD 0 .size kexec_indirection_page, PTRSIZE relocate_new_kernel_end: -relocate_new_kernel_size: - EXPORT(relocate_new_kernel_size) +EXPORT(relocate_new_kernel_size) PTR_WD relocate_new_kernel_end - relocate_new_kernel .size relocate_new_kernel_size, PTRSIZE -- cgit v1.2.3 From fa706927f4722a2df723b2a28d139b1904a3e7fa Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Thu, 3 Nov 2022 09:18:15 +0800 Subject: MIPS: Loongson64: Add WARN_ON on kexec related kmalloc failed Add WARN_ON on kexec related kmalloc failed, avoid to pass NULL pointer to following memcpy and loongson_kexec_prepare. Fixes: 6ce48897ce47 ("MIPS: Loongson64: Add kexec/kdump support") Signed-off-by: Liao Chang Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/reset.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c index 758d5d26aaaa..e420800043b0 100644 --- a/arch/mips/loongson64/reset.c +++ b/arch/mips/loongson64/reset.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -159,8 +160,17 @@ static int __init mips_reboot_setup(void) #ifdef CONFIG_KEXEC kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); + if (WARN_ON(!kexec_argv)) + return -ENOMEM; + kdump_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); + if (WARN_ON(!kdump_argv)) + return -ENOMEM; + kexec_envp = kmalloc(KEXEC_ENVP_SIZE, GFP_KERNEL); + if (WARN_ON(!kexec_envp)) + return -ENOMEM; + fw_arg1 = KEXEC_ARGV_ADDR; memcpy(kexec_envp, (void *)fw_arg2, KEXEC_ENVP_SIZE); -- cgit v1.2.3 From 2a296157859287c3b639f7228354c13f7182ed71 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 3 Nov 2022 11:15:35 +0100 Subject: mips: alchemy: gpio: Include the right header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The local GPIO driver in the MIPS Alchemy is including the legacy header but what it wants is to implement a GPIO driver so include instead. 
Cc: Bartosz Golaszewski Cc: linux-gpio@vger.kernel.org Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Linus Walleij Signed-off-by: Thomas Bogendoerfer --- arch/mips/alchemy/common/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c index a17d7a8909c4..1b16daaa86ae 100644 --- a/arch/mips/alchemy/common/gpiolib.c +++ b/arch/mips/alchemy/common/gpiolib.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From 64ac0befe75bdfaffc396c2b4a0ed5ae6920eeee Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 3 Nov 2022 15:10:53 +0000 Subject: MIPS: jump_label: Fix compat branch range check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cast upper bound of branch range to long to do signed compare, avoid negative offset trigger this warning. Fixes: 9b6584e35f40 ("MIPS: jump_label: Use compact branches for >= r6") Signed-off-by: Jiaxun Yang Cc: stable@vger.kernel.org Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/jump_label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index 71a882c8c6eb..f7978d50a2ba 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -56,7 +56,7 @@ void arch_jump_label_transform(struct jump_entry *e, * The branch offset must fit in the instruction's 26 * bit field. */ - WARN_ON((offset >= BIT(25)) || + WARN_ON((offset >= (long)BIT(25)) || (offset < -(long)BIT(25))); insn.j_format.opcode = bc6_op; -- cgit v1.2.3 From 648060902aa302331b5d6e4f26d8ee0761d239ab Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 28 Oct 2022 15:23:44 +0200 Subject: MIPS: pic32: treat port as signed integer get_port_from_cmdline() returns an int, yet is assigned to a char, which is wrong in its own right, but also, with char becoming unsigned, this poses problems, because -1 is used as an error value. Further complicating things, fw_init_early_console() is only ever called with a -1 argument. Fix this up by removing the unused argument from fw_init_early_console() and treating port as a proper signed integer. Cc: Thomas Bogendoerfer Signed-off-by: Jason A. 
Donenfeld Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/fw/fw.h | 2 +- arch/mips/pic32/pic32mzda/early_console.c | 13 ++++++------- arch/mips/pic32/pic32mzda/init.c | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/fw/fw.h b/arch/mips/include/asm/fw/fw.h index d0ef8b4892bb..d0494ce4b337 100644 --- a/arch/mips/include/asm/fw/fw.h +++ b/arch/mips/include/asm/fw/fw.h @@ -26,6 +26,6 @@ extern char *fw_getcmdline(void); extern void fw_meminit(void); extern char *fw_getenv(char *name); extern unsigned long fw_getenvl(char *name); -extern void fw_init_early_console(char port); +extern void fw_init_early_console(void); #endif /* __ASM_FW_H_ */ diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c index 25372e62783b..3cd1b408fa1c 100644 --- a/arch/mips/pic32/pic32mzda/early_console.c +++ b/arch/mips/pic32/pic32mzda/early_console.c @@ -27,7 +27,7 @@ #define U_BRG(x) (UART_BASE(x) + 0x40) static void __iomem *uart_base; -static char console_port = -1; +static int console_port = -1; static int __init configure_uart_pins(int port) { @@ -47,7 +47,7 @@ static int __init configure_uart_pins(int port) return 0; } -static void __init configure_uart(char port, int baud) +static void __init configure_uart(int port, int baud) { u32 pbclk; @@ -60,7 +60,7 @@ static void __init configure_uart(char port, int baud) uart_base + PIC32_SET(U_STA(port))); } -static void __init setup_early_console(char port, int baud) +static void __init setup_early_console(int port, int baud) { if (configure_uart_pins(port)) return; @@ -130,16 +130,15 @@ _out: return baud; } -void __init fw_init_early_console(char port) +void __init fw_init_early_console(void) { char *arch_cmdline = pic32_getcmdline(); - int baud = -1; + int baud, port; uart_base = ioremap(PIC32_BASE_UART, 0xc00); baud = get_baud_from_cmdline(arch_cmdline); - if (port == -1) - port = get_port_from_cmdline(arch_cmdline); + port = get_port_from_cmdline(arch_cmdline); if (port == -1) port = EARLY_CONSOLE_PORT; diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 08c46cf122d7..53b227a9074c 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -47,7 +47,7 @@ void __init plat_mem_setup(void) strscpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE); #ifdef CONFIG_EARLY_PRINTK - fw_init_early_console(-1); + fw_init_early_console(); #endif pic32_config_init(); } -- cgit v1.2.3 From 639b2e2ff1e850eb4e8853b4dc233875108eec4b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:14:16 +0100 Subject: x86/xen: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. 
While at it, include the corresponding header file () Signed-off-by: Christophe JAILLET Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/e91af3c8708af38b1c57e0a2d7eb9765dda0e963.1667336095.git.christophe.jaillet@wanadoo.fr Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 3 ++- arch/x86/xen/setup.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 0ad3d4bf52b3..34b05ee1523f 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -113,7 +114,7 @@ static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE); static int __init parse_xen_msr_safe(char *str) { if (str) - return strtobool(str, &xen_msr_safe); + return kstrtobool(str, &xen_msr_safe); return -EINVAL; } early_param("xen_msr_safe", parse_xen_msr_safe); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 4f4309500559..8db26f10fb1d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -85,7 +86,7 @@ static void __init xen_parse_512gb(void) arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit="); if (!arg) val = true; - else if (strtobool(arg + strlen("xen_512gb_limit="), &val)) + else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val)) return; xen_512gb_limit = val; -- cgit v1.2.3 From 2632daebafd04746b4b96c2f26a6021bc38f6209 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 14 Nov 2022 12:44:01 +0100 Subject: x86/cpu: Restore AMD's DE_CFG MSR after resume DE_CFG contains the LFENCE serializing bit, restore it on resume too. This is relevant to older families due to the way how they do S3. Unify and correct naming while at it. 
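
The restore piggybacks on the existing speculation-control MSR save/restore list; conceptually it looks like the sketch below (simplified, the real table is spec_msr_id[] in pm_save_spec_msr() in arch/x86/power/cpu.c), so the MSR is snapshotted before suspend and written back on resume, keeping the LFENCE-serializing bit set across S3.

  /* sketch: MSRs saved before suspend and restored on resume */
  static const u32 spec_msr_id[] = {
          /* ... existing entries ... */
          MSR_TSX_FORCE_ABORT,
          MSR_IA32_MCU_OPT_CTRL,
          MSR_AMD64_LS_CFG,
          MSR_AMD64_DE_CFG,       /* new: keep LFENCE serializing after resume */
  };

  msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));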
Fixes: e4d0e84e4907 ("x86/cpu/AMD: Make LFENCE a serializing instruction") Reported-by: Andrew Cooper Reported-by: Pawan Gupta Signed-off-by: Borislav Petkov Cc: Signed-off-by: Linus Torvalds --- arch/x86/include/asm/msr-index.h | 8 +++++--- arch/x86/kernel/cpu/amd.c | 6 ++---- arch/x86/kernel/cpu/hygon.c | 4 ++-- arch/x86/kvm/svm/svm.c | 10 +++++----- arch/x86/kvm/x86.c | 2 +- arch/x86/power/cpu.c | 1 + tools/arch/x86/include/asm/msr-index.h | 8 +++++--- 7 files changed, 21 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 10ac52705892..4a2af82553e4 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -535,6 +535,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -640,9 +645,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 860b60273df3..c75d75b9f11a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -770,8 +770,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); } -#define MSR_AMD64_DE_CFG 0xC0011029 - static void init_amd_ln(struct cpuinfo_x86 *c) { /* @@ -965,8 +963,8 @@ static void init_amd(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 21fd425088fe..c393b8773ace 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -326,8 +326,8 @@ static void init_hygon(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. 
*/ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9f88c8e6766e..4b6d2b050e57 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2709,9 +2709,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) msr->data = 0; switch (msr->index) { - case MSR_F10H_DECFG: - if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) - msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + case MSR_AMD64_DE_CFG: + if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; case MSR_IA32_PERF_CAPABILITIES: return 0; @@ -2812,7 +2812,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; - case MSR_F10H_DECFG: + case MSR_AMD64_DE_CFG: msr_info->data = svm->msr_decfg; break; default: @@ -3041,7 +3041,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_F10H_DECFG: { + case MSR_AMD64_DE_CFG: { struct kvm_msr_entry msr_entry; msr_entry.index = msr->index; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ecea83f0da49..490ec23c8450 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1557,7 +1557,7 @@ static const u32 msr_based_features_all[] = { MSR_IA32_VMX_EPT_VPID_CAP, MSR_IA32_VMX_VMFUNC, - MSR_F10H_DECFG, + MSR_AMD64_DE_CFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_PERF_CAPABILITIES, diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index bb176c72891c..4cd39f304e20 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -519,6 +519,7 @@ static void pm_save_spec_msr(void) MSR_TSX_FORCE_ABORT, MSR_IA32_MCU_OPT_CTRL, MSR_AMD64_LS_CFG, + MSR_AMD64_DE_CFG, }; msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 10ac52705892..f17ade084720 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -535,6 +535,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -640,9 +645,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a -- cgit v1.2.3