summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-08 21:10:58 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-08 21:10:58 +0300
commit410feb75de245664d66bc05ab2e2412751d10acf (patch)
tree73deae83ab33a7c0668eb00eb2e1d347b20782c9
parent2996148a9d4169f19a57827003c75605ce3b152b (diff)
parent0fe42512b2f03f9e5a20b9f55ef1013a68b4cd48 (diff)
downloadlinux-410feb75de245664d66bc05ab2e2412751d10acf.tar.xz
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: "Apart from the core arm64 and perf changes, the Spectre v4 mitigation touches the arm KVM code and the ACPI PPTT support touches drivers/ (acpi and cacheinfo). I should have the maintainers' acks in place. Summary: - Spectre v4 mitigation (Speculative Store Bypass Disable) support for arm64 using SMC firmware call to set a hardware chicken bit - ACPI PPTT (Processor Properties Topology Table) parsing support and enable the feature for arm64 - Report signal frame size to user via auxv (AT_MINSIGSTKSZ). The primary motivation is Scalable Vector Extensions which requires more space on the signal frame than the currently defined MINSIGSTKSZ - ARM perf patches: allow building arm-cci as module, demote dev_warn() to dev_dbg() in arm-ccn event_init(), miscellaneous cleanups - cmpwait() WFE optimisation to avoid some spurious wakeups - L1_CACHE_BYTES reverted back to 64 (for performance reasons that have to do with some network allocations) while keeping ARCH_DMA_MINALIGN to 128. cache_line_size() returns the actual hardware Cache Writeback Granule - Turn LSE atomics on by default in Kconfig - Kernel fault reporting tidying - Some #include and miscellaneous cleanups" * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (53 commits) arm64: Fix syscall restarting around signal suppressed by tracer arm64: topology: Avoid checking numa mask for scheduler MC selection ACPI / PPTT: fix build when CONFIG_ACPI_PPTT is not enabled arm64: cpu_errata: include required headers arm64: KVM: Move VCPU_WORKAROUND_2_FLAG macros to the top of the file arm64: signal: Report signal frame size to userspace via auxv arm64/sve: Thin out initialisation sanity-checks for sve_max_vl arm64: KVM: Add ARCH_WORKAROUND_2 discovery through ARCH_FEATURES_FUNC_ID arm64: KVM: Handle guest's ARCH_WORKAROUND_2 requests arm64: KVM: Add ARCH_WORKAROUND_2 support for guests arm64: KVM: Add HYP per-cpu accessors arm64: ssbd: Add prctl interface for per-thread mitigation arm64: ssbd: Introduce thread flag to control userspace mitigation arm64: ssbd: Restore mitigation status on CPU resume arm64: ssbd: Skip apply_ssbd if not using dynamic mitigation arm64: ssbd: Add global mitigation state accessor arm64: Add 'ssbd' command-line option arm64: Add ARCH_WORKAROUND_2 probing arm64: Add per-cpu infrastructure to call ARCH_WORKAROUND_2 arm64: Call ARCH_WORKAROUND_2 on transitions between EL0 and EL1 ...
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt17
-rw-r--r--arch/arm/common/mcpm_entry.c2
-rw-r--r--arch/arm/include/asm/kvm_host.h12
-rw-r--r--arch/arm/include/asm/kvm_mmu.h5
-rw-r--r--arch/arm/kernel/perf_event_v6.c4
-rw-r--r--arch/arm/kernel/perf_event_v7.c3
-rw-r--r--arch/arm/kernel/perf_event_xscale.c6
-rw-r--r--arch/arm64/Kconfig15
-rw-r--r--arch/arm64/include/asm/acpi.h4
-rw-r--r--arch/arm64/include/asm/cache.h6
-rw-r--r--arch/arm64/include/asm/cmpxchg.h4
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/cpufeature.h22
-rw-r--r--arch/arm64/include/asm/elf.h13
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h12
-rw-r--r--arch/arm64/include/asm/kvm_asm.h30
-rw-r--r--arch/arm64/include/asm/kvm_host.h26
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h25
-rw-r--r--arch/arm64/include/asm/processor.h5
-rw-r--r--arch/arm64/include/asm/thread_info.h1
-rw-r--r--arch/arm64/include/asm/topology.h6
-rw-r--r--arch/arm64/include/uapi/asm/auxvec.h3
-rw-r--r--arch/arm64/kernel/Makefile1
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c3
-rw-r--r--arch/arm64/kernel/asm-offsets.c1
-rw-r--r--arch/arm64/kernel/cacheinfo.c15
-rw-r--r--arch/arm64/kernel/cpu_errata.c182
-rw-r--r--arch/arm64/kernel/cpufeature.c10
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S2
-rw-r--r--arch/arm64/kernel/entry.S30
-rw-r--r--arch/arm64/kernel/fpsimd.c18
-rw-r--r--arch/arm64/kernel/hibernate.c11
-rw-r--r--arch/arm64/kernel/perf_event.c3
-rw-r--r--arch/arm64/kernel/ptrace.c5
-rw-r--r--arch/arm64/kernel/signal.c57
-rw-r--r--arch/arm64/kernel/ssbd.c110
-rw-r--r--arch/arm64/kernel/suspend.c8
-rw-r--r--arch/arm64/kernel/topology.c104
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S20
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S38
-rw-r--r--arch/arm64/kvm/hyp/switch.c42
-rw-r--r--arch/arm64/kvm/reset.c4
-rw-r--r--arch/arm64/mm/dma-mapping.c5
-rw-r--r--arch/arm64/mm/fault.c46
-rw-r--r--arch/riscv/kernel/cacheinfo.c1
-rw-r--r--drivers/acpi/Kconfig3
-rw-r--r--drivers/acpi/Makefile1
-rw-r--r--drivers/acpi/pptt.c655
-rw-r--r--drivers/acpi/tables.c2
-rw-r--r--drivers/base/cacheinfo.c157
-rw-r--r--drivers/perf/Kconfig36
-rw-r--r--drivers/perf/arm-cci.c47
-rw-r--r--drivers/perf/arm-ccn.c22
-rw-r--r--drivers/perf/arm_pmu.c2
-rw-r--r--drivers/perf/arm_spe_pmu.c6
-rw-r--r--include/linux/acpi.h19
-rw-r--r--include/linux/arm-smccc.h10
-rw-r--r--include/linux/cacheinfo.h25
-rw-r--r--include/linux/perf/arm_pmu.h2
-rw-r--r--virt/kvm/arm/arm.c4
-rw-r--r--virt/kvm/arm/psci.c18
61 files changed, 1689 insertions, 260 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 20cc45602f45..638342d0a095 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4106,6 +4106,23 @@
expediting. Set to zero to disable automatic
expediting.
+ ssbd= [ARM64,HW]
+ Speculative Store Bypass Disable control
+
+ On CPUs that are vulnerable to the Speculative
+ Store Bypass vulnerability and offer a
+ firmware based mitigation, this parameter
+ indicates how the mitigation should be used:
+
+ force-on: Unconditionally enable mitigation for
+ for both kernel and userspace
+ force-off: Unconditionally disable mitigation for
+ for both kernel and userspace
+ kernel: Always enable mitigation in the
+ kernel, and offer a prctl interface
+ to allow userspace to register its
+ interest in being mitigated too.
+
stack_guard_gap= [MM]
override the default stack gap protection. The value
is in page units and it defines how many pages prior
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 2b913f17d50f..ad574d20415c 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -9,6 +9,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irqflags.h>
@@ -174,6 +175,7 @@ bool mcpm_is_available(void)
{
return (platform_ops) ? true : false;
}
+EXPORT_SYMBOL_GPL(mcpm_is_available);
/*
* We can't use regular spinlocks. In the switcher case, it is possible
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 343fc9e6f78d..2d75e77bf7bb 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -325,6 +325,18 @@ static inline bool kvm_arm_harden_branch_predictor(void)
}
}
+#define KVM_SSBD_UNKNOWN -1
+#define KVM_SSBD_FORCE_DISABLE 0
+#define KVM_SSBD_KERNEL 1
+#define KVM_SSBD_FORCE_ENABLE 2
+#define KVM_SSBD_MITIGATED 3
+
+static inline int kvm_arm_have_ssbd(void)
+{
+ /* No way to detect it yet, pretend it is not there. */
+ return KVM_SSBD_UNKNOWN;
+}
+
static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index c94d291fd1a8..8553d68b7c8a 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -356,6 +356,11 @@ static inline int kvm_map_vectors(void)
return 0;
}
+static inline int hyp_map_aux_data(void)
+{
+ return 0;
+}
+
#define kvm_phys_to_vttbr(addr) (addr)
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 1d7061a38922..be42c4f66a40 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -303,12 +303,10 @@ static void armv6pmu_enable_event(struct perf_event *event)
}
static irqreturn_t
-armv6pmu_handle_irq(int irq_num,
- void *dev)
+armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
unsigned long pmcr = armv6_pmcr_read();
struct perf_sample_data data;
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
struct pt_regs *regs;
int idx;
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 870b66c1e4ef..57f01e059f39 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -946,11 +946,10 @@ static void armv7pmu_disable_event(struct perf_event *event)
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
-static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
+static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
u32 pmnc;
struct perf_sample_data data;
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
struct pt_regs *regs;
int idx;
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index fcf218da660e..88d1a76f5367 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -142,11 +142,10 @@ xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
}
static irqreturn_t
-xscale1pmu_handle_irq(int irq_num, void *dev)
+xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
unsigned long pmnc;
struct perf_sample_data data;
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
struct pt_regs *regs;
int idx;
@@ -489,11 +488,10 @@ xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
}
static irqreturn_t
-xscale2pmu_handle_irq(int irq_num, void *dev)
+xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
unsigned long pmnc, of_flags;
struct perf_sample_data data;
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
struct pt_regs *regs;
int idx;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4759566a78cb..9795b59aa28a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -7,11 +7,13 @@ config ARM64
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ACPI_MCFG if ACPI
select ACPI_SPCR_TABLE if ACPI
+ select ACPI_PPTT if ACPI
select ARCH_CLOCKSOURCE_DATA
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
@@ -923,6 +925,15 @@ config HARDEN_EL2_VECTORS
If unsure, say Y.
+config ARM64_SSBD
+ bool "Speculative Store Bypass Disable" if EXPERT
+ default y
+ help
+ This enables mitigation of the bypassing of previous stores
+ by speculative loads.
+
+ If unsure, say Y.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
@@ -1034,6 +1045,7 @@ config ARM64_PAN
config ARM64_LSE_ATOMICS
bool "Atomic instructions"
+ default y
help
As part of the Large System Extensions, ARMv8.1 introduces new
atomic instructions that are designed specifically to scale in
@@ -1042,7 +1054,8 @@ config ARM64_LSE_ATOMICS
Say Y here to make use of these instructions for the in-kernel
atomic routines. This incurs a small overhead on CPUs that do
not support these instructions and requires the kernel to be
- built with binutils >= 2.25.
+ built with binutils >= 2.25 in order for the new instructions
+ to be used.
config ARM64_VHE
bool "Enable support for Virtualization Host Extensions (VHE)"
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 32f465a80e4e..0db62a4cbce2 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -86,6 +86,10 @@ static inline bool acpi_has_cpu_in_madt(void)
}
struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu);
+static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
+{
+ return acpi_cpu_get_madt_gicc(cpu)->uid;
+}
static inline void arch_fix_phys_package_id(int num, u32 slot) { }
void __init acpi_init_cpus(void);
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 9bbffc7a301f..5df5cfe1c143 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -33,7 +33,7 @@
#define ICACHE_POLICY_VIPT 2
#define ICACHE_POLICY_PIPT 3
-#define L1_CACHE_SHIFT 7
+#define L1_CACHE_SHIFT (6)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
/*
@@ -43,7 +43,7 @@
* cache before the transfer is done, causing old data to be seen by
* the CPU.
*/
-#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+#define ARCH_DMA_MINALIGN (128)
#ifndef __ASSEMBLY__
@@ -77,7 +77,7 @@ static inline u32 cache_type_cwg(void)
static inline int cache_line_size(void)
{
u32 cwg = cache_type_cwg();
- return cwg ? 4 << cwg : L1_CACHE_BYTES;
+ return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 4f5fd2a36e6e..3b0938281541 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -204,7 +204,9 @@ static inline void __cmpwait_case_##name(volatile void *ptr, \
unsigned long tmp; \
\
asm volatile( \
- " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
+ " sevl\n" \
+ " wfe\n" \
+ " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
" eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
" cbnz %" #w "[tmp], 1f\n" \
" wfe\n" \
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index bc51b72fafd4..8a699c708fc9 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -48,7 +48,8 @@
#define ARM64_HAS_CACHE_IDC 27
#define ARM64_HAS_CACHE_DIC 28
#define ARM64_HW_DBM 29
+#define ARM64_SSBD 30
-#define ARM64_NCAPS 30
+#define ARM64_NCAPS 31
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 09b0f2a80c8f..55bc1f073bfb 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -537,6 +537,28 @@ static inline u64 read_zcr_features(void)
return zcr;
}
+#define ARM64_SSBD_UNKNOWN -1
+#define ARM64_SSBD_FORCE_DISABLE 0
+#define ARM64_SSBD_KERNEL 1
+#define ARM64_SSBD_FORCE_ENABLE 2
+#define ARM64_SSBD_MITIGATED 3
+
+static inline int arm64_get_ssbd_state(void)
+{
+#ifdef CONFIG_ARM64_SSBD
+ extern int ssbd_state;
+ return ssbd_state;
+#else
+ return ARM64_SSBD_UNKNOWN;
+#endif
+}
+
+#ifdef CONFIG_ARM64_SSBD
+void arm64_set_ssbd_mitigation(bool state);
+#else
+static inline void arm64_set_ssbd_mitigation(bool state) {}
+#endif
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index fac1c4de7898..433b9554c6a1 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -121,6 +121,9 @@
#ifndef __ASSEMBLY__
+#include <linux/bug.h>
+#include <asm/processor.h> /* for signal_minsigstksz, used by ARCH_DLINFO */
+
typedef unsigned long elf_greg_t;
#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
@@ -148,6 +151,16 @@ typedef struct user_fpsimd_state elf_fpregset_t;
do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \
+ \
+ /* \
+ * Should always be nonzero unless there's a kernel bug. \
+ * If we haven't determined a sensible value to give to \
+ * userspace, omit the entry: \
+ */ \
+ if (likely(signal_minsigstksz)) \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, signal_minsigstksz); \
+ else \
+ NEW_AUX_ENT(AT_IGNORE, 0); \
} while (0)
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index e050d765ca9e..46843515d77b 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -207,12 +207,14 @@
str w\nxtmp, [\xpfpsr, #4]
.endm
-.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp
+.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
mrs_s x\nxtmp, SYS_ZCR_EL1
- bic x\nxtmp, x\nxtmp, ZCR_ELx_LEN_MASK
- orr x\nxtmp, x\nxtmp, \xvqminus1
- msr_s SYS_ZCR_EL1, x\nxtmp // self-synchronising
-
+ bic \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, x\nxtmp
+ b.eq 921f
+ msr_s SYS_ZCR_EL1, \xtmp2 // self-synchronising
+921:
_for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
_sve_ldr_p 0, \nxbase
_sve_wrffr 0
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index f6648a3e4152..951b2076a5e2 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -20,6 +20,9 @@
#include <asm/virt.h>
+#define VCPU_WORKAROUND_2_FLAG_SHIFT 0
+#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT)
+
#define ARM_EXIT_WITH_SERROR_BIT 31
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
#define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))
@@ -71,14 +74,37 @@ extern u32 __kvm_get_mdcr_el2(void);
extern u32 __init_stage2_translation(void);
+/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
+#define __hyp_this_cpu_ptr(sym) \
+ ({ \
+ void *__ptr = hyp_symbol_addr(sym); \
+ __ptr += read_sysreg(tpidr_el2); \
+ (typeof(&sym))__ptr; \
+ })
+
+#define __hyp_this_cpu_read(sym) \
+ ({ \
+ *__hyp_this_cpu_ptr(sym); \
+ })
+
#else /* __ASSEMBLY__ */
-.macro get_host_ctxt reg, tmp
- adr_l \reg, kvm_host_cpu_state
+.macro hyp_adr_this_cpu reg, sym, tmp
+ adr_l \reg, \sym
mrs \tmp, tpidr_el2
add \reg, \reg, \tmp
.endm
+.macro hyp_ldr_this_cpu reg, sym, tmp
+ adr_l \reg, \sym
+ mrs \tmp, tpidr_el2
+ ldr \reg, [\reg, \tmp]
+.endm
+
+.macro get_host_ctxt reg, tmp
+ hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp
+.endm
+
.macro get_vcpu_ptr vcpu, ctxt
get_host_ctxt \ctxt, \vcpu
ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 469de8acd06f..95d8a0e15b5f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -216,6 +216,9 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
+ /* State of various workarounds, see kvm_asm.h for bit assignment */
+ u64 workaround_flags;
+
/* Guest debug state */
u64 debug_flags;
@@ -452,6 +455,29 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
}
+#define KVM_SSBD_UNKNOWN -1
+#define KVM_SSBD_FORCE_DISABLE 0
+#define KVM_SSBD_KERNEL 1
+#define KVM_SSBD_FORCE_ENABLE 2
+#define KVM_SSBD_MITIGATED 3
+
+static inline int kvm_arm_have_ssbd(void)
+{
+ switch (arm64_get_ssbd_state()) {
+ case ARM64_SSBD_FORCE_DISABLE:
+ return KVM_SSBD_FORCE_DISABLE;
+ case ARM64_SSBD_KERNEL:
+ return KVM_SSBD_KERNEL;
+ case ARM64_SSBD_FORCE_ENABLE:
+ return KVM_SSBD_FORCE_ENABLE;
+ case ARM64_SSBD_MITIGATED:
+ return KVM_SSBD_MITIGATED;
+ case ARM64_SSBD_UNKNOWN:
+ default:
+ return KVM_SSBD_UNKNOWN;
+ }
+}
+
void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6128992c2ded..fb9a7127bb75 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -72,7 +72,6 @@
#ifdef __ASSEMBLY__
#include <asm/alternative.h>
-#include <asm/cpufeature.h>
/*
* Convert a kernel VA into a HYP VA.
@@ -473,6 +472,30 @@ static inline int kvm_map_vectors(void)
}
#endif
+#ifdef CONFIG_ARM64_SSBD
+DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
+
+static inline int hyp_map_aux_data(void)
+{
+ int cpu, err;
+
+ for_each_possible_cpu(cpu) {
+ u64 *ptr;
+
+ ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu);
+ err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+#else
+static inline int hyp_map_aux_data(void)
+{
+ return 0;
+}
+#endif
+
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 767598932549..65ab83e8926e 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -35,6 +35,8 @@
#ifdef __KERNEL__
#include <linux/build_bug.h>
+#include <linux/cache.h>
+#include <linux/init.h>
#include <linux/stddef.h>
#include <linux/string.h>
@@ -244,6 +246,9 @@ void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused);
void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused);
void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused);
+extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
+extern void __init minsigstksz_setup(void);
+
/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
#define SVE_SET_VL(arg) sve_set_current_vl(arg)
#define SVE_GET_VL() sve_get_current_vl()
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 740aa03c5f0d..cbcf11b5e637 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -94,6 +94,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_32BIT 22 /* 32bit process */
#define TIF_SVE 23 /* Scalable Vector Extension in use */
#define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */
+#define TIF_SSBD 25 /* Wants SSB mitigation */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index c4f2d50491eb..df48212f767b 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -7,14 +7,16 @@
struct cpu_topology {
int thread_id;
int core_id;
- int cluster_id;
+ int package_id;
+ int llc_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
+ cpumask_t llc_siblings;
};
extern struct cpu_topology cpu_topology[NR_CPUS];
-#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id)
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id)
#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h
index ec0a86d484e1..743c0b84fd30 100644
--- a/arch/arm64/include/uapi/asm/auxvec.h
+++ b/arch/arm64/include/uapi/asm/auxvec.h
@@ -19,7 +19,8 @@
/* vDSO location */
#define AT_SYSINFO_EHDR 33
+#define AT_MINSIGSTKSZ 51 /* stack needed for signal delivery */
-#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */
+#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index bf825f38d206..0025f8691046 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -54,6 +54,7 @@ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o
arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
+arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 6e47fc3ab549..97d45d5151d4 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -13,6 +13,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
+#include <linux/uaccess.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
@@ -20,8 +21,6 @@
#include <asm/system_misc.h>
#include <asm/traps.h>
#include <asm/kprobes.h>
-#include <linux/uaccess.h>
-#include <asm/cpufeature.h>
#define CREATE_TRACE_POINTS
#include "trace-events-emulation.h"
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5bdda651bd05..323aeb5f2fe6 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -136,6 +136,7 @@ int main(void)
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
+ DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 380f2e2fbed5..0bf0a835122f 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/acpi.h>
#include <linux/cacheinfo.h>
#include <linux/of.h>
@@ -46,7 +47,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
static int __init_cache_level(unsigned int cpu)
{
- unsigned int ctype, level, leaves, of_level;
+ unsigned int ctype, level, leaves, fw_level;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
@@ -59,15 +60,19 @@ static int __init_cache_level(unsigned int cpu)
leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
}
- of_level = of_find_last_cache_level(cpu);
- if (level < of_level) {
+ if (acpi_disabled)
+ fw_level = of_find_last_cache_level(cpu);
+ else
+ fw_level = acpi_find_last_cache_level(cpu);
+
+ if (level < fw_level) {
/*
* some external caches not specified in CLIDR_EL1
* the information may be available in the device tree
* only unified external caches are considered here
*/
- leaves += (of_level - level);
- level = of_level;
+ leaves += (fw_level - level);
+ level = fw_level;
}
this_cpu_ci->num_levels = level;
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index e4a1182deff7..1d2b6d768efe 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -16,6 +16,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/arm-smccc.h>
+#include <linux/psci.h>
#include <linux/types.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
@@ -232,6 +234,178 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
}
#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
+#ifdef CONFIG_ARM64_SSBD
+DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
+
+int ssbd_state __read_mostly = ARM64_SSBD_KERNEL;
+
+static const struct ssbd_options {
+ const char *str;
+ int state;
+} ssbd_options[] = {
+ { "force-on", ARM64_SSBD_FORCE_ENABLE, },
+ { "force-off", ARM64_SSBD_FORCE_DISABLE, },
+ { "kernel", ARM64_SSBD_KERNEL, },
+};
+
+static int __init ssbd_cfg(char *buf)
+{
+ int i;
+
+ if (!buf || !buf[0])
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) {
+ int len = strlen(ssbd_options[i].str);
+
+ if (strncmp(buf, ssbd_options[i].str, len))
+ continue;
+
+ ssbd_state = ssbd_options[i].state;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+early_param("ssbd", ssbd_cfg);
+
+void __init arm64_update_smccc_conduit(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr,
+ int nr_inst)
+{
+ u32 insn;
+
+ BUG_ON(nr_inst != 1);
+
+ switch (psci_ops.conduit) {
+ case PSCI_CONDUIT_HVC:
+ insn = aarch64_insn_get_hvc_value();
+ break;
+ case PSCI_CONDUIT_SMC:
+ insn = aarch64_insn_get_smc_value();
+ break;
+ default:
+ return;
+ }
+
+ *updptr = cpu_to_le32(insn);
+}
+
+void __init arm64_enable_wa2_handling(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr,
+ int nr_inst)
+{
+ BUG_ON(nr_inst != 1);
+ /*
+ * Only allow mitigation on EL1 entry/exit and guest
+ * ARCH_WORKAROUND_2 handling if the SSBD state allows it to
+ * be flipped.
+ */
+ if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL)
+ *updptr = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+void arm64_set_ssbd_mitigation(bool state)
+{
+ switch (psci_ops.conduit) {
+ case PSCI_CONDUIT_HVC:
+ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
+ break;
+
+ case PSCI_CONDUIT_SMC:
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
+static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ struct arm_smccc_res res;
+ bool required = true;
+ s32 val;
+
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
+ ssbd_state = ARM64_SSBD_UNKNOWN;
+ return false;
+ }
+
+ switch (psci_ops.conduit) {
+ case PSCI_CONDUIT_HVC:
+ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_2, &res);
+ break;
+
+ case PSCI_CONDUIT_SMC:
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_2, &res);
+ break;
+
+ default:
+ ssbd_state = ARM64_SSBD_UNKNOWN;
+ return false;
+ }
+
+ val = (s32)res.a0;
+
+ switch (val) {
+ case SMCCC_RET_NOT_SUPPORTED:
+ ssbd_state = ARM64_SSBD_UNKNOWN;
+ return false;
+
+ case SMCCC_RET_NOT_REQUIRED:
+ pr_info_once("%s mitigation not required\n", entry->desc);
+ ssbd_state = ARM64_SSBD_MITIGATED;
+ return false;
+
+ case SMCCC_RET_SUCCESS:
+ required = true;
+ break;
+
+ case 1: /* Mitigation not required on this CPU */
+ required = false;
+ break;
+
+ default:
+ WARN_ON(1);
+ return false;
+ }
+
+ switch (ssbd_state) {
+ case ARM64_SSBD_FORCE_DISABLE:
+ pr_info_once("%s disabled from command-line\n", entry->desc);
+ arm64_set_ssbd_mitigation(false);
+ required = false;
+ break;
+
+ case ARM64_SSBD_KERNEL:
+ if (required) {
+ __this_cpu_write(arm64_ssbd_callback_required, 1);
+ arm64_set_ssbd_mitigation(true);
+ }
+ break;
+
+ case ARM64_SSBD_FORCE_ENABLE:
+ pr_info_once("%s forced from command-line\n", entry->desc);
+ arm64_set_ssbd_mitigation(true);
+ required = true;
+ break;
+
+ default:
+ WARN_ON(1);
+ break;
+ }
+
+ return required;
+}
+#endif /* CONFIG_ARM64_SSBD */
+
#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \
.matches = is_affected_midr_range, \
.midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max)
@@ -488,6 +662,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors),
},
#endif
+#ifdef CONFIG_ARM64_SSBD
+ {
+ .desc = "Speculative Store Bypass Disable",
+ .capability = ARM64_SSBD,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = has_ssbd_mitigation,
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9d1b06d67c53..d2856b129097 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1606,7 +1606,6 @@ static void __init setup_system_capabilities(void)
void __init setup_cpu_features(void)
{
u32 cwg;
- int cls;
setup_system_capabilities();
mark_const_caps_ready();
@@ -1619,6 +1618,7 @@ void __init setup_cpu_features(void)
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
sve_setup();
+ minsigstksz_setup();
/* Advertise that we have computed the system capabilities */
set_sys_caps_initialised();
@@ -1627,13 +1627,9 @@ void __init setup_cpu_features(void)
* Check for sane CTR_EL0.CWG value.
*/
cwg = cache_type_cwg();
- cls = cache_line_size();
if (!cwg)
- pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
- cls);
- if (L1_CACHE_BYTES < cls)
- pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
- L1_CACHE_BYTES, cls);
+ pr_warn("No Cache Writeback Granule information, assuming %d\n",
+ ARCH_DMA_MINALIGN);
}
static bool __maybe_unused
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 73f17bffcd23..12d4958e6429 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -49,7 +49,7 @@ ENTRY(sve_save_state)
ENDPROC(sve_save_state)
ENTRY(sve_load_state)
- sve_load 0, x1, x2, 3
+ sve_load 0, x1, x2, 3, x4
ret
ENDPROC(sve_load_state)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ec2ee720e33e..28ad8799406f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -18,6 +18,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/arm-smccc.h>
#include <linux/init.h>
#include <linux/linkage.h>
@@ -137,6 +138,25 @@ alternative_else_nop_endif
add \dst, \dst, #(\sym - .entry.tramp.text)
.endm
+ // This macro corrupts x0-x3. It is the caller's duty
+ // to save/restore them if required.
+ .macro apply_ssbd, state, targ, tmp1, tmp2
+#ifdef CONFIG_ARM64_SSBD
+alternative_cb arm64_enable_wa2_handling
+ b \targ
+alternative_cb_end
+ ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1
+ cbz \tmp2, \targ
+ ldr \tmp2, [tsk, #TSK_TI_FLAGS]
+ tbnz \tmp2, #TIF_SSBD, \targ
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
+ mov w1, #\state
+alternative_cb arm64_update_smccc_conduit
+ nop // Patched to SMC/HVC #0
+alternative_cb_end
+#endif
+ .endm
+
.macro kernel_entry, el, regsize = 64
.if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0
@@ -163,6 +183,14 @@ alternative_else_nop_endif
ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling.
+ apply_ssbd 1, 1f, x22, x23
+
+#ifdef CONFIG_ARM64_SSBD
+ ldp x0, x1, [sp, #16 * 0]
+ ldp x2, x3, [sp, #16 * 1]
+#endif
+1:
+
mov x29, xzr // fp pointed to user-space
.else
add x21, sp, #S_FRAME_SIZE
@@ -303,6 +331,8 @@ alternative_if ARM64_WORKAROUND_845719
alternative_else_nop_endif
#endif
3:
+ apply_ssbd 0, 5f, x0, x1
+5:
.endif
msr elr_el1, x21 // set up the return data
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 4bcdd0318729..3b527ae46e49 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -31,7 +31,6 @@
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/preempt.h>
-#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
@@ -129,7 +128,7 @@ static int sve_default_vl = -1;
#ifdef CONFIG_ARM64_SVE
/* Maximum supported vector length across all CPUs (initially poisoned) */
-int __ro_after_init sve_max_vl = -1;
+int __ro_after_init sve_max_vl = SVE_VL_MIN;
/* Set of available vector lengths, as vq_to_bit(vq): */
static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
static void __percpu *efi_sve_state;
@@ -360,22 +359,13 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write,
return ret;
/* Writing -1 has the special meaning "set to max": */
- if (vl == -1) {
- /* Fail safe if sve_max_vl wasn't initialised */
- if (WARN_ON(!sve_vl_valid(sve_max_vl)))
- vl = SVE_VL_MIN;
- else
- vl = sve_max_vl;
-
- goto chosen;
- }
+ if (vl == -1)
+ vl = sve_max_vl;
if (!sve_vl_valid(vl))
return -EINVAL;
- vl = find_supported_vector_length(vl);
-chosen:
- sve_default_vl = vl;
+ sve_default_vl = find_supported_vector_length(vl);
return 0;
}
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 1ec5f28c39fc..6b2686d54411 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -313,6 +313,17 @@ int swsusp_arch_suspend(void)
sleep_cpu = -EINVAL;
__cpu_suspend_exit();
+
+ /*
+ * Just in case the boot kernel did turn the SSBD
+ * mitigation off behind our back, let's set the state
+ * to what we expect it to be.
+ */
+ switch (arm64_get_ssbd_state()) {
+ case ARM64_SSBD_FORCE_ENABLE:
+ case ARM64_SSBD_KERNEL:
+ arm64_set_ssbd_mitigation(true);
+ }
}
local_daif_restore(flags);
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 85a251b6dfa8..33147aacdafd 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -670,11 +670,10 @@ static void armv8pmu_disable_event(struct perf_event *event)
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
-static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
+static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
u32 pmovsr;
struct perf_sample_data data;
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
struct pt_regs *regs;
int idx;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 7ff81fed46e1..bd732644c2f6 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -766,9 +766,6 @@ static void sve_init_header_from_task(struct user_sve_header *header,
vq = sve_vq_from_vl(header->vl);
header->max_vl = sve_max_vl;
- if (WARN_ON(!sve_vl_valid(sve_max_vl)))
- header->max_vl = header->vl;
-
header->size = SVE_PT_SIZE(vq, header->flags);
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
SVE_PT_REGS_SVE);
@@ -1046,8 +1043,6 @@ static const struct user_regset_view user_aarch64_view = {
};
#ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-
enum compat_regset {
REGSET_COMPAT_GPR,
REGSET_COMPAT_VFP,
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 154b7d30145d..511af13e8d8f 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -570,8 +571,15 @@ badframe:
return 0;
}
-/* Determine the layout of optional records in the signal frame */
-static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
+/*
+ * Determine the layout of optional records in the signal frame
+ *
+ * add_all: if true, lays out the biggest possible signal frame for
+ * this task; otherwise, generates a layout for the current state
+ * of the task.
+ */
+static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
+ bool add_all)
{
int err;
@@ -581,7 +589,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
return err;
/* fault information, if valid */
- if (current->thread.fault_code) {
+ if (add_all || current->thread.fault_code) {
err = sigframe_alloc(user, &user->esr_offset,
sizeof(struct esr_context));
if (err)
@@ -591,8 +599,14 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
if (system_supports_sve()) {
unsigned int vq = 0;
- if (test_thread_flag(TIF_SVE))
- vq = sve_vq_from_vl(current->thread.sve_vl);
+ if (add_all || test_thread_flag(TIF_SVE)) {
+ int vl = sve_max_vl;
+
+ if (!add_all)
+ vl = current->thread.sve_vl;
+
+ vq = sve_vq_from_vl(vl);
+ }
err = sigframe_alloc(user, &user->sve_offset,
SVE_SIG_CONTEXT_SIZE(vq));
@@ -603,7 +617,6 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
return sigframe_alloc_end(user);
}
-
static int setup_sigframe(struct rt_sigframe_user_layout *user,
struct pt_regs *regs, sigset_t *set)
{
@@ -701,7 +714,7 @@ static int get_sigframe(struct rt_sigframe_user_layout *user,
int err;
init_user_layout(user);
- err = setup_sigframe_layout(user);
+ err = setup_sigframe_layout(user, false);
if (err)
return err;
@@ -830,11 +843,12 @@ static void do_signal(struct pt_regs *regs)
unsigned long continue_addr = 0, restart_addr = 0;
int retval = 0;
struct ksignal ksig;
+ bool syscall = in_syscall(regs);
/*
* If we were from a system call, check for system call restarting...
*/
- if (in_syscall(regs)) {
+ if (syscall) {
continue_addr = regs->pc;
restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4);
retval = regs->regs[0];
@@ -886,7 +900,7 @@ static void do_signal(struct pt_regs *regs)
* Handle restarting a different system call. As above, if a debugger
* has chosen to restart at a different PC, ignore the restart.
*/
- if (in_syscall(regs) && regs->pc == restart_addr) {
+ if (syscall && regs->pc == restart_addr) {
if (retval == -ERESTART_RESTARTBLOCK)
setup_restart_syscall(regs);
user_rewind_single_step(current);
@@ -936,3 +950,28 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
thread_flags = READ_ONCE(current_thread_info()->flags);
} while (thread_flags & _TIF_WORK_MASK);
}
+
+unsigned long __ro_after_init signal_minsigstksz;
+
+/*
+ * Determine the stack space required for guaranteed signal devliery.
+ * This function is used to populate AT_MINSIGSTKSZ at process startup.
+ * cpufeatures setup is assumed to be complete.
+ */
+void __init minsigstksz_setup(void)
+{
+ struct rt_sigframe_user_layout user;
+
+ init_user_layout(&user);
+
+ /*
+ * If this fails, SIGFRAME_MAXSZ needs to be enlarged. It won't
+ * be big enough, but it's our best guess:
+ */
+ if (WARN_ON(setup_sigframe_layout(&user, true)))
+ return;
+
+ signal_minsigstksz = sigframe_size(&user) +
+ round_up(sizeof(struct frame_record), 16) +
+ 16; /* max alignment padding */
+}
diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c
new file mode 100644
index 000000000000..3432e5ef9f41
--- /dev/null
+++ b/arch/arm64/kernel/ssbd.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 ARM Ltd, All Rights Reserved.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+
+#include <asm/cpufeature.h>
+
+/*
+ * prctl interface for SSBD
+ * FIXME: Drop the below ifdefery once merged in 4.18.
+ */
+#ifdef PR_SPEC_STORE_BYPASS
+static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ int state = arm64_get_ssbd_state();
+
+ /* Unsupported */
+ if (state == ARM64_SSBD_UNKNOWN)
+ return -EINVAL;
+
+ /* Treat the unaffected/mitigated state separately */
+ if (state == ARM64_SSBD_MITIGATED) {
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ return -EPERM;
+ case PR_SPEC_DISABLE:
+ case PR_SPEC_FORCE_DISABLE:
+ return 0;
+ }
+ }
+
+ /*
+ * Things are a bit backward here: the arm64 internal API
+ * *enables the mitigation* when the userspace API *disables
+ * speculation*. So much fun.
+ */
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ /* If speculation is force disabled, enable is not allowed */
+ if (state == ARM64_SSBD_FORCE_ENABLE ||
+ task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_clear_spec_ssb_disable(task);
+ clear_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_DISABLE:
+ if (state == ARM64_SSBD_FORCE_DISABLE)
+ return -EPERM;
+ task_set_spec_ssb_disable(task);
+ set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ if (state == ARM64_SSBD_FORCE_DISABLE)
+ return -EPERM;
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_force_disable(task);
+ set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssbd_prctl_set(task, ctrl);
+ default:
+ return -ENODEV;
+ }
+}
+
+static int ssbd_prctl_get(struct task_struct *task)
+{
+ switch (arm64_get_ssbd_state()) {
+ case ARM64_SSBD_UNKNOWN:
+ return -EINVAL;
+ case ARM64_SSBD_FORCE_ENABLE:
+ return PR_SPEC_DISABLE;
+ case ARM64_SSBD_KERNEL:
+ if (task_spec_ssb_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ssb_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ case ARM64_SSBD_FORCE_DISABLE:
+ return PR_SPEC_ENABLE;
+ default:
+ return PR_SPEC_NOT_AFFECTED;
+ }
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssbd_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+}
+#endif /* PR_SPEC_STORE_BYPASS */
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index a307b9e13392..70c283368b64 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -62,6 +62,14 @@ void notrace __cpu_suspend_exit(void)
*/
if (hw_breakpoint_restore)
hw_breakpoint_restore(cpu);
+
+ /*
+ * On resume, firmware implementing dynamic mitigation will
+ * have turned the mitigation on. If the user has forcefully
+ * disabled it, make sure their wishes are obeyed.
+ */
+ if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
+ arm64_set_ssbd_mitigation(false);
}
/*
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 21868530018e..f845a8617812 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -11,7 +11,9 @@
* for more details.
*/
+#include <linux/acpi.h>
#include <linux/arch_topology.h>
+#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/init.h>
@@ -22,6 +24,7 @@
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/slab.h>
+#include <linux/smp.h>
#include <linux/string.h>
#include <asm/cpu.h>
@@ -47,7 +50,7 @@ static int __init get_cpu_for_node(struct device_node *node)
return cpu;
}
-static int __init parse_core(struct device_node *core, int cluster_id,
+static int __init parse_core(struct device_node *core, int package_id,
int core_id)
{
char name[10];
@@ -63,7 +66,7 @@ static int __init parse_core(struct device_node *core, int cluster_id,
leaf = false;
cpu = get_cpu_for_node(t);
if (cpu >= 0) {
- cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].core_id = core_id;
cpu_topology[cpu].thread_id = i;
} else {
@@ -85,7 +88,7 @@ static int __init parse_core(struct device_node *core, int cluster_id,
return -EINVAL;
}
- cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].core_id = core_id;
} else if (leaf) {
pr_err("%pOF: Can't get CPU for leaf core\n", core);
@@ -101,7 +104,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
bool leaf = true;
bool has_cores = false;
struct device_node *c;
- static int cluster_id __initdata;
+ static int package_id __initdata;
int core_id = 0;
int i, ret;
@@ -140,7 +143,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
}
if (leaf) {
- ret = parse_core(c, cluster_id, core_id++);
+ ret = parse_core(c, package_id, core_id++);
} else {
pr_err("%pOF: Non-leaf cluster with core %s\n",
cluster, name);
@@ -158,7 +161,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
pr_warn("%pOF: empty cluster\n", cluster);
if (leaf)
- cluster_id++;
+ package_id++;
return 0;
}
@@ -194,7 +197,7 @@ static int __init parse_dt_topology(void)
* only mark cores described in the DT as possible.
*/
for_each_possible_cpu(cpu)
- if (cpu_topology[cpu].cluster_id == -1)
+ if (cpu_topology[cpu].package_id == -1)
ret = -EINVAL;
out_map:
@@ -212,7 +215,14 @@ EXPORT_SYMBOL_GPL(cpu_topology);
const struct cpumask *cpu_coregroup_mask(int cpu)
{
- return &cpu_topology[cpu].core_sibling;
+ const cpumask_t *core_mask = &cpu_topology[cpu].core_sibling;
+
+ if (cpu_topology[cpu].llc_id != -1) {
+ if (cpumask_subset(&cpu_topology[cpu].llc_siblings, core_mask))
+ core_mask = &cpu_topology[cpu].llc_siblings;
+ }
+
+ return core_mask;
}
static void update_siblings_masks(unsigned int cpuid)
@@ -224,7 +234,12 @@ static void update_siblings_masks(unsigned int cpuid)
for_each_possible_cpu(cpu) {
cpu_topo = &cpu_topology[cpu];
- if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+ if (cpuid_topo->llc_id == cpu_topo->llc_id) {
+ cpumask_set_cpu(cpu, &cpuid_topo->llc_siblings);
+ cpumask_set_cpu(cpuid, &cpu_topo->llc_siblings);
+ }
+
+ if (cpuid_topo->package_id != cpu_topo->package_id)
continue;
cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
@@ -245,7 +260,7 @@ void store_cpu_topology(unsigned int cpuid)
struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
u64 mpidr;
- if (cpuid_topo->cluster_id != -1)
+ if (cpuid_topo->package_id != -1)
goto topology_populated;
mpidr = read_cpuid_mpidr();
@@ -259,19 +274,19 @@ void store_cpu_topology(unsigned int cpuid)
/* Multiprocessor system : Multi-threads per core */
cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
- cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) |
+ cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) |
MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8;
} else {
/* Multiprocessor system : Single-thread per core */
cpuid_topo->thread_id = -1;
cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
- cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) |
+ cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) |
MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 |
MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16;
}
pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
- cpuid, cpuid_topo->cluster_id, cpuid_topo->core_id,
+ cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
cpuid_topo->thread_id, mpidr);
topology_populated:
@@ -287,7 +302,11 @@ static void __init reset_cpu_topology(void)
cpu_topo->thread_id = -1;
cpu_topo->core_id = 0;
- cpu_topo->cluster_id = -1;
+ cpu_topo->package_id = -1;
+
+ cpu_topo->llc_id = -1;
+ cpumask_clear(&cpu_topo->llc_siblings);
+ cpumask_set_cpu(cpu, &cpu_topo->llc_siblings);
cpumask_clear(&cpu_topo->core_sibling);
cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
@@ -296,6 +315,59 @@ static void __init reset_cpu_topology(void)
}
}
+#ifdef CONFIG_ACPI
+/*
+ * Propagate the topology information of the processor_topology_node tree to the
+ * cpu_topology array.
+ */
+static int __init parse_acpi_topology(void)
+{
+ bool is_threaded;
+ int cpu, topology_id;
+
+ is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;
+
+ for_each_possible_cpu(cpu) {
+ int i, cache_id;
+
+ topology_id = find_acpi_cpu_topology(cpu, 0);
+ if (topology_id < 0)
+ return topology_id;
+
+ if (is_threaded) {
+ cpu_topology[cpu].thread_id = topology_id;
+ topology_id = find_acpi_cpu_topology(cpu, 1);
+ cpu_topology[cpu].core_id = topology_id;
+ } else {
+ cpu_topology[cpu].thread_id = -1;
+ cpu_topology[cpu].core_id = topology_id;
+ }
+ topology_id = find_acpi_cpu_topology_package(cpu);
+ cpu_topology[cpu].package_id = topology_id;
+
+ i = acpi_find_last_cache_level(cpu);
+
+ if (i > 0) {
+ /*
+ * this is the only part of cpu_topology that has
+ * a direct relationship with the cache topology
+ */
+ cache_id = find_acpi_cpu_cache_topology(cpu, i);
+ if (cache_id > 0)
+ cpu_topology[cpu].llc_id = cache_id;
+ }
+ }
+
+ return 0;
+}
+
+#else
+static inline int __init parse_acpi_topology(void)
+{
+ return -EINVAL;
+}
+#endif
+
void __init init_cpu_topology(void)
{
reset_cpu_topology();
@@ -304,6 +376,8 @@ void __init init_cpu_topology(void)
* Discard anything that was parsed if we hit an error so we
* don't use partial information.
*/
- if (of_have_populated_dt() && parse_dt_topology())
+ if (!acpi_disabled && parse_acpi_topology())
+ reset_cpu_topology();
+ else if (of_have_populated_dt() && parse_dt_topology())
reset_cpu_topology();
}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 0221aca6493d..605d1b60469c 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -34,25 +34,25 @@ jiffies = jiffies_64;
* 4 KB (see related ASSERT() below) \
*/ \
. = ALIGN(SZ_4K); \
- VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
+ __hyp_idmap_text_start = .; \
*(.hyp.idmap.text) \
- VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \
- VMLINUX_SYMBOL(__hyp_text_start) = .; \
+ __hyp_idmap_text_end = .; \
+ __hyp_text_start = .; \
*(.hyp.text) \
- VMLINUX_SYMBOL(__hyp_text_end) = .;
+ __hyp_text_end = .;
#define IDMAP_TEXT \
. = ALIGN(SZ_4K); \
- VMLINUX_SYMBOL(__idmap_text_start) = .; \
+ __idmap_text_start = .; \
*(.idmap.text) \
- VMLINUX_SYMBOL(__idmap_text_end) = .;
+ __idmap_text_end = .;
#ifdef CONFIG_HIBERNATION
#define HIBERNATE_TEXT \
. = ALIGN(SZ_4K); \
- VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\
+ __hibernate_exit_text_start = .; \
*(.hibernate_exit.text) \
- VMLINUX_SYMBOL(__hibernate_exit_text_end) = .;
+ __hibernate_exit_text_end = .;
#else
#define HIBERNATE_TEXT
#endif
@@ -60,10 +60,10 @@ jiffies = jiffies_64;
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define TRAMP_TEXT \
. = ALIGN(PAGE_SIZE); \
- VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \
+ __entry_tramp_text_start = .; \
*(.entry.tramp.text) \
. = ALIGN(PAGE_SIZE); \
- VMLINUX_SYMBOL(__entry_tramp_text_end) = .;
+ __entry_tramp_text_end = .;
#else
#define TRAMP_TEXT
#endif
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index bffece27b5c1..05d836979032 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -106,8 +106,44 @@ el1_hvc_guest:
*/
ldr x1, [sp] // Guest's x0
eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1
+ cbz w1, wa_epilogue
+
+ /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
+ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
+ ARM_SMCCC_ARCH_WORKAROUND_2)
cbnz w1, el1_trap
- mov x0, x1
+
+#ifdef CONFIG_ARM64_SSBD
+alternative_cb arm64_enable_wa2_handling
+ b wa2_end
+alternative_cb_end
+ get_vcpu_ptr x2, x0
+ ldr x0, [x2, #VCPU_WORKAROUND_FLAGS]
+
+ // Sanitize the argument and update the guest flags
+ ldr x1, [sp, #8] // Guest's x1
+ clz w1, w1 // Murphy's device:
+ lsr w1, w1, #5 // w1 = !!w1 without using
+ eor w1, w1, #1 // the flags...
+ bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1
+ str x0, [x2, #VCPU_WORKAROUND_FLAGS]
+
+ /* Check that we actually need to perform the call */
+ hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2
+ cbz x0, wa2_end
+
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
+ smc #0
+
+ /* Don't leak data from the SMC call */
+ mov x3, xzr
+wa2_end:
+ mov x2, xzr
+ mov x1, xzr
+#endif
+
+wa_epilogue:
+ mov x0, xzr
add sp, sp, #16
eret
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index d9645236e474..c50cedc447f1 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -15,6 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/arm-smccc.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>
@@ -389,6 +390,39 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
+static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
+{
+ if (!cpus_have_const_cap(ARM64_SSBD))
+ return false;
+
+ return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
+}
+
+static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * The host runs with the workaround always present. If the
+ * guest wants it disabled, so be it...
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
+#endif
+}
+
+static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * If the guest has disabled the workaround, bring it back on.
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
+#endif
+}
+
/* Switch to the guest for VHE systems running in EL2 */
int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
@@ -409,6 +443,8 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
+ __set_guest_arch_workaround_state(vcpu);
+
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu, host_ctxt);
@@ -416,6 +452,8 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
+ __set_host_arch_workaround_state(vcpu);
+
fp_enabled = fpsimd_enabled_vhe();
sysreg_save_guest_state_vhe(guest_ctxt);
@@ -465,6 +503,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
+ __set_guest_arch_workaround_state(vcpu);
+
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu, host_ctxt);
@@ -472,6 +512,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
+ __set_host_arch_workaround_state(vcpu);
+
fp_enabled = __fpsimd_enabled_nvhe();
__sysreg_save_state_nvhe(guest_ctxt);
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 3256b9228e75..a74311beda35 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -122,6 +122,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset PMU */
kvm_pmu_vcpu_reset(vcpu);
+ /* Default workaround setup is enabled (if supported) */
+ if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
+ vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
+
/* Reset timer */
return kvm_timer_vcpu_reset(vcpu);
}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index db01f2709842..49e217ac7e1e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -504,6 +504,11 @@ static int __init arm64_dma_init(void)
max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
swiotlb = 1;
+ WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
+ TAINT_CPU_OUT_OF_SPEC,
+ "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+ ARCH_DMA_MINALIGN, cache_line_size());
+
return atomic_pool_init();
}
arch_initcall(arm64_dma_init);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 576f15153080..b8eecc7b9531 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -235,8 +235,9 @@ static bool is_el1_instruction_abort(unsigned int esr)
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
}
-static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
- unsigned long addr)
+static inline bool is_el1_permission_fault(unsigned int esr,
+ struct pt_regs *regs,
+ unsigned long addr)
{
unsigned int ec = ESR_ELx_EC(esr);
unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
@@ -254,6 +255,22 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
return false;
}
+static void die_kernel_fault(const char *msg, unsigned long addr,
+ unsigned int esr, struct pt_regs *regs)
+{
+ bust_spinlocks(1);
+
+ pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
+ addr);
+
+ mem_abort_decode(esr);
+
+ show_pte(addr);
+ die("Oops", regs, esr);
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+}
+
static void __do_kernel_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
@@ -266,9 +283,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
return;
- bust_spinlocks(1);
-
- if (is_permission_fault(esr, regs, addr)) {
+ if (is_el1_permission_fault(esr, regs, addr)) {
if (esr & ESR_ELx_WNR)
msg = "write to read-only memory";
else
@@ -279,15 +294,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
msg = "paging request";
}
- pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
- addr);
-
- mem_abort_decode(esr);
-
- show_pte(addr);
- die("Oops", regs, esr);
- bust_spinlocks(0);
- do_exit(SIGKILL);
+ die_kernel_fault(msg, addr, esr, regs);
}
static void __do_user_fault(struct siginfo *info, unsigned int esr)
@@ -447,16 +454,19 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
mm_flags |= FAULT_FLAG_WRITE;
}
- if (addr < TASK_SIZE && is_permission_fault(esr, regs, addr)) {
+ if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) {
/* regs->orig_addr_limit may be 0 if we entered from EL0 */
if (regs->orig_addr_limit == KERNEL_DS)
- die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
+ die_kernel_fault("access to user memory with fs=KERNEL_DS",
+ addr, esr, regs);
if (is_el1_instruction_abort(esr))
- die("Attempting to execute userspace memory", regs, esr);
+ die_kernel_fault("execution of user memory",
+ addr, esr, regs);
if (!search_exception_tables(regs->pc))
- die("Accessing user space memory outside uaccess.h routines", regs, esr);
+ die_kernel_fault("access to user memory outside uaccess routines",
+ addr, esr, regs);
}
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index 10ed2749e246..0bc86e5f8f3f 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -20,7 +20,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
struct device_node *node,
enum cache_type type, unsigned int level)
{
- this_leaf->of_node = node;
this_leaf->level = level;
this_leaf->type = type;
/* not a sector cache */
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 516d7b36d6fb..b533eeb6139d 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -547,6 +547,9 @@ config ACPI_CONFIGFS
if ARM64
source "drivers/acpi/arm64/Kconfig"
+
+config ACPI_PPTT
+ bool
endif
config TPS68470_PMIC_OPREGION
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 48e202752754..6d59aa109a91 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_ACPI_BGRT) += bgrt.o
obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_acpi.o
obj-$(CONFIG_ACPI_SPCR_TABLE) += spcr.o
obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o
+obj-$(CONFIG_ACPI_PPTT) += pptt.o
# processor has its own "processor." module_param namespace
processor-y := processor_driver.o
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
new file mode 100644
index 000000000000..e5ea1974d1e3
--- /dev/null
+++ b/drivers/acpi/pptt.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * pptt.c - parsing of Processor Properties Topology Table (PPTT)
+ *
+ * Copyright (C) 2018, ARM
+ *
+ * This file implements parsing of the Processor Properties Topology Table
+ * which is optionally used to describe the processor and cache topology.
+ * Due to the relative pointers used throughout the table, this doesn't
+ * leverage the existing subtable parsing in the kernel.
+ *
+ * The PPTT structure is an inverted tree, with each node potentially
+ * holding one or two inverted tree data structures describing
+ * the caches available at that level. Each cache structure optionally
+ * contains properties describing the cache at a given level which can be
+ * used to override hardware probed values.
+ */
+#define pr_fmt(fmt) "ACPI PPTT: " fmt
+
+#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
+#include <acpi/processor.h>
+
+static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr,
+ u32 pptt_ref)
+{
+ struct acpi_subtable_header *entry;
+
+ /* there isn't a subtable at reference 0 */
+ if (pptt_ref < sizeof(struct acpi_subtable_header))
+ return NULL;
+
+ if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
+ return NULL;
+
+ entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
+
+ if (entry->length == 0)
+ return NULL;
+
+ if (pptt_ref + entry->length > table_hdr->length)
+ return NULL;
+
+ return entry;
+}
+
+static struct acpi_pptt_processor *fetch_pptt_node(struct acpi_table_header *table_hdr,
+ u32 pptt_ref)
+{
+ return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr, pptt_ref);
+}
+
+static struct acpi_pptt_cache *fetch_pptt_cache(struct acpi_table_header *table_hdr,
+ u32 pptt_ref)
+{
+ return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr, pptt_ref);
+}
+
+static struct acpi_subtable_header *acpi_get_pptt_resource(struct acpi_table_header *table_hdr,
+ struct acpi_pptt_processor *node,
+ int resource)
+{
+ u32 *ref;
+
+ if (resource >= node->number_of_priv_resources)
+ return NULL;
+
+ ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor));
+ ref += resource;
+
+ return fetch_pptt_subtable(table_hdr, *ref);
+}
+
+static inline bool acpi_pptt_match_type(int table_type, int type)
+{
+ return ((table_type & ACPI_PPTT_MASK_CACHE_TYPE) == type ||
+ table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type);
+}
+
+/**
+ * acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @local_level: passed res reflects this cache level
+ * @res: cache resource in the PPTT we want to walk
+ * @found: returns a pointer to the requested level if found
+ * @level: the requested cache level
+ * @type: the requested cache type
+ *
+ * Attempt to find a given cache level, while counting the max number
+ * of cache levels for the cache node.
+ *
+ * Given a pptt resource, verify that it is a cache node, then walk
+ * down each level of caches, counting how many levels are found
+ * as well as checking the cache type (icache, dcache, unified). If a
+ * level & type match, then we set found, and continue the search.
+ * Once the entire cache branch has been walked return its max
+ * depth.
+ *
+ * Return: The cache structure and the level we terminated with.
+ */
+static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
+ int local_level,
+ struct acpi_subtable_header *res,
+ struct acpi_pptt_cache **found,
+ int level, int type)
+{
+ struct acpi_pptt_cache *cache;
+
+ if (res->type != ACPI_PPTT_TYPE_CACHE)
+ return 0;
+
+ cache = (struct acpi_pptt_cache *) res;
+ while (cache) {
+ local_level++;
+
+ if (local_level == level &&
+ cache->flags & ACPI_PPTT_CACHE_TYPE_VALID &&
+ acpi_pptt_match_type(cache->attributes, type)) {
+ if (*found != NULL && cache != *found)
+ pr_warn("Found duplicate cache level/type unable to determine uniqueness\n");
+
+ pr_debug("Found cache @ level %d\n", level);
+ *found = cache;
+ /*
+ * continue looking at this node's resource list
+ * to verify that we don't find a duplicate
+ * cache node.
+ */
+ }
+ cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache);
+ }
+ return local_level;
+}
+
+static struct acpi_pptt_cache *acpi_find_cache_level(struct acpi_table_header *table_hdr,
+ struct acpi_pptt_processor *cpu_node,
+ int *starting_level, int level,
+ int type)
+{
+ struct acpi_subtable_header *res;
+ int number_of_levels = *starting_level;
+ int resource = 0;
+ struct acpi_pptt_cache *ret = NULL;
+ int local_level;
+
+ /* walk down from processor node */
+ while ((res = acpi_get_pptt_resource(table_hdr, cpu_node, resource))) {
+ resource++;
+
+ local_level = acpi_pptt_walk_cache(table_hdr, *starting_level,
+ res, &ret, level, type);
+ /*
+ * we are looking for the max depth. Since its potentially
+ * possible for a given node to have resources with differing
+ * depths verify that the depth we have found is the largest.
+ */
+ if (number_of_levels < local_level)
+ number_of_levels = local_level;
+ }
+ if (number_of_levels > *starting_level)
+ *starting_level = number_of_levels;
+
+ return ret;
+}
+
+/**
+ * acpi_count_levels() - Given a PPTT table, and a cpu node, count the caches
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @cpu_node: processor node we wish to count caches for
+ *
+ * Given a processor node containing a processing unit, walk into it and count
+ * how many levels exist solely for it, and then walk up each level until we hit
+ * the root node (ignore the package level because it may be possible to have
+ * caches that exist across packages). Count the number of cache levels that
+ * exist at each level on the way up.
+ *
+ * Return: Total number of levels found.
+ */
+static int acpi_count_levels(struct acpi_table_header *table_hdr,
+ struct acpi_pptt_processor *cpu_node)
+{
+ int total_levels = 0;
+
+ do {
+ acpi_find_cache_level(table_hdr, cpu_node, &total_levels, 0, 0);
+ cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
+ } while (cpu_node);
+
+ return total_levels;
+}
+
+/**
+ * acpi_pptt_leaf_node() - Given a processor node, determine if its a leaf
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @node: passed node is checked to see if its a leaf
+ *
+ * Determine if the *node parameter is a leaf node by iterating the
+ * PPTT table, looking for nodes which reference it.
+ *
+ * Return: 0 if we find a node referencing the passed node (or table error),
+ * or 1 if we don't.
+ */
+static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr,
+ struct acpi_pptt_processor *node)
+{
+ struct acpi_subtable_header *entry;
+ unsigned long table_end;
+ u32 node_entry;
+ struct acpi_pptt_processor *cpu_node;
+ u32 proc_sz;
+
+ table_end = (unsigned long)table_hdr + table_hdr->length;
+ node_entry = ACPI_PTR_DIFF(node, table_hdr);
+ entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
+ sizeof(struct acpi_table_pptt));
+ proc_sz = sizeof(struct acpi_pptt_processor *);
+
+ while ((unsigned long)entry + proc_sz < table_end) {
+ cpu_node = (struct acpi_pptt_processor *)entry;
+ if (entry->type == ACPI_PPTT_TYPE_PROCESSOR &&
+ cpu_node->parent == node_entry)
+ return 0;
+ if (entry->length == 0)
+ return 0;
+ entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
+ entry->length);
+
+ }
+ return 1;
+}
+
+/**
+ * acpi_find_processor_node() - Given a PPTT table find the requested processor
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @acpi_cpu_id: cpu we are searching for
+ *
+ * Find the subtable entry describing the provided processor.
+ * This is done by iterating the PPTT table looking for processor nodes
+ * which have an acpi_processor_id that matches the acpi_cpu_id parameter
+ * passed into the function. If we find a node that matches this criteria
+ * we verify that its a leaf node in the topology rather than depending
+ * on the valid flag, which doesn't need to be set for leaf nodes.
+ *
+ * Return: NULL, or the processors acpi_pptt_processor*
+ */
+static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_header *table_hdr,
+ u32 acpi_cpu_id)
+{
+ struct acpi_subtable_header *entry;
+ unsigned long table_end;
+ struct acpi_pptt_processor *cpu_node;
+ u32 proc_sz;
+
+ table_end = (unsigned long)table_hdr + table_hdr->length;
+ entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
+ sizeof(struct acpi_table_pptt));
+ proc_sz = sizeof(struct acpi_pptt_processor *);
+
+ /* find the processor structure associated with this cpuid */
+ while ((unsigned long)entry + proc_sz < table_end) {
+ cpu_node = (struct acpi_pptt_processor *)entry;
+
+ if (entry->length == 0) {
+ pr_warn("Invalid zero length subtable\n");
+ break;
+ }
+ if (entry->type == ACPI_PPTT_TYPE_PROCESSOR &&
+ acpi_cpu_id == cpu_node->acpi_processor_id &&
+ acpi_pptt_leaf_node(table_hdr, cpu_node)) {
+ return (struct acpi_pptt_processor *)entry;
+ }
+
+ entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
+ entry->length);
+ }
+
+ return NULL;
+}
+
+static int acpi_find_cache_levels(struct acpi_table_header *table_hdr,
+ u32 acpi_cpu_id)
+{
+ int number_of_levels = 0;
+ struct acpi_pptt_processor *cpu;
+
+ cpu = acpi_find_processor_node(table_hdr, acpi_cpu_id);
+ if (cpu)
+ number_of_levels = acpi_count_levels(table_hdr, cpu);
+
+ return number_of_levels;
+}
+
+static u8 acpi_cache_type(enum cache_type type)
+{
+ switch (type) {
+ case CACHE_TYPE_DATA:
+ pr_debug("Looking for data cache\n");
+ return ACPI_PPTT_CACHE_TYPE_DATA;
+ case CACHE_TYPE_INST:
+ pr_debug("Looking for instruction cache\n");
+ return ACPI_PPTT_CACHE_TYPE_INSTR;
+ default:
+ case CACHE_TYPE_UNIFIED:
+ pr_debug("Looking for unified cache\n");
+ /*
+ * It is important that ACPI_PPTT_CACHE_TYPE_UNIFIED
+ * contains the bit pattern that will match both
+ * ACPI unified bit patterns because we use it later
+ * to match both cases.
+ */
+ return ACPI_PPTT_CACHE_TYPE_UNIFIED;
+ }
+}
+
+static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *table_hdr,
+ u32 acpi_cpu_id,
+ enum cache_type type,
+ unsigned int level,
+ struct acpi_pptt_processor **node)
+{
+ int total_levels = 0;
+ struct acpi_pptt_cache *found = NULL;
+ struct acpi_pptt_processor *cpu_node;
+ u8 acpi_type = acpi_cache_type(type);
+
+ pr_debug("Looking for CPU %d's level %d cache type %d\n",
+ acpi_cpu_id, level, acpi_type);
+
+ cpu_node = acpi_find_processor_node(table_hdr, acpi_cpu_id);
+
+ while (cpu_node && !found) {
+ found = acpi_find_cache_level(table_hdr, cpu_node,
+ &total_levels, level, acpi_type);
+ *node = cpu_node;
+ cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
+ }
+
+ return found;
+}
+
+/* total number of attributes checked by the properties code */
+#define PPTT_CHECKED_ATTRIBUTES 4
+
+/**
+ * update_cache_properties() - Update cacheinfo for the given processor
+ * @this_leaf: Kernel cache info structure being updated
+ * @found_cache: The PPTT node describing this cache instance
+ * @cpu_node: A unique reference to describe this cache instance
+ *
+ * The ACPI spec implies that the fields in the cache structures are used to
+ * extend and correct the information probed from the hardware. Lets only
+ * set fields that we determine are VALID.
+ *
+ * Return: nothing. Side effect of updating the global cacheinfo
+ */
+static void update_cache_properties(struct cacheinfo *this_leaf,
+ struct acpi_pptt_cache *found_cache,
+ struct acpi_pptt_processor *cpu_node)
+{
+ int valid_flags = 0;
+
+ this_leaf->fw_token = cpu_node;
+ if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) {
+ this_leaf->size = found_cache->size;
+ valid_flags++;
+ }
+ if (found_cache->flags & ACPI_PPTT_LINE_SIZE_VALID) {
+ this_leaf->coherency_line_size = found_cache->line_size;
+ valid_flags++;
+ }
+ if (found_cache->flags & ACPI_PPTT_NUMBER_OF_SETS_VALID) {
+ this_leaf->number_of_sets = found_cache->number_of_sets;
+ valid_flags++;
+ }
+ if (found_cache->flags & ACPI_PPTT_ASSOCIATIVITY_VALID) {
+ this_leaf->ways_of_associativity = found_cache->associativity;
+ valid_flags++;
+ }
+ if (found_cache->flags & ACPI_PPTT_WRITE_POLICY_VALID) {
+ switch (found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY) {
+ case ACPI_PPTT_CACHE_POLICY_WT:
+ this_leaf->attributes = CACHE_WRITE_THROUGH;
+ break;
+ case ACPI_PPTT_CACHE_POLICY_WB:
+ this_leaf->attributes = CACHE_WRITE_BACK;
+ break;
+ }
+ }
+ if (found_cache->flags & ACPI_PPTT_ALLOCATION_TYPE_VALID) {
+ switch (found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE) {
+ case ACPI_PPTT_CACHE_READ_ALLOCATE:
+ this_leaf->attributes |= CACHE_READ_ALLOCATE;
+ break;
+ case ACPI_PPTT_CACHE_WRITE_ALLOCATE:
+ this_leaf->attributes |= CACHE_WRITE_ALLOCATE;
+ break;
+ case ACPI_PPTT_CACHE_RW_ALLOCATE:
+ case ACPI_PPTT_CACHE_RW_ALLOCATE_ALT:
+ this_leaf->attributes |=
+ CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE;
+ break;
+ }
+ }
+ /*
+ * If the above flags are valid, and the cache type is NOCACHE
+ * update the cache type as well.
+ */
+ if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+ valid_flags == PPTT_CHECKED_ATTRIBUTES)
+ this_leaf->type = CACHE_TYPE_UNIFIED;
+}
+
+static void cache_setup_acpi_cpu(struct acpi_table_header *table,
+ unsigned int cpu)
+{
+ struct acpi_pptt_cache *found_cache;
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+ struct cacheinfo *this_leaf;
+ unsigned int index = 0;
+ struct acpi_pptt_processor *cpu_node = NULL;
+
+ while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
+ this_leaf = this_cpu_ci->info_list + index;
+ found_cache = acpi_find_cache_node(table, acpi_cpu_id,
+ this_leaf->type,
+ this_leaf->level,
+ &cpu_node);
+ pr_debug("found = %p %p\n", found_cache, cpu_node);
+ if (found_cache)
+ update_cache_properties(this_leaf,
+ found_cache,
+ cpu_node);
+
+ index++;
+ }
+}
+
+/* Passing level values greater than this will result in search termination */
+#define PPTT_ABORT_PACKAGE 0xFF
+
+static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr,
+ struct acpi_pptt_processor *cpu,
+ int level, int flag)
+{
+ struct acpi_pptt_processor *prev_node;
+
+ while (cpu && level) {
+ if (cpu->flags & flag)
+ break;
+ pr_debug("level %d\n", level);
+ prev_node = fetch_pptt_node(table_hdr, cpu->parent);
+ if (prev_node == NULL)
+ break;
+ cpu = prev_node;
+ level--;
+ }
+ return cpu;
+}
+
+/**
+ * topology_get_acpi_cpu_tag() - Find a unique topology value for a feature
+ * @table: Pointer to the head of the PPTT table
+ * @cpu: Kernel logical cpu number
+ * @level: A level that terminates the search
+ * @flag: A flag which terminates the search
+ *
+ * Get a unique value given a cpu, and a topology level, that can be
+ * matched to determine which cpus share common topological features
+ * at that level.
+ *
+ * Return: Unique value, or -ENOENT if unable to locate cpu
+ */
+static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
+ unsigned int cpu, int level, int flag)
+{
+ struct acpi_pptt_processor *cpu_node;
+ u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+
+ cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
+ if (cpu_node) {
+ cpu_node = acpi_find_processor_package_id(table, cpu_node,
+ level, flag);
+ /* Only the first level has a guaranteed id */
+ if (level == 0)
+ return cpu_node->acpi_processor_id;
+ return ACPI_PTR_DIFF(cpu_node, table);
+ }
+ pr_warn_once("PPTT table found, but unable to locate core %d (%d)\n",
+ cpu, acpi_cpu_id);
+ return -ENOENT;
+}
+
+static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag)
+{
+ struct acpi_table_header *table;
+ acpi_status status;
+ int retval;
+
+ status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+ if (ACPI_FAILURE(status)) {
+ pr_warn_once("No PPTT table found, cpu topology may be inaccurate\n");
+ return -ENOENT;
+ }
+ retval = topology_get_acpi_cpu_tag(table, cpu, level, flag);
+ pr_debug("Topology Setup ACPI cpu %d, level %d ret = %d\n",
+ cpu, level, retval);
+ acpi_put_table(table);
+
+ return retval;
+}
+
+/**
+ * acpi_find_last_cache_level() - Determines the number of cache levels for a PE
+ * @cpu: Kernel logical cpu number
+ *
+ * Given a logical cpu number, returns the number of levels of cache represented
+ * in the PPTT. Errors caused by lack of a PPTT table, or otherwise, return 0
+ * indicating we didn't find any cache levels.
+ *
+ * Return: Cache levels visible to this core.
+ */
+int acpi_find_last_cache_level(unsigned int cpu)
+{
+ u32 acpi_cpu_id;
+ struct acpi_table_header *table;
+ int number_of_levels = 0;
+ acpi_status status;
+
+ pr_debug("Cache Setup find last level cpu=%d\n", cpu);
+
+ acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+ status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+ if (ACPI_FAILURE(status)) {
+ pr_warn_once("No PPTT table found, cache topology may be inaccurate\n");
+ } else {
+ number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id);
+ acpi_put_table(table);
+ }
+ pr_debug("Cache Setup find last level level=%d\n", number_of_levels);
+
+ return number_of_levels;
+}
+
+/**
+ * cache_setup_acpi() - Override CPU cache topology with data from the PPTT
+ * @cpu: Kernel logical cpu number
+ *
+ * Updates the global cache info provided by cpu_get_cacheinfo()
+ * when there are valid properties in the acpi_pptt_cache nodes. A
+ * successful parse may not result in any updates if none of the
+ * cache levels have any valid flags set. Futher, a unique value is
+ * associated with each known CPU cache entry. This unique value
+ * can be used to determine whether caches are shared between cpus.
+ *
+ * Return: -ENOENT on failure to find table, or 0 on success
+ */
+int cache_setup_acpi(unsigned int cpu)
+{
+ struct acpi_table_header *table;
+ acpi_status status;
+
+ pr_debug("Cache Setup ACPI cpu %d\n", cpu);
+
+ status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+ if (ACPI_FAILURE(status)) {
+ pr_warn_once("No PPTT table found, cache topology may be inaccurate\n");
+ return -ENOENT;
+ }
+
+ cache_setup_acpi_cpu(table, cpu);
+ acpi_put_table(table);
+
+ return status;
+}
+
+/**
+ * find_acpi_cpu_topology() - Determine a unique topology value for a given cpu
+ * @cpu: Kernel logical cpu number
+ * @level: The topological level for which we would like a unique ID
+ *
+ * Determine a topology unique ID for each thread/core/cluster/mc_grouping
+ * /socket/etc. This ID can then be used to group peers, which will have
+ * matching ids.
+ *
+ * The search terminates when either the requested level is found or
+ * we reach a root node. Levels beyond the termination point will return the
+ * same unique ID. The unique id for level 0 is the acpi processor id. All
+ * other levels beyond this use a generated value to uniquely identify
+ * a topological feature.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found.
+ * Otherwise returns a value which represents a unique topological feature.
+ */
+int find_acpi_cpu_topology(unsigned int cpu, int level)
+{
+ return find_acpi_cpu_topology_tag(cpu, level, 0);
+}
+
+/**
+ * find_acpi_cpu_cache_topology() - Determine a unique cache topology value
+ * @cpu: Kernel logical cpu number
+ * @level: The cache level for which we would like a unique ID
+ *
+ * Determine a unique ID for each unified cache in the system
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found.
+ * Otherwise returns a value which represents a unique topological feature.
+ */
+int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
+{
+ struct acpi_table_header *table;
+ struct acpi_pptt_cache *found_cache;
+ acpi_status status;
+ u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+ struct acpi_pptt_processor *cpu_node = NULL;
+ int ret = -1;
+
+ status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+ if (ACPI_FAILURE(status)) {
+ pr_warn_once("No PPTT table found, topology may be inaccurate\n");
+ return -ENOENT;
+ }
+
+ found_cache = acpi_find_cache_node(table, acpi_cpu_id,
+ CACHE_TYPE_UNIFIED,
+ level,
+ &cpu_node);
+ if (found_cache)
+ ret = ACPI_PTR_DIFF(cpu_node, table);
+
+ acpi_put_table(table);
+
+ return ret;
+}
+
+
+/**
+ * find_acpi_cpu_topology_package() - Determine a unique cpu package value
+ * @cpu: Kernel logical cpu number
+ *
+ * Determine a topology unique package ID for the given cpu.
+ * This ID can then be used to group peers, which will have matching ids.
+ *
+ * The search terminates when either a level is found with the PHYSICAL_PACKAGE
+ * flag set or we reach a root node.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found.
+ * Otherwise returns a value which represents the package for this cpu.
+ */
+int find_acpi_cpu_topology_package(unsigned int cpu)
+{
+ return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
+ ACPI_PPTT_PHYSICAL_PACKAGE);
+}
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 4a3410aa6540..a3d012b08fc5 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -462,7 +462,7 @@ static const char * const table_sigs[] = {
ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT,
ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT,
ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT,
- ACPI_SIG_NFIT, ACPI_SIG_HMAT, NULL };
+ ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, NULL };
#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index edf726267282..2880e2ab01f5 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -32,50 +32,10 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
}
#ifdef CONFIG_OF
-static int cache_setup_of_node(unsigned int cpu)
-{
- struct device_node *np;
- struct cacheinfo *this_leaf;
- struct device *cpu_dev = get_cpu_device(cpu);
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- unsigned int index = 0;
-
- /* skip if of_node is already populated */
- if (this_cpu_ci->info_list->of_node)
- return 0;
-
- if (!cpu_dev) {
- pr_err("No cpu device for CPU %d\n", cpu);
- return -ENODEV;
- }
- np = cpu_dev->of_node;
- if (!np) {
- pr_err("Failed to find cpu%d device node\n", cpu);
- return -ENOENT;
- }
-
- while (index < cache_leaves(cpu)) {
- this_leaf = this_cpu_ci->info_list + index;
- if (this_leaf->level != 1)
- np = of_find_next_cache_node(np);
- else
- np = of_node_get(np);/* cpu node itself */
- if (!np)
- break;
- this_leaf->of_node = np;
- index++;
- }
-
- if (index != cache_leaves(cpu)) /* not all OF nodes populated */
- return -ENOENT;
-
- return 0;
-}
-
static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
struct cacheinfo *sib_leaf)
{
- return sib_leaf->of_node == this_leaf->of_node;
+ return sib_leaf->fw_token == this_leaf->fw_token;
}
/* OF properties to query for a given cache type */
@@ -111,7 +71,7 @@ static inline int get_cacheinfo_idx(enum cache_type type)
return type;
}
-static void cache_size(struct cacheinfo *this_leaf)
+static void cache_size(struct cacheinfo *this_leaf, struct device_node *np)
{
const char *propname;
const __be32 *cache_size;
@@ -120,13 +80,14 @@ static void cache_size(struct cacheinfo *this_leaf)
ct_idx = get_cacheinfo_idx(this_leaf->type);
propname = cache_type_info[ct_idx].size_prop;
- cache_size = of_get_property(this_leaf->of_node, propname, NULL);
+ cache_size = of_get_property(np, propname, NULL);
if (cache_size)
this_leaf->size = of_read_number(cache_size, 1);
}
/* not cache_line_size() because that's a macro in include/linux/cache.h */
-static void cache_get_line_size(struct cacheinfo *this_leaf)
+static void cache_get_line_size(struct cacheinfo *this_leaf,
+ struct device_node *np)
{
const __be32 *line_size;
int i, lim, ct_idx;
@@ -138,7 +99,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf)
const char *propname;
propname = cache_type_info[ct_idx].line_size_props[i];
- line_size = of_get_property(this_leaf->of_node, propname, NULL);
+ line_size = of_get_property(np, propname, NULL);
if (line_size)
break;
}
@@ -147,7 +108,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf)
this_leaf->coherency_line_size = of_read_number(line_size, 1);
}
-static void cache_nr_sets(struct cacheinfo *this_leaf)
+static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np)
{
const char *propname;
const __be32 *nr_sets;
@@ -156,7 +117,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf)
ct_idx = get_cacheinfo_idx(this_leaf->type);
propname = cache_type_info[ct_idx].nr_sets_prop;
- nr_sets = of_get_property(this_leaf->of_node, propname, NULL);
+ nr_sets = of_get_property(np, propname, NULL);
if (nr_sets)
this_leaf->number_of_sets = of_read_number(nr_sets, 1);
}
@@ -175,41 +136,77 @@ static void cache_associativity(struct cacheinfo *this_leaf)
this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
}
-static bool cache_node_is_unified(struct cacheinfo *this_leaf)
+static bool cache_node_is_unified(struct cacheinfo *this_leaf,
+ struct device_node *np)
{
- return of_property_read_bool(this_leaf->of_node, "cache-unified");
+ return of_property_read_bool(np, "cache-unified");
}
-static void cache_of_override_properties(unsigned int cpu)
+static void cache_of_set_props(struct cacheinfo *this_leaf,
+ struct device_node *np)
{
- int index;
+ /*
+ * init_cache_level must setup the cache level correctly
+ * overriding the architecturally specified levels, so
+ * if type is NONE at this stage, it should be unified
+ */
+ if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+ cache_node_is_unified(this_leaf, np))
+ this_leaf->type = CACHE_TYPE_UNIFIED;
+ cache_size(this_leaf, np);
+ cache_get_line_size(this_leaf, np);
+ cache_nr_sets(this_leaf, np);
+ cache_associativity(this_leaf);
+}
+
+static int cache_setup_of_node(unsigned int cpu)
+{
+ struct device_node *np;
struct cacheinfo *this_leaf;
+ struct device *cpu_dev = get_cpu_device(cpu);
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ unsigned int index = 0;
- for (index = 0; index < cache_leaves(cpu); index++) {
+ /* skip if fw_token is already populated */
+ if (this_cpu_ci->info_list->fw_token) {
+ return 0;
+ }
+
+ if (!cpu_dev) {
+ pr_err("No cpu device for CPU %d\n", cpu);
+ return -ENODEV;
+ }
+ np = cpu_dev->of_node;
+ if (!np) {
+ pr_err("Failed to find cpu%d device node\n", cpu);
+ return -ENOENT;
+ }
+
+ while (index < cache_leaves(cpu)) {
this_leaf = this_cpu_ci->info_list + index;
- /*
- * init_cache_level must setup the cache level correctly
- * overriding the architecturally specified levels, so
- * if type is NONE at this stage, it should be unified
- */
- if (this_leaf->type == CACHE_TYPE_NOCACHE &&
- cache_node_is_unified(this_leaf))
- this_leaf->type = CACHE_TYPE_UNIFIED;
- cache_size(this_leaf);
- cache_get_line_size(this_leaf);
- cache_nr_sets(this_leaf);
- cache_associativity(this_leaf);
+ if (this_leaf->level != 1)
+ np = of_find_next_cache_node(np);
+ else
+ np = of_node_get(np);/* cpu node itself */
+ if (!np)
+ break;
+ cache_of_set_props(this_leaf, np);
+ this_leaf->fw_token = np;
+ index++;
}
+
+ if (index != cache_leaves(cpu)) /* not all OF nodes populated */
+ return -ENOENT;
+
+ return 0;
}
#else
-static void cache_of_override_properties(unsigned int cpu) { }
static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
struct cacheinfo *sib_leaf)
{
/*
- * For non-DT systems, assume unique level 1 cache, system-wide
+ * For non-DT/ACPI systems, assume unique level 1 caches, system-wide
* shared caches for all other levels. This will be used only if
* arch specific code has not populated shared_cpu_map
*/
@@ -217,6 +214,11 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
}
#endif
+int __weak cache_setup_acpi(unsigned int cpu)
+{
+ return -ENOTSUPP;
+}
+
static int cache_shared_cpu_map_setup(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -230,8 +232,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
if (of_have_populated_dt())
ret = cache_setup_of_node(cpu);
else if (!acpi_disabled)
- /* No cache property/hierarchy support yet in ACPI */
- ret = -ENOTSUPP;
+ ret = cache_setup_acpi(cpu);
+
if (ret)
return ret;
@@ -282,16 +284,11 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
}
- of_node_put(this_leaf->of_node);
+ if (of_have_populated_dt())
+ of_node_put(this_leaf->fw_token);
}
}
-static void cache_override_properties(unsigned int cpu)
-{
- if (of_have_populated_dt())
- return cache_of_override_properties(cpu);
-}
-
static void free_cache_attributes(unsigned int cpu)
{
if (!per_cpu_cacheinfo(cpu))
@@ -325,12 +322,17 @@ static int detect_cache_attributes(unsigned int cpu)
if (per_cpu_cacheinfo(cpu) == NULL)
return -ENOMEM;
+ /*
+ * populate_cache_leaves() may completely setup the cache leaves and
+ * shared_cpu_map or it may leave it partially setup.
+ */
ret = populate_cache_leaves(cpu);
if (ret)
goto free_ci;
/*
- * For systems using DT for cache hierarchy, of_node and shared_cpu_map
- * will be set up here only if they are not populated already
+ * For systems using DT for cache hierarchy, fw_token
+ * and shared_cpu_map will be set up here only if they are
+ * not populated already
*/
ret = cache_shared_cpu_map_setup(cpu);
if (ret) {
@@ -338,7 +340,6 @@ static int detect_cache_attributes(unsigned int cpu)
goto free_ci;
}
- cache_override_properties(cpu);
return 0;
free_ci:
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 28bb5a029558..08ebaf7cca8b 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -6,30 +6,32 @@ menu "Performance monitor support"
depends on PERF_EVENTS
config ARM_CCI_PMU
- bool
+ tristate "ARM CCI PMU driver"
+ depends on (ARM && CPU_V7) || ARM64
select ARM_CCI
+ help
+ Support for PMU events monitoring on the ARM CCI (Cache Coherent
+ Interconnect) family of products.
+
+ If compiled as a module, it will be called arm-cci.
config ARM_CCI400_PMU
- bool "ARM CCI400 PMU support"
- depends on (ARM && CPU_V7) || ARM64
+ bool "support CCI-400"
+ default y
+ depends on ARM_CCI_PMU
select ARM_CCI400_COMMON
- select ARM_CCI_PMU
help
- Support for PMU events monitoring on the ARM CCI-400 (cache coherent
- interconnect). CCI-400 supports counting events related to the
- connected slave/master interfaces.
+ CCI-400 provides 4 independent event counters counting events related
+ to the connected slave/master interfaces, plus a cycle counter.
config ARM_CCI5xx_PMU
- bool "ARM CCI-500/CCI-550 PMU support"
- depends on (ARM && CPU_V7) || ARM64
- select ARM_CCI_PMU
+ bool "support CCI-500/CCI-550"
+ default y
+ depends on ARM_CCI_PMU
help
- Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache
- coherent interconnects. Both of them provide 8 independent event counters,
- which can count events pertaining to the slave/master interfaces as well
- as the internal events to the CCI.
-
- If unsure, say Y
+ CCI-500/CCI-550 both provide 8 independent event counters, which can
+ count events pertaining to the slave/master interfaces as well as the
+ internal events to the CCI.
config ARM_CCN
tristate "ARM CCN driver support"
@@ -94,7 +96,7 @@ config XGENE_PMU
config ARM_SPE_PMU
tristate "Enable support for the ARMv8.2 Statistical Profiling Extension"
- depends on PERF_EVENTS && ARM64
+ depends on ARM64
help
Enable perf support for the ARMv8.2 Statistical Profiling
Extension, which provides periodic sampling of operations in
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 383b2d3dcbc6..0d09d8e669cd 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -120,9 +120,9 @@ enum cci_models {
static void pmu_write_counters(struct cci_pmu *cci_pmu,
unsigned long *mask);
-static ssize_t cci_pmu_format_show(struct device *dev,
+static ssize_t __maybe_unused cci_pmu_format_show(struct device *dev,
struct device_attribute *attr, char *buf);
-static ssize_t cci_pmu_event_show(struct device *dev,
+static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \
@@ -1184,16 +1184,11 @@ static int cci_pmu_add(struct perf_event *event, int flags)
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
struct hw_perf_event *hwc = &event->hw;
int idx;
- int err = 0;
-
- perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
idx = pmu_get_event_idx(hw_events, event);
- if (idx < 0) {
- err = idx;
- goto out;
- }
+ if (idx < 0)
+ return idx;
event->hw.idx = idx;
hw_events->events[idx] = event;
@@ -1205,9 +1200,7 @@ static int cci_pmu_add(struct perf_event *event, int flags)
/* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
-out:
- perf_pmu_enable(event->pmu);
- return err;
+ return 0;
}
static void cci_pmu_del(struct perf_event *event, int flags)
@@ -1304,15 +1297,6 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
hwc->config_base |= (unsigned long)mapping;
- /*
- * Limit the sample_period to half of the counter width. That way, the
- * new counter value is far less likely to overtake the previous one
- * unless you have some serious IRQ latency issues.
- */
- hwc->sample_period = CCI_PMU_CNTR_MASK >> 1;
- hwc->last_period = hwc->sample_period;
- local64_set(&hwc->period_left, hwc->sample_period);
-
if (event->group_leader != event) {
if (validate_group(event) != 0)
return -EINVAL;
@@ -1423,6 +1407,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
pmu_format_attr_group.attrs = model->format_attrs;
cci_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
.name = cci_pmu->model->name,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = cci_pmu_enable,
@@ -1466,7 +1451,7 @@ static int cci_pmu_offline_cpu(unsigned int cpu)
return 0;
}
-static struct cci_pmu_model cci_pmu_models[] = {
+static __maybe_unused struct cci_pmu_model cci_pmu_models[] = {
#ifdef CONFIG_ARM_CCI400_PMU
[CCI400_R0] = {
.name = "CCI_400",
@@ -1588,6 +1573,7 @@ static const struct of_device_id arm_cci_pmu_matches[] = {
#endif
{},
};
+MODULE_DEVICE_TABLE(of, arm_cci_pmu_matches);
static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
{
@@ -1709,14 +1695,27 @@ static int cci_pmu_probe(struct platform_device *pdev)
return 0;
}
+static int cci_pmu_remove(struct platform_device *pdev)
+{
+ if (!g_cci_pmu)
+ return 0;
+
+ cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE);
+ perf_pmu_unregister(&g_cci_pmu->pmu);
+ g_cci_pmu = NULL;
+
+ return 0;
+}
+
static struct platform_driver cci_pmu_driver = {
.driver = {
.name = DRIVER_NAME,
.of_match_table = arm_cci_pmu_matches,
},
.probe = cci_pmu_probe,
+ .remove = cci_pmu_remove,
};
-builtin_platform_driver(cci_pmu_driver);
-MODULE_LICENSE("GPL");
+module_platform_driver(cci_pmu_driver);
+MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("ARM CCI PMU support");
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 65b7e4042ece..b416ee18e6bb 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -736,7 +736,7 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
ccn = pmu_to_arm_ccn(event->pmu);
if (hw->sample_period) {
- dev_warn(ccn->dev, "Sampling not supported!\n");
+ dev_dbg(ccn->dev, "Sampling not supported!\n");
return -EOPNOTSUPP;
}
@@ -744,12 +744,12 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
event->attr.exclude_kernel || event->attr.exclude_hv ||
event->attr.exclude_idle || event->attr.exclude_host ||
event->attr.exclude_guest) {
- dev_warn(ccn->dev, "Can't exclude execution levels!\n");
+ dev_dbg(ccn->dev, "Can't exclude execution levels!\n");
return -EINVAL;
}
if (event->cpu < 0) {
- dev_warn(ccn->dev, "Can't provide per-task data!\n");
+ dev_dbg(ccn->dev, "Can't provide per-task data!\n");
return -EOPNOTSUPP;
}
/*
@@ -771,13 +771,13 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
switch (type) {
case CCN_TYPE_MN:
if (node_xp != ccn->mn_id) {
- dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp);
+ dev_dbg(ccn->dev, "Invalid MN ID %d!\n", node_xp);
return -EINVAL;
}
break;
case CCN_TYPE_XP:
if (node_xp >= ccn->num_xps) {
- dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp);
+ dev_dbg(ccn->dev, "Invalid XP ID %d!\n", node_xp);
return -EINVAL;
}
break;
@@ -785,11 +785,11 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
break;
default:
if (node_xp >= ccn->num_nodes) {
- dev_warn(ccn->dev, "Invalid node ID %d!\n", node_xp);
+ dev_dbg(ccn->dev, "Invalid node ID %d!\n", node_xp);
return -EINVAL;
}
if (!arm_ccn_pmu_type_eq(type, ccn->node[node_xp].type)) {
- dev_warn(ccn->dev, "Invalid type 0x%x for node %d!\n",
+ dev_dbg(ccn->dev, "Invalid type 0x%x for node %d!\n",
type, node_xp);
return -EINVAL;
}
@@ -808,19 +808,19 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
if (event_id != e->event)
continue;
if (e->num_ports && port >= e->num_ports) {
- dev_warn(ccn->dev, "Invalid port %d for node/XP %d!\n",
+ dev_dbg(ccn->dev, "Invalid port %d for node/XP %d!\n",
port, node_xp);
return -EINVAL;
}
if (e->num_vcs && vc >= e->num_vcs) {
- dev_warn(ccn->dev, "Invalid vc %d for node/XP %d!\n",
+ dev_dbg(ccn->dev, "Invalid vc %d for node/XP %d!\n",
vc, node_xp);
return -EINVAL;
}
valid = 1;
}
if (!valid) {
- dev_warn(ccn->dev, "Invalid event 0x%x for node/XP %d!\n",
+ dev_dbg(ccn->dev, "Invalid event 0x%x for node/XP %d!\n",
event_id, node_xp);
return -EINVAL;
}
@@ -1594,4 +1594,4 @@ module_init(arm_ccn_init);
module_exit(arm_ccn_exit);
MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 1a0d340b65cf..a6347d487635 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -339,7 +339,7 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
return IRQ_NONE;
start_clock = sched_clock();
- ret = armpmu->handle_irq(irq, armpmu);
+ ret = armpmu->handle_irq(armpmu);
finish_clock = sched_clock();
perf_sample_event_took(finish_clock - start_clock);
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 28bb642af18b..54ec278d2fc4 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -131,8 +131,7 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
+ struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
struct dev_ext_attribute *ea =
container_of(attr, struct dev_ext_attribute, attr);
int cap = (long)ea->var;
@@ -247,8 +246,7 @@ static ssize_t arm_spe_pmu_get_attr_cpumask(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct platform_device *pdev = to_platform_device(dev);
- struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);
+ struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus);
}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 8758a2a9e6c1..4b35a66383f9 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1299,4 +1299,23 @@ static inline int lpit_read_residency_count_address(u64 *address)
}
#endif
+#ifdef CONFIG_ACPI_PPTT
+int find_acpi_cpu_topology(unsigned int cpu, int level);
+int find_acpi_cpu_topology_package(unsigned int cpu);
+int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
+#else
+static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
+{
+ return -EINVAL;
+}
+static inline int find_acpi_cpu_topology_package(unsigned int cpu)
+{
+ return -EINVAL;
+}
+static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
+{
+ return -EINVAL;
+}
+#endif
+
#endif /*_LINUX_ACPI_H*/
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index a031897fca76..ca1d2cc2cdfa 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -80,6 +80,11 @@
ARM_SMCCC_SMC_32, \
0, 0x8000)
+#define ARM_SMCCC_ARCH_WORKAROUND_2 \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ 0, 0x7fff)
+
#ifndef __ASSEMBLY__
#include <linux/linkage.h>
@@ -291,5 +296,10 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
*/
#define arm_smccc_1_1_hvc(...) __arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__)
+/* Return codes defined in ARM DEN 0070A */
+#define SMCCC_RET_SUCCESS 0
+#define SMCCC_RET_NOT_SUPPORTED -1
+#define SMCCC_RET_NOT_REQUIRED -2
+
#endif /*__ASSEMBLY__*/
#endif /*__LINUX_ARM_SMCCC_H*/
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 3d9805297cda..70e19bc6cc9f 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -34,9 +34,8 @@ enum cache_type {
* @shared_cpu_map: logical cpumask representing all the cpus sharing
* this cache node
* @attributes: bitfield representing various cache attributes
- * @of_node: if devicetree is used, this represents either the cpu node in
- * case there's no explicit cache node or the cache node itself in the
- * device tree
+ * @fw_token: Unique value used to determine if different cacheinfo
+ * structures represent a single hardware cache instance.
* @disable_sysfs: indicates whether this node is visible to the user via
* sysfs or not
* @priv: pointer to any private data structure specific to particular
@@ -65,8 +64,7 @@ struct cacheinfo {
#define CACHE_ALLOCATE_POLICY_MASK \
(CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE)
#define CACHE_ID BIT(4)
-
- struct device_node *of_node;
+ void *fw_token;
bool disable_sysfs;
void *priv;
};
@@ -99,6 +97,23 @@ int func(unsigned int cpu) \
struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
int init_cache_level(unsigned int cpu);
int populate_cache_leaves(unsigned int cpu);
+int cache_setup_acpi(unsigned int cpu);
+#ifndef CONFIG_ACPI_PPTT
+/*
+ * acpi_find_last_cache_level is only called on ACPI enabled
+ * platforms using the PPTT for topology. This means that if
+ * the platform supports other firmware configuration methods
+ * we need to stub out the call when ACPI is disabled.
+ * ACPI enabled platforms not using PPTT won't be making calls
+ * to this function so we need not worry about them.
+ */
+static inline int acpi_find_last_cache_level(unsigned int cpu)
+{
+ return 0;
+}
+#else
+int acpi_find_last_cache_level(unsigned int cpu);
+#endif
const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf);
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 40036a57d072..ad5444491975 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -78,7 +78,7 @@ struct arm_pmu {
struct pmu pmu;
cpumask_t supported_cpus;
char *name;
- irqreturn_t (*handle_irq)(int irq_num, void *dev);
+ irqreturn_t (*handle_irq)(struct arm_pmu *pmu);
void (*enable)(struct perf_event *event);
void (*disable)(struct perf_event *event);
int (*get_event_idx)(struct pmu_hw_events *hw_events,
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index a4c1b76240df..2d9b4795edb2 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1490,6 +1490,10 @@ static int init_hyp_mode(void)
}
}
+ err = hyp_map_aux_data();
+ if (err)
+ kvm_err("Cannot map host auxilary data: %d\n", err);
+
return 0;
out_err:
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index c4762bef13c6..c95ab4c5a475 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -405,7 +405,7 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu)
int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
{
u32 func_id = smccc_get_function(vcpu);
- u32 val = PSCI_RET_NOT_SUPPORTED;
+ u32 val = SMCCC_RET_NOT_SUPPORTED;
u32 feature;
switch (func_id) {
@@ -417,7 +417,21 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
switch(feature) {
case ARM_SMCCC_ARCH_WORKAROUND_1:
if (kvm_arm_harden_branch_predictor())
- val = 0;
+ val = SMCCC_RET_SUCCESS;
+ break;
+ case ARM_SMCCC_ARCH_WORKAROUND_2:
+ switch (kvm_arm_have_ssbd()) {
+ case KVM_SSBD_FORCE_DISABLE:
+ case KVM_SSBD_UNKNOWN:
+ break;
+ case KVM_SSBD_KERNEL:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ case KVM_SSBD_FORCE_ENABLE:
+ case KVM_SSBD_MITIGATED:
+ val = SMCCC_RET_NOT_REQUIRED;
+ break;
+ }
break;
}
break;