summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/head_32.S2
-rw-r--r--arch/x86/boot/compressed/head_64.S2
-rw-r--r--arch/x86/boot/compressed/misc.c18
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S1
-rw-r--r--arch/x86/coco/tdx/tdcall.S15
-rw-r--r--arch/x86/coco/tdx/tdx.c25
-rw-r--r--arch/x86/events/amd/brs.c13
-rw-r--r--arch/x86/events/amd/core.c6
-rw-r--r--arch/x86/events/amd/ibs.c9
-rw-r--r--arch/x86/events/core.c12
-rw-r--r--arch/x86/events/intel/core.c199
-rw-r--r--arch/x86/events/intel/ds.c133
-rw-r--r--arch/x86/events/intel/lbr.c4
-rw-r--r--arch/x86/events/intel/uncore.c41
-rw-r--r--arch/x86/events/intel/uncore.h5
-rw-r--r--arch/x86/events/intel/uncore_discovery.c60
-rw-r--r--arch/x86/events/intel/uncore_discovery.h14
-rw-r--r--arch/x86/events/intel/uncore_snb.c161
-rw-r--r--arch/x86/events/intel/uncore_snbep.c158
-rw-r--r--arch/x86/events/perf_event.h23
-rw-r--r--arch/x86/events/zhaoxin/core.c8
-rw-r--r--arch/x86/include/asm/agp.h6
-rw-r--r--arch/x86/include/asm/atomic64_32.h44
-rw-r--r--arch/x86/include/asm/atomic64_64.h36
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/fpu/xcr.h4
-rw-r--r--arch/x86/include/asm/irqflags.h11
-rw-r--r--arch/x86/include/asm/kvmclock.h2
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/mwait.h14
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/paravirt.h8
-rw-r--r--arch/x86/include/asm/perf_event.h10
-rw-r--r--arch/x86/include/asm/pvclock.h3
-rw-r--r--arch/x86/include/asm/shared/io.h4
-rw-r--r--arch/x86/include/asm/shared/tdx.h1
-rw-r--r--arch/x86/include/asm/special_insns.h8
-rw-r--r--arch/x86/include/asm/xen/hypercall.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c35
-rw-r--r--arch/x86/kernel/cpu/bugs.c2
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/fpu/core.c4
-rw-r--r--arch/x86/kernel/kprobes/core.c34
-rw-r--r--arch/x86/kernel/kvmclock.c6
-rw-r--r--arch/x86/kernel/paravirt.c14
-rw-r--r--arch/x86/kernel/process.c65
-rw-r--r--arch/x86/kernel/pvclock.c22
-rw-r--r--arch/x86/kernel/tsc.c7
-rw-r--r--arch/x86/kernel/vmlinux.lds.S1
-rw-r--r--arch/x86/kvm/pmu.h26
-rw-r--r--arch/x86/kvm/x86.c3
-rw-r--r--arch/x86/lib/cmdline.c4
-rw-r--r--arch/x86/lib/memcpy_64.S5
-rw-r--r--arch/x86/lib/memmove_64.S4
-rw-r--r--arch/x86/lib/memset_64.S4
-rw-r--r--arch/x86/lib/misc.c2
-rw-r--r--arch/x86/mm/pat/memtype.c7
-rw-r--r--arch/x86/xen/enlighten_pv.c2
-rw-r--r--arch/x86/xen/irq.c2
-rw-r--r--arch/x86/xen/time.c12
60 files changed, 957 insertions, 374 deletions
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 6589ddd4cfaf..987ae727cf9f 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -187,7 +187,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
leal boot_heap@GOTOFF(%ebx), %eax
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
- call extract_kernel /* returns kernel location in %eax */
+ call extract_kernel /* returns kernel entry point in %eax */
addl $24, %esp
/*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index a75712991df3..03c4328a88cb 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -569,7 +569,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
movl input_len(%rip), %ecx /* input_len */
movq %rbp, %r8 /* output target address */
movl output_len(%rip), %r9d /* decompressed length, end of relocs */
- call extract_kernel /* returns kernel location in %rax */
+ call extract_kernel /* returns kernel entry point in %rax */
popq %rsi
/*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index cf690d8712f4..014ff222bf4b 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -277,7 +277,7 @@ static inline void handle_relocations(void *output, unsigned long output_len,
{ }
#endif
-static void parse_elf(void *output)
+static size_t parse_elf(void *output)
{
#ifdef CONFIG_X86_64
Elf64_Ehdr ehdr;
@@ -293,10 +293,8 @@ static void parse_elf(void *output)
if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
- ehdr.e_ident[EI_MAG3] != ELFMAG3) {
+ ehdr.e_ident[EI_MAG3] != ELFMAG3)
error("Kernel is not a valid ELF file");
- return;
- }
debug_putstr("Parsing ELF... ");
@@ -328,6 +326,8 @@ static void parse_elf(void *output)
}
free(phdrs);
+
+ return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
}
/*
@@ -356,6 +356,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
const unsigned long kernel_total_size = VO__end - VO__text;
unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
unsigned long needed_size;
+ size_t entry_offset;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
@@ -456,14 +457,17 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
debug_putstr("\nDecompressing Linux... ");
__decompress(input_data, input_len, NULL, NULL, output, output_len,
NULL, error);
- parse_elf(output);
+ entry_offset = parse_elf(output);
handle_relocations(output, output_len, virt_addr);
- debug_putstr("done.\nBooting the kernel.\n");
+
+ debug_putstr("done.\nBooting the kernel (entry_offset: 0x");
+ debug_puthex(entry_offset);
+ debug_putstr(").\n");
/* Disable exception handling before booting the kernel */
cleanup_exception_handling();
- return output;
+ return output + entry_offset;
}
void fortify_panic(const char *name)
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 112b2375d021..b22f34b8684a 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -34,6 +34,7 @@ SECTIONS
_text = .; /* Text */
*(.text)
*(.text.*)
+ *(.noinstr.text)
_etext = . ;
}
.rodata : {
diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S
index f9eb1134f22d..ad0d51f03cb4 100644
--- a/arch/x86/coco/tdx/tdcall.S
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -31,6 +31,8 @@
TDX_R12 | TDX_R13 | \
TDX_R14 | TDX_R15 )
+.section .noinstr.text, "ax"
+
/*
* __tdx_module_call() - Used by TDX guests to request services from
* the TDX module (does not include VMM services) using TDCALL instruction.
@@ -139,19 +141,6 @@ SYM_FUNC_START(__tdx_hypercall)
movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx
- /*
- * For the idle loop STI needs to be called directly before the TDCALL
- * that enters idle (EXIT_REASON_HLT case). STI instruction enables
- * interrupts only one instruction later. If there is a window between
- * STI and the instruction that emulates the HALT state, there is a
- * chance for interrupts to happen in this window, which can delay the
- * HLT operation indefinitely. Since this is the not the desired
- * result, conditionally call STI before TDCALL.
- */
- testq $TDX_HCALL_ISSUE_STI, %rsi
- jz .Lskip_sti
- sti
-.Lskip_sti:
tdcall
/*
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 669d9e4f2901..3bd111d5e6a0 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -64,8 +64,9 @@ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
}
/* Called from __tdx_hypercall() for unrecoverable failure */
-void __tdx_hypercall_failed(void)
+noinstr void __tdx_hypercall_failed(void)
{
+ instrumentation_begin();
panic("TDVMCALL failed. TDX module bug?");
}
@@ -75,7 +76,7 @@ void __tdx_hypercall_failed(void)
* Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
* guest sides of these calls.
*/
-static u64 hcall_func(u64 exit_reason)
+static __always_inline u64 hcall_func(u64 exit_reason)
{
return exit_reason;
}
@@ -220,7 +221,7 @@ static int ve_instr_len(struct ve_info *ve)
}
}
-static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
+static u64 __cpuidle __halt(const bool irq_disabled)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
@@ -240,20 +241,14 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
* can keep the vCPU in virtual HLT, even if an IRQ is
* pending, without hanging/breaking the guest.
*/
- return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
+ return __tdx_hypercall(&args, 0);
}
static int handle_halt(struct ve_info *ve)
{
- /*
- * Since non safe halt is mainly used in CPU offlining
- * and the guest will always stay in the halt state, don't
- * call the STI instruction (set do_sti as false).
- */
const bool irq_disabled = irqs_disabled();
- const bool do_sti = false;
- if (__halt(irq_disabled, do_sti))
+ if (__halt(irq_disabled))
return -EIO;
return ve_instr_len(ve);
@@ -261,18 +256,12 @@ static int handle_halt(struct ve_info *ve)
void __cpuidle tdx_safe_halt(void)
{
- /*
- * For do_sti=true case, __tdx_hypercall() function enables
- * interrupts using the STI instruction before the TDCALL. So
- * set irq_disabled as false.
- */
const bool irq_disabled = false;
- const bool do_sti = true;
/*
* Use WARN_ONCE() to report the failure.
*/
- if (__halt(irq_disabled, do_sti))
+ if (__halt(irq_disabled))
WARN_ONCE(1, "HLT instruction emulation failed\n");
}
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index 58461fa18b6f..ed308719236c 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -41,18 +41,15 @@ static inline unsigned int brs_to(int idx)
return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}
-static inline void set_debug_extn_cfg(u64 val)
+static __always_inline void set_debug_extn_cfg(u64 val)
{
/* bits[4:3] must always be set to 11b */
- wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
+ __wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32);
}
-static inline u64 get_debug_extn_cfg(void)
+static __always_inline u64 get_debug_extn_cfg(void)
{
- u64 val;
-
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, val);
- return val;
+ return __rdmsr(MSR_AMD_DBG_EXTN_CFG);
}
static bool __init amd_brs_detect(void)
@@ -405,7 +402,7 @@ void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_i
* called from ACPI processor_idle.c or acpi_pad.c
* with interrupts disabled
*/
-void perf_amd_brs_lopwr_cb(bool lopwr_in)
+void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
union amd_debug_extn_cfg cfg;
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 4386b10682ce..8c45b198b62f 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -928,10 +928,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!x86_perf_event_set_period(event))
continue;
- if (has_branch_stack(event)) {
- data.br_stack = &cpuc->lbr_stack;
- data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
- }
+ if (has_branch_stack(event))
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index da3f5ebac4e1..64582954b5f6 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -1110,8 +1110,7 @@ fail:
.data = ibs_data.data,
},
};
- data.raw = &raw;
- data.sample_flags |= PERF_SAMPLE_RAW;
+ perf_sample_save_raw_data(&data, &raw);
}
if (perf_ibs == &perf_ibs_op)
@@ -1122,10 +1121,8 @@ fail:
* recorded as part of interrupt regs. Thus we need to use rip from
* interrupt regs while unwinding call stack.
*/
- if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
- data.callchain = perf_callchain(event, iregs);
- data.sample_flags |= PERF_SAMPLE_CALLCHAIN;
- }
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ perf_sample_save_callchain(&data, event, iregs);
throttle = perf_event_overflow(event, &data, &regs);
out:
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 85a63a41c471..d096b04bf80e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2974,17 +2974,19 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
- if (!x86_pmu_initialized()) {
+ /* This API doesn't currently support enumerating hybrid PMUs. */
+ if (WARN_ON_ONCE(cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) ||
+ !x86_pmu_initialized()) {
memset(cap, 0, sizeof(*cap));
return;
}
- cap->version = x86_pmu.version;
/*
- * KVM doesn't support the hybrid PMU yet.
- * Return the common value in global x86_pmu,
- * which available for all cores.
+ * Note, hybrid CPU models get tracked as having hybrid PMUs even when
+ * all E-cores are disabled via BIOS. When E-cores are disabled, the
+ * base PMU holds the correct number of counters for P-cores.
*/
+ cap->version = x86_pmu.version;
cap->num_counters_gp = x86_pmu.num_counters;
cap->num_counters_fixed = x86_pmu.num_counters_fixed;
cap->bit_width_gp = x86_pmu.cntval_bits;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index bafdc2be479a..9fce2d1247a7 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2119,6 +2119,16 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
+static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff3ffffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0),
+ INTEL_UEVENT_EXTRA_REG(0x0127, MSR_SNOOP_RSP_0, 0xffffffffffffffffull, SNOOP_0),
+ INTEL_UEVENT_EXTRA_REG(0x0227, MSR_SNOOP_RSP_1, 0xffffffffffffffffull, SNOOP_1),
+ EVENT_EXTRA_END
+};
+
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -3026,10 +3036,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period);
- if (has_branch_stack(event)) {
- data.br_stack = &cpuc->lbr_stack;
- data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
- }
+ if (has_branch_stack(event))
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@@ -4182,6 +4190,12 @@ static int hsw_hw_config(struct perf_event *event)
static struct event_constraint counter0_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
+static struct event_constraint counter1_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x2);
+
+static struct event_constraint counter0_1_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x3);
+
static struct event_constraint counter2_constraint =
EVENT_CONSTRAINT(0, 0x4, 0);
@@ -4191,6 +4205,12 @@ static struct event_constraint fixed0_constraint =
static struct event_constraint fixed0_counter0_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
+static struct event_constraint fixed0_counter0_1_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL);
+
+static struct event_constraint counters_1_7_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0xfeULL);
+
static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@@ -4322,6 +4342,78 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return &emptyconstraint;
}
+static struct event_constraint *
+cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = intel_get_event_constraints(cpuc, idx, event);
+
+ /*
+ * The :ppp indicates the Precise Distribution (PDist) facility, which
+ * is only supported on the GP counter 0 & 1 and Fixed counter 0.
+ * If a :ppp event which is not available on the above eligible counters,
+ * error out.
+ */
+ if (event->attr.precise_ip == 3) {
+ /* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
+ if (constraint_match(&fixed0_constraint, event->hw.config))
+ return &fixed0_counter0_1_constraint;
+
+ switch (c->idxmsk64 & 0x3ull) {
+ case 0x1:
+ return &counter0_constraint;
+ case 0x2:
+ return &counter1_constraint;
+ case 0x3:
+ return &counter0_1_constraint;
+ }
+ return &emptyconstraint;
+ }
+
+ return c;
+}
+
+static struct event_constraint *
+rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = spr_get_event_constraints(cpuc, idx, event);
+
+ /* The Retire Latency is not supported by the fixed counter 0. */
+ if (event->attr.precise_ip &&
+ (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
+ constraint_match(&fixed0_constraint, event->hw.config)) {
+ /*
+ * The Instruction PDIR is only available
+ * on the fixed counter 0. Error out for this case.
+ */
+ if (event->attr.precise_ip == 3)
+ return &emptyconstraint;
+ return &counters_1_7_constraint;
+ }
+
+ return c;
+}
+
+static struct event_constraint *
+mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+
+ if (pmu->cpu_type == hybrid_big)
+ return rwc_get_event_constraints(cpuc, idx, event);
+ if (pmu->cpu_type == hybrid_small)
+ return cmt_get_event_constraints(cpuc, idx, event);
+
+ WARN_ON(1);
+ return &emptyconstraint;
+}
+
static int adl_hw_config(struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
@@ -4494,6 +4586,25 @@ static void flip_smm_bit(void *data)
}
}
+static void intel_pmu_check_num_counters(int *num_counters,
+ int *num_counters_fixed,
+ u64 *intel_ctrl, u64 fixed_mask);
+
+static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
+{
+ unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);
+ unsigned int eax, ebx, ecx, edx;
+
+ if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
+ &eax, &ebx, &ecx, &edx);
+ pmu->num_counters = fls(eax);
+ pmu->num_counters_fixed = fls(ebx);
+ intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
+ &pmu->intel_ctrl, ebx);
+ }
+}
+
static bool init_hybrid_pmu(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@@ -4519,6 +4630,9 @@ static bool init_hybrid_pmu(int cpu)
if (!cpumask_empty(&pmu->supported_cpus))
goto end;
+ if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
+ update_pmu_cap(pmu);
+
if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
return false;
@@ -5463,6 +5577,12 @@ static struct attribute *adl_hybrid_mem_attrs[] = {
NULL,
};
+static struct attribute *mtl_hybrid_mem_attrs[] = {
+ EVENT_PTR(mem_ld_adl),
+ EVENT_PTR(mem_st_adl),
+ NULL
+};
+
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
@@ -5490,20 +5610,40 @@ FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small);
FORMAT_ATTR_HYBRID(frontend, hybrid_big);
+#define ADL_HYBRID_RTM_FORMAT_ATTR \
+ FORMAT_HYBRID_PTR(in_tx), \
+ FORMAT_HYBRID_PTR(in_tx_cp)
+
+#define ADL_HYBRID_FORMAT_ATTR \
+ FORMAT_HYBRID_PTR(offcore_rsp), \
+ FORMAT_HYBRID_PTR(ldlat), \
+ FORMAT_HYBRID_PTR(frontend)
+
static struct attribute *adl_hybrid_extra_attr_rtm[] = {
- FORMAT_HYBRID_PTR(in_tx),
- FORMAT_HYBRID_PTR(in_tx_cp),
- FORMAT_HYBRID_PTR(offcore_rsp),
- FORMAT_HYBRID_PTR(ldlat),
- FORMAT_HYBRID_PTR(frontend),
- NULL,
+ ADL_HYBRID_RTM_FORMAT_ATTR,
+ ADL_HYBRID_FORMAT_ATTR,
+ NULL
};
static struct attribute *adl_hybrid_extra_attr[] = {
- FORMAT_HYBRID_PTR(offcore_rsp),
- FORMAT_HYBRID_PTR(ldlat),
- FORMAT_HYBRID_PTR(frontend),
- NULL,
+ ADL_HYBRID_FORMAT_ATTR,
+ NULL
+};
+
+PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
+FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
+
+static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
+ ADL_HYBRID_RTM_FORMAT_ATTR,
+ ADL_HYBRID_FORMAT_ATTR,
+ FORMAT_HYBRID_PTR(snoop_rsp),
+ NULL
+};
+
+static struct attribute *mtl_hybrid_extra_attr[] = {
+ ADL_HYBRID_FORMAT_ATTR,
+ FORMAT_HYBRID_PTR(snoop_rsp),
+ NULL
};
static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr)
@@ -5725,6 +5865,12 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask)
}
}
+static __always_inline bool is_mtl(u8 x86_model)
+{
+ return (x86_model == INTEL_FAM6_METEORLAKE) ||
+ (x86_model == INTEL_FAM6_METEORLAKE_L);
+}
+
__init int intel_pmu_init(void)
{
struct attribute **extra_skl_attr = &empty_attrs;
@@ -6382,6 +6528,8 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S:
+ case INTEL_FAM6_METEORLAKE:
+ case INTEL_FAM6_METEORLAKE_L:
/*
* Alder Lake has 2 types of CPU, core and atom.
*
@@ -6401,9 +6549,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
- x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.lbr_pt_coexist = true;
- intel_pmu_pebs_data_source_adl();
x86_pmu.pebs_latency_data = adl_latency_data_small;
x86_pmu.num_topdown_events = 8;
static_call_update(intel_pmu_update_topdown_event,
@@ -6490,8 +6636,22 @@ __init int intel_pmu_init(void)
pmu->event_constraints = intel_slm_event_constraints;
pmu->pebs_constraints = intel_grt_pebs_event_constraints;
pmu->extra_regs = intel_grt_extra_regs;
- pr_cont("Alderlake Hybrid events, ");
- name = "alderlake_hybrid";
+ if (is_mtl(boot_cpu_data.x86_model)) {
+ x86_pmu.pebs_latency_data = mtl_latency_data_small;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
+ mem_attr = mtl_hybrid_mem_attrs;
+ intel_pmu_pebs_data_source_mtl();
+ x86_pmu.get_event_constraints = mtl_get_event_constraints;
+ pmu->extra_regs = intel_cmt_extra_regs;
+ pr_cont("Meteorlake Hybrid events, ");
+ name = "meteorlake_hybrid";
+ } else {
+ x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
+ intel_pmu_pebs_data_source_adl();
+ pr_cont("Alderlake Hybrid events, ");
+ name = "alderlake_hybrid";
+ }
break;
default:
@@ -6606,6 +6766,9 @@ __init int intel_pmu_init(void)
if (is_hybrid())
intel_pmu_check_hybrid_pmus((u64)fixed_mask);
+ if (x86_pmu.intel_cap.pebs_timing_info)
+ x86_pmu.flags |= PMU_FL_RETIRE_LATENCY;
+
intel_aux_output_init();
return 0;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 88e58b6ee73c..b0354dc869d2 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2,12 +2,14 @@
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>
+#include <linux/sched/clock.h>
#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include <asm/io.h>
+#include <asm/timer.h>
#include "../perf_event.h"
@@ -53,6 +55,13 @@ union intel_x86_pebs_dse {
unsigned int st_lat_locked:1;
unsigned int ld_reserved3:26;
};
+ struct {
+ unsigned int mtl_dse:5;
+ unsigned int mtl_locked:1;
+ unsigned int mtl_stlb_miss:1;
+ unsigned int mtl_fwd_blk:1;
+ unsigned int ld_reserved4:24;
+ };
};
@@ -135,6 +144,29 @@ void __init intel_pmu_pebs_data_source_adl(void)
__intel_pmu_pebs_data_source_grt(data_source);
}
+static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
+{
+ data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
+ data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+ data_source[0x0a] = OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE);
+ data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
+ data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD);
+ data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM);
+}
+
+void __init intel_pmu_pebs_data_source_mtl(void)
+{
+ u64 *data_source;
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ __intel_pmu_pebs_data_source_skl(false, data_source);
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ intel_pmu_pebs_data_source_cmt(data_source);
+}
+
static u64 precise_store_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -219,24 +251,19 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
}
/* Retrieve the latency data for e-core of ADL */
-u64 adl_latency_data_small(struct perf_event *event, u64 status)
+static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
+ u8 dse, bool tlb, bool lock, bool blk)
{
- union intel_x86_pebs_dse dse;
u64 val;
WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
- dse.val = status;
-
- val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
+ dse &= PERF_PEBS_DATA_SOURCE_MASK;
+ val = hybrid_var(event->pmu, pebs_data_source)[dse];
- /*
- * For the atom core on ADL,
- * bit 4: lock, bit 5: TLB access.
- */
- pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);
+ pebs_set_tlb_lock(&val, tlb, lock);
- if (dse.ld_data_blk)
+ if (blk)
val |= P(BLK, DATA);
else
val |= P(BLK, NA);
@@ -244,6 +271,29 @@ u64 adl_latency_data_small(struct perf_event *event, u64 status)
return val;
}
+u64 adl_latency_data_small(struct perf_event *event, u64 status)
+{
+ union intel_x86_pebs_dse dse;
+
+ dse.val = status;
+
+ return __adl_latency_data_small(event, status, dse.ld_dse,
+ dse.ld_locked, dse.ld_stlb_miss,
+ dse.ld_data_blk);
+}
+
+/* Retrieve the latency data for e-core of MTL */
+u64 mtl_latency_data_small(struct perf_event *event, u64 status)
+{
+ union intel_x86_pebs_dse dse;
+
+ dse.val = status;
+
+ return __adl_latency_data_small(event, status, dse.mtl_dse,
+ dse.mtl_stlb_miss, dse.mtl_locked,
+ dse.mtl_fwd_blk);
+}
+
static u64 load_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
@@ -759,7 +809,8 @@ int intel_pmu_drain_bts_buffer(void)
* the sample.
*/
rcu_read_lock();
- perf_prepare_sample(&header, &data, event, &regs);
+ perf_prepare_sample(&data, event, &regs);
+ perf_prepare_header(&header, &data, event, &regs);
if (perf_output_begin(&handle, &data, event,
header.size * (top - base - skip)))
@@ -1519,6 +1570,27 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
return val;
}
+static void setup_pebs_time(struct perf_event *event,
+ struct perf_sample_data *data,
+ u64 tsc)
+{
+ /* Converting to a user-defined clock is not supported yet. */
+ if (event->attr.use_clockid != 0)
+ return;
+
+ /*
+ * Doesn't support the conversion when the TSC is unstable.
+ * The TSC unstable case is a corner case and very unlikely to
+ * happen. If it happens, the TSC in a PEBS record will be
+ * dropped and fall back to perf_event_clock().
+ */
+ if (!using_native_sched_clock() || !sched_clock_stable())
+ return;
+
+ data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
+ data->sample_flags |= PERF_SAMPLE_TIME;
+}
+
#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_DATA_PAGE_SIZE)
@@ -1569,10 +1641,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
- if (sample_type & PERF_SAMPLE_CALLCHAIN) {
- data->callchain = perf_callchain(event, iregs);
- data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
- }
+ if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ perf_sample_save_callchain(data, event, iregs);
/*
* We use the interrupt regs as a base because the PEBS record does not
@@ -1668,16 +1738,11 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
*
* We can only do this for the default trace clock.
*/
- if (x86_pmu.intel_cap.pebs_format >= 3 &&
- event->attr.use_clockid == 0) {
- data->time = native_sched_clock_from_tsc(pebs->tsc);
- data->sample_flags |= PERF_SAMPLE_TIME;
- }
+ if (x86_pmu.intel_cap.pebs_format >= 3)
+ setup_pebs_time(event, data, pebs->tsc);
- if (has_branch_stack(event)) {
- data->br_stack = &cpuc->lbr_stack;
- data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
- }
+ if (has_branch_stack(event))
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1705,6 +1770,7 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
#define PEBS_LATENCY_MASK 0xffff
#define PEBS_CACHE_LATENCY_OFFSET 32
+#define PEBS_RETIRE_LATENCY_OFFSET 32
/*
* With adaptive PEBS the layout depends on what fields are configured.
@@ -1735,10 +1801,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
perf_sample_data_init(data, 0, event->hw.last_period);
data->period = event->hw.last_period;
- if (event->attr.use_clockid == 0) {
- data->time = native_sched_clock_from_tsc(basic->tsc);
- data->sample_flags |= PERF_SAMPLE_TIME;
- }
+ setup_pebs_time(event, data, basic->tsc);
/*
* We must however always use iregs for the unwinder to stay sane; the
@@ -1746,16 +1809,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
- if (sample_type & PERF_SAMPLE_CALLCHAIN) {
- data->callchain = perf_callchain(event, iregs);
- data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
- }
+ if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ perf_sample_save_callchain(data, event, iregs);
*regs = *iregs;
/* The ip in basic is EventingIP */
set_linear_ip(regs, basic->ip);
regs->flags = PERF_EFLAGS_EXACT;
+ if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY))
+ data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
+
/*
* The record for MEMINFO is in front of GP
* But PERF_SAMPLE_TRANSACTION needs gprs->ax.
@@ -1835,8 +1899,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
- data->br_stack = &cpuc->lbr_stack;
- data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
}
}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 1f21f576ca77..c3b0d15a9841 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1606,12 +1606,10 @@ clear_arch_lbr:
*/
void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
{
- int lbr_fmt = x86_pmu.intel_cap.lbr_format;
-
lbr->nr = x86_pmu.lbr_nr;
lbr->from = x86_pmu.lbr_from;
lbr->to = x86_pmu.lbr_to;
- lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;
+ lbr->info = x86_pmu.lbr_info;
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 459b1aafd4d4..bc226603ef3e 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -65,6 +65,21 @@ int uncore_die_to_segment(int die)
return bus ? pci_domain_nr(bus) : -EINVAL;
}
+int uncore_device_to_die(struct pci_dev *dev)
+{
+ int node = pcibus_to_node(dev->bus);
+ int cpu;
+
+ for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->initialized && cpu_to_node(cpu) == node)
+ return c->logical_die_id;
+ }
+
+ return -1;
+}
+
static void uncore_free_pcibus_map(void)
{
struct pci2phy_map *map, *tmp;
@@ -842,6 +857,12 @@ static const struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs,
};
+static inline int uncore_get_box_id(struct intel_uncore_type *type,
+ struct intel_uncore_pmu *pmu)
+{
+ return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx;
+}
+
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
{
struct intel_uncore_type *type = pmu->type;
@@ -850,7 +871,7 @@ void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
sprintf(pmu_name, "uncore_type_%u", type->type_id);
else {
sprintf(pmu_name, "uncore_type_%u_%d",
- type->type_id, type->box_ids[pmu->pmu_idx]);
+ type->type_id, uncore_get_box_id(type, pmu));
}
}
@@ -877,7 +898,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
* Use the box ID from the discovery table if applicable.
*/
sprintf(pmu->name, "uncore_%s_%d", type->name,
- type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
+ uncore_get_box_id(type, pmu));
}
}
@@ -1674,7 +1695,10 @@ struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
void (*mmio_init)(void);
+ /* Discovery table is required */
bool use_discovery;
+ /* The units in the discovery table should be ignored. */
+ int *uncore_units_ignore;
};
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
@@ -1765,6 +1789,11 @@ static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
.mmio_init = adl_uncore_mmio_init,
};
+static const struct intel_uncore_init_fun mtl_uncore_init __initconst = {
+ .cpu_init = mtl_uncore_cpu_init,
+ .mmio_init = adl_uncore_mmio_init,
+};
+
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
.cpu_init = icx_uncore_cpu_init,
.pci_init = icx_uncore_pci_init,
@@ -1782,6 +1811,7 @@ static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
.pci_init = spr_uncore_pci_init,
.mmio_init = spr_uncore_mmio_init,
.use_discovery = true,
+ .uncore_units_ignore = spr_uncore_units_ignore,
};
static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
@@ -1832,6 +1862,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &mtl_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &mtl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
@@ -1853,7 +1885,7 @@ static int __init intel_uncore_init(void)
id = x86_match_cpu(intel_uncore_match);
if (!id) {
- if (!uncore_no_discover && intel_uncore_has_discovery_tables())
+ if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
else
return -ENODEV;
@@ -1861,7 +1893,8 @@ static int __init intel_uncore_init(void)
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_no_discover && uncore_init->use_discovery)
return -ENODEV;
- if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
+ if (uncore_init->use_discovery &&
+ !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore))
return -ENODEV;
}
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index e278e2e7c051..c30fb5bb1222 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -34,6 +34,8 @@
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+#define UNCORE_IGNORE_END -1
+
struct pci_extra_dev {
struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
};
@@ -208,6 +210,7 @@ struct pci2phy_map {
struct pci2phy_map *__find_pci2phy_map(int segment);
int uncore_pcibus_to_dieid(struct pci_bus *bus);
int uncore_die_to_segment(int die);
+int uncore_device_to_die(struct pci_dev *dev);
ssize_t uncore_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
@@ -589,6 +592,7 @@ extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head;
extern struct pci_extra_dev *uncore_extra_pci_dev;
extern struct event_constraint uncore_constraint_empty;
+extern int spr_uncore_units_ignore[];
/* uncore_snb.c */
int snb_uncore_pci_init(void);
@@ -602,6 +606,7 @@ void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
void adl_uncore_cpu_init(void);
+void mtl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
void adl_uncore_mmio_init(void);
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 5fd72d4b8bbb..cb488e41807c 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -33,7 +33,7 @@ static int logical_die_id;
static int get_device_die_id(struct pci_dev *dev)
{
- int cpu, node = pcibus_to_node(dev->bus);
+ int node = pcibus_to_node(dev->bus);
/*
* If the NUMA info is not available, assume that the logical die id is
@@ -43,19 +43,7 @@ static int get_device_die_id(struct pci_dev *dev)
if (node < 0)
return logical_die_id++;
- for_each_cpu(cpu, cpumask_of_node(node)) {
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- if (c->initialized && cpu_to_node(cpu) == node)
- return c->logical_die_id;
- }
-
- /*
- * All CPUs of a node may be offlined. For this case,
- * the PCI and MMIO type of uncore blocks which are
- * enumerated by the device will be unavailable.
- */
- return -1;
+ return uncore_device_to_die(dev);
}
#define __node_2_type(cur) \
@@ -140,13 +128,21 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
unsigned int *box_offset, *ids;
int i;
- if (WARN_ON_ONCE(!unit->ctl || !unit->ctl_offset || !unit->ctr_offset))
+ if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) {
+ pr_info("Invalid address is detected for uncore type %d box %d, "
+ "Disable the uncore unit.\n",
+ unit->box_type, unit->box_id);
return;
+ }
if (parsed) {
type = search_uncore_discovery_type(unit->box_type);
- if (WARN_ON_ONCE(!type))
+ if (!type) {
+ pr_info("A spurious uncore type %d is detected, "
+ "Disable the uncore type.\n",
+ unit->box_type);
return;
+ }
/* Store the first box of each die */
if (!type->box_ctrl_die[die])
type->box_ctrl_die[die] = unit->ctl;
@@ -181,8 +177,12 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
ids[i] = type->ids[i];
box_offset[i] = type->box_offset[i];
- if (WARN_ON_ONCE(unit->box_id == ids[i]))
+ if (unit->box_id == ids[i]) {
+ pr_info("Duplicate uncore type %d box ID %d is detected, "
+ "Drop the duplicate uncore unit.\n",
+ unit->box_type, unit->box_id);
goto free_ids;
+ }
}
ids[i] = unit->box_id;
box_offset[i] = unit->ctl - type->box_ctrl;
@@ -202,8 +202,25 @@ free_box_offset:
}
+static bool
+uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore)
+{
+ int i;
+
+ if (!ignore)
+ return false;
+
+ for (i = 0; ignore[i] != UNCORE_IGNORE_END ; i++) {
+ if (unit->box_type == ignore[i])
+ return true;
+ }
+
+ return false;
+}
+
static int parse_discovery_table(struct pci_dev *dev, int die,
- u32 bar_offset, bool *parsed)
+ u32 bar_offset, bool *parsed,
+ int *ignore)
{
struct uncore_global_discovery global;
struct uncore_unit_discovery unit;
@@ -258,6 +275,9 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
if (unit.access_type >= UNCORE_ACCESS_MAX)
continue;
+ if (uncore_ignore_unit(&unit, ignore))
+ continue;
+
uncore_insert_box_info(&unit, die, *parsed);
}
@@ -266,7 +286,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
return 0;
}
-bool intel_uncore_has_discovery_tables(void)
+bool intel_uncore_has_discovery_tables(int *ignore)
{
u32 device, val, entry_id, bar_offset;
int die, dvsec = 0, ret = true;
@@ -302,7 +322,7 @@ bool intel_uncore_has_discovery_tables(void)
if (die < 0)
continue;
- parse_discovery_table(dev, die, bar_offset, &parsed);
+ parse_discovery_table(dev, die, bar_offset, &parsed, ignore);
}
}
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index f4439357779a..6ee80ad3423e 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -21,9 +21,15 @@
/* Global discovery table size */
#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20
-#define UNCORE_DISCOVERY_PCI_DOMAIN(data) ((data >> 28) & 0x7)
-#define UNCORE_DISCOVERY_PCI_BUS(data) ((data >> 20) & 0xff)
-#define UNCORE_DISCOVERY_PCI_DEVFN(data) ((data >> 12) & 0xff)
+#define UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET 28
+#define UNCORE_DISCOVERY_PCI_DOMAIN(data) \
+ ((data >> UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET) & 0x7)
+#define UNCORE_DISCOVERY_PCI_BUS_OFFSET 20
+#define UNCORE_DISCOVERY_PCI_BUS(data) \
+ ((data >> UNCORE_DISCOVERY_PCI_BUS_OFFSET) & 0xff)
+#define UNCORE_DISCOVERY_PCI_DEVFN_OFFSET 12
+#define UNCORE_DISCOVERY_PCI_DEVFN(data) \
+ ((data >> UNCORE_DISCOVERY_PCI_DEVFN_OFFSET) & 0xff)
#define UNCORE_DISCOVERY_PCI_BOX_CTRL(data) (data & 0xfff)
@@ -122,7 +128,7 @@ struct intel_uncore_discovery_type {
unsigned int *box_offset; /* Box offset */
};
-bool intel_uncore_has_discovery_tables(void);
+bool intel_uncore_has_discovery_tables(int *ignore);
void intel_uncore_clear_discovery_tables(void);
void intel_uncore_generic_uncore_cpu_init(void);
int intel_uncore_generic_uncore_pci_init(void);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 1f4869227efb..7fd4334e12a1 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -109,6 +109,19 @@
#define PCI_DEVICE_ID_INTEL_RPL_23_IMC 0xA728
#define PCI_DEVICE_ID_INTEL_RPL_24_IMC 0xA729
#define PCI_DEVICE_ID_INTEL_RPL_25_IMC 0xA72A
+#define PCI_DEVICE_ID_INTEL_MTL_1_IMC 0x7d00
+#define PCI_DEVICE_ID_INTEL_MTL_2_IMC 0x7d01
+#define PCI_DEVICE_ID_INTEL_MTL_3_IMC 0x7d02
+#define PCI_DEVICE_ID_INTEL_MTL_4_IMC 0x7d05
+#define PCI_DEVICE_ID_INTEL_MTL_5_IMC 0x7d10
+#define PCI_DEVICE_ID_INTEL_MTL_6_IMC 0x7d14
+#define PCI_DEVICE_ID_INTEL_MTL_7_IMC 0x7d15
+#define PCI_DEVICE_ID_INTEL_MTL_8_IMC 0x7d16
+#define PCI_DEVICE_ID_INTEL_MTL_9_IMC 0x7d21
+#define PCI_DEVICE_ID_INTEL_MTL_10_IMC 0x7d22
+#define PCI_DEVICE_ID_INTEL_MTL_11_IMC 0x7d23
+#define PCI_DEVICE_ID_INTEL_MTL_12_IMC 0x7d24
+#define PCI_DEVICE_ID_INTEL_MTL_13_IMC 0x7d28
#define IMC_UNCORE_DEV(a) \
@@ -205,6 +218,32 @@
#define ADL_UNC_ARB_PERFEVTSEL0 0x2FD0
#define ADL_UNC_ARB_MSR_OFFSET 0x8
+/* MTL Cbo register */
+#define MTL_UNC_CBO_0_PER_CTR0 0x2448
+#define MTL_UNC_CBO_0_PERFEVTSEL0 0x2442
+
+/* MTL HAC_ARB register */
+#define MTL_UNC_HAC_ARB_CTR 0x2018
+#define MTL_UNC_HAC_ARB_CTRL 0x2012
+
+/* MTL ARB register */
+#define MTL_UNC_ARB_CTR 0x2418
+#define MTL_UNC_ARB_CTRL 0x2412
+
+/* MTL cNCU register */
+#define MTL_UNC_CNCU_FIXED_CTR 0x2408
+#define MTL_UNC_CNCU_FIXED_CTRL 0x2402
+#define MTL_UNC_CNCU_BOX_CTL 0x240e
+
+/* MTL sNCU register */
+#define MTL_UNC_SNCU_FIXED_CTR 0x2008
+#define MTL_UNC_SNCU_FIXED_CTRL 0x2002
+#define MTL_UNC_SNCU_BOX_CTL 0x200e
+
+/* MTL HAC_CBO register */
+#define MTL_UNC_HBO_CTR 0x2048
+#define MTL_UNC_HBO_CTRL 0x2042
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11");
@@ -598,6 +637,115 @@ void adl_uncore_cpu_init(void)
uncore_msr_uncores = adl_msr_uncores;
}
+static struct intel_uncore_type mtl_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = MTL_UNC_CBO_0_PER_CTR0,
+ .event_ctl = MTL_UNC_CBO_0_PERFEVTSEL0,
+ .event_mask = ADL_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .ops = &icl_uncore_msr_ops,
+ .format_group = &adl_uncore_format_group,
+};
+
+static struct intel_uncore_type mtl_uncore_hac_arb = {
+ .name = "hac_arb",
+ .num_counters = 2,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = MTL_UNC_HAC_ARB_CTR,
+ .event_ctl = MTL_UNC_HAC_ARB_CTRL,
+ .event_mask = ADL_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .ops = &icl_uncore_msr_ops,
+ .format_group = &adl_uncore_format_group,
+};
+
+static struct intel_uncore_type mtl_uncore_arb = {
+ .name = "arb",
+ .num_counters = 2,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = MTL_UNC_ARB_CTR,
+ .event_ctl = MTL_UNC_ARB_CTRL,
+ .event_mask = ADL_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .ops = &icl_uncore_msr_ops,
+ .format_group = &adl_uncore_format_group,
+};
+
+static struct intel_uncore_type mtl_uncore_hac_cbox = {
+ .name = "hac_cbox",
+ .num_counters = 2,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = MTL_UNC_HBO_CTR,
+ .event_ctl = MTL_UNC_HBO_CTRL,
+ .event_mask = ADL_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .ops = &icl_uncore_msr_ops,
+ .format_group = &adl_uncore_format_group,
+};
+
+static void mtl_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ wrmsrl(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN);
+}
+
+static struct intel_uncore_ops mtl_uncore_msr_ops = {
+ .init_box = mtl_uncore_msr_init_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type mtl_uncore_cncu = {
+ .name = "cncu",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .box_ctl = MTL_UNC_CNCU_BOX_CTL,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = MTL_UNC_CNCU_FIXED_CTR,
+ .fixed_ctl = MTL_UNC_CNCU_FIXED_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_CTL_EV_SEL_MASK,
+ .format_group = &icl_uncore_clock_format_group,
+ .ops = &mtl_uncore_msr_ops,
+ .event_descs = icl_uncore_events,
+};
+
+static struct intel_uncore_type mtl_uncore_sncu = {
+ .name = "sncu",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .box_ctl = MTL_UNC_SNCU_BOX_CTL,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = MTL_UNC_SNCU_FIXED_CTR,
+ .fixed_ctl = MTL_UNC_SNCU_FIXED_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_CTL_EV_SEL_MASK,
+ .format_group = &icl_uncore_clock_format_group,
+ .ops = &mtl_uncore_msr_ops,
+ .event_descs = icl_uncore_events,
+};
+
+static struct intel_uncore_type *mtl_msr_uncores[] = {
+ &mtl_uncore_cbox,
+ &mtl_uncore_hac_arb,
+ &mtl_uncore_arb,
+ &mtl_uncore_hac_cbox,
+ &mtl_uncore_cncu,
+ &mtl_uncore_sncu,
+ NULL
+};
+
+void mtl_uncore_cpu_init(void)
+{
+ mtl_uncore_cbox.num_boxes = icl_get_cbox_num();
+ uncore_msr_uncores = mtl_msr_uncores;
+}
+
enum {
SNB_PCI_UNCORE_IMC,
};
@@ -1264,6 +1412,19 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
IMC_UNCORE_DEV(RPL_23),
IMC_UNCORE_DEV(RPL_24),
IMC_UNCORE_DEV(RPL_25),
+ IMC_UNCORE_DEV(MTL_1),
+ IMC_UNCORE_DEV(MTL_2),
+ IMC_UNCORE_DEV(MTL_3),
+ IMC_UNCORE_DEV(MTL_4),
+ IMC_UNCORE_DEV(MTL_5),
+ IMC_UNCORE_DEV(MTL_6),
+ IMC_UNCORE_DEV(MTL_7),
+ IMC_UNCORE_DEV(MTL_8),
+ IMC_UNCORE_DEV(MTL_9),
+ IMC_UNCORE_DEV(MTL_10),
+ IMC_UNCORE_DEV(MTL_11),
+ IMC_UNCORE_DEV(MTL_12),
+ IMC_UNCORE_DEV(MTL_13),
{ /* end: all zeroes */ }
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 44c2f879f708..7d1199554fe3 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1453,9 +1453,6 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
}
raw_spin_unlock(&pci2phy_map_lock);
} else {
- int node = pcibus_to_node(ubox_dev->bus);
- int cpu;
-
segment = pci_domain_nr(ubox_dev->bus);
raw_spin_lock(&pci2phy_map_lock);
map = __find_pci2phy_map(segment);
@@ -1465,15 +1462,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
break;
}
- die_id = -1;
- for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) {
- struct cpuinfo_x86 *c = &cpu_data(cpu);
+ map->pbus_to_dieid[bus] = die_id = uncore_device_to_die(ubox_dev);
- if (c->initialized && cpu_to_node(cpu) == node) {
- map->pbus_to_dieid[bus] = die_id = c->logical_die_id;
- break;
- }
- }
raw_spin_unlock(&pci2phy_map_lock);
if (WARN_ON_ONCE(die_id == -1)) {
@@ -6142,24 +6132,6 @@ static int spr_upi_get_topology(struct intel_uncore_type *type)
return discover_upi_topology(type, SPR_UBOX_DID, SPR_UPI_REGS_ADDR_DEVICE_LINK0);
}
-static struct intel_uncore_type spr_uncore_upi = {
- .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
- .event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
- .format_group = &spr_uncore_raw_format_group,
- .ops = &spr_uncore_pci_ops,
- .name = "upi",
- .attr_update = spr_upi_attr_update,
- .get_topology = spr_upi_get_topology,
- .set_mapping = spr_upi_set_mapping,
- .cleanup_mapping = spr_upi_cleanup_mapping,
-};
-
-static struct intel_uncore_type spr_uncore_m3upi = {
- SPR_UNCORE_PCI_COMMON_FORMAT(),
- .name = "m3upi",
- .constraints = icx_uncore_m3upi_constraints,
-};
-
static struct intel_uncore_type spr_uncore_mdf = {
SPR_UNCORE_COMMON_FORMAT(),
.name = "mdf",
@@ -6168,7 +6140,13 @@ static struct intel_uncore_type spr_uncore_mdf = {
#define UNCORE_SPR_NUM_UNCORE_TYPES 12
#define UNCORE_SPR_IIO 1
#define UNCORE_SPR_IMC 6
+#define UNCORE_SPR_UPI 8
+#define UNCORE_SPR_M3UPI 9
+/*
+ * The uncore units, which are supported by the discovery table,
+ * are defined here.
+ */
static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
&spr_uncore_chabox,
&spr_uncore_iio,
@@ -6178,12 +6156,56 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
NULL,
&spr_uncore_imc,
&spr_uncore_m2m,
- &spr_uncore_upi,
- &spr_uncore_m3upi,
+ NULL,
+ NULL,
NULL,
&spr_uncore_mdf,
};
+/*
+ * The uncore units, which are not supported by the discovery table,
+ * are implemented from here.
+ */
+#define SPR_UNCORE_UPI_NUM_BOXES 4
+
+static unsigned int spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
+ 0, 0x8000, 0x10000, 0x18000
+};
+
+static struct intel_uncore_type spr_uncore_upi = {
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
+ .format_group = &spr_uncore_raw_format_group,
+ .ops = &spr_uncore_pci_ops,
+ .name = "upi",
+ .attr_update = spr_upi_attr_update,
+ .get_topology = spr_upi_get_topology,
+ .set_mapping = spr_upi_set_mapping,
+ .cleanup_mapping = spr_upi_cleanup_mapping,
+ .type_id = UNCORE_SPR_UPI,
+ .num_counters = 4,
+ .num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
+ .perf_ctr_bits = 48,
+ .perf_ctr = ICX_UPI_PCI_PMON_CTR0,
+ .event_ctl = ICX_UPI_PCI_PMON_CTL0,
+ .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL,
+ .pci_offsets = spr_upi_pci_offsets,
+};
+
+static struct intel_uncore_type spr_uncore_m3upi = {
+ SPR_UNCORE_PCI_COMMON_FORMAT(),
+ .name = "m3upi",
+ .type_id = UNCORE_SPR_M3UPI,
+ .num_counters = 4,
+ .num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
+ .perf_ctr_bits = 48,
+ .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0,
+ .event_ctl = ICX_M3UPI_PCI_PMON_CTL0,
+ .box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL,
+ .pci_offsets = spr_upi_pci_offsets,
+ .constraints = icx_uncore_m3upi_constraints,
+};
+
enum perf_uncore_spr_iio_freerunning_type_id {
SPR_IIO_MSR_IOCLK,
SPR_IIO_MSR_BW_IN,
@@ -6314,6 +6336,7 @@ static struct intel_uncore_type spr_uncore_imc_free_running = {
#define UNCORE_SPR_MSR_EXTRA_UNCORES 1
#define UNCORE_SPR_MMIO_EXTRA_UNCORES 1
+#define UNCORE_SPR_PCI_EXTRA_UNCORES 2
static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = {
&spr_uncore_iio_free_running,
@@ -6323,6 +6346,17 @@ static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES]
&spr_uncore_imc_free_running,
};
+static struct intel_uncore_type *spr_pci_uncores[UNCORE_SPR_PCI_EXTRA_UNCORES] = {
+ &spr_uncore_upi,
+ &spr_uncore_m3upi
+};
+
+int spr_uncore_units_ignore[] = {
+ UNCORE_SPR_UPI,
+ UNCORE_SPR_M3UPI,
+ UNCORE_IGNORE_END
+};
+
static void uncore_type_customized_copy(struct intel_uncore_type *to_type,
struct intel_uncore_type *from_type)
{
@@ -6423,9 +6457,69 @@ void spr_uncore_cpu_init(void)
spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
}
+#define SPR_UNCORE_UPI_PCIID 0x3241
+#define SPR_UNCORE_UPI0_DEVFN 0x9
+#define SPR_UNCORE_M3UPI_PCIID 0x3246
+#define SPR_UNCORE_M3UPI0_DEVFN 0x29
+
+static void spr_update_device_location(int type_id)
+{
+ struct intel_uncore_type *type;
+ struct pci_dev *dev = NULL;
+ u32 device, devfn;
+ u64 *ctls;
+ int die;
+
+ if (type_id == UNCORE_SPR_UPI) {
+ type = &spr_uncore_upi;
+ device = SPR_UNCORE_UPI_PCIID;
+ devfn = SPR_UNCORE_UPI0_DEVFN;
+ } else if (type_id == UNCORE_SPR_M3UPI) {
+ type = &spr_uncore_m3upi;
+ device = SPR_UNCORE_M3UPI_PCIID;
+ devfn = SPR_UNCORE_M3UPI0_DEVFN;
+ } else
+ return;
+
+ ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL);
+ if (!ctls) {
+ type->num_boxes = 0;
+ return;
+ }
+
+ while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) {
+ if (devfn != dev->devfn)
+ continue;
+
+ die = uncore_device_to_die(dev);
+ if (die < 0)
+ continue;
+
+ ctls[die] = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET |
+ dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET |
+ devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET |
+ type->box_ctl;
+ }
+
+ type->box_ctls = ctls;
+}
+
int spr_uncore_pci_init(void)
{
- uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL);
+ /*
+ * The discovery table of UPI on some SPR variant is broken,
+ * which impacts the detection of both UPI and M3UPI uncore PMON.
+ * Use the pre-defined UPI and M3UPI table to replace.
+ *
+ * The accurate location, e.g., domain and BUS number,
+ * can only be retrieved at load time.
+ * Update the location of UPI and M3UPI.
+ */
+ spr_update_device_location(UNCORE_SPR_UPI);
+ spr_update_device_location(UNCORE_SPR_M3UPI);
+ uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI,
+ UNCORE_SPR_PCI_EXTRA_UNCORES,
+ spr_pci_uncores);
return 0;
}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 0e849f28a5c1..d6de4487348c 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -35,15 +35,17 @@
* per-core reg tables.
*/
enum extra_reg_type {
- EXTRA_REG_NONE = -1, /* not used */
+ EXTRA_REG_NONE = -1, /* not used */
- EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
- EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
- EXTRA_REG_LBR = 2, /* lbr_select */
- EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
- EXTRA_REG_FE = 4, /* fe_* */
+ EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
+ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
+ EXTRA_REG_LBR = 2, /* lbr_select */
+ EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
+ EXTRA_REG_FE = 4, /* fe_* */
+ EXTRA_REG_SNOOP_0 = 5, /* snoop response 0 */
+ EXTRA_REG_SNOOP_1 = 6, /* snoop response 1 */
- EXTRA_REG_MAX /* number of entries needed */
+ EXTRA_REG_MAX /* number of entries needed */
};
struct event_constraint {
@@ -606,6 +608,7 @@ union perf_capabilities {
u64 pebs_baseline:1;
u64 perf_metrics:1;
u64 pebs_output_pt_available:1;
+ u64 pebs_timing_info:1;
u64 anythread_deprecated:1;
};
u64 capabilities;
@@ -647,6 +650,7 @@ enum {
};
#define PERF_PEBS_DATA_SOURCE_MAX 0x10
+#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1)
struct x86_hybrid_pmu {
struct pmu pmu;
@@ -1000,6 +1004,7 @@ do { \
#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */
#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
+#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1486,6 +1491,8 @@ int intel_pmu_drain_bts_buffer(void);
u64 adl_latency_data_small(struct perf_event *event, u64 status);
+u64 mtl_latency_data_small(struct perf_event *event, u64 status);
+
extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1597,6 +1604,8 @@ void intel_pmu_pebs_data_source_adl(void);
void intel_pmu_pebs_data_source_grt(void);
+void intel_pmu_pebs_data_source_mtl(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index 949d845c922b..3e9acdaeed1e 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -541,7 +541,13 @@ __init int zhaoxin_pmu_init(void)
switch (boot_cpu_data.x86) {
case 0x06:
- if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {
+ /*
+ * Support Zhaoxin CPU from ZXC series, exclude Nano series through FMS.
+ * Nano FMS: Family=6, Model=F, Stepping=[0-A][C-D]
+ * ZXC FMS: Family=6, Model=F, Stepping=E-F OR Family=6, Model=0x19, Stepping=0-3
+ */
+ if ((boot_cpu_data.x86_model == 0x0f && boot_cpu_data.x86_stepping >= 0x0e) ||
+ boot_cpu_data.x86_model == 0x19) {
x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h
index cd7b14322035..c8c111d8fbd7 100644
--- a/arch/x86/include/asm/agp.h
+++ b/arch/x86/include/asm/agp.h
@@ -23,10 +23,4 @@
*/
#define flush_agp_cache() wbinvd()
-/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-#define alloc_gatt_pages(order) \
- ((char *)__get_free_pages(GFP_KERNEL, (order)))
-#define free_gatt_pages(table, order) \
- free_pages((unsigned long)(table), (order))
-
#endif /* _ASM_X86_AGP_H */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 5efd01b548d1..808b4eece251 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -71,7 +71,7 @@ ATOMIC64_DECL(add_unless);
* the old value.
*/
-static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
{
return arch_cmpxchg64(&v->counter, o, n);
}
@@ -85,7 +85,7 @@ static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
* Atomically xchgs the value of @v to @n and returns
* the old value.
*/
-static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
+static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
s64 o;
unsigned high = (unsigned)(n >> 32);
@@ -104,7 +104,7 @@ static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
*
* Atomically sets the value of @v to @n.
*/
-static inline void arch_atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
@@ -119,7 +119,7 @@ static inline void arch_atomic64_set(atomic64_t *v, s64 i)
*
* Atomically reads the value of @v and returns it.
*/
-static inline s64 arch_atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
s64 r;
alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
@@ -133,7 +133,7 @@ static inline s64 arch_atomic64_read(const atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + *@v
*/
-static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
alternative_atomic64(add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -145,7 +145,7 @@ static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
/*
* Other variants with different arithmetic operators:
*/
-static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
alternative_atomic64(sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -154,7 +154,7 @@ static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
}
#define arch_atomic64_sub_return arch_atomic64_sub_return
-static inline s64 arch_atomic64_inc_return(atomic64_t *v)
+static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v)
{
s64 a;
alternative_atomic64(inc_return, "=&A" (a),
@@ -163,7 +163,7 @@ static inline s64 arch_atomic64_inc_return(atomic64_t *v)
}
#define arch_atomic64_inc_return arch_atomic64_inc_return
-static inline s64 arch_atomic64_dec_return(atomic64_t *v)
+static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
{
s64 a;
alternative_atomic64(dec_return, "=&A" (a),
@@ -179,7 +179,7 @@ static inline s64 arch_atomic64_dec_return(atomic64_t *v)
*
* Atomically adds @i to @v.
*/
-static inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -194,7 +194,7 @@ static inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -208,7 +208,7 @@ static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
-static inline void arch_atomic64_inc(atomic64_t *v)
+static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
__alternative_atomic64(inc, inc_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
@@ -221,7 +221,7 @@ static inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
-static inline void arch_atomic64_dec(atomic64_t *v)
+static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
__alternative_atomic64(dec, dec_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
@@ -237,7 +237,7 @@ static inline void arch_atomic64_dec(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns non-zero if the add was done, zero otherwise.
*/
-static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
+static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
{
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
@@ -248,7 +248,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
}
#define arch_atomic64_add_unless arch_atomic64_add_unless
-static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
+static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
alternative_atomic64(inc_not_zero, "=&a" (r),
@@ -257,7 +257,7 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
}
#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
-static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
+static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 r;
alternative_atomic64(dec_if_positive, "=&A" (r),
@@ -269,7 +269,7 @@ static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
#undef alternative_atomic64
#undef __alternative_atomic64
-static inline void arch_atomic64_and(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
s64 old, c = 0;
@@ -277,7 +277,7 @@ static inline void arch_atomic64_and(s64 i, atomic64_t *v)
c = old;
}
-static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
s64 old, c = 0;
@@ -288,7 +288,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
-static inline void arch_atomic64_or(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
s64 old, c = 0;
@@ -296,7 +296,7 @@ static inline void arch_atomic64_or(s64 i, atomic64_t *v)
c = old;
}
-static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
s64 old, c = 0;
@@ -307,7 +307,7 @@ static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
-static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
s64 old, c = 0;
@@ -315,7 +315,7 @@ static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
c = old;
}
-static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
s64 old, c = 0;
@@ -326,7 +326,7 @@ static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
-static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
s64 old, c = 0;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 7886d0578fc9..c496595bf601 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -17,7 +17,7 @@
* Atomically reads the value of @v.
* Doesn't imply a read memory barrier.
*/
-static inline s64 arch_atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return __READ_ONCE((v)->counter);
}
@@ -29,7 +29,7 @@ static inline s64 arch_atomic64_read(const atomic64_t *v)
*
* Atomically sets the value of @v to @i.
*/
-static inline void arch_atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
__WRITE_ONCE(v->counter, i);
}
@@ -55,7 +55,7 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "subq %1,%0"
: "=m" (v->counter)
@@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
+static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
}
@@ -113,7 +113,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
-static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
+static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e);
}
@@ -127,7 +127,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
+static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e);
}
@@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
+static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
}
@@ -161,25 +161,25 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
return arch_atomic64_add_return(-i, v);
}
#define arch_atomic64_sub_return arch_atomic64_sub_return
-static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return xadd(&v->counter, i);
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
{
return xadd(&v->counter, -i);
}
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
-static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return arch_cmpxchg(&v->counter, old, new);
}
@@ -191,13 +191,13 @@ static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s
}
#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
-static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
+static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
{
return arch_xchg(&v->counter, new);
}
#define arch_atomic64_xchg arch_atomic64_xchg
-static inline void arch_atomic64_and(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "andq %1,%0"
: "+m" (v->counter)
@@ -205,7 +205,7 @@ static inline void arch_atomic64_and(s64 i, atomic64_t *v)
: "memory");
}
-static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -215,7 +215,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
-static inline void arch_atomic64_or(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "orq %1,%0"
: "+m" (v->counter)
@@ -223,7 +223,7 @@ static inline void arch_atomic64_or(s64 i, atomic64_t *v)
: "memory");
}
-static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -233,7 +233,7 @@ static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
-static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "xorq %1,%0"
: "+m" (v->counter)
@@ -241,7 +241,7 @@ static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
: "memory");
}
-static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8f39c46197b8..dba52f43c8a1 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -312,6 +312,7 @@
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
+#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h
index 9656a5bc6fea..9a710c060445 100644
--- a/arch/x86/include/asm/fpu/xcr.h
+++ b/arch/x86/include/asm/fpu/xcr.h
@@ -5,7 +5,7 @@
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
#define XCR_XFEATURE_IN_USE_MASK 0x00000001
-static inline u64 xgetbv(u32 index)
+static __always_inline u64 xgetbv(u32 index)
{
u32 eax, edx;
@@ -27,7 +27,7 @@ static inline void xsetbv(u32 index, u64 value)
*
* Callers should check X86_FEATURE_XGETBV1.
*/
-static inline u64 xfeatures_in_use(void)
+static __always_inline u64 xfeatures_in_use(void)
{
return xgetbv(XCR_XFEATURE_IN_USE_MASK);
}
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 7793e52d6237..8c5ae649d2df 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -8,9 +8,6 @@
#include <asm/nospec-branch.h>
-/* Provide __cpuidle; we can't safely include <linux/cpu.h> */
-#define __cpuidle __section(".cpuidle.text")
-
/*
* Interrupt control:
*/
@@ -45,13 +42,13 @@ static __always_inline void native_irq_enable(void)
asm volatile("sti": : :"memory");
}
-static inline __cpuidle void native_safe_halt(void)
+static __always_inline void native_safe_halt(void)
{
mds_idle_clear_cpu_buffers();
asm volatile("sti; hlt": : :"memory");
}
-static inline __cpuidle void native_halt(void)
+static __always_inline void native_halt(void)
{
mds_idle_clear_cpu_buffers();
asm volatile("hlt": : :"memory");
@@ -84,7 +81,7 @@ static __always_inline void arch_local_irq_enable(void)
* Used in the idle loop; sti takes one instruction cycle
* to complete:
*/
-static inline __cpuidle void arch_safe_halt(void)
+static __always_inline void arch_safe_halt(void)
{
native_safe_halt();
}
@@ -93,7 +90,7 @@ static inline __cpuidle void arch_safe_halt(void)
* Used when interrupts are already enabled or to
* shutdown the processor:
*/
-static inline __cpuidle void halt(void)
+static __always_inline void halt(void)
{
native_halt();
}
diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h
index 6c5765192102..511b35069187 100644
--- a/arch/x86/include/asm/kvmclock.h
+++ b/arch/x86/include/asm/kvmclock.h
@@ -8,7 +8,7 @@ extern struct clocksource kvm_clock;
DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
-static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
+static __always_inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
{
return &this_cpu_read(hv_clock_per_cpu)->pvti;
}
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d3fe82c5d6b6..c88dd9d3fa08 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -189,6 +189,9 @@
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
#define MSR_TURBO_RATIO_LIMIT2 0x000001af
+#define MSR_SNOOP_RSP_0 0x00001328
+#define MSR_SNOOP_RSP_1 0x00001329
+
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 3a8fdf881313..778df05f8539 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -26,7 +26,7 @@
#define TPAUSE_C01_STATE 1
#define TPAUSE_C02_STATE 0
-static inline void __monitor(const void *eax, unsigned long ecx,
+static __always_inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
{
/* "monitor %eax, %ecx, %edx;" */
@@ -34,7 +34,7 @@ static inline void __monitor(const void *eax, unsigned long ecx,
:: "a" (eax), "c" (ecx), "d"(edx));
}
-static inline void __monitorx(const void *eax, unsigned long ecx,
+static __always_inline void __monitorx(const void *eax, unsigned long ecx,
unsigned long edx)
{
/* "monitorx %eax, %ecx, %edx;" */
@@ -42,7 +42,7 @@ static inline void __monitorx(const void *eax, unsigned long ecx,
:: "a" (eax), "c" (ecx), "d"(edx));
}
-static inline void __mwait(unsigned long eax, unsigned long ecx)
+static __always_inline void __mwait(unsigned long eax, unsigned long ecx)
{
mds_idle_clear_cpu_buffers();
@@ -77,8 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
* EAX (logical) address to monitor
* ECX #GP if not zero
*/
-static inline void __mwaitx(unsigned long eax, unsigned long ebx,
- unsigned long ecx)
+static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
+ unsigned long ecx)
{
/* No MDS buffer clear as this is AMD/HYGON only */
@@ -87,7 +87,7 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
:: "a" (eax), "b" (ebx), "c" (ecx));
}
-static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
mds_idle_clear_cpu_buffers();
/* "mwait %eax, %ecx;" */
@@ -105,7 +105,7 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
-static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 771b0a2b7a34..e04313e89f4f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -564,7 +564,7 @@ static __always_inline void mds_user_clear_cpu_buffers(void)
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static inline void mds_idle_clear_cpu_buffers(void)
+static __always_inline void mds_idle_clear_cpu_buffers(void)
{
if (static_branch_likely(&mds_idle_clear))
mds_clear_cpu_buffers();
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 73e9522db7c1..cf40e813b3d7 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -26,7 +26,7 @@ DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock);
void paravirt_set_sched_clock(u64 (*func)(void));
-static inline u64 paravirt_sched_clock(void)
+static __always_inline u64 paravirt_sched_clock(void)
{
return static_call(pv_sched_clock)();
}
@@ -168,7 +168,7 @@ static inline void __write_cr4(unsigned long x)
PVOP_VCALL1(cpu.write_cr4, x);
}
-static inline void arch_safe_halt(void)
+static __always_inline void arch_safe_halt(void)
{
PVOP_VCALL0(irq.safe_halt);
}
@@ -178,7 +178,9 @@ static inline void halt(void)
PVOP_VCALL0(irq.halt);
}
-static inline void wbinvd(void)
+extern noinstr void pv_native_wbinvd(void);
+
+static __always_inline void wbinvd(void)
{
PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
}
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5d0f6891ae61..8fc15ed5e60b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -160,6 +160,14 @@ union cpuid10_edx {
};
/*
+ * Intel "Architectural Performance Monitoring extension" CPUID
+ * detection/enumeration details:
+ */
+#define ARCH_PERFMON_EXT_LEAF 0x00000023
+#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1
+#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
+
+/*
* Intel Architectural LBR CPUID detection/enumeration details:
*/
union cpuid28_eax {
@@ -578,7 +586,7 @@ extern void perf_amd_brs_lopwr_cb(bool lopwr_in);
DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
-static inline void perf_lopwr_cb(bool lopwr_in)
+static __always_inline void perf_lopwr_cb(bool lopwr_in)
{
static_call_mod(perf_lopwr_cb)(lopwr_in);
}
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 19b695ff2c68..0c92db84469d 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -7,6 +7,7 @@
/* some helper functions for xen and kvm pv clock sources */
u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
void pvclock_set_flags(u8 flags);
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
@@ -39,7 +40,7 @@ bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
*/
-static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
+static __always_inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
{
u64 product;
#ifdef __i386__
diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h
index c0ef921c0586..8009d781c2f9 100644
--- a/arch/x86/include/asm/shared/io.h
+++ b/arch/x86/include/asm/shared/io.h
@@ -5,13 +5,13 @@
#include <linux/types.h>
#define BUILDIO(bwl, bw, type) \
-static inline void __out##bwl(type value, u16 port) \
+static __always_inline void __out##bwl(type value, u16 port) \
{ \
asm volatile("out" #bwl " %" #bw "0, %w1" \
: : "a"(value), "Nd"(port)); \
} \
\
-static inline type __in##bwl(u16 port) \
+static __always_inline type __in##bwl(u16 port) \
{ \
type value; \
asm volatile("in" #bwl " %w1, %" #bw "0" \
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index e53f26228fbb..559176887791 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -8,7 +8,6 @@
#define TDX_HYPERCALL_STANDARD 0
#define TDX_HCALL_HAS_OUTPUT BIT(0)
-#define TDX_HCALL_ISSUE_STI BIT(1)
#define TDX_CPUID_LEAF_ID 0x21
#define TDX_IDENT "IntelTDX "
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 35f709f619fb..f358a23f228d 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -115,7 +115,7 @@ static inline void wrpkru(u32 pkru)
}
#endif
-static inline void native_wbinvd(void)
+static __always_inline void native_wbinvd(void)
{
asm volatile("wbinvd": : :"memory");
}
@@ -179,7 +179,7 @@ static inline void __write_cr4(unsigned long x)
native_write_cr4(x);
}
-static inline void wbinvd(void)
+static __always_inline void wbinvd(void)
{
native_wbinvd();
}
@@ -196,7 +196,7 @@ static inline void load_gs_index(unsigned int selector)
#endif /* CONFIG_PARAVIRT_XXL */
-static inline void clflush(volatile void *__p)
+static __always_inline void clflush(volatile void *__p)
{
asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
}
@@ -295,7 +295,7 @@ static inline int enqcmds(void __iomem *dst, const void *src)
return 0;
}
-static inline void tile_release(void)
+static __always_inline void tile_release(void)
{
/*
* Instruction opcode for TILERELEASE; supported in binutils
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index e5e0fe10c692..a2dd24947eb8 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -382,7 +382,7 @@ MULTI_stack_switch(struct multicall_entry *mcl,
}
#endif
-static inline int
+static __always_inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
return _hypercall2(int, sched_op, cmd, arg);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 907cc98b1938..1c38174b5f01 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -188,6 +188,17 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
return cpu;
}
+static bool __init acpi_is_processor_usable(u32 lapic_flags)
+{
+ if (lapic_flags & ACPI_MADT_ENABLED)
+ return true;
+
+ if (acpi_support_online_capable && (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
+ return true;
+
+ return false;
+}
+
static int __init
acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
{
@@ -212,6 +223,10 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
if (apic_id == 0xffffffff)
return 0;
+ /* don't register processors that cannot be onlined */
+ if (!acpi_is_processor_usable(processor->lapic_flags))
+ return 0;
+
/*
* We need to register disabled CPU as well to permit
* counting disabled CPUs. This allows us to size
@@ -250,9 +265,7 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
return 0;
/* don't register processors that can not be onlined */
- if (acpi_support_online_capable &&
- !(processor->lapic_flags & ACPI_MADT_ENABLED) &&
- !(processor->lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
+ if (!acpi_is_processor_usable(processor->lapic_flags))
return 0;
/*
@@ -1840,23 +1853,23 @@ early_param("acpi_sci", setup_acpi_sci);
int __acpi_acquire_global_lock(unsigned int *lock)
{
- unsigned int old, new, val;
+ unsigned int old, new;
+
+ old = READ_ONCE(*lock);
do {
- old = *lock;
new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1));
- val = cmpxchg(lock, old, new);
- } while (unlikely (val != old));
+ } while (!try_cmpxchg(lock, &old, new));
return ((new & 0x3) < 3) ? -1 : 0;
}
int __acpi_release_global_lock(unsigned int *lock)
{
- unsigned int old, new, val;
+ unsigned int old, new;
+
+ old = READ_ONCE(*lock);
do {
- old = *lock;
new = old & ~0x3;
- val = cmpxchg(lock, old, new);
- } while (unlikely (val != old));
+ } while (!try_cmpxchg(lock, &old, new));
return old & 0x1;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bca0bd8f4846..85168740f76a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -86,7 +86,7 @@ void update_spec_ctrl_cond(u64 val)
wrmsrl(MSR_IA32_SPEC_CTRL, val);
}
-u64 spec_ctrl_current(void)
+noinstr u64 spec_ctrl_current(void)
{
return this_cpu_read(x86_spec_ctrl_current);
}
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 02039ec3597d..11f83d07925e 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -143,7 +143,7 @@ static __init int parse_no_stealacc(char *arg)
}
early_param("no-steal-acc", parse_no_stealacc);
-static unsigned long long notrace vmware_sched_clock(void)
+static noinstr u64 vmware_sched_clock(void)
{
unsigned long long ns;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 9baa89a8877d..dccce58201b7 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -853,12 +853,12 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
* Initialize register state that may prevent from entering low-power idle.
* This function will be invoked from the cpuidle driver only when needed.
*/
-void fpu_idle_fpregs(void)
+noinstr void fpu_idle_fpregs(void)
{
/* Note: AMX_TILE being enabled implies XGETBV1 support */
if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) &&
(xfeatures_in_use() & XFEATURE_MASK_XTILE)) {
tile_release();
- fpregs_deactivate(&current->thread.fpu);
+ __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 695873c0f50b..754da3b226f9 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -659,17 +659,19 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn)
* is determined by the MOD/RM byte.
*/
opcode = insn->modrm.bytes[0];
- if ((opcode & 0x30) == 0x10) {
- if ((opcode & 0x8) == 0x8)
- return -EOPNOTSUPP; /* far call */
- /* call absolute, indirect */
+ switch (X86_MODRM_REG(opcode)) {
+ case 0b010: /* FF /2, call near, absolute indirect */
p->ainsn.emulate_op = kprobe_emulate_call_indirect;
- } else if ((opcode & 0x30) == 0x20) {
- if ((opcode & 0x8) == 0x8)
- return -EOPNOTSUPP; /* far jmp */
- /* jmp near absolute indirect */
+ break;
+ case 0b100: /* FF /4, jmp near, absolute indirect */
p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
- } else
+ break;
+ case 0b011: /* FF /3, call far, absolute indirect */
+ case 0b101: /* FF /5, jmp far, absolute indirect */
+ return -EOPNOTSUPP;
+ }
+
+ if (!p->ainsn.emulate_op)
break;
if (insn->addr_bytes != sizeof(unsigned long))
@@ -990,20 +992,6 @@ int kprobe_int3_handler(struct pt_regs *regs)
kprobe_post_process(p, regs, kcb);
return 1;
}
- }
-
- if (*addr != INT3_INSN_OPCODE) {
- /*
- * The breakpoint instruction was removed right
- * after we hit it. Another cpu has removed
- * either a probepoint or a debugger breakpoint
- * at this address. In either case, no further
- * handling of this interrupt is appropriate.
- * Back up over the (now missing) int3 and run
- * the original instruction.
- */
- regs->ip = (unsigned long)addr;
- return 1;
} /* else: not a kprobe fault; let the kernel handle it */
return 0;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 16333ba1904b..0f35d44c56fe 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -71,12 +71,12 @@ static int kvm_set_wallclock(const struct timespec64 *now)
return -ENODEV;
}
-static u64 kvm_clock_read(void)
+static noinstr u64 kvm_clock_read(void)
{
u64 ret;
preempt_disable_notrace();
- ret = pvclock_clocksource_read(this_cpu_pvti());
+ ret = pvclock_clocksource_read_nowd(this_cpu_pvti());
preempt_enable_notrace();
return ret;
}
@@ -86,7 +86,7 @@ static u64 kvm_clock_get_cycles(struct clocksource *cs)
return kvm_clock_read();
}
-static u64 kvm_sched_clock_read(void)
+static noinstr u64 kvm_sched_clock_read(void)
{
return kvm_clock_read() - kvm_sched_clock_offset;
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 327757afb027..5bf4f0b2f35d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -216,6 +216,11 @@ static noinstr void pv_native_set_debugreg(int regno, unsigned long val)
native_set_debugreg(regno, val);
}
+noinstr void pv_native_wbinvd(void)
+{
+ native_wbinvd();
+}
+
static noinstr void pv_native_irq_enable(void)
{
native_irq_enable();
@@ -225,6 +230,11 @@ static noinstr void pv_native_irq_disable(void)
{
native_irq_disable();
}
+
+static noinstr void pv_native_safe_halt(void)
+{
+ native_safe_halt();
+}
#endif
enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
@@ -256,7 +266,7 @@ struct paravirt_patch_template pv_ops = {
.cpu.read_cr0 = native_read_cr0,
.cpu.write_cr0 = native_write_cr0,
.cpu.write_cr4 = native_write_cr4,
- .cpu.wbinvd = native_wbinvd,
+ .cpu.wbinvd = pv_native_wbinvd,
.cpu.read_msr = native_read_msr,
.cpu.write_msr = native_write_msr,
.cpu.read_msr_safe = native_read_msr_safe,
@@ -290,7 +300,7 @@ struct paravirt_patch_template pv_ops = {
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
.irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
- .irq.safe_halt = native_safe_halt,
+ .irq.safe_halt = pv_native_safe_halt,
.irq.halt = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 40d156a31676..e57cd31bfec4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -24,6 +24,7 @@
#include <linux/cpuidle.h>
#include <linux/acpi.h>
#include <linux/elf-randomize.h>
+#include <linux/static_call.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
@@ -694,7 +695,24 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
-static void (*x86_idle)(void);
+/*
+ * We use this if we don't have any better idle routine..
+ */
+void __cpuidle default_idle(void)
+{
+ raw_safe_halt();
+ raw_local_irq_disable();
+}
+#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
+EXPORT_SYMBOL(default_idle);
+#endif
+
+DEFINE_STATIC_CALL_NULL(x86_idle, default_idle);
+
+static bool x86_idle_set(void)
+{
+ return !!static_call_query(x86_idle);
+}
#ifndef CONFIG_SMP
static inline void play_dead(void)
@@ -717,28 +735,17 @@ void arch_cpu_idle_dead(void)
/*
* Called from the generic idle code.
*/
-void arch_cpu_idle(void)
-{
- x86_idle();
-}
-
-/*
- * We use this if we don't have any better idle routine..
- */
-void __cpuidle default_idle(void)
+void __cpuidle arch_cpu_idle(void)
{
- raw_safe_halt();
+ static_call(x86_idle)();
}
-#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
-EXPORT_SYMBOL(default_idle);
-#endif
#ifdef CONFIG_XEN
bool xen_set_default_idle(void)
{
- bool ret = !!x86_idle;
+ bool ret = x86_idle_set();
- x86_idle = default_idle;
+ static_call_update(x86_idle, default_idle);
return ret;
}
@@ -800,13 +807,7 @@ static void amd_e400_idle(void)
default_idle();
- /*
- * The switch back from broadcast mode needs to be called with
- * interrupts disabled.
- */
- raw_local_irq_disable();
tick_broadcast_exit();
- raw_local_irq_enable();
}
/*
@@ -864,12 +865,10 @@ static __cpuidle void mwait_idle(void)
}
__monitor((void *)&current_thread_info()->flags, 0, 0);
- if (!need_resched())
+ if (!need_resched()) {
__sti_mwait(0, 0);
- else
- raw_local_irq_enable();
- } else {
- raw_local_irq_enable();
+ raw_local_irq_disable();
+ }
}
__current_clr_polling();
}
@@ -880,20 +879,20 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
#endif
- if (x86_idle || boot_option_idle_override == IDLE_POLL)
+ if (x86_idle_set() || boot_option_idle_override == IDLE_POLL)
return;
if (boot_cpu_has_bug(X86_BUG_AMD_E400)) {
pr_info("using AMD E400 aware idle routine\n");
- x86_idle = amd_e400_idle;
+ static_call_update(x86_idle, amd_e400_idle);
} else if (prefer_mwait_c1_over_halt(c)) {
pr_info("using mwait in idle threads\n");
- x86_idle = mwait_idle;
+ static_call_update(x86_idle, mwait_idle);
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
pr_info("using TDX aware idle routine\n");
- x86_idle = tdx_safe_halt;
+ static_call_update(x86_idle, tdx_safe_halt);
} else
- x86_idle = default_idle;
+ static_call_update(x86_idle, default_idle);
}
void amd_e400_c1e_apic_setup(void)
@@ -946,7 +945,7 @@ static int __init idle_setup(char *str)
* To continue to load the CPU idle driver, don't touch
* the boot_option_idle_override.
*/
- x86_idle = default_idle;
+ static_call_update(x86_idle, default_idle);
boot_option_idle_override = IDLE_HALT;
} else if (!strcmp(str, "nomwait")) {
/*
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index eda37df016f0..56acf53a782a 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -64,7 +64,8 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
return flags & valid_flags;
}
-u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+static __always_inline
+u64 __pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, bool dowd)
{
unsigned version;
u64 ret;
@@ -77,7 +78,7 @@ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
flags = src->flags;
} while (pvclock_read_retry(src, version));
- if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
+ if (dowd && unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
src->flags &= ~PVCLOCK_GUEST_STOPPED;
pvclock_touch_watchdogs();
}
@@ -100,16 +101,25 @@ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
* updating at the same time, and one of them could be slightly behind,
* making the assumption that last_value always go forward fail to hold.
*/
- last = atomic64_read(&last_value);
+ last = arch_atomic64_read(&last_value);
do {
- if (ret < last)
+ if (ret <= last)
return last;
- last = atomic64_cmpxchg(&last_value, last, ret);
- } while (unlikely(last != ret));
+ } while (!arch_atomic64_try_cmpxchg(&last_value, &last, ret));
return ret;
}
+u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{
+ return __pvclock_clocksource_read(src, true);
+}
+
+noinstr u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src)
+{
+ return __pvclock_clocksource_read(src, false);
+}
+
void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
struct pvclock_vcpu_time_info *vcpu_time,
struct timespec64 *ts)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a78e73da4a74..8c33936b017d 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -215,7 +215,7 @@ static void __init cyc2ns_init_secondary_cpus(void)
/*
* Scheduler clock - returns current time in nanosec units.
*/
-u64 native_sched_clock(void)
+noinstr u64 native_sched_clock(void)
{
if (static_branch_likely(&__use_tsc)) {
u64 tsc_now = rdtsc();
@@ -248,7 +248,7 @@ u64 native_sched_clock_from_tsc(u64 tsc)
/* We need to define a real function for sched_clock, to override the
weak default version */
#ifdef CONFIG_PARAVIRT
-unsigned long long sched_clock(void)
+noinstr u64 sched_clock(void)
{
return paravirt_sched_clock();
}
@@ -258,8 +258,7 @@ bool using_native_sched_clock(void)
return static_call_query(pv_sched_clock) == native_sched_clock;
}
#else
-unsigned long long
-sched_clock(void) __attribute__((alias("native_sched_clock")));
+u64 sched_clock(void) __attribute__((alias("native_sched_clock")));
bool using_native_sched_clock(void) { return true; }
#endif
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 2e0ee14229bf..25f155205770 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -129,7 +129,6 @@ SECTIONS
HEAD_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
SOFTIRQENTRY_TEXT
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index cdb91009701d..ee67ba625094 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -165,15 +165,27 @@ static inline void kvm_init_pmu_capability(void)
{
bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
- perf_get_x86_pmu_capability(&kvm_pmu_cap);
-
- /*
- * For Intel, only support guest architectural pmu
- * on a host with architectural pmu.
- */
- if ((is_intel && !kvm_pmu_cap.version) || !kvm_pmu_cap.num_counters_gp)
+ /*
+ * Hybrid PMUs don't play nice with virtualization without careful
+ * configuration by userspace, and KVM's APIs for reporting supported
+ * vPMU features do not account for hybrid PMUs. Disable vPMU support
+ * for hybrid PMUs until KVM gains a way to let userspace opt-in.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
enable_pmu = false;
+ if (enable_pmu) {
+ perf_get_x86_pmu_capability(&kvm_pmu_cap);
+
+ /*
+ * For Intel, only support guest architectural pmu
+ * on a host with architectural pmu.
+ */
+ if ((is_intel && !kvm_pmu_cap.version) ||
+ !kvm_pmu_cap.num_counters_gp)
+ enable_pmu = false;
+ }
+
if (!enable_pmu) {
memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
return;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f0fa3de2ceb8..a2c299d47e69 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5263,12 +5263,11 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
{
unsigned long val;
+ memset(dbgregs, 0, sizeof(*dbgregs));
memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
kvm_get_dr(vcpu, 6, &val);
dbgregs->dr6 = val;
dbgregs->dr7 = vcpu->arch.dr7;
- dbgregs->flags = 0;
- memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
}
static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index b6da09339308..80570eb3c89b 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -7,16 +7,18 @@
#include <linux/string.h>
#include <linux/ctype.h>
#include <asm/setup.h>
+#include <asm/cmdline.h>
static inline int myisspace(u8 c)
{
return c <= ' '; /* Close enough approximation */
}
-/**
+/*
* Find a boolean option (like quiet,noapic,nosmp....)
*
* @cmdline: the cmdline string
+ * @max_cmdline_size: the maximum size of cmdline
* @option: option string to look for
*
* Returns the position of that @option (starts counting with 1)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index dd8cd8831251..a64017602010 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -8,7 +8,7 @@
#include <asm/alternative.h>
#include <asm/export.h>
-.pushsection .noinstr.text, "ax"
+.section .noinstr.text, "ax"
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
@@ -43,7 +43,7 @@ SYM_TYPED_FUNC_START(__memcpy)
SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
-SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+SYM_FUNC_ALIAS(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)
/*
@@ -184,4 +184,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
RET
SYM_FUNC_END(memcpy_orig)
-.popsection
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 724bbf83eb5b..02661861e5dd 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -13,6 +13,8 @@
#undef memmove
+.section .noinstr.text, "ax"
+
/*
* Implement memmove(). This can handle overlap between src and dst.
*
@@ -213,5 +215,5 @@ SYM_FUNC_START(__memmove)
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)
-SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
+SYM_FUNC_ALIAS(memmove, __memmove)
EXPORT_SYMBOL(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index fc9ffd3ff3b2..6143b1a6fa2c 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -6,6 +6,8 @@
#include <asm/alternative.h>
#include <asm/export.h>
+.section .noinstr.text, "ax"
+
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
@@ -43,7 +45,7 @@ SYM_FUNC_START(__memset)
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)
-SYM_FUNC_ALIAS_WEAK(memset, __memset)
+SYM_FUNC_ALIAS(memset, __memset)
EXPORT_SYMBOL(memset)
/*
diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c
index a018ec4fba53..92cd8ecc3a2c 100644
--- a/arch/x86/lib/misc.c
+++ b/arch/x86/lib/misc.c
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <asm/misc.h>
+
/*
* Count the digits of @val including a possible sign.
*
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index fb4b1b5e0dea..004b37f026d1 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -159,10 +159,10 @@ static inline void set_page_memtype(struct page *pg,
break;
}
+ old_flags = READ_ONCE(pg->flags);
do {
- old_flags = pg->flags;
new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
- } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
+ } while (!try_cmpxchg(&pg->flags, &old_flags, new_flags));
}
#else
static inline enum page_cache_mode get_page_memtype(struct page *pg)
@@ -387,8 +387,7 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end,
u8 mtrr_type, uniform;
mtrr_type = mtrr_type_lookup(start, end, &uniform);
- if (mtrr_type != MTRR_TYPE_WRBACK &&
- mtrr_type != MTRR_TYPE_INVALID)
+ if (mtrr_type != MTRR_TYPE_WRBACK)
return _PAGE_CACHE_MODE_UC_MINUS;
return _PAGE_CACHE_MODE_WB;
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b1379662877..5cacd4890bab 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1068,7 +1068,7 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = {
.write_cr4 = xen_write_cr4,
- .wbinvd = native_wbinvd,
+ .wbinvd = pv_native_wbinvd,
.read_msr = xen_read_msr,
.write_msr = xen_write_msr,
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 06c3c2fb4b06..6092fea7d651 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -24,7 +24,7 @@ noinstr void xen_force_evtchn_callback(void)
(void)HYPERVISOR_xen_version(0, NULL);
}
-static void xen_safe_halt(void)
+static noinstr void xen_safe_halt(void)
{
/* Blocking includes an implicit local_irq_enable(). */
if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 9ef0a5cca96e..6b8836deb738 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -60,9 +60,17 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs)
return xen_clocksource_read();
}
-static u64 xen_sched_clock(void)
+static noinstr u64 xen_sched_clock(void)
{
- return xen_clocksource_read() - xen_sched_clock_offset;
+ struct pvclock_vcpu_time_info *src;
+ u64 ret;
+
+ preempt_disable_notrace();
+ src = &__this_cpu_read(xen_vcpu)->time;
+ ret = pvclock_clocksource_read_nowd(src);
+ ret -= xen_sched_clock_offset;
+ preempt_enable_notrace();
+ return ret;
}
static void xen_read_wallclock(struct timespec64 *ts)