// SPDX-License-Identifier: GPL-2.0 #include #include #include "../perf_event.h" /* LBR Branch Select valid bits */ #define LBR_SELECT_MASK 0x1ff /* * LBR Branch Select filter bits which when set, ensures that the * corresponding type of branches are not recorded */ #define LBR_SELECT_KERNEL 0 /* Branches ending in CPL = 0 */ #define LBR_SELECT_USER 1 /* Branches ending in CPL > 0 */ #define LBR_SELECT_JCC 2 /* Conditional branches */ #define LBR_SELECT_CALL_NEAR_REL 3 /* Near relative calls */ #define LBR_SELECT_CALL_NEAR_IND 4 /* Indirect relative calls */ #define LBR_SELECT_RET_NEAR 5 /* Near returns */ #define LBR_SELECT_JMP_NEAR_IND 6 /* Near indirect jumps (excl. calls and returns) */ #define LBR_SELECT_JMP_NEAR_REL 7 /* Near relative jumps (excl. calls) */ #define LBR_SELECT_FAR_BRANCH 8 /* Far branches */ #define LBR_KERNEL BIT(LBR_SELECT_KERNEL) #define LBR_USER BIT(LBR_SELECT_USER) #define LBR_JCC BIT(LBR_SELECT_JCC) #define LBR_REL_CALL BIT(LBR_SELECT_CALL_NEAR_REL) #define LBR_IND_CALL BIT(LBR_SELECT_CALL_NEAR_IND) #define LBR_RETURN BIT(LBR_SELECT_RET_NEAR) #define LBR_REL_JMP BIT(LBR_SELECT_JMP_NEAR_REL) #define LBR_IND_JMP BIT(LBR_SELECT_JMP_NEAR_IND) #define LBR_FAR BIT(LBR_SELECT_FAR_BRANCH) #define LBR_NOT_SUPP -1 /* unsupported filter */ #define LBR_IGNORE 0 #define LBR_ANY \ (LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN | \ LBR_REL_JMP | LBR_IND_JMP | LBR_FAR) struct branch_entry { union { struct { u64 ip:58; u64 ip_sign_ext:5; u64 mispredict:1; } split; u64 full; } from; union { struct { u64 ip:58; u64 ip_sign_ext:3; u64 reserved:1; u64 spec:1; u64 valid:1; } split; u64 full; } to; }; static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val) { wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); } static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val) { wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); } static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx) { u64 val; rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); return val; } static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx) { u64 val; rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); return val; } static __always_inline u64 sign_ext_branch_ip(u64 ip) { u32 shift = 64 - boot_cpu_data.x86_virt_bits; return (u64)(((s64)ip << shift) >> shift); } static void amd_pmu_lbr_filter(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int br_sel = cpuc->br_sel, offset, type, i, j; bool compress = false; bool fused_only = false; u64 from, to; /* If sampling all branches, there is nothing to filter */ if (((br_sel & X86_BR_ALL) == X86_BR_ALL) && ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE)) fused_only = true; for (i = 0; i < cpuc->lbr_stack.nr; i++) { from = cpuc->lbr_entries[i].from; to = cpuc->lbr_entries[i].to; type = branch_type_fused(from, to, 0, &offset); /* * Adjust the branch from address in case of instruction * fusion where it points to an instruction preceding the * actual branch */ if (offset) { cpuc->lbr_entries[i].from += offset; if (fused_only) continue; } /* If type does not correspond, then discard */ if (type == X86_BR_NONE || (br_sel & type) != type) { cpuc->lbr_entries[i].from = 0; /* mark invalid */ compress = true; } if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) cpuc->lbr_entries[i].type = common_branch_type(type); } if (!compress) return; /* Remove all invalid entries */ for (i = 0; i < cpuc->lbr_stack.nr; ) { if (!cpuc->lbr_entries[i].from) { j = i; while (++j < cpuc->lbr_stack.nr) cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j]; cpuc->lbr_stack.nr--; if (!cpuc->lbr_entries[i].from) continue; } i++; } } static const int lbr_spec_map[PERF_BR_SPEC_MAX] = { PERF_BR_SPEC_NA, PERF_BR_SPEC_WRONG_PATH, PERF_BR_NON_SPEC_CORRECT_PATH, PERF_BR_SPEC_CORRECT_PATH, }; void amd_pmu_lbr_read(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_branch_entry *br = cpuc->lbr_entries; struct branch_entry entry; int out = 0, idx, i; if (!cpuc->lbr_users) return; for (i = 0; i < x86_pmu.lbr_nr; i++) { entry.from.full = amd_pmu_lbr_get_from(i); entry.to.full = amd_pmu_lbr_get_to(i); /* * Check if a branch has been logged; if valid = 0, spec = 0 * then no branch was recorded; if reserved = 1 then an * erroneous branch was recorded (see Erratum 1452) */ if ((!entry.to.split.valid && !entry.to.split.spec) || entry.to.split.reserved) continue; perf_clear_branch_entry_bitfields(br + out); br[out].from = sign_ext_branch_ip(entry.from.split.ip); br[out].to = sign_ext_branch_ip(entry.to.split.ip); br[out].mispred = entry.from.split.mispredict; br[out].predicted = !br[out].mispred; /* * Set branch speculation information using the status of * the valid and spec bits. * * When valid = 0, spec = 0, no branch was recorded and the * entry is discarded as seen above. * * When valid = 0, spec = 1, the recorded branch was * speculative but took the wrong path. * * When valid = 1, spec = 0, the recorded branch was * non-speculative but took the correct path. * * When valid = 1, spec = 1, the recorded branch was * speculative and took the correct path */ idx = (entry.to.split.valid << 1) | entry.to.split.spec; br[out].spec = lbr_spec_map[idx]; out++; } cpuc->lbr_stack.nr = out; /* * Internal register renaming always ensures that LBR From[0] and * LBR To[0] always represent the TOS */ cpuc->lbr_stack.hw_idx = 0; /* Perform further software filtering */ amd_pmu_lbr_filter(); } static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGNORE, [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR, [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT] = LBR_NOT_SUPP, [PERF_SAMPLE_BRANCH_IN_TX_SHIFT] = LBR_NOT_SUPP, [PERF_SAMPLE_BRANCH_NO_TX_SHIFT] = LBR_NOT_SUPP, [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP, [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT] = LBR_NOT_SUPP, [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT] = LBR_NOT_SUPP, }; static int amd_pmu_lbr_setup_filter(struct perf_event *event) { struct hw_perf_event_extra *reg = &event->hw.branch_reg; u64 br_type = event->attr.branch_sample_type; u64 mask = 0, v; int i; /* No LBR support */ if (!x86_pmu.lbr_nr) return -EOPNOTSUPP; if (br_type & PERF_SAMPLE_BRANCH_USER) mask |= X86_BR_USER; if (br_type & PERF_SAMPLE_BRANCH_KERNEL) mask |= X86_BR_KERNEL; /* Ignore BRANCH_HV here */ if (br_type & PERF_SAMPLE_BRANCH_ANY) mask |= X86_BR_ANY; if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) mask |= X86_BR_ANY_CALL; if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) mask |= X86_BR_IND_CALL; if (br_type & PERF_SAMPLE_BRANCH_COND) mask |= X86_BR_JCC; if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP) mask |= X86_BR_IND_JMP; if (br_type & PERF_SAMPLE_BRANCH_CALL) mask |= X86_BR_CALL | X86_BR_ZERO_CALL; if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE) mask |= X86_BR_TYPE_SAVE; reg->reg = mask; mask = 0; for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { if (!(br_type & BIT_ULL(i))) continue; v = lbr_select_map[i]; if (v == LBR_NOT_SUPP) return -EOPNOTSUPP; if (v != LBR_IGNORE) mask |= v; } /* Filter bits operate in suppress mode */ reg->config = mask ^ LBR_SELECT_MASK; return 0; } int amd_pmu_lbr_hw_config(struct perf_event *event) { int ret = 0; /* LBR is not recommended in counting mode */ if (!is_sampling_event(event)) return -EINVAL; ret = amd_pmu_lbr_setup_filter(event); if (!ret) event->attach_state |= PERF_ATTACH_SCHED_CB; return ret; } void amd_pmu_lbr_reset(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; if (!x86_pmu.lbr_nr) return; /* Reset all branch records individually */ for (i = 0; i < x86_pmu.lbr_nr; i++) { amd_pmu_lbr_set_from(i, 0); amd_pmu_lbr_set_to(i, 0); } cpuc->last_task_ctx = NULL; cpuc->last_log_id = 0; wrmsrl(MSR_AMD64_LBR_SELECT, 0); } void amd_pmu_lbr_add(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event_extra *reg = &event->hw.branch_reg; if (!x86_pmu.lbr_nr) return; if (has_branch_stack(event)) { cpuc->lbr_select = 1; cpuc->lbr_sel->config = reg->config; cpuc->br_sel = reg->reg; } perf_sched_cb_inc(event->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) amd_pmu_lbr_reset(); } void amd_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) return; if (has_branch_stack(event)) cpuc->lbr_select = 0; cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); perf_sched_cb_dec(event->pmu); } void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); /* * A context switch can flip the address space and LBR entries are * not tagged with an identifier. Hence, branches cannot be resolved * from the old address space and the LBR records should be wiped. */ if (cpuc->lbr_users && sched_in) amd_pmu_lbr_reset(); } void amd_pmu_lbr_enable_all(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); u64 lbr_select, dbg_ctl, dbg_extn_cfg; if (!cpuc->lbr_users || !x86_pmu.lbr_nr) return; /* Set hardware branch filter */ if (cpuc->lbr_select) { lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK; wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); } rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } void amd_pmu_lbr_disable_all(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); u64 dbg_ctl, dbg_extn_cfg; if (!cpuc->lbr_users || !x86_pmu.lbr_nr) return; rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); } __init int amd_pmu_lbr_init(void) { union cpuid_0x80000022_ebx ebx; if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2)) return -EOPNOTSUPP; /* Set number of entries */ ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz; pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); return 0; }