summaryrefslogtreecommitdiff
path: root/arch/x86/events/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events/core.c')
-rw-r--r--arch/x86/events/core.c194
1 files changed, 130 insertions, 64 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5b0dd07b1ef1..65ab6460aed4 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -41,6 +41,8 @@
#include <asm/desc.h>
#include <asm/ldt.h>
#include <asm/unwind.h>
+#include <asm/uprobes.h>
+#include <asm/ibt.h>
#include "perf_event.h"
@@ -189,29 +191,31 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
#ifdef CONFIG_X86_LOCAL_APIC
-static inline int get_possible_num_counters(void)
+static inline u64 get_possible_counter_mask(void)
{
- int i, num_counters = x86_pmu.num_counters;
+ u64 cntr_mask = x86_pmu.cntr_mask64;
+ int i;
if (!is_hybrid())
- return num_counters;
+ return cntr_mask;
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++)
- num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters);
+ cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64;
- return num_counters;
+ return cntr_mask;
}
static bool reserve_pmc_hardware(void)
{
- int i, num_counters = get_possible_num_counters();
+ u64 cntr_mask = get_possible_counter_mask();
+ int i, end;
- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail;
}
- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail;
}
@@ -219,13 +223,14 @@ static bool reserve_pmc_hardware(void)
return true;
eventsel_fail:
- for (i--; i >= 0; i--)
+ end = i;
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
release_evntsel_nmi(x86_pmu_config_addr(i));
-
- i = num_counters;
+ i = X86_PMC_IDX_MAX;
perfctr_fail:
- for (i--; i >= 0; i--)
+ end = i;
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
release_perfctr_nmi(x86_pmu_event_addr(i));
return false;
@@ -233,9 +238,10 @@ perfctr_fail:
static void release_pmc_hardware(void)
{
- int i, num_counters = get_possible_num_counters();
+ u64 cntr_mask = get_possible_counter_mask();
+ int i;
- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
release_perfctr_nmi(x86_pmu_event_addr(i));
release_evntsel_nmi(x86_pmu_config_addr(i));
}
@@ -248,7 +254,8 @@ static void release_pmc_hardware(void) {}
#endif
-bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
+bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+ unsigned long *fixed_cntr_mask)
{
u64 val, val_fail = -1, val_new= ~0;
int i, reg, reg_fail = -1, ret = 0;
@@ -259,7 +266,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
* Check to see if the BIOS enabled any of the counters, if so
* complain and bail.
*/
- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
if (ret)
@@ -273,12 +280,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
}
}
- if (num_counters_fixed) {
+ if (*(u64 *)fixed_cntr_mask) {
reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
- for (i = 0; i < num_counters_fixed; i++) {
+ for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(i, pmu))
continue;
if (val & (0x03ULL << i*4)) {
@@ -619,7 +626,7 @@ int x86_pmu_hw_config(struct perf_event *event)
event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
if (event->attr.type == event->pmu->type)
- event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+ event->hw.config |= x86_pmu_get_event_config(event);
if (event->attr.sample_period && x86_pmu.limit_period) {
s64 left = event->attr.sample_period;
@@ -679,7 +686,7 @@ void x86_pmu_disable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
u64 val;
@@ -736,7 +743,7 @@ void x86_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask))
@@ -975,7 +982,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
- int num_counters = hybrid(cpuc->pmu, num_counters);
struct event_constraint *c;
struct perf_event *e;
int n0, i, wmin, wmax, unsched = 0;
@@ -1051,7 +1057,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/* slow path */
if (i != n) {
- int gpmax = num_counters;
+ int gpmax = x86_pmu_max_num_counters(cpuc->pmu);
/*
* Do not allow scheduling of more than half the available
@@ -1072,7 +1078,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
* the extra Merge events needed by large increment events.
*/
if (x86_pmu.flags & PMU_FL_PAIR) {
- gpmax = num_counters - cpuc->n_pair;
+ gpmax -= cpuc->n_pair;
WARN_ON(gpmax <= 0);
}
@@ -1157,12 +1163,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
*/
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
- int num_counters = hybrid(cpuc->pmu, num_counters);
- int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
struct perf_event *event;
int n, max_count;
- max_count = num_counters + num_counters_fixed;
+ max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu);
/* current number of events already accepted */
n = cpuc->n_events;
@@ -1234,8 +1238,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
fallthrough;
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
- (idx - INTEL_PMC_IDX_FIXED);
+ hwc->event_base = x86_pmu_fixed_ctr_addr(idx - INTEL_PMC_IDX_FIXED);
hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
INTEL_PMC_FIXED_RDPMC_BASE;
break;
@@ -1519,19 +1522,22 @@ static void x86_pmu_start(struct perf_event *event, int flags)
void perf_event_print_debug(void)
{
u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+ unsigned long *cntr_mask, *fixed_cntr_mask;
+ struct event_constraint *pebs_constraints;
+ struct cpu_hw_events *cpuc;
u64 pebs, debugctl;
- int cpu = smp_processor_id();
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- int num_counters = hybrid(cpuc->pmu, num_counters);
- int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
- struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
- unsigned long flags;
- int idx;
+ int cpu, idx;
- if (!num_counters)
- return;
+ guard(irqsave)();
- local_irq_save(flags);
+ cpu = smp_processor_id();
+ cpuc = &per_cpu(cpu_hw_events, cpu);
+ cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+ fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
+ pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
+
+ if (!*(u64 *)cntr_mask)
+ return;
if (x86_pmu.version >= 2) {
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
@@ -1555,7 +1561,7 @@ void perf_event_print_debug(void)
}
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
- for (idx = 0; idx < num_counters; idx++) {
+ for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
rdmsrl(x86_pmu_event_addr(idx), pmc_count);
@@ -1568,15 +1574,14 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
cpu, idx, prev_left);
}
- for (idx = 0; idx < num_counters_fixed; idx++) {
+ for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(idx, cpuc->pmu))
continue;
- rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+ rdmsrl(x86_pmu_fixed_ctr_addr(idx), pmc_count);
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
cpu, idx, pmc_count);
}
- local_irq_restore(flags);
}
void x86_pmu_stop(struct perf_event *event, int flags)
@@ -1682,7 +1687,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -2038,18 +2043,15 @@ static void _x86_pmu_read(struct perf_event *event)
static_call(x86_pmu_update)(event);
}
-void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
- u64 intel_ctrl)
+void x86_pmu_show_pmu_cap(struct pmu *pmu)
{
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
- pr_info("... generic registers: %d\n", num_counters);
+ pr_info("... generic registers: %d\n", x86_pmu_num_counters(pmu));
pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
pr_info("... max period: %016Lx\n", x86_pmu.max_period);
- pr_info("... fixed-purpose events: %lu\n",
- hweight64((((1ULL << num_counters_fixed) - 1)
- << INTEL_PMC_IDX_FIXED) & intel_ctrl));
- pr_info("... event mask: %016Lx\n", intel_ctrl);
+ pr_info("... fixed-purpose events: %d\n", x86_pmu_num_counters_fixed(pmu));
+ pr_info("... event mask: %016Lx\n", hybrid(pmu, intel_ctrl));
}
static int __init init_hw_perf_events(void)
@@ -2086,7 +2088,7 @@ static int __init init_hw_perf_events(void)
pmu_check_apic();
/* sanity check that the hardware exists or is emulated */
- if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
+ if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask))
goto out_bad_pmu;
pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -2097,14 +2099,17 @@ static int __init init_hw_perf_events(void)
quirk->func();
if (!x86_pmu.intel_ctrl)
- x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+ x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
+
+ if (!x86_pmu.config_mask)
+ x86_pmu.config_mask = X86_RAW_EVENT_MASK;
perf_events_lapic_init();
register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
unconstrained = (struct event_constraint)
- __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters, 0, 0);
+ __EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64,
+ 0, x86_pmu_num_counters(NULL), 0, 0);
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
@@ -2113,11 +2118,8 @@ static int __init init_hw_perf_events(void)
pmu.attr_update = x86_pmu.attr_update;
- if (!is_hybrid()) {
- x86_pmu_show_pmu_cap(x86_pmu.num_counters,
- x86_pmu.num_counters_fixed,
- x86_pmu.intel_ctrl);
- }
+ if (!is_hybrid())
+ x86_pmu_show_pmu_cap(NULL);
if (!x86_pmu.read)
x86_pmu.read = _x86_pmu_read;
@@ -2481,10 +2483,10 @@ void perf_clear_dirty_counters(void)
for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
if (i >= INTEL_PMC_IDX_FIXED) {
/* Metrics and fake events don't have corresponding HW counters. */
- if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+ if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
continue;
- wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+ wrmsrl(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0);
} else {
wrmsrl(x86_pmu_event_addr(i), 0);
}
@@ -2547,6 +2549,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
struct device_attribute *attr,
const char *buf, size_t count)
{
+ static DEFINE_MUTEX(rdpmc_mutex);
unsigned long val;
ssize_t ret;
@@ -2560,6 +2563,8 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
if (x86_pmu.attr_rdpmc_broken)
return -ENOTSUPP;
+ guard(mutex)(&rdpmc_mutex);
+
if (val != x86_pmu.attr_rdpmc) {
/*
* Changing into or out of never available or always available,
@@ -2813,6 +2818,46 @@ static unsigned long get_segment_base(unsigned int segment)
return get_desc_base(desc);
}
+#ifdef CONFIG_UPROBES
+/*
+ * Heuristic-based check if uprobe is installed at the function entry.
+ *
+ * Under assumption of user code being compiled with frame pointers,
+ * `push %rbp/%ebp` is a good indicator that we are indeed at function entry.
+ *
+ * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
+ * If we get this wrong, captured stack trace might have one extra bogus
+ * entry, but the rest of stack trace will still be meaningful.
+ */
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+ struct arch_uprobe *auprobe;
+
+ if (!current->utask)
+ return false;
+
+ auprobe = current->utask->auprobe;
+ if (!auprobe)
+ return false;
+
+ /* push %rbp/%ebp */
+ if (auprobe->insn[0] == 0x55)
+ return true;
+
+ /* endbr64 (64-bit only) */
+ if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn))
+ return true;
+
+ return false;
+}
+
+#else
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* CONFIG_UPROBES */
+
#ifdef CONFIG_IA32_EMULATION
#include <linux/compat.h>
@@ -2824,6 +2869,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
unsigned long ss_base, cs_base;
struct stack_frame_ia32 frame;
const struct stack_frame_ia32 __user *fp;
+ u32 ret_addr;
if (user_64bit_mode(regs))
return 0;
@@ -2833,6 +2879,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
fp = compat_ptr(ss_base + regs->bp);
pagefault_disable();
+
+ /* see perf_callchain_user() below for why we do this */
+ if (is_uprobe_at_func_entry(regs) &&
+ !get_user(ret_addr, (const u32 __user *)regs->sp))
+ perf_callchain_store(entry, ret_addr);
+
while (entry->nr < entry->max_stack) {
if (!valid_user_frame(fp, sizeof(frame)))
break;
@@ -2861,6 +2913,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
{
struct stack_frame frame;
const struct stack_frame __user *fp;
+ unsigned long ret_addr;
if (perf_guest_state()) {
/* TODO: We don't support guest os callchain now */
@@ -2884,6 +2937,19 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
return;
pagefault_disable();
+
+ /*
+ * If we are called from uprobe handler, and we are indeed at the very
+ * entry to user function (which is normally a `push %rbp` instruction,
+ * under assumption of application being compiled with frame pointers),
+ * we should read return address from *regs->sp before proceeding
+ * to follow frame pointers, otherwise we'll skip immediate caller
+ * as %rbp is not yet set up.
+ */
+ if (is_uprobe_at_func_entry(regs) &&
+ !get_user(ret_addr, (const unsigned long __user *)regs->sp))
+ perf_callchain_store(entry, ret_addr);
+
while (entry->nr < entry->max_stack) {
if (!valid_user_frame(fp, sizeof(frame)))
break;
@@ -2983,8 +3049,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
* base PMU holds the correct number of counters for P-cores.
*/
cap->version = x86_pmu.version;
- cap->num_counters_gp = x86_pmu.num_counters;
- cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+ cap->num_counters_gp = x86_pmu_num_counters(NULL);
+ cap->num_counters_fixed = x86_pmu_num_counters_fixed(NULL);
cap->bit_width_gp = x86_pmu.cntval_bits;
cap->bit_width_fixed = x86_pmu.cntval_bits;
cap->events_mask = (unsigned int)x86_pmu.events_maskl;