From e5a5886d7ae32b7afebfffecca340e466e4be2d1 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Sat, 23 Jan 2016 03:58:12 -0500 Subject: powerpc/perf/hv-24x7: Fix usage with chip events. 24x7 counters can belong to different domains (core, chip, virtual CPU etc). For events in the 'chip' domain, sysfs entry currently looks like: $ cd /sys/bus/event_source/devices/hv_24x7/events $ cat PM_XLINK_CYCLES__PHYS_CHIP domain=0x1,offset=0x230,core=?,lpar=0x0 where the required parameter, 'core=?' is specified with perf as: perf stat -C 0 -e hv_24x7/PM_XLINK_CYCLES__PHYS_CHIP,core=1/ \ /bin/true This is inconsistent in that 'core' is a required parameter for a chip event. Instead, have the the sysfs entry display 'chip=?' for chip events: $ cd /sys/bus/event_source/devices/hv_24x7/events $ cat PM_XLINK_CYCLES__PHYS_CHIP domain=0x1,offset=0x230,chip=?,lpar=0x0 We also need to add a 'chip' entry in the sysfs format directory: $ ls /sys/bus/event_source/devices/hv_24x7/format chip core domain lpar offset vcpu ^^^^ (new) so the perf tool can automatically check usage and format the chip parameter correctly: $ perf stat -C 0 -v -e hv_24x7/PM_XLINK_CYCLES__PHYS_CHIP/ \ /bin/true Required parameter 'chip' not specified invalid or unsupported event: 'hv_24x7/PM_XLINK_CYCLES__PHYS_CHIP/' $ perf stat -C 0 -v -e hv_24x7/PM_XLINK_CYCLES__PHYS_CHIP,chip=1/ \ /bin/true hv_24x7/PM_XLINK_CYCLES__PHYS_CHIP,chip=1/: 0 6628908 6628908 Performance counter stats for 'CPU(s) 0': 0 hv_24x7/PM_XLINK_CYCLES__PHYS_CHIP,chip=1/ 0.006606970 seconds time elapsed Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/perf/hv-24x7.c') diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 9f9dfda9ed2c..b7a9a03ca59d 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -101,6 +101,7 @@ static bool catalog_entry_domain_is_valid(unsigned domain) EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3); /* u16 */ EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31); +EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31); EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31); /* u32, see "data_offset" */ EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63); @@ -115,6 +116,7 @@ static struct attribute *format_attrs[] = { &format_attr_domain.attr, &format_attr_offset.attr, &format_attr_core.attr, + &format_attr_chip.attr, &format_attr_vcpu.attr, &format_attr_lpar.attr, NULL, @@ -289,10 +291,16 @@ static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain) const char *sindex; const char *lpar; - if (is_physical_domain(domain)) { + switch (domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: + lpar = "0x0"; + sindex = "chip"; + break; + case HV_PERF_DOMAIN_PHYS_CORE: lpar = "0x0"; sindex = "core"; - } else { + break; + default: lpar = "?"; sindex = "vcpu"; } @@ -1089,10 +1097,16 @@ static int add_event_to_24x7_request(struct perf_event *event, return -EINVAL; } - if (is_physical_domain(event_get_domain(event))) + switch (event_get_domain(event)) { + case HV_PERF_DOMAIN_PHYS_CHIP: + idx = event_get_chip(event); + break; + case HV_PERF_DOMAIN_PHYS_CORE: idx = event_get_core(event); - else + break; + default: idx = event_get_vcpu(event); + } i = request_buffer->num_requests++; req = &request_buffer->requests[i]; -- cgit v1.2.3 From 2b206ee6b0df03a89783c6a9ada363122f918800 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Mon, 25 Jan 2016 23:05:36 -0500 Subject: powerpc/perf/hv-24x7: Display change in counter values For 24x7 counters, perf displays the raw value of the 24x7 counter, which is a monotonically increasing value. perf stat -C 0 -e \ 'hv_24x7/HPM_0THRD_NON_IDLE_CCYC__PHYS_CORE,core=1/' \ sleep 1 Performance counter stats for 'CPU(s) 0': 9,105,403,170 hv_24x7/HPM_0THRD_NON_IDLE_CCYC__PHYS_CORE,core=1/ 0.000425751 seconds time elapsed In the typical usage of 'perf stat' this counter value is not as useful as the _change_ in the counter value over the duration of the application. Have h_24x7_event_init() set the event's prev_count to the raw value of the 24x7 counter at the time of initialization. When the application terminates, hv_24x7_event_read() will compute the change in value and report to the perf tool. Similarly, for the transaction interface, clear the event count to 0 at the beginning of the transaction. perf stat -C 0 -e \ 'hv_24x7/HPM_0THRD_NON_IDLE_CCYC__PHYS_CORE,core=1/' \ sleep 1 Performance counter stats for 'CPU(s) 0': 245,758 hv_24x7/HPM_0THRD_NON_IDLE_CCYC__PHYS_CORE,core=1/ 1.006366383 seconds time elapsed Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/perf/hv-24x7.c') diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index b7a9a03ca59d..77b958f213a0 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1222,11 +1222,12 @@ static int h_24x7_event_init(struct perf_event *event) return -EACCES; } - /* see if the event complains */ + /* Get the initial value of the counter for this event */ if (single_24x7_request(event, &ct)) { pr_devel("test hcall failed\n"); return -EIO; } + (void)local64_xchg(&event->hw.prev_count, ct); return 0; } @@ -1289,6 +1290,16 @@ static void h_24x7_event_read(struct perf_event *event) h24x7hw = &get_cpu_var(hv_24x7_hw); h24x7hw->events[i] = event; put_cpu_var(h24x7hw); + /* + * Clear the event count so we can compute the _change_ + * in the 24x7 raw counter value at the end of the txn. + * + * Note that we could alternatively read the 24x7 value + * now and save its value in event->hw.prev_count. But + * that would require issuing a hcall, which would then + * defeat the purpose of using the txn interface. + */ + local64_set(&event->count, 0); } put_cpu_var(hv_24x7_reqb); -- cgit v1.2.3 From d34171e88aeed4a99c00c7f2af3d5c553e7a4972 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 16 Feb 2016 20:07:51 -0500 Subject: powerpc/perf/hv-24x7: Display domain indices in sysfs To help users determine domains, display the domain indices used by the kernel in sysfs. $ cat /sys/bus/event_source/devices/hv_24x7/interface/domains 1: Physical Chip 2: Physical Core 3: VCPU Home Core 4: VCPU Home Chip 5: VCPU Home Node 6: VCPU Remote Node Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 41 +++++++++++++++++++++++++++++++++++++++++ arch/powerpc/perf/hv-24x7.h | 1 + 2 files changed, 42 insertions(+) (limited to 'arch/powerpc/perf/hv-24x7.c') diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 77b958f213a0..36b29fd9295d 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -68,6 +68,24 @@ static bool is_physical_domain(unsigned domain) } } +static const char *domain_name(unsigned domain) +{ + if (!domain_is_valid(domain)) + return NULL; + + switch (domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: return "Physical Chip"; + case HV_PERF_DOMAIN_PHYS_CORE: return "Physical Core"; + case HV_PERF_DOMAIN_VCPU_HOME_CORE: return "VCPU Home Core"; + case HV_PERF_DOMAIN_VCPU_HOME_CHIP: return "VCPU Home Chip"; + case HV_PERF_DOMAIN_VCPU_HOME_NODE: return "VCPU Home Node"; + case HV_PERF_DOMAIN_VCPU_REMOTE_NODE: return "VCPU Remote Node"; + } + + WARN_ON_ONCE(domain); + return NULL; +} + static bool catalog_entry_domain_is_valid(unsigned domain) { return is_physical_domain(domain); @@ -969,6 +987,27 @@ e_free: return ret; } +static ssize_t domains_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + int d, n, count = 0; + const char *str; + + for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) { + str = domain_name(d); + if (!str) + continue; + + n = sprintf(page, "%d: %s\n", d, str); + if (n < 0) + break; + + count += n; + page += n; + } + return count; +} + #define PAGE_0_ATTR(_name, _fmt, _expr) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *dev_attr, \ @@ -997,6 +1036,7 @@ PAGE_0_ATTR(catalog_version, "%lld\n", PAGE_0_ATTR(catalog_len, "%lld\n", (unsigned long long)be32_to_cpu(page_0->length) * 4096); static BIN_ATTR_RO(catalog, 0/* real length varies */); +static DEVICE_ATTR_RO(domains); static struct bin_attribute *if_bin_attrs[] = { &bin_attr_catalog, @@ -1006,6 +1046,7 @@ static struct bin_attribute *if_bin_attrs[] = { static struct attribute *if_attrs[] = { &dev_attr_catalog_len.attr, &dev_attr_catalog_version.attr, + &dev_attr_domains.attr, NULL, }; diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h index c57d67dc9f3f..791455e7f5cf 100644 --- a/arch/powerpc/perf/hv-24x7.h +++ b/arch/powerpc/perf/hv-24x7.h @@ -7,6 +7,7 @@ enum hv_perf_domains { #define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v, #include "hv-24x7-domains.h" #undef DOMAIN + HV_PERF_DOMAIN_MAX, }; struct hv_24x7_request { -- cgit v1.2.3 From 8f69dc701aac170421117adf7e04d8ec7031a05f Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 16 Feb 2016 22:21:25 -0500 Subject: powerpc/perf/24x7: Eliminate domain suffix in event names The Physical Core events of the 24x7 PMU can be monitored across various domains (physical core, vcpu home core, vcpu home node etc). For each of these core events, we currently create multiple events in sysfs, one for each domain the event can be monitored in. These events are distinguished by their suffixes like __PHYS_CORE, __VCPU_HOME_CORE etc. Rather than creating multiple such entries, we could let the user specify make 'domain' index a required parameter and let the user specify a value for it (like they currently specify the core index). $ cat /sys/bus/event_source/devices/hv_24x7/events/HPM_CCYC domain=?,offset=0x98,core=?,lpar=0x0 $ perf stat -C 0 -e hv_24x7/HPM_CCYC,domain=2,core=1/ true (the 'domain=?' and 'core=?' in sysfs tell perf tool to enforce them as required parameters). This simplifies the interface and allows users to identify events by the name specified in the catalog (User can determine the domain index by referring to '/sys/bus/event_source/devices/hv_24x7/interface/domains'). Eliminating the event suffix eliminates several functions and simplifies code. Note that Physical Chip events can only be monitored in the chip domain so those events have the domain set to 1 (rather than =?) and users don't need to specify the domain index for the Chip events. $ cat /sys/bus/event_source/devices/hv_24x7/events/PM_XLINK_CYCLES domain=1,offset=0x230,chip=?,lpar=0x0 $ perf stat -C 0 -e hv_24x7/PM_XLINK_CYCLES,chip=1/ true Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 149 ++++++++++++++++++++------------------------ 1 file changed, 66 insertions(+), 83 deletions(-) (limited to 'arch/powerpc/perf/hv-24x7.c') diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 36b29fd9295d..59012e750abe 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -27,20 +27,6 @@ #include "hv-24x7-catalog.h" #include "hv-common.h" -static const char *event_domain_suffix(unsigned domain) -{ - switch (domain) { -#define DOMAIN(n, v, x, c) \ - case HV_PERF_DOMAIN_##n: \ - return "__" #n; -#include "hv-24x7-domains.h" -#undef DOMAIN - default: - WARN(1, "unknown domain %d\n", domain); - return "__UNKNOWN_DOMAIN_SUFFIX"; - } -} - static bool domain_is_valid(unsigned domain) { switch (domain) { @@ -294,38 +280,70 @@ static unsigned long h_get_24x7_catalog_page(char page[], version, index); } -static unsigned core_domains[] = { - HV_PERF_DOMAIN_PHYS_CORE, - HV_PERF_DOMAIN_VCPU_HOME_CORE, - HV_PERF_DOMAIN_VCPU_HOME_CHIP, - HV_PERF_DOMAIN_VCPU_HOME_NODE, - HV_PERF_DOMAIN_VCPU_REMOTE_NODE, -}; -/* chip event data always yeilds a single event, core yeilds multiple */ -#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains) - +/* + * Each event we find in the catalog, will have a sysfs entry. Format the + * data for this sysfs entry based on the event's domain. + * + * Events belonging to the Chip domain can only be monitored in that domain. + * i.e the domain for these events is a fixed/knwon value. + * + * Events belonging to the Core domain can be monitored either in the physical + * core or in one of the virtual CPU domains. So the domain value for these + * events must be specified by the user (i.e is a required parameter). Format + * the Core events with 'domain=?' so the perf-tool can error check required + * parameters. + * + * NOTE: For the Core domain events, rather than making domain a required + * parameter we could default it to PHYS_CORE and allowe users to + * override the domain to one of the VCPU domains. + * + * However, this can make the interface a little inconsistent. + * + * If we set domain=2 (PHYS_CHIP) and allow user to override this field + * the user may be tempted to also modify the "offset=x" field in which + * can lead to confusing usage. Consider the HPM_PCYC (offset=0x18) and + * HPM_INST (offset=0x20) events. With: + * + * perf stat -e hv_24x7/HPM_PCYC,offset=0x20/ + * + * we end up monitoring HPM_INST, while the command line has HPM_PCYC. + * + * By not assigning a default value to the domain for the Core events, + * we can have simple guidelines: + * + * - Specifying values for parameters with "=?" is required. + * + * - Specifying (i.e overriding) values for other parameters + * is undefined. + */ static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain) { const char *sindex; const char *lpar; + const char *domain_str; + char buf[8]; switch (domain) { case HV_PERF_DOMAIN_PHYS_CHIP: + snprintf(buf, sizeof(buf), "%d", domain); + domain_str = buf; lpar = "0x0"; sindex = "chip"; break; case HV_PERF_DOMAIN_PHYS_CORE: + domain_str = "?"; lpar = "0x0"; sindex = "core"; break; default: + domain_str = "?"; lpar = "?"; sindex = "vcpu"; } return kasprintf(GFP_KERNEL, - "domain=0x%x,offset=0x%x,%s=?,lpar=%s", - domain, + "domain=%s,offset=0x%x,%s=?,lpar=%s", + domain_str, be16_to_cpu(event->event_counter_offs) + be16_to_cpu(event->event_group_record_offs), sindex, @@ -365,6 +383,15 @@ static struct attribute *device_str_attr_create_(char *name, char *str) return &attr->attr.attr; } +/* + * Allocate and initialize strings representing event attributes. + * + * NOTE: The strings allocated here are never destroyed and continue to + * exist till shutdown. This is to allow us to create as many events + * from the catalog as possible, even if we encounter errors with some. + * In case of changes to error paths in future, these may need to be + * freed by the caller. + */ static struct attribute *device_str_attr_create(char *name, int name_max, int name_nonce, char *str, size_t str_max) @@ -396,16 +423,6 @@ out_s: return NULL; } -static void device_str_attr_destroy(struct attribute *attr) -{ - struct dev_ext_attribute *d; - - d = container_of(attr, struct dev_ext_attribute, attr.attr); - kfree(d->var); - kfree(d->attr.attr.name); - kfree(d); -} - static struct attribute *event_to_attr(unsigned ix, struct hv_24x7_event_data *event, unsigned domain, @@ -413,7 +430,6 @@ static struct attribute *event_to_attr(unsigned ix, { int event_name_len; char *ev_name, *a_ev_name, *val; - const char *ev_suffix; struct attribute *attr; if (!domain_is_valid(domain)) { @@ -426,14 +442,13 @@ static struct attribute *event_to_attr(unsigned ix, if (!val) return NULL; - ev_suffix = event_domain_suffix(domain); ev_name = event_name(event, &event_name_len); if (!nonce) - a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s", - (int)event_name_len, ev_name, ev_suffix); + a_ev_name = kasprintf(GFP_KERNEL, "%.*s", + (int)event_name_len, ev_name); else - a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d", - (int)event_name_len, ev_name, ev_suffix, nonce); + a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d", + (int)event_name_len, ev_name, nonce); if (!a_ev_name) goto out_val; @@ -478,45 +493,14 @@ event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce) return device_str_attr_create(name, nl, nonce, desc, dl); } -static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs, +static int event_data_to_attrs(unsigned ix, struct attribute **attrs, struct hv_24x7_event_data *event, int nonce) { - unsigned i; - - switch (event->domain) { - case HV_PERF_DOMAIN_PHYS_CHIP: - *attrs = event_to_attr(ix, event, event->domain, nonce); - return 1; - case HV_PERF_DOMAIN_PHYS_CORE: - for (i = 0; i < ARRAY_SIZE(core_domains); i++) { - attrs[i] = event_to_attr(ix, event, core_domains[i], - nonce); - if (!attrs[i]) { - pr_warn("catalog event %u: individual attr %u " - "creation failure\n", ix, i); - for (; i; i--) - device_str_attr_destroy(attrs[i - 1]); - return -1; - } - } - return i; - default: - pr_warn("catalog event %u: domain %u is not allowed in the " - "catalog\n", ix, event->domain); + *attrs = event_to_attr(ix, event, event->domain, nonce); + if (!*attrs) return -1; - } -} -static size_t event_to_attr_ct(struct hv_24x7_event_data *event) -{ - switch (event->domain) { - case HV_PERF_DOMAIN_PHYS_CHIP: - return 1; - case HV_PERF_DOMAIN_PHYS_CORE: - return ARRAY_SIZE(core_domains); - default: - return 0; - } + return 0; } static unsigned long vmalloc_to_phys(void *v) @@ -752,9 +736,8 @@ static int create_events_from_catalog(struct attribute ***events_, goto e_free; } - if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) { - pr_err("event_entry_count %zu is invalid\n", - event_entry_count); + if (SIZE_MAX - 1 < event_entry_count) { + pr_err("event_entry_count %zu is invalid\n", event_entry_count); ret = -EIO; goto e_free; } @@ -827,7 +810,7 @@ static int create_events_from_catalog(struct attribute ***events_, continue; } - attr_max += event_to_attr_ct(event); + attr_max++; } event_idx_last = event_idx; @@ -877,12 +860,12 @@ static int create_events_from_catalog(struct attribute ***events_, nonce = event_uniq_add(&ev_uniq, name, nl, event->domain); ct = event_data_to_attrs(event_idx, events + event_attr_ct, event, nonce); - if (ct <= 0) { + if (ct < 0) { pr_warn("event %zu (%.*s) creation failure, skipping\n", event_idx, nl, name); junk_events++; } else { - event_attr_ct += ct; + event_attr_ct++; event_descs[desc_ct] = event_to_desc_attr(event, nonce); if (event_descs[desc_ct]) desc_ct++; -- cgit v1.2.3