diff options
Diffstat (limited to 'tools/tracing/rtla/src/timerlat_hist.c')
-rw-r--r-- | tools/tracing/rtla/src/timerlat_hist.c | 262 |
1 files changed, 238 insertions, 24 deletions
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 4b48af8a8309..47d3d8b53cb2 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <getopt.h> #include <stdlib.h> #include <string.h> @@ -10,15 +11,20 @@ #include <unistd.h> #include <stdio.h> #include <time.h> +#include <sched.h> +#include <pthread.h> #include "utils.h" #include "osnoise.h" #include "timerlat.h" +#include "timerlat_aa.h" +#include "timerlat_u.h" struct timerlat_hist_params { char *cpus; - char *monitored_cpus; + cpu_set_t monitored_cpus; char *trace_output; + char *cgroup_name; unsigned long long runtime; long long stop_us; long long stop_total_us; @@ -29,9 +35,14 @@ struct timerlat_hist_params { int duration; int set_sched; int dma_latency; + int cgroup; + int hk_cpus; + int no_aa; + int dump_tasks; + int user_hist; + cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; - char no_irq; char no_thread; char no_header; @@ -45,9 +56,11 @@ struct timerlat_hist_params { struct timerlat_hist_cpu { int *irq; int *thread; + int *user; int irq_count; int thread_count; + int user_count; unsigned long long min_irq; unsigned long long sum_irq; @@ -56,6 +69,10 @@ struct timerlat_hist_cpu { unsigned long long min_thread; unsigned long long sum_thread; unsigned long long max_thread; + + unsigned long long min_user; + unsigned long long sum_user; + unsigned long long max_user; }; struct timerlat_hist_data { @@ -80,6 +97,10 @@ timerlat_free_histogram(struct timerlat_hist_data *data) if (data->hist[cpu].thread) free(data->hist[cpu].thread); + + if (data->hist[cpu].user) + free(data->hist[cpu].user); + } /* one set of histograms per CPU */ @@ -116,15 +137,21 @@ static struct timerlat_hist_data data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1)); if (!data->hist[cpu].irq) goto cleanup; + data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1)); if (!data->hist[cpu].thread) goto cleanup; + + data->hist[cpu].user = calloc(1, sizeof(*data->hist->user) * (entries + 1)); + if (!data->hist[cpu].user) + goto cleanup; } /* set the min to max */ for (cpu = 0; cpu < nr_cpus; cpu++) { data->hist[cpu].min_irq = ~0; data->hist[cpu].min_thread = ~0; + data->hist[cpu].min_user = ~0; } return data; @@ -139,7 +166,7 @@ cleanup: */ static void timerlat_hist_update(struct osnoise_tool *tool, int cpu, - unsigned long long thread, + unsigned long long context, unsigned long long latency) { struct timerlat_hist_params *params = tool->params; @@ -154,18 +181,24 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu, if (data->bucket_size) bucket = latency / data->bucket_size; - if (!thread) { + if (!context) { hist = data->hist[cpu].irq; data->hist[cpu].irq_count++; update_min(&data->hist[cpu].min_irq, &latency); update_sum(&data->hist[cpu].sum_irq, &latency); update_max(&data->hist[cpu].max_irq, &latency); - } else { + } else if (context == 1) { hist = data->hist[cpu].thread; data->hist[cpu].thread_count++; update_min(&data->hist[cpu].min_thread, &latency); update_sum(&data->hist[cpu].sum_thread, &latency); update_max(&data->hist[cpu].max_thread, &latency); + } else { /* user */ + hist = data->hist[cpu].user; + data->hist[cpu].user_count++; + update_min(&data->hist[cpu].min_user, &latency); + update_sum(&data->hist[cpu].sum_user, &latency); + update_max(&data->hist[cpu].max_user, &latency); } if (bucket < entries) @@ -182,16 +215,16 @@ timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *data) { struct trace_instance *trace = data; - unsigned long long thread, latency; + unsigned long long context, latency; struct osnoise_tool *tool; int cpu = record->cpu; tool = container_of(trace, struct osnoise_tool, trace); - tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "context", record, &context, 1); tep_get_field_val(s, event, "timer_latency", record, &latency, 1); - timerlat_hist_update(tool, cpu, thread, latency); + timerlat_hist_update(tool, cpu, context, latency); return 0; } @@ -222,7 +255,7 @@ static void timerlat_hist_header(struct osnoise_tool *tool) trace_seq_printf(s, "Index"); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -233,6 +266,9 @@ static void timerlat_hist_header(struct osnoise_tool *tool) if (!params->no_thread) trace_seq_printf(s, " Thr-%03d", cpu); + + if (params->user_hist) + trace_seq_printf(s, " Usr-%03d", cpu); } trace_seq_printf(s, "\n"); @@ -258,7 +294,7 @@ timerlat_print_summary(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "count:"); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -271,6 +307,10 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!params->no_thread) trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].thread_count); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user_count); } trace_seq_printf(trace->seq, "\n"); @@ -278,7 +318,7 @@ timerlat_print_summary(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "min: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -291,6 +331,10 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!params->no_thread) trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_thread); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_user); } trace_seq_printf(trace->seq, "\n"); @@ -298,7 +342,7 @@ timerlat_print_summary(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "avg: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -315,7 +359,15 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!params->no_thread) { if (data->hist[cpu].thread_count) trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_thread / data->hist[cpu].thread_count); + data->hist[cpu].sum_thread / data->hist[cpu].thread_count); + else + trace_seq_printf(trace->seq, " - "); + } + + if (params->user_hist) { + if (data->hist[cpu].user_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_user / data->hist[cpu].user_count); else trace_seq_printf(trace->seq, " - "); } @@ -326,7 +378,7 @@ timerlat_print_summary(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "max: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -339,6 +391,10 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!params->no_thread) trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_thread); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_user); } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -366,7 +422,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t bucket * data->bucket_size); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -384,6 +440,12 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t data->hist[cpu].thread[bucket]); } + if (params->user_hist) { + total += data->hist[cpu].user[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user[bucket]); + } + } if (total == 0 && !params->with_zeros) { @@ -400,7 +462,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t trace_seq_printf(trace->seq, "over: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -413,6 +475,10 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t if (!params->no_thread) trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].thread[data->entries]); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user[data->entries]); } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -431,9 +497,9 @@ static void timerlat_hist_usage(char *usage) char *msg[] = { "", " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\", - " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\", + " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", - " [--no-index] [--with-zeros] [--dma-latency us]", + " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -442,13 +508,17 @@ static void timerlat_hist_usage(char *usage) " -T/--thread us: stop trace if the thread latency is higher than the argument in us", " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", " -c/--cpus cpus: run the tracer only on the given cpus", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[m|h|d]: duration of the session in seconds", + " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", " -D/--debug: print debug info", " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <filter>: enable a trace event filter to the previous -e event", " --trigger <trigger>: enable a trace event trigger to the previous -e event", " -n/--nano: display data in nanoseconds", + " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage", " -b/--bucket-size N: set the histogram bucket size (default 1)", " -E/--entries N: set the number of entries of the histogram (default 256)", " --no-irq: ignore IRQ latencies", @@ -464,6 +534,7 @@ static void timerlat_hist_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", + " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads", NULL, }; @@ -506,10 +577,12 @@ static struct timerlat_hist_params static struct option long_options[] = { {"auto", required_argument, 0, 'a'}, {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, {"bucket-size", required_argument, 0, 'b'}, {"debug", no_argument, 0, 'D'}, {"entries", required_argument, 0, 'E'}, {"duration", required_argument, 0, 'd'}, + {"house-keeping", required_argument, 0, 'H'}, {"help", no_argument, 0, 'h'}, {"irq", required_argument, 0, 'i'}, {"nano", no_argument, 0, 'n'}, @@ -518,6 +591,7 @@ static struct timerlat_hist_params {"stack", required_argument, 0, 's'}, {"thread", required_argument, 0, 'T'}, {"trace", optional_argument, 0, 't'}, + {"user-threads", no_argument, 0, 'u'}, {"event", required_argument, 0, 'e'}, {"no-irq", no_argument, 0, '0'}, {"no-thread", no_argument, 0, '1'}, @@ -528,13 +602,15 @@ static struct timerlat_hist_params {"trigger", required_argument, 0, '6'}, {"filter", required_argument, 0, '7'}, {"dma-latency", required_argument, 0, '8'}, + {"no-aa", no_argument, 0, '9'}, + {"dump-task", no_argument, 0, '\1'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:b:d:e:E:Dhi:np:P:s:t::T:0123456:7:8:", + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:u0123456:7:8:9\1", long_options, &option_index); /* detect the end of the options. */ @@ -547,6 +623,7 @@ static struct timerlat_hist_params /* set thread stop to auto_thresh */ params->stop_total_us = auto_thresh; + params->stop_us = auto_thresh; /* get stack trace */ params->print_stack = auto_thresh; @@ -556,11 +633,21 @@ static struct timerlat_hist_params break; case 'c': - retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); if (retval) timerlat_hist_usage("\nInvalid -c cpu list\n"); params->cpus = optarg; break; + case 'C': + params->cgroup = 1; + if (!optarg) { + /* will inherit this cgroup */ + params->cgroup_name = NULL; + } else if (*optarg == '=') { + /* skip the = */ + params->cgroup_name = ++optarg; + } + break; case 'b': params->bucket_size = get_llong_from_str(optarg); if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) @@ -595,6 +682,14 @@ static struct timerlat_hist_params case '?': timerlat_hist_usage(NULL); break; + case 'H': + params->hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); + if (retval) { + err_msg("Error parsing house keeping CPUs\n"); + exit(EXIT_FAILURE); + } + break; case 'i': params->stop_us = get_llong_from_str(optarg); break; @@ -625,6 +720,9 @@ static struct timerlat_hist_params else params->trace_output = "timerlat_trace.txt"; break; + case 'u': + params->user_hist = 1; + break; case '0': /* no irq */ params->no_irq = 1; break; @@ -672,6 +770,12 @@ static struct timerlat_hist_params exit(EXIT_FAILURE); } break; + case '9': + params->no_aa = 1; + break; + case '\1': + params->dump_tasks = 1; + break; default: timerlat_hist_usage("Invalid option"); } @@ -688,6 +792,12 @@ static struct timerlat_hist_params if (params->no_index && !params->with_zeros) timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense"); + /* + * Auto analysis only happens if stop tracing, thus: + */ + if (!params->stop_us && !params->stop_total_us) + params->no_aa = 1; + return params; } @@ -697,7 +807,7 @@ static struct timerlat_hist_params static int timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params) { - int retval; + int retval, i; if (!params->sleep_time) params->sleep_time = 1; @@ -708,6 +818,9 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param err_msg("Failed to apply CPUs config\n"); goto out_err; } + } else { + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) + CPU_SET(i, ¶ms->monitored_cpus); } if (params->stop_us) { @@ -742,6 +855,32 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param } } + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + + if (params->user_hist) { + retval = osnoise_set_workload(tool->context, 0); + if (retval) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; + } + } + return 0; out_err: @@ -802,10 +941,13 @@ int timerlat_hist_main(int argc, char *argv[]) { struct timerlat_hist_params *params; struct osnoise_tool *record = NULL; + struct timerlat_u_params params_u; struct osnoise_tool *tool = NULL; + struct osnoise_tool *aa = NULL; struct trace_instance *trace; int dma_latency_fd = -1; int return_value = 1; + pthread_t timerlat_u; int retval; params = timerlat_hist_parse_args(argc, argv); @@ -840,6 +982,14 @@ int timerlat_hist_main(int argc, char *argv[]) } } + if (params->cgroup && !params->user_hist) { + retval = set_comm_cgroup("timerlat/", params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } + if (params->dma_latency >= 0) { dma_latency_fd = set_cpu_dma_latency(params->dma_latency); if (dma_latency_fd < 0) { @@ -848,8 +998,6 @@ int timerlat_hist_main(int argc, char *argv[]) } } - trace_instance_start(trace); - if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -862,13 +1010,61 @@ int timerlat_hist_main(int argc, char *argv[]) if (retval) goto out_hist; } + } - trace_instance_start(&record->trace); + if (!params->no_aa) { + aa = osnoise_init_tool("timerlat_aa"); + if (!aa) + goto out_hist; + + retval = timerlat_aa_init(aa, params->dump_tasks); + if (retval) { + err_msg("Failed to enable the auto analysis instance\n"); + goto out_hist; + } + + retval = enable_timerlat(&aa->trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_hist; + } } + /* + * Start the tracers here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + if (!params->no_aa) + trace_instance_start(&aa->trace); + trace_instance_start(trace); + tool->start_time = time(NULL); timerlat_hist_set_signals(params); + if (params->user_hist) { + /* rtla asked to stop */ + params_u.should_run = 1; + /* all threads left */ + params_u.stopped_running = 0; + + params_u.set = ¶ms->monitored_cpus; + if (params->set_sched) + params_u.sched_param = ¶ms->sched_param; + else + params_u.sched_param = NULL; + + params_u.cgroup_name = params->cgroup_name; + + retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, ¶ms_u); + if (retval) + err_msg("Error creating timerlat user-space threads\n"); + } + while (!stop_tracing) { sleep(params->sleep_time); @@ -885,6 +1081,18 @@ int timerlat_hist_main(int argc, char *argv[]) if (trace_is_off(&tool->trace, &record->trace)) break; + + /* is there still any user-threads ? */ + if (params->user_hist) { + if (params_u.stopped_running) { + debug_msg("timerlat user-space threads stopped!\n"); + break; + } + } + } + if (params->user_hist && !params_u.stopped_running) { + params_u.should_run = 0; + sleep(1); } timerlat_print_stats(params, tool); @@ -893,6 +1101,10 @@ int timerlat_hist_main(int argc, char *argv[]) if (trace_is_off(&tool->trace, &record->trace)) { printf("rtla timerlat hit stop tracing\n"); + + if (!params->no_aa) + timerlat_auto_analysis(params->stop_us, params->stop_total_us); + if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); save_trace_to_file(record->trace.inst, params->trace_output); @@ -900,12 +1112,14 @@ int timerlat_hist_main(int argc, char *argv[]) } out_hist: + timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: timerlat_free_histogram(tool->data); + osnoise_destroy_tool(aa); osnoise_destroy_tool(record); osnoise_destroy_tool(tool); free(params); |