From a332518fda4731c07394164b3edcbb6efaf4c4d7 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 23 May 2019 16:13:50 -0400 Subject: SUNRPC: Count ops completing with tk_status < 0 We often see various error conditions with NFS4.x that show up with a very high operation count all completing with tk_status < 0 in a short period of time. Add a count to rpc_iostats to record on a per-op basis the ops that complete in this manner, which will enable lower overhead diagnostics. Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- include/linux/sunrpc/metrics.h | 7 ++++++- net/sunrpc/stats.c | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index 1b3751327575..0ee3f7052846 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -30,7 +30,7 @@ #include #include -#define RPC_IOSTATS_VERS "1.0" +#define RPC_IOSTATS_VERS "1.1" struct rpc_iostats { spinlock_t om_lock; @@ -66,6 +66,11 @@ struct rpc_iostats { ktime_t om_queue, /* queued for xmit */ om_rtt, /* RPC RTT */ om_execute; /* RPC execution */ + /* + * The count of operations that complete with tk_status < 0. + * These statuses usually indicate error conditions. + */ + unsigned long om_error_status; } ____cacheline_aligned; struct rpc_task; diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 2f7bde82450b..48ea776364f8 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -177,6 +177,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, execute = ktime_sub(now, task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute); + if (task->tk_status < 0) + op_metrics->om_error_status++; spin_unlock(&op_metrics->om_lock); @@ -219,13 +221,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b) a->om_queue = ktime_add(a->om_queue, b->om_queue); a->om_rtt = ktime_add(a->om_rtt, b->om_rtt); a->om_execute = ktime_add(a->om_execute, b->om_execute); + a->om_error_status += b->om_error_status; } static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, int op, const struct rpc_procinfo *procs) { _print_name(seq, op, procs); - seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n", + seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n", stats->om_ops, stats->om_ntrans, stats->om_timeouts, @@ -233,7 +236,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats, stats->om_bytes_recv, ktime_to_ms(stats->om_queue), ktime_to_ms(stats->om_rtt), - ktime_to_ms(stats->om_execute)); + ktime_to_ms(stats->om_execute), + stats->om_error_status); } void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) -- cgit v1.2.3