summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel/perf.c
diff options
context:
space:
mode:
authorLu Baolu <baolu.lu@linux.intel.com>2021-06-10 05:01:05 +0300
committerJoerg Roedel <jroedel@suse.de>2021-06-10 10:06:13 +0300
commit55ee5e67a59a1b6f388d7a1c7b24022145f47a3e (patch)
tree5fd3b8d9c40d9a22e28067a6c6bc3135e33b4a1c /drivers/iommu/intel/perf.c
parente93a67f5a0eef3e9ab5b4649cac5c3b831c6a9db (diff)
downloadlinux-55ee5e67a59a1b6f388d7a1c7b24022145f47a3e.tar.xz
iommu/vt-d: Add common code for dmar latency performance monitors
The execution time of some operations is very performance critical, such as cache invalidation and PRQ processing time. This adds some common code to monitor the execution time range of those operations. The interfaces include enabling/disabling, checking status, updating sampling data and providing a common string format for users. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-14-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel <jroedel@suse.de>
Diffstat (limited to 'drivers/iommu/intel/perf.c')
-rw-r--r--drivers/iommu/intel/perf.c166
1 files changed, 166 insertions, 0 deletions
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
new file mode 100644
index 000000000000..faaa96dda437
--- /dev/null
+++ b/drivers/iommu/intel/perf.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * perf.c - performance monitor
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/intel-iommu.h>
+
+#include "perf.h"
+
+static DEFINE_SPINLOCK(latency_lock);
+
+bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+
+ return lstat && lstat[type].enabled;
+}
+
+int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
+{
+ struct latency_statistic *lstat;
+ unsigned long flags;
+ int ret = -EBUSY;
+
+ if (dmar_latency_enabled(iommu, type))
+ return 0;
+
+ spin_lock_irqsave(&latency_lock, flags);
+ if (!iommu->perf_statistic) {
+ iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM,
+ GFP_ATOMIC);
+ if (!iommu->perf_statistic) {
+ ret = -ENOMEM;
+ goto unlock_out;
+ }
+ }
+
+ lstat = iommu->perf_statistic;
+
+ if (!lstat[type].enabled) {
+ lstat[type].enabled = true;
+ lstat[type].counter[COUNTS_MIN] = UINT_MAX;
+ ret = 0;
+ }
+unlock_out:
+ spin_unlock_irqrestore(&latency_lock, flags);
+
+ return ret;
+}
+
+void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+ unsigned long flags;
+
+ if (!dmar_latency_enabled(iommu, type))
+ return;
+
+ spin_lock_irqsave(&latency_lock, flags);
+ memset(&lstat[type], 0, sizeof(*lstat) * DMAR_LATENCY_NUM);
+ spin_unlock_irqrestore(&latency_lock, flags);
+}
+
+void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+ unsigned long flags;
+ u64 min, max;
+
+ if (!dmar_latency_enabled(iommu, type))
+ return;
+
+ spin_lock_irqsave(&latency_lock, flags);
+ if (latency < 100)
+ lstat[type].counter[COUNTS_10e2]++;
+ else if (latency < 1000)
+ lstat[type].counter[COUNTS_10e3]++;
+ else if (latency < 10000)
+ lstat[type].counter[COUNTS_10e4]++;
+ else if (latency < 100000)
+ lstat[type].counter[COUNTS_10e5]++;
+ else if (latency < 1000000)
+ lstat[type].counter[COUNTS_10e6]++;
+ else if (latency < 10000000)
+ lstat[type].counter[COUNTS_10e7]++;
+ else
+ lstat[type].counter[COUNTS_10e8_plus]++;
+
+ min = lstat[type].counter[COUNTS_MIN];
+ max = lstat[type].counter[COUNTS_MAX];
+ lstat[type].counter[COUNTS_MIN] = min_t(u64, min, latency);
+ lstat[type].counter[COUNTS_MAX] = max_t(u64, max, latency);
+ lstat[type].counter[COUNTS_SUM] += latency;
+ lstat[type].samples++;
+ spin_unlock_irqrestore(&latency_lock, flags);
+}
+
+static char *latency_counter_names[] = {
+ " <0.1us",
+ " 0.1us-1us", " 1us-10us", " 10us-100us",
+ " 100us-1ms", " 1ms-10ms", " >=10ms",
+ " min(us)", " max(us)", " average(us)"
+};
+
+static char *latency_type_names[] = {
+ " inv_iotlb", " inv_devtlb", " inv_iec",
+ " svm_prq"
+};
+
+int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+ unsigned long flags;
+ int bytes = 0, i, j;
+
+ memset(str, 0, size);
+
+ for (i = 0; i < COUNTS_NUM; i++)
+ bytes += snprintf(str + bytes, size - bytes,
+ "%s", latency_counter_names[i]);
+
+ spin_lock_irqsave(&latency_lock, flags);
+ for (i = 0; i < DMAR_LATENCY_NUM; i++) {
+ if (!dmar_latency_enabled(iommu, i))
+ continue;
+
+ bytes += snprintf(str + bytes, size - bytes,
+ "\n%s", latency_type_names[i]);
+
+ for (j = 0; j < COUNTS_NUM; j++) {
+ u64 val = lstat[i].counter[j];
+
+ switch (j) {
+ case COUNTS_MIN:
+ if (val == UINT_MAX)
+ val = 0;
+ else
+ val /= 1000;
+ break;
+ case COUNTS_MAX:
+ val /= 1000;
+ break;
+ case COUNTS_SUM:
+ if (lstat[i].samples)
+ val /= (lstat[i].samples * 1000);
+ else
+ val = 0;
+ break;
+ default:
+ break;
+ }
+
+ bytes += snprintf(str + bytes, size - bytes,
+ "%12lld", val);
+ }
+ }
+ spin_unlock_irqrestore(&latency_lock, flags);
+
+ return bytes;
+}