summaryrefslogtreecommitdiff
path: root/drivers/misc
diff options
context:
space:
mode:
authorOmer Shpigelman <oshpigelman@habana.ai>2019-04-01 22:31:22 +0300
committerOded Gabbay <oded.gabbay@gmail.com>2019-04-01 22:31:22 +0300
commit315bc055ed5667232859b17adb837f87c1629f81 (patch)
treed50cb8fd6e69f64223d9d37a86b2c7d6d0e8608d /drivers/misc
parenta1c92d1c2a67d7b61dd5ed2d3bce810269613d37 (diff)
downloadlinux-315bc055ed5667232859b17adb837f87c1629f81.tar.xz
habanalabs: add new IOCTL for debug, tracing and profiling
Habanalabs ASICs use the ARM coresight infrastructure to support debug, tracing and profiling of neural networks topologies. Because the coresight is configured using register writes and reads, and some of the registers hold sensitive information (e.g. the address in the device's DRAM where the trace data is written to), the user must go through the kernel driver to configure this mechanism. This patch implements the common code of the IOCTL and calls the ASIC-specific function for the actual H/W configuration. The IOCTL supports configuration of seven coresight components: ETR, ETF, STM, FUNNEL, BMON, SPMU and TIMESTAMP The user specifies which component he wishes to configure and provides a pointer to a structure (located in its process space) that contains the relevant configuration. The common code copies the relevant data from the user-space to kernel space and then calls the ASIC-specific function to do the H/W configuration. After the configuration is done, which is usually composed of several IOCTL calls depending on what the user wanted to trace, the user can start executing the topology. The trace data will be written to the user's area in the device's DRAM. After the tracing operation is complete, and user will call the IOCTL again to disable the tracing operation. The user also need to read values from registers for some of the components (e.g. the size of the trace data in the device's DRAM). In that case, the user will provide a pointer to an "output" structure in user-space, which the IOCTL code will fill according the to selected component. Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/habanalabs/goya/Makefile3
-rw-r--r--drivers/misc/habanalabs/goya/goya.c1
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h1
-rw-r--r--drivers/misc/habanalabs/goya/goya_coresight.c13
-rw-r--r--drivers/misc/habanalabs/habanalabs.h25
-rw-r--r--drivers/misc/habanalabs/habanalabs_ioctl.c113
6 files changed, 154 insertions, 2 deletions
diff --git a/drivers/misc/habanalabs/goya/Makefile b/drivers/misc/habanalabs/goya/Makefile
index e458e5ba500b..131432f677e2 100644
--- a/drivers/misc/habanalabs/goya/Makefile
+++ b/drivers/misc/habanalabs/goya/Makefile
@@ -1,3 +1,4 @@
subdir-ccflags-y += -I$(src)
-HL_GOYA_FILES := goya/goya.o goya/goya_security.o goya/goya_hwmgr.o
+HL_GOYA_FILES := goya/goya.o goya/goya_security.o goya/goya_hwmgr.o \
+ goya/goya_coresight.o
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 9f775dd81a52..3af883ada479 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4815,6 +4815,7 @@ static const struct hl_asic_funcs goya_funcs = {
.send_heartbeat = goya_send_heartbeat,
.enable_clock_gating = goya_init_clock_gating,
.disable_clock_gating = goya_disable_clock_gating,
+ .debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
.soft_reset_late_init = goya_soft_reset_late_init,
.hw_queues_lock = goya_hw_queues_lock,
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index b99d92f197eb..6f1f7715075d 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -175,6 +175,7 @@ void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
int goya_armcp_info_get(struct hl_device *hdev);
void goya_init_security(struct hl_device *hdev);
+int goya_debug_coresight(struct hl_device *hdev, void *data);
u64 goya_get_max_power(struct hl_device *hdev);
void goya_set_max_power(struct hl_device *hdev, u64 value);
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
new file mode 100644
index 000000000000..8957b8b1c0d6
--- /dev/null
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "goyaP.h"
+
+int goya_debug_coresight(struct hl_device *hdev, void *data)
+{
+ return -ENOTTY;
+}
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index fd7fcdf7050f..18a03657780a 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -483,6 +483,7 @@ enum hl_pll_frequency {
* @send_heartbeat: send is-alive packet to ArmCP and verify response.
* @enable_clock_gating: enable clock gating for reducing power consumption.
* @disable_clock_gating: disable clock for accessing registers on HBW.
+ * @debug_coresight: perform certain actions on Coresight for debugging.
* @is_device_idle: return true if device is idle, false otherwise.
* @soft_reset_late_init: perform certain actions needed after soft reset.
* @hw_queues_lock: acquire H/W queues lock.
@@ -557,6 +558,7 @@ struct hl_asic_funcs {
int (*send_heartbeat)(struct hl_device *hdev);
void (*enable_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
+ int (*debug_coresight)(struct hl_device *hdev, void *data);
bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size);
int (*soft_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
@@ -867,6 +869,29 @@ struct hl_vm {
u8 init_done;
};
+
+/*
+ * DEBUG, PROFILING STRUCTURE
+ */
+
+/**
+ * struct hl_debug_params - Coresight debug parameters.
+ * @input: pointer to component specific input parameters.
+ * @output: pointer to component specific output parameters.
+ * @output_size: size of output buffer.
+ * @reg_idx: relevant register ID.
+ * @op: component operation to execute.
+ * @enable: true if to enable component debugging, false otherwise.
+ */
+struct hl_debug_params {
+ void *input;
+ void *output;
+ u32 output_size;
+ u32 reg_idx;
+ u32 op;
+ bool enable;
+};
+
/*
* FILE PRIVATE STRUCTURE
*/
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index 9000ff615805..810e65446075 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -12,6 +12,17 @@
#include <linux/uaccess.h>
#include <linux/slab.h>
+static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
+ [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
+ [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
+ [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm),
+ [HL_DEBUG_OP_FUNNEL] = 0,
+ [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon),
+ [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu),
+ [HL_DEBUG_OP_TIMESTAMP] = 0
+
+};
+
static int device_status_info(struct hl_device *hdev, struct hl_info_args *args)
{
struct hl_info_device_status dev_stat = {0};
@@ -114,6 +125,71 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
}
+static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
+{
+ struct hl_debug_params *params;
+ void *input = NULL, *output = NULL;
+ int rc;
+
+ params = kzalloc(sizeof(*params), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ params->reg_idx = args->reg_idx;
+ params->enable = args->enable;
+ params->op = args->op;
+
+ if (args->input_ptr && args->input_size) {
+ input = memdup_user((const void __user *) args->input_ptr,
+ args->input_size);
+ if (IS_ERR(input)) {
+ rc = PTR_ERR(input);
+ input = NULL;
+ dev_err(hdev->dev,
+ "error %d when copying input debug data\n", rc);
+ goto out;
+ }
+
+ params->input = input;
+ }
+
+ if (args->output_ptr && args->output_size) {
+ output = kzalloc(args->output_size, GFP_KERNEL);
+ if (!output) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ params->output = output;
+ params->output_size = args->output_size;
+ }
+
+ rc = hdev->asic_funcs->debug_coresight(hdev, params);
+ if (rc) {
+ dev_err(hdev->dev,
+ "debug coresight operation failed %d\n", rc);
+ goto out;
+ }
+
+ if (output) {
+ if (copy_to_user((void __user *) (uintptr_t) args->output_ptr,
+ output,
+ args->output_size)) {
+ dev_err(hdev->dev,
+ "copy to user failed in debug ioctl\n");
+ rc = -EFAULT;
+ goto out;
+ }
+ }
+
+out:
+ kfree(params);
+ kfree(output);
+ kfree(input);
+
+ return rc;
+}
+
static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
{
struct hl_info_args *args = data;
@@ -156,6 +232,40 @@ static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
return rc;
}
+static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ struct hl_debug_args *args = data;
+ struct hl_device *hdev = hpriv->hdev;
+ int rc = 0;
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is %s. Can't execute DEBUG IOCTL\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ return -EBUSY;
+ }
+
+ switch (args->op) {
+ case HL_DEBUG_OP_ETR:
+ case HL_DEBUG_OP_ETF:
+ case HL_DEBUG_OP_STM:
+ case HL_DEBUG_OP_FUNNEL:
+ case HL_DEBUG_OP_BMON:
+ case HL_DEBUG_OP_SPMU:
+ case HL_DEBUG_OP_TIMESTAMP:
+ args->input_size =
+ min(args->input_size, hl_debug_struct_size[args->op]);
+ rc = debug_coresight(hdev, args);
+ break;
+ default:
+ dev_err(hdev->dev, "Invalid request %d\n", args->op);
+ rc = -ENOTTY;
+ break;
+ }
+
+ return rc;
+}
+
#define HL_IOCTL_DEF(ioctl, _func) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
@@ -164,7 +274,8 @@ static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
- HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl)
+ HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
+ HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
};
#define HL_CORE_IOCTL_COUNT ARRAY_SIZE(hl_ioctls)