summaryrefslogtreecommitdiff
path: root/drivers/accel
diff options
context:
space:
mode:
authorKoby Elbaz <kelbaz@habana.ai>2023-03-27 11:56:16 +0300
committerOded Gabbay <ogabbay@kernel.org>2023-06-08 12:35:54 +0300
commit67d19a2f49b07b2c4a15e6a96e1de46d95209ae4 (patch)
tree1344825cc09958c0d27777e0dfe7bc8f3f0f1274 /drivers/accel
parent3b9abb4fa642ee28bcd22a0398087a2d87d3c987 (diff)
downloadlinux-67d19a2f49b07b2c4a15e6a96e1de46d95209ae4.tar.xz
accel/habanalabs: poll for device status update following WFE cmd
Currently, we rely on COMMS protocol's ack to verify that WFE command has been acknowledged by the FW. However, this does not guarantee that the device status has been updated. Although unlikely, this could trigger a race since the driver expects the device to be halted at that stage, but it might not be. Therefore, we increase WFE's robustness by polling on the status register that will be updated once the device is actually halted. Signed-off-by: Koby Elbaz <kelbaz@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/accel')
-rw-r--r--drivers/accel/habanalabs/common/firmware_if.c28
1 files changed, 23 insertions, 5 deletions
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index e48f024d8649..eb51d7f70aec 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -1368,8 +1368,10 @@ void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev)
void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
{
- struct static_fw_load_mgr *static_loader =
- &hdev->fw_loader.static_loader;
+ struct fw_load_mgr *fw_loader = &hdev->fw_loader;
+ u32 status, cpu_boot_status_reg, cpu_timeout;
+ struct static_fw_load_mgr *static_loader;
+ struct pre_fw_load_props *pre_fw_load;
int rc;
if (hdev->device_cpu_is_halted)
@@ -1377,12 +1379,28 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
/* Stop device CPU to make sure nothing bad happens */
if (hdev->asic_prop.dynamic_fw_load) {
+ pre_fw_load = &fw_loader->pre_fw_load;
+ cpu_timeout = fw_loader->cpu_timeout;
+ cpu_boot_status_reg = pre_fw_load->cpu_boot_status_reg;
+
rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
- COMMS_GOTO_WFE, 0, false,
- hdev->fw_loader.cpu_timeout);
- if (rc)
+ COMMS_GOTO_WFE, 0, false, cpu_timeout);
+ if (rc) {
dev_err(hdev->dev, "Failed sending COMMS_GOTO_WFE\n");
+ } else {
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_boot_status_reg,
+ status,
+ status == CPU_BOOT_STATUS_IN_WFE,
+ hdev->fw_poll_interval_usec,
+ cpu_timeout);
+ if (rc)
+ dev_err(hdev->dev, "Current status=%u. Timed-out updating to WFE\n",
+ status);
+ }
} else {
+ static_loader = &hdev->fw_loader.static_loader;
WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
msleep(static_loader->cpu_reset_wait_msec);