summaryrefslogtreecommitdiff
path: root/drivers/accel
diff options
context:
space:
mode:
author farah kassabri <fkassabri@habana.ai> 2023-08-27 19:01:20 +0300
committer Oded Gabbay <ogabbay@kernel.org> 2023-10-09 12:37:21 +0300
commit 051868d93cfd342b4ff8e6297b93a6a43dbe81b3 (patch)
tree d1e8be371a20ba38c3343b091c82bf99aa9ff94e /drivers/accel
parent 764bfd138f359423b299b7bf3fcbabb56b981ef5 (diff)
download linux-051868d93cfd342b4ff8e6297b93a6a43dbe81b3.tar.xz
accel/habanalabs: prevent sending heartbeat before events are enabled
Now that the heartbeat mechanism has been expanded to also serve as the EQ health check, we shouldn't send heartbeat messages to FW before the driver allows events to be received from FW. If the driver sends two heartbeats before it enables events to be received from FW, the EQ health check will fail and reset the device.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/accel')
-rw-r--r-- drivers/accel/habanalabs/common/device.c 10
1 files changed, 3 insertions, 7 deletions
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 1d1ccd8d5c75..0c9ba09c1b75 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -994,12 +994,7 @@ static void hl_device_eq_heartbeat(struct hl_device *hdev)
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
struct asic_fixed_properties *prop = &hdev->asic_prop;
- /*
- * This feature supported in FW version 1.12.0 45.2.0 and above,
- * only on those FW versions eq_health_check_supported will be set.
- * Start checking eq health only after driver has enabled events from FW.
- */
- if (!prop->cpucp_info.eq_health_check_supported || !hdev->init_done)
+ if (!prop->cpucp_info.eq_health_check_supported)
return;
if (hdev->eq_heartbeat_received)
@@ -1015,7 +1010,8 @@ static void hl_device_heartbeat(struct work_struct *work)
struct hl_info_fw_err_info info = {0};
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
- if (!hl_device_operational(hdev, NULL))
+ /* Start heartbeat checks only after driver has enabled events from FW */
+ if (!hl_device_operational(hdev, NULL) || !hdev->init_done)
goto reschedule;
/*