From adda800c049a4580aaec4d710a9972cdad5b14e3 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 3 May 2023 14:47:54 +0300 Subject: accel/habanalabs: print max timeout value on CS stuck If a workload got stuck, we print an error to the kernel log about it. Add to that print the configured max timeout value, as that value is not fixed between ASICs and in addition it can be configured using a kernel module parameter. Signed-off-by: Oded Gabbay Reviewed-by: Ofir Bitton --- .../accel/habanalabs/common/command_submission.c | 26 +++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'drivers/accel') diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index 4ff3256fdf8b..cc205bad476e 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -804,12 +804,14 @@ out: static void cs_timedout(struct work_struct *work) { + struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); + bool skip_reset_on_timeout, device_reset = false; struct hl_device *hdev; u64 event_mask = 0x0; + uint timeout_sec; int rc; - struct hl_cs *cs = container_of(work, struct hl_cs, - work_tdr.work); - bool skip_reset_on_timeout = cs->skip_reset_on_timeout, device_reset = false; + + skip_reset_on_timeout = cs->skip_reset_on_timeout; rc = cs_get_unless_zero(cs); if (!rc) @@ -840,29 +842,31 @@ static void cs_timedout(struct work_struct *work) event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT; } + timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000; + switch (cs->type) { case CS_TYPE_SIGNAL: dev_err(hdev->dev, - "Signal command submission %llu has not finished in time!\n", - cs->sequence); + "Signal command submission %llu has not finished in %u seconds!\n", + cs->sequence, timeout_sec); break; case CS_TYPE_WAIT: dev_err(hdev->dev, - "Wait command submission %llu has not finished in time!\n", - cs->sequence); + "Wait command submission %llu has not finished in %u seconds!\n", + cs->sequence, timeout_sec); break; case CS_TYPE_COLLECTIVE_WAIT: dev_err(hdev->dev, - "Collective Wait command submission %llu has not finished in time!\n", - cs->sequence); + "Collective Wait command submission %llu has not finished in %u seconds!\n", + cs->sequence, timeout_sec); break; default: dev_err(hdev->dev, - "Command submission %llu has not finished in time!\n", - cs->sequence); + "Command submission %llu has not finished in %u seconds!\n", + cs->sequence, timeout_sec); break; } -- cgit v1.2.3