summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/common/firmware_if.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common/firmware_if.c')
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c238
1 files changed, 216 insertions, 22 deletions
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 09706c571e95..832dd5c5bb06 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -293,6 +293,7 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
u32 cpu_security_boot_status_reg)
{
u32 err_val, security_val;
+ bool err_exists = false;
/* Some of the firmware status codes are deprecated in newer f/w
* versions. In those versions, the errors are reported
@@ -307,48 +308,102 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
if (!(err_val & CPU_BOOT_ERR0_ENABLED))
return 0;
- if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+ if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
dev_err(hdev->dev,
"Device boot error - DRAM initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
- if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
dev_err(hdev->dev,
"Device boot error - Thermal Sensor initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
dev_warn(hdev->dev,
"Device boot warning - Skipped DRAM initialization\n");
+ /* This is a warning so we don't want it to disable the
+ * device
+ */
+ err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
+ }
if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
- if (hdev->bmc_enable)
- dev_warn(hdev->dev,
+ if (hdev->bmc_enable) {
+ dev_err(hdev->dev,
"Device boot error - Skipped waiting for BMC\n");
- else
+ err_exists = true;
+ } else {
+ dev_info(hdev->dev,
+ "Device boot message - Skipped waiting for BMC\n");
+ /* This is an info so we don't want it to disable the
+ * device
+ */
err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
+ }
}
- if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+ if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
dev_err(hdev->dev,
"Device boot error - Serdes data from BMC not available\n");
- if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
dev_err(hdev->dev,
"Device boot error - NIC F/W initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
dev_warn(hdev->dev,
"Device boot warning - security not ready\n");
- if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
+ /* This is a warning so we don't want it to disable the
+ * device
+ */
+ err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
dev_err(hdev->dev, "Device boot error - security failure\n");
- if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
dev_err(hdev->dev, "Device boot error - eFuse failure\n");
- if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
dev_err(hdev->dev, "Device boot error - PLL failure\n");
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
+ dev_err(hdev->dev,
+ "Device boot error - device unusable\n");
+ err_exists = true;
+ }
security_val = RREG32(cpu_security_boot_status_reg);
if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device security status %#x\n",
security_val);
- if (err_val & ~CPU_BOOT_ERR0_ENABLED)
+ if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
+ dev_err(hdev->dev,
+ "Device boot error - unknown error 0x%08x\n",
+ err_val);
+ err_exists = true;
+ }
+
+ if (err_exists)
return -EIO;
return 0;
@@ -419,6 +474,73 @@ out:
return rc;
}
+static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
+{
+ struct cpucp_array_data_packet *pkt;
+ size_t total_pkt_size, data_size;
+ u64 result;
+ int rc;
+
+ /* skip sending this info for unsupported ASICs */
+ if (!hdev->asic_funcs->get_msi_info)
+ return 0;
+
+ data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32);
+ total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size;
+
+ /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
+ total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
+
+ /* total_pkt_size is casted to u16 later on */
+ if (total_pkt_size > USHRT_MAX) {
+ dev_err(hdev->dev, "CPUCP array data is too big\n");
+ return -EINVAL;
+ }
+
+ pkt = kzalloc(total_pkt_size, GFP_KERNEL);
+ if (!pkt)
+ return -ENOMEM;
+
+ pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES);
+
+ hdev->asic_funcs->get_msi_info((u32 *)&pkt->data);
+
+ pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET <<
+ CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt,
+ total_pkt_size, 0, &result);
+
+ /*
+ * in case packet result is invalid it means that FW does not support
+ * this feature and will use default/hard coded MSI values. no reason
+ * to stop the boot
+ */
+ if (rc && result == cpucp_packet_invalid)
+ rc = 0;
+
+ if (rc)
+ dev_err(hdev->dev, "failed to send CPUCP array data\n");
+
+ kfree(pkt);
+
+ return rc;
+}
+
+int hl_fw_cpucp_handshake(struct hl_device *hdev,
+ u32 cpu_security_boot_status_reg,
+ u32 boot_err0_reg)
+{
+ int rc;
+
+ rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg,
+ boot_err0_reg);
+ if (rc)
+ return rc;
+
+ return hl_fw_send_msi_info_msg(hdev);
+}
+
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
{
struct cpucp_packet pkt = {};
@@ -539,18 +661,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
return rc;
}
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
+ enum pll_index *pll_index)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u8 pll_byte, pll_bit_off;
+ bool dynamic_pll;
+
+ if (input_pll_index >= PLL_MAX) {
+ dev_err(hdev->dev, "PLL index %d is out of range\n",
+ input_pll_index);
+ return -EINVAL;
+ }
+
+ dynamic_pll = prop->fw_security_status_valid &&
+ (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
+
+ if (!dynamic_pll) {
+ /*
+ * in case we are working with legacy FW (each asic has unique
+ * PLL numbering) extract the legacy numbering
+ */
+ *pll_index = hdev->legacy_pll_map[input_pll_index];
+ return 0;
+ }
+
+ /* PLL map is a u8 array */
+ pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3];
+ pll_bit_off = input_pll_index & 0x7;
+
+ if (!(pll_byte & BIT(pll_bit_off))) {
+ dev_err(hdev->dev, "PLL index %d is not supported\n",
+ input_pll_index);
+ return -EINVAL;
+ }
+
+ *pll_index = input_pll_index;
+
+ return 0;
+}
+
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
u16 *pll_freq_arr)
{
struct cpucp_packet pkt;
+ enum pll_index used_pll_idx;
u64 result;
int rc;
+ rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
+ if (rc)
+ return rc;
+
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
- pkt.pll_type = __cpu_to_le16(pll_index);
+ pkt.pll_type = __cpu_to_le16((u16)used_pll_idx);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
@@ -565,6 +732,29 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
return rc;
}
+int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
+{
+ struct cpucp_packet pkt;
+ u64 result;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+ CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_CPUCP_INFO_TIMEOUT_USEC, &result);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
+ return rc;
+ }
+
+ *power = result;
+
+ return rc;
+}
+
static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
{
/* Some of the status codes below are deprecated in newer f/w
@@ -623,7 +813,11 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 status, security_status;
int rc;
- if (!hdev->cpu_enable)
+ /* pldm was added for cases in which we use preboot on pldm and want
+ * to load boot fit, but we can't wait for preboot because it runs
+ * very slowly
+ */
+ if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
return 0;
/* Need to check two possible scenarios:
@@ -677,16 +871,16 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
prop->fw_security_status_valid = 1;
+ /* FW security should be derived from PCI ID, we keep this
+ * check for backward compatibility
+ */
if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
prop->fw_security_disabled = false;
- else
- prop->fw_security_disabled = true;
if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
} else {
prop->fw_security_status_valid = 0;
- prop->fw_security_disabled = true;
}
dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
@@ -710,7 +904,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 status;
int rc;
- if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU))
+ if (!(hdev->fw_components & FW_TYPE_BOOT_CPU))
return 0;
dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
@@ -801,7 +995,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
goto out;
}
- if (!(hdev->fw_loading & FW_TYPE_LINUX)) {
+ if (!(hdev->fw_components & FW_TYPE_LINUX)) {
dev_info(hdev->dev, "Skip loading Linux F/W\n");
goto out;
}