diff options
Diffstat (limited to 'drivers/misc/habanalabs/common/habanalabs_drv.c')
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs_drv.c | 82 |
1 files changed, 63 insertions, 19 deletions
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 37edb69a7255..f733ead605e7 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -54,10 +54,15 @@ MODULE_PARM_DESC(boot_error_status_mask, #define PCI_IDS_GAUDI 0x1000 #define PCI_IDS_GAUDI_SEC 0x1010 +#define PCI_IDS_GAUDI2 0x1020 +#define PCI_IDS_GAUDI2_SEC 0x1030 + static const struct pci_device_id ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), }, + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), }, { 0, } }; MODULE_DEVICE_TABLE(pci, ids); @@ -84,6 +89,12 @@ static enum hl_asic_type get_asic_type(u16 device) case PCI_IDS_GAUDI_SEC: asic_type = ASIC_GAUDI_SEC; break; + case PCI_IDS_GAUDI2: + asic_type = ASIC_GAUDI2; + break; + case PCI_IDS_GAUDI2_SEC: + asic_type = ASIC_GAUDI2_SEC; + break; default: asic_type = ASIC_INVALID; break; @@ -96,6 +107,7 @@ static bool is_asic_secured(enum hl_asic_type asic_type) { switch (asic_type) { case ASIC_GAUDI_SEC: + case ASIC_GAUDI2_SEC: return true; default: return false; @@ -137,6 +149,7 @@ int hl_device_open(struct inode *inode, struct file *filp) mutex_init(&hpriv->notifier_event.lock); mutex_init(&hpriv->restore_phase_mutex); + mutex_init(&hpriv->ctx_lock); kref_init(&hpriv->refcount); nonseekable_open(inode, filp); @@ -152,7 +165,8 @@ int hl_device_open(struct inode *inode, struct file *filp) "Can't open %s because it is %s\n", dev_name(hdev->dev), hdev->status[status]); - if (status == HL_DEVICE_STATUS_IN_RESET) + if (status == HL_DEVICE_STATUS_IN_RESET || + status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE) rc = -EAGAIN; else rc = -EPERM; @@ -195,8 +209,9 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_debugfs_add_file(hpriv); - atomic_set(&hdev->last_error.cs_timeout.write_disable, 0); - atomic_set(&hdev->last_error.razwi.write_disable, 0); + atomic_set(&hdev->last_error.cs_timeout.write_enable, 1); + atomic_set(&hdev->last_error.razwi.write_enable, 1); + hdev->last_error.undef_opcode.write_enable = true; hdev->open_counter++; hdev->last_successful_open_jif = jiffies; @@ -209,6 +224,7 @@ out_err: hl_mem_mgr_fini(&hpriv->mem_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); filp->private_data = NULL; + mutex_destroy(&hpriv->ctx_lock); mutex_destroy(&hpriv->restore_phase_mutex); mutex_destroy(&hpriv->notifier_event.lock); put_pid(hpriv->taskpid); @@ -277,43 +293,56 @@ out_err: static void set_driver_behavior_per_device(struct hl_device *hdev) { - hdev->pldm = 0; + hdev->nic_ports_mask = 0; hdev->fw_components = FW_TYPE_ALL_TYPES; + hdev->mmu_enable = MMU_EN_ALL; hdev->cpu_queues_enable = 1; - hdev->heartbeat = 1; - hdev->mmu_enable = 1; - hdev->sram_scrambler_enable = 1; - hdev->dram_scrambler_enable = 1; - hdev->bmc_enable = 1; + hdev->pldm = 0; hdev->hard_reset_on_fw_events = 1; + hdev->bmc_enable = 1; hdev->reset_on_preboot_fail = 1; - hdev->reset_if_device_not_idle = 1; - - hdev->reset_pcilink = 0; - hdev->axi_drain = 0; + hdev->heartbeat = 1; } static void copy_kernel_module_params_to_device(struct hl_device *hdev) { + hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); + hdev->major = hl_major; hdev->memory_scrub = memory_scrub; hdev->reset_on_lockup = reset_on_lockup; hdev->boot_error_status_mask = boot_error_status_mask; +} - if (timeout_locked) - hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000); - else - hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; +static void fixup_device_params_per_asic(struct hl_device *hdev) +{ + switch (hdev->asic_type) { + case ASIC_GOYA: + case ASIC_GAUDI: + case ASIC_GAUDI_SEC: + hdev->reset_upon_device_release = 0; + break; + default: + hdev->reset_upon_device_release = 1; + break; + } } static int fixup_device_params(struct hl_device *hdev) { - hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); + int tmp_timeout; + + tmp_timeout = timeout_locked; hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; + if (tmp_timeout) + hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000); + else + hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; + hdev->stop_on_err = true; hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; @@ -321,6 +350,18 @@ static int fixup_device_params(struct hl_device *hdev) /* Enable only after the initialization of the device */ hdev->disabled = true; + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) && + (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) { + pr_err("Preboot must be set along with other components"); + return -EINVAL; + } + + /* If CPU queues not enabled, no way to do heartbeat */ + if (!hdev->cpu_queues_enable) + hdev->heartbeat = 0; + + fixup_device_params_per_asic(hdev); + return 0; } @@ -345,7 +386,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) if (!hdev) return -ENOMEM; - /* can be NULL in case of simulator device */ + /* Will be NULL in case of simulator device */ hdev->pdev = pdev; /* Assign status description string */ @@ -355,6 +396,9 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], "in device creation", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], + "in reset after device release", HL_STR_MAX); + /* First, we must find out which ASIC are we handling. This is needed * to configure the behavior of the driver (kernel parameters) |