summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c153
1 files changed, 126 insertions, 27 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7eeaf0aa7f81..5bb444bb36ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -162,6 +162,65 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
static DEVICE_ATTR(pcie_replay_count, 0444,
amdgpu_device_get_pcie_replay_count, NULL);
+static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t ppos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ ssize_t bytes_read;
+
+ switch (ppos) {
+ case AMDGPU_SYS_REG_STATE_XGMI:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_WAFL:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_PCIE:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR_1:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return bytes_read;
+}
+
+BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
+ AMDGPU_SYS_REG_STATE_END);
+
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (!amdgpu_asic_get_reg_state_supported(adev))
+ return 0;
+
+ ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+
+ return ret;
+}
+
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_asic_get_reg_state_supported(adev))
+ return;
+ sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+}
+
/**
* DOC: board_info
*
@@ -1540,7 +1599,7 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
if (adev->mman.keep_stolen_vga_memory)
return false;
- return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
+ return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
}
/*
@@ -1551,11 +1610,15 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
* https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
* https://gitlab.freedesktop.org/drm/amd/-/issues/2663
*/
-static bool amdgpu_device_pcie_dynamic_switching_supported(void)
+static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
{
#if IS_ENABLED(CONFIG_X86)
struct cpuinfo_x86 *c = &cpu_data(0);
+ /* eGPU change speeds based on USB4 fabric conditions */
+ if (dev_is_removable(adev->dev))
+ return true;
+
if (c->x86_vendor == X86_VENDOR_INTEL)
return false;
#endif
@@ -2188,15 +2251,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
adev->firmware.gpu_info_fw = NULL;
- if (adev->mman.discovery_bin) {
- /*
- * FIXME: The bounding box is still needed by Navi12, so
- * temporarily read it from gpu_info firmware. Should be dropped
- * when DAL no longer needs it.
- */
- if (adev->asic_type != CHIP_NAVI12)
- return 0;
- }
+ if (adev->mman.discovery_bin)
+ return 0;
switch (adev->asic_type) {
default:
@@ -2395,7 +2451,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
- if (!amdgpu_device_pcie_dynamic_switching_supported())
+ if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
total = true;
@@ -2573,7 +2629,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
break;
}
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+ r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
ring->num_hw_submission, 0,
timeout, adev->reset_domain->wq,
@@ -2676,6 +2732,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
goto init_failed;
}
}
+
+ r = amdgpu_seq64_init(adev);
+ if (r) {
+ DRM_ERROR("allocate seq64 failed %d\n", r);
+ goto init_failed;
+ }
}
}
@@ -3138,6 +3200,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
amdgpu_device_wb_fini(adev);
amdgpu_device_mem_scratch_fini(adev);
amdgpu_ib_pool_fini(adev);
+ amdgpu_seq64_fini(adev);
}
r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
@@ -3791,10 +3854,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
adev->gfx.mcbp = true;
else if (amdgpu_mcbp == 0)
adev->gfx.mcbp = false;
- else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
- (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
- adev->gfx.num_gfx_rings)
- adev->gfx.mcbp = true;
if (amdgpu_sriov_vf(adev))
adev->gfx.mcbp = true;
@@ -4222,6 +4281,7 @@ fence_driver_init:
"Could not create amdgpu board attributes\n");
amdgpu_fru_sysfs_init(adev);
+ amdgpu_reg_state_sysfs_init(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
r = amdgpu_pmu_init(adev);
@@ -4344,6 +4404,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
amdgpu_fru_sysfs_fini(adev);
+ amdgpu_reg_state_sysfs_fini(adev);
+
/* disable ras feature must before hw fini */
amdgpu_ras_pre_fini(adev);
@@ -4520,8 +4582,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
amdgpu_ras_suspend(adev);
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
-
amdgpu_device_ip_suspend_phase1(adev);
if (!adev->in_s0ix)
@@ -4531,6 +4591,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
if (r)
return r;
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
amdgpu_fence_driver_hw_fini(adev);
amdgpu_device_ip_suspend_phase2(adev);
@@ -4538,6 +4600,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
+ r = amdgpu_dpm_notify_rlc_state(adev, false);
+ if (r)
+ return r;
+
return 0;
}
@@ -4964,7 +5030,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
spin_lock(&ring->sched.job_list_lock);
@@ -5103,7 +5169,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
/* Clear job fence from fence drv to avoid force_completion
@@ -5592,7 +5658,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5668,7 +5734,7 @@ skip_hw_reset:
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
drm_sched_start(&ring->sched, true);
@@ -5731,6 +5797,39 @@ recover_end:
}
/**
+ * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * first physical partner to an AMD dGPU.
+ * This will exclude any virtual switches and links.
+ */
+static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *parent = adev->pdev;
+
+ if (!speed || !width)
+ return;
+
+ *speed = PCI_SPEED_UNKNOWN;
+ *width = PCIE_LNK_WIDTH_UNKNOWN;
+
+ while ((parent = pci_upstream_bridge(parent))) {
+ /* skip upstream/downstream switches internal to dGPU*/
+ if (parent->vendor == PCI_VENDOR_ID_ATI)
+ continue;
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ break;
+ }
+}
+
+/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
* @adev: amdgpu_device pointer
@@ -5763,8 +5862,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
return;
- pcie_bandwidth_available(adev->pdev, NULL,
- &platform_speed_cap, &platform_link_width);
+ amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
+ &platform_link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */
@@ -5991,7 +6090,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
drm_sched_stop(&ring->sched, NULL);
@@ -6119,7 +6218,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!ring || !drm_sched_wqueue_ready(&ring->sched))
continue;
drm_sched_start(&ring->sched, true);