diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 83 |
1 files changed, 53 insertions, 30 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 475e47027354..49f40d9f16e8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1166,7 +1166,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) props->weight = 20; else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) - props->weight = 15 * iolink->num_hops_xgmi; + props->weight = iolink->weight_xgmi; else props->weight = node_distance(id_from, id_to); @@ -1405,7 +1405,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, return i; } -int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info) +int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) { int num_of_cache_types = 0; @@ -1524,7 +1524,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): num_of_cache_types = - kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); + kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); break; default: *pcache_info = dummy_cache_info; @@ -1858,7 +1858,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) } static int kfd_fill_gpu_memory_affinity(int *avail_size, - struct kfd_dev *kdev, uint8_t type, uint64_t size, + struct kfd_node *kdev, uint8_t type, uint64_t size, struct crat_subtype_memory *sub_type_hdr, uint32_t proximity_domain, const struct kfd_local_mem_info *local_mem_info) @@ -1887,7 +1887,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size, } #ifdef CONFIG_ACPI_NUMA -static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) +static void kfd_find_numa_node_in_srat(struct kfd_node *kdev) { struct acpi_table_header *table_header = NULL; struct acpi_subtable_header *sub_header = NULL; @@ -1972,6 +1972,9 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) } #endif +#define KFD_CRAT_INTRA_SOCKET_WEIGHT 13 +#define KFD_CRAT_XGMI_WEIGHT 15 + /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU * to its NUMA node * @avail_size: Available size in the memory @@ -1982,7 +1985,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) * Return 0 if successful else return -ve value */ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, - struct kfd_dev *kdev, + struct kfd_node *kdev, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain) { @@ -2002,7 +2005,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, /* Fill in IOLINK subtype. * TODO: Fill-in other fields of iolink subtype */ - if (kdev->adev->gmc.xgmi.connected_to_cpu) { + if (kdev->adev->gmc.xgmi.connected_to_cpu || + (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) && + kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) == + AMDGPU_PKG_TYPE_APU)) { + bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3); + int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT : + KFD_CRAT_INTRA_SOCKET_WEIGHT; + uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( + kdev->adev, NULL, true) : mem_bw; + /* * with host gpu xgmi link, host can access gpu memory whether * or not pcie bar type is large, so always create bidirectional @@ -2010,14 +2022,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, */ sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; - sub_type_hdr->num_hops_xgmi = 1; - if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) { - sub_type_hdr->minimum_bandwidth_mbs = - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( - kdev->adev, NULL, true); - sub_type_hdr->maximum_bandwidth_mbs = - sub_type_hdr->minimum_bandwidth_mbs; - } + sub_type_hdr->weight_xgmi = weight; + sub_type_hdr->minimum_bandwidth_mbs = bandwidth; + sub_type_hdr->maximum_bandwidth_mbs = bandwidth; } else { sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; sub_type_hdr->minimum_bandwidth_mbs = @@ -2029,7 +2036,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, sub_type_hdr->proximity_domain_from = proximity_domain; #ifdef CONFIG_ACPI_NUMA - if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE) + if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE && + num_possible_nodes() > 1) kfd_find_numa_node_in_srat(kdev); #endif #ifdef CONFIG_NUMA @@ -2044,12 +2052,14 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, } static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, - struct kfd_dev *kdev, - struct kfd_dev *peer_kdev, + struct kfd_node *kdev, + struct kfd_node *peer_kdev, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain_from, uint32_t proximity_domain_to) { + bool use_ta_info = kdev->kfd->num_nodes == 1; + *avail_size -= sizeof(struct crat_subtype_iolink); if (*avail_size < 0) return -ENOMEM; @@ -2064,12 +2074,25 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->proximity_domain_from = proximity_domain_from; sub_type_hdr->proximity_domain_to = proximity_domain_to; - sub_type_hdr->num_hops_xgmi = - amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); - sub_type_hdr->maximum_bandwidth_mbs = - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false); - sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? - amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; + + if (use_ta_info) { + sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT * + amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); + sub_type_hdr->maximum_bandwidth_mbs = + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, + peer_kdev->adev, false); + sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; + } else { + bool is_single_hop = kdev->kfd == peer_kdev->kfd; + int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT : + (2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT; + int mem_bw = 819200; + + sub_type_hdr->weight_xgmi = weight; + sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0; + sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0; + } return 0; } @@ -2081,7 +2104,7 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, * [OUT] actual size of data filled in crat_image */ static int kfd_create_vcrat_image_gpu(void *pcrat_image, - size_t *size, struct kfd_dev *kdev, + size_t *size, struct kfd_node *kdev, uint32_t proximity_domain) { struct crat_header *crat_table = (struct crat_header *)pcrat_image; @@ -2153,7 +2176,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, /* Check if this node supports IOMMU. During parsing this flag will * translate to HSA_CAP_ATS_PRESENT */ - if (!kfd_iommu_check_device(kdev)) + if (!kfd_iommu_check_device(kdev->kfd)) cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; crat_table->length += sub_type_hdr->length; @@ -2216,12 +2239,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * (from other GPU to this GPU) will be added * in kfd_parse_subtype_iolink. */ - if (kdev->hive_id) { + if (kdev->kfd->hive_id) { for (nid = 0; nid < proximity_domain; ++nid) { peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid); if (!peer_dev->gpu) continue; - if (peer_dev->gpu->hive_id != kdev->hive_id) + if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id) continue; sub_type_hdr = (typeof(sub_type_hdr))( (char *)sub_type_hdr + @@ -2255,12 +2278,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU * -- this option is not currently implemented. * The assumption is that all AMD APUs will have CRAT - * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU + * @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU * * Return 0 if successful else return -ve value */ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, - int flags, struct kfd_dev *kdev, + int flags, struct kfd_node *kdev, uint32_t proximity_domain) { void *pcrat_image = NULL; |