summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/driver-api/edac.rst120
-rw-r--r--arch/x86/kernel/amd_nb.c50
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c6
-rw-r--r--arch/x86/kernel/cpu/mce/core.c2
-rw-r--r--drivers/edac/amd64_edac.c386
-rw-r--r--drivers/edac/amd64_edac.h1
-rw-r--r--drivers/edac/mce_amd.c3
-rw-r--r--drivers/ras/debugfs.c2
-rw-r--r--include/linux/pci_ids.h1
9 files changed, 513 insertions, 58 deletions
diff --git a/Documentation/driver-api/edac.rst b/Documentation/driver-api/edac.rst
index b8c742aa0a71..f4f044b95c4f 100644
--- a/Documentation/driver-api/edac.rst
+++ b/Documentation/driver-api/edac.rst
@@ -106,6 +106,16 @@ will occupy those chip-select rows.
This term is avoided because it is unclear when needing to distinguish
between chip-select rows and socket sets.
+* High Bandwidth Memory (HBM)
+
+HBM is a new memory type with low power consumption and ultra-wide
+communication lanes. It uses vertically stacked memory chips (DRAM dies)
+interconnected by microscopic wires called "through-silicon vias," or
+TSVs.
+
+Several stacks of HBM chips connect to the CPU or GPU through an ultra-fast
+interconnect called the "interposer". Therefore, HBM's characteristics
+are nearly indistinguishable from on-chip integrated RAM.
Memory Controllers
------------------
@@ -176,3 +186,113 @@ nodes::
the L1 and L2 directories would be "edac_device_block's"
.. kernel-doc:: drivers/edac/edac_device.h
+
+
+Heterogeneous system support
+----------------------------
+
+An AMD heterogeneous system is built by connecting the data fabrics of
+both CPUs and GPUs via custom xGMI links. Thus, the data fabric on the
+GPU nodes can be accessed the same way as the data fabric on CPU nodes.
+
+The MI200 accelerators are data center GPUs. They have 2 data fabrics,
+and each GPU data fabric contains four Unified Memory Controllers (UMC).
+Each UMC contains eight channels. Each UMC channel controls one 128-bit
+HBM2e (2GB) channel (equivalent to 8 X 2GB ranks). This creates a total
+of 4096-bits of DRAM data bus.
+
+While the UMC is interfacing a 16GB (8high X 2GB DRAM) HBM stack, each UMC
+channel is interfacing 2GB of DRAM (represented as rank).
+
+Memory controllers on AMD GPU nodes can be represented in EDAC thusly:
+
+ GPU DF / GPU Node -> EDAC MC
+ GPU UMC -> EDAC CSROW
+ GPU UMC channel -> EDAC CHANNEL
+
+For example: a heterogeneous system with 1 AMD CPU is connected to
+4 MI200 (Aldebaran) GPUs using xGMI.
+
+Some more heterogeneous hardware details:
+
+- The CPU UMC (Unified Memory Controller) is mostly the same as the GPU UMC.
+ They have chip selects (csrows) and channels. However, the layouts are different
+ for performance, physical layout, or other reasons.
+- CPU UMCs use 1 channel, In this case UMC = EDAC channel. This follows the
+ marketing speak. CPU has X memory channels, etc.
+- CPU UMCs use up to 4 chip selects, So UMC chip select = EDAC CSROW.
+- GPU UMCs use 1 chip select, So UMC = EDAC CSROW.
+- GPU UMCs use 8 channels, So UMC channel = EDAC channel.
+
+The EDAC subsystem provides a mechanism to handle AMD heterogeneous
+systems by calling system specific ops for both CPUs and GPUs.
+
+AMD GPU nodes are enumerated in sequential order based on the PCI
+hierarchy, and the first GPU node is assumed to have a Node ID value
+following those of the CPU nodes after latter are fully populated::
+
+ $ ls /sys/devices/system/edac/mc/
+ mc0 - CPU MC node 0
+ mc1 |
+ mc2 |- GPU card[0] => node 0(mc1), node 1(mc2)
+ mc3 |
+ mc4 |- GPU card[1] => node 0(mc3), node 1(mc4)
+ mc5 |
+ mc6 |- GPU card[2] => node 0(mc5), node 1(mc6)
+ mc7 |
+ mc8 |- GPU card[3] => node 0(mc7), node 1(mc8)
+
+For example, a heterogeneous system with one AMD CPU is connected to
+four MI200 (Aldebaran) GPUs using xGMI. This topology can be represented
+via the following sysfs entries::
+
+ /sys/devices/system/edac/mc/..
+
+ CPU # CPU node
+ ├── mc 0
+
+ GPU Nodes are enumerated sequentially after CPU nodes have been populated
+ GPU card 1 # Each MI200 GPU has 2 nodes/mcs
+ ├── mc 1 # GPU node 0 == mc1, Each MC node has 4 UMCs/CSROWs
+ │   ├── csrow 0 # UMC 0
+ │   │   ├── channel 0 # Each UMC has 8 channels
+ │   │   ├── channel 1 # size of each channel is 2 GB, so each UMC has 16 GB
+ │   │   ├── channel 2
+ │   │   ├── channel 3
+ │   │   ├── channel 4
+ │   │   ├── channel 5
+ │   │   ├── channel 6
+ │   │   ├── channel 7
+ │   ├── csrow 1 # UMC 1
+ │   │   ├── channel 0
+ │   │   ├── ..
+ │   │   ├── channel 7
+ │   ├── .. ..
+ │   ├── csrow 3 # UMC 3
+ │   │   ├── channel 0
+ │   │   ├── ..
+ │   │   ├── channel 7
+ │   ├── rank 0
+ │   ├── .. ..
+ │   ├── rank 31 # total 32 ranks/dimms from 4 UMCs
+ ├
+ ├── mc 2 # GPU node 1 == mc2
+ │   ├── .. # each GPU has total 64 GB
+
+ GPU card 2
+ ├── mc 3
+ │   ├── ..
+ ├── mc 4
+ │   ├── ..
+
+ GPU card 3
+ ├── mc 5
+ │   ├── ..
+ ├── mc 6
+ │   ├── ..
+
+ GPU card 4
+ ├── mc 7
+ │   ├── ..
+ ├── mc 8
+ │   ├── ..
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 7e331e8f3692..035a3db5330b 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -15,28 +15,31 @@
#include <linux/pci_ids.h>
#include <asm/amd_nb.h>
-#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
-#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
-#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
-#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
-#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5
-#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4
-#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8
-#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8
-#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
-#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
-#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
-#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
-#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
-#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728
-#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
-#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1
-#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
-#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
-#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
-#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
-#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
-#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc
+#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
+#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
+#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
+#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
+#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5
+#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4
+#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
+#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8
+#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8
+#define PCI_DEVICE_ID_AMD_MI200_ROOT 0x14bb
+
+#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
+#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
+#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
+#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
+#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728
+#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
+#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1
+#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
+#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
+#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
+#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
+#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc
+#define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
@@ -53,6 +56,7 @@ static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) },
{}
};
@@ -81,6 +85,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) },
{}
};
@@ -101,6 +106,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) },
{}
};
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 0b971f974096..5e74610b39e7 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -715,11 +715,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
bool amd_mce_is_memory_error(struct mce *m)
{
+ enum smca_bank_types bank_type;
/* ErrCodeExt[20:16] */
u8 xec = (m->status >> 16) & 0x1f;
+ bank_type = smca_get_bank_type(m->extcpu, m->bank);
if (mce_flags.smca)
- return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0;
+ return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
return m->bank == 4 && xec == 0x8;
}
@@ -1050,7 +1052,7 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;
- if (b && bank_type == SMCA_UMC) {
+ if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
return NULL;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2eec60f50057..22dfcb2adcd7 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1533,7 +1533,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- if (kill_current_task)
+ if (!mce_usable_address(&m))
queue_task_work(&m, msg, kill_me_now);
else
queue_task_work(&m, msg, kill_me_maybe);
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index de3ea2c1807d..597dae7692b1 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -975,6 +975,74 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
return csrow;
}
+/*
+ * See AMD PPR DF::LclNodeTypeMap
+ *
+ * This register gives information for nodes of the same type within a system.
+ *
+ * Reading this register from a GPU node will tell how many GPU nodes are in the
+ * system and what the lowest AMD Node ID value is for the GPU nodes. Use this
+ * info to fixup the Linux logical "Node ID" value set in the AMD NB code and EDAC.
+ */
+static struct local_node_map {
+ u16 node_count;
+ u16 base_node_id;
+} gpu_node_map;
+
+#define PCI_DEVICE_ID_AMD_MI200_DF_F1 0x14d1
+#define REG_LOCAL_NODE_TYPE_MAP 0x144
+
+/* Local Node Type Map (LNTM) fields */
+#define LNTM_NODE_COUNT GENMASK(27, 16)
+#define LNTM_BASE_NODE_ID GENMASK(11, 0)
+
+static int gpu_get_node_map(void)
+{
+ struct pci_dev *pdev;
+ int ret;
+ u32 tmp;
+
+ /*
+ * Node ID 0 is reserved for CPUs.
+ * Therefore, a non-zero Node ID means we've already cached the values.
+ */
+ if (gpu_node_map.base_node_id)
+ return 0;
+
+ pdev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F1, NULL);
+ if (!pdev) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp);
+ if (ret)
+ goto out;
+
+ gpu_node_map.node_count = FIELD_GET(LNTM_NODE_COUNT, tmp);
+ gpu_node_map.base_node_id = FIELD_GET(LNTM_BASE_NODE_ID, tmp);
+
+out:
+ pci_dev_put(pdev);
+ return ret;
+}
+
+static int fixup_node_id(int node_id, struct mce *m)
+{
+ /* MCA_IPID[InstanceIdHi] give the AMD Node ID for the bank. */
+ u8 nid = (m->ipid >> 44) & 0xF;
+
+ if (smca_get_bank_type(m->extcpu, m->bank) != SMCA_UMC_V2)
+ return node_id;
+
+ /* Nodes below the GPU base node are CPU nodes and don't need a fixup. */
+ if (nid < gpu_node_map.base_node_id)
+ return node_id;
+
+ /* Convert the hardware-provided AMD Node ID to a Linux logical one. */
+ return nid - gpu_node_map.base_node_id + 1;
+}
+
/* Protect the PCI config register pairs used for DF indirect access. */
static DEFINE_MUTEX(df_indirect_mutex);
@@ -1426,12 +1494,47 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}
+static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
+ int csrow_nr, int dimm)
+{
+ u32 msb, weight, num_zero_bits;
+ u32 addr_mask_deinterleaved;
+ int size = 0;
+
+ /*
+ * The number of zero bits in the mask is equal to the number of bits
+ * in a full mask minus the number of bits in the current mask.
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
+ *
+ * In the special 3 Rank interleaving case, a single bit is flipped
+ * without swapping with the most significant bit. This can be handled
+ * by keeping the MSB where it is and ignoring the single zero bit.
+ */
+ msb = fls(addr_mask_orig) - 1;
+ weight = hweight_long(addr_mask_orig);
+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
+
+ /* Take the number of zero bits off from the top of the mask. */
+ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+
+ edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+ edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+ size = (addr_mask_deinterleaved >> 2) + 1;
+
+ /* Return size in MBs. */
+ return size >> 10;
+}
+
static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig, addr_mask_deinterleaved;
- u32 msb, weight, num_zero_bits;
int cs_mask_nr = csrow_nr;
+ u32 addr_mask_orig;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -1475,33 +1578,7 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
else
addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
- /*
- * The number of zero bits in the mask is equal to the number of bits
- * in a full mask minus the number of bits in the current mask.
- *
- * The MSB is the number of bits in the full mask because BIT[0] is
- * always 0.
- *
- * In the special 3 Rank interleaving case, a single bit is flipped
- * without swapping with the most significant bit. This can be handled
- * by keeping the MSB where it is and ignoring the single zero bit.
- */
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
- num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
-
- /* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
-
- edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
-
- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
-
- /* Return size in MBs. */
- return size >> 10;
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
}
static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -2992,6 +3069,8 @@ static void decode_umc_error(int node_id, struct mce *m)
struct err_info err;
u64 sys_addr;
+ node_id = fixup_node_id(node_id, m);
+
mci = edac_mc_find(node_id);
if (!mci)
return;
@@ -3675,6 +3754,227 @@ static int umc_hw_info_get(struct amd64_pvt *pvt)
return 0;
}
+/*
+ * The CPUs have one channel per UMC, so UMC number is equivalent to a
+ * channel number. The GPUs have 8 channels per UMC, so the UMC number no
+ * longer works as a channel number.
+ *
+ * The channel number within a GPU UMC is given in MCA_IPID[15:12].
+ * However, the IDs are split such that two UMC values go to one UMC, and
+ * the channel numbers are split in two groups of four.
+ *
+ * Refer to comment on gpu_get_umc_base().
+ *
+ * For example,
+ * UMC0 CH[3:0] = 0x0005[3:0]000
+ * UMC0 CH[7:4] = 0x0015[3:0]000
+ * UMC1 CH[3:0] = 0x0025[3:0]000
+ * UMC1 CH[7:4] = 0x0035[3:0]000
+ */
+static void gpu_get_err_info(struct mce *m, struct err_info *err)
+{
+ u8 ch = (m->ipid & GENMASK(31, 0)) >> 20;
+ u8 phy = ((m->ipid >> 12) & 0xf);
+
+ err->channel = ch % 2 ? phy + 4 : phy;
+ err->csrow = phy;
+}
+
+static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+ unsigned int cs_mode, int csrow_nr)
+{
+ u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+}
+
+static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
+{
+ int size, cs_mode, cs = 0;
+
+ edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
+
+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ for_each_chip_select(cs, ctrl, pvt) {
+ size = gpu_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs);
+ amd64_info(EDAC_MC ": %d: %5dMB\n", cs, size);
+ }
+}
+
+static void gpu_dump_misc_regs(struct amd64_pvt *pvt)
+{
+ struct amd64_umc *umc;
+ u32 i;
+
+ for_each_umc(i) {
+ umc = &pvt->umc[i];
+
+ edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
+ edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
+ edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
+ edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i);
+
+ gpu_debug_display_dimm_sizes(pvt, i);
+ }
+}
+
+static u32 gpu_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
+{
+ u32 nr_pages;
+ int cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ nr_pages = gpu_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr);
+ nr_pages <<= 20 - PAGE_SHIFT;
+
+ edac_dbg(0, "csrow: %d, channel: %d\n", csrow_nr, dct);
+ edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
+
+ return nr_pages;
+}
+
+static void gpu_init_csrows(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ struct dimm_info *dimm;
+ u8 umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ if (!csrow_enabled(cs, umc, pvt))
+ continue;
+
+ dimm = mci->csrows[umc]->channels[cs]->dimm;
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, cs);
+
+ dimm->nr_pages = gpu_get_csrow_nr_pages(pvt, umc, cs);
+ dimm->edac_mode = EDAC_SECDED;
+ dimm->mtype = MEM_HBM2;
+ dimm->dtype = DEV_X16;
+ dimm->grain = 64;
+ }
+ }
+}
+
+static void gpu_setup_mci_misc_attrs(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+
+ mci->mtype_cap = MEM_FLAG_HBM2;
+ mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+
+ mci->edac_cap = EDAC_FLAG_EC;
+ mci->mod_name = EDAC_MOD_STR;
+ mci->ctl_name = pvt->ctl_name;
+ mci->dev_name = pci_name(pvt->F3);
+ mci->ctl_page_to_phys = NULL;
+
+ gpu_init_csrows(mci);
+}
+
+/* ECC is enabled by default on GPU nodes */
+static bool gpu_ecc_enabled(struct amd64_pvt *pvt)
+{
+ return true;
+}
+
+static inline u32 gpu_get_umc_base(u8 umc, u8 channel)
+{
+ /*
+ * On CPUs, there is one channel per UMC, so UMC numbering equals
+ * channel numbering. On GPUs, there are eight channels per UMC,
+ * so the channel numbering is different from UMC numbering.
+ *
+ * On CPU nodes channels are selected in 6th nibble
+ * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000;
+ *
+ * On GPU nodes channels are selected in 3rd nibble
+ * HBM chX[3:0]= [Y ]5X[3:0]000;
+ * HBM chX[7:4]= [Y+1]5X[3:0]000
+ */
+ umc *= 2;
+
+ if (channel >= 4)
+ umc++;
+
+ return 0x50000 + (umc << 20) + ((channel % 4) << 12);
+}
+
+static void gpu_read_mc_regs(struct amd64_pvt *pvt)
+{
+ u8 nid = pvt->mc_node_id;
+ struct amd64_umc *umc;
+ u32 i, umc_base;
+
+ /* Read registers from each UMC */
+ for_each_umc(i) {
+ umc_base = gpu_get_umc_base(i, 0);
+ umc = &pvt->umc[i];
+
+ amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
+ amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
+ amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
+ }
+}
+
+static void gpu_read_base_mask(struct amd64_pvt *pvt)
+{
+ u32 base_reg, mask_reg;
+ u32 *base, *mask;
+ int umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ base_reg = gpu_get_umc_base(umc, cs) + UMCCH_BASE_ADDR;
+ base = &pvt->csels[umc].csbases[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) {
+ edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base, base_reg);
+ }
+
+ mask_reg = gpu_get_umc_base(umc, cs) + UMCCH_ADDR_MASK;
+ mask = &pvt->csels[umc].csmasks[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) {
+ edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask, mask_reg);
+ }
+ }
+ }
+}
+
+static void gpu_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 8;
+ pvt->csels[umc].m_cnt = 8;
+ }
+}
+
+static int gpu_hw_info_get(struct amd64_pvt *pvt)
+{
+ int ret;
+
+ ret = gpu_get_node_map();
+ if (ret)
+ return ret;
+
+ pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
+ if (!pvt->umc)
+ return -ENOMEM;
+
+ gpu_prep_chip_selects(pvt);
+ gpu_read_base_mask(pvt);
+ gpu_read_mc_regs(pvt);
+
+ return 0;
+}
+
static void hw_info_put(struct amd64_pvt *pvt)
{
pci_dev_put(pvt->F1);
@@ -3690,6 +3990,14 @@ static struct low_ops umc_ops = {
.get_err_info = umc_get_err_info,
};
+static struct low_ops gpu_ops = {
+ .hw_info_get = gpu_hw_info_get,
+ .ecc_enabled = gpu_ecc_enabled,
+ .setup_mci_misc_attrs = gpu_setup_mci_misc_attrs,
+ .dump_misc_regs = gpu_dump_misc_regs,
+ .get_err_info = gpu_get_err_info,
+};
+
/* Use Family 16h versions for defaults and adjust as needed below. */
static struct low_ops dct_ops = {
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -3813,6 +4121,16 @@ static int per_family_init(struct amd64_pvt *pvt)
case 0x20 ... 0x2f:
pvt->ctl_name = "F19h_M20h";
break;
+ case 0x30 ... 0x3f:
+ if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) {
+ pvt->ctl_name = "MI200";
+ pvt->max_mcs = 4;
+ pvt->ops = &gpu_ops;
+ } else {
+ pvt->ctl_name = "F19h_M30h";
+ pvt->max_mcs = 8;
+ }
+ break;
case 0x50 ... 0x5f:
pvt->ctl_name = "F19h_M50h";
break;
@@ -3854,11 +4172,17 @@ static int init_one_instance(struct amd64_pvt *pvt)
struct edac_mc_layer layers[2];
int ret = -ENOMEM;
+ /*
+ * For Heterogeneous family EDAC CHIP_SELECT and CHANNEL layers should
+ * be swapped to fit into the layers.
+ */
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = pvt->csels[0].b_cnt;
+ layers[0].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
+ pvt->max_mcs : pvt->csels[0].b_cnt;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
- layers[1].size = pvt->max_mcs;
+ layers[1].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
+ pvt->csels[0].b_cnt : pvt->max_mcs;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 0bde0db76f7a..5a4e4a59682b 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/edac.h>
+#include <linux/bitfield.h>
#include <asm/cpu_device_id.h>
#include <asm/msr.h>
#include "edac_module.h"
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index cc5c63feb26a..9215c06783df 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1186,7 +1186,8 @@ static void decode_smca_error(struct mce *m)
if (xec < smca_mce_descs[bank_type].num_descs)
pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
- if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
+ if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) &&
+ xec == 0 && decode_dram_ecc)
decode_dram_ecc(topology_die_id(m->extcpu), m);
}
diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c
index f0a6391b1146..ffb973c328e3 100644
--- a/drivers/ras/debugfs.c
+++ b/drivers/ras/debugfs.c
@@ -46,7 +46,7 @@ int __init ras_add_daemon_trace(void)
fentry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir,
NULL, &trace_fops);
- if (!fentry)
+ if (IS_ERR(fentry))
return -ENODEV;
return 0;
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 95f33dadb2be..a99b1fcfc617 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -568,6 +568,7 @@
#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3
#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3
#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb
+#define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001