From fb4b9685779f25ff063358623f3da4f3344be9bb Mon Sep 17 00:00:00 2001 From: ran jianping Date: Sun, 24 Apr 2022 06:21:26 +0000 Subject: EDAC/wq: Remove unneeded flush_workqueue() destroy_workqueue() already takes care of flushing the workqueue so there is no need to flush it explicitly. [ bp: Massage commit message. ] Reported-by: Zeal Robot Signed-off-by: ran jianping Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220424062127.3219542-1-ran.jianping@zte.com.cn --- drivers/edac/wq.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c index d021d287eaec..ad3f516627c5 100644 --- a/drivers/edac/wq.c +++ b/drivers/edac/wq.c @@ -37,7 +37,6 @@ int edac_workqueue_setup(void) void edac_workqueue_teardown(void) { - flush_workqueue(wq); destroy_workqueue(wq); wq = NULL; } -- cgit v1.2.3 From 93df19476535a4ed871bc5eae719b6d67cf3abc3 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 22 Aug 2022 22:07:23 +0300 Subject: EDAC/mc: Drop duplicated dimm->nr_pages debug printout The duplicated edac_dbg()-based dimm->nr_pages print was introduced in 6e84d359b2be ("edac_mc: Cleanup per-dimm_info debug messages"). The duplicated line can be found even in the commit message text: [ 1011.380101] EDAC DEBUG: edac_mc_dump_dimm: dimm->nr_pages = 0x40000 [ 1011.380103] EDAC DEBUG: edac_mc_dump_dimm: dimm->grain = 8 [ 1011.380104] EDAC DEBUG: edac_mc_dump_dimm: dimm->nr_pages = 0x40000 Drop the second edac_dbg() call. [ bp: Massage commit message. ] Signed-off-by: Serge Semin Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220822190730.27277-14-Sergey.Semin@baikalelectronics.ru --- drivers/edac/edac_mc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index eb58644bb019..6faeb2ab3960 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -103,7 +103,6 @@ static void edac_mc_dump_dimm(struct dimm_info *dimm) edac_dbg(4, " dimm->label = '%s'\n", dimm->label); edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); edac_dbg(4, " dimm->grain = %d\n", dimm->grain); - edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); } static void edac_mc_dump_csrow(struct csrow_info *csrow) -- cgit v1.2.3 From fe32f366931a950889e8d72be86fafc867dab777 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 1 Sep 2022 12:43:08 -0700 Subject: EDAC/skx_common: Use driver decoder first The performance of driver decoder[1] is better than the performance of firmware decoder[2], especially on frequent correctable errors. So use the driver decoder first, fall back to firmware decoder if the driver decoder is unavailable. Also rename the function pointer skx_decode to driver_decode (better name to contrast with adxl_decode). [1] Decode errors by extracting error information from registers of memory controllers and/or MCA bank registers. [2] Decode errors by calling ACPI DSM methods. Co-developed-by: Youquan Song Signed-off-by: Youquan Song Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20220901194310.115427-1-tony.luck@intel.com/ --- drivers/edac/skx_base.c | 9 +++++++-- drivers/edac/skx_common.c | 16 +++++++++------- drivers/edac/skx_common.h | 1 + 3 files changed, 17 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 1abc020d49ab..7e2762f62eec 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -714,8 +714,13 @@ static int __init skx_init(void) skx_set_decode(skx_decode, skx_show_retry_rd_err_log); - if (nvdimm_count && skx_adxl_get() == -ENODEV) - skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n"); + if (nvdimm_count && skx_adxl_get() != -ENODEV) { + skx_set_decode(NULL, skx_show_retry_rd_err_log); + } else { + if (nvdimm_count) + skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n"); + skx_set_decode(skx_decode, skx_show_retry_rd_err_log); + } /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 19c17c5198c5..9b10c359849b 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -40,7 +40,7 @@ static char *adxl_msg; static unsigned long adxl_nm_bitmap; static char skx_msg[MSG_SIZE]; -static skx_decode_f skx_decode; +static skx_decode_f driver_decode; static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); @@ -173,6 +173,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me break; } + res->decoded_by_adxl = true; + return true; } @@ -183,7 +185,7 @@ void skx_set_mem_cfg(bool mem_cfg_2lm) void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { - skx_decode = decode; + driver_decode = decode; skx_show_retry_rd_err_log = show_retry_log; } @@ -591,7 +593,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, break; } } - if (adxl_component_count) { + if (res->decoded_by_adxl) { len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", @@ -651,11 +653,11 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, memset(&res, 0, sizeof(res)); res.addr = mce->addr; - if (adxl_component_count) { - if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))) + /* Try driver decoder first */ + if (!(driver_decode && driver_decode(&res))) { + /* Then try firmware decoder (ACPI DSM methods) */ + if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))) return NOTIFY_DONE; - } else if (!skx_decode || !skx_decode(&res)) { - return NOTIFY_DONE; } mci = res.dev->imc[res.imc].mci; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 03ac067a80b9..880ecd15ca42 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -136,6 +136,7 @@ struct decoded_addr { int column; int bank_address; int bank_group; + bool decoded_by_adxl; }; struct res_config { -- cgit v1.2.3 From 627d551a9e75ef81525822ba2a0d9d5a64791d89 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 1 Sep 2022 12:43:09 -0700 Subject: EDAC/skx_common: Make output format similar The decoded output format of driver decoder is different from the output format of firmware decoder. Make output format similar regardless of decode function (Align driver decoder's to firmware decoder's). Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20220901194310.115427-1-tony.luck@intel.com/ --- drivers/edac/skx_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 9b10c359849b..16ca3de57c24 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -600,12 +600,12 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, mscod, errcode, adxl_msg); } else { len = snprintf(skx_msg, MSG_SIZE, - "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x", + "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", mscod, errcode, res->socket, res->imc, res->rank, - res->bank_group, res->bank_address, res->row, res->column); + res->row, res->column, res->bank_address, res->bank_group); } if (skx_show_retry_rd_err_log) -- cgit v1.2.3 From 2738c69a8813453b35549465867ae591f8598eb0 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Thu, 1 Sep 2022 12:43:10 -0700 Subject: EDAC/i10nm: Add driver decoder for Ice Lake and Tremont CPUs Current i10nm_edac only supports firmware decoder (ACPI DSM methods). MCA bank registers of Ice Lake or Tremont CPUs contain the information to decode DDR memory errors. To get better decoding performance, add the driver decoder (decoding DDR memory errors via extracting error information from MCA bank registers) for Ice Lake and Tremont CPUs. Co-developed-by: Qiuxu Zhuo Signed-off-by: Qiuxu Zhuo Signed-off-by: Youquan Song Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20220901194310.115427-1-tony.luck@intel.com/ --- arch/x86/include/asm/mce.h | 1 + drivers/edac/i10nm_base.c | 134 ++++++++++++++++++++++++++++++++++++++++++++- drivers/edac/skx_common.c | 1 + drivers/edac/skx_common.h | 5 ++ 4 files changed, 139 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index cc73061e7255..6e986088817d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -42,6 +42,7 @@ #define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */ #define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38) #define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT) +#define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff) /* AMD-specific bits */ #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */ diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 6cf50ee0b77c..817f618fcff0 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -74,6 +74,8 @@ static struct list_head *i10nm_edac_list; static struct res_config *res_cfg; static int retry_rd_err_log; +static int decoding_via_mca; +static bool mem_cfg_2lm; static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; @@ -231,6 +233,103 @@ static bool i10nm_check_2lm(struct res_config *cfg) return false; } +/* + * Check whether the error comes from DDRT by ICX/Tremont model specific error code. + * Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS. + */ +static bool i10nm_mscod_is_ddrt(u32 mscod) +{ + switch (mscod) { + case 0x0106: case 0x0107: + case 0x0800: case 0x0804: + case 0x0806 ... 0x0808: + case 0x080a ... 0x080e: + case 0x0810: case 0x0811: + case 0x0816: case 0x081e: + case 0x081f: + return true; + } + + return false; +} + +static bool i10nm_mc_decode_available(struct mce *mce) +{ + u8 bank; + + if (!decoding_via_mca || mem_cfg_2lm) + return false; + + if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) + != (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) + return false; + + bank = mce->bank; + + switch (res_cfg->type) { + case I10NM: + if (bank < 13 || bank > 26) + return false; + + /* DDRT errors can't be decoded from MCA bank registers */ + if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) + return false; + + if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) + return false; + + /* Check whether one of {13,14,17,18,21,22,25,26} */ + return ((bank - 13) & BIT(1)) == 0; + default: + return false; + } +} + +static bool i10nm_mc_decode(struct decoded_addr *res) +{ + struct mce *m = res->mce; + struct skx_dev *d; + u8 bank; + + if (!i10nm_mc_decode_available(m)) + return false; + + list_for_each_entry(d, i10nm_edac_list, list) { + if (d->imc[0].src_id == m->socketid) { + res->socket = m->socketid; + res->dev = d; + break; + } + } + + switch (res_cfg->type) { + case I10NM: + bank = m->bank - 13; + res->imc = bank / 4; + res->channel = bank % 2; + break; + default: + return false; + } + + if (!res->dev) { + skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", + m->socketid, res->imc); + return false; + } + + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; + res->row = GET_BITFIELD(m->misc, 19, 39); + res->bank_group = GET_BITFIELD(m->misc, 40, 41); + res->bank_address = GET_BITFIELD(m->misc, 42, 43); + res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; + res->rank = GET_BITFIELD(m->misc, 56, 58); + res->dimm = res->rank >> 2; + res->rank = res->rank % 4; + + return true; +} + static int i10nm_get_ddr_munits(void) { struct pci_dev *mdev; @@ -574,7 +673,8 @@ static int __init i10nm_init(void) return -ENODEV; } - skx_set_mem_cfg(i10nm_check_2lm(cfg)); + mem_cfg_2lm = i10nm_check_2lm(cfg); + skx_set_mem_cfg(mem_cfg_2lm); rc = i10nm_get_ddr_munits(); @@ -626,9 +726,11 @@ static int __init i10nm_init(void) setup_i10nm_debug(); if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { - skx_set_decode(NULL, show_retry_rd_err_log); + skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); if (retry_rd_err_log == 2) enable_retry_rd_err_log(true); + } else { + skx_set_decode(i10nm_mc_decode, NULL); } i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); @@ -658,6 +760,34 @@ static void __exit i10nm_exit(void) module_init(i10nm_init); module_exit(i10nm_exit); +static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp) +{ + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + + if (ret || val > 1) + return -EINVAL; + + if (val && mem_cfg_2lm) { + i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n"); + return -EIO; + } + + ret = param_set_int(buf, kp); + + return ret; +} + +static const struct kernel_param_ops decoding_via_mca_param_ops = { + .set = set_decoding_via_mca, + .get = param_get_int, +}; + +module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644); +MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable"); + module_param(retry_rd_err_log, int, 0444); MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)"); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 16ca3de57c24..7276ce3a33e1 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -651,6 +651,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, return NOTIFY_DONE; memset(&res, 0, sizeof(res)); + res.mce = mce; res.addr = mce->addr; /* Try driver decoder first */ diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 880ecd15ca42..c542f1562825 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -10,6 +10,7 @@ #define _SKX_COMM_EDAC_H #include +#include #define MSG_SIZE 1024 @@ -52,6 +53,9 @@ #define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15) #define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i) +#define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) +#define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ + /* * Each cpu socket contains some pci devices that provide global * information, and also some that are local to each of the two @@ -120,6 +124,7 @@ enum { #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) struct decoded_addr { + struct mce *mce; struct skx_dev *dev; u64 addr; int socket; -- cgit v1.2.3 From d42d6f5a5fcd734669e75867c632e724f1dc3552 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sun, 11 Sep 2022 17:40:38 +0800 Subject: EDAC: Remove obsolete declarations in edac_module.h Commit 4de78c6877ec ("drivers/edac: mod PCI poll names"), renamed the respective variables and accessors but left the old accessor declarations edac_get_log_ce(), edac_get_log_ue(), edac_get_poll_msec() and edac_get_panic_on_ue() in place. Remove them. [ bp: Masssage commit message. ] Signed-off-by: Gaosheng Cui Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220911094038.3224365-1-cuigaosheng1@huawei.com --- drivers/edac/edac_module.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 96f6de0c8ff6..50ed9f2425bb 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -28,13 +28,9 @@ void edac_mc_sysfs_exit(void); extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, const struct attribute_group **groups); extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci); -extern int edac_get_log_ue(void); -extern int edac_get_log_ce(void); -extern int edac_get_panic_on_ue(void); extern int edac_mc_get_log_ue(void); extern int edac_mc_get_log_ce(void); extern int edac_mc_get_panic_on_ue(void); -extern int edac_get_poll_msec(void); extern unsigned int edac_mc_get_poll_msec(void); unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, -- cgit v1.2.3 From d5e4eeea0c20b7467458cf3f3d887f59075db93e Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Sun, 18 Sep 2022 01:20:13 +0200 Subject: EDAC/ppc_4xx: Reorder symbols to get rid of a few forward declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When moving the definition of ppc4xx_edac_driver further down, the forward declarations can just be dropped. Do this to reduce needless line repetition. Signed-off-by: Uwe Kleine-König Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220917232013.489931-1-u.kleine-koenig@pengutronix.de --- drivers/edac/ppc4xx_edac.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 0bc670778c99..046969b4e82e 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -178,11 +178,6 @@ struct ppc4xx_ecc_status { u32 wmirq; }; -/* Function Prototypes */ - -static int ppc4xx_edac_probe(struct platform_device *device); -static int ppc4xx_edac_remove(struct platform_device *device); - /* Global Variables */ /* @@ -197,15 +192,6 @@ static const struct of_device_id ppc4xx_edac_match[] = { }; MODULE_DEVICE_TABLE(of, ppc4xx_edac_match); -static struct platform_driver ppc4xx_edac_driver = { - .probe = ppc4xx_edac_probe, - .remove = ppc4xx_edac_remove, - .driver = { - .name = PPC4XX_EDAC_MODULE_NAME, - .of_match_table = ppc4xx_edac_match, - }, -}; - /* * TODO: The row and channel parameters likely need to be dynamically * set based on the aforementioned variant controller realizations. @@ -1391,6 +1377,15 @@ ppc4xx_edac_opstate_init(void) EDAC_OPSTATE_UNKNOWN_STR))); } +static struct platform_driver ppc4xx_edac_driver = { + .probe = ppc4xx_edac_probe, + .remove = ppc4xx_edac_remove, + .driver = { + .name = PPC4XX_EDAC_MODULE_NAME, + .of_match_table = ppc4xx_edac_match, + }, +}; + /** * ppc4xx_edac_init - driver/module insertion entry point * -- cgit v1.2.3 From 14646de48bd77947cd6a325b42eecddcec5a35c7 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 22 Jul 2022 16:33:35 -0700 Subject: EDAC/skx_common: Add ChipSelect ADXL component Each pseudo channel of HBM has its own retry_rd_err_log registers. The bit 0 of ChipSelect ADXL component encodes the pseudo channel number of HBM memory. So add ChipSelect ADXL component to get HBM pseudo channel number. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20220722233338.341567-1-tony.luck@intel.com --- drivers/edac/skx_common.c | 5 +++++ drivers/edac/skx_common.h | 4 ++++ 2 files changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 7276ce3a33e1..f0f8e98f6efb 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -27,9 +27,11 @@ static const char * const component_names[] = { [INDEX_MEMCTRL] = "MemoryControllerId", [INDEX_CHANNEL] = "ChannelId", [INDEX_DIMM] = "DimmSlotId", + [INDEX_CS] = "ChipSelect", [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", [INDEX_NM_CHANNEL] = "NmChannelId", [INDEX_NM_DIMM] = "NmDimmSlotId", + [INDEX_NM_CS] = "NmChipSelect", }; static int component_indices[ARRAY_SIZE(component_names)]; @@ -139,10 +141,13 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; + res->cs = (adxl_nm_bitmap & BIT_NM_CS) ? + (int)adxl_values[component_indices[INDEX_NM_CS]] : -1; } else { res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + res->cs = (int)adxl_values[component_indices[INDEX_CS]]; } if (res->imc > NUM_IMC - 1 || res->imc < 0) { diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index c542f1562825..167760fd75ba 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -112,16 +112,19 @@ enum { INDEX_MEMCTRL, INDEX_CHANNEL, INDEX_DIMM, + INDEX_CS, INDEX_NM_FIRST, INDEX_NM_MEMCTRL = INDEX_NM_FIRST, INDEX_NM_CHANNEL, INDEX_NM_DIMM, + INDEX_NM_CS, INDEX_MAX }; #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) +#define BIT_NM_CS BIT_ULL(INDEX_NM_CS) struct decoded_addr { struct mce *mce; @@ -134,6 +137,7 @@ struct decoded_addr { int sktways; int chanways; int dimm; + int cs; int rank; int channel_rank; u64 rank_address; -- cgit v1.2.3 From acd4cf68fefe70138926056e3137c9ea99ef7ebf Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 22 Jul 2022 16:33:36 -0700 Subject: EDAC/i10nm: Retrieve and print retry_rd_err_log registers for HBM An HBM memory channel is divided into two pseudo channels. Each pseudo channel has its own retry_rd_err_log registers. Retrieve and print retry_rd_err_log registers of the HBM pseudo channel if the memory error is from HBM. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20220722233338.341567-1-tony.luck@intel.com --- drivers/edac/i10nm_base.c | 84 +++++++++++++++++++++++++++++++++++++---------- drivers/edac/skx_common.h | 4 +++ 2 files changed, 71 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 817f618fcff0..b5e9db162915 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -79,18 +79,20 @@ static bool mem_cfg_2lm; static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; +static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}; +static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}; static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; +static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; +static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable) +static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, + u32 *offsets_scrub, u32 *offsets_demand) { u32 s, d; - if (!imc->mbase) - return; - - s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]); - d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]); + s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]); + d = I10NM_GET_REG32(imc, chan, offsets_demand[0]); if (enable) { /* Save default configurations */ @@ -117,21 +119,39 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable d &= ~RETRY_RD_ERR_LOG_EN; } - I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s); - I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d); + I10NM_SET_REG32(imc, chan, offsets_scrub[0], s); + I10NM_SET_REG32(imc, chan, offsets_demand[0], d); } static void enable_retry_rd_err_log(bool enable) { + struct skx_imc *imc; struct skx_dev *d; int i, j; edac_dbg(2, "\n"); list_for_each_entry(d, i10nm_edac_list, list) - for (i = 0; i < I10NM_NUM_IMC; i++) - for (j = 0; j < I10NM_NUM_CHANNELS; j++) - __enable_retry_rd_err_log(&d->imc[i], j, enable); + for (i = 0; i < I10NM_NUM_IMC; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + for (j = 0; j < I10NM_NUM_CHANNELS; j++) { + if (imc->hbm_mc) { + __enable_retry_rd_err_log(imc, j, enable, + res_cfg->offsets_scrub_hbm0, + res_cfg->offsets_demand_hbm0); + __enable_retry_rd_err_log(imc, j, enable, + res_cfg->offsets_scrub_hbm1, + res_cfg->offsets_demand_hbm1); + } else { + __enable_retry_rd_err_log(imc, j, enable, + res_cfg->offsets_scrub, + res_cfg->offsets_demand); + } + } + } } static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, @@ -142,12 +162,24 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, u32 corr0, corr1, corr2, corr3; u64 log2a, log5; u32 *offsets; - int n; + int n, pch; if (!imc->mbase) return; - offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand; + if (imc->hbm_mc) { + pch = res->cs & 1; + + if (pch) + offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 : + res_cfg->offsets_demand_hbm1; + else + offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 : + res_cfg->offsets_demand_hbm0; + } else { + offsets = scrub_err ? res_cfg->offsets_scrub : + res_cfg->offsets_demand; + } log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); @@ -165,10 +197,24 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, log0, log1, log2, log3, log4, log5); } - corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); + if (imc->hbm_mc) { + if (pch) { + corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24); + } else { + corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824); + } + } else { + corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); + } if (len - n > 0) snprintf(msg + n, len - n, @@ -519,7 +565,11 @@ static struct res_config spr_cfg = { .sad_all_devfn = PCI_DEVFN(10, 0), .sad_all_offset = 0x300, .offsets_scrub = offsets_scrub_spr, + .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, + .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1, .offsets_demand = offsets_demand_spr, + .offsets_demand_hbm0 = offsets_demand_spr_hbm0, + .offsets_demand_hbm1 = offsets_demand_spr_hbm1, }; static const struct x86_cpu_id i10nm_cpuids[] = { diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 167760fd75ba..455e652c0e46 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -164,7 +164,11 @@ struct res_config { int sad_all_offset; /* Offsets of retry_rd_err_log registers */ u32 *offsets_scrub; + u32 *offsets_scrub_hbm0; + u32 *offsets_scrub_hbm1; u32 *offsets_demand; + u32 *offsets_demand_hbm0; + u32 *offsets_demand_hbm1; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, -- cgit v1.2.3 From d5f5e49953f68bb7b15afd6e32ad176b987c6525 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 22 Jul 2022 16:33:37 -0700 Subject: EDAC/i10nm: Print an extra register set of retry_rd_err_log Sapphire Rapids server adds an extra register set for logging more retry_rd_err_log data. So add code to print the extra register set. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20220722233338.341567-1-tony.luck@intel.com --- drivers/edac/i10nm_base.c | 81 ++++++++++++++++++++++++++++++++++++++++------- drivers/edac/skx_common.h | 2 ++ 2 files changed, 72 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index b5e9db162915..a22ea053f8e1 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -83,26 +83,38 @@ static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}; static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; +static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}; static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, - u32 *offsets_scrub, u32 *offsets_demand) + u32 *offsets_scrub, u32 *offsets_demand, + u32 *offsets_demand2) { - u32 s, d; + u32 s, d, d2; s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]); d = I10NM_GET_REG32(imc, chan, offsets_demand[0]); + if (offsets_demand2) + d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]); if (enable) { /* Save default configurations */ imc->chan[chan].retry_rd_err_log_s = s; imc->chan[chan].retry_rd_err_log_d = d; + if (offsets_demand2) + imc->chan[chan].retry_rd_err_log_d2 = d2; s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; s |= RETRY_RD_ERR_LOG_EN; d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; d |= RETRY_RD_ERR_LOG_EN; + + if (offsets_demand2) { + d2 &= ~RETRY_RD_ERR_LOG_UC; + d2 |= RETRY_RD_ERR_LOG_NOOVER; + d2 |= RETRY_RD_ERR_LOG_EN; + } } else { /* Restore default configurations */ if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) @@ -117,10 +129,21 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable d |= RETRY_RD_ERR_LOG_NOOVER; if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) d &= ~RETRY_RD_ERR_LOG_EN; + + if (offsets_demand2) { + if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC) + d2 |= RETRY_RD_ERR_LOG_UC; + if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER)) + d2 &= ~RETRY_RD_ERR_LOG_NOOVER; + if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN)) + d2 &= ~RETRY_RD_ERR_LOG_EN; + } } I10NM_SET_REG32(imc, chan, offsets_scrub[0], s); I10NM_SET_REG32(imc, chan, offsets_demand[0], d); + if (offsets_demand2) + I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2); } static void enable_retry_rd_err_log(bool enable) @@ -141,14 +164,17 @@ static void enable_retry_rd_err_log(bool enable) if (imc->hbm_mc) { __enable_retry_rd_err_log(imc, j, enable, res_cfg->offsets_scrub_hbm0, - res_cfg->offsets_demand_hbm0); + res_cfg->offsets_demand_hbm0, + NULL); __enable_retry_rd_err_log(imc, j, enable, res_cfg->offsets_scrub_hbm1, - res_cfg->offsets_demand_hbm1); + res_cfg->offsets_demand_hbm1, + NULL); } else { __enable_retry_rd_err_log(imc, j, enable, res_cfg->offsets_scrub, - res_cfg->offsets_demand); + res_cfg->offsets_demand, + res_cfg->offsets_demand2); } } } @@ -160,7 +186,10 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, struct skx_imc *imc = &res->dev->imc[res->imc]; u32 log0, log1, log2, log3, log4; u32 corr0, corr1, corr2, corr3; + u32 lxg0, lxg1, lxg3, lxg4; + u32 *xffsets = NULL; u64 log2a, log5; + u64 lxg2a, lxg5; u32 *offsets; int n, pch; @@ -177,8 +206,12 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 : res_cfg->offsets_demand_hbm0; } else { - offsets = scrub_err ? res_cfg->offsets_scrub : - res_cfg->offsets_demand; + if (scrub_err) { + offsets = res_cfg->offsets_scrub; + } else { + offsets = res_cfg->offsets_demand; + xffsets = res_cfg->offsets_demand2; + } } log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); @@ -187,10 +220,28 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); + if (xffsets) { + lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]); + lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]); + lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]); + lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]); + lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]); + } + if (res_cfg->type == SPR) { log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx]", + n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx", log0, log1, log2a, log3, log4, log5); + + if (len - n > 0) { + if (xffsets) { + lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]); + n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]", + lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5); + } else { + n += snprintf(msg + n, len - n, "]"); + } + } } else { log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]", @@ -225,9 +276,16 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, corr3 & 0xffff, corr3 >> 16); /* Clear status bits */ - if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { - log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + if (retry_rd_err_log == 2) { + if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) { + log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; + I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + } + + if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { + lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; + I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0); + } } } @@ -568,6 +626,7 @@ static struct res_config spr_cfg = { .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1, .offsets_demand = offsets_demand_spr, + .offsets_demand2 = offsets_demand2_spr, .offsets_demand_hbm0 = offsets_demand_spr_hbm0, .offsets_demand_hbm1 = offsets_demand_spr_hbm1, }; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 455e652c0e46..0cbadd3d2cd3 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -86,6 +86,7 @@ struct skx_dev { struct pci_dev *edev; u32 retry_rd_err_log_s; u32 retry_rd_err_log_d; + u32 retry_rd_err_log_d2; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; @@ -167,6 +168,7 @@ struct res_config { u32 *offsets_scrub_hbm0; u32 *offsets_scrub_hbm1; u32 *offsets_demand; + u32 *offsets_demand2; u32 *offsets_demand_hbm0; u32 *offsets_demand_hbm1; }; -- cgit v1.2.3 From d389059685b46861c264cda4f37a33feeab91dfc Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Fri, 22 Jul 2022 16:33:38 -0700 Subject: x86/sb_edac: Add row column translation for Broadwell The sb_edac driver lacks translation for DIMM internal address. Add memory address translation for row/column/bank/bank_group on Broadwell. Signed-off-by: Youquan Song Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20220722233338.341567-1-tony.luck@intel.com --- drivers/edac/sb_edac.c | 148 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 138 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 9678ab97c7ac..8e39370fdb5c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -335,6 +335,12 @@ struct sbridge_info { struct sbridge_channel { u32 ranks; u32 dimms; + struct dimm { + u32 rowbits; + u32 colbits; + u32 bank_xor_enable; + u32 amap_fine; + } dimm[MAX_DIMMS]; }; struct pci_id_descr { @@ -1603,7 +1609,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, banks = 8; for (i = 0; i < channels; i++) { - u32 mtr; + u32 mtr, amap = 0; int max_dimms_per_channel; @@ -1615,6 +1621,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, max_dimms_per_channel = ARRAY_SIZE(mtr_regs); if (!pvt->pci_tad[i]) continue; + pci_read_config_dword(pvt->pci_tad[i], 0x8c, &amap); } for (j = 0; j < max_dimms_per_channel; j++) { @@ -1627,6 +1634,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, mtr_regs[j], &mtr); } edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr); + if (IS_DIMM_PRESENT(mtr)) { if (!IS_ECC_ENABLED(pvt->info.mcmtr)) { sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n", @@ -1661,6 +1669,11 @@ static int __populate_dimms(struct mem_ctl_info *mci, dimm->dtype = pvt->info.get_width(pvt, mtr); dimm->mtype = mtype; dimm->edac_mode = mode; + pvt->channel[i].dimm[j].rowbits = order_base_2(rows); + pvt->channel[i].dimm[j].colbits = order_base_2(cols); + pvt->channel[i].dimm[j].bank_xor_enable = + GET_BITFIELD(pvt->info.mcmtr, 9, 9); + pvt->channel[i].dimm[j].amap_fine = GET_BITFIELD(amap, 0, 0); snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u", pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j); @@ -1922,6 +1935,99 @@ static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha) return NULL; } +static u8 sb_close_row[] = { + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 +}; + +static u8 sb_close_column[] = { + 3, 4, 5, 14, 19, 23, 24, 25, 26, 27 +}; + +static u8 sb_open_row[] = { + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 +}; + +static u8 sb_open_column[] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 +}; + +static u8 sb_open_fine_column[] = { + 3, 4, 5, 7, 8, 9, 10, 11, 12, 13 +}; + +static int sb_bits(u64 addr, int nbits, u8 *bits) +{ + int i, res = 0; + + for (i = 0; i < nbits; i++) + res |= ((addr >> bits[i]) & 1) << i; + return res; +} + +static int sb_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1) +{ + int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1); + + if (do_xor) + ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1); + + return ret; +} + +static bool sb_decode_ddr4(struct mem_ctl_info *mci, int ch, u8 rank, + u64 rank_addr, char *msg) +{ + int dimmno = 0; + int row, col, bank_address, bank_group; + struct sbridge_pvt *pvt; + u32 bg0 = 0, rowbits = 0, colbits = 0; + u32 amap_fine = 0, bank_xor_enable = 0; + + dimmno = (rank < 12) ? rank / 4 : 2; + pvt = mci->pvt_info; + amap_fine = pvt->channel[ch].dimm[dimmno].amap_fine; + bg0 = amap_fine ? 6 : 13; + rowbits = pvt->channel[ch].dimm[dimmno].rowbits; + colbits = pvt->channel[ch].dimm[dimmno].colbits; + bank_xor_enable = pvt->channel[ch].dimm[dimmno].bank_xor_enable; + + if (pvt->is_lockstep) { + pr_warn_once("LockStep row/column decode is not supported yet!\n"); + msg[0] = '\0'; + return false; + } + + if (pvt->is_close_pg) { + row = sb_bits(rank_addr, rowbits, sb_close_row); + col = sb_bits(rank_addr, colbits, sb_close_column); + col |= 0x400; /* C10 is autoprecharge, always set */ + bank_address = sb_bank_bits(rank_addr, 8, 9, bank_xor_enable, 22, 28); + bank_group = sb_bank_bits(rank_addr, 6, 7, bank_xor_enable, 20, 21); + } else { + row = sb_bits(rank_addr, rowbits, sb_open_row); + if (amap_fine) + col = sb_bits(rank_addr, colbits, sb_open_fine_column); + else + col = sb_bits(rank_addr, colbits, sb_open_column); + bank_address = sb_bank_bits(rank_addr, 18, 19, bank_xor_enable, 22, 23); + bank_group = sb_bank_bits(rank_addr, bg0, 17, bank_xor_enable, 20, 21); + } + + row &= (1u << rowbits) - 1; + + sprintf(msg, "row:0x%x col:0x%x bank_addr:%d bank_group:%d", + row, col, bank_address, bank_group); + return true; +} + +static bool sb_decode_ddr3(struct mem_ctl_info *mci, int ch, u8 rank, + u64 rank_addr, char *msg) +{ + pr_warn_once("DDR3 row/column decode not support yet!\n"); + msg[0] = '\0'; + return false; +} + static int get_memory_error_data(struct mem_ctl_info *mci, u64 addr, u8 *socket, u8 *ha, @@ -1937,12 +2043,13 @@ static int get_memory_error_data(struct mem_ctl_info *mci, int interleave_mode, shiftup = 0; unsigned int sad_interleave[MAX_INTERLEAVE]; u32 reg, dram_rule; - u8 ch_way, sck_way, pkg, sad_ha = 0; + u8 ch_way, sck_way, pkg, sad_ha = 0, rankid = 0; u32 tad_offset; u32 rir_way; u32 mb, gb; u64 ch_addr, offset, limit = 0, prv = 0; - + u64 rank_addr; + enum mem_type mtype; /* * Step 0) Check if the address is at special memory ranges @@ -2226,6 +2333,28 @@ static int get_memory_error_data(struct mem_ctl_info *mci, pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], ®); *rank = RIR_RNK_TGT(pvt->info.type, reg); + if (pvt->info.type == BROADWELL) { + if (pvt->is_close_pg) + shiftup = 6; + else + shiftup = 13; + + rank_addr = ch_addr >> shiftup; + rank_addr /= (1 << rir_way); + rank_addr <<= shiftup; + rank_addr |= ch_addr & GENMASK_ULL(shiftup - 1, 0); + rank_addr -= RIR_OFFSET(pvt->info.type, reg); + + mtype = pvt->info.get_memory_type(pvt); + rankid = *rank; + if (mtype == MEM_DDR4 || mtype == MEM_RDDR4) + sb_decode_ddr4(mci, base_ch, rankid, rank_addr, msg); + else + sb_decode_ddr3(mci, base_ch, rankid, rank_addr, msg); + } else { + msg[0] = '\0'; + } + edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", n_rir, ch_addr, @@ -2950,7 +3079,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; enum hw_event_mc_err_type tp_event; - char *optype, msg[256]; + char *optype, msg[256], msg_full[512]; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); @@ -3089,18 +3218,17 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, */ if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg) channel = first_channel; - - snprintf(msg, sizeof(msg), - "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d", + snprintf(msg_full, sizeof(msg_full), + "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", area_type, mscod, errcode, socket, ha, channel_mask, - rank); + rank, msg); - edac_dbg(0, "%s\n", msg); + edac_dbg(0, "%s\n", msg_full); /* FIXME: need support for channel mask */ @@ -3111,7 +3239,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, channel, dimm, -1, - optype, msg); + optype, msg_full); return; err_parsing: edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, -- cgit v1.2.3 From d3923513edd7f4a614a169122b0eb6b9acb2c8a3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 5 Aug 2022 13:50:08 +0100 Subject: EDAC/i7300: Correct the i7300_exit() function name in comment The incorrect function name is being used in the comment for function i7300_exit. Correct this. Signed-off-by: Colin Ian King Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220805125008.2346559-1-colin.i.king@gmail.com --- drivers/edac/i7300_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 4f28b8c8d378..61adaa872ba7 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -1193,7 +1193,7 @@ static int __init i7300_init(void) } /** - * i7300_init() - Unregisters the driver + * i7300_exit() - Unregisters the driver */ static void __exit i7300_exit(void) { -- cgit v1.2.3