Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--  drivers/acpi/apei/einj.c |  71
-rw-r--r--  drivers/acpi/apei/ghes.c | 118
2 files changed, 160 insertions(+), 29 deletions(-)
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 013eb621dc92..89fb9331c611 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -73,6 +73,7 @@ static u32 notrigger;
static u32 vendor_flags;
static struct debugfs_blob_wrapper vendor_blob;
+static struct debugfs_blob_wrapper vendor_errors;
static char vendor_dev[64];
/*
@@ -182,6 +183,21 @@ static int einj_timedout(u64 *t)
return 0;
}
+static void get_oem_vendor_struct(u64 paddr, int offset,
+ struct vendor_error_type_extension *v)
+{
+ unsigned long vendor_size;
+ u64 target_pa = paddr + offset + sizeof(struct vendor_error_type_extension);
+
+ vendor_size = v->length - sizeof(struct vendor_error_type_extension);
+
+ if (vendor_size)
+ vendor_errors.data = acpi_os_map_memory(target_pa, vendor_size);
+
+ if (vendor_errors.data)
+ vendor_errors.size = vendor_size;
+}
+
static void check_vendor_extension(u64 paddr,
struct set_error_type_with_address *v5param)
{
@@ -194,6 +210,7 @@ static void check_vendor_extension(u64 paddr,
v = acpi_os_map_iomem(paddr + offset, sizeof(*v));
if (!v)
return;
+ get_oem_vendor_struct(paddr, offset, v);
sbdf = v->pcie_sbdf;
sprintf(vendor_dev, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n",
sbdf >> 24, (sbdf >> 16) & 0xff,
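get_oem_vendor_struct() maps only the OEM-defined payload that trails the vendor extension header, so the oem_error debugfs blob added later in this patch excludes the header fields themselves. For reference, a sketch of the header layout this code assumes; the real definition lives earlier in einj.c and is not part of this diff:

/*
 * Sketch of the vendor extension header assumed above; the actual
 * definition is earlier in einj.c (not shown in this diff).
 */
struct vendor_error_type_extension {
	u32 length;	/* total length: header plus OEM data */
	u32 pcie_sbdf;	/* segment/bus/device/function of the device */
	u16 vendor_id;
	u16 device_id;
	u8  rev_id;
	u8  reserved[3];
};

The mapped range is [paddr + offset + sizeof(header), paddr + offset + v->length), i.e. just the trailing OEM data.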
@@ -577,38 +594,40 @@ static u64 error_param2;
static u64 error_param3;
static u64 error_param4;
static struct dentry *einj_debug_dir;
-static const char * const einj_error_type_string[] = {
- "0x00000001\tProcessor Correctable\n",
- "0x00000002\tProcessor Uncorrectable non-fatal\n",
- "0x00000004\tProcessor Uncorrectable fatal\n",
- "0x00000008\tMemory Correctable\n",
- "0x00000010\tMemory Uncorrectable non-fatal\n",
- "0x00000020\tMemory Uncorrectable fatal\n",
- "0x00000040\tPCI Express Correctable\n",
- "0x00000080\tPCI Express Uncorrectable non-fatal\n",
- "0x00000100\tPCI Express Uncorrectable fatal\n",
- "0x00000200\tPlatform Correctable\n",
- "0x00000400\tPlatform Uncorrectable non-fatal\n",
- "0x00000800\tPlatform Uncorrectable fatal\n",
- "0x00001000\tCXL.cache Protocol Correctable\n",
- "0x00002000\tCXL.cache Protocol Uncorrectable non-fatal\n",
- "0x00004000\tCXL.cache Protocol Uncorrectable fatal\n",
- "0x00008000\tCXL.mem Protocol Correctable\n",
- "0x00010000\tCXL.mem Protocol Uncorrectable non-fatal\n",
- "0x00020000\tCXL.mem Protocol Uncorrectable fatal\n",
+static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
+ { BIT(0), "Processor Correctable" },
+ { BIT(1), "Processor Uncorrectable non-fatal" },
+ { BIT(2), "Processor Uncorrectable fatal" },
+ { BIT(3), "Memory Correctable" },
+ { BIT(4), "Memory Uncorrectable non-fatal" },
+ { BIT(5), "Memory Uncorrectable fatal" },
+ { BIT(6), "PCI Express Correctable" },
+ { BIT(7), "PCI Express Uncorrectable non-fatal" },
+ { BIT(8), "PCI Express Uncorrectable fatal" },
+ { BIT(9), "Platform Correctable" },
+ { BIT(10), "Platform Uncorrectable non-fatal" },
+ { BIT(11), "Platform Uncorrectable fatal" },
+ { BIT(12), "CXL.cache Protocol Correctable" },
+ { BIT(13), "CXL.cache Protocol Uncorrectable non-fatal" },
+ { BIT(14), "CXL.cache Protocol Uncorrectable fatal" },
+ { BIT(15), "CXL.mem Protocol Correctable" },
+ { BIT(16), "CXL.mem Protocol Uncorrectable non-fatal" },
+ { BIT(17), "CXL.mem Protocol Uncorrectable fatal" },
+ { BIT(31), "Vendor Defined Error Types" },
};
static int available_error_type_show(struct seq_file *m, void *v)
{
int rc;
- u32 available_error_type = 0;
+ u32 error_type = 0;
- rc = einj_get_available_error_type(&available_error_type);
+ rc = einj_get_available_error_type(&error_type);
if (rc)
return rc;
for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++)
- if (available_error_type & BIT(pos))
- seq_puts(m, einj_error_type_string[pos]);
+ if (error_type & einj_error_type_string[pos].mask)
+ seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
+ einj_error_type_string[pos].str);
return 0;
}
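Keying the table by an explicit mask rather than by array index lets sparse bits such as BIT(31) print without thirteen placeholder entries, while the rendered format is unchanged. Illustrative output for a platform advertising memory and vendor-defined error types (values are hypothetical):

0x00000008	Memory Correctable
0x00000010	Memory Uncorrectable non-fatal
0x80000000	Vendor Defined Error Types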
@@ -767,6 +786,10 @@ static int __init einj_init(void)
einj_debug_dir, &vendor_flags);
}
+ if (vendor_errors.size)
+ debugfs_create_blob("oem_error", 0600, einj_debug_dir,
+ &vendor_errors);
+
pr_info("Error INJection is initialized.\n");
return 0;
@@ -792,6 +815,8 @@ static void __exit einj_exit(void)
sizeof(struct einj_parameter);
acpi_os_unmap_iomem(einj_param, size);
+ if (vendor_errors.size)
+ acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size);
}
einj_exec_ctx_init(&ctx);
apei_exec_post_unmap_gars(&ctx);
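With the blob registered, the vendor payload can be dumped from userspace. A minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and the platform populated a non-empty vendor extension:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/debug/apei/einj/oem_error", O_RDONLY);

	if (fd < 0) {
		perror("open oem_error");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);	/* raw OEM bytes */
	close(fd);
	return 0;
}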
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 63ad0541db38..7b7c605166e0 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -26,6 +26,7 @@
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
+#include <linux/cxl-event.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
@@ -102,6 +103,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes)
}
/*
+ * A platform may describe one error source for handling synchronous
+ * errors (e.g. MCE or SEA), or one for handling asynchronous errors (e.g. SCI
+ * or External Interrupt). On x86, the HEST notifications are always
+ * asynchronous, so only SEA on ARM is delivered as a synchronous
+ * notification.
+ */
+static inline bool is_hest_sync_notify(struct ghes *ghes)
+{
+ u8 notify_type = ghes->generic->notify.type;
+
+ return notify_type == ACPI_HEST_NOTIFY_SEA;
+}
+
+/*
* This driver isn't really modular, however for the time being,
* continuing to use module_param is the easiest way to remain
* compatible with existing boot arg use cases.
@@ -489,7 +504,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags)
}
static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
- int sev)
+ int sev, bool sync)
{
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
@@ -503,7 +518,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
flags = MF_SOFT_OFFLINE;
if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
- flags = 0;
+ flags = sync ? MF_ACTION_REQUIRED : 0;
if (flags != -1)
return ghes_do_memory_failure(mem_err->physical_addr, flags);
@@ -511,9 +526,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
return false;
}
-static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
+static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
+ int sev, bool sync)
{
struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+ int flags = sync ? MF_ACTION_REQUIRED : 0;
bool queued = false;
int sec_sev, i;
char *p;
@@ -538,7 +555,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s
* and don't filter out 'corrected' error here.
*/
if (is_cache && has_pa) {
- queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0);
+ queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
p += err_info->length;
continue;
}
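The two hunks above route the new sync flag into memory_failure(): a synchronous notification (SEA) means the faulting thread consumed the poison and cannot make progress, so the handling must not be deferred. A condensed paraphrase of the path, using names from the diff:

	bool sync = is_hest_sync_notify(ghes);	/* true only for SEA */

	/*
	 * Recoverable + synchronous: act on the current task before
	 * returning, instead of queueing the page for later handling.
	 */
	flags = sync ? MF_ACTION_REQUIRED : 0;
	queued = ghes_do_memory_failure(mem_err->physical_addr, flags);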
@@ -657,6 +674,78 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}
+/*
+ * Only a single callback can be registered for CXL CPER events.
+ */
+static DECLARE_RWSEM(cxl_cper_rw_sem);
+static cxl_cper_callback cper_callback;
+
+/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
+
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CPER_SEC_CXL_GEN_MEDIA_GUID \
+ GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \
+ 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6)
+
+/*
+ * DRAM Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CPER_SEC_CXL_DRAM_GUID \
+ GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \
+ 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24)
+
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+#define CPER_SEC_CXL_MEM_MODULE_GUID \
+ GUID_INIT(0xfe927475, 0xdd59, 0x4339, \
+ 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74)
+
+static void cxl_cper_post_event(enum cxl_event_type event_type,
+ struct cxl_cper_event_rec *rec)
+{
+ if (rec->hdr.length <= sizeof(rec->hdr) ||
+ rec->hdr.length > sizeof(*rec)) {
+ pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
+ rec->hdr.length);
+ return;
+ }
+
+ if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
+ pr_err(FW_WARN "CXL CPER invalid event\n");
+ return;
+ }
+
+ guard(rwsem_read)(&cxl_cper_rw_sem);
+ if (cper_callback)
+ cper_callback(event_type, rec);
+}
+
+int cxl_cper_register_callback(cxl_cper_callback callback)
+{
+ guard(rwsem_write)(&cxl_cper_rw_sem);
+ if (cper_callback)
+ return -EINVAL;
+ cper_callback = callback;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL);
+
+int cxl_cper_unregister_callback(cxl_cper_callback callback)
+{
+ guard(rwsem_write)(&cxl_cper_rw_sem);
+ if (callback != cper_callback)
+ return -EINVAL;
+ cper_callback = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL);
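A consumer registers one callback that receives all three record types. A hedged sketch of a module using this interface; the handler body and the demo_* names are hypothetical, and the callback signature is inferred from cxl_cper_post_event() above:

#include <linux/module.h>
#include <linux/cxl-event.h>

static void demo_cxl_cper_handler(enum cxl_event_type type,
				  struct cxl_cper_event_rec *rec)
{
	/* hypothetical: forward rec into the CXL event machinery */
}

static int __init demo_init(void)
{
	/* fails with -EINVAL if another callback is already registered */
	return cxl_cper_register_callback(demo_cxl_cper_handler);
}
module_init(demo_init);

static void __exit demo_exit(void)
{
	cxl_cper_unregister_callback(demo_cxl_cper_handler);
}
module_exit(demo_exit);

MODULE_LICENSE("GPL");
/* the register/unregister symbols live in the CXL symbol namespace */
MODULE_IMPORT_NS(CXL);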
+
static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@@ -666,6 +755,7 @@ static bool ghes_do_proc(struct ghes *ghes,
const guid_t *fru_id = &guid_null;
char *fru_text = "";
bool queued = false;
+ bool sync = is_hest_sync_notify(ghes);
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
@@ -683,13 +773,29 @@ static bool ghes_do_proc(struct ghes *ghes,
atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
arch_apei_report_mem_error(sev, mem_err);
- queued = ghes_handle_memory_failure(gdata, sev);
+ queued = ghes_handle_memory_failure(gdata, sev, sync);
}
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
ghes_handle_aer(gdata);
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
- queued = ghes_handle_arm_hw_error(gdata, sev);
+ queued = ghes_handle_arm_hw_error(gdata, sev, sync);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
+ struct cxl_cper_event_rec *rec =
+ acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
+ struct cxl_cper_event_rec *rec =
+ acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
+ } else if (guid_equal(sec_type,
+ &CPER_SEC_CXL_MEM_MODULE_GUID)) {
+ struct cxl_cper_event_rec *rec =
+ acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
} else {
void *err = acpi_hest_get_payload(gdata);