summaryrefslogtreecommitdiff
path: root/drivers/acpi/apei/ghes.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei/ghes.c')
-rw-r--r--drivers/acpi/apei/ghes.c118
1 files changed, 112 insertions, 6 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 63ad0541db38..7b7c605166e0 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -26,6 +26,7 @@
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
+#include <linux/cxl-event.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
@@ -102,6 +103,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes)
}
/*
+ * A platform may describe one error source for the handling of synchronous
+ * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI
+ * or External Interrupt). On x86, the HEST notifications are always
+ * asynchronous, so only SEA on ARM is delivered as a synchronous
+ * notification.
+ */
+static inline bool is_hest_sync_notify(struct ghes *ghes)
+{
+ u8 notify_type = ghes->generic->notify.type;
+
+ return notify_type == ACPI_HEST_NOTIFY_SEA;
+}
+
+/*
* This driver isn't really modular, however for the time being,
* continuing to use module_param is the easiest way to remain
* compatible with existing boot arg use cases.
@@ -489,7 +504,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags)
}
static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
- int sev)
+ int sev, bool sync)
{
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
@@ -503,7 +518,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
flags = MF_SOFT_OFFLINE;
if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
- flags = 0;
+ flags = sync ? MF_ACTION_REQUIRED : 0;
if (flags != -1)
return ghes_do_memory_failure(mem_err->physical_addr, flags);
@@ -511,9 +526,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
return false;
}
-static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
+static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
+ int sev, bool sync)
{
struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+ int flags = sync ? MF_ACTION_REQUIRED : 0;
bool queued = false;
int sec_sev, i;
char *p;
@@ -538,7 +555,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s
* and don't filter out 'corrected' error here.
*/
if (is_cache && has_pa) {
- queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0);
+ queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
p += err_info->length;
continue;
}
@@ -657,6 +674,78 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}
+/*
+ * Only a single callback can be registered for CXL CPER events.
+ */
+static DECLARE_RWSEM(cxl_cper_rw_sem);
+static cxl_cper_callback cper_callback;
+
+/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */
+
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CPER_SEC_CXL_GEN_MEDIA_GUID \
+ GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \
+ 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6)
+
+/*
+ * DRAM Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CPER_SEC_CXL_DRAM_GUID \
+ GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \
+ 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24)
+
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+#define CPER_SEC_CXL_MEM_MODULE_GUID \
+ GUID_INIT(0xfe927475, 0xdd59, 0x4339, \
+ 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74)
+
+static void cxl_cper_post_event(enum cxl_event_type event_type,
+ struct cxl_cper_event_rec *rec)
+{
+ if (rec->hdr.length <= sizeof(rec->hdr) ||
+ rec->hdr.length > sizeof(*rec)) {
+ pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
+ rec->hdr.length);
+ return;
+ }
+
+ if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
+ pr_err(FW_WARN "CXL CPER invalid event\n");
+ return;
+ }
+
+ guard(rwsem_read)(&cxl_cper_rw_sem);
+ if (cper_callback)
+ cper_callback(event_type, rec);
+}
+
+int cxl_cper_register_callback(cxl_cper_callback callback)
+{
+ guard(rwsem_write)(&cxl_cper_rw_sem);
+ if (cper_callback)
+ return -EINVAL;
+ cper_callback = callback;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL);
+
+int cxl_cper_unregister_callback(cxl_cper_callback callback)
+{
+ guard(rwsem_write)(&cxl_cper_rw_sem);
+ if (callback != cper_callback)
+ return -EINVAL;
+ cper_callback = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL);
+
static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@@ -666,6 +755,7 @@ static bool ghes_do_proc(struct ghes *ghes,
const guid_t *fru_id = &guid_null;
char *fru_text = "";
bool queued = false;
+ bool sync = is_hest_sync_notify(ghes);
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
@@ -683,13 +773,29 @@ static bool ghes_do_proc(struct ghes *ghes,
atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
arch_apei_report_mem_error(sev, mem_err);
- queued = ghes_handle_memory_failure(gdata, sev);
+ queued = ghes_handle_memory_failure(gdata, sev, sync);
}
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
ghes_handle_aer(gdata);
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
- queued = ghes_handle_arm_hw_error(gdata, sev);
+ queued = ghes_handle_arm_hw_error(gdata, sev, sync);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
+ struct cxl_cper_event_rec *rec =
+ acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
+ struct cxl_cper_event_rec *rec =
+ acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
+ } else if (guid_equal(sec_type,
+ &CPER_SEC_CXL_MEM_MODULE_GUID)) {
+ struct cxl_cper_event_rec *rec =
+ acpi_hest_get_payload(gdata);
+
+ cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
} else {
void *err = acpi_hest_get_payload(gdata);