diff options
author | Jason M. Bills <jason.m.bills@linux.intel.com> | 2020-12-08 00:45:20 +0300 |
---|---|---|
committer | Jason M. Bills <jason.m.bills@linux.intel.com> | 2020-12-10 01:15:05 +0300 |
commit | 82dbc15a05125a812c140a3c8cff81c366482229 (patch) | |
tree | 9c8f1ad262a2e281f20340cf8646aca6f8596044 /meta-openbmc-mods/meta-wht | |
parent | 8d6ae7f2a817751fad151168fa10ce28ee0869d8 (diff) | |
download | openbmc-82dbc15a05125a812c140a3c8cff81c366482229.tar.xz |
Update to internal 0.26
Signed-off-by: Jason M. Bills <jason.m.bills@linux.intel.com>
Diffstat (limited to 'meta-openbmc-mods/meta-wht')
2 files changed, 134 insertions, 0 deletions
diff --git a/meta-openbmc-mods/meta-wht/recipes-core/host-error-monitor/host-error-monitor/0002-Add-a-workaround-for-spurious-CPU-errors.patch b/meta-openbmc-mods/meta-wht/recipes-core/host-error-monitor/host-error-monitor/0002-Add-a-workaround-for-spurious-CPU-errors.patch new file mode 100644 index 000000000..2a573311f --- /dev/null +++ b/meta-openbmc-mods/meta-wht/recipes-core/host-error-monitor/host-error-monitor/0002-Add-a-workaround-for-spurious-CPU-errors.patch @@ -0,0 +1,133 @@ +From d0e4130b2d1e0e44efc8fd6e180487853625edd6 Mon Sep 17 00:00:00 2001 +From: "Jason M. Bills" <jason.m.bills@intel.com> +Date: Mon, 17 Aug 2020 15:52:22 -0700 +Subject: [PATCH] Add a workaround for spurious CPU errors + +There is a possible issue where GPIO event interrupts are getting +missed causing false errors to be logged. + +This adds a check that the host is still on and the error is still +asserted before logging an error. + +Tested: +Confirmed that a spurious SMI event was ignored correctly after +this change. + +Change-Id: Id83d9d67b15dcf9035e6448086b140e5c7dab4fe +Signed-off-by: Jason M. Bills <jason.m.bills@intel.com> +--- + src/host_error_monitor.cpp | 77 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 77 insertions(+) + +diff --git a/src/host_error_monitor.cpp b/src/host_error_monitor.cpp +index ca089f70d..fd453ccdc 100644 +--- a/src/host_error_monitor.cpp ++++ b/src/host_error_monitor.cpp +@@ -797,6 +797,18 @@ static void caterrAssertHandler() + } + return; + } ++ // Confirm that this is a real failure by checking that the host is on ++ if (hostOff) ++ { ++ return; ++ } ++ // And that the signal is still asserted ++ if (caterrLine.get_value() != 0) ++ { ++ std::cerr ++ << "CPU_CATERR not asserted after timeout. Error ignored.\n"; ++ return; ++ } + std::cerr << "CATERR asserted for " << std::to_string(caterrTimeoutMs) + << " ms\n"; + beep(beepCPUIERR); +@@ -1270,6 +1282,48 @@ static void errXAssertHandler(const int errPin, + } + return; + } ++ // Confirm that this is a real failure by checking that the host is on ++ if (hostOff) ++ { ++ return; ++ } ++ // And that the signal is still asserted ++ switch (errPin) ++ { ++ case 0: ++ { ++ if (err0Line.get_value() != 0) ++ { ++ std::cerr << "CPU_ERR0 not asserted after timeout. Error " ++ "ignored.\n"; ++ return; ++ } ++ break; ++ } ++ case 1: ++ { ++ if (err1Line.get_value() != 0) ++ { ++ std::cerr << "CPU_ERR1 not asserted after timeout. Error " ++ "ignored.\n"; ++ return; ++ } ++ break; ++ } ++ case 2: ++ { ++ if (err2Line.get_value() != 0) ++ { ++ std::cerr << "CPU_ERR2 not asserted after timeout. Error " ++ "ignored.\n"; ++ return; ++ } ++ break; ++ } ++ default: ++ std::cerr << "Invalid ERR pin asserted\n"; ++ return; ++ } + std::cerr << "ERR" << std::to_string(errPin) << " asserted for " + << std::to_string(errTimeoutMs) << " ms\n"; + if (errPinCPUs.count()) +@@ -1379,6 +1433,18 @@ static void err2AssertHandler() + } + return; + } ++ // Confirm that this is a real failure by checking that the host is on ++ if (hostOff) ++ { ++ return; ++ } ++ // And that the signal is still asserted ++ if (err2Line.get_value() != 0) ++ { ++ std::cerr ++ << "CPU_ERR2 not asserted after timeout. Error ignored.\n"; ++ return; ++ } + conn->async_method_call( + [](boost::system::error_code ec, + const std::variant<bool>& property) { +@@ -1447,6 +1513,17 @@ static void smiAssertHandler() + } + return; + } ++ // Confirm that this is a real failure by checking that the host is on ++ if (hostOff) ++ { ++ return; ++ } ++ // And that the signal is still asserted ++ if (smiLine.get_value() != 0) ++ { ++ std::cerr << "SMI not asserted after timeout. Error ignored.\n"; ++ return; ++ } + std::cerr << "SMI asserted for " << std::to_string(smiTimeoutMs) + << " ms\n"; + smiTimeoutLog(); +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-wht/recipes-core/host-error-monitor/host-error-monitor_%.bbappend b/meta-openbmc-mods/meta-wht/recipes-core/host-error-monitor/host-error-monitor_%.bbappend index 4b79757c0..0d1fd91d2 100644 --- a/meta-openbmc-mods/meta-wht/recipes-core/host-error-monitor/host-error-monitor_%.bbappend +++ b/meta-openbmc-mods/meta-wht/recipes-core/host-error-monitor/host-error-monitor_%.bbappend @@ -2,4 +2,5 @@ FILESEXTRAPATHS_append := "${THISDIR}/${PN}:" SRC_URI += " \ file://0001-Filter-memory-thermtrip-events-based-on-DIMM-status.patch \ + file://0002-Add-a-workaround-for-spurious-CPU-errors.patch \ " |