summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason M. Bills <jason.m.bills@intel.com>2019-08-06 21:52:58 +0300
committerJason M. Bills <jason.m.bills@linux.intel.com>2019-09-06 03:12:22 +0300
commit3de7fa0c8477d94dfd7bcbf81b079cb89eba19e3 (patch)
treec7caf2f0cae9f68e3dc99468b617c6247f141f28
parent735bac3bd01e21418aeacbc478ed791422796efe (diff)
downloadprovingground-3de7fa0c8477d94dfd7bcbf81b079cb89eba19e3.tar.xz
Increment the CPU Error Count setting on an IERR
When an IERR is detected on a CPU, increment the CPU Error Count setting. Tested: Manually triggered an IERR and confirmed that the CPU Error Count for the correct CPU was incremented. Change-Id: I5d1e9de506bbcbf04446b173a649f88e8e5303bb Signed-off-by: Jason M. Bills <jason.m.bills@intel.com>
-rw-r--r--host_error_monitor/src/host_error_monitor.cpp51
1 files changed, 51 insertions, 0 deletions
diff --git a/host_error_monitor/src/host_error_monitor.cpp b/host_error_monitor/src/host_error_monitor.cpp
index a9ebd7e..7454a2d 100644
--- a/host_error_monitor/src/host_error_monitor.cpp
+++ b/host_error_monitor/src/host_error_monitor.cpp
@@ -22,6 +22,7 @@
#include <gpiod.hpp>
#include <iostream>
#include <sdbusplus/asio/object_server.hpp>
+#include <variant>
namespace host_error_monitor
{
@@ -284,6 +285,54 @@ static void startCrashdumpAndRecovery(bool recoverSystem)
"com.intel.crashdump.Stored", "GenerateStoredLog");
}
+static void incrementCPUErrorCount(int cpuNum)
+{
+ std::string propertyName = "ErrorCountCPU" + std::to_string(cpuNum + 1);
+
+ // Get the current count
+ conn->async_method_call(
+ [propertyName](boost::system::error_code ec,
+ const std::variant<uint8_t>& property) {
+ if (ec)
+ {
+ std::cerr << "Failed to read " << propertyName << ": "
+ << ec.message() << "\n";
+ return;
+ }
+ const uint8_t* errorCountVariant = std::get_if<uint8_t>(&property);
+ if (errorCountVariant == nullptr)
+ {
+ std::cerr << propertyName << " invalid\n";
+ return;
+ }
+ uint8_t errorCount = *errorCountVariant;
+ if (errorCount == std::numeric_limits<uint8_t>::max())
+ {
+ std::cerr << "Maximum error count reached\n";
+ return;
+ }
+ // Increment the count
+ errorCount++;
+ conn->async_method_call(
+ [propertyName](boost::system::error_code ec) {
+ if (ec)
+ {
+ std::cerr << "Failed to set " << propertyName << ": "
+ << ec.message() << "\n";
+ }
+ },
+ "xyz.openbmc_project.Settings",
+ "/xyz/openbmc_project/control/processor_error_config",
+ "org.freedesktop.DBus.Properties", "Set",
+ "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName,
+ std::variant<uint8_t>{errorCount});
+ },
+ "xyz.openbmc_project.Settings",
+ "/xyz/openbmc_project/control/processor_error_config",
+ "org.freedesktop.DBus.Properties", "Get",
+ "xyz.openbmc_project.Control.Processor.ErrConfig", propertyName);
+}
+
static bool checkIERRCPUs()
{
bool cpuIERRFound = false;
@@ -339,6 +388,7 @@ static bool checkIERRCPUs()
{
// TODO: Light the CPU fault LED?
cpuIERRFound = true;
+ incrementCPUErrorCount(cpu);
// Next check if it's a CPU/VR mismatch by reading the
// IA32_MC4_STATUS MSR (0x411)
uint64_t mc4Status = 0;
@@ -424,6 +474,7 @@ static bool checkIERRCPUs()
{
// TODO: Light the CPU fault LED?
cpuIERRFound = true;
+ incrementCPUErrorCount(cpu);
// Next check if it's a CPU/VR mismatch by reading the
// IA32_MC4_STATUS MSR (0x411)
uint64_t mc4Status = 0;