summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jason M. Bills <jason.m.bills@intel.com> 2019-08-06 21:10:02 +0300
committer: Jason M. Bills <jason.m.bills@linux.intel.com> 2019-09-04 19:05:50 +0300
commit: 735bac3bd01e21418aeacbc478ed791422796efe (patch)
tree: ab2e54484cf0f9cc6971fba0990b54c230651bef
parent: 4a7b10afa702c98a83122e59ce4929afad8cf881 (diff)
downloadprovingground-735bac3bd01e21418aeacbc478ed791422796efe.tar.xz
Add SMI timeout monitoring and logging
This adds SMI timeout monitoring to the host error monitor. When the SMI signal is asserted for more than 90 seconds, the BMC will log it, trigger a Crashdump, and reset the system if enabled.

Tested: Manually triggered an SMI timeout and confirmed that the event is handled and logged correctly.

Change-Id: I0579c96211d8e6abcdc190c154f3671151d5e60d
Signed-off-by: Jason M. Bills <jason.m.bills@intel.com>
-rw-r--r--host_error_monitor/src/host_error_monitor.cpp96
1 files changed, 96 insertions, 0 deletions
diff --git a/host_error_monitor/src/host_error_monitor.cpp b/host_error_monitor/src/host_error_monitor.cpp
index 9376a3e..a9ebd7e 100644
--- a/host_error_monitor/src/host_error_monitor.cpp
+++ b/host_error_monitor/src/host_error_monitor.cpp
@@ -32,6 +32,7 @@ static bool hostOff = true;
const static constexpr int caterrTimeoutMs = 2000;
const static constexpr int err2TimeoutMs = 90000;
+const static constexpr int smiTimeoutMs = 90000;
const static constexpr int crashdumpTimeoutS = 300;
// Timers
@@ -39,12 +40,16 @@ const static constexpr int crashdumpTimeoutS = 300;
static boost::asio::steady_timer caterrAssertTimer(io);
// Timer for ERR2 asserted
static boost::asio::steady_timer err2AssertTimer(io);
+// Timer for SMI asserted
+static boost::asio::steady_timer smiAssertTimer(io);
// GPIO Lines and Event Descriptors
static gpiod::line caterrLine;
static boost::asio::posix::stream_descriptor caterrEvent(io);
static gpiod::line err2Line;
static boost::asio::posix::stream_descriptor err2Event(io);
+static gpiod::line smiLine;
+static boost::asio::posix::stream_descriptor smiEvent(io);
//----------------------------------
// PCH_BMC_THERMTRIP function related definition
//----------------------------------
@@ -93,6 +98,13 @@ static void cpuERR2Log(const int cpuNum)
"REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL);
}
+// Write an "SMI Timeout" host-error entry to the systemd journal. The entry
+// carries MESSAGE, PRIORITY, REDFISH_MESSAGE_ID and REDFISH_MESSAGE_ARGS
+// fields.
+static void smiTimeoutLog()
+{
+    static constexpr const char* redfishMessageId = "OpenBMC.0.1.CPUError";
+    static constexpr const char* redfishMessageArgs = "SMI Timeout";
+
+    // Each "FIELD=..." entry is a printf-style format consuming the argument
+    // that follows it; the trailing NULL ends the variadic list.
+    sd_journal_send("MESSAGE=HostError: SMI Timeout",
+                    "PRIORITY=%i", LOG_INFO,
+                    "REDFISH_MESSAGE_ID=%s", redfishMessageId,
+                    "REDFISH_MESSAGE_ARGS=%s", redfishMessageArgs,
+                    NULL);
+}
+
static void initializeErrorState();
static void initializeHostState()
{
@@ -152,6 +164,7 @@ static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
{
caterrAssertTimer.cancel();
err2AssertTimer.cancel();
+ smiAssertTimer.cancel();
}
});
}
@@ -754,6 +767,75 @@ static void err2Handler()
});
}
+// Start the SMI timeout countdown (smiTimeoutMs). If the timer runs to
+// completion without being canceled, the timeout is reported on stderr and in
+// the journal, and the ResetOnSMI setting is read over D-Bus; its boolean
+// value is handed to startCrashdumpAndRecovery().
+static void smiAssertHandler()
+{
+    smiAssertTimer.expires_after(std::chrono::milliseconds(smiTimeoutMs));
+    smiAssertTimer.async_wait([](const boost::system::error_code ec) {
+        if (ec)
+        {
+            // operation_aborted is expected if timer is canceled before
+            // completion.
+            if (ec != boost::asio::error::operation_aborted)
+            {
+                std::cerr << "smi timeout async_wait failed: " << ec.message()
+                          << "\n";
+            }
+            return;
+        }
+        std::cerr << "SMI asserted for " << smiTimeoutMs << " ms\n";
+        smiTimeoutLog();
+        conn->async_method_call(
+            [](boost::system::error_code ec,
+               const std::variant<bool>& property) {
+                if (ec)
+                {
+                    // Do not fail silently: without the setting, neither
+                    // the crashdump nor the recovery below is started.
+                    std::cerr << "Unable to read reset on SMI value: "
+                              << ec.message() << "\n";
+                    return;
+                }
+                const bool* reset = std::get_if<bool>(&property);
+                if (reset == nullptr)
+                {
+                    std::cerr << "Unable to read reset on SMI value\n";
+                    return;
+                }
+                startCrashdumpAndRecovery(*reset);
+            },
+            "xyz.openbmc_project.Settings",
+            "/xyz/openbmc_project/control/bmc_reset_disables",
+            "org.freedesktop.DBus.Properties", "Get",
+            "xyz.openbmc_project.Control.ResetDisables", "ResetOnSMI");
+    });
+}
+
+// GPIO event handler for the SMI line. Consumes the pending edge event,
+// starts the SMI timeout on a falling edge (assertion) or cancels it on any
+// other edge, and then re-arms the wait for the next event.
+static void smiHandler()
+{
+    // Always consume the pending event, even while the host is off. An
+    // unread event leaves the descriptor readable, so the async_wait below
+    // would complete immediately and this handler would spin.
+    gpiod::line_event gpioLineEvent = smiLine.event_read();
+
+    // Edges seen while the host is off are dropped rather than acted on.
+    if (!hostOff)
+    {
+        bool smi = gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
+        if (smi)
+        {
+            smiAssertHandler();
+        }
+        else
+        {
+            smiAssertTimer.cancel();
+        }
+    }
+    smiEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
+                        [](const boost::system::error_code ec) {
+                            if (ec)
+                            {
+                                std::cerr
+                                    << "smi handler error: " << ec.message()
+                                    << "\n";
+                                return;
+                            }
+                            smiHandler();
+                        });
+}
+
static void initializeErrorState()
{
// Handle CPU_CATERR if it's asserted now
@@ -767,6 +849,12 @@ static void initializeErrorState()
{
err2AssertHandler();
}
+
+ // Handle SMI if it's asserted now
+ if (smiLine.get_value() == 0)
+ {
+ smiAssertHandler();
+ }
}
} // namespace host_error_monitor
@@ -805,6 +893,14 @@ int main(int argc, char* argv[])
return -1;
}
+ // Request SMI GPIO events
+ if (!host_error_monitor::requestGPIOEvents(
+ "SMI", host_error_monitor::smiHandler, host_error_monitor::smiLine,
+ host_error_monitor::smiEvent))
+ {
+ return -1;
+ }
+
// Request PCH_BMC_THERMTRIP GPIO events
if (!host_error_monitor::requestGPIOEvents(
"PCH_BMC_THERMTRIP", host_error_monitor::pchThermtripHandler,