diff options
author | Jason M. Bills <jason.m.bills@intel.com> | 2019-08-16 20:01:44 +0300 |
---|---|---|
committer | Jason M. Bills <jason.m.bills@linux.intel.com> | 2019-08-26 18:04:33 +0300 |
commit | 10f4d8299e2b6d6251a33ea3da94a315bfdaf131 (patch) | |
tree | a558e5657581d8592906e6dbc1f3d99ec0a0cccd | |
parent | dd93b578f1b193cb08f46e7fced6efdeb38dcc96 (diff) | |
download | provingground-10f4d8299e2b6d6251a33ea3da94a315bfdaf131.tar.xz |
Check for initial error state on startup
Since the error monitor is looking at GPIO edge events, an error
won't be detected if it is already asserted when the monitor
starts. This change detects whether the host is already on when
monitoring starts and, if so, checks whether an error is already asserted.
Tested:
Injected an IERR while the error monitor was stopped. After
checking that the IERR pin was asserted, started the error monitor
and confirmed that the error was detected, logged, and handled
correctly.
Change-Id: Ie1e0238c914e6b2cda4b121f579776b813b191c1
Signed-off-by: Jason M. Bills <jason.m.bills@intel.com>
-rw-r--r-- | host_error_monitor/src/host_error_monitor.cpp | 95 |
1 files changed, 55 insertions, 40 deletions
diff --git a/host_error_monitor/src/host_error_monitor.cpp b/host_error_monitor/src/host_error_monitor.cpp index b9d214a..07ee69a 100644 --- a/host_error_monitor/src/host_error_monitor.cpp +++ b/host_error_monitor/src/host_error_monitor.cpp @@ -44,6 +44,7 @@ static boost::asio::posix::stream_descriptor caterrEvent(io); static gpiod::line pchThermtripLine; static boost::asio::posix::stream_descriptor pchThermtripEvent(io); +static void initializeErrorState(); static void initializeHostState() { conn->async_method_call( @@ -60,6 +61,11 @@ static void initializeHostState() return; } hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off"; + // If the system is on, initialize the error state + if (!hostOff) + { + initializeErrorState(); + } }, "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0", "org.freedesktop.DBus.Properties", "Get", @@ -215,6 +221,46 @@ static void startCrashdumpAndRecovery(bool recoverSystem) "com.intel.crashdump.Stored", "GenerateStoredLog"); } +static void caterrAssertHandler() +{ + std::cout << "CPU CATERR detected, starting timer\n"; + caterrAssertTimer.expires_after(std::chrono::milliseconds(caterrTimeoutMs)); + caterrAssertTimer.async_wait([](const boost::system::error_code ec) { + if (ec) + { + // operation_aborted is expected if timer is canceled + // before completion. 
+ if (ec != boost::asio::error::operation_aborted) + { + std::cerr << "caterr timeout async_wait failed: " + << ec.message() << "\n"; + } + std::cout << "CATERR assert timer canceled\n"; + return; + } + std::cout << "CATERR asset timer completed\n"; + conn->async_method_call( + [](boost::system::error_code ec, + const std::variant<bool>& property) { + if (ec) + { + return; + } + const bool* reset = std::get_if<bool>(&property); + if (reset == nullptr) + { + std::cerr << "Unable to read reset on CATERR value\n"; + return; + } + startCrashdumpAndRecovery(*reset); + }, + "xyz.openbmc_project.Settings", + "/xyz/openbmc_project/control/processor_error_config", + "org.freedesktop.DBus.Properties", "Get", + "xyz.openbmc_project.Control.Processor.ErrConfig", "ResetOnCATERR"); + }); +} + static void caterrHandler() { if (!hostOff) @@ -225,46 +271,7 @@ static void caterrHandler() gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE; if (caterr) { - std::cout << "CPU CATERR detected, starting timer\n"; - caterrAssertTimer.expires_after( - std::chrono::milliseconds(caterrTimeoutMs)); - caterrAssertTimer.async_wait( - [](const boost::system::error_code ec) { - if (ec) - { - // operation_aborted is expected if timer is canceled - // before completion. 
- if (ec != boost::asio::error::operation_aborted) - { - std::cerr << "caterr timeout async_wait failed: " - << ec.message() << "\n"; - } - std::cout << "CATERR assert timer canceled\n"; - return; - } - std::cout << "CATERR asset timer completed\n"; - conn->async_method_call( - [](boost::system::error_code ec, - const std::variant<bool>& property) { - if (ec) - { - return; - } - const bool* reset = std::get_if<bool>(&property); - if (reset == nullptr) - { - std::cerr - << "Unable to read reset on CATERR value\n"; - return; - } - startCrashdumpAndRecovery(*reset); - }, - "xyz.openbmc_project.Settings", - "/xyz/openbmc_project/control/processor_error_config", - "org.freedesktop.DBus.Properties", "Get", - "xyz.openbmc_project.Control.Processor.ErrConfig", - "ResetOnCATERR"); - }); + caterrAssertHandler(); } else { @@ -312,6 +319,14 @@ static void pchThermtripHandler() }); } +static void initializeErrorState() +{ + // Handle CPU_CATERR if it's asserted now + if (caterrLine.get_value() == 0) + { + caterrAssertHandler(); + } +} } // namespace host_error_monitor int main(int argc, char* argv[]) |