summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jason M. Bills <jason.m.bills@linux.intel.com> 2019-08-19 21:22:38 +0300
committer Jason M. Bills <jason.m.bills@linux.intel.com> 2019-08-22 04:55:13 +0300
commit dd93b578f1b193cb08f46e7fced6efdeb38dcc96 (patch)
tree 00418a1343e15611c34c5727b660fdc156cf5ed7
parent bce43ab726d060d614ee8a99f7989f0a412c4cfe (diff)
download provingground-dd93b578f1b193cb08f46e7fced6efdeb38dcc96.tar.xz
Move host-error-monitor to proving-ground
The old repo is being deprecated, so move host-error-monitor from the old at-scale-debug repo at commit 6ed87657 to here.

Change-Id: Ife30c3996675ebf2559aa4ed8fa7ada6af1b7f34
Signed-off-by: Jason M. Bills <jason.m.bills@linux.intel.com>
-rw-r--r--host_error_monitor/.clang-format98
-rw-r--r--host_error_monitor/CMakeLists.txt36
-rw-r--r--host_error_monitor/cmake-format.json12
-rw-r--r--host_error_monitor/service_files/xyz.openbmc_project.HostErrorMonitor.service10
-rw-r--r--host_error_monitor/src/host_error_monitor.cpp356
5 files changed, 512 insertions, 0 deletions
diff --git a/host_error_monitor/.clang-format b/host_error_monitor/.clang-format
new file mode 100644
index 0000000..ae9ad39
--- /dev/null
+++ b/host_error_monitor/.clang-format
@@ -0,0 +1,98 @@
+---
+Language: Cpp
+# BasedOnStyle: LLVM
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: false
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: true
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: true
+ AfterStruct: true
+ AfterUnion: true
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: AfterColon
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+PointerAlignment: Left
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeBlocks: Regroup
+IncludeCategories:
+ - Regex: '^[<"](gtest|gmock)'
+ Priority: 5
+ - Regex: '^"config.h"'
+ Priority: -1
+ - Regex: '^".*\.hpp"'
+ Priority: 1
+ - Regex: '^<.*\.h>'
+ Priority: 2
+ - Regex: '^<.*'
+ Priority: 3
+ - Regex: '.*'
+ Priority: 4
+IndentCaseLabels: true
+IndentWidth: 4
+IndentWrappedFunctionNames: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+ReflowComments: true
+SortIncludes: true
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 4
+UseTab: Never
+...
diff --git a/host_error_monitor/CMakeLists.txt b/host_error_monitor/CMakeLists.txt
new file mode 100644
index 0000000..ce7dbc1
--- /dev/null
+++ b/host_error_monitor/CMakeLists.txt
@@ -0,0 +1,36 @@
+# Build script for the host-error-monitor daemon: one C++17 executable built
+# from src/host_error_monitor.cpp, plus installation of its systemd unit.
+cmake_minimum_required (VERSION 3.6)
+project (host-error-monitor CXX)
+# C++17 is mandatory (no fallback to an older standard).
+set (CMAKE_CXX_STANDARD 17)
+set (CMAKE_CXX_STANDARD_REQUIRED ON)
+
+add_executable (host-error-monitor src/host_error_monitor.cpp)
+
+target_include_directories (host-error-monitor PRIVATE ${CMAKE_SOURCE_DIR})
+
+# Link against sdbusplus, libsystemd and the libgpiod C++ bindings.
+target_link_libraries (host-error-monitor sdbusplus -lsystemd gpiodcxx)
+
+include_directories (${CMAKE_CURRENT_SOURCE_DIR}/include)
+
+install (TARGETS host-error-monitor
+ RUNTIME DESTINATION bin
+ LIBRARY DESTINATION lib
+ ARCHIVE DESTINATION lib/static)
+
+find_package (Boost 1.66 REQUIRED)
+# NOTE(review): BOOST_SRC_DIR is not set anywhere in this file - presumably
+# it is supplied by the invoking build environment; confirm or drop.
+include_directories (${BOOST_SRC_DIR})
+
+# Boost configuration macros applied to every compile in this directory.
+add_definitions (-DBOOST_ERROR_CODE_HEADER_ONLY)
+add_definitions (-DBOOST_SYSTEM_NO_DEPRECATED)
+add_definitions (-DBOOST_ALL_NO_LIB)
+add_definitions (-DBOOST_NO_RTTI)
+add_definitions (-DBOOST_NO_TYPEID)
+add_definitions (-DBOOST_ASIO_DISABLE_THREADS)
+
+# Build without RTTI, matching the BOOST_NO_RTTI / BOOST_NO_TYPEID macros above.
+# NOTE(review): project() enables only CXX, so the CMAKE_C_FLAGS line looks
+# unused here - confirm.
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-rtti")
+
+# The systemd unit is installed to an absolute path, independent of the
+# install prefix used for the binary.
+set (
+ SERVICE_FILES
+ ${PROJECT_SOURCE_DIR}/service_files/xyz.openbmc_project.HostErrorMonitor.service
+)
+install (FILES ${SERVICE_FILES} DESTINATION /lib/systemd/system/)
diff --git a/host_error_monitor/cmake-format.json b/host_error_monitor/cmake-format.json
new file mode 100644
index 0000000..4a701ae
--- /dev/null
+++ b/host_error_monitor/cmake-format.json
@@ -0,0 +1,12 @@
+{
+ "enum_char": ".",
+ "line_ending": "unix",
+ "bullet_char": "*",
+ "max_subargs_per_line": 99,
+ "command_case": "lower",
+ "tab_size": 4,
+ "line_width": 80,
+ "separate_fn_name_with_space": true,
+ "dangle_parens": true,
+ "separate_ctrl_name_with_space": true
+}
diff --git a/host_error_monitor/service_files/xyz.openbmc_project.HostErrorMonitor.service b/host_error_monitor/service_files/xyz.openbmc_project.HostErrorMonitor.service
new file mode 100644
index 0000000..cf789e9
--- /dev/null
+++ b/host_error_monitor/service_files/xyz.openbmc_project.HostErrorMonitor.service
@@ -0,0 +1,10 @@
+[Unit]
+Description=Host Error Monitor
+
+[Service]
+Restart=always
+ExecStart=/usr/bin/host-error-monitor
+Type=simple
+
+[Install]
+WantedBy=multi-user.target
diff --git a/host_error_monitor/src/host_error_monitor.cpp b/host_error_monitor/src/host_error_monitor.cpp
new file mode 100644
index 0000000..b9d214a
--- /dev/null
+++ b/host_error_monitor/src/host_error_monitor.cpp
@@ -0,0 +1,356 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+#include <systemd/sd-journal.h>
+
+#include <boost/asio/posix/stream_descriptor.hpp>
+#include <gpiod.hpp>
+#include <iostream>
+#include <sdbusplus/asio/object_server.hpp>
+
+namespace host_error_monitor
+{
+// Event loop shared by the D-Bus connection, the timers and the GPIO event
+// descriptors below; main() runs it. It is declared first because the timers
+// and descriptors in this file are constructed from it.
+static boost::asio::io_service io;
+// D-Bus connection; assigned in main() before any function here uses it.
+static std::shared_ptr<sdbusplus::asio::connection> conn;
+
+// Cached host power state: true while CurrentHostState is
+// "xyz.openbmc_project.State.Host.HostState.Off". Defaults to true until the
+// first successful read or PropertiesChanged signal updates it.
+static bool hostOff = true;
+
+// Milliseconds CATERR must stay asserted before crashdump handling starts.
+const static constexpr int caterrTimeoutMs = 1000;
+// Seconds to wait for the crashdump-complete signal before giving up.
+const static constexpr int crashdumpTimeoutS = 300;
+
+// Timers
+// Started on a CATERR falling edge; canceled on any other CATERR edge or when
+// the host turns off.
+static boost::asio::steady_timer caterrAssertTimer(io);
+
+// CPU_CATERR GPIO line and the descriptor used to wait on its event fd
+static gpiod::line caterrLine;
+static boost::asio::posix::stream_descriptor caterrEvent(io);
+//----------------------------------
+// PCH_BMC_THERMTRIP related definitions
+//----------------------------------
+// PCH_BMC_THERMTRIP GPIO line and the descriptor used to wait on its event fd
+static gpiod::line pchThermtripLine;
+static boost::asio::posix::stream_descriptor pchThermtripEvent(io);
+
+// Asynchronously reads CurrentHostState from the host state service and seeds
+// hostOff from the reply. If the call fails, or the reply does not hold a
+// string, hostOff keeps its previous value.
+static void initializeHostState()
+{
+    conn->async_method_call(
+        [](boost::system::error_code callError,
+           const std::variant<std::string>& reply) {
+            if (callError)
+            {
+                // The D-Bus call failed; leave hostOff untouched.
+                return;
+            }
+            if (const auto* hostState = std::get_if<std::string>(&reply))
+            {
+                hostOff = (*hostState ==
+                           "xyz.openbmc_project.State.Host.HostState.Off");
+                return;
+            }
+            std::cerr << "Unable to read host state value\n";
+        },
+        "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
+        "org.freedesktop.DBus.Properties", "Get",
+        "xyz.openbmc_project.State.Host", "CurrentHostState");
+}
+
+// Subscribes to PropertiesChanged signals in the
+// xyz.openbmc_project.State.Host namespace and keeps hostOff in sync with the
+// CurrentHostState property. When the host turns off, the pending CATERR
+// timer is canceled.
+//
+// Returns the match object for the subscription; main() holds it for the
+// lifetime of the program.
+static std::shared_ptr<sdbusplus::bus::match::match> startHostStateMonitor()
+{
+    return std::make_shared<sdbusplus::bus::match::match>(
+        *conn,
+        "type='signal',interface='org.freedesktop.DBus.Properties',"
+        "member='PropertiesChanged',arg0namespace='xyz.openbmc_project.State."
+        "Host'",
+        [](sdbusplus::message::message& msg) {
+            std::string interfaceName;
+            boost::container::flat_map<std::string, std::variant<std::string>>
+                propertiesChanged;
+            try
+            {
+                msg.read(interfaceName, propertiesChanged);
+            }
+            catch (const std::exception&)
+            {
+                std::cerr << "Unable to read host state\n";
+                return;
+            }
+
+            // Look the property up by name instead of taking the first
+            // entry: the changed-properties map may be empty, or may carry
+            // only some other string property of the interface, and neither
+            // case says anything about the host power state.
+            const auto hostState = propertiesChanged.find("CurrentHostState");
+            if (hostState == propertiesChanged.end())
+            {
+                return;
+            }
+            const std::string* state =
+                std::get_if<std::string>(&hostState->second);
+            if (state == nullptr)
+            {
+                std::cerr << "Unable to read host state\n";
+                return;
+            }
+            hostOff = *state == "xyz.openbmc_project.State.Host.HostState.Off";
+
+            // No host events should fire while off, so cancel any pending
+            // timers
+            if (hostOff)
+            {
+                caterrAssertTimer.cancel();
+            }
+        });
+}
+
+// Finds the named GPIO line, requests both-edge events on it, binds the
+// line's event fd to gpioEventDescriptor and arms the first asynchronous
+// wait for that fd to become readable.
+//
+// name                 GPIO line name passed to gpiod::find_line()
+// handler              callback run when the first wait completes without
+//                      error; this function arms only that one wait
+// gpioLine             receives the located line
+// gpioEventDescriptor  receives the line's event fd
+//
+// Returns true once the wait is armed. Returns false, after logging to
+// stderr, if the line cannot be found, the event request fails, or no fd can
+// be obtained.
+static bool requestGPIOEvents(
+    const std::string& name, const std::function<void()>& handler,
+    gpiod::line& gpioLine,
+    boost::asio::posix::stream_descriptor& gpioEventDescriptor)
+{
+    // Find the GPIO line
+    gpioLine = gpiod::find_line(name);
+    if (!gpioLine)
+    {
+        std::cerr << "Failed to find the " << name << " line\n";
+        return false;
+    }
+
+    try
+    {
+        gpioLine.request(
+            {"host-error-monitor", gpiod::line_request::EVENT_BOTH_EDGES});
+    }
+    catch (const std::exception&)
+    {
+        std::cerr << "Failed to request events for " << name << "\n";
+        return false;
+    }
+
+    int gpioLineFd = gpioLine.event_get_fd();
+    if (gpioLineFd < 0)
+    {
+        std::cerr << "Failed to get " << name << " fd\n";
+        return false;
+    }
+
+    gpioEventDescriptor.assign(gpioLineFd);
+
+    // Capture name by value: the completion handler runs after this function
+    // has returned, and callers may pass a temporary std::string (for example
+    // one built from a string literal), so a reference capture would dangle.
+    gpioEventDescriptor.async_wait(
+        boost::asio::posix::stream_descriptor::wait_read,
+        [name, handler](const boost::system::error_code ec) {
+            if (ec)
+            {
+                std::cerr << name << " fd handler error: " << ec.message()
+                          << "\n";
+                return;
+            }
+            handler();
+        });
+    return true;
+}
+
+// Requests a chassis power cycle by setting RequestedPowerTransition on the
+// chassis state object over D-Bus. A failed Set is only logged to stderr.
+static void startPowerCycle()
+{
+    const std::variant<std::string> powerCycleRequest{
+        "xyz.openbmc_project.State.Chassis.Transition.PowerCycle"};
+
+    conn->async_method_call(
+        [](boost::system::error_code setError) {
+            if (!setError)
+            {
+                return;
+            }
+            std::cerr << "failed to set Chassis State\n";
+        },
+        "xyz.openbmc_project.State.Chassis",
+        "/xyz/openbmc_project/state/chassis0",
+        "org.freedesktop.DBus.Properties", "Set",
+        "xyz.openbmc_project.State.Chassis", "RequestedPowerTransition",
+        powerCycleRequest);
+}
+
+// Triggers a stored crashdump and, optionally, a power cycle once it is done.
+//
+// Three things are set up, in this order:
+//   1. a match on PropertiesChanged signals from the com.intel.crashdump
+//      namespace, treated as "crashdump completed";
+//   2. a timeout of crashdumpTimeoutS seconds that drops the match if no such
+//      signal arrives;
+//   3. the GenerateStoredLog D-Bus call that starts the crashdump.
+//
+// recoverSystem  when true, startPowerCycle() is called after the crashdump
+//                completes
+static void startCrashdumpAndRecovery(bool recoverSystem)
+{
+    std::cout << "Starting crashdump\n";
+    // Both objects are function-local statics so the callbacks below can
+    // reach them after this function has returned.
+    static std::shared_ptr<sdbusplus::bus::match::match> crashdumpCompleteMatch;
+    static boost::asio::steady_timer crashdumpTimer(io);
+
+    crashdumpCompleteMatch = std::make_shared<sdbusplus::bus::match::match>(
+        *conn,
+        "type='signal',interface='org.freedesktop.DBus.Properties',"
+        "member='PropertiesChanged',arg0namespace='com.intel.crashdump'",
+        [recoverSystem](sdbusplus::message::message&) {
+            // Completion arrived in time, so the timeout is no longer needed.
+            crashdumpTimer.cancel();
+            std::cout << "Crashdump completed\n";
+            if (recoverSystem)
+            {
+                std::cout << "Recovering the system\n";
+                startPowerCycle();
+            }
+            // Release this function's reference to the match.
+            crashdumpCompleteMatch.reset();
+        });
+
+    crashdumpTimer.expires_after(std::chrono::seconds(crashdumpTimeoutS));
+    crashdumpTimer.async_wait([](const boost::system::error_code waitError) {
+        if (!waitError)
+        {
+            // The timer ran out before a completion signal was seen.
+            std::cerr << "Crashdump failed to complete before timeout\n";
+            crashdumpCompleteMatch.reset();
+            return;
+        }
+        // operation_aborted is expected if timer is canceled
+        if (waitError != boost::asio::error::operation_aborted)
+        {
+            std::cerr << "Crashdump async_wait failed: " << waitError.message()
+                      << "\n";
+        }
+        std::cout << "Crashdump timer canceled\n";
+    });
+
+    conn->async_method_call(
+        [](boost::system::error_code startError) {
+            if (!startError)
+            {
+                return;
+            }
+            // The crashdump never started: undo the timeout and the match.
+            std::cerr << "failed to start Crashdump\n";
+            crashdumpTimer.cancel();
+            crashdumpCompleteMatch.reset();
+        },
+        "com.intel.crashdump", "/com/intel/crashdump",
+        "com.intel.crashdump.Stored", "GenerateStoredLog");
+}
+
+// Handles one CPU_CATERR GPIO edge event and re-arms the wait for the next.
+//
+// While the host is on, a falling edge starts caterrAssertTimer. If the timer
+// runs for caterrTimeoutMs without being canceled, the ResetOnCATERR setting
+// is read over D-Bus and passed to startCrashdumpAndRecovery(). Any other
+// edge while the host is on cancels the timer. Events seen while the host is
+// off are read and discarded.
+static void caterrHandler()
+{
+    // Consume the event unconditionally. Skipping the read while the host is
+    // off would leave the event queued on the fd, so the wait re-armed below
+    // would complete immediately and this handler would spin.
+    gpiod::line_event gpioLineEvent = caterrLine.event_read();
+
+    if (!hostOff)
+    {
+        bool caterr =
+            gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
+        if (caterr)
+        {
+            std::cout << "CPU CATERR detected, starting timer\n";
+            caterrAssertTimer.expires_after(
+                std::chrono::milliseconds(caterrTimeoutMs));
+            caterrAssertTimer.async_wait(
+                [](const boost::system::error_code ec) {
+                    if (ec)
+                    {
+                        // operation_aborted is expected if timer is canceled
+                        // before completion.
+                        if (ec != boost::asio::error::operation_aborted)
+                        {
+                            std::cerr << "caterr timeout async_wait failed: "
+                                      << ec.message() << "\n";
+                        }
+                        std::cout << "CATERR assert timer canceled\n";
+                        return;
+                    }
+                    std::cout << "CATERR assert timer completed\n";
+                    // The setting decides whether the system is power cycled
+                    // after the crashdump.
+                    conn->async_method_call(
+                        [](boost::system::error_code ec,
+                           const std::variant<bool>& property) {
+                            if (ec)
+                            {
+                                return;
+                            }
+                            const bool* reset = std::get_if<bool>(&property);
+                            if (reset == nullptr)
+                            {
+                                std::cerr
+                                    << "Unable to read reset on CATERR value\n";
+                                return;
+                            }
+                            startCrashdumpAndRecovery(*reset);
+                        },
+                        "xyz.openbmc_project.Settings",
+                        "/xyz/openbmc_project/control/processor_error_config",
+                        "org.freedesktop.DBus.Properties", "Get",
+                        "xyz.openbmc_project.Control.Processor.ErrConfig",
+                        "ResetOnCATERR");
+                });
+        }
+        else
+        {
+            caterrAssertTimer.cancel();
+        }
+    }
+
+    // Wait for the next edge; a wait error ends monitoring of this line.
+    caterrEvent.async_wait(boost::asio::posix::stream_descriptor::wait_read,
+                           [](const boost::system::error_code ec) {
+                               if (ec)
+                               {
+                                   std::cerr << "caterr handler error: "
+                                             << ec.message() << "\n";
+                                   return;
+                               }
+                               caterrHandler();
+                           });
+}
+// Handles one PCH_BMC_THERMTRIP GPIO edge event and re-arms the wait for the
+// next. A falling edge while the host is on is logged to stdout and written
+// to the journal with the OpenBMC.0.1.SSBThermtrip Redfish message id. Events
+// seen while the host is off are read and discarded.
+static void pchThermtripHandler()
+{
+    // Consume the event unconditionally. Skipping the read while the host is
+    // off would leave the event queued on the fd, so the wait re-armed below
+    // would complete immediately and this handler would spin.
+    gpiod::line_event gpioLineEvent = pchThermtripLine.event_read();
+
+    if (!hostOff)
+    {
+        bool pchThermtrip =
+            gpioLineEvent.event_type == gpiod::line_event::FALLING_EDGE;
+        if (pchThermtrip)
+        {
+            std::cout << "PCH Thermtrip detected \n";
+            // log to redfish, call API
+            sd_journal_send("MESSAGE=SSBThermtrip: SSB Thermtrip",
+                            "PRIORITY=%i", LOG_INFO, "REDFISH_MESSAGE_ID=%s",
+                            "OpenBMC.0.1.SSBThermtrip", NULL);
+        }
+    }
+
+    // Wait for the next edge; a wait error ends monitoring of this line.
+    pchThermtripEvent.async_wait(
+        boost::asio::posix::stream_descriptor::wait_read,
+        [](const boost::system::error_code ec) {
+            if (ec)
+            {
+                std::cerr << "PCH Thermtrip handler error: " << ec.message()
+                          << "\n";
+                return;
+            }
+            pchThermtripHandler();
+        });
+}
+
+} // namespace host_error_monitor
+
+// Entry point: connects to D-Bus, claims the service name, starts host state
+// tracking, arms the CPU_CATERR and PCH_BMC_THERMTRIP GPIO monitors, then
+// runs the event loop. Returns -1 if either GPIO monitor cannot be set up and
+// 0 once the event loop returns.
+int main(int argc, char* argv[])
+{
+    namespace hem = host_error_monitor;
+
+    // Connect to D-Bus and claim the Host Error Monitor service name
+    hem::conn = std::make_shared<sdbusplus::asio::connection>(hem::io);
+    hem::conn->request_name("xyz.openbmc_project.HostErrorMonitor");
+    sdbusplus::asio::object_server server(hem::conn);
+
+    // Follow host state changes; the match is held for the life of main()
+    auto hostStateMonitor = hem::startHostStateMonitor();
+
+    // Seed the cached host state with its current value
+    hem::initializeHostState();
+
+    // CPU_CATERR must be armed before PCH_BMC_THERMTRIP is attempted
+    const bool caterrArmed =
+        hem::requestGPIOEvents("CPU_CATERR", hem::caterrHandler,
+                               hem::caterrLine, hem::caterrEvent);
+    if (!caterrArmed)
+    {
+        return -1;
+    }
+
+    const bool thermtripArmed = hem::requestGPIOEvents(
+        "PCH_BMC_THERMTRIP", hem::pchThermtripHandler, hem::pchThermtripLine,
+        hem::pchThermtripEvent);
+    if (!thermtripArmed)
+    {
+        return -1;
+    }
+
+    hem::io.run();
+
+    return 0;
+}