From 82facc00fddc73f43a13ae72686bdb5d1744e12f Mon Sep 17 00:00:00 2001 From: "Jason M. Bills" Date: Mon, 9 Sep 2019 14:45:38 -0700 Subject: Enable boot FIVR fault monitoring and logging A boot FIVR fault will assert the CPU FIVR Fault GPIO and then assert the thermtrip GPIO. This adds a check for whether the CPU FIVR Fault GPIO is asserted, to determine whether to log a boot FIVR fault or a CPU thermal trip. Tested: Pulled the CPU FIVR Fault GPIO high and moved the jumper to assert a thermtrip. After the system shut down, confirmed that the event was logged as a boot FIVR fault. Change-Id: Ic4292a3fa9135c7367764f2b126937e33c5ad652 Signed-off-by: Jason M. Bills --- host_error_monitor/src/host_error_monitor.cpp | 67 ++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/host_error_monitor/src/host_error_monitor.cpp b/host_error_monitor/src/host_error_monitor.cpp index 0dc620b..900ead6 100644 --- a/host_error_monitor/src/host_error_monitor.cpp +++ b/host_error_monitor/src/host_error_monitor.cpp @@ -58,8 +58,10 @@ static gpiod::line err2Line; static boost::asio::posix::stream_descriptor err2Event(io); static gpiod::line smiLine; static boost::asio::posix::stream_descriptor smiEvent(io); +static gpiod::line cpu1FIVRFaultLine; static gpiod::line cpu1ThermtripLine; static boost::asio::posix::stream_descriptor cpu1ThermtripEvent(io); +static gpiod::line cpu2FIVRFaultLine; static gpiod::line cpu2ThermtripLine; static boost::asio::posix::stream_descriptor cpu2ThermtripEvent(io); static gpiod::line cpu1VRHotLine; @@ -131,6 +133,15 @@ static void smiTimeoutLog() "REDFISH_MESSAGE_ARGS=%s", "SMI Timeout", NULL); } +static void cpuBootFIVRFaultLog(const int cpuNum) +{ + std::string msg = "Boot FIVR Fault on CPU " + std::to_string(cpuNum); + + sd_journal_send("MESSAGE=HostError: %s", msg.c_str(), "PRIORITY=%i", + LOG_INFO, "REDFISH_MESSAGE_ID=%s", "OpenBMC.0.1.CPUError", + "REDFISH_MESSAGE_ARGS=%s", msg.c_str(), NULL); +} + static void
cpuThermTripLog(const int cpuNum) { std::string msg = "CPU " + std::to_string(cpuNum) + " thermal trip"; @@ -276,6 +287,30 @@ static bool requestGPIOEvents( return true; } +static bool requestGPIOInput(const std::string& name, gpiod::line& gpioLine) +{ + // Find the GPIO line + gpioLine = gpiod::find_line(name); + if (!gpioLine) + { + std::cerr << "Failed to find the " << name << " line.\n"; + return false; + } + + // Request GPIO input + try + { + gpioLine.request({__FUNCTION__, gpiod::line_request::DIRECTION_INPUT}); + } + catch (std::exception&) + { + std::cerr << "Failed to request " << name << " input\n"; + return false; + } + + return true; +} + static void startPowerCycle() { conn->async_method_call( @@ -667,7 +702,14 @@ static void caterrHandler() static void cpu1ThermtripAssertHandler() { - cpuThermTripLog(1); + if (cpu1FIVRFaultLine.get_value() == 0) + { + cpuBootFIVRFaultLog(1); + } + else + { + cpuThermTripLog(1); + } } static void cpu1ThermtripHandler() @@ -698,7 +740,14 @@ static void cpu1ThermtripHandler() static void cpu2ThermtripAssertHandler() { - cpuThermTripLog(2); + if (cpu2FIVRFaultLine.get_value() == 0) + { + cpuBootFIVRFaultLog(2); + } + else + { + cpuThermTripLog(2); + } } static void cpu2ThermtripHandler() @@ -1385,6 +1434,13 @@ int main(int argc, char* argv[]) return -1; } + // Request CPU1_FIVR_FAULT GPIO input + if (!host_error_monitor::requestGPIOInput( + "CPU1_FIVR_FAULT", host_error_monitor::cpu1FIVRFaultLine)) + { + return -1; + } + // Request CPU1_THERMTRIP GPIO events if (!host_error_monitor::requestGPIOEvents( "CPU1_THERMTRIP", host_error_monitor::cpu1ThermtripHandler, @@ -1394,6 +1450,13 @@ int main(int argc, char* argv[]) return -1; } + // Request CPU2_FIVR_FAULT GPIO input + if (!host_error_monitor::requestGPIOInput( + "CPU2_FIVR_FAULT", host_error_monitor::cpu2FIVRFaultLine)) + { + return -1; + } + // Request CPU2_THERMTRIP GPIO events if (!host_error_monitor::requestGPIOEvents( "CPU2_THERMTRIP", 
host_error_monitor::cpu2ThermtripHandler, -- cgit v1.2.3