diff options
author | Puli, Apparao <apparao.puli@intel.com> | 2019-07-31 19:12:54 +0300 |
---|---|---|
committer | Gerrit Code Review <gerrit@localhost> | 2019-07-31 19:12:54 +0300 |
commit | 1135f5c809de345946ce5cc7f8f043b203f6a379 (patch) | |
tree | 0bc71d19ae467b30da4359b1faedfdc15742d5cb | |
parent | 2d5edef696e3ab593e4c8956269d6d96f619162c (diff) | |
parent | 6dc70cd47b55c196a943c374794deb1561fa7520 (diff) | |
download | provingground-1135f5c809de345946ce5cc7f8f043b203f6a379.tar.xz |
Merge "Adding PFR Redfish event log support."
-rw-r--r-- | intel-pfr-manager/libpfr/inc/pfr.hpp | 11 | ||||
-rw-r--r-- | intel-pfr-manager/libpfr/src/pfr.cpp | 46 | ||||
-rw-r--r-- | intel-pfr-manager/service/src/mainapp.cpp | 290 |
3 files changed, 347 insertions, 0 deletions
diff --git a/intel-pfr-manager/libpfr/inc/pfr.hpp b/intel-pfr-manager/libpfr/inc/pfr.hpp index bc1f3df..012c647 100644 --- a/intel-pfr-manager/libpfr/inc/pfr.hpp +++ b/intel-pfr-manager/libpfr/inc/pfr.hpp @@ -31,8 +31,19 @@ enum class ImageType bmcRecovery }; +enum class ActionType +{ + recoveryCount, + recoveryReason, + panicCount, + panicReason, + majorError, + minorError +}; + std::string getVersionInfoCPLD(ImageType &imgType); int getProvisioningStatus(bool &ufmLocked, bool &ufmProvisioned); +int readCpldReg(const ActionType &action, uint8_t value); } // namespace pfr } // namespace intel diff --git a/intel-pfr-manager/libpfr/src/pfr.cpp b/intel-pfr-manager/libpfr/src/pfr.cpp index 59929a2..1ad98b2 100644 --- a/intel-pfr-manager/libpfr/src/pfr.cpp +++ b/intel-pfr-manager/libpfr/src/pfr.cpp @@ -140,5 +140,51 @@ int getProvisioningStatus(bool& ufmLocked, bool& ufmProvisioned) } } +int readCpldReg(const ActionType& action, uint8_t value) +{ + uint8_t cpldReg; + + switch (action) + { + case (ActionType::recoveryCount): + cpldReg = recoveryCount; + break; + case (ActionType::recoveryReason): + cpldReg = lastRecoveryReason; + break; + case (ActionType::panicCount): + cpldReg = panicEventCount; + break; + case (ActionType::panicReason): + cpldReg = panicEventReason; + break; + case (ActionType::majorError): + cpldReg = majorErrorCode; + break; + case (ActionType::minorError): + cpldReg = minorErrorCode; + break; + + default: + phosphor::logging::log<phosphor::logging::level::ERR>( + "Invalid CPLD read action."); + return -1; + } + + try + { + I2CFile cpldDev(i2cBusNumber, i2cSlaveAddress, O_RDWR | O_CLOEXEC); + value = cpldDev.i2cReadByteData(cpldReg); + return 0; + } + catch (const std::exception& e) + { + phosphor::logging::log<phosphor::logging::level::ERR>( + "Exception caught in readCpldReg.", + phosphor::logging::entry("MSG=%s", e.what())); + return -1; + } +} + } // namespace pfr } // namespace intel diff --git a/intel-pfr-manager/service/src/mainapp.cpp b/intel-pfr-manager/service/src/mainapp.cpp index d1f2b20..7be7a45 100644 --- a/intel-pfr-manager/service/src/mainapp.cpp +++ b/intel-pfr-manager/service/src/mainapp.cpp @@ -14,16 +14,183 @@ // limitations under the License. */ +#include <systemd/sd-journal.h> + #include "pfr_mgr.hpp" +#include "pfr.hpp" static std::array<std::string, 5> listVersionPaths = { "bmc_active", "bmc_recovery", "bios_active", "bios_recovery", "cpld"}; +// Caches the last Recovery/Panic Count to +// identify any new Recovery/panic actions. +/* TODO: When BMC Reset's, these values will be lost + * Persist this info using settingsd */ +static uint8_t lastRecoveryCount = 0; +static uint8_t lastPanicCount = 0; +static uint8_t lastMajorErr = 0; +static uint8_t lastMinorErr = 0; + +static bool stateTimerRunning = false; +std::unique_ptr<boost::asio::steady_timer> stateTimer = nullptr; + +// Recovery reason map. { <CPLD association>, <Recovery Reason> } +static std::map<uint8_t, std::string> recoveryReasonMap = { + {0x01, "PCH active authentication failure"}, + {0x02, "PCH recovery authentication failure"}, + {0x03, "ACM launch failure"}, + {0x04, "IBB launch failure"}, + {0x05, "OBB launch failure"}, + {0x06, "BMC active authentication failure"}, + {0x07, "BMC recovery authentication failure"}, + {0x08, "BMC launch failure"}, + {0x09, "CPLD watchdog expired"}}; + +// Panic Reason map. { <CPLD association>, <Panic reason> } +static std::map<uint8_t, std::string> panicReasonMap = { + {0x01, "CPLD WDT expired"}, + {0x02, "BMC WDT expired"}, + {0x03, "ME WDT expired"}, + {0x04, "ACM WDT expired"}, + {0x05, "IBB WDT expired"}, + {0x06, "OBB WDT expired"}, + {0x07, "BMC active authentication failure"}, + {0x08, "BMC recovery authentication failure"}, + {0x09, "PCH active authentication failure"}, + {0x0A, "PCH recovery authentication failure"}, + {0x0B, "IBB authentication failure"}, + {0x0C, "OBB authentication failure"}, + {0x0D, "BMC authentication failure"}, + {0x0E, "PCH active update intent"}, + {0x0F, "BMC active update intent"}, + {0x10, "PCH recovery update intent"}, + {0x11, "BMC recovery update intent"}}; + +static void logLastRecoveryEvent() +{ + uint8_t reason = 0; + if (0 != + intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryReason, reason)) + { + return; + } + + std::map<uint8_t, std::string>::const_iterator it = + recoveryReasonMap.find(reason); + if (it == recoveryReasonMap.end()) + { + // No matching found. So just return without logging event. + return; + } + + sd_journal_send("MESSAGE=%s", "Platform firmware recovered.", "PRIORITY=%i", + LOG_ERR, "REDFISH_MESSAGE_ID=%s", + "OpenBMC.0.1.PlatformFWRecovered", + "REDFISH_MESSAGE_ARGS=%s", it->second.c_str(), NULL); +} + +static void logLastPanicEvent() +{ + uint8_t reason = 0; + if (0 != + intel::pfr::readCpldReg(intel::pfr::ActionType::panicReason, reason)) + { + return; + } + + std::map<uint8_t, std::string>::const_iterator it = + panicReasonMap.find(reason); + if (it == panicReasonMap.end()) + { + // No matching found. So just return without logging event. + return; + } + + sd_journal_send("MESSAGE=%s", "Platform panic event triggered.", + "PRIORITY=%i", LOG_ERR, "REDFISH_MESSAGE_ID=%s", + "OpenBMC.0.1.PlatformFWPanicTriggered", + "REDFISH_MESSAGE_ARGS=%s", it->second.c_str(), NULL); +} + +static void checkAndLogEvents() +{ + uint8_t currPanicCount = 0; + if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::panicCount, + currPanicCount)) + { + if (lastPanicCount != currPanicCount) + { + // Update cached data and log redfish event by reading reason. + lastPanicCount = currPanicCount; + logLastPanicEvent(); + } + } + + uint8_t currRecoveryCount = 0; + if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryCount, + currRecoveryCount)) + { + if (lastRecoveryCount != currRecoveryCount) + { + // Update cached data and log redfish event by reading reason. + lastRecoveryCount = currRecoveryCount; + logLastRecoveryEvent(); + } + } + + uint8_t majorErr = 0; + uint8_t minorErr = 0; + if ((0 == intel::pfr::readCpldReg(intel::pfr::ActionType::majorError, + majorErr)) || + (0 == + intel::pfr::readCpldReg(intel::pfr::ActionType::minorError, minorErr))) + { + if ((lastMajorErr != majorErr) || (lastMinorErr != minorErr)) + { + lastMajorErr = majorErr; + lastMinorErr = minorErr; + + if (majorErr || minorErr) + { + sd_journal_send( + "MESSAGE=%s", "Error occurred on platform firmware.", + "PRIORITY=%i", LOG_ERR, "REDFISH_MESSAGE_ID=%s", + "OpenBMC.0.1.PlatformFWErrorOccurred", + "REDFISH_MESSAGE_ARGS=%i,%i", majorErr, minorErr, NULL); + } + } + } +} + +static void monitorPlatformStateChange( + sdbusplus::asio::object_server& server, + std::shared_ptr<sdbusplus::asio::connection>& conn) +{ + constexpr size_t pollTimeout = 10; // seconds + stateTimer->expires_after(std::chrono::seconds(pollTimeout)); + stateTimer->async_wait( + [&server, &conn](const boost::system::error_code& ec) { + if (ec == boost::asio::error::operation_aborted) + { + // Timer reset. + return; + } + if (ec) + { + // Platform State Monitor - Timer cancelled. + return; + } + checkAndLogEvents(); + monitorPlatformStateChange(server, conn); + }); +} + int main() { // setup connection to dbus boost::asio::io_service io; auto conn = std::make_shared<sdbusplus::asio::connection>(io); + stateTimer = std::make_unique<boost::asio::steady_timer>(io); conn->request_name("xyz.openbmc_project.Intel.PFR.Manager"); auto server = sdbusplus::asio::object_server(conn, true); @@ -36,6 +203,129 @@ int main() intel::pfr::PfrVersion obj(server, conn, path); } + // Capture the Chassis state and Start the monitor timer + // if state changed to 'On'. Run timer until OS boot. + // Stop timer if state changed to 'Off'. + static auto matchChassisState = sdbusplus::bus::match::match( + static_cast<sdbusplus::bus::bus&>(*conn), + "type='signal',member='PropertiesChanged', " + "interface='org.freedesktop.DBus.Properties', " + "sender='xyz.openbmc_project.State.Chassis', " + "arg0namespace='xyz.openbmc_project.State.Chassis'", + [&server, &conn](sdbusplus::message::message& message) { + std::string intfName; + std::map<std::string, std::variant<std::string>> properties; + message.read(intfName, properties); + + const auto it = properties.find("CurrentPowerState"); + if (it != properties.end()) + { + const std::string* state = + std::get_if<std::string>(&it->second); + if (state != nullptr) + { + if ((*state == + "xyz.openbmc_project.State.Chassis.PowerState.On") && + (!stateTimerRunning)) + { + stateTimerRunning = true; + monitorPlatformStateChange(server, conn); + } + else if ((*state == "xyz.openbmc_project.State.Chassis." + "PowerState.Off") && + (stateTimerRunning)) + { + stateTimer->cancel(); + checkAndLogEvents(); + stateTimerRunning = false; + } + } + } + }); + + // Capture the Host state and Start the monitor timer + // if state changed to 'Running'. Run timer until OS boot. + // Stop timer if state changed to 'Off'. + static auto matchHostState = sdbusplus::bus::match::match( + static_cast<sdbusplus::bus::bus&>(*conn), + "type='signal',member='PropertiesChanged', " + "interface='org.freedesktop.DBus.Properties', " + "sender='xyz.openbmc_project.State.Chassis', " + "arg0namespace='xyz.openbmc_project.State.Host'", + [&server, &conn](sdbusplus::message::message& message) { + std::string intfName; + std::map<std::string, std::variant<std::string>> properties; + message.read(intfName, properties); + + const auto it = properties.find("CurrentHostState"); + if (it != properties.end()) + { + const std::string* state = + std::get_if<std::string>(&it->second); + if (state != nullptr) + { + if ((*state == + "xyz.openbmc_project.State.Host.HostState.Running") && + (!stateTimerRunning)) + { + stateTimerRunning = true; + monitorPlatformStateChange(server, conn); + } + else if (((*state == "xyz.openbmc_project.State.Host." + "HostState.Off") || + (*state == "xyz.openbmc_project.State.Host." + "HostState.Quiesced")) && + (stateTimerRunning)) + { + stateTimer->cancel(); + checkAndLogEvents(); + stateTimerRunning = false; + } + } + } + }); + + // Capture the OS state change and stop monitor timer + // if OS boots completly or becomes Inactive. + // start timer in other cases to mnitor states. + static auto matchOsState = sdbusplus::bus::match::match( + static_cast<sdbusplus::bus::bus&>(*conn), + "type='signal',member='PropertiesChanged', " + "interface='org.freedesktop.DBus.Properties', " + "sender='xyz.openbmc_project.State.Chassis', " + "arg0namespace='xyz.openbmc_project.State.OperatingSystem.Status'", + [&server, &conn](sdbusplus::message::message& message) { + std::string intfName; + std::map<std::string, std::variant<std::string>> properties; + message.read(intfName, properties); + + const auto it = properties.find("OperatingSystemState"); + if (it != properties.end()) + { + const std::string* state = + std::get_if<std::string>(&it->second); + if (state != nullptr) + { + if (((*state == "BootComplete") || + (*state == "Inactive")) && + (stateTimerRunning)) + { + stateTimer->cancel(); + checkAndLogEvents(); + stateTimerRunning = false; + } + else if (!stateTimerRunning) + { + stateTimerRunning = true; + monitorPlatformStateChange(server, conn); + } + } + } + }); + + // First time, check and log events if any. + checkAndLogEvents(); + phosphor::logging::log<phosphor::logging::level::INFO>( "Intel PFR service started successfully"); |