diff options
Diffstat (limited to 'meta-openbmc-mods/meta-ast2500/recipes-phosphor')
10 files changed, 1009 insertions, 18 deletions
diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/configuration/entity-manager/WC-Baseboard.json b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/configuration/entity-manager/WC-Baseboard.json index 1d2048335..840553183 100644 --- a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/configuration/entity-manager/WC-Baseboard.json +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/configuration/entity-manager/WC-Baseboard.json @@ -805,7 +805,7 @@ "Type": "IpmbSensor" }, { - "Address": "0xDC", + "Address": "0xE0", "Class": "PxeBridgeTemp", "Name": "CPU1 VR Mem ABCD Temp", "Thresholds": [ @@ -2356,7 +2356,7 @@ "Type": "TMP75" }, { - "Address": "0x48", + "Address": "0x4C", "Bus": 6, "Name": "PCH M.2 Temp", "Thresholds": [ diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0002-Fix-missing-threshold-de-assert-event-when-threshold.patch b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0002-Fix-missing-threshold-de-assert-event-when-threshold.patch index 13f9a157a..419119ee7 100644 --- a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0002-Fix-missing-threshold-de-assert-event-when-threshold.patch +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0002-Fix-missing-threshold-de-assert-event-when-threshold.patch @@ -1,11 +1,11 @@ -From 8990f3d3d3d74c0bed2c0920073e1ecfd4ff422d Mon Sep 17 00:00:00 2001 +From ea4181e323a222909250f641e9153a37c238efdc Mon Sep 17 00:00:00 2001 From: Zhikui Ren <zhikui.ren@intel.com> Date: Thu, 24 Sep 2020 14:27:32 -0700 Subject: [PATCH] Fix missing threshold de-assert event when threshold changes. Destructor can be called when sensor interface changes -like a new threshold value. Ensure adc sensor hresholds are de-asserted -on destruction. These events can be missed if the new threshold +like a new threshold value. Ensure adc sensor thresholds are de-asserted +dueing re-construction. These events can be missed if the new threshold value fixed the alarm because default state for new threshold interface is de-asserted. @@ -47,33 +47,40 @@ index ca2b0a0..1d1b1b5 100644 { bool used; diff --git a/src/ADCSensor.cpp b/src/ADCSensor.cpp -index 7afb2ab..701c58e 100644 +index c3980e0..440073e 100644 --- a/src/ADCSensor.cpp +++ b/src/ADCSensor.cpp -@@ -88,6 +88,18 @@ ADCSensor::~ADCSensor() - // close the input dev to cancel async operations - inputDev.close(); - waitTimer.cancel(); +@@ -80,6 +80,17 @@ ADCSensor::ADCSensor(const std::string& path, + association = objectServer.add_interface( + "/xyz/openbmc_project/sensors/voltage/" + name, association::interface); + setInitialProperties(conn); + + // Destructor can be called when sensor interface changes -+ // like a new threshold value. Ensure LOW thresholds are de-asserted -+ // on destruction. These events can be missed if the new threshold -+ // value fixed the alarm because default state for new threshold ++ // like a new threshold value. Ensure thresholds are de-asserted ++ // on construction. These events can be missed if the new threshold ++ // value fixes the alarm because default state for new threshold + // interface is de-asserted. + for (auto& threshold : thresholds) + { + thresholds::forceDeassertThresholds(this, threshold.level, + threshold.direction); + } + } + + ADCSensor::~ADCSensor() +@@ -87,6 +98,7 @@ ADCSensor::~ADCSensor() + // close the input dev to cancel async operations + inputDev.close(); + waitTimer.cancel(); + objServer.remove_interface(thresholdInterfaceWarning); objServer.remove_interface(thresholdInterfaceCritical); objServer.remove_interface(sensorInterface); diff --git a/src/Thresholds.cpp b/src/Thresholds.cpp -index 6aa077c..e480554 100644 +index caab22c..850a295 100644 --- a/src/Thresholds.cpp +++ b/src/Thresholds.cpp -@@ -344,6 +344,7 @@ bool checkThresholds(Sensor* sensor) +@@ -332,6 +332,7 @@ bool checkThresholds(Sensor* sensor) { bool status = true; std::vector<ChangeParam> changes = checkThresholds(sensor, sensor->value); @@ -81,7 +88,7 @@ index 6aa077c..e480554 100644 for (const auto& change : changes) { assertThresholds(sensor, change.assertValue, change.threshold.level, -@@ -452,6 +453,67 @@ void assertThresholds(Sensor* sensor, double assertValue, +@@ -450,6 +451,67 @@ void assertThresholds(Sensor* sensor, double assertValue, } } @@ -126,7 +133,7 @@ index 6aa077c..e480554 100644 + return; + } + -+ if (interface->set_property<bool, true>(property, false)) ++ if (interface->set_property<bool, false>(property, false)) + { + try + { diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0007-Fix-ADC-threshold-hysteresis-to-one-percent-of-criti.patch b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0007-Fix-ADC-threshold-hysteresis-to-one-percent-of-criti.patch new file mode 100644 index 000000000..c86ec7730 --- /dev/null +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0007-Fix-ADC-threshold-hysteresis-to-one-percent-of-criti.patch @@ -0,0 +1,58 @@ +From 845e79aefc258780815c07cf0bcf4bb850b8a387 Mon Sep 17 00:00:00 2001 +From: Zhikui Ren <zhikui.ren@intel.com> +Date: Thu, 8 Oct 2020 14:40:45 -0700 +Subject: [PATCH] Fix ADC reading max value + +Correct the constant for adc's maxReading. Threshold hysteresis +is calculated as one percent of the ADC range. This fixes the +issue that lower voltage thresholds do not get de-asserted. + +Tested: +Overwrite the sensor to high value and then back to normal reading. +Verified that threshold is asserted and then de-asserted. + +Console output: +busctl set-property xyz.openbmc_project.ADCSensor /xyz/openbmc_project/sensors/voltage/PVDQ_ABC_CPU1 xyz.openbmc_project.Sensor.Value Value d 1.348 +intel-obmc adcsensor[412]: Sensor PVDQ_ABC_CPU1 high threshold 1.301 assert: value 1.348 raw data 1244 +intel-obmc adcsensor[412]: Sensor PVDQ_ABC_CPU1 high threshold 1.263 assert: value 1.348 raw data 1244 +intel-obmc ipmid[242]: thresholdChanged: Assert +intel-obmc ipmid[242]: thresholdChanged: Assert +intel-obmc sel-logger[288]: PVDQ_ABC_CPU1 sensor crossed a critical high threshold going high. Reading=1.348000 Threshold=1.301000. +intel-obmc sel-logger[288]: PVDQ_ABC_CPU1 sensor crossed a warning high threshold going high. Reading=1.348000 Threshold=1.263000. +intel-obmc ipmid[242]: thresholdChanged: deassert +intel-obmc ipmid[242]: thresholdChanged: deassert +intel-obmc sel-logger[288]: PVDQ_ABC_CPU1 sensor crossed a critical high threshold going low. Reading=1.244000 Threshold=1.301000. +intel-obmc sel-logger[288]: PVDQ_ABC_CPU1 sensor crossed a warning high threshold going low. Reading=1.244000 Threshold=1.263000. + +Signed-off-by: Zhikui Ren <zhikui.ren@intel.com> +--- + src/ADCSensor.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/ADCSensor.cpp b/src/ADCSensor.cpp +index ede4dfd..a446030 100644 +--- a/src/ADCSensor.cpp ++++ b/src/ADCSensor.cpp +@@ -42,7 +42,7 @@ static constexpr unsigned int gpioBridgeEnableMs = 20; + static constexpr unsigned int sensorScaleFactor = 1000; + + static constexpr double roundFactor = 10000; // 3 decimal places +-static constexpr double maxReading = 20; ++static constexpr double maxReading = 1.8; // adc reference is 1.8Volt + static constexpr double minReading = 0; + + ADCSensor::ADCSensor(const std::string& path, +@@ -55,8 +55,8 @@ ADCSensor::ADCSensor(const std::string& path, + std::optional<BridgeGpio>&& bridgeGpio) : + Sensor(boost::replace_all_copy(sensorName, " ", "_"), + std::move(_thresholds), sensorConfiguration, +- "xyz.openbmc_project.Configuration.ADC", maxReading, minReading, +- readState), ++ "xyz.openbmc_project.Configuration.ADC", maxReading / scaleFactor, ++ minReading / scaleFactor, readState), + std::enable_shared_from_this<ADCSensor>(), objServer(objectServer), + inputDev(io, open(path.c_str(), O_RDONLY)), waitTimer(io), path(path), + scaleFactor(scaleFactor), bridgeGpio(std::move(bridgeGpio)), +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0008-Fix-sensor-lost-issue-on-TEMP-PSU-etc.patch b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0008-Fix-sensor-lost-issue-on-TEMP-PSU-etc.patch new file mode 100644 index 000000000..8d8dae230 --- /dev/null +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0008-Fix-sensor-lost-issue-on-TEMP-PSU-etc.patch @@ -0,0 +1,35 @@ +From dd4ac2200b111ac0215d7a2695137b9b0c382fc7 Mon Sep 17 00:00:00 2001 +From: Kuiying Wang <kuiying.wang@intel.com> +Date: Thu, 15 Oct 2020 15:46:48 +0800 +Subject: [PATCH] Fix sensor lost issue on TEMP/PSU/etc. + +When AC cycle or FW update, some sensors like TEMP and PSU +related are lost due to entity manager is not ready before +sensor service creating sensors. +Enable retry by default to make sure entity manager is ready. + +Tested: +All sensors like TEMP and PSU related are working well, +even at AC cycle and FW update. + +Signed-off-by: Kuiying Wang <kuiying.wang@intel.com> +--- + include/Utils.hpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/Utils.hpp b/include/Utils.hpp +index 2c6ce9e..7b38bc1 100644 +--- a/include/Utils.hpp ++++ b/include/Utils.hpp +@@ -211,7 +211,7 @@ struct GetSensorConfiguration : + } + + void getConfiguration(const std::vector<std::string>& interfaces, +- size_t retries = 0) ++ size_t retries = 3) + { + if (retries > 5) + { +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0009-NVMeSensor-use-available-interface-for-error-handlin.patch b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0009-NVMeSensor-use-available-interface-for-error-handlin.patch new file mode 100644 index 000000000..e32c4aa9e --- /dev/null +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0009-NVMeSensor-use-available-interface-for-error-handlin.patch @@ -0,0 +1,52 @@ +From f9c37dfab29007ce988ddb32293d9c31de0eb042 Mon Sep 17 00:00:00 2001 +From: Zhikui Ren <zhikui.ren@intel.com> +Date: Wed, 11 Nov 2020 07:41:03 -0800 +Subject: [PATCH 09/12] NVMeSensor: use available interface for error handling + +Sensor available and functional interfaces are used to +handle sensor not readable and read errors. +Update NVMeSensor to use them. + +Tested +No regression detected in NVMeSensor read value + +Signed-off-by: Zhikui Ren <zhikui.ren@intel.com> +Change-Id: Iabaf2508d14b3562cfe04992c9558a0114a9665e +--- + src/NVMeSensor.cpp | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/src/NVMeSensor.cpp b/src/NVMeSensor.cpp +index cbc88ec..8d6d4b3 100644 +--- a/src/NVMeSensor.cpp ++++ b/src/NVMeSensor.cpp +@@ -265,7 +265,7 @@ static double getTemperatureReading(int8_t reading) + { + // 0x80 = No temperature data or temperature data is more the 5 s + // old 0x81 = Temperature sensor failure +- return maxReading; ++ return std::numeric_limits<double>::quiet_NaN(); + } + + return reading; +@@ -365,7 +365,16 @@ void rxMessage(uint8_t eid, void*, void* msg, size_t len) + << " Celsius for device " << sensorInfo->name << "\n"; + } + +- sensorInfo->updateValue(getTemperatureReading(messageData[5])); ++ double value = getTemperatureReading(messageData[5]); ++ if (!std::isfinite(value)) ++ { ++ sensorInfo->markAvailable(false); ++ sensorInfo->incrementError(); ++ } ++ else ++ { ++ sensorInfo->updateValue(value); ++ } + + if (DEBUG) + { +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0010-revert-revert-log-debug-information-for-sensor-thres.patch b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0010-revert-revert-log-debug-information-for-sensor-thres.patch new file mode 100644 index 000000000..9aef0d714 --- /dev/null +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0010-revert-revert-log-debug-information-for-sensor-thres.patch @@ -0,0 +1,283 @@ +From b52dd2b668fda6cd2e4afb7662a4d2721efe9855 Mon Sep 17 00:00:00 2001 +From: Zhikui Ren <zhikui.ren@intel.com> +Date: Fri, 11 Sep 2020 17:02:01 -0700 +Subject: [PATCH 10/12] revert "revert log debug information for sensor + threshold assert events" + +Add back the orignial submit 9bf6804c2d76b92005ad9851cb052d407ea3117f +and includes the fix the bug that crashed ipmbsensor. + +log debug information for sensor threshold assert events + +There are sightings that TCPUx_P12_PVCCIO_VS_Temp Sensor +reports reading of zero and trips the low critical threshold. +Add debug prints to gather data. + +Also add logs for raw value in sensor base class to help +debug threshold assert events for other sensor type. + +Tested: +Verified that log messages show up as expected for threshold +assert events. There is no unwanted log messages on systems that +do not have bad sensor readings. +Verified system stayed up for 30 minutes without crashing. + +Signed-off-by: Zhikui Ren <zhikui.ren@intel.com> +Change-Id: I73e00e24bbae463dbe0f34e2308ee934588028d1 +--- + include/sensor.hpp | 1 + + src/ADCSensor.cpp | 20 ++++++++++++++++---- + src/CPUSensor.cpp | 4 ++-- + src/HwmonTempSensor.cpp | 21 +++++++++++++++++---- + src/IpmbSensor.cpp | 12 ++++++++++++ + src/PSUSensor.cpp | 4 ++-- + src/TachSensor.cpp | 4 ++-- + src/Thresholds.cpp | 16 ++++++++++++++-- + 8 files changed, 66 insertions(+), 16 deletions(-) + +diff --git a/include/sensor.hpp b/include/sensor.hpp +index a8321fd..7fa9300 100644 +--- a/include/sensor.hpp ++++ b/include/sensor.hpp +@@ -48,6 +48,7 @@ struct Sensor + std::shared_ptr<sdbusplus::asio::dbus_interface> availableInterface; + std::shared_ptr<sdbusplus::asio::dbus_interface> operationalInterface; + double value = std::numeric_limits<double>::quiet_NaN(); ++ double rawValue = std::numeric_limits<double>::quiet_NaN(); + bool overriddenState = false; + bool internalSet = false; + double hysteresisTrigger; +diff --git a/src/ADCSensor.cpp b/src/ADCSensor.cpp +index a446030..0cdb0ac 100644 +--- a/src/ADCSensor.cpp ++++ b/src/ADCSensor.cpp +@@ -79,6 +79,7 @@ ADCSensor::ADCSensor(const std::string& path, + } + association = objectServer.add_interface( + "/xyz/openbmc_project/sensors/voltage/" + name, association::interface); ++ + setInitialProperties(conn); + } + +@@ -179,11 +180,9 @@ void ADCSensor::handleResponse(const boost::system::error_code& err) + // todo read scaling factors from configuration + try + { +- double nvalue = std::stod(response); +- +- nvalue = (nvalue / sensorScaleFactor) / scaleFactor; ++ rawValue = std::stod(response); ++ double nvalue = (rawValue / sensorScaleFactor) / scaleFactor; + nvalue = std::round(nvalue * roundFactor) / roundFactor; +- + updateValue(nvalue); + } + catch (std::invalid_argument&) +@@ -205,6 +204,7 @@ void ADCSensor::handleResponse(const boost::system::error_code& err) + int fd = open(path.c_str(), O_RDONLY); + if (fd < 0) + { ++ std::cerr << "adcsensor " << name << " failed to open " << path << "\n"; + return; // we're no longer valid + } + inputDev.assign(fd); +@@ -213,6 +213,14 @@ void ADCSensor::handleResponse(const boost::system::error_code& err) + std::shared_ptr<ADCSensor> self = weakRef.lock(); + if (ec == boost::asio::error::operation_aborted) + { ++ if (self) ++ { ++ std::cerr << "adcsensor " << self->name << " read cancelled\n"; ++ } ++ else ++ { ++ std::cerr << "adcsensor read cancelled no self\n"; ++ } + return; // we're being canceled + } + +@@ -220,6 +228,10 @@ void ADCSensor::handleResponse(const boost::system::error_code& err) + { + self->setupRead(); + } ++ else ++ { ++ std::cerr << "adcsensor weakref no self\n"; ++ } + }); + } + +diff --git a/src/CPUSensor.cpp b/src/CPUSensor.cpp +index 401412f..f368150 100644 +--- a/src/CPUSensor.cpp ++++ b/src/CPUSensor.cpp +@@ -223,9 +223,9 @@ void CPUSensor::handleResponse(const boost::system::error_code& err) + try + { + std::getline(responseStream, response); +- double nvalue = std::stod(response); ++ rawValue = std::stod(response); + responseStream.clear(); +- nvalue /= CPUSensor::sensorScaleFactor; ++ double nvalue = rawValue / CPUSensor::sensorScaleFactor; + + if (show) + { +diff --git a/src/HwmonTempSensor.cpp b/src/HwmonTempSensor.cpp +index 5514504..8b91804 100644 +--- a/src/HwmonTempSensor.cpp ++++ b/src/HwmonTempSensor.cpp +@@ -106,6 +106,8 @@ void HwmonTempSensor::handleResponse(const boost::system::error_code& err) + if ((err == boost::system::errc::bad_file_descriptor) || + (err == boost::asio::error::misc_errors::not_found)) + { ++ std::cerr << "Hwmon temp sensor " << name << " removed " << path ++ << "\n"; + return; // we're being destroyed + } + std::istream responseStream(&readBuf); +@@ -115,16 +117,16 @@ void HwmonTempSensor::handleResponse(const boost::system::error_code& err) + std::getline(responseStream, response); + try + { +- double nvalue = std::stod(response); +- if (nvalue < 0) ++ rawValue = std::stod(response); ++ if (rawValue < 0) + { + std::cerr << "Hwmon temp sensor " << name +- << ": ignore negative rawValue " << nvalue << "\n"; ++ << ": ignore negative rawValue " << rawValue << "\n"; + incrementError(); + } + else + { +- nvalue /= sensorScaleFactor; ++ double nvalue = rawValue / sensorScaleFactor; + updateValue(nvalue); + } + } +@@ -143,6 +145,8 @@ void HwmonTempSensor::handleResponse(const boost::system::error_code& err) + int fd = open(path.c_str(), O_RDONLY); + if (fd < 0) + { ++ std::cerr << "Hwmon temp sensor " << name << " not valid " << path ++ << "\n"; + return; // we're no longer valid + } + inputDev.assign(fd); +@@ -152,6 +156,15 @@ void HwmonTempSensor::handleResponse(const boost::system::error_code& err) + std::shared_ptr<HwmonTempSensor> self = weakRef.lock(); + if (ec == boost::asio::error::operation_aborted) + { ++ if (self) ++ { ++ std::cerr << "Hwmon temp sensor " << self->name ++ << " read cancelled " << self->path << "\n"; ++ } ++ else ++ { ++ std::cerr << "Hwmon sensor read cancelled, no self\n"; ++ } + return; // we're being canceled + } + if (self) +diff --git a/src/IpmbSensor.cpp b/src/IpmbSensor.cpp +index 1855519..983e6d4 100644 +--- a/src/IpmbSensor.cpp ++++ b/src/IpmbSensor.cpp +@@ -349,6 +349,18 @@ void IpmbSensor::read(void) + read(); + return; + } ++ else ++ { ++ // rawValue only used in debug logging ++ // up to 5th byte in data are used to derive value ++ size_t end = std::min(sizeof(uint64_t), data.size()); ++ uint64_t rawData = 0; ++ for (size_t i = 0; i < end; i++) ++ { ++ reinterpret_cast<uint8_t*>(&rawData)[i] = data[i]; ++ } ++ rawValue = static_cast<double>(rawData); ++ } + + /* Adjust value as per scale and offset */ + value = (value * scaleVal) + offsetVal; +diff --git a/src/PSUSensor.cpp b/src/PSUSensor.cpp +index 6b27207..f93846d 100644 +--- a/src/PSUSensor.cpp ++++ b/src/PSUSensor.cpp +@@ -143,9 +143,9 @@ void PSUSensor::handleResponse(const boost::system::error_code& err) + try + { + std::getline(responseStream, response); +- double nvalue = std::stod(response); ++ rawValue = std::stod(response); + responseStream.clear(); +- nvalue /= sensorFactor; ++ double nvalue = rawValue / sensorFactor; + + updateValue(nvalue); + } +diff --git a/src/TachSensor.cpp b/src/TachSensor.cpp +index ba3b0a1..acfe659 100644 +--- a/src/TachSensor.cpp ++++ b/src/TachSensor.cpp +@@ -149,9 +149,9 @@ void TachSensor::handleResponse(const boost::system::error_code& err) + try + { + std::getline(responseStream, response); +- double nvalue = std::stod(response); ++ rawValue = std::stod(response); + responseStream.clear(); +- updateValue(nvalue); ++ updateValue(rawValue); + } + catch (const std::invalid_argument&) + { +diff --git a/src/Thresholds.cpp b/src/Thresholds.cpp +index ce1c759..30f8021 100644 +--- a/src/Thresholds.cpp ++++ b/src/Thresholds.cpp +@@ -244,6 +244,7 @@ static int cLoTrue = 0; + static int cLoFalse = 0; + static int cLoMidstate = 0; + static int cDebugThrottle = 0; ++static constexpr int assertLogCount = 10; + + struct ChangeParam + { +@@ -276,7 +277,12 @@ static std::vector<ChangeParam> checkThresholds(Sensor* sensor, double value) + if (value >= threshold.value) + { + thresholdChanges.emplace_back(threshold, true, value); +- ++cHiTrue; ++ if (++cHiTrue < assertLogCount) ++ { ++ std::cerr << "Sensor " << sensor->name << " high threshold " ++ << threshold.value << " assert: value " << value ++ << " raw data " << sensor->rawValue << "\n"; ++ } + } + else if (value < (threshold.value - sensor->hysteresisTrigger)) + { +@@ -293,7 +299,13 @@ static std::vector<ChangeParam> checkThresholds(Sensor* sensor, double value) + if (value <= threshold.value) + { + thresholdChanges.emplace_back(threshold, true, value); +- ++cLoTrue; ++ if (++cLoTrue < assertLogCount) ++ { ++ std::cerr << "Sensor " << sensor->name << " low threshold " ++ << threshold.value << " assert: value " ++ << sensor->value << " raw data " ++ << sensor->rawValue << "\n"; ++ } + } + else if (value > (threshold.value + sensor->hysteresisTrigger)) + { +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0011-Check-readingStateGood-before-updating-thresholds-pr.patch b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0011-Check-readingStateGood-before-updating-thresholds-pr.patch new file mode 100644 index 000000000..9fdea10fc --- /dev/null +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0011-Check-readingStateGood-before-updating-thresholds-pr.patch @@ -0,0 +1,137 @@ +From 42edd5d855e2b405700fa5e25564d2df3bd171d9 Mon Sep 17 00:00:00 2001 +From: Zhikui Ren <zhikui.ren@intel.com> +Date: Thu, 1 Oct 2020 08:54:32 -0700 +Subject: [PATCH 11/12] Check readingStateGood before updating thresholds + property + +Sensor read function checks for readingStateGood before +start. But if host resets while reading is in progress, +incorrect sensor value maybe returned. +Check readingStateGood before changing threshold. + +Update checkThresholdsPowerDelay to start timer for both +high and low thresholds. +Update CPUSensor to use checkThresholdsPowerDelay to filter +out the high events that can happen during host reset. + +Tested: +Run host reset test for 500 cycles, no erronous sensor +events were reported. Before the change, same tests +never run more than 200 cycles. + +Signed-off-by: Zhikui Ren <zhikui.ren@intel.com> +Change-Id: I6a6645c2842651e07bbcefa295ae331ee036de45 +--- + include/CPUSensor.hpp | 1 + + src/CPUSensor.cpp | 22 ++++++++-------------- + src/Thresholds.cpp | 41 ++++++++++++++++++++--------------------- + 3 files changed, 29 insertions(+), 35 deletions(-) + +diff --git a/include/CPUSensor.hpp b/include/CPUSensor.hpp +index 4a56b21..4f8f52c 100644 +--- a/include/CPUSensor.hpp ++++ b/include/CPUSensor.hpp +@@ -50,6 +50,7 @@ class CPUSensor : public Sensor + bool loggedInterfaceDown = false; + void setupRead(void); + void handleResponse(const boost::system::error_code& err); ++ thresholds::ThresholdTimer thresholdTimer; + void checkThresholds(void) override; + void updateMinMaxValues(void); + }; +diff --git a/src/CPUSensor.cpp b/src/CPUSensor.cpp +index f368150..52d2a32 100644 +--- a/src/CPUSensor.cpp ++++ b/src/CPUSensor.cpp +@@ -47,7 +47,8 @@ CPUSensor::CPUSensor(const std::string& path, const std::string& objectType, + minReading, PowerState::on), + objServer(objectServer), busConn(conn), inputDev(io), waitTimer(io), + path(path), privTcontrol(std::numeric_limits<double>::quiet_NaN()), +- dtsOffset(dtsOffset), show(show), pollTime(CPUSensor::sensorPollMs) ++ dtsOffset(dtsOffset), show(show), pollTime(CPUSensor::sensorPollMs), ++ thresholdTimer(io, this) + { + nameTcontrol = labelTcontrol; + nameTcontrol += " CPU" + std::to_string(cpuId); +@@ -288,18 +289,11 @@ void CPUSensor::checkThresholds(void) + { + if (show) + { +- // give the power match callback to have a chance to run +- // checkThresholds checks for host power state +- auto timer = std::make_shared<boost::asio::steady_timer>( +- busConn->get_io_context()); +- timer->expires_after(std::chrono::milliseconds(100)); +- timer->async_wait([this, timer](boost::system::error_code ec) { +- if (ec) +- { +- // log the error but still check the thresholds +- std::cerr << "Cpu sensor threshold timer error!\n"; +- } +- thresholds::checkThresholds(this); +- }); ++ if (!readingStateGood()) ++ { ++ return; ++ } ++ ++ thresholds::checkThresholdsPowerDelay(this, thresholdTimer); + } + } +diff --git a/src/Thresholds.cpp b/src/Thresholds.cpp +index 30f8021..3150faf 100644 +--- a/src/Thresholds.cpp ++++ b/src/Thresholds.cpp +@@ -365,29 +365,28 @@ void checkThresholdsPowerDelay(Sensor* sensor, ThresholdTimer& thresholdTimer) + std::vector<ChangeParam> changes = checkThresholds(sensor, sensor->value); + for (const auto& change : changes) + { +- // When CPU is powered off, some volatges are expected to +- // go below low thresholds. Filter these events with thresholdTimer. +- // 1. always delay the assertion of low events to see if they are +- // caused by power off event. +- // 2. conditional delay the de-assertion of low events if there is +- // an existing timer for assertion. +- // 3. no delays for de-assert of low events if there is an existing +- // de-assert for low event. This means 2nd de-assert would happen +- // first and when timer expires for the previous one, no additional +- // signal will be logged. +- // 4. no delays for all high events. +- if (change.threshold.direction == thresholds::Direction::LOW) ++ // When CPU is powered off, some voltages are expected to ++ // go below low thresholds. ++ // Some CPU sensors may appear to be high during the CPU reset. ++ // Delay the threshold check to let CPU power status gets updated first. ++ // 1. If there is already a timer for the same event, ++ // but not the opposite event, do nothing. ++ // 2. Add a delay timer for this event when ++ // a) There is already a timer for the same event and a timer for ++ // opposite event ++ // b) There is a timer for the opposite event only ++ // c) There is no timer for this threshold ++ // This would ensure that any "pulse" event is logged and ++ // last log represents the latest reading ++ ++ if (thresholdTimer.hasActiveTimer(change.threshold, change.asserted) && ++ !thresholdTimer.hasActiveTimer(change.threshold, !change.asserted)) + { +- if (change.asserted || thresholdTimer.hasActiveTimer( +- change.threshold, !change.asserted)) +- { +- thresholdTimer.startTimer(change.threshold, change.asserted, +- change.assertValue); +- continue; +- } ++ continue; // case 1 + } +- assertThresholds(sensor, change.assertValue, change.threshold.level, +- change.threshold.direction, change.asserted); ++ ++ thresholdTimer.startTimer(change.threshold, change.asserted, ++ change.assertValue); + } + } + +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0012-PSUSensors-Move-to-GetSensorConfiguration.patch b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0012-PSUSensors-Move-to-GetSensorConfiguration.patch new file mode 100644 index 000000000..b71956523 --- /dev/null +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0012-PSUSensors-Move-to-GetSensorConfiguration.patch @@ -0,0 +1,148 @@ +From 867d1a00e330e2490184794f2c3913694ad36f09 Mon Sep 17 00:00:00 2001 +From: Zhikui Ren <zhikui.ren@intel.com> +Date: Thu, 5 Nov 2020 22:32:28 -0800 +Subject: [PATCH 12/12] PSUSensors: Move to GetSensorConfiguration + +GetSensorConfiguration has improved to have retries +and avoid more expensive GetManagedOjects calls. Use +it. + +Tested +PSU sensors are created consistently ac_cycle test + +Signed-off-by: Zhikui Ren <zhikui.ren@intel.com> +Change-Id: Id6b374ff415cc7c6c0d83b55f12551556838f2b7 +--- + src/PSUSensorMain.cpp | 69 +++++++++++++++++++++++++++++-------------- + 1 file changed, 47 insertions(+), 22 deletions(-) + +diff --git a/src/PSUSensorMain.cpp b/src/PSUSensorMain.cpp +index be55089..86bf393 100644 +--- a/src/PSUSensorMain.cpp ++++ b/src/PSUSensorMain.cpp +@@ -216,27 +216,15 @@ static void + } + } + +-void createSensors(boost::asio::io_service& io, +- sdbusplus::asio::object_server& objectServer, +- std::shared_ptr<sdbusplus::asio::connection>& dbusConnection) ++static void createSensorsCallback( ++ boost::asio::io_service& io, sdbusplus::asio::object_server& objectServer, ++ std::shared_ptr<sdbusplus::asio::connection>& dbusConnection, ++ const ManagedObjectType& sensorConfigs, ++ const std::shared_ptr<boost::container::flat_set<std::string>>& ++ sensorsChanged) + { +- +- ManagedObjectType sensorConfigs; + int numCreated = 0; +- bool useCache = false; +- +- // TODO may need only modify the ones that need to be changed. +- sensors.clear(); +- for (const char* type : sensorTypes) +- { +- if (!getSensorConfiguration(type, dbusConnection, sensorConfigs, +- useCache)) +- { +- std::cerr << "error get sensor config from entity manager\n"; +- return; +- } +- useCache = true; +- } ++ bool firstScan = sensorsChanged == nullptr; + + std::vector<fs::path> pmbusPaths; + if (!findFiles(fs::path("/sys/class/hwmon"), "name", pmbusPaths)) +@@ -400,6 +388,23 @@ void createSensors(boost::asio::io_service& io, + std::cerr << "Cannot find psu name, invalid configuration\n"; + continue; + } ++ ++ // on rescans, only update sensors we were signaled by ++ if (!firstScan) ++ { ++ std::string psuNameStr = "/" + *psuName; ++ auto it = ++ std::find_if(sensorsChanged->begin(), sensorsChanged->end(), ++ [psuNameStr](std::string& s) { ++ return boost::ends_with(s, psuNameStr); ++ }); ++ ++ if (it == sensorsChanged->end()) ++ { ++ continue; ++ } ++ sensorsChanged->erase(it); ++ } + checkEvent(directory.string(), eventMatch, eventPathList); + checkGroupEvent(directory.string(), groupEventMatch, + groupEventPathList); +@@ -779,7 +784,8 @@ void createSensors(boost::asio::io_service& io, + << sensorPathStr << "\" type \"" << sensorType + << "\"\n"; + } +- ++ // destruct existing one first if already created ++ sensors[sensorName] = nullptr; + sensors[sensorName] = std::make_shared<PSUSensor>( + sensorPathStr, sensorType, objectServer, dbusConnection, io, + sensorName, std::move(sensorThresholds), *interfacePath, +@@ -808,6 +814,22 @@ void createSensors(boost::asio::io_service& io, + return; + } + ++void createSensors( ++ boost::asio::io_service& io, sdbusplus::asio::object_server& objectServer, ++ std::shared_ptr<sdbusplus::asio::connection>& dbusConnection, ++ const std::shared_ptr<boost::container::flat_set<std::string>>& ++ sensorsChanged) ++{ ++ auto getter = std::make_shared<GetSensorConfiguration>( ++ dbusConnection, [&io, &objectServer, &dbusConnection, sensorsChanged]( ++ const ManagedObjectType& sensorConfigs) { ++ createSensorsCallback(io, objectServer, dbusConnection, ++ sensorConfigs, sensorsChanged); ++ }); ++ getter->getConfiguration( ++ std::vector<std::string>(sensorTypes.begin(), sensorTypes.end())); ++} ++ + void propertyInitialize(void) + { + sensorTable = {{"power", "power/"}, +@@ -891,10 +913,12 @@ int main() + systemBus->request_name("xyz.openbmc_project.PSUSensor"); + sdbusplus::asio::object_server objectServer(systemBus); + std::vector<std::unique_ptr<sdbusplus::bus::match::match>> matches; ++ auto sensorsChanged = ++ std::make_shared<boost::container::flat_set<std::string>>(); + + propertyInitialize(); + +- io.post([&]() { createSensors(io, objectServer, systemBus); }); ++ io.post([&]() { createSensors(io, objectServer, systemBus, nullptr); }); + boost::asio::deadline_timer filterTimer(io); + std::function<void(sdbusplus::message::message&)> eventHandler = + [&](sdbusplus::message::message& message) { +@@ -903,6 +927,7 @@ int main() + std::cerr << "callback method error\n"; + return; + } ++ sensorsChanged->insert(message.get_path()); + filterTimer.expires_from_now(boost::posix_time::seconds(3)); + filterTimer.async_wait([&](const boost::system::error_code& ec) { + if (ec == boost::asio::error::operation_aborted) +@@ -913,7 +938,7 @@ int main() + { + std::cerr << "timer error\n"; + } +- createSensors(io, objectServer, systemBus); ++ createSensors(io, objectServer, systemBus, sensorsChanged); + }); + }; + +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0013-Fix-ExitAirTempSensor-calculation.patch b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0013-Fix-ExitAirTempSensor-calculation.patch new file mode 100644 index 000000000..8410083a6 --- /dev/null +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors/0013-Fix-ExitAirTempSensor-calculation.patch @@ -0,0 +1,263 @@ +From beab088e948956abd3d940c9cdc673e869f70e7c Mon Sep 17 00:00:00 2001 +From: Zhikui Ren <zhikui.ren@intel.com> +Date: Thu, 3 Dec 2020 15:14:49 -0800 +Subject: [PATCH] Fix ExitAirTempSensor calculation + +Correct the scaling for tachMaxReading and tachMinReading + +Tested: +Use debug prints to verify that the scaling factor is calculated correctly. + +Signed-off-by: Zhikui Ren <zhikui.ren@intel.com> +Change-Id: Idf9c2aa916ac741ff047c5baea51a664c101c33d +--- + include/ExitAirTempSensor.hpp | 9 +- + src/ExitAirTempSensor.cpp | 151 ++++++++++++++++++++++++---------- + 2 files changed, 112 insertions(+), 48 deletions(-) + +diff --git a/include/ExitAirTempSensor.hpp b/include/ExitAirTempSensor.hpp +index e3c2ef5..a56eba5 100644 +--- a/include/ExitAirTempSensor.hpp ++++ b/include/ExitAirTempSensor.hpp +@@ -61,9 +61,6 @@ struct ExitAirTempSensor : + double alphaF; + double pOffset = 0; + +- // todo: make this private once we don't have to hack in a reading +- boost::container::flat_map<std::string, double> powerReadings; +- + ExitAirTempSensor(std::shared_ptr<sdbusplus::asio::connection>& conn, + const std::string& name, + const std::string& sensorConfiguration, +@@ -74,13 +71,17 @@ struct ExitAirTempSensor : + void checkThresholds(void) override; + void updateReading(void); + void setupMatches(void); ++ void addPowerRanges(const std::string& serviceName, ++ const std::string& path); + + private: + double lastReading; + + std::vector<sdbusplus::bus::match::match> matches; + double inletTemp = std::numeric_limits<double>::quiet_NaN(); +- ++ boost::container::flat_map<std::string, double> powerReadings; ++ boost::container::flat_map<std::string, std::pair<double, double>> ++ powerRanges; + std::shared_ptr<sdbusplus::asio::connection> dbusConnection; + sdbusplus::asio::object_server& objServer; + std::chrono::time_point<std::chrono::system_clock> lastTime; +diff --git a/src/ExitAirTempSensor.cpp b/src/ExitAirTempSensor.cpp +index 94bfe5d..d27aa06 100644 +--- a/src/ExitAirTempSensor.cpp ++++ b/src/ExitAirTempSensor.cpp +@@ -207,22 +207,21 @@ void CFMSensor::setupMatches() + { + + std::shared_ptr<CFMSensor> self = shared_from_this(); +- setupSensorMatch(matches, *dbusConnection, "fan_tach", +- std::move([self](const double& value, +- sdbusplus::message::message& message) { +- self->tachReadings[message.get_path()] = value; +- if (self->tachRanges.find(message.get_path()) == +- self->tachRanges.end()) +- { +- // calls update reading after updating ranges +- self->addTachRanges(message.get_sender(), +- message.get_path()); +- } +- else +- { +- self->updateReading(); +- } +- })); ++ setupSensorMatch( ++ matches, *dbusConnection, "fan_tach", ++ [self](const double& value, sdbusplus::message::message& message) { ++ self->tachReadings[message.get_path()] = value; ++ if (self->tachRanges.find(message.get_path()) == ++ self->tachRanges.end()) ++ { ++ // calls update reading after updating ranges ++ self->addTachRanges(message.get_sender(), message.get_path()); ++ } ++ else ++ { ++ self->updateReading(); ++ } ++ }); + + dbusConnection->async_method_call( + [self](const boost::system::error_code ec, +@@ -539,25 +538,36 @@ void ExitAirTempSensor::setupMatches(void) + std::shared_ptr<ExitAirTempSensor> self = shared_from_this(); + for (const std::string& type : matchTypes) + { +- setupSensorMatch(matches, *dbusConnection, type, +- [self, type](const double& value, +- sdbusplus::message::message& message) { +- if (type == "power") +- { +- std::string path = message.get_path(); +- if (path.find("PS") != std::string::npos && +- boost::ends_with(path, "Input_Power")) +- { +- self->powerReadings[message.get_path()] = +- value; +- } +- } +- else if (type == inletTemperatureSensor) +- { +- self->inletTemp = value; +- } +- self->updateReading(); +- }); ++ setupSensorMatch( ++ matches, *dbusConnection, type, ++ [self, type](const double& value, ++ sdbusplus::message::message& message) { ++ if (type == "power") ++ { ++ std::string path = message.get_path(); ++ if (path.find("PS") != std::string::npos && ++ boost::ends_with(path, "Input_Power")) ++ { ++ self->powerReadings[message.get_path()] = value; ++ if (self->powerRanges.find(message.get_path()) == ++ self->powerRanges.end()) ++ { ++ // calls update reading after updating ranges ++ self->addPowerRanges(message.get_sender(), ++ message.get_path()); ++ } ++ else ++ { ++ self->updateReading(); ++ } ++ } ++ } ++ else if (type == inletTemperatureSensor) ++ { ++ self->inletTemp = value; ++ self->updateReading(); ++ } ++ }); + } + dbusConnection->async_method_call( + [self](boost::system::error_code ec, +@@ -620,6 +630,28 @@ void ExitAirTempSensor::setupMatches(void) + "/xyz/openbmc_project/sensors/power", 0, + std::array<const char*, 1>{sensorValueInterface}); + } ++void ExitAirTempSensor::addPowerRanges(const std::string& serviceName, ++ const std::string& path) ++{ ++ std::shared_ptr<ExitAirTempSensor> self = shared_from_this(); ++ dbusConnection->async_method_call( ++ [self, path](const boost::system::error_code ec, ++ const boost::container::flat_map<std::string, ++ BasicVariantType>& data) { ++ if (ec) ++ { ++ std::cerr << "Error getting properties from " << path << "\n"; ++ return; ++ } ++ ++ double max = loadVariant<double>(data, "MaxValue"); ++ double min = loadVariant<double>(data, "MinValue"); ++ self->powerRanges[path] = std::make_pair(min, max); ++ self->updateReading(); ++ }, ++ serviceName, path, "org.freedesktop.DBus.Properties", "GetAll", ++ "xyz.openbmc_project.Sensor.Value"); ++} + + void ExitAirTempSensor::updateReading(void) + { +@@ -654,14 +686,19 @@ double ExitAirTempSensor::getTotalCFM(void) + + bool ExitAirTempSensor::calculate(double& val) + { +- constexpr size_t maxErrorPrint = 1; ++ constexpr size_t maxErrorPrint = 5; + static bool firstRead = false; + static size_t errorPrint = maxErrorPrint; + + double cfm = getTotalCFM(); +- if (cfm <= 0) ++ if (cfm <= 0 || cfm > cfmMaxReading) + { +- std::cerr << "Error getting cfm\n"; ++ if (errorPrint > 0) ++ { ++ errorPrint--; ++ std::cerr << "Error getting cfm " << cfm << "\n"; ++ } ++ val = 0; + return false; + } + +@@ -691,6 +728,35 @@ bool ExitAirTempSensor::calculate(double& val) + { + continue; + } ++ ++ auto findRange = std::find_if( ++ powerRanges.begin(), powerRanges.end(), [&](const auto& item) { ++ return boost::ends_with(item.first, reading.first); ++ }); ++ ++ if (findRange == powerRanges.end()) ++ { ++ if (errorPrint > 0) ++ { ++ errorPrint--; ++ std::cerr << "Can't find ranges for " << reading.first ++ << " in ranges\n"; ++ } ++ continue; // haven't gotten a max / min ++ } ++ ++ if (reading.second > findRange->second.second) ++ { ++ if (errorPrint > 0) ++ { ++ errorPrint--; ++ std::cerr << "power reading " << reading.second ++ << " exceeded max " << findRange->second.second ++ << "\n"; ++ } ++ continue; ++ } ++ + totalPower += reading.second; + } + +@@ -836,6 +902,7 @@ void createSensor(sdbusplus::asio::object_server& objectServer, + std::move([&objectServer, &dbusConnection, + &exitAirSensor](const ManagedObjectType& resp) { + cfmSensors.clear(); ++ exitAirSensor = nullptr; + for (const auto& pathPair : resp) + { + for (const auto& entry : pathPair.second) +@@ -889,13 +956,9 @@ void createSensor(sdbusplus::asio::object_server& objectServer, + sensor->c2 = + loadVariant<double>(entry.second, "C2") / 100; + sensor->tachMinPercent = +- loadVariant<double>(entry.second, +- "TachMinPercent") / +- 100; ++ loadVariant<double>(entry.second, "TachMinPercent"); + sensor->tachMaxPercent = +- loadVariant<double>(entry.second, +- "TachMaxPercent") / +- 100; ++ loadVariant<double>(entry.second, "TachMaxPercent"); + sensor->createMaxCFMIface(); + sensor->setupMatches(); + +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors_%.bbappend b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors_%.bbappend index a265f04df..c9dcae795 100644 --- a/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors_%.bbappend +++ b/meta-openbmc-mods/meta-ast2500/recipes-phosphor/sensors/dbus-sensors_%.bbappend @@ -5,4 +5,12 @@ SRC_URI += "file://0001-Only-allow-drive-sensors-on-bus-2-for-ast2500.patch \ file://0003-Fix-PSU-PWM-fan-control.patch \ file://0004-Check-readingStateGood-before-updating-thresholds-pr.patch \ file://0005-ExitAir-Move-to-GetSensorConfiguration.patch \ - file://0006-Treat-negative-temperatures-as-errors.patch" + file://0006-Treat-negative-temperatures-as-errors.patch \ + file://0007-Fix-ADC-threshold-hysteresis-to-one-percent-of-criti.patch \ + file://0008-Fix-sensor-lost-issue-on-TEMP-PSU-etc.patch \ + file://0009-NVMeSensor-use-available-interface-for-error-handlin.patch \ + file://0010-revert-revert-log-debug-information-for-sensor-thres.patch \ + file://0011-Check-readingStateGood-before-updating-thresholds-pr.patch \ + file://0012-PSUSensors-Move-to-GetSensorConfiguration.patch \ + file://0013-Fix-ExitAirTempSensor-calculation.patch \ + " |