From d477be5da32c62fc30096a99d1e601ed7c42a10c Mon Sep 17 00:00:00 2001 From: Zhikui Ren Date: Tue, 22 Jun 2021 11:35:12 -0700 Subject: [PATCH] Fix missing de-assert event when threshold changes Issue: Sensor can be re-constructed when sensor configuration changes like a new threshold value. Threshold deassert can be missed if the new threshold value fixes the alarm because the default state for new threshold interface is de-asserted. Resolution: Add a member variable hadValidSensor that is initialized to false for new sensor. When hadValidSensor is false, threshold property changed message will be emitted even if threshold property did not change, If the previous sensor instance had the threshold raised, Phosphor-sel-logger would notice the change and log a de-assert event. If the previous sensor instance did not have the threshold raised, Phosphor-sel-logger would notice this is not a change and not create new SEL log. Set hadValidSensor to true when sensor value is updated with a value that is not NaN. This is done after threshold property changed message is emitted. Tested: 1. Change threshold value for a voltage sensor to force a SEL. ipmitool raw 0x04 0x26 0x60 0x1b 0x95 0x6b 0x00 0x99 0xa6 0x00 2. Verify SEL logged threshold assert event as expected ipmitool sel list 1 | Pre-Init |0000007277| Voltage #0x60 | Upper Non-critical going high | Asserted 3. Use ipmitool to change threshold value back to normal ipmitool raw 0x04 0x26 0x60 0x1b 0x95 0x6b 0x00 0xa4 0xa6 0x00 4. Verify SEL logged threshold de-assert event as expected ipmitool sel list 1 | Pre-Init |0000007277| Voltage #0x60 | Upper Non-critical going high | Asserted 2 | Pre-Init |0000007304| Voltage #0x60 | Upper Non-critical going high | Deasserted Signed-off-by: Zhikui Ren --- include/Thresholds.hpp | 2 +- include/sensor.hpp | 2 ++ src/Thresholds.cpp | 20 ++++++++++++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/Thresholds.hpp b/include/Thresholds.hpp index 209d68e..640fdb4 100644 --- a/include/Thresholds.hpp +++ b/include/Thresholds.hpp @@ -47,7 +47,7 @@ struct Threshold void assertThresholds(Sensor* sensor, double assertValue, thresholds::Level level, thresholds::Direction direction, - bool assert); + bool assert, bool force = false); struct TimerUsed { diff --git a/include/sensor.hpp b/include/sensor.hpp index d38bcde..b8cfd66 100644 --- a/include/sensor.hpp +++ b/include/sensor.hpp @@ -81,6 +81,7 @@ struct Sensor std::shared_ptr valueMutabilityInterface; double value = std::numeric_limits::quiet_NaN(); double rawValue = std::numeric_limits::quiet_NaN(); + bool hadValidValue = false; bool overriddenState = false; bool internalSet = false; double hysteresisTrigger; @@ -462,6 +463,7 @@ struct Sensor { markFunctional(true); markAvailable(true); + hadValidValue = true; } } diff --git a/src/Thresholds.cpp b/src/Thresholds.cpp index 0581f21..84df7cf 100644 --- a/src/Thresholds.cpp +++ b/src/Thresholds.cpp @@ -426,10 +426,19 @@ bool checkThresholds(Sensor* sensor) { bool status = true; std::vector changes = checkThresholds(sensor, sensor->value); + + // Sensor can be reconstructed when sensor configuration changes + // like a new threshold value. Threshold deassert can be missed + // if the new threshold value fixes the alarm because + // default state for new threshold interface is de-asserted. + // force sending assert/de-assert message when a not NaN value is updated + // for the first time even when threshold property did not change. + bool forceAssert = !sensor->hadValidValue; for (const auto& change : changes) { assertThresholds(sensor, change.assertValue, change.threshold.level, - change.threshold.direction, change.asserted); + change.threshold.direction, change.asserted, + forceAssert); if (change.threshold.level == thresholds::Level::CRITICAL && change.asserted) { @@ -451,6 +460,7 @@ void checkThresholdsPowerDelay(const std::weak_ptr& weakSensor, Sensor* sensor = sensorPtr.get(); std::vector changes = checkThresholds(sensor, sensor->value); + bool forceAssert = !sensor->hadValidValue; for (const auto& change : changes) { // When CPU is powered off, some volatges are expected to @@ -475,13 +485,13 @@ void checkThresholdsPowerDelay(const std::weak_ptr& weakSensor, } } assertThresholds(sensor, change.assertValue, change.threshold.level, - change.threshold.direction, change.asserted); + change.threshold.direction, change.asserted, forceAssert); } } void assertThresholds(Sensor* sensor, double assertValue, thresholds::Level level, thresholds::Direction direction, - bool assert) + bool assert, bool force) { std::string property; std::shared_ptr interface; @@ -521,7 +531,9 @@ void assertThresholds(Sensor* sensor, double assertValue, return; } - if (interface->set_property(property, assert)) + bool propertyChanged = + interface->set_property(property, assert); + if (force || propertyChanged) { try { -- 2.17.1