From db4353de222b51726c8e3c765cc8f1df4ad67687 Mon Sep 17 00:00:00 2001 From: Zhikui Ren Date: Tue, 22 Jun 2021 11:35:12 -0700 Subject: [PATCH] Fix missing de-assert event when threshold changes Issue: Sensor can be re-constructed when sensor configuration changes like a new threshold value. Threshold deassert can be missed if the new threshold value fixes the alarm because the default state for new threshold interface is de-asserted. Resolution: Add a member variable hadValidSensor that is initialized to false for new sensor. When hadValidSensor is false, threshold property changed message will be emitted even if threshold property did not change, If the previous sensor instance had the threshold raised, Phosphor-sel-logger would notice the change and log a de-assert event. If the previous sensor instance did not have the threshold raised, Phosphor-sel-logger would notice this is not a change and not create new SEL log. Set hadValidSensor to true when sensor value is updated with a value that is not NaN. This is done after threshold property changed message is emitted. Tested: 1. Change threshold value for a voltage sensor to force a SEL. ipmitool raw 0x04 0x26 0x60 0x1b 0x95 0x6b 0x00 0x99 0xa6 0x00 2. Verify SEL logged threshold assert event as expected ipmitool sel list 1 | Pre-Init |0000007277| Voltage #0x60 | Upper Non-critical going high | Asserted 3. Use ipmitool to change threshold value back to normal ipmitool raw 0x04 0x26 0x60 0x1b 0x95 0x6b 0x00 0xa4 0xa6 0x00 4. Verify SEL logged threshold de-assert event as expected ipmitool sel list 1 | Pre-Init |0000007277| Voltage #0x60 | Upper Non-critical going high | Asserted 2 | Pre-Init |0000007304| Voltage #0x60 | Upper Non-critical going high | Deasserted Signed-off-by: Zhikui Ren --- include/Thresholds.hpp | 2 +- include/sensor.hpp | 2 ++ src/Thresholds.cpp | 20 ++++++++++++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/Thresholds.hpp b/include/Thresholds.hpp index af63f72..fd507d0 100644 --- a/include/Thresholds.hpp +++ b/include/Thresholds.hpp @@ -44,7 +44,7 @@ struct Threshold void assertThresholds(Sensor* sensor, double assertValue, thresholds::Level level, thresholds::Direction direction, - bool assert); + bool assert, bool force = false); struct TimerUsed { diff --git a/include/sensor.hpp b/include/sensor.hpp index b98241b..6235674 100644 --- a/include/sensor.hpp +++ b/include/sensor.hpp @@ -71,6 +71,7 @@ struct Sensor std::shared_ptr operationalInterface; double value = std::numeric_limits::quiet_NaN(); double rawValue = std::numeric_limits::quiet_NaN(); + bool hadValidValue = false; bool overriddenState = false; bool internalSet = false; double hysteresisTrigger; @@ -432,6 +433,7 @@ struct Sensor { markFunctional(true); markAvailable(true); + hadValidValue = true; } } diff --git a/src/Thresholds.cpp b/src/Thresholds.cpp index 821083a..da0d650 100644 --- a/src/Thresholds.cpp +++ b/src/Thresholds.cpp @@ -418,10 +418,19 @@ bool checkThresholds(Sensor* sensor) { bool status = true; std::vector changes = checkThresholds(sensor, sensor->value); + + // Sensor can be reconstructed when sensor configuration changes + // like a new threshold value. Threshold deassert can be missed + // if the new threshold value fixes the alarm because + // default state for new threshold interface is de-asserted. + // force sending assert/de-assert message when a not NaN value is updated + // for the first time even when threshold property did not change. + bool forceAssert = !sensor->hadValidValue; for (const auto& change : changes) { assertThresholds(sensor, change.assertValue, change.threshold.level, - change.threshold.direction, change.asserted); + change.threshold.direction, change.asserted, + forceAssert); if (change.threshold.level == thresholds::Level::CRITICAL && change.asserted) { @@ -443,6 +452,7 @@ void checkThresholdsPowerDelay(const std::weak_ptr& weakSensor, Sensor* sensor = sensorPtr.get(); std::vector changes = checkThresholds(sensor, sensor->value); + bool forceAssert = !sensor->hadValidValue; for (const auto& change : changes) { // When CPU is powered off, some volatges are expected to @@ -467,13 +477,13 @@ void checkThresholdsPowerDelay(const std::weak_ptr& weakSensor, } } assertThresholds(sensor, change.assertValue, change.threshold.level, - change.threshold.direction, change.asserted); + change.threshold.direction, change.asserted, forceAssert); } } void assertThresholds(Sensor* sensor, double assertValue, thresholds::Level level, thresholds::Direction direction, - bool assert) + bool assert, bool force) { std::string property; std::shared_ptr interface; @@ -513,7 +523,9 @@ void assertThresholds(Sensor* sensor, double assertValue, return; } - if (interface->set_property(property, assert)) + bool propertyChanged = + interface->set_property(property, assert); + if (force || propertyChanged) { try { -- 2.17.1