From 9cc183a531e1e309a99784f65b15c0fb1a18ddef Mon Sep 17 00:00:00 2001 From: P Dheeraj Srujan Kumar Date: Fri, 6 May 2022 06:11:20 +0530 Subject: Update to internal 1-0.91-67 Signed-off-by: P Dheeraj Srujan Kumar --- ...Add-support-for-Get-PMBUS-Readings-method.patch | 457 +++++++++++++++++++++ ...0011-Fix-for-cpusensor-going-into-D-state.patch | 224 ++++++++++ .../0012-Serialize-cpusensor-polling.patch | 343 ++++++++++++++++ .../0013-Add-dummy-cpu-sensor-flag.patch | 60 +++ ...temperatures-readings-as-errors-in-IpmbSe.patch | 49 +++ .../sensors/dbus-sensors_%.bbappend | 5 + 6 files changed, 1138 insertions(+) create mode 100644 meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0010-Add-support-for-Get-PMBUS-Readings-method.patch create mode 100644 meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0011-Fix-for-cpusensor-going-into-D-state.patch create mode 100644 meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0012-Serialize-cpusensor-polling.patch create mode 100644 meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0013-Add-dummy-cpu-sensor-flag.patch create mode 100644 meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0014-Treat-zero-temperatures-readings-as-errors-in-IpmbSe.patch (limited to 'meta-openbmc-mods/meta-common/recipes-phosphor/sensors') diff --git a/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0010-Add-support-for-Get-PMBUS-Readings-method.patch b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0010-Add-support-for-Get-PMBUS-Readings-method.patch new file mode 100644 index 000000000..d305ef008 --- /dev/null +++ b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0010-Add-support-for-Get-PMBUS-Readings-method.patch @@ -0,0 +1,457 @@ +From 5c2fbc31b076a32d0291e74046a576de852ac90c Mon Sep 17 00:00:00 2001 +From: Arun Lal K M +Date: Thu, 6 Jan 2022 20:30:09 +0000 +Subject: [PATCH] Add support for 'Get PMBUS Readings' method. + +VR sensor is currently read in the following way: +BMC gives read command, and ME proxy forward it to VR sensor. + +In 'Get PMBUS Readings' method BMC reads the data from ME. +This is used in platforms where ME cannot proxy forward PMBUS command. + +Command: +0x2E 0xF5 0x57 0x01 0x00 0x 0x0F 0x00 + +0x2E is net function +0xF5 is corresponding to 'Get PMBUS Readings' +0x57 0x01 0x00 is Intel manufacturers ID +ID is the ID of senssor in ME (not actual sensor address) +0x0F is PMBUS-enabled Device Index +0x00 is reserved byte + +New configuration parameters in Baseboard.json: +1) DeviceIndex: ID of the sensor in ME. +2) ReadMethod: use "ReadME" for 'Get PMBUS Readings'. +3) Register: Register to read in the response byte array. + For example, + Registers 1 = Temperature + Registers 2 = Voltage + Registers 3 = Current + +Note: +This is not a complete replacement for the old method, we are adding +one more way to read these sensors. The old implementation is still +present, as in, new code with old configuration file +(xx_Baseboard.json) will work (using the old method of reading the +sensor). + +References: +1) Intelligent Power Node Manager External Interface Specification + Using IPMI. +2) PMBus-Specification. + +Tested: +Case 1: Using proxy (Backward compatibility testing) +Sample configuration in Baseboard.json: +{ + "Address": "0xXX", + "Class": "MpsBridgeTemp", + "Name": "XX VR Temp", + "Thresholds": [...], + "Type": "IpmbSensor" +} + +Give command 'ipmitool sdr elist' +Response: +CPU1 PVCCD VR Te | 31h | ok | 0.1 | 37 degrees C +CPU2 PVCCD VR Te | 36h | ok | 0.1 | 37 degrees C +CPU1 PVCCFA EHV | 32h | ok | 0.1 | 34 degrees C +CPU2 PVCCFA EHV | 37h | ok | 0.1 | 33 degrees C +CPU1 VCCIN VR Te | 34h | ok | 0.1 | 48 degrees C +CPU2 VCCIN VR Te | 39h | ok | 0.1 | 58 degrees C + +Case 2: Using 'Get PMBUS Readings' method +Sample configuration in Baseboard.json: +{ + "Address": "0xXX", + "DeviceIndex": "0xXX", + "Class": "MpsBridgeTemp", + "Register": 1, + "ReadMethod": "ReadME", + "Name": "XX VR Temp", + "Thresholds": [...], + "Type": "IpmbSensor" +} + +Give command 'ipmitool sdr elist' +Response: +CPU1 PVCCD VR Te | 31h | ok | 0.1 | 41 degrees C +CPU2 PVCCD VR Te | 36h | ok | 0.1 | 43 degrees C +CPU1 PVCCFA EHV | 32h | ok | 0.1 | 37 degrees C +CPU2 PVCCFA EHV | 37h | ok | 0.1 | 37 degrees C +CPU1 VCCIN VR Te | 34h | ok | 0.1 | 60 degrees C +CPU2 VCCIN VR Te | 39h | ok | 0.1 | 56 degrees C + +Signed-off-by: Arun Lal K M +--- + include/IpmbSensor.hpp | 51 ++++++++++ + src/IpmbSensor.cpp | 217 ++++++++++++++++++++++++++++++++++------- + 2 files changed, 233 insertions(+), 35 deletions(-) + +diff --git a/include/IpmbSensor.hpp b/include/IpmbSensor.hpp +index 18d10c1..2a89251 100644 +--- a/include/IpmbSensor.hpp ++++ b/include/IpmbSensor.hpp +@@ -43,6 +43,51 @@ namespace sensor + { + constexpr uint8_t netFn = 0x04; + constexpr uint8_t getSensorReading = 0x2d; ++constexpr uint8_t manufacturerId[3] = {0x57, 0x01, 0x00}; ++ ++namespace read_me ++{ ++/** ++ * Refernce: ++ * Intelligent Power Node Manager External Interface Specification ++ * getPmbusReadings = Get PMBUS Readings (F5h) ++ * ++ * bytesForTimestamp and bytesForManufacturerId are decoded from ++ * response bytes for Get PMBUS Readings. ++ */ ++constexpr uint8_t getPmbusReadings = 0xF5; ++constexpr uint8_t bytesForTimestamp = 4; ++constexpr uint8_t bytesForManufacturerId = 3; ++ ++constexpr size_t fixedOffset = bytesForTimestamp + bytesForManufacturerId; ++ ++void getRawData(uint8_t registerToRead, const std::vector& input, ++ std::vector& result) ++{ ++ if (input.size() < 3) ++ { ++ return; ++ } ++ ++ /* Every register is two bytes*/ ++ size_t offset = fixedOffset + (registerToRead * 2); ++ if (input.size() <= (offset + 1)) ++ { ++ return; ++ } ++ ++ result.reserve(5); ++ ++ // ID ++ result.emplace_back(input[0]); ++ result.emplace_back(input[1]); ++ result.emplace_back(input[2]); ++ ++ // Value in registerToRead ++ result.emplace_back(input[offset]); ++ result.emplace_back(input[offset + 1]); ++} ++} // namespace read_me + + static bool isValid(const std::vector& data) + { +@@ -91,6 +136,7 @@ struct IpmbSensor : public Sensor + void loadDefaults(void); + void runInitCmd(void); + bool processReading(const std::vector& data, double& resp); ++ void setReadMethod(const SensorBaseConfigMap& sensorBaseConfig); + + IpmbType type; + IpmbSubType subType; +@@ -102,6 +148,9 @@ struct IpmbSensor : public Sensor + uint8_t deviceAddress; + uint8_t errorCount; + uint8_t hostSMbusIndex; ++ uint8_t registerToRead = 0; ++ bool isReadMe = false; ++ uint8_t deviceIndex = 0; + std::vector commandData; + std::optional initCommand; + std::vector initData; +@@ -112,4 +161,6 @@ struct IpmbSensor : public Sensor + private: + sdbusplus::asio::object_server& objectServer; + boost::asio::deadline_timer waitTimer; ++ ++ void getMeCommand(); + }; +diff --git a/src/IpmbSensor.cpp b/src/IpmbSensor.cpp +index eedf21e..75f74b5 100644 +--- a/src/IpmbSensor.cpp ++++ b/src/IpmbSensor.cpp +@@ -150,6 +150,39 @@ void IpmbSensor::runInitCmd() + } + } + ++/** ++ * Refernce: ++ * Intelligent Power Node Manager External Interface Specification ++ */ ++void IpmbSensor::getMeCommand() ++{ ++ /* ++ * Byte 1, 2, 3 = Manufacturer ID. ++ */ ++ commandData.emplace_back(ipmi::sensor::manufacturerId[0]); ++ commandData.emplace_back(ipmi::sensor::manufacturerId[1]); ++ commandData.emplace_back(ipmi::sensor::manufacturerId[2]); ++ ++ /* ++ * Byte 4 = Device Index. ++ */ ++ commandData.emplace_back(deviceIndex); ++ ++ /* ++ * Byte 5 = History index. ++ * bit 0 to 3 = History index. Supported value: 0Fh to retrieve ++ * current samples. ++ * bit 4 to 7 = Page number – used only for devices which support ++ * pages. ++ */ ++ commandData.emplace_back(0x0F); ++ ++ /* ++ * Byte 6 = First Register Offset. ++ */ ++ commandData.emplace_back(0x00); ++} ++ + void IpmbSensor::loadDefaults() + { + if (type == IpmbType::meSensor) +@@ -164,28 +197,44 @@ void IpmbSensor::loadDefaults() + { + commandAddress = meAddress; + netfn = ipmi::me_bridge::netFn; +- command = ipmi::me_bridge::sendRawPmbus; +- initCommand = ipmi::me_bridge::sendRawPmbus; +- // pmbus read temp +- commandData = {0x57, 0x01, 0x00, 0x16, hostSMbusIndex, +- deviceAddress, 0x00, 0x00, 0x00, 0x00, +- 0x01, 0x02, 0x8d}; +- // goto page 0 +- initData = {0x57, 0x01, 0x00, 0x14, hostSMbusIndex, +- deviceAddress, 0x00, 0x00, 0x00, 0x00, +- 0x02, 0x00, 0x00, 0x00}; + readingFormat = ReadingFormat::linearElevenBit; ++ if (isReadMe) ++ { ++ command = ipmi::sensor::read_me::getPmbusReadings; ++ getMeCommand(); ++ } ++ else ++ { ++ command = ipmi::me_bridge::sendRawPmbus; ++ initCommand = ipmi::me_bridge::sendRawPmbus; ++ // pmbus read temp ++ commandData = {0x57, 0x01, 0x00, 0x16, hostSMbusIndex, ++ deviceAddress, 0x00, 0x00, 0x00, 0x00, ++ 0x01, 0x02, 0x8d}; ++ // goto page 0 ++ initData = {0x57, 0x01, 0x00, 0x14, hostSMbusIndex, ++ deviceAddress, 0x00, 0x00, 0x00, 0x00, ++ 0x02, 0x00, 0x00, 0x00}; ++ } + } + else if (type == IpmbType::IR38363VR) + { + commandAddress = meAddress; + netfn = ipmi::me_bridge::netFn; +- command = ipmi::me_bridge::sendRawPmbus; +- // pmbus read temp +- commandData = {0x57, 0x01, 0x00, 0x16, hostSMbusIndex, +- deviceAddress, 00, 0x00, 0x00, 0x00, +- 0x01, 0x02, 0x8D}; + readingFormat = ReadingFormat::elevenBitShift; ++ if (isReadMe) ++ { ++ command = ipmi::sensor::read_me::getPmbusReadings; ++ getMeCommand(); ++ } ++ else ++ { ++ command = ipmi::me_bridge::sendRawPmbus; ++ // pmbus read temp ++ commandData = {0x57, 0x01, 0x00, 0x16, hostSMbusIndex, ++ deviceAddress, 00, 0x00, 0x00, 0x00, ++ 0x01, 0x02, 0x8D}; ++ } + } + else if (type == IpmbType::ADM1278HSC) + { +@@ -194,20 +243,28 @@ void IpmbSensor::loadDefaults() + { + case IpmbSubType::temp: + case IpmbSubType::curr: +- uint8_t snsNum; +- if (subType == IpmbSubType::temp) ++ netfn = ipmi::me_bridge::netFn; ++ readingFormat = ReadingFormat::elevenBit; ++ if (isReadMe) + { +- snsNum = 0x8d; ++ command = ipmi::sensor::read_me::getPmbusReadings; ++ getMeCommand(); + } + else + { +- snsNum = 0x8c; ++ uint8_t snsNum; ++ if (subType == IpmbSubType::temp) ++ { ++ snsNum = 0x8d; ++ } ++ else ++ { ++ snsNum = 0x8c; ++ } ++ command = ipmi::me_bridge::sendRawPmbus; ++ commandData = {0x57, 0x01, 0x00, 0x86, deviceAddress, ++ 0x00, 0x00, 0x01, 0x02, snsNum}; + } +- netfn = ipmi::me_bridge::netFn; +- command = ipmi::me_bridge::sendRawPmbus; +- commandData = {0x57, 0x01, 0x00, 0x86, deviceAddress, +- 0x00, 0x00, 0x01, 0x02, snsNum}; +- readingFormat = ReadingFormat::elevenBit; + break; + case IpmbSubType::power: + case IpmbSubType::volt: +@@ -224,17 +281,25 @@ void IpmbSensor::loadDefaults() + { + commandAddress = meAddress; + netfn = ipmi::me_bridge::netFn; +- command = ipmi::me_bridge::sendRawPmbus; +- initCommand = ipmi::me_bridge::sendRawPmbus; +- // pmbus read temp +- commandData = {0x57, 0x01, 0x00, 0x16, hostSMbusIndex, +- deviceAddress, 0x00, 0x00, 0x00, 0x00, +- 0x01, 0x02, 0x8d}; +- // goto page 0 +- initData = {0x57, 0x01, 0x00, 0x14, hostSMbusIndex, +- deviceAddress, 0x00, 0x00, 0x00, 0x00, +- 0x02, 0x00, 0x00, 0x00}; + readingFormat = ReadingFormat::byte3; ++ if (isReadMe) ++ { ++ command = ipmi::sensor::read_me::getPmbusReadings; ++ getMeCommand(); ++ } ++ else ++ { ++ command = ipmi::me_bridge::sendRawPmbus; ++ initCommand = ipmi::me_bridge::sendRawPmbus; ++ // pmbus read temp ++ commandData = {0x57, 0x01, 0x00, 0x16, hostSMbusIndex, ++ deviceAddress, 0x00, 0x00, 0x00, 0x00, ++ 0x01, 0x02, 0x8d}; ++ // goto page 0 ++ initData = {0x57, 0x01, 0x00, 0x14, hostSMbusIndex, ++ deviceAddress, 0x00, 0x00, 0x00, 0x00, ++ 0x02, 0x00, 0x00, 0x00}; ++ } + } + else + { +@@ -362,7 +427,19 @@ void IpmbSensor::read(void) + read(); + return; + } +- const std::vector& data = std::get<5>(response); ++ ++ std::vector data; ++ ++ if (isReadMe) ++ { ++ ipmi::sensor::read_me::getRawData( ++ registerToRead, std::get<5>(response), data); ++ } ++ else ++ { ++ data = std::get<5>(response); ++ } ++ + if constexpr (debug) + { + std::cout << name << ": "; +@@ -408,6 +485,74 @@ void IpmbSensor::read(void) + "sendRequest", commandAddress, netfn, lun, command, commandData); + }); + } ++ ++void IpmbSensor::setReadMethod(const SensorBaseConfigMap& sensorBaseConfig) ++{ ++ /* ++ * Some sensor can be read in two ways ++ * 1) Using proxy: BMC read command is proxy forward by ME ++ * to sensor. 2) Using 'Get PMBUS Readings': ME responds to ++ * BMC with sensor data. ++ * ++ * By default we assume the method is 1. And if ReadMethod ++ * == "ReadME" we switch to method 2. ++ */ ++ auto readMethod = sensorBaseConfig.find("ReadMethod"); ++ if (readMethod == sensorBaseConfig.end()) ++ { ++ std::cerr << "'ReadMethod' not found, defaulting to " ++ "proxy method of reading sensor\n"; ++ return; ++ } ++ ++ if (std::visit(VariantToStringVisitor(), readMethod->second) != "ReadME") ++ { ++ std::cerr << "'ReadMethod' != 'ReadME', defaulting to " ++ "proxy method of reading sensor\n"; ++ return; ++ } ++ ++ /* ++ * In 'Get PMBUS Readings' the response containt a ++ * set of registers from the sensor. And different ++ * values such as temperature power voltage will be ++ * mapped to different registers. ++ */ ++ auto registerToReadConfig = sensorBaseConfig.find("Register"); ++ if (registerToReadConfig == sensorBaseConfig.end()) ++ { ++ std::cerr << "'Register' not found, defaulting to " ++ "proxy method of reading sensor\n"; ++ return; ++ } ++ ++ registerToRead = ++ std::visit(VariantToUnsignedIntVisitor(), registerToReadConfig->second); ++ ++ /* ++ * In 'Get PMBUS Readings' since ME is ++ * responding with the sensor data we need ++ * to use the address for sensor in ME, this ++ * is different from the actual sensor ++ * address. ++ */ ++ auto deviceIndexConfig = sensorBaseConfig.find("SensorMeAddress"); ++ if (deviceIndexConfig == sensorBaseConfig.end()) ++ { ++ std::cerr << "'SensorMeAddress' not found, defaulting to " ++ "proxy method of reading sensor\n"; ++ return; ++ } ++ ++ deviceIndex = ++ std::visit(VariantToUnsignedIntVisitor(), deviceIndexConfig->second); ++ ++ /* ++ * We found all parameters to use 'Get PMBUS Readings' ++ * method. ++ */ ++ isReadMe = true; ++} + void createSensors( + boost::asio::io_service& io, sdbusplus::asio::object_server& objectServer, + boost::container::flat_map>& +@@ -485,6 +630,8 @@ void createSensors( + std::move(sensorThresholds), deviceAddress, + hostSMbusIndex, pollRate, sensorTypeName); + ++ sensor->setReadMethod(entry.second); ++ + /* Initialize scale and offset value */ + sensor->scaleVal = 1; + sensor->offsetVal = 0; +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0011-Fix-for-cpusensor-going-into-D-state.patch b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0011-Fix-for-cpusensor-going-into-D-state.patch new file mode 100644 index 000000000..cc9587bf7 --- /dev/null +++ b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0011-Fix-for-cpusensor-going-into-D-state.patch @@ -0,0 +1,224 @@ +From dd880b25d01c0c7e25d65930ba0d38ddd1a6ec76 Mon Sep 17 00:00:00 2001 +From: Arun Lal K M +Date: Fri, 31 Dec 2021 13:29:56 +0000 +Subject: [PATCH] Fix for cpusensor going into D state. + +When PECI driver returns the error code EAGAIN, async_read_until will +go into a keep trying again loop. As async_read_until does not handle +EAGAIN reliably. + +Same issue was identified and fix for PSUSensor: +https://github.com/openbmc/dbus-sensors/commit/bcf76717697238e380be785bd28150963ecf0e9e + +Fix for this is to use async_wait instead of async_read_until. + +Tested: +In Intel system verified cpusensor daemon is not getting into D state +when there is a peci error EAGAIN. +Verified sensors are getting populated as expected when there is no PECI +error. + +Signed-off-by: Arun Lal K M +Signed-off-by: Arun P. Mohanan +Change-Id: I52e35075e522d0ae0d99e1c893db76156e299871 +--- + include/CPUSensor.hpp | 3 ++ + src/CPUSensor.cpp | 119 ++++++++++++++++++++++++++---------------- + 2 files changed, 77 insertions(+), 45 deletions(-) + +diff --git a/include/CPUSensor.hpp b/include/CPUSensor.hpp +index a6fbdad..12d8788 100644 +--- a/include/CPUSensor.hpp ++++ b/include/CPUSensor.hpp +@@ -64,9 +64,12 @@ class CPUSensor : public Sensor, public std::enable_shared_from_this + bool loggedInterfaceDown = false; + uint8_t minMaxReadCounter; + unsigned int scaleFactor; ++ int fd; + void handleResponse(const boost::system::error_code& err); + void checkThresholds(void) override; + void updateMinMaxValues(void); ++ bool initInputDev(); ++ void restartRead(void); + }; + + extern boost::container::flat_map> +diff --git a/src/CPUSensor.cpp b/src/CPUSensor.cpp +index de33f9b..4ced5e2 100644 +--- a/src/CPUSensor.cpp ++++ b/src/CPUSensor.cpp +@@ -75,6 +75,7 @@ CPUSensor::CPUSensor(const std::string& path, const std::string& objectType, + + // call setup always as not all sensors call setInitialProperties + setupPowerMatch(conn); ++ initInputDev(); + } + + // Create a dummy "not available" CPUSensor +@@ -161,56 +162,60 @@ CPUSensor::~CPUSensor() + } + } + +-void CPUSensor::setupRead(void) ++bool CPUSensor::initInputDev() + { +- std::weak_ptr weakRef = weak_from_this(); +- +- if (readingStateGood()) +- { +- inputDev.close(); +- int fd = open(path.c_str(), O_RDONLY); +- if (fd >= 0) +- { +- inputDev.assign(fd); +- +- boost::asio::async_read_until( +- inputDev, readBuf, '\n', +- [weakRef](const boost::system::error_code& ec, +- std::size_t /*bytes_transfered*/) { +- std::shared_ptr self = weakRef.lock(); +- if (!self) +- { +- return; +- } +- self->handleResponse(ec); +- }); +- } +- else +- { +- std::cerr << name << " unable to open fd!\n"; +- pollTime = sensorFailedPollTimeMs; +- } +- } +- else ++ fd = open(path.c_str(), O_RDONLY | O_NONBLOCK); ++ if (fd < 0) + { +- pollTime = sensorFailedPollTimeMs; +- markAvailable(false); ++ std::cerr << "CPU sensor failed to open file\n"; ++ return false; + } ++ ++ inputDev.assign(fd); ++ return true; ++} ++ ++void CPUSensor::restartRead(void) ++{ ++ std::weak_ptr weakRef = weak_from_this(); + waitTimer.expires_from_now(boost::posix_time::milliseconds(pollTime)); + waitTimer.async_wait([weakRef](const boost::system::error_code& ec) { + if (ec == boost::asio::error::operation_aborted) + { +- return; // we're being canceled ++ std::cerr << "Failed to reschedule\n"; ++ return; + } + std::shared_ptr self = weakRef.lock(); +- if (!self) ++ ++ if (self) + { +- return; ++ self->setupRead(); + } +- self->setupRead(); + }); + } + ++void CPUSensor::setupRead(void) ++{ ++ if (!readingStateGood()) ++ { ++ markAvailable(false); ++ updateValue(std::numeric_limits::quiet_NaN()); ++ restartRead(); ++ return; ++ } ++ ++ std::weak_ptr weakRef = weak_from_this(); ++ inputDev.async_wait(boost::asio::posix::descriptor_base::wait_read, ++ [weakRef](const boost::system::error_code& ec) { ++ std::shared_ptr self = weakRef.lock(); ++ ++ if (self) ++ { ++ self->handleResponse(ec); ++ } ++ }); ++} ++ + void CPUSensor::updateMinMaxValues(void) + { + double newMin = std::numeric_limits::quiet_NaN(); +@@ -265,8 +270,8 @@ void CPUSensor::updateMinMaxValues(void) + + void CPUSensor::handleResponse(const boost::system::error_code& err) + { +- +- if (err == boost::system::errc::bad_file_descriptor) ++ if ((err == boost::system::errc::bad_file_descriptor) || ++ (err == boost::asio::error::misc_errors::not_found)) + { + return; // we're being destroyed + } +@@ -285,16 +290,35 @@ void CPUSensor::handleResponse(const boost::system::error_code& err) + return; + } + loggedInterfaceDown = false; +- pollTime = CPUSensor::sensorPollMs; +- std::istream responseStream(&readBuf); +- if (!err) ++ ++ if (err) + { +- std::string response; ++ pollTime = sensorFailedPollTimeMs; ++ incrementError(); ++ if (fd >= 0) ++ { ++ lseek(fd, 0, SEEK_SET); ++ } ++ return; ++ } ++ ++ static constexpr uint32_t bufLen = 128; ++ std::string response; ++ response.resize(bufLen); ++ int rdLen = 0; ++ ++ if (fd >= 0) ++ { ++ lseek(fd, 0, SEEK_SET); ++ rdLen = read(fd, response.data(), bufLen); ++ } ++ ++ if (rdLen > 0) ++ { ++ + try + { +- std::getline(responseStream, response); + rawValue = std::stod(response); +- responseStream.clear(); + double nvalue = rawValue / scaleFactor; + + if (show) +@@ -358,7 +382,12 @@ void CPUSensor::handleResponse(const boost::system::error_code& err) + incrementError(); + } + +- responseStream.clear(); ++ if (fd >= 0) ++ { ++ lseek(fd, 0, SEEK_SET); ++ } ++ ++ restartRead(); + } + + void CPUSensor::checkThresholds(void) +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0012-Serialize-cpusensor-polling.patch b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0012-Serialize-cpusensor-polling.patch new file mode 100644 index 000000000..4617235f1 --- /dev/null +++ b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0012-Serialize-cpusensor-polling.patch @@ -0,0 +1,343 @@ +From dbcdef1ba1ee2445d2a571d1fd47be256833f94b Mon Sep 17 00:00:00 2001 +From: Arun Lal K M +Date: Fri, 31 Dec 2021 13:29:56 +0000 +Subject: [PATCH] Serialize cpusensor polling + +Say we have 100 cpusensor objects, there will be 100 reads made +to peci driver. And in peci driver, if a read fails, it will keep +trying again till timeout. Timeout is 0.7 sec. + +Which means, under peci error state, cpusensor daemon will +make 100 calls to peci driver. And each of that call will take 0.7 sec. + +This ultimately result in io having a queue of 100 request at the same +time. And since we only has one thread for io (and peci can only do one read +at a time) these calls will make io nonresponsive. + +As io is busy: +1) busctl tree or introspect to xyz.openbmc_project.CPUSensor will fail. +2) phosphor-pid-control will try to get data from cpusensor, +and after trying for 5 times, phosphor-pid-control will gracefully exit. + +Fix for this is to serialize the requests made to peci from cpusensor daemon. +This is done via single poll loop for all cpu sensors using coroutine. +This basically avoid overloading the io with flooding of requests. + +Also, the polling frequency of individual sensors are modified to small +delay between 2 sensor reads. + +Tested: +Negative test case: In Intel system which has peci error. +By giving busctl tree or introspect to xyz.openbmc_project.CPUSensor +And making sure tree is populated. + +Positive test case: In Intel system which does not have peci error. +By giving command ipmitool sensor list and making sure CPU sensors +in are showing values as expected. + +Signed-off-by: Arun Lal K M +Signed-off-by: Arun P. Mohanan +Change-Id: Id435d198d6427e3141517ea517a8fa6f946c5dd5 +--- + .clang-tidy | 1 + + include/CPUSensor.hpp | 7 ++---- + meson.build | 6 +++++ + src/CPUSensor.cpp | 53 ++++++++++------------------------------ + src/CPUSensorMain.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++- + 5 files changed, 78 insertions(+), 46 deletions(-) + +diff --git a/.clang-tidy b/.clang-tidy +index 13f6a4b..9d58421 100644 +--- a/.clang-tidy ++++ b/.clang-tidy +@@ -258,4 +258,5 @@ CheckOptions: + - { key: readability-identifier-naming.ParameterCase, value: camelBack } + - { key: readability-identifier-naming.NamespaceCase, value: lower_case } + - { key: readability-identifier-naming.StructCase, value: CamelCase } ++ - { key: performance-unnecessary-value-param.AllowedTypes, value: 'boost::asio::yield_context' } + +diff --git a/include/CPUSensor.hpp b/include/CPUSensor.hpp +index 12d8788..0ba4090 100644 +--- a/include/CPUSensor.hpp ++++ b/include/CPUSensor.hpp +@@ -45,22 +45,20 @@ class CPUSensor : public Sensor, public std::enable_shared_from_this + const std::string& sensorConfiguration); + + ~CPUSensor() override; +- static constexpr unsigned int sensorPollMs = 1000; ++ static constexpr unsigned int sensorScaleFactor = 1000; + static constexpr size_t warnAfterErrorCount = 10; + static constexpr const char* labelTcontrol = "Tcontrol"; +- void setupRead(void); ++ void setupRead(boost::asio::yield_context yield); + + private: + sdbusplus::asio::object_server& objServer; + boost::asio::streambuf readBuf; + boost::asio::posix::stream_descriptor inputDev; +- boost::asio::deadline_timer waitTimer; + std::string nameTcontrol; + std::string path; + double privTcontrol; + double dtsOffset; + bool show; +- size_t pollTime; + bool loggedInterfaceDown = false; + uint8_t minMaxReadCounter; + unsigned int scaleFactor; +@@ -69,7 +67,6 @@ class CPUSensor : public Sensor, public std::enable_shared_from_this + void checkThresholds(void) override; + void updateMinMaxValues(void); + bool initInputDev(); +- void restartRead(void); + }; + + extern boost::container::flat_map> +diff --git a/meson.build b/meson.build +index 7e8a3e6..b9dd039 100644 +--- a/meson.build ++++ b/meson.build +@@ -43,6 +43,11 @@ sdbusplus = dependency( + ], + ) + ++boost = dependency( ++ 'boost', ++ modules: ['coroutine'], ++) ++ + phosphor_logging_dep = dependency( + 'phosphor-logging', + fallback: ['phosphor-logging', 'phosphor_logging_dep'], +@@ -70,6 +75,7 @@ default_deps = [ + nlohmann_json, + phosphor_logging_dep, + sdbusplus, ++ boost, + ] + + thresholds_a = static_library( +diff --git a/src/CPUSensor.cpp b/src/CPUSensor.cpp +index 4ced5e2..8c49bc5 100644 +--- a/src/CPUSensor.cpp ++++ b/src/CPUSensor.cpp +@@ -45,10 +45,10 @@ CPUSensor::CPUSensor(const std::string& path, const std::string& objectType, + objectType, false, false, sensorProperties.max, sensorProperties.min, + conn, PowerState::on), + std::enable_shared_from_this(), objServer(objectServer), +- inputDev(io), waitTimer(io), path(path), ++ inputDev(io), path(path), + privTcontrol(std::numeric_limits::quiet_NaN()), +- dtsOffset(dtsOffset), show(show), pollTime(CPUSensor::sensorPollMs), +- minMaxReadCounter(0), scaleFactor(sensorProperties.scaleFactor) ++ dtsOffset(dtsOffset), show(show), minMaxReadCounter(0), ++ scaleFactor(sensorProperties.scaleFactor) + { + nameTcontrol = labelTcontrol; + nameTcontrol += " CPU" + std::to_string(cpuId); +@@ -89,9 +89,9 @@ CPUSensor::CPUSensor(const std::string& objectType, + const std::string& sensorConfiguration) : + Sensor(escapeName(sensorName), std::move(thresholdsIn), sensorConfiguration, + objectType, false, false, 0, 0, conn, PowerState::on), +- objServer(objectServer), inputDev(io), waitTimer(io), ++ objServer(objectServer), inputDev(io), + privTcontrol(std::numeric_limits::quiet_NaN()), dtsOffset(0), +- show(true), pollTime(CPUSensor::sensorPollMs), minMaxReadCounter(0) ++ show(true), minMaxReadCounter(0) + { + // assume it is a temperature sensor for now + // support for other type can be added later +@@ -150,7 +150,6 @@ CPUSensor::~CPUSensor() + { + // close the input dev to cancel async operations + inputDev.close(); +- waitTimer.cancel(); + if (show) + { + objServer.remove_interface(thresholdInterfaceWarning); +@@ -175,45 +174,24 @@ bool CPUSensor::initInputDev() + return true; + } + +-void CPUSensor::restartRead(void) +-{ +- std::weak_ptr weakRef = weak_from_this(); +- waitTimer.expires_from_now(boost::posix_time::milliseconds(pollTime)); +- waitTimer.async_wait([weakRef](const boost::system::error_code& ec) { +- if (ec == boost::asio::error::operation_aborted) +- { +- std::cerr << "Failed to reschedule\n"; +- return; +- } +- std::shared_ptr self = weakRef.lock(); +- +- if (self) +- { +- self->setupRead(); +- } +- }); +-} +- +-void CPUSensor::setupRead(void) ++void CPUSensor::setupRead(boost::asio::yield_context yield) + { + if (!readingStateGood()) + { + markAvailable(false); + updateValue(std::numeric_limits::quiet_NaN()); +- restartRead(); + return; + } + + std::weak_ptr weakRef = weak_from_this(); ++ boost::system::error_code ec; + inputDev.async_wait(boost::asio::posix::descriptor_base::wait_read, +- [weakRef](const boost::system::error_code& ec) { +- std::shared_ptr self = weakRef.lock(); +- +- if (self) +- { +- self->handleResponse(ec); +- } +- }); ++ yield[ec]); ++ std::shared_ptr self = weakRef.lock(); ++ if (self) ++ { ++ self->handleResponse(ec); ++ } + } + + void CPUSensor::updateMinMaxValues(void) +@@ -284,7 +262,6 @@ void CPUSensor::handleResponse(const boost::system::error_code& err) + std::cerr << name << " interface down!\n"; + loggedInterfaceDown = true; + } +- pollTime = CPUSensor::sensorPollMs * 10u; + markFunctional(false); + } + return; +@@ -293,7 +270,6 @@ void CPUSensor::handleResponse(const boost::system::error_code& err) + + if (err) + { +- pollTime = sensorFailedPollTimeMs; + incrementError(); + if (fd >= 0) + { +@@ -378,7 +354,6 @@ void CPUSensor::handleResponse(const boost::system::error_code& err) + } + else + { +- pollTime = sensorFailedPollTimeMs; + incrementError(); + } + +@@ -386,8 +361,6 @@ void CPUSensor::handleResponse(const boost::system::error_code& err) + { + lseek(fd, 0, SEEK_SET); + } +- +- restartRead(); + } + + void CPUSensor::checkThresholds(void) +diff --git a/src/CPUSensorMain.cpp b/src/CPUSensorMain.cpp +index 32cb6b3..0de6673 100644 +--- a/src/CPUSensorMain.cpp ++++ b/src/CPUSensorMain.cpp +@@ -51,6 +51,10 @@ + // clang-format on + + static constexpr bool debug = false; ++static std::unique_ptr waitTimer = nullptr; ++static bool sensorMapUpdated = false; ++static constexpr unsigned int sensorPollWaitMs = 20; ++static constexpr unsigned int sensorEmptyWaitMs = 500; + + boost::container::flat_map> gCpuSensors; + boost::container::flat_mapsetupRead(); ++ sensorMapUpdated = true; + createdSensors.insert(sensorName); + if (debug) + { +@@ -427,6 +431,52 @@ bool createSensors(boost::asio::io_service& io, + return true; + } + ++bool doWait(boost::asio::yield_context yield, int delay) ++{ ++ boost::system::error_code ec; ++ waitTimer->expires_from_now(boost::posix_time::milliseconds(delay)); ++ waitTimer->async_wait(yield[ec]); ++ if (ec == boost::asio::error::operation_aborted) ++ { ++ std::cerr << "Timer aborted\n"; ++ return false; ++ } ++ if (ec) ++ { ++ std::cerr << "Timer failed\n"; ++ return false; ++ } ++ return true; ++} ++ ++void pollCPUSensors(boost::asio::yield_context yield) ++{ ++ while (true) ++ { ++ sensorMapUpdated = false; ++ for (auto& [name, sensor] : gCpuSensors) ++ { ++ sensor->setupRead(yield); ++ if (!doWait(yield, sensorPollWaitMs)) ++ { ++ throw std::runtime_error("Wait timer failed"); ++ } ++ if (sensorMapUpdated) ++ { ++ break; ++ } ++ } ++ ++ if (gCpuSensors.size() == 0) ++ { ++ if (!doWait(yield, sensorEmptyWaitMs)) ++ { ++ throw std::runtime_error("Wait timer failed"); ++ } ++ } ++ } ++} ++ + int exportDevice(const CPUConfig& config) + { + std::ostringstream hex; +@@ -905,6 +955,7 @@ int main() + boost::asio::deadline_timer pingTimer(io); + boost::asio::deadline_timer creationTimer(io); + boost::asio::deadline_timer filterTimer(io); ++ waitTimer = std::make_unique(io); + ManagedObjectType sensorConfigs; + + filterTimer.expires_from_now(boost::posix_time::seconds(1)); +@@ -966,6 +1017,10 @@ int main() + systemBus->request_name("xyz.openbmc_project.CPUSensor"); + + setupManufacturingModeMatch(*systemBus); ++ ++ boost::asio::spawn( ++ io, [](boost::asio::yield_context yield) { pollCPUSensors(yield); }); ++ + io.run(); + return 0; + } +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0013-Add-dummy-cpu-sensor-flag.patch b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0013-Add-dummy-cpu-sensor-flag.patch new file mode 100644 index 000000000..01f3fda09 --- /dev/null +++ b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0013-Add-dummy-cpu-sensor-flag.patch @@ -0,0 +1,60 @@ +From 1429b5e9d7a1aa2b0ac9b997b56f53728f8de712 Mon Sep 17 00:00:00 2001 +From: "Arun P. Mohanan" +Date: Thu, 3 Feb 2022 23:56:33 +0530 +Subject: [PATCH] Add dummy cpu sensor flag + +With updated cpu sensor logic, the setupRead() will get called for +dummy sensors as well. This will cause cpu sensor to exit unsuccessfully. + +Add a flag to skip dummy sensors from polling. + +Tested: +CPU sensor polling worked as expected. +Unsuccessfull service exit issue is not observed. + +Signed-off-by: Arun P. Mohanan +--- + include/CPUSensor.hpp | 1 + + src/CPUSensor.cpp | 7 ++++++- + 2 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/include/CPUSensor.hpp b/include/CPUSensor.hpp +index 0ba4090..b5921b9 100644 +--- a/include/CPUSensor.hpp ++++ b/include/CPUSensor.hpp +@@ -63,6 +63,7 @@ class CPUSensor : public Sensor, public std::enable_shared_from_this + uint8_t minMaxReadCounter; + unsigned int scaleFactor; + int fd; ++ bool dummySensor = false; + void handleResponse(const boost::system::error_code& err); + void checkThresholds(void) override; + void updateMinMaxValues(void); +diff --git a/src/CPUSensor.cpp b/src/CPUSensor.cpp +index 8c49bc5..7990e99 100644 +--- a/src/CPUSensor.cpp ++++ b/src/CPUSensor.cpp +@@ -91,7 +91,7 @@ CPUSensor::CPUSensor(const std::string& objectType, + objectType, false, false, 0, 0, conn, PowerState::on), + objServer(objectServer), inputDev(io), + privTcontrol(std::numeric_limits::quiet_NaN()), dtsOffset(0), +- show(true), minMaxReadCounter(0) ++ show(true), minMaxReadCounter(0), dummySensor(true) + { + // assume it is a temperature sensor for now + // support for other type can be added later +@@ -176,6 +176,11 @@ bool CPUSensor::initInputDev() + + void CPUSensor::setupRead(boost::asio::yield_context yield) + { ++ if (dummySensor) ++ { ++ return; ++ } ++ + if (!readingStateGood()) + { + markAvailable(false); +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0014-Treat-zero-temperatures-readings-as-errors-in-IpmbSe.patch b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0014-Treat-zero-temperatures-readings-as-errors-in-IpmbSe.patch new file mode 100644 index 000000000..25b6d5cab --- /dev/null +++ b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors/0014-Treat-zero-temperatures-readings-as-errors-in-IpmbSe.patch @@ -0,0 +1,49 @@ +From 3659064b5ef97e78f054823657670d226bb0420e Mon Sep 17 00:00:00 2001 +From: Arun Lal K M +Date: Thu, 10 Feb 2022 17:21:30 +0000 +Subject: [PATCH] Treat zero temperatures readings as errors in IpmbSensors + +During normal operations, IpmbSensors temperature sensors are not +expected to read zero value. There might be some other root cause +need to be identified and addressed. Treat zeros readings as errors +in temperatures as a workaround. + +Tested: +Valid temperature reading is being reported in IpmbSensors. +By giving ipmitool sensors list command. + +CPU1 North VR Te | 36.000 +CPU1 PVCCD VR Te | 39.000 +CPU1 PVCCFA EHV | 37.000 +CPU1 South VR Te | 37.000 +CPU1 VCCIN VR Te | 51.000 +CPU2 North VR Te | 40.000 +CPU2 PVCCD VR Te | 43.000 +CPU2 PVCCFA EHV | 38.000 +CPU2 South VR Te | 36.000 +CPU2 VCCIN VR Te | 50.000 + +Signed-off-by: Arun Lal K M +--- + src/IpmbSensor.cpp | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/src/IpmbSensor.cpp b/src/IpmbSensor.cpp +index 75f74b5..e7d1ded 100644 +--- a/src/IpmbSensor.cpp ++++ b/src/IpmbSensor.cpp +@@ -458,7 +458,10 @@ void IpmbSensor::read(void) + + double value = 0; + +- if (!processReading(data, value)) ++ // Temperature sensors are not expected to read 0 ++ // treat them as errors ++ if (!processReading(data, value) || ++ (subType == IpmbSubType::temp && value == 0.0)) + { + incrementError(); + read(); +-- +2.17.1 + diff --git a/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors_%.bbappend b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors_%.bbappend index 23a44d69a..09a2a50ed 100644 --- a/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors_%.bbappend +++ b/meta-openbmc-mods/meta-common/recipes-phosphor/sensors/dbus-sensors_%.bbappend @@ -15,6 +15,11 @@ SRC_URI += "\ file://0007-Add-support-for-the-energy-hwmon-type.patch \ file://0008-CPUSensor-additional-debug-message.patch \ file://0009-CPUSensor-Create-CPUConfig-for-each-PECI-adapter.patch \ + file://0010-Add-support-for-Get-PMBUS-Readings-method.patch \ + file://0011-Fix-for-cpusensor-going-into-D-state.patch \ + file://0012-Serialize-cpusensor-polling.patch \ + file://0013-Add-dummy-cpu-sensor-flag.patch \ + file://0014-Treat-zero-temperatures-readings-as-errors-in-IpmbSe.patch \ " DEPENDS:append = " libgpiod libmctp" -- cgit v1.2.3