summaryrefslogtreecommitdiff
path: root/meta-openbmc-mods/meta-common/recipes-phosphor/fans/phosphor-pid-control/0001-Eliminate-swampd-core-dump-after-D-Bus-updates-senso.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-openbmc-mods/meta-common/recipes-phosphor/fans/phosphor-pid-control/0001-Eliminate-swampd-core-dump-after-D-Bus-updates-senso.patch')
-rw-r--r--meta-openbmc-mods/meta-common/recipes-phosphor/fans/phosphor-pid-control/0001-Eliminate-swampd-core-dump-after-D-Bus-updates-senso.patch172
1 files changed, 172 insertions, 0 deletions
diff --git a/meta-openbmc-mods/meta-common/recipes-phosphor/fans/phosphor-pid-control/0001-Eliminate-swampd-core-dump-after-D-Bus-updates-senso.patch b/meta-openbmc-mods/meta-common/recipes-phosphor/fans/phosphor-pid-control/0001-Eliminate-swampd-core-dump-after-D-Bus-updates-senso.patch
new file mode 100644
index 000000000..d2a8d7c40
--- /dev/null
+++ b/meta-openbmc-mods/meta-common/recipes-phosphor/fans/phosphor-pid-control/0001-Eliminate-swampd-core-dump-after-D-Bus-updates-senso.patch
@@ -0,0 +1,172 @@
+From 26db33e341f7e96931905aee4358353b0c6aee39 Mon Sep 17 00:00:00 2001
+From: Johnathan Mantey <johnathanx.mantey@intel.com>
+Date: Mon, 28 Sep 2020 11:06:58 -0700
+Subject: [PATCH] Eliminate swampd core dump after D-Bus updates sensors
+
+The swamp daemon intializes a list of sensors and uses those to
+periodically scan the state associated devices. Reading the sensors is
+done with an async timer, that runs code to re-arm an async timer.
+
+There is also a D-Bus update cycle that is independent of the async
+timer reading the sensors. When the D-Bus updates the number of
+sensors in the system a new list must be created. In order to create
+the new list the timers using the old list must be stopped. Only after
+those timers have stopped may a new list be generated, and a new set of
+timers started.
+
+The two processes are unware of each other. To safely perform the
+change the pointers to the list of zones and timers must be kept alive
+until all timer actions complete. Only after all references to the
+pointers have been release may the new state be built, and new timers
+started.
+
+Prior to this change swampd would throw a SYSSEGV fault due to an
+attempt to use a pointer that was no longer active.
+
+Tested:
+Issued a "reset -w" (Warm Reset command) from the EFI shell.
+Waited for the system to reboot, and enter EFI
+Checked for a core file in /var/lib/systemd/coredump
+Repeated step 1 if coredump file was not present.
+Completed 2900+ passes successfully when ealier code failed at less
+than 800 passes.
+
+Change-Id: I10ab824d8050be9eca63c18d7e5a62bdb41e9c64
+Signed-off-by: Johnathan Mantey <johnathanx.mantey@intel.com>
+---
+ main.cpp | 14 ++++++++++----
+ pid/builder.cpp | 6 +++---
+ pid/builder.hpp | 2 +-
+ pid/pidloop.cpp | 13 +++++++------
+ pid/pidloop.hpp | 3 ++-
+ 5 files changed, 23 insertions(+), 15 deletions(-)
+
+diff --git a/main.cpp b/main.cpp
+index 2ab3fc4..46cb38d 100644
+--- a/main.cpp
++++ b/main.cpp
+@@ -72,10 +72,15 @@ static sdbusplus::asio::connection
+ void restartControlLoops()
+ {
+ static SensorManager mgmr;
+- static std::unordered_map<int64_t, std::unique_ptr<PIDZone>> zones;
+- static std::list<boost::asio::steady_timer> timers;
++ static std::unordered_map<int64_t, std::shared_ptr<PIDZone>> zones;
++ static std::vector<std::shared_ptr<boost::asio::steady_timer>> timers;
+
++ for (const auto timer : timers)
++ {
++ timer->cancel();
++ }
+ timers.clear();
++ zones.clear();
+
+ #if CONFIGURE_DBUS
+
+@@ -117,9 +122,10 @@ void restartControlLoops()
+
+ for (const auto& i : zones)
+ {
+- auto& timer = timers.emplace_back(io);
++ std::shared_ptr<boost::asio::steady_timer> timer = timers.emplace_back(
++ std::make_shared<boost::asio::steady_timer>(io));
+ std::cerr << "pushing zone " << i.first << "\n";
+- pidControlLoop(i.second.get(), timer);
++ pidControlLoop(i.second, timer);
+ }
+ }
+
+diff --git a/pid/builder.cpp b/pid/builder.cpp
+index 1fbfbd4..15fc4cd 100644
+--- a/pid/builder.cpp
++++ b/pid/builder.cpp
+@@ -35,12 +35,12 @@ static std::string getControlPath(int64_t zone)
+ return std::string(objectPath) + std::to_string(zone);
+ }
+
+-std::unordered_map<int64_t, std::unique_ptr<PIDZone>>
++std::unordered_map<int64_t, std::shared_ptr<PIDZone>>
+ buildZones(std::map<int64_t, conf::PIDConf>& zonePids,
+ std::map<int64_t, struct conf::ZoneConfig>& zoneConfigs,
+ SensorManager& mgr, sdbusplus::bus::bus& modeControlBus)
+ {
+- std::unordered_map<int64_t, std::unique_ptr<PIDZone>> zones;
++ std::unordered_map<int64_t, std::shared_ptr<PIDZone>> zones;
+
+ for (const auto& zi : zonePids)
+ {
+@@ -62,7 +62,7 @@ std::unordered_map<int64_t, std::unique_ptr<PIDZone>>
+
+ const conf::PIDConf& pidConfig = zi.second;
+
+- auto zone = std::make_unique<PIDZone>(
++ auto zone = std::make_shared<PIDZone>(
+ zoneId, zoneConf->second.minThermalOutput,
+ zoneConf->second.failsafePercent, mgr, modeControlBus,
+ getControlPath(zi.first).c_str(), deferSignals);
+diff --git a/pid/builder.hpp b/pid/builder.hpp
+index e500503..e3ba88c 100644
+--- a/pid/builder.hpp
++++ b/pid/builder.hpp
+@@ -7,7 +7,7 @@
+ #include <sdbusplus/bus.hpp>
+ #include <unordered_map>
+
+-std::unordered_map<int64_t, std::unique_ptr<PIDZone>>
++std::unordered_map<int64_t, std::shared_ptr<PIDZone>>
+ buildZones(std::map<int64_t, conf::PIDConf>& zonePids,
+ std::map<int64_t, struct conf::ZoneConfig>& zoneConfigs,
+ SensorManager& mgr, sdbusplus::bus::bus& modeControlBus);
+diff --git a/pid/pidloop.cpp b/pid/pidloop.cpp
+index 56bf8bd..14225ec 100644
+--- a/pid/pidloop.cpp
++++ b/pid/pidloop.cpp
+@@ -27,7 +27,7 @@
+ #include <thread>
+ #include <vector>
+
+-static void processThermals(PIDZone* zone)
++static void processThermals(std::shared_ptr<PIDZone> zone)
+ {
+ // Get the latest margins.
+ zone->updateSensors();
+@@ -40,8 +40,9 @@ static void processThermals(PIDZone* zone)
+ zone->determineMaxSetPointRequest();
+ }
+
+-void pidControlLoop(PIDZone* zone, boost::asio::steady_timer& timer, bool first,
+- int ms100cnt)
++void pidControlLoop(std::shared_ptr<PIDZone> zone,
++ std::shared_ptr<boost::asio::steady_timer> timer,
++ bool first, int ms100cnt)
+ {
+ if (first)
+ {
+@@ -54,9 +55,9 @@ void pidControlLoop(PIDZone* zone, boost::asio::steady_timer& timer, bool first,
+ processThermals(zone);
+ }
+
+- timer.expires_after(std::chrono::milliseconds(100));
+- timer.async_wait(
+- [zone, &timer, ms100cnt](const boost::system::error_code& ec) mutable {
++ timer->expires_after(std::chrono::milliseconds(100));
++ timer->async_wait(
++ [zone, timer, ms100cnt](const boost::system::error_code& ec) mutable {
+ if (ec == boost::asio::error::operation_aborted)
+ {
+ return; // timer being canceled, stop loop
+diff --git a/pid/pidloop.hpp b/pid/pidloop.hpp
+index 3a67954..7aef73a 100644
+--- a/pid/pidloop.hpp
++++ b/pid/pidloop.hpp
+@@ -14,5 +14,6 @@
+ * @param[in] first - boolean to denote if initialization needs to be run.
+ * @param[in] ms100cnt - loop timer counter.
+ */
+-void pidControlLoop(PIDZone* zone, boost::asio::steady_timer& timer,
++void pidControlLoop(std::shared_ptr<PIDZone> zone,
++ std::shared_ptr<boost::asio::steady_timer> timer,
+ bool first = true, int ms100cnt = 0);
+--
+2.26.2
+