summaryrefslogtreecommitdiff
path: root/meta-quanta/meta-gbs/recipes-phosphor/sensors/phosphor-hwmon/0001-sensor-Implement-sensor-ASYNC_READ_TIMEOUT.patch
blob: 88f55d055b4e3d2bf75eb2b71a2bf9d83a53d38b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
From c1b3d32430df12ab01f9cacf132aedff8324d55b Mon Sep 17 00:00:00 2001
From: Brandon Kim <brandonkim@google.com>
Date: Fri, 9 Aug 2019 15:38:53 -0700
Subject: [PATCH] sensor: Implement sensor "ASYNC_READ_TIMEOUT"

This change will prevent sensors from blocking all other sensor reads
and D-Bus if they do not report failures quickly enough.

If "ASYNC_READ_TIMEOUT" environment variable is defined in the
sensor's config file for a key_type, the sensor read will be
asynchronous with timeout set in milliseconds.

For example for "sensor1":
ASYNC_READ_TIMEOUT_sensor1="1000"  // Timeout will be set to 1 sec

If the read times out, the sensor read will be skipped and the
sensor's functional property will be set to 'false'. Timed out futures
will be placed in a map to prevent their destructor from running and
blocking until the read completes (limiation of std::async).

Change-Id: I3d9ed4d5c9cc87d3196fc281451834f3001d0b48
Signed-off-by: Brandon Kim <brandonkim@google.com>
---
 Makefile.am  |  2 ++
 mainloop.cpp | 73 +++++++++++++++++++++++++++++++++++++++++++++++---
 mainloop.hpp |  3 +++
 meson.build  |  1 +
 sensor.cpp   | 75 ++++++++++++++++++++++++++++++++++++++++++++++------
 sensor.hpp   | 20 ++++++++++++--
 6 files changed, 160 insertions(+), 14 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 706a6cc..c620fa4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -46,6 +46,7 @@ libhwmon_la_LIBADD = \
 	$(SDEVENTPLUS_LIBS) \
 	$(PHOSPHOR_DBUS_INTERFACES_LIBS) \
 	$(PHOSPHOR_LOGGING_LIBS) \
+	$(PTHREAD_LIBS) \
 	$(GPIOPLUS_LIBS) \
 	$(STDPLUS_LIBS) \
 	$(CODE_COVERAGE_LIBS) \
@@ -55,6 +56,7 @@ libhwmon_la_CXXFLAGS = \
 	$(SDEVENTPLUS_CFLAGS) \
 	$(PHOSPHOR_DBUS_INTERFACES_CFLAGS) \
 	$(PHOSPHOR_LOGGING_CFLAGS) \
+	$(PTHREAD_CFLAGS) \
 	$(STDPLUS_CFLAGS) \
 	$(CODE_COVERAGE_CXXFLAGS)
 
diff --git a/mainloop.cpp b/mainloop.cpp
index ecceee5..29dc26a 100644
--- a/mainloop.cpp
+++ b/mainloop.cpp
@@ -34,6 +34,7 @@
 #include <cassert>
 #include <cstdlib>
 #include <functional>
+#include <future>
 #include <iostream>
 #include <memory>
 #include <phosphor-logging/elog-errors.hpp>
@@ -242,7 +243,7 @@ std::optional<ObjectStateData>
     {
         // Add status interface based on _fault file being present
         sensorObj->addStatus(info);
-        valueInterface = sensorObj->addValue(retryIO, info);
+        valueInterface = sensorObj->addValue(retryIO, info, _timedoutMap);
     }
     catch (const std::system_error& e)
     {
@@ -478,10 +479,74 @@ void MainLoop::read()
                 // RAII object for GPIO unlock / lock
                 auto locker = sensor::gpioUnlock(sensor->getGpio());
 
-                // Retry for up to a second if device is busy
-                // or has a transient error.
-                value = _ioAccess->read(sensorSysfsType, sensorSysfsNum, input,
+                // For sensors with attribute ASYNC_READ_TIMEOUT,
+                // spawn a thread with timeout
+                auto asyncRead =
+                    env::getEnv("ASYNC_READ_TIMEOUT", sensorSetKey);
+                if (!asyncRead.empty())
+                {
+                    // Default async read timeout
+                    std::chrono::milliseconds asyncReadTimeout{
+                        std::stoi(asyncRead)};
+                    bool valueIsValid = false;
+                    std::future<int64_t> asyncThread;
+
+                    auto asyncIter = _timedoutMap.find(sensorSetKey);
+                    if (asyncIter == _timedoutMap.end())
+                    {
+                        // If sensor not found in timedoutMap, spawn an async
+                        // thread
+                        asyncThread = std::async(
+                            std::launch::async,
+                            &hwmonio::HwmonIOInterface::read, _ioAccess,
+                            sensorSysfsType, sensorSysfsNum, input,
+                            hwmonio::retries, hwmonio::delay);
+                        valueIsValid = true;
+                    }
+                    else
+                    {
+                        // If we already have the async thread in the
+                        // timedoutMap, it means this sensor has already timed
+                        // out in the previous reads. No need to wait on
+                        // subsequent reads
+                        asyncReadTimeout = std::chrono::seconds(0);
+                        asyncThread = std::move(asyncIter->second);
+                    }
+
+                    std::future_status status =
+                        asyncThread.wait_for(asyncReadTimeout);
+                    switch (status)
+                    {
+                        // Read has finished
+                        case std::future_status::ready:
+                            // Read has finished
+                            if (valueIsValid)
+                            {
+                                value = asyncThread.get();
+                                break;
+                                // Good sensor reads should skip the code below
+                            }
+                            // Async read thread has completed, erase from
+                            // timedoutMap to allow retry then throw
+                            _timedoutMap.erase(sensorSetKey);
+                            throw sensor::AsyncSensorReadTimeOut();
+                        default:
+                            // Read timed out so add the thread to the
+                            // timedoutMap (if the entry already exists,
+                            // operator[] updates it)
+                            _timedoutMap[sensorSetKey] = std::move(asyncThread);
+                            throw sensor::AsyncSensorReadTimeOut();
+                    }
+                }
+                else
+                {
+                    // Retry for up to a second if device is busy
+                    // or has a transient error.
+                    value =
+                        _ioAccess->read(sensorSysfsType, sensorSysfsNum, input,
                                         hwmonio::retries, hwmonio::delay);
+                }
+
                 // Set functional property to true if we could read sensor
                 statusIface->functional(true);
 
diff --git a/mainloop.hpp b/mainloop.hpp
index b3de022..6803c4b 100644
--- a/mainloop.hpp
+++ b/mainloop.hpp
@@ -9,6 +9,7 @@
 #include "types.hpp"
 
 #include <any>
+#include <future>
 #include <memory>
 #include <optional>
 #include <sdbusplus/server.hpp>
@@ -116,6 +117,8 @@ class MainLoop
     /** @brief Store the specifications of sensor objects */
     std::map<SensorSet::key_type, std::unique_ptr<sensor::Sensor>>
         _sensorObjects;
+    /** @brief Store the async futures of timed out sensor objects */
+    std::map<SensorSet::key_type, std::future<int64_t>> _timedoutMap;
 
     /**
      * @brief Map of removed sensors
diff --git a/meson.build b/meson.build
index 66e6801..d6a92f8 100644
--- a/meson.build
+++ b/meson.build
@@ -84,6 +84,7 @@ libhwmon_all = static_library(
         gpioplus,
         phosphor_dbus_interfaces,
         phosphor_logging,
+        threads,
     ],
     link_with: [
         libaverage,
diff --git a/sensor.cpp b/sensor.cpp
index 09aeca6..ac2f896 100644
--- a/sensor.cpp
+++ b/sensor.cpp
@@ -15,6 +15,7 @@
 #include <cmath>
 #include <cstring>
 #include <filesystem>
+#include <future>
 #include <phosphor-logging/elog-errors.hpp>
 #include <thread>
 #include <xyz/openbmc_project/Common/error.hpp>
@@ -116,8 +117,9 @@ SensorValueType Sensor::adjustValue(SensorValueType value)
     return value;
 }
 
-std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO,
-                                              ObjectInfo& info)
+std::shared_ptr<ValueObject> Sensor::addValue(
+    const RetryIO& retryIO, ObjectInfo& info,
+    std::map<SensorSet::key_type, std::future<int64_t>>& timedoutMap)
 {
     static constexpr bool deferSignals = true;
 
@@ -144,12 +146,69 @@ std::shared_ptr<ValueObject> Sensor::addValue(const RetryIO& retryIO,
             // RAII object for GPIO unlock / lock
             auto locker = gpioUnlock(getGpio());
 
-            // Retry for up to a second if device is busy
-            // or has a transient error.
-            val =
-                _ioAccess->read(_sensor.first, _sensor.second,
-                                hwmon::entry::cinput, std::get<size_t>(retryIO),
-                                std::get<std::chrono::milliseconds>(retryIO));
+            // For sensors with attribute ASYNC_READ_TIMEOUT,
+            // spawn a thread with timeout
+            auto asyncRead = env::getEnv("ASYNC_READ_TIMEOUT", _sensor);
+            if (!asyncRead.empty())
+            {
+                // Default async read timeout
+                std::chrono::milliseconds asyncReadTimeout{
+                    std::stoi(asyncRead)};
+                bool valueIsValid = false;
+                std::future<int64_t> asyncThread;
+
+                auto asyncIter = timedoutMap.find(_sensor);
+                if (asyncIter == timedoutMap.end())
+                {
+                    // If sensor not found in timedoutMap, spawn an async thread
+                    asyncThread = std::async(
+                        std::launch::async, &hwmonio::HwmonIOInterface::read,
+                        _ioAccess, _sensor.first, _sensor.second,
+                        hwmon::entry::cinput, std::get<size_t>(retryIO),
+                        std::get<std::chrono::milliseconds>(retryIO));
+                    valueIsValid = true;
+                }
+                else
+                {
+                    // If we already have the async thread in the timedoutMap,
+                    // it means this sensor has already timed out in the
+                    // previous reads. No need to wait on subsequent reads
+                    asyncReadTimeout = std::chrono::seconds(0);
+                    asyncThread = std::move(asyncIter->second);
+                }
+
+                std::future_status status =
+                    asyncThread.wait_for(asyncReadTimeout);
+                switch (status)
+                {
+                    case std::future_status::ready:
+                        // Read has finished
+                        if (valueIsValid)
+                        {
+                            val = asyncThread.get();
+                            break;
+                            // Good sensor reads should skip the code below
+                        }
+                        // Async read thread has completed, erase from
+                        // timedoutMap to allow retry then throw
+                        timedoutMap.erase(_sensor);
+                        throw AsyncSensorReadTimeOut();
+                    default:
+                        // Read timed out so add the thread to the timedoutMap
+                        // (if the entry already exists, operator[] updates it)
+                        timedoutMap[_sensor] = std::move(asyncThread);
+                        throw AsyncSensorReadTimeOut();
+                }
+            }
+            else
+            {
+                // Retry for up to a second if device is busy
+                // or has a transient error.
+                val = _ioAccess->read(
+                    _sensor.first, _sensor.second, hwmon::entry::cinput,
+                    std::get<size_t>(retryIO),
+                    std::get<std::chrono::milliseconds>(retryIO));
+            }
         }
 #ifdef UPDATE_FUNCTIONAL_ON_FAIL
         catch (const std::system_error& e)
diff --git a/sensor.hpp b/sensor.hpp
index 4b2d281..64d6e48 100644
--- a/sensor.hpp
+++ b/sensor.hpp
@@ -4,6 +4,8 @@
 #include "sensorset.hpp"
 #include "types.hpp"
 
+#include <cerrno>
+#include <future>
 #include <gpioplus/handle.hpp>
 #include <memory>
 #include <optional>
@@ -20,6 +22,17 @@ struct valueAdjust
     std::unordered_set<int> rmRCs;
 };
 
+/** @brief Custom exception for async sensor reading timeout
+ */
+struct AsyncSensorReadTimeOut : public std::system_error
+{
+    AsyncSensorReadTimeOut() :
+        system_error(std::error_code(ETIMEDOUT, std::system_category()),
+                     "Async sensor read timed out")
+    {
+    }
+};
+
 /** @class Sensor
  *  @brief Sensor object based on a SensorSet container's key type
  *  @details Sensor object to create and modify an associated device's sensor
@@ -87,10 +100,13 @@ class Sensor
      *                      (number of and delay between)
      * @param[in] info - Sensor object information
      *
+     * @param[in] timedoutMap - Map to track timed out threads
+     *
      * @return - Shared pointer to the value object
      */
-    std::shared_ptr<ValueObject> addValue(const RetryIO& retryIO,
-                                          ObjectInfo& info);
+    std::shared_ptr<ValueObject> addValue(
+        const RetryIO& retryIO, ObjectInfo& info,
+        std::map<SensorSet::key_type, std::future<int64_t>>& timedoutMap);
 
     /**
      * @brief Add status interface and functional property for sensor
-- 
2.21.0