From 513d1ffca957889c765569cb3d7d713c12394073 Mon Sep 17 00:00:00 2001 From: Carson Labrado Date: Tue, 19 Jul 2022 00:38:15 +0000 Subject: HTTP Client: Improve handling operation timeouts Now that we are using timer.async_wait() with the async http operations we need to account for the scenario where the timer fails before the operation fails. When that occurs we need to abort the operation once its callback gets called. Currently we proceed as if the timer doesn't exist. This causes a fault if one of the operations times out. This patch adds a check to the start of each async operation so that we do not continue with the normal message sending flow when an operation times out. Tested: In order to create a connection timeout I created a dummy interface and set the IP of my satellite BMC to route to the interface: ip link add dummy0 type dummy ip link set dev dummy0 up ip route add 120.60.30.15 dev dummy0 All packets sent to 120.60.30.15 will get dropped and thus connection attempts will timeout. This does not cause bmcweb to crash. To make the satellite reachable again I used this command to delete the routing: ip route del 120.60.31.15 dev dummy0 After doing so messages were once again able to be forwarded correctly to the satellite. Signed-off-by: Carson Labrado Change-Id: Ie8d022c2195838e383eefcd0e12ae8cfab76e3e1 --- http/http_client.hpp | 68 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 22 deletions(-) (limited to 'http/http_client.hpp') diff --git a/http/http_client.hpp b/http/http_client.hpp index cceb00761a..117584570a 100644 --- a/http/http_client.hpp +++ b/http/http_client.hpp @@ -197,6 +197,13 @@ class ConnectionInfo : public std::enable_shared_from_this boost::beast::error_code ec, const boost::asio::ip::tcp::endpoint& endpoint) { + // The operation already timed out. We don't want do continue down + // this branch + if (ec && ec == boost::asio::error::operation_aborted) + { + return; + } + timer.cancel(); if (ec) { @@ -238,6 +245,13 @@ class ConnectionInfo : public std::enable_shared_from_this void afterSslHandshake(const std::shared_ptr& /*self*/, boost::beast::error_code ec) { + // The operation already timed out. We don't want do continue down + // this branch + if (ec && ec == boost::asio::error::operation_aborted) + { + return; + } + timer.cancel(); if (ec) { @@ -282,6 +296,13 @@ class ConnectionInfo : public std::enable_shared_from_this void afterWrite(const std::shared_ptr& /*self*/, const boost::beast::error_code& ec, size_t bytesTransferred) { + // The operation already timed out. We don't want do continue down + // this branch + if (ec && ec == boost::asio::error::operation_aborted) + { + return; + } + timer.cancel(); if (ec) { @@ -327,6 +348,13 @@ class ConnectionInfo : public std::enable_shared_from_this const boost::beast::error_code& ec, const std::size_t& bytesTransferred) { + // The operation already timed out. We don't want do continue down + // this branch + if (ec && ec == boost::asio::error::operation_aborted) + { + return; + } + timer.cancel(); if (ec && ec != boost::asio::ssl::error::stream_truncated) { @@ -366,9 +394,9 @@ class ConnectionInfo : public std::enable_shared_from_this // Copy the response into a Response object so that it can be // processed by the callback function. - res.clear(); res.stringResponse = parser->release(); callback(parser->keep_alive(), connId, res); + res.clear(); } static void onTimeout(const std::weak_ptr& weakSelf, @@ -377,8 +405,7 @@ class ConnectionInfo : public std::enable_shared_from_this if (ec == boost::asio::error::operation_aborted) { BMCWEB_LOG_DEBUG - << "async_wait failed since the operation is aborted" - << ec.message(); + << "async_wait failed since the operation is aborted"; return; } if (ec) @@ -404,22 +431,22 @@ class ConnectionInfo : public std::enable_shared_from_this BMCWEB_LOG_DEBUG << "Retry policy: " << retryPolicy.retryPolicyAction; - // We want to return a 502 to indicate there was an error with the - // external server - res.clear(); - res.result(boost::beast::http::status::bad_gateway); - if (retryPolicy.retryPolicyAction == "TerminateAfterRetries") { // TODO: delete subscription state = ConnState::terminated; - callback(false, connId, res); } if (retryPolicy.retryPolicyAction == "SuspendRetries") { state = ConnState::suspended; - callback(false, connId, res); } + + // We want to return a 502 to indicate there was an error with + // the external server + res.result(boost::beast::http::status::bad_gateway); + callback(false, connId, res); + res.clear(); + // Reset the retrycount to zero so that client can try connecting // again if needed retryCount = 0; @@ -468,7 +495,7 @@ class ConnectionInfo : public std::enable_shared_from_this { BMCWEB_LOG_ERROR << host << ":" << std::to_string(port) << ", id: " << std::to_string(connId) - << "shutdown failed: " << ec.message(); + << " shutdown failed: " << ec.message(); } else { @@ -477,18 +504,15 @@ class ConnectionInfo : public std::enable_shared_from_this << " closed gracefully"; } - if ((state != ConnState::suspended) && (state != ConnState::terminated)) + if (retry) { - if (retry) - { - // Now let's try to resend the data - state = ConnState::retry; - doResolve(); - } - else - { - state = ConnState::closed; - } + // Now let's try to resend the data + state = ConnState::retry; + doResolve(); + } + else + { + state = ConnState::closed; } } -- cgit v1.2.3