diff options
author | Ed Tanous <ed@tanous.net> | 2020-07-21 18:46:25 +0300 |
---|---|---|
committer | Ed Tanous <ed@tanous.net> | 2021-12-20 04:00:35 +0300 |
commit | af4edf686e684d728fccbb69a8f550fd2adab46a (patch) | |
tree | 1c97b4f7b75a310105ab7ba86fdbf100117c3782 /include/multipart_parser.hpp | |
parent | 47c9e106e0057dd70133d50e928e48cbc68e709a (diff) | |
download | bmcweb-af4edf686e684d728fccbb69a8f550fd2adab46a.tar.xz |
Implement MIME parsing
This commit adds two core features to bmcweb:
1. A multipart mime parser that can read multipart form requests into
bmcweb. This is implemented as a generic parser that identifies the
content-type strings and parses them into structures.
2. A /login route that can be logged into with a multipart form. This
is to allow changing the login screen to a purely forms based
implementation, thus removing the very large whitelist we currently have
to maintain, and removing javascript from our threat envelope.
More testing is still needed, as this is a parser that exists outside of
the secured areas, but in this simple example, it seems to work well.
Tested: curl -vvvvv --insecure -X POST -F 'username=root' -F
'password=0penBmc' https://<bmc ip address>:18080/login
Returned; { "data": "User 'root' logged in", "message": "200 OK",
"status": "ok" }
Change-Id: Icc3f4c082d584170b65b9e82f7876926cd38035d
Signed-off-by: Ed Tanous<ed@tanous.net>
Signed-off-by: George Liu <liuxiwei@inspur.com>
Diffstat (limited to 'include/multipart_parser.hpp')
-rw-r--r-- | include/multipart_parser.hpp | 338 |
1 files changed, 338 insertions, 0 deletions
diff --git a/include/multipart_parser.hpp b/include/multipart_parser.hpp new file mode 100644 index 0000000000..3728311fbe --- /dev/null +++ b/include/multipart_parser.hpp @@ -0,0 +1,338 @@ +#pragma once + +#include <boost/algorithm/string/predicate.hpp> +#include <boost/beast/http/fields.hpp> +#include <http_request.hpp> + +#include <string> +#include <string_view> + +enum class ParserError +{ + PARSER_SUCCESS, + ERROR_BOUNDARY_FORMAT, + ERROR_BOUNDARY_CR, + ERROR_BOUNDARY_LF, + ERROR_BOUNDARY_DATA, + ERROR_EMPTY_HEADER, + ERROR_HEADER_NAME, + ERROR_HEADER_VALUE, + ERROR_HEADER_ENDING +}; + +enum class State +{ + START, + START_BOUNDARY, + HEADER_FIELD_START, + HEADER_FIELD, + HEADER_VALUE_START, + HEADER_VALUE, + HEADER_VALUE_ALMOST_DONE, + HEADERS_ALMOST_DONE, + PART_DATA_START, + PART_DATA, + END +}; + +enum class Boundary +{ + NON_BOUNDARY, + PART_BOUNDARY, + END_BOUNDARY, +}; + +struct FormPart +{ + boost::beast::http::fields fields; + std::string content; +}; + +class MultipartParser +{ + public: + MultipartParser() = default; + + [[nodiscard]] ParserError parse(const crow::Request& req) + { + std::string_view contentType = req.getHeaderValue("content-type"); + + const std::string boundaryFormat = "multipart/form-data; boundary="; + if (!boost::starts_with(req.getHeaderValue("content-type"), + boundaryFormat)) + { + return ParserError::ERROR_BOUNDARY_FORMAT; + } + + std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); + + boundary = "\r\n--"; + boundary += ctBoundary; + indexBoundary(); + lookbehind.resize(boundary.size() + 8); + state = State::START; + + const char* buffer = req.body.data(); + size_t len = req.body.size(); + size_t prevIndex = index; + char cl = 0; + + for (size_t i = 0; i < len; i++) + { + char c = buffer[i]; + switch (state) + { + case State::START: + index = 0; + state = State::START_BOUNDARY; + [[fallthrough]]; + case State::START_BOUNDARY: + if (index == boundary.size() - 2) + { + if (c != cr) + { + return ParserError::ERROR_BOUNDARY_CR; + } + index++; + break; + } + else if (index - 1 == boundary.size() - 2) + { + if (c != lf) + { + return ParserError::ERROR_BOUNDARY_LF; + } + index = 0; + mime_fields.push_back({}); + state = State::HEADER_FIELD_START; + break; + } + if (c != boundary[index + 2]) + { + return ParserError::ERROR_BOUNDARY_DATA; + } + index++; + break; + case State::HEADER_FIELD_START: + currentHeaderName.resize(0); + state = State::HEADER_FIELD; + headerFieldMark = i; + index = 0; + [[fallthrough]]; + case State::HEADER_FIELD: + if (c == cr) + { + headerFieldMark = 0; + state = State::HEADERS_ALMOST_DONE; + break; + } + + index++; + if (c == hyphen) + { + break; + } + + if (c == colon) + { + if (index == 1) + { + return ParserError::ERROR_EMPTY_HEADER; + } + currentHeaderName.append(buffer + headerFieldMark, + i - headerFieldMark); + state = State::HEADER_VALUE_START; + break; + } + cl = lower(c); + if (cl < 'a' || cl > 'z') + { + return ParserError::ERROR_HEADER_NAME; + } + break; + case State::HEADER_VALUE_START: + if (c == space) + { + break; + } + headerValueMark = i; + state = State::HEADER_VALUE; + [[fallthrough]]; + case State::HEADER_VALUE: + if (c == cr) + { + std::string_view value(buffer + headerValueMark, + i - headerValueMark); + mime_fields.rbegin()->fields.set(currentHeaderName, + value); + state = State::HEADER_VALUE_ALMOST_DONE; + } + break; + case State::HEADER_VALUE_ALMOST_DONE: + if (c != lf) + { + return ParserError::ERROR_HEADER_VALUE; + } + state = State::HEADER_FIELD_START; + break; + case State::HEADERS_ALMOST_DONE: + if (c != lf) + { + return ParserError::ERROR_HEADER_ENDING; + } + state = State::PART_DATA_START; + break; + case State::PART_DATA_START: + state = State::PART_DATA; + partDataMark = i; + [[fallthrough]]; + case State::PART_DATA: + if (index == 0) + { + skipNonBoundary(buffer, len, boundary.size() - 1, i); + c = buffer[i]; + } + processPartData(prevIndex, index, buffer, i, c, state); + break; + case State::END: + break; + } + } + return ParserError::PARSER_SUCCESS; + } + std::vector<FormPart> mime_fields; + std::string boundary; + + private: + void indexBoundary() + { + std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0); + for (const char current : boundary) + { + boundaryIndex[static_cast<unsigned char>(current)] = true; + } + } + + char lower(char c) const + { + return static_cast<char>(c | 0x20); + } + + inline bool isBoundaryChar(char c) const + { + return boundaryIndex[static_cast<unsigned char>(c)]; + } + + void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd, + size_t& i) + { + // boyer-moore derived algorithm to safely skip non-boundary data + while (i + boundary.size() <= len) + { + if (isBoundaryChar(buffer[i + boundaryEnd])) + { + break; + } + i += boundary.size(); + } + } + + void processPartData(size_t& prevIndex, size_t& index, const char* buffer, + size_t& i, char c, State& state) + { + prevIndex = index; + + if (index < boundary.size()) + { + if (boundary[index] == c) + { + if (index == 0) + { + mime_fields.rbegin()->content += std::string_view( + buffer + partDataMark, i - partDataMark); + } + index++; + } + else + { + index = 0; + } + } + else if (index == boundary.size()) + { + index++; + if (c == cr) + { + // cr = part boundary + flags = Boundary::PART_BOUNDARY; + } + else if (c == hyphen) + { + // hyphen = end boundary + flags = Boundary::END_BOUNDARY; + } + else + { + index = 0; + } + } + else + { + if (flags == Boundary::PART_BOUNDARY) + { + index = 0; + if (c == lf) + { + // unset the PART_BOUNDARY flag + flags = Boundary::NON_BOUNDARY; + mime_fields.push_back({}); + state = State::HEADER_FIELD_START; + return; + } + } + if (flags == Boundary::END_BOUNDARY) + { + if (c == hyphen) + { + state = State::END; + } + } + } + + if (index > 0) + { + lookbehind[index - 1] = c; + } + else if (prevIndex > 0) + { + // if our boundary turned out to be rubbish, the captured + // lookbehind belongs to partData + + mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); + prevIndex = 0; + partDataMark = i; + + // reconsider the current character even so it interrupted + // the sequence it could be the beginning of a new sequence + i--; + } + } + + std::string currentHeaderName; + std::string currentHeaderValue; + + static constexpr char cr = '\r'; + static constexpr char lf = '\n'; + static constexpr char space = ' '; + static constexpr char hyphen = '-'; + static constexpr char colon = ':'; + + std::array<bool, 256> boundaryIndex; + std::string lookbehind; + State state; + Boundary flags; + size_t index = 0; + size_t partDataMark = 0; + size_t headerFieldMark = 0; + size_t headerValueMark = 0; +}; |