#pragma once #include "http_request.hpp" #include #include #include #include enum class ParserError { PARSER_SUCCESS, ERROR_BOUNDARY_FORMAT, ERROR_BOUNDARY_CR, ERROR_BOUNDARY_LF, ERROR_BOUNDARY_DATA, ERROR_EMPTY_HEADER, ERROR_HEADER_NAME, ERROR_HEADER_VALUE, ERROR_HEADER_ENDING, ERROR_UNEXPECTED_END_OF_HEADER, ERROR_UNEXPECTED_END_OF_INPUT, ERROR_OUT_OF_RANGE }; enum class State { START, START_BOUNDARY, HEADER_FIELD_START, HEADER_FIELD, HEADER_VALUE_START, HEADER_VALUE, HEADER_VALUE_ALMOST_DONE, HEADERS_ALMOST_DONE, PART_DATA_START, PART_DATA, END }; enum class Boundary { NON_BOUNDARY, PART_BOUNDARY, END_BOUNDARY, }; struct FormPart { boost::beast::http::fields fields; std::string content; }; class MultipartParser { public: MultipartParser() = default; [[nodiscard]] ParserError parse(const crow::Request& req) { std::string_view contentType = req.getHeaderValue("content-type"); const std::string boundaryFormat = "multipart/form-data; boundary="; if (!contentType.starts_with(boundaryFormat)) { return ParserError::ERROR_BOUNDARY_FORMAT; } std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); boundary = "\r\n--"; boundary += ctBoundary; indexBoundary(); lookbehind.resize(boundary.size() + 8); state = State::START; const std::string& buffer = req.body(); size_t len = buffer.size(); char cl = 0; for (size_t i = 0; i < len; i++) { char c = buffer[i]; switch (state) { case State::START: index = 0; state = State::START_BOUNDARY; [[fallthrough]]; case State::START_BOUNDARY: if (index == boundary.size() - 2) { if (c != cr) { return ParserError::ERROR_BOUNDARY_CR; } index++; break; } else if (index - 1 == boundary.size() - 2) { if (c != lf) { return ParserError::ERROR_BOUNDARY_LF; } index = 0; mime_fields.emplace_back(); state = State::HEADER_FIELD_START; break; } if (c != boundary[index + 2]) { return ParserError::ERROR_BOUNDARY_DATA; } index++; break; case State::HEADER_FIELD_START: currentHeaderName.resize(0); state = State::HEADER_FIELD; headerFieldMark = i; index = 0; [[fallthrough]]; case State::HEADER_FIELD: if (c == cr) { headerFieldMark = 0; state = State::HEADERS_ALMOST_DONE; break; } index++; if (c == hyphen) { break; } if (c == colon) { if (index == 1) { return ParserError::ERROR_EMPTY_HEADER; } currentHeaderName.append(&buffer[headerFieldMark], i - headerFieldMark); state = State::HEADER_VALUE_START; break; } cl = lower(c); if (cl < 'a' || cl > 'z') { return ParserError::ERROR_HEADER_NAME; } break; case State::HEADER_VALUE_START: if (c == space) { break; } headerValueMark = i; state = State::HEADER_VALUE; [[fallthrough]]; case State::HEADER_VALUE: if (c == cr) { std::string_view value(&buffer[headerValueMark], i - headerValueMark); mime_fields.rbegin()->fields.set(currentHeaderName, value); state = State::HEADER_VALUE_ALMOST_DONE; } break; case State::HEADER_VALUE_ALMOST_DONE: if (c != lf) { return ParserError::ERROR_HEADER_VALUE; } state = State::HEADER_FIELD_START; break; case State::HEADERS_ALMOST_DONE: if (c != lf) { return ParserError::ERROR_HEADER_ENDING; } if (index > 0) { return ParserError::ERROR_UNEXPECTED_END_OF_HEADER; } state = State::PART_DATA_START; break; case State::PART_DATA_START: state = State::PART_DATA; partDataMark = i; [[fallthrough]]; case State::PART_DATA: { if (index == 0) { skipNonBoundary(buffer, boundary.size() - 1, i); c = buffer[i]; } if (auto ec = processPartData(buffer, i, c); ec != ParserError::PARSER_SUCCESS) { return ec; } break; } case State::END: break; default: return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; } } if (state != State::END) { return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; } return ParserError::PARSER_SUCCESS; } std::vector mime_fields; std::string boundary; private: void indexBoundary() { std::ranges::fill(boundaryIndex, 0); for (const char current : boundary) { boundaryIndex[static_cast(current)] = true; } } static char lower(char c) { return static_cast(c | 0x20); } bool isBoundaryChar(char c) const { return boundaryIndex[static_cast(c)]; } void skipNonBoundary(const std::string& buffer, size_t boundaryEnd, size_t& i) { // boyer-moore derived algorithm to safely skip non-boundary data while (i + boundary.size() <= buffer.length()) { if (isBoundaryChar(buffer[i + boundaryEnd])) { break; } i += boundary.size(); } } ParserError processPartData(const std::string& buffer, size_t& i, char c) { size_t prevIndex = index; if (index < boundary.size()) { if (boundary[index] == c) { if (index == 0) { const char* start = &buffer[partDataMark]; size_t size = i - partDataMark; mime_fields.rbegin()->content += std::string_view(start, size); } index++; } else { index = 0; } } else if (index == boundary.size()) { index++; if (c == cr) { // cr = part boundary flags = Boundary::PART_BOUNDARY; } else if (c == hyphen) { // hyphen = end boundary flags = Boundary::END_BOUNDARY; } else { index = 0; } } else { if (flags == Boundary::PART_BOUNDARY) { index = 0; if (c == lf) { // unset the PART_BOUNDARY flag flags = Boundary::NON_BOUNDARY; mime_fields.emplace_back(); state = State::HEADER_FIELD_START; return ParserError::PARSER_SUCCESS; } } if (flags == Boundary::END_BOUNDARY) { if (c == hyphen) { state = State::END; } else { flags = Boundary::NON_BOUNDARY; index = 0; } } } if (index > 0) { if ((index - 1) >= lookbehind.size()) { // Should never happen, but when it does it won't cause crash return ParserError::ERROR_OUT_OF_RANGE; } lookbehind[index - 1] = c; } else if (prevIndex > 0) { // if our boundary turned out to be rubbish, the captured // lookbehind belongs to partData mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); partDataMark = i; // reconsider the current character even so it interrupted // the sequence it could be the beginning of a new sequence i--; } return ParserError::PARSER_SUCCESS; } std::string currentHeaderName; std::string currentHeaderValue; static constexpr char cr = '\r'; static constexpr char lf = '\n'; static constexpr char space = ' '; static constexpr char hyphen = '-'; static constexpr char colon = ':'; std::array boundaryIndex{}; std::string lookbehind; State state{State::START}; Boundary flags{Boundary::NON_BOUNDARY}; size_t index = 0; size_t partDataMark = 0; size_t headerFieldMark = 0; size_t headerValueMark = 0; };