common/Util.hpp | 23 ++++++++ net/Socket.cpp | 126 ++++++++++++++++++++++++++++++++++++++++++- net/Socket.hpp | 22 ++++++- net/WebSocketHandler.hpp | 8 +- test/UnitHTTP.cpp | 135 +++++++++++++++++++++++++++++++++++++++++++++-- test/helpers.hpp | 23 ++++++-- wsd/LOOLWSD.cpp | 24 +++++--- 7 files changed, 335 insertions(+), 26 deletions(-)
New commits: commit 05ca302c2bc5fd3be3598da503b209c5cdde5dbe Author: Michael Meeks <michael.me...@collabora.com> AuthorDate: Wed May 22 10:54:36 2019 +0100 Commit: Michael Meeks <michael.me...@collabora.com> CommitDate: Wed May 22 11:07:42 2019 +0100 tests for chunked transfer encoding parser. Change-Id: Ic55669ab7cc55bb44e8f7a00f30231b44f10535a diff --git a/test/UnitHTTP.cpp b/test/UnitHTTP.cpp index d0530fe10..e86593f22 100644 --- a/test/UnitHTTP.cpp +++ b/test/UnitHTTP.cpp @@ -13,6 +13,7 @@ #include <helpers.hpp> #include <Poco/Util/Application.h> +#include <Poco/Net/StreamSocket.h> #include <Poco/Net/StringPartSource.h> #include <Poco/Net/HTMLForm.h> #include <Poco/Net/HTTPRequest.h> @@ -37,9 +38,9 @@ public: config.setBool("ssl.enable", true); } - // FIXME: can hook with (UnitWSD::get().handleHttpRequest(request, message, socket)) ... - void invokeTest() override + void testContinue() { + std::cerr << "testContinue\n"; for (int i = 0; i < 3; ++i) { std::unique_ptr<Poco::Net::HTTPClientSession> session(helpers::createSession(Poco::URI(helpers::getTestServerURI()))); @@ -81,9 +82,135 @@ public: return; } } - // Give those convertors time to save and cleanup. - std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + void writeString(const std::shared_ptr<Poco::Net::StreamSocket> &socket, std::string str) + { + socket->sendBytes(str.c_str(), str.size()); + } + + bool expectString(const std::shared_ptr<Poco::Net::StreamSocket> &socket, std::string str) + { + char buffer[str.size() + 64] = { 0, }; + int got = socket->receiveBytes(buffer, str.size()); + if (got != (int)str.size() || + strncmp(buffer, str.c_str(), got)) + { + std::cerr << "testChunks got " << got << " mismatching strings '" << buffer << " vs. expected '" << str << "'\n"; + exitTest(TestResult::Failed); + return false; + } + else + return true; + } + + void testChunks() + { + std::cerr << "testChunks\n"; + + std::shared_ptr<Poco::Net::StreamSocket> socket = helpers::createRawSocket(); + + writeString( + socket, + "POST /lool/convert-to/txt HTTP/1.1\r\n" + "Host: localhost:9980\r\n" + "User-Agent: looltests/1.2.3\r\n" + "Accept: */*\r\n" + "Expect: 100-continue\r\n" + "Transfer-Encoding: chunked\r\n" + "Content-Type: multipart/form-data; " + "boundary=------------------------5a0cd5c881663db4\r\n\r\n"); + if (!expectString( + socket, + "HTTP/1.1 100 Continue\r\n\r\n")) + return; + +#define START_CHUNK_HEX(len) len "\r\n" +#define END_CHUNK "\r\n" + writeString( + socket, + START_CHUNK_HEX("8A") + "--------------------------5a0cd5c881663db4\r\n" + "Content-Disposition: form-data; name=\"data\"; filename=\"test.txt\"\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + END_CHUNK + + START_CHUNK_HEX("12") + "This is some text." + END_CHUNK + + START_CHUNK_HEX("1") + "\n" + END_CHUNK + + " 4 room:for expansion!! cf. leading spaces and nasies <>!\"\'?=)\r\n" + "And " + END_CHUNK + + START_CHUNK_HEX("1") + "s" + END_CHUNK + + START_CHUNK_HEX("a") + "ome more.\n" + END_CHUNK + ); + writeString( + socket, + START_CHUNK_HEX("30") + "\r\n" + "--------------------------5a0cd5c881663db4--\r\n" + END_CHUNK); + + writeString(socket, START_CHUNK_HEX("0")); + + char buffer[4096] = { 0, }; + int got = socket->receiveBytes(buffer, 4096); + std::string start = + "HTTP/1.0 200 OK\r\n" + "Content-Disposition: attachment; filename=\"test.txt\"\r\n"; + + if (strncmp(buffer, start.c_str(), start.size())) + { + std::cerr << "missing pre-amble " << got << " '" << buffer << " vs. expected '" << start << "'\n"; + exitTest(TestResult::Failed); + return; + } + // TODO: check content-length etc. + + const char *ptr = strstr(buffer, "\r\n\r\n"); + if (!ptr) + { + std::cerr << "missing separator " << got << " '" << buffer << "\n"; + exitTest(TestResult::Failed); + return; + } + + // Oddly we need another read to get the content. + got = socket->receiveBytes(buffer, 4096); + if (got >=0 ) + buffer[got] = '\0'; + else + { + std::cerr << "No content returned " << got << "\n"; + exitTest(TestResult::Failed); + return; + } + + if (strcmp(buffer, "\357\273\277This is some text.\nAnd some more.\n")) + { + std::cerr << "unexpected file content " << got << " '" << buffer << "\n"; + exitTest(TestResult::Failed); + return; + } + } + + void invokeTest() override + { + testChunks(); + testContinue(); std::cerr << "All tests passed.\n"; exitTest(TestResult::Ok); } diff --git a/test/helpers.hpp b/test/helpers.hpp index c0f9d112c..708806cd2 100644 --- a/test/helpers.hpp +++ b/test/helpers.hpp @@ -21,6 +21,8 @@ #include <Poco/Net/HTTPResponse.h> #include <Poco/Net/HTTPSClientSession.h> #include <Poco/Net/NetException.h> +#include <Poco/Net/StreamSocket.h> +#include <Poco/Net/SecureStreamSocket.h> #include <Poco/Net/Socket.h> #include <Poco/Path.h> #include <Poco/StringTokenizer.h> @@ -175,18 +177,33 @@ Poco::Net::HTTPClientSession* createSession(const Poco::URI& uri) return new Poco::Net::HTTPClientSession(uri.getHost(), uri.getPort()); } -inline -std::string const & getTestServerURI() +inline int getClientPort() { static const char* clientPort = std::getenv("LOOL_TEST_CLIENT_PORT"); + return clientPort? atoi(clientPort) : DEFAULT_CLIENT_PORT_NUMBER; +} + +inline std::shared_ptr<Poco::Net::StreamSocket> createRawSocket() +{ + return +#if ENABLE_SSL + std::make_shared<Poco::Net::SecureStreamSocket> +#else + std::make_shared<Poco::Net::StreamSocket> +#endif + (Poco::Net::SocketAddress("127.0.0.1", getClientPort())); +} +inline +std::string const & getTestServerURI() +{ static std::string serverURI( #if ENABLE_SSL "https://127.0.0.1:" #else "http://127.0.0.1:" #endif - + (clientPort? std::string(clientPort) : std::to_string(DEFAULT_CLIENT_PORT_NUMBER))); + + std::to_string(getClientPort())); return serverURI; } commit 9d723cb230986e2d464d19a6a1518c7bf0f8df1e Author: Michael Meeks <michael.me...@collabora.com> AuthorDate: Wed May 22 03:21:50 2019 +0100 Commit: Michael Meeks <michael.me...@collabora.com> CommitDate: Wed May 22 11:07:42 2019 +0100 Initial chunked transfer encoding. Important for convert-to on larger documents and/or with newer curls. Change-Id: Id18be6d22741a3af7cee39a069c509e4f662977b diff --git a/common/Util.hpp b/common/Util.hpp index 7c967784c..e6e426ed0 100644 --- a/common/Util.hpp +++ b/common/Util.hpp @@ -165,6 +165,18 @@ namespace Util // Extract all json entries into a map. std::map<std::string, std::string> JsonToMap(const std::string& jsonString); + inline int hexDigitFromChar(char c) + { + if (c >= '0' && c <= '9') + return c - '0'; + else if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + else + return -1; + } + /// Dump a lineof data as hex inline std::string stringifyHexLine( const std::vector<char> &buffer, @@ -233,6 +245,17 @@ namespace Util os.flush(); } + inline std::string dumpHex (const char *legend, const char *prefix, + const std::vector<char>::iterator &startIt, + const std::vector<char>::iterator &endIt, + bool skipDup = true, const unsigned int width = 32) + { + std::ostringstream oss; + std::vector<char> data(startIt, endIt); + dumpHex(oss, legend, prefix, data, skipDup, width); + return oss.str(); + } + /// Trim spaces from the left. Just spaces. inline std::string& ltrim(std::string& s) { diff --git a/net/Socket.cpp b/net/Socket.cpp index ba72e16c1..34942d2f1 100644 --- a/net/Socket.cpp +++ b/net/Socket.cpp @@ -628,14 +628,21 @@ std::string LocalServerSocket::bind() return ""; } +// For a verbose life, tweak here: +#if 0 +# define LOG_CHUNK(X) LOG_TRC(X) +#else +# define LOG_CHUNK(X) +#endif + bool StreamSocket::parseHeader(const char *clientName, Poco::MemoryInputStream &message, Poco::Net::HTTPRequest &request, - size_t *requestSize) + MessageMap *map) { LOG_TRC("#" << getFD() << " handling incoming " << _inBuffer.size() << " bytes."); - assert(!requestSize || *requestSize == 0); + assert(!map || (map->_headerSize == 0 && map->_messageSize == 0)); // Find the end of the header, if any. static const std::string marker("\r\n\r\n"); @@ -649,8 +656,11 @@ bool StreamSocket::parseHeader(const char *clientName, // Skip the marker. itBody += marker.size(); - if (requestSize) - *requestSize = static_cast<size_t>(itBody - _inBuffer.begin()); + if (map) // a reasonable guess so far + { + map->_headerSize = static_cast<size_t>(itBody - _inBuffer.begin()); + map->_messageSize = map->_headerSize; + } try { @@ -681,6 +691,8 @@ bool StreamSocket::parseHeader(const char *clientName, LOG_DBG("Not enough content yet: ContentLength: " << contentLength << ", available: " << available); return false; } + if (map) + map->_messageSize += contentLength; if (request.getExpectContinue() && !_sentHTTPContinue) { @@ -690,6 +702,79 @@ bool StreamSocket::parseHeader(const char *clientName, sizeof("HTTP/1.1 100 Continue\r\n\r\n") - 1); _sentHTTPContinue = true; } + + if (request.getChunkedTransferEncoding()) + { + // keep the header + if (map) + map->_spans.push_back(std::pair<size_t, size_t>(0, itBody - _inBuffer.begin())); + + int chunk = 0; + while (itBody != _inBuffer.end()) + { + auto chunkStart = itBody; + + // skip whitespace + for (; itBody != _inBuffer.end() && isascii(*itBody) && isspace(*itBody); ++itBody) + ; // skip. + + // each chunk is preceeded by its length in hex. + size_t chunkLen = 0; + for (; itBody != _inBuffer.end(); ++itBody) + { + int digit = Util::hexDigitFromChar(*itBody); + if (digit >= 0) + chunkLen = chunkLen * 16 + digit; + else + break; + } + + LOG_CHUNK("Chunk of length " << chunkLen); + + for (; itBody != _inBuffer.end() && *itBody != '\n'; ++itBody) + ; // skip to end of line + + if (itBody != _inBuffer.end()) + itBody++; /* \n */; + + // skip the chunk. + auto chunkOffset = itBody - _inBuffer.begin(); + auto chunkAvailable = _inBuffer.size() - chunkOffset; + + if (chunkLen == 0) // we're complete. + { + map->_messageSize = chunkOffset; + return true; + } + + if (chunkLen > chunkAvailable + 2) + { + LOG_DBG("Not enough content yet in chunk " << chunk << + " starting at offset " << (chunkStart - _inBuffer.begin()) << + " chunk len: " << chunkLen << ", available: " << chunkAvailable); + return false; + } + itBody += chunkLen; + + map->_spans.push_back(std::pair<size_t,size_t>(chunkOffset, chunkLen)); + + if (*itBody != '\r' || *(itBody + 1) != '\n') + { + LOG_ERR("Missing \\r\\n at end of chunk " << chunk << " of length " << chunkLen); + LOG_CHUNK("Chunk " << chunk << " is: \n" << Util::dumpHex("", "", chunkStart, itBody + 1, false)); + return false; // TODO: throw something sensible in this case + } + else + { + LOG_CHUNK("Chunk " << chunk << " is: \n" << Util::dumpHex("", "", chunkStart, itBody + 1, false)); + } + + itBody+=2; + chunk++; + } + LOG_TRC("Not enough chunks yet, so far " << chunk << " chunks of total length " << (itBody - _inBuffer.begin())); + return false; + } } catch (const Poco::Exception& exc) { @@ -709,6 +794,39 @@ bool StreamSocket::parseHeader(const char *clientName, return true; } +bool StreamSocket::compactChunks(MessageMap *map) +{ + assert (map); + if (!map->_spans.size()) + return false; // single message. + + LOG_CHUNK("Pre-compact " << map->_spans.size() << " chunks: \n" << + Util::dumpHex("", "", _inBuffer.begin(), _inBuffer.end(), false)); + + char *first = &_inBuffer[0]; + char *dest = first; + for (auto &span : map->_spans) + { + std::memmove(dest, &_inBuffer[span.first], span.second); + dest += span.second; + } + + // Erase the duplicate bits. + size_t newEnd = dest - first; + size_t gap = map->_messageSize - newEnd; + _inBuffer.erase(_inBuffer.begin() + newEnd, _inBuffer.begin() + map->_messageSize); + + LOG_CHUNK("Post-compact with erase of " << newEnd << " to " << map->_messageSize << " giving: \n" << + Util::dumpHex("", "", _inBuffer.begin(), _inBuffer.end(), false)); + + // shrink our size to fit + map->_messageSize -= gap; + + dumpState(std::cerr); + + return true; +} + namespace HttpHelper { void sendUncompressedFileContent(const std::shared_ptr<StreamSocket>& socket, diff --git a/net/Socket.hpp b/net/Socket.hpp index 5995570dc..2d96d1981 100644 --- a/net/Socket.hpp +++ b/net/Socket.hpp @@ -949,9 +949,21 @@ public: return socket; } + /// Messages can be in chunks, only parts of message being valid. + struct MessageMap { + MessageMap() : _headerSize(0), _messageSize(0) {} + /// Size of HTTP headers + size_t _headerSize; + /// Entire size of data associated with this message + size_t _messageSize; + // offset + lengths to collate into the real stream + std::vector<std::pair<size_t, size_t>> _spans; + }; + /// Remove the first @count bytes from input buffer - void eraseFirstInputBytes(size_t count) + void eraseFirstInputBytes(const MessageMap &map) { + size_t count = map._headerSize; size_t toErase = std::min(count, _inBuffer.size()); if (toErase < count) LOG_ERR("#" << getFD() << ": attempted to remove: " << count << " which is > size: " << _inBuffer.size() << " clamped to " << toErase); @@ -959,12 +971,16 @@ public: _inBuffer.erase(_inBuffer.begin(), _inBuffer.begin() + count); } + /// Compacts chunk headers away leaving just the data we want + /// returns true if we did any re-sizing/movement of _inBuffer. + bool compactChunks(MessageMap *map); + /// Detects if we have an HTTP header in the provided message and /// populates a request for that. bool parseHeader(const char *clientLoggingName, Poco::MemoryInputStream &message, Poco::Net::HTTPRequest &request, - size_t *requestSize = nullptr); + MessageMap *map = nullptr); /// Get input/output statistics on this stream void getIOStats(uint64_t &sent, uint64_t &recv) @@ -985,6 +1001,8 @@ public: protected: + std::vector<std::pair<size_t, size_t>> findChunks(Poco::Net::HTTPRequest &request); + /// Called when a polling event is received. /// @events is the mask of events that triggered the wake. void handlePoll(SocketDisposition &disposition, diff --git a/net/WebSocketHandler.hpp b/net/WebSocketHandler.hpp index c943a5bd1..f5f767e4f 100644 --- a/net/WebSocketHandler.hpp +++ b/net/WebSocketHandler.hpp @@ -692,7 +692,7 @@ protected: LOG_TRC("Incoming client websocket upgrade response: " << std::string(&socket->getInBuffer()[0], socket->getInBuffer().size())); bool bOk = false; - size_t responseSize = 0; + StreamSocket::MessageMap map; try { @@ -708,7 +708,7 @@ protected: marker.begin(), marker.end()); if (itBody != socket->getInBuffer().end()) - responseSize = itBody - socket->getInBuffer().begin() + marker.size(); + map._headerSize = itBody - socket->getInBuffer().begin() + marker.size(); } if (response.getStatus() == Poco::Net::HTTPResponse::HTTP_SWITCHING_PROTOCOLS && @@ -737,7 +737,7 @@ protected: LOG_DBG("handleClientUpgrade exception caught."); } - if (!bOk || responseSize == 0) + if (!bOk || map._headerSize == 0) { LOG_ERR("Bad websocker server response."); @@ -746,7 +746,7 @@ protected: } setWebSocket(); - socket->eraseFirstInputBytes(responseSize); + socket->eraseFirstInputBytes(map); } #endif diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp index 25877811a..98a97fb9f 100644 --- a/wsd/LOOLWSD.cpp +++ b/wsd/LOOLWSD.cpp @@ -1836,9 +1836,7 @@ private: try { #if !MOBILEAPP - size_t requestSize = 0; - - if (!socket->parseHeader("Prisoner", message, request, &requestSize)) + if (!socket->parseHeader("Prisoner", message, request)) return; LOG_TRC("Child connection with URI [" << LOOLWSD::anonymizeUrl(request.getURI()) << "]."); @@ -2051,16 +2049,23 @@ private: return; } - Poco::MemoryInputStream message(&socket->getInBuffer()[0], - socket->getInBuffer().size());; + Poco::MemoryInputStream startmessage(&socket->getInBuffer()[0], + socket->getInBuffer().size());; Poco::Net::HTTPRequest request; - size_t requestSize = 0; - if (!socket->parseHeader("Client", message, request, &requestSize)) + StreamSocket::MessageMap map; + if (!socket->parseHeader("Client", startmessage, request, &map)) return; try { + // We may need to re-write the chunks moving the inBuffer. + socket->compactChunks(&map); + Poco::MemoryInputStream message(&socket->getInBuffer()[0], + socket->getInBuffer().size()); + // update the read cursor - headers are not altered by chunks. + message.seekg(startmessage.tellg(), std::ios::beg); + // Check and remove the ServiceRoot from the request.getURI() if (!Util::startsWith(request.getURI(), LOOLWSD::ServiceRoot)) throw BadRequestException("The request does not start with prefix: " + LOOLWSD::ServiceRoot); @@ -2171,7 +2176,7 @@ private: // if we succeeded - remove the request from our input buffer // we expect one request per socket - socket->eraseFirstInputBytes(requestSize); + socket->eraseFirstInputBytes(map); #else Poco::Net::HTTPRequest request; handleClientWsUpgrade(request, std::string(socket->getInBuffer().data(), socket->getInBuffer().size()), disposition); @@ -2336,7 +2341,8 @@ private: return "application/octet-stream"; } - void handlePostRequest(const Poco::Net::HTTPRequest& request, Poco::MemoryInputStream& message, + void handlePostRequest(const Poco::Net::HTTPRequest& request, + Poco::MemoryInputStream& message, SocketDisposition &disposition) { LOG_INF("Post request: [" << LOOLWSD::anonymizeUrl(request.getURI()) << "]"); _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits