common/Protocol.hpp | 12 ++++---- common/Session.cpp | 2 - common/StringVector.cpp | 68 +++++++++++++++++++++++++++++++++++++++--------- common/StringVector.hpp | 39 ++++++++++++++++++++++----- common/Util.cpp | 9 +++++- kit/ChildSession.cpp | 14 ++++----- kit/ForKit.cpp | 11 ++++--- kit/Kit.cpp | 12 ++++---- test/WhiteBoxTests.cpp | 24 ++++++++++++++++ wsd/ClientSession.cpp | 8 ++--- wsd/DocumentBroker.cpp | 37 ++++++++++++++------------ wsd/LOOLWSD.cpp | 7 ++-- wsd/Storage.cpp | 2 - wsd/TileDesc.hpp | 2 - wsd/TraceFile.hpp | 2 - 15 files changed, 178 insertions(+), 71 deletions(-)
New commits: commit 547f9ea73186d274d512a9fa1f01e6edcad66bab Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Fri Feb 28 14:51:22 2020 +0100 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Fri Feb 28 18:31:37 2020 +0100 Rework StringVector to have a single underlying string This is meant to reduce lots of small allocations and instead have pointers into the single string for the various tokens instead. This has a few requirements, though: 1) It's no longer OK to modify the tokens, changing their length would invalidate the start/length of other tokens. Rework DocumentBroker::load() to avoid such mutation. 2) The iterators no longer expose zero-terminated strings, so Poco::cat() doesn't work anymore: add an own cat() instead and use that in e.g. ChildSession. The own cat() has the benefit that it won't read past the end of the array if the begin index is out of bounds to add more safety. (This nicely works towards killing Poco usage in general.) 3) If zero-terminated strings for all individual tokens is needed, a copy has to be made, as done in spawnProcess(). (For all of these requirements, the build fails if there are problems.) Change-Id: Iea40e4400e630b2d669f5c72aea85cb40edf9a2c Reviewed-on: https://gerrit.libreoffice.org/c/online/+/89711 Reviewed-by: Michael Meeks <michael.me...@collabora.com> Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com> diff --git a/common/Protocol.hpp b/common/Protocol.hpp index 787be3706..3574d2027 100644 --- a/common/Protocol.hpp +++ b/common/Protocol.hpp @@ -85,7 +85,7 @@ namespace LOOLProtocol { for (const auto& token : tokens) { - if (getTokenString(token, name, value)) + if (getTokenString(tokens.getParam(token), name, value)) { return true; } @@ -101,10 +101,10 @@ namespace LOOLProtocol inline StringVector tokenize(const char* data, const size_t size, const char delimiter = ' ') { - std::vector<std::string> tokens; + std::vector<StringToken> tokens; if (size == 0 || data == nullptr) { - return StringVector(tokens); + return StringVector(std::string(), {}); } tokens.reserve(8); @@ -116,7 +116,7 @@ namespace LOOLProtocol { if (start != end && *start != delimiter) { - tokens.emplace_back(start, end); + tokens.emplace_back(start - data, end - start); } start = end; @@ -129,10 +129,10 @@ namespace LOOLProtocol if (start != end && *start != delimiter && *start != '\n') { - tokens.emplace_back(start, end); + tokens.emplace_back(start - data, end - start); } - return StringVector(tokens); + return StringVector(std::string(data, size), tokens); } inline diff --git a/common/Session.cpp b/common/Session.cpp index 815fd5391..a0cdabe41 100644 --- a/common/Session.cpp +++ b/common/Session.cpp @@ -182,7 +182,7 @@ void Session::parseDocOptions(const StringVector& tokens, int& part, std::string if (getTokenString(tokens[offset], "options", _docOptions)) { if (tokens.size() > offset + 1) - _docOptions += Poco::cat(std::string(" "), tokens.begin() + offset + 1, tokens.end()); + _docOptions += tokens.cat(std::string(" "), offset + 1); } } } diff --git a/common/StringVector.cpp b/common/StringVector.cpp index a4bf78109..89777166b 100644 --- a/common/StringVector.cpp +++ b/common/StringVector.cpp @@ -11,35 +11,79 @@ StringVector::StringVector() = default; -StringVector::StringVector(const std::vector<std::string>& vector) { _vector = vector; } +StringVector::StringVector(const std::string& string, const std::vector<StringToken>& tokens) + : _string(string), + _tokens(tokens) +{ +} std::string StringVector::operator[](size_t index) const { - if (index >= _vector.size()) + if (index >= _tokens.size()) { return std::string(); } - return _vector[index]; + const StringToken& token = _tokens[index]; + return _string.substr(token._index, token._length); } -size_t StringVector::size() const { return _vector.size(); } +size_t StringVector::size() const { return _tokens.size(); } + +bool StringVector::empty() const { return _tokens.empty(); } -bool StringVector::empty() const { return _vector.empty(); } +std::vector<StringToken>::const_iterator StringVector::begin() const { return _tokens.begin(); } -std::vector<std::string>::const_iterator StringVector::begin() const { return _vector.begin(); } +std::vector<StringToken>::iterator StringVector::begin() { return _tokens.begin(); } -std::vector<std::string>::iterator StringVector::begin() { return _vector.begin(); } +std::vector<StringToken>::const_iterator StringVector::end() const { return _tokens.end(); } -std::vector<std::string>::const_iterator StringVector::end() const { return _vector.end(); } +std::vector<StringToken>::iterator StringVector::end() { return _tokens.end(); } -std::vector<std::string>::iterator StringVector::end() { return _vector.end(); } +std::vector<StringToken>::iterator StringVector::erase(std::vector<StringToken>::const_iterator it) +{ + return _tokens.erase(it); +} -std::vector<std::string>::iterator StringVector::erase(std::vector<std::string>::const_iterator it) +void StringVector::push_back(const std::string& string) { - return _vector.erase(it); + StringToken token; + token._index = _string.length(); + token._length = string.length(); + _tokens.push_back(token); + _string += string; } -void StringVector::push_back(const std::string& string) { _vector.push_back(string); } +std::string StringVector::getParam(const StringToken& token) const +{ + return _string.substr(token._index, token._length); +} + +std::string StringVector::cat(const std::string& separator, size_t begin) const +{ + std::string ret; + bool first = true; + + if (begin >= _tokens.size()) + { + return ret; + } + + for (auto it = _tokens.begin() + begin; it != _tokens.end(); ++it) + { + if (first) + { + first = false; + } + else + { + ret += separator; + } + + ret += getParam(*it); + } + + return ret; +} /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/common/StringVector.hpp b/common/StringVector.hpp index 346bfabff..4255e9d54 100644 --- a/common/StringVector.hpp +++ b/common/StringVector.hpp @@ -13,18 +13,37 @@ #include <string> #include <vector> +/** + * Stores an offset and a length into the single underlying string of StringVector. + */ +struct StringToken +{ + size_t _index; + size_t _length; + + StringToken() = default; + + StringToken(size_t index, size_t length) + : _index(index), + _length(length) + { + } +}; + /** * Safe wrapper around an std::vector<std::string>. Gives you an empty string if you would read past * the ends of the vector. */ class StringVector { - std::vector<std::string> _vector; + /// All tokens are substrings of this string. + std::string _string; + std::vector<StringToken> _tokens; public: explicit StringVector(); - explicit StringVector(const std::vector<std::string>& vector); + explicit StringVector(const std::string& string, const std::vector<StringToken>& tokens); /// Unlike std::vector, gives an empty string if index is unexpected. std::string operator[](size_t index) const; @@ -33,17 +52,23 @@ public: bool empty() const; - std::vector<std::string>::const_iterator begin() const; + std::vector<StringToken>::const_iterator begin() const; - std::vector<std::string>::iterator begin(); + std::vector<StringToken>::iterator begin(); - std::vector<std::string>::const_iterator end() const; + std::vector<StringToken>::const_iterator end() const; - std::vector<std::string>::iterator end(); + std::vector<StringToken>::iterator end(); - std::vector<std::string>::iterator erase(std::vector<std::string>::const_iterator it); + std::vector<StringToken>::iterator erase(std::vector<StringToken>::const_iterator it); void push_back(const std::string& string); + + /// Gets the underlying string of a single token. + std::string getParam(const StringToken& token) const; + + /// Concats tokens starting from begin, using separator as separator. + std::string cat(const std::string& separator, size_t begin) const; }; #endif diff --git a/common/Util.cpp b/common/Util.cpp index 9a2273eaf..137ec3897 100644 --- a/common/Util.cpp +++ b/common/Util.cpp @@ -257,9 +257,16 @@ namespace Util } } + // Create a vector of zero-terminated strings. + std::vector<std::string> argStrings(args.size()); + for (const auto& arg : args) + { + argStrings.emplace_back(args.getParam(arg)); + } + std::vector<char *> params; params.push_back(const_cast<char *>(cmd.c_str())); - for (const auto& i : args) + for (const auto& i : argStrings) params.push_back(const_cast<char *>(i.c_str())); params.push_back(nullptr); diff --git a/kit/ChildSession.cpp b/kit/ChildSession.cpp index 13bc1d8e4..3cd8642bd 100644 --- a/kit/ChildSession.cpp +++ b/kit/ChildSession.cpp @@ -365,10 +365,10 @@ bool ChildSession::_handleInput(const char *buffer, int length) if (tokens[1].find(".uno:SpellCheckApplySuggestion") != std::string::npos || tokens[1].find(".uno:LanguageStatus") != std::string::npos) { - std::vector<std::string> newTokens; + StringVector newTokens; newTokens.push_back(tokens[0]); newTokens.push_back(firstLine.substr(4)); // Copy the remaining part. - return unoCommand(buffer, length, StringVector(newTokens)); + return unoCommand(buffer, length, newTokens); } return unoCommand(buffer, length, tokens); } @@ -686,7 +686,7 @@ bool ChildSession::sendFontRendering(const char* /*buffer*/, int /*length*/, con return false; } - const std::string response = "renderfont: " + Poco::cat(std::string(" "), tokens.begin() + 1, tokens.end()) + "\n"; + const std::string response = "renderfont: " + tokens.cat(std::string(" "), 1) + "\n"; std::vector<char> output; output.resize(response.size()); @@ -903,7 +903,7 @@ bool ChildSession::downloadAs(const char* /*buffer*/, int /*length*/, const Stri { if (tokens.size() > 5) { - filterOptions += Poco::cat(std::string(" "), tokens.begin() + 5, tokens.end()); + filterOptions += tokens.cat(std::string(" "), 5); } } @@ -1413,7 +1413,7 @@ bool ChildSession::dialogEvent(const char* /*buffer*/, int /*length*/, const Str unsigned nLOKWindowId = std::stoi(tokens[1].c_str()); getLOKitDocument()->sendDialogEvent(nLOKWindowId, - Poco::cat(std::string(" "), tokens.begin() + 2, tokens.end()).c_str()); + tokens.cat(std::string(" "), 2).c_str()); return true; } @@ -1468,7 +1468,7 @@ bool ChildSession::unoCommand(const char* /*buffer*/, int /*length*/, const Stri else { getLOKitDocument()->postUnoCommand(tokens[1].c_str(), - Poco::cat(std::string(" "), tokens.begin() + 2, tokens.end()).c_str(), + tokens.cat(std::string(" "), 2).c_str(), bNotify); } @@ -2002,7 +2002,7 @@ bool ChildSession::saveAs(const char* /*buffer*/, int /*length*/, const StringVe { if (tokens.size() > 4) { - filterOptions += Poco::cat(std::string(" "), tokens.begin() + 4, tokens.end()); + filterOptions += tokens.cat(std::string(" "), 4); } } diff --git a/kit/ForKit.cpp b/kit/ForKit.cpp index 90a7947fd..3af723a99 100644 --- a/kit/ForKit.cpp +++ b/kit/ForKit.cpp @@ -442,13 +442,16 @@ int main(int argc, char** argv) eq = std::strchr(cmd, '='); const std::string rlimits = std::string(eq+1); StringVector tokens = LOOLProtocol::tokenize(rlimits, ';'); - for (const std::string& cmdLimit : tokens) + for (const auto& cmdLimit : tokens) { - const std::pair<std::string, std::string> pair = Util::split(cmdLimit, ':'); - StringVector tokensLimit({ "setconfig", pair.first, pair.second }); + const std::pair<std::string, std::string> pair = Util::split(tokens.getParam(cmdLimit), ':'); + StringVector tokensLimit; + tokensLimit.push_back("setconfig"); + tokensLimit.push_back(pair.first); + tokensLimit.push_back(pair.second); if (!Rlimit::handleSetrlimitCommand(tokensLimit)) { - LOG_ERR("Unknown rlimits command: " << cmdLimit); + LOG_ERR("Unknown rlimits command: " << tokens.getParam(cmdLimit)); } } } diff --git a/kit/Kit.cpp b/kit/Kit.cpp index 201628c54..883efa6fc 100644 --- a/kit/Kit.cpp +++ b/kit/Kit.cpp @@ -2106,16 +2106,16 @@ protected: if (logger.enabled()) { logger << _socketName << ": recv ["; - for (const std::string& token : tokens) + for (const auto& token : tokens) { // Don't log user-data, there are anonymized versions that get logged instead. - if (Util::startsWith(token, "jail") || - Util::startsWith(token, "author") || - Util::startsWith(token, "name") || - Util::startsWith(token, "url")) + if (Util::startsWith(tokens.getParam(token), "jail") || + Util::startsWith(tokens.getParam(token), "author") || + Util::startsWith(tokens.getParam(token), "name") || + Util::startsWith(tokens.getParam(token), "url")) continue; - logger << token << ' '; + logger << tokens.getParam(token) << ' '; } LOG_END(logger, true); diff --git a/test/WhiteBoxTests.cpp b/test/WhiteBoxTests.cpp index 67dd8644a..5c843adea 100644 --- a/test/WhiteBoxTests.cpp +++ b/test/WhiteBoxTests.cpp @@ -41,6 +41,7 @@ class WhiteBoxTests : public CPPUNIT_NS::TestFixture CPPUNIT_TEST(testJson); CPPUNIT_TEST(testAnonymization); CPPUNIT_TEST(testTime); + CPPUNIT_TEST(testStringVector); CPPUNIT_TEST_SUITE_END(); @@ -57,6 +58,7 @@ class WhiteBoxTests : public CPPUNIT_NS::TestFixture void testJson(); void testAnonymization(); void testTime(); + void testStringVector(); }; void WhiteBoxTests::testLOOLProtocolFunctions() @@ -795,6 +797,28 @@ void WhiteBoxTests::testTime() } } +void WhiteBoxTests::testStringVector() +{ + // Test push_back() and getParam(). + StringVector vector; + vector.push_back("a"); + vector.push_back("b"); + CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), vector.size()); + auto it = vector.begin(); + CPPUNIT_ASSERT_EQUAL(std::string("a"), vector.getParam(*it)); + ++it; + CPPUNIT_ASSERT_EQUAL(std::string("b"), vector.getParam(*it)); + + // Test cat(). + CPPUNIT_ASSERT_EQUAL(std::string("a b"), vector.cat(" ", 0)); + CPPUNIT_ASSERT_EQUAL(std::string(), vector.cat(" ", 3)); + CPPUNIT_ASSERT_EQUAL(std::string(), vector.cat(" ", 42)); + + // Test operator [](). + CPPUNIT_ASSERT_EQUAL(std::string("a"), vector[0]); + CPPUNIT_ASSERT_EQUAL(std::string(""), vector[2]); +} + CPPUNIT_TEST_SUITE_REGISTRATION(WhiteBoxTests); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/wsd/ClientSession.cpp b/wsd/ClientSession.cpp index fedbf422e..8a79d46bb 100644 --- a/wsd/ClientSession.cpp +++ b/wsd/ClientSession.cpp @@ -878,7 +878,7 @@ bool ClientSession::sendFontRendering(const char *buffer, int length, const Stri TileCache::Tile cachedTile = docBroker->tileCache().lookupCachedStream(TileCache::StreamType::Font, font+text); if (cachedTile) { - const std::string response = "renderfont: " + Poco::cat(std::string(" "), tokens.begin() + 1, tokens.end()) + "\n"; + const std::string response = "renderfont: " + tokens.cat(std::string(" "), 1) + "\n"; return sendTile(response, cachedTile); } @@ -1410,7 +1410,7 @@ bool ClientSession::handleKitToClientMessage(const char* buffer, const int lengt { // Need to get the initial part id from status message int part = -1; - if(getTokenInteger(token, "current", part)) + if(getTokenInteger(tokens.getParam(token), "current", part)) { _clientSelectedPart = part; resetWireIdMap(); @@ -1418,14 +1418,14 @@ bool ClientSession::handleKitToClientMessage(const char* buffer, const int lengt // Get document type too std::string docType; - if(getTokenString(token, "type", docType)) + if(getTokenString(tokens.getParam(token), "type", docType)) { _isTextDocument = docType.find("text") != std::string::npos; } // Store our Kit ViewId int viewId = -1; - if(getTokenInteger(token, "viewid", viewId)) + if(getTokenInteger(tokens.getParam(token), "viewid", viewId)) _kitViewId = viewId; } diff --git a/wsd/DocumentBroker.cpp b/wsd/DocumentBroker.cpp index bdb3d61ed..74b827678 100644 --- a/wsd/DocumentBroker.cpp +++ b/wsd/DocumentBroker.cpp @@ -767,7 +767,7 @@ bool DocumentBroker::load(const std::shared_ptr<ClientSession>& session, const s { const std::string extension(plugin->getString("prefilter.extension")); const std::string newExtension(plugin->getString("prefilter.newextension")); - const std::string commandLine(plugin->getString("prefilter.commandline")); + std::string commandLine(plugin->getString("prefilter.commandline")); if (localPath.length() > extension.length()+1 && strcasecmp(localPath.substr(localPath.length() - extension.length() -1).data(), (std::string(".") + extension).data()) == 0) @@ -777,27 +777,30 @@ bool DocumentBroker::load(const std::shared_ptr<ClientSession>& session, const s const std::string newRootPath = _storage->getRootFilePath() + "." + newExtension; - StringVector args(LOOLProtocol::tokenize(commandLine, ' ')); - std::string command(args[0]); - args.erase(args.begin()); // strip the command - // The commandline must contain the space-separated substring @INPUT@ that is // replaced with the input file name, and @OUTPUT@ for the output file name. int inputs(0), outputs(0); - for (auto it = args.begin(); it != args.end(); ++it) + + std::string input("@INPUT"); + size_t pos = commandLine.find(input); + if (pos != std::string::npos) + { + commandLine.replace(pos, input.length(), _storage->getRootFilePath()); + ++inputs; + } + + std::string output("@OUTPUT@"); + pos = commandLine.find(output); + if (pos != std::string::npos) { - if (*it == "@INPUT@") - { - *it = _storage->getRootFilePath(); - ++inputs; - } - else if (*it == "@OUTPUT@") - { - *it = newRootPath; - ++outputs; - } + commandLine.replace(pos, output.length(), newRootPath); + ++outputs; } + StringVector args(LOOLProtocol::tokenize(commandLine, ' ')); + std::string command(args[0]); + args.erase(args.begin()); // strip the command + if (inputs != 1 || outputs != 1) throw std::exception(); @@ -2019,7 +2022,7 @@ bool DocumentBroker::forwardToChild(const std::string& viewId, const std::string msg = tokens[0] + ' ' + tokens[1] + ' ' + tokens[2]; msg += " jail=" + _uriJailed; msg += " xjail=" + _uriJailedAnonym; - msg += ' ' + Poco::cat(std::string(" "), tokens.begin() + 3, tokens.end()); + msg += ' ' + tokens.cat(std::string(" "), 3); } _childProcess->sendTextFrame(msg); diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp index d84f77b91..5611edba5 100644 --- a/wsd/LOOLWSD.cpp +++ b/wsd/LOOLWSD.cpp @@ -1666,7 +1666,7 @@ bool LOOLWSD::createForKit() ++OutstandingForks; LOG_INF("Launching forkit process: " << forKitPath << ' ' << - Poco::cat(std::string(" "), args.begin(), args.end())); + args.cat(std::string(" "), 0)); LastForkRequestTime = std::chrono::steady_clock::now(); int childStdin = -1; @@ -2063,9 +2063,10 @@ public: { const std::string fowardedData = request.get("X-Forwarded-For"); StringVector tokens = LOOLProtocol::tokenize(fowardedData, ','); - for(std::string& token : tokens) + for (const auto& token : tokens) { - addressToCheck = Util::trim(token); + std::string param = tokens.getParam(token); + addressToCheck = Util::trim(param); try { hostToCheck = Poco::Net::DNS::resolve(addressToCheck).name(); diff --git a/wsd/Storage.cpp b/wsd/Storage.cpp index a9caad8bb..9adc57656 100644 --- a/wsd/Storage.cpp +++ b/wsd/Storage.cpp @@ -439,7 +439,7 @@ static void addStorageReuseCookie(Poco::Net::HTTPRequest& request, const std::st StringVector cookies = LOOLProtocol::tokenize(reuseStorageCookies, ':'); for (auto cookie : cookies) { - StringVector cookieTokens = LOOLProtocol::tokenize(cookie, '='); + StringVector cookieTokens = LOOLProtocol::tokenize(cookies.getParam(cookie), '='); if (cookieTokens.size() == 2) { nvcCookies.add(cookieTokens[0], cookieTokens[1]); diff --git a/wsd/TileDesc.hpp b/wsd/TileDesc.hpp index d1296f0c8..e8eebdcf2 100644 --- a/wsd/TileDesc.hpp +++ b/wsd/TileDesc.hpp @@ -472,7 +472,7 @@ public: { std::string name; std::string value; - if (LOOLProtocol::parseNameValuePair(token, name, value)) + if (LOOLProtocol::parseNameValuePair(tokens.getParam(token), name, value)) { if (name == "tileposx") { diff --git a/wsd/TraceFile.hpp b/wsd/TraceFile.hpp index 466e06fbc..8ef7eb201 100644 --- a/wsd/TraceFile.hpp +++ b/wsd/TraceFile.hpp @@ -222,7 +222,7 @@ public: std::string newData; for (const auto& token : tokens) { - newData += token + ' '; + newData += tokens.getParam(token) + ' '; } writeLocked(id, sessionId, newData, static_cast<char>(TraceFileRecord::Direction::Incoming)); _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits