github-actions[bot] commented on code in PR #19185: URL: https://github.com/apache/doris/pull/19185#discussion_r1188180922
########## be/src/util/jsonb_document.h: ########## @@ -213,6 +224,91 @@ char payload_[0]; }; +/// A simple input stream class for the JSON path parser. +class Stream { +public: + /// Creates an input stream reading from a character string. + /// @param string the input string + /// @param length the length of the input string + Stream(const char *string, size_t length) + : m_position(string), m_end(string + length), leg() {} + + /// Returns a pointer to the current position in the stream. + const char *position() const { return m_position; } + + /// Returns a pointer to the position just after the end of the stream. + const char *end() const { return m_end; } + + /// Returns the number of bytes remaining in the stream. + size_t remaining() const { + assert(m_position <= m_end); + return m_end - m_position; + } + + /// Tells if the stream has been exhausted. + bool exhausted() const { return remaining() == 0; } + + /// Reads the next byte from the stream and moves the position forward. + char read() { + assert(!exhausted()); + return *m_position++; + } + + /// Reads the next byte from the stream without moving the position forward. + char peek() const { + assert(!exhausted()); + return *m_position; + } + + /// Moves the position to the next non-whitespace character. + void skip_whitespace() { + m_position = std::find_if_not(m_position, m_end, + [](char c) { return std::isspace(c); }); Review Comment: warning: no member named 'isspace' in namespace 'std' [clang-diagnostic-error] ```cpp [](char c) { return std::isspace(c); }); ^ ``` ########## be/src/util/jsonb_document.h: ########## @@ -213,6 +224,91 @@ class JsonbDocument { char payload_[0]; }; +/// A simple input stream class for the JSON path parser. +class Stream { +public: + /// Creates an input stream reading from a character string. + /// @param string the input string + /// @param length the length of the input string + Stream(const char *string, size_t length) + : m_position(string), m_end(string + length), leg() {} + + /// Returns a pointer to the current position in the stream. + const char *position() const { return m_position; } + + /// Returns a pointer to the position just after the end of the stream. + const char *end() const { return m_end; } + + /// Returns the number of bytes remaining in the stream. + size_t remaining() const { + assert(m_position <= m_end); + return m_end - m_position; + } + + /// Tells if the stream has been exhausted. + bool exhausted() const { return remaining() == 0; } + + /// Reads the next byte from the stream and moves the position forward. + char read() { + assert(!exhausted()); + return *m_position++; + } + + /// Reads the next byte from the stream without moving the position forward. + char peek() const { + assert(!exhausted()); + return *m_position; + } + + /// Moves the position to the next non-whitespace character. + void skip_whitespace() { + m_position = std::find_if_not(m_position, m_end, Review Comment: warning: no member named 'find_if_not' in namespace 'std' [clang-diagnostic-error] ```cpp m_position = std::find_if_not(m_position, m_end, ^ ``` ########## be/src/util/jsonb_document.h: ########## @@ -213,6 +224,91 @@ char payload_[0]; }; +/// A simple input stream class for the JSON path parser. +class Stream { +public: + /// Creates an input stream reading from a character string. + /// @param string the input string + /// @param length the length of the input string + Stream(const char *string, size_t length) + : m_position(string), m_end(string + length), leg() {} + + /// Returns a pointer to the current position in the stream. + const char *position() const { return m_position; } + + /// Returns a pointer to the position just after the end of the stream. + const char *end() const { return m_end; } + + /// Returns the number of bytes remaining in the stream. + size_t remaining() const { + assert(m_position <= m_end); + return m_end - m_position; + } + + /// Tells if the stream has been exhausted. + bool exhausted() const { return remaining() == 0; } + + /// Reads the next byte from the stream and moves the position forward. + char read() { + assert(!exhausted()); + return *m_position++; + } + + /// Reads the next byte from the stream without moving the position forward. + char peek() const { + assert(!exhausted()); + return *m_position; + } + + /// Moves the position to the next non-whitespace character. + void skip_whitespace() { + m_position = std::find_if_not(m_position, m_end, + [](char c) { return std::isspace(c); }); + } + + /// Moves the position n bytes forward. + void skip(size_t n) { + assert(remaining() >= n); + m_position += n; + } + + void appendLeg(char a) { + leg += a; + } + + void clearLeg() { + leg.clear(); + } + + void setLeg(std::string a) { + clearLeg(); + leg = a; + } + + std::string getLeg() { Review Comment: warning: no type named 'string' in namespace 'std' [clang-diagnostic-error] ```cpp std::string getLeg() { ^ ``` ########## be/src/util/jsonb_document.h: ########## @@ -1072,103 +1168,159 @@ } inline JsonbValue* JsonbValue::findPath(const char* key_path, unsigned int kp_len, - const char* delim = ".", hDictFind handler = nullptr) { - if (!key_path) return nullptr; + hDictFind handler = nullptr) { + if(!key_path) return nullptr; if (kp_len == 0) return this; + Stream stream(key_path, kp_len); + stream.skip_whitespace(); + if (stream.exhausted() || stream.read() != SCOPE) return nullptr; - // skip $ and . at beginning - if (kp_len > 0 && *key_path == '$') { - key_path++; - kp_len--; - if (kp_len > 0 && *key_path == '.') { - key_path++; - kp_len--; - } - } + JsonbValue* pval = this; - if (kp_len == 0) return this; + while ( pval && !stream.exhausted() ) { - if (!delim) delim = "."; // default delimiter + stream.clearLeg(); - JsonbValue* pval = this; - const char* fence = key_path + kp_len; - char idx_buf[21]; // buffer to parse array index (integer value) - - while (pval && key_path < fence) { - const char* key = key_path; - unsigned int klen = 0; - const char* left_bracket = nullptr; - const char* right_bracket = nullptr; - size_t idx_len = 0; - // find the current key and [] bracket position - for (; key_path != fence && *key_path != *delim; ++key_path, ++klen) { - if ('[' == *key_path) { - left_bracket = key_path; - } else if (']' == *key_path) { - right_bracket = key_path; - } + if(!JsonbPath::parsePath(&stream,pval)){ + return nullptr; } - // check brackets and array index length - if (left_bracket || right_bracket) { - if (!left_bracket || !right_bracket) { - return nullptr; - } - // check the last char is ] - if (key + klen - 1 != right_bracket) { - return nullptr; - } - // the part before left_bracket is object key - klen = left_bracket - key; - // the part between left_bracket and right_bracket is array index - idx_len = right_bracket - left_bracket - 1; + if(stream.getLeg().size()==0){ + return nullptr; } - if (!klen && !idx_len) return nullptr; + if (LIKELY(pval->type_ == JsonbType::T_Object)) { + if(stream.getLeg().size() == 1 && stream.getLeg()[0] == WILDCARD){ - // get value of key in object - if (klen) { - if (LIKELY(pval->type_ == JsonbType::T_Object)) { - pval = ((ObjectVal*)pval)->find(key, klen, handler); - if (!pval) return nullptr; + return this; + } else{ + pval = ((ObjectVal*)pval)->find(stream.getLeg().c_str(), stream.getLeg().size(), handler); + } + if (!pval) return nullptr; + } else if (LIKELY(pval->type_ == JsonbType::T_Array)) { + + int index = 0; + if(stream.getLeg().size() == 1 && stream.getLeg()[0] == WILDCARD){ + return this; + } else if(std::string(stream.getLeg().c_str(),4) == LAST){ + auto pos = stream.getLeg().find(MINUS); + + if(pos != std::string::npos){ + stream.setLeg(stream.getLeg().substr(pos + 1)); + size_t num = ((ArrayVal*)pval)->numElem(); + if(std::stoi(stream.getLeg()) > num){ + return nullptr; //invalid json path + } + index = num - 1 - std::stoi(stream.getLeg()); + } else if(stream.getLeg().size() == 4){ + index = ((ArrayVal*)pval)->numElem() - 1; + } else { + return nullptr;//invalid json path + } } else { - return nullptr; + std::string::size_type pos; + index = std::stoi(stream.getLeg(),&pos,10); + if (pos != stream.getLeg().size()) { + return nullptr;//invalid json path + }else if(index >= ((ArrayVal*)pval)->numElem()){ + return nullptr; + } } + + pval = ((ArrayVal*)pval)->get(index); + } + } - // get value at idx in array - if (idx_len) { - if (LIKELY(pval->type_ == JsonbType::T_Array)) { - if (idx_len >= sizeof(idx_buf)) return nullptr; - memcpy(idx_buf, left_bracket + 1, idx_len); - idx_buf[idx_len] = 0; - - char* end = nullptr; - int index = (int)strtol(idx_buf, &end, 10); - if (end && !*end) - pval = ((ArrayVal*)pval)->get(index); - else - // incorrect index string - return nullptr; - // doris::StringParser::ParseResult parse_result; - // int index = doris::StringParser::string_to_int<int>(left_bracket + 1, idx_len, &parse_result); - // if (parse_result == doris::StringParser::ParseResult::PARSE_SUCCESS) - } else { - return nullptr; - } + return pval; +} + +inline bool JsonbPath::parsePath(Stream *stream,JsonbValue* value){ + + if(stream->peek() == BEGIN_ARRAY && value->type() == JsonbType::T_Array){ + return parse_array(stream); + } else if (stream->peek() == BEGIN_MEMBER && value->type() == JsonbType::T_Object){ + return parse_member(stream); + } else { + return false;//invalid json path + } +} + +inline bool JsonbPath::parse_array(Stream *stream){ + + assert(stream->peek() == BEGIN_ARRAY); + stream->skip(1); + stream->skip_whitespace(); + if (stream->exhausted()) return false; //invalid json path Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (stream->exhausted()) { return false; //invalid json path } ``` ########## be/src/util/jsonb_document.h: ########## @@ -1072,103 +1168,159 @@ } inline JsonbValue* JsonbValue::findPath(const char* key_path, unsigned int kp_len, - const char* delim = ".", hDictFind handler = nullptr) { - if (!key_path) return nullptr; + hDictFind handler = nullptr) { + if(!key_path) return nullptr; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if(!key_path) { return nullptr; } ``` ########## be/src/util/jsonb_document.h: ########## @@ -213,6 +224,91 @@ char payload_[0]; }; +/// A simple input stream class for the JSON path parser. +class Stream { +public: + /// Creates an input stream reading from a character string. + /// @param string the input string + /// @param length the length of the input string + Stream(const char *string, size_t length) + : m_position(string), m_end(string + length), leg() {} + + /// Returns a pointer to the current position in the stream. + const char *position() const { return m_position; } + + /// Returns a pointer to the position just after the end of the stream. + const char *end() const { return m_end; } + + /// Returns the number of bytes remaining in the stream. + size_t remaining() const { + assert(m_position <= m_end); + return m_end - m_position; + } + + /// Tells if the stream has been exhausted. + bool exhausted() const { return remaining() == 0; } + + /// Reads the next byte from the stream and moves the position forward. + char read() { + assert(!exhausted()); + return *m_position++; + } + + /// Reads the next byte from the stream without moving the position forward. + char peek() const { + assert(!exhausted()); + return *m_position; + } + + /// Moves the position to the next non-whitespace character. + void skip_whitespace() { + m_position = std::find_if_not(m_position, m_end, + [](char c) { return std::isspace(c); }); + } + + /// Moves the position n bytes forward. + void skip(size_t n) { + assert(remaining() >= n); + m_position += n; + } + + void appendLeg(char a) { + leg += a; + } + + void clearLeg() { + leg.clear(); + } + + void setLeg(std::string a) { + clearLeg(); + leg = a; + } + + std::string getLeg() { + return leg; + } + +private: + /// The current position in the stream. + const char *m_position; + + /// The end of the stream. + const char *const m_end; + + ///path leg + std::string leg; Review Comment: warning: no type named 'string' in namespace 'std' [clang-diagnostic-error] ```cpp std::string leg; ^ ``` ########## be/src/util/jsonb_document.h: ########## @@ -213,6 +224,91 @@ char payload_[0]; }; +/// A simple input stream class for the JSON path parser. +class Stream { +public: + /// Creates an input stream reading from a character string. + /// @param string the input string + /// @param length the length of the input string + Stream(const char *string, size_t length) + : m_position(string), m_end(string + length), leg() {} + + /// Returns a pointer to the current position in the stream. + const char *position() const { return m_position; } + + /// Returns a pointer to the position just after the end of the stream. + const char *end() const { return m_end; } + + /// Returns the number of bytes remaining in the stream. + size_t remaining() const { + assert(m_position <= m_end); + return m_end - m_position; + } + + /// Tells if the stream has been exhausted. + bool exhausted() const { return remaining() == 0; } + + /// Reads the next byte from the stream and moves the position forward. + char read() { + assert(!exhausted()); + return *m_position++; + } + + /// Reads the next byte from the stream without moving the position forward. + char peek() const { + assert(!exhausted()); + return *m_position; + } + + /// Moves the position to the next non-whitespace character. + void skip_whitespace() { + m_position = std::find_if_not(m_position, m_end, + [](char c) { return std::isspace(c); }); + } + + /// Moves the position n bytes forward. + void skip(size_t n) { + assert(remaining() >= n); + m_position += n; + } + + void appendLeg(char a) { + leg += a; + } + + void clearLeg() { + leg.clear(); + } + + void setLeg(std::string a) { Review Comment: warning: no type named 'string' in namespace 'std' [clang-diagnostic-error] ```cpp void setLeg(std::string a) { ^ ``` ########## be/src/util/jsonb_document.h: ########## @@ -1072,103 +1168,159 @@ } inline JsonbValue* JsonbValue::findPath(const char* key_path, unsigned int kp_len, - const char* delim = ".", hDictFind handler = nullptr) { - if (!key_path) return nullptr; + hDictFind handler = nullptr) { + if(!key_path) return nullptr; if (kp_len == 0) return this; + Stream stream(key_path, kp_len); + stream.skip_whitespace(); + if (stream.exhausted() || stream.read() != SCOPE) return nullptr; - // skip $ and . at beginning - if (kp_len > 0 && *key_path == '$') { - key_path++; - kp_len--; - if (kp_len > 0 && *key_path == '.') { - key_path++; - kp_len--; - } - } + JsonbValue* pval = this; - if (kp_len == 0) return this; + while ( pval && !stream.exhausted() ) { - if (!delim) delim = "."; // default delimiter + stream.clearLeg(); - JsonbValue* pval = this; - const char* fence = key_path + kp_len; - char idx_buf[21]; // buffer to parse array index (integer value) - - while (pval && key_path < fence) { - const char* key = key_path; - unsigned int klen = 0; - const char* left_bracket = nullptr; - const char* right_bracket = nullptr; - size_t idx_len = 0; - // find the current key and [] bracket position - for (; key_path != fence && *key_path != *delim; ++key_path, ++klen) { - if ('[' == *key_path) { - left_bracket = key_path; - } else if (']' == *key_path) { - right_bracket = key_path; - } + if(!JsonbPath::parsePath(&stream,pval)){ + return nullptr; } - // check brackets and array index length - if (left_bracket || right_bracket) { - if (!left_bracket || !right_bracket) { - return nullptr; - } - // check the last char is ] - if (key + klen - 1 != right_bracket) { - return nullptr; - } - // the part before left_bracket is object key - klen = left_bracket - key; - // the part between left_bracket and right_bracket is array index - idx_len = right_bracket - left_bracket - 1; + if(stream.getLeg().size()==0){ + return nullptr; } - if (!klen && !idx_len) return nullptr; + if (LIKELY(pval->type_ == JsonbType::T_Object)) { + if(stream.getLeg().size() == 1 && stream.getLeg()[0] == WILDCARD){ - // get value of key in object - if (klen) { - if (LIKELY(pval->type_ == JsonbType::T_Object)) { - pval = ((ObjectVal*)pval)->find(key, klen, handler); - if (!pval) return nullptr; + return this; + } else{ + pval = ((ObjectVal*)pval)->find(stream.getLeg().c_str(), stream.getLeg().size(), handler); + } + if (!pval) return nullptr; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (!pval) { return nullptr; } ``` ########## be/src/util/jsonb_document.h: ########## @@ -1072,103 +1168,159 @@ } inline JsonbValue* JsonbValue::findPath(const char* key_path, unsigned int kp_len, - const char* delim = ".", hDictFind handler = nullptr) { - if (!key_path) return nullptr; + hDictFind handler = nullptr) { + if(!key_path) return nullptr; if (kp_len == 0) return this; + Stream stream(key_path, kp_len); + stream.skip_whitespace(); + if (stream.exhausted() || stream.read() != SCOPE) return nullptr; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (stream.exhausted() || stream.read() != SCOPE) { return nullptr; } ``` ########## be/src/util/jsonb_document.h: ########## @@ -1072,103 +1168,159 @@ } inline JsonbValue* JsonbValue::findPath(const char* key_path, unsigned int kp_len, - const char* delim = ".", hDictFind handler = nullptr) { - if (!key_path) return nullptr; + hDictFind handler = nullptr) { + if(!key_path) return nullptr; if (kp_len == 0) return this; + Stream stream(key_path, kp_len); + stream.skip_whitespace(); + if (stream.exhausted() || stream.read() != SCOPE) return nullptr; - // skip $ and . at beginning - if (kp_len > 0 && *key_path == '$') { - key_path++; - kp_len--; - if (kp_len > 0 && *key_path == '.') { - key_path++; - kp_len--; - } - } + JsonbValue* pval = this; - if (kp_len == 0) return this; + while ( pval && !stream.exhausted() ) { - if (!delim) delim = "."; // default delimiter + stream.clearLeg(); - JsonbValue* pval = this; - const char* fence = key_path + kp_len; - char idx_buf[21]; // buffer to parse array index (integer value) - - while (pval && key_path < fence) { - const char* key = key_path; - unsigned int klen = 0; - const char* left_bracket = nullptr; - const char* right_bracket = nullptr; - size_t idx_len = 0; - // find the current key and [] bracket position - for (; key_path != fence && *key_path != *delim; ++key_path, ++klen) { - if ('[' == *key_path) { - left_bracket = key_path; - } else if (']' == *key_path) { - right_bracket = key_path; - } + if(!JsonbPath::parsePath(&stream,pval)){ + return nullptr; } - // check brackets and array index length - if (left_bracket || right_bracket) { - if (!left_bracket || !right_bracket) { - return nullptr; - } - // check the last char is ] - if (key + klen - 1 != right_bracket) { - return nullptr; - } - // the part before left_bracket is object key - klen = left_bracket - key; - // the part between left_bracket and right_bracket is array index - idx_len = right_bracket - left_bracket - 1; + if(stream.getLeg().size()==0){ + return nullptr; } - if (!klen && !idx_len) return nullptr; + if (LIKELY(pval->type_ == JsonbType::T_Object)) { + if(stream.getLeg().size() == 1 && stream.getLeg()[0] == WILDCARD){ - // get value of key in object - if (klen) { - if (LIKELY(pval->type_ == JsonbType::T_Object)) { - pval = ((ObjectVal*)pval)->find(key, klen, handler); - if (!pval) return nullptr; + return this; + } else{ + pval = ((ObjectVal*)pval)->find(stream.getLeg().c_str(), stream.getLeg().size(), handler); + } + if (!pval) return nullptr; + } else if (LIKELY(pval->type_ == JsonbType::T_Array)) { + + int index = 0; + if(stream.getLeg().size() == 1 && stream.getLeg()[0] == WILDCARD){ + return this; + } else if(std::string(stream.getLeg().c_str(),4) == LAST){ + auto pos = stream.getLeg().find(MINUS); + + if(pos != std::string::npos){ + stream.setLeg(stream.getLeg().substr(pos + 1)); + size_t num = ((ArrayVal*)pval)->numElem(); + if(std::stoi(stream.getLeg()) > num){ + return nullptr; //invalid json path + } + index = num - 1 - std::stoi(stream.getLeg()); + } else if(stream.getLeg().size() == 4){ + index = ((ArrayVal*)pval)->numElem() - 1; + } else { + return nullptr;//invalid json path + } } else { - return nullptr; + std::string::size_type pos; + index = std::stoi(stream.getLeg(),&pos,10); + if (pos != stream.getLeg().size()) { + return nullptr;//invalid json path + }else if(index >= ((ArrayVal*)pval)->numElem()){ + return nullptr; + } } + + pval = ((ArrayVal*)pval)->get(index); + } + } - // get value at idx in array - if (idx_len) { - if (LIKELY(pval->type_ == JsonbType::T_Array)) { - if (idx_len >= sizeof(idx_buf)) return nullptr; - memcpy(idx_buf, left_bracket + 1, idx_len); - idx_buf[idx_len] = 0; - - char* end = nullptr; - int index = (int)strtol(idx_buf, &end, 10); - if (end && !*end) - pval = ((ArrayVal*)pval)->get(index); - else - // incorrect index string - return nullptr; - // doris::StringParser::ParseResult parse_result; - // int index = doris::StringParser::string_to_int<int>(left_bracket + 1, idx_len, &parse_result); - // if (parse_result == doris::StringParser::ParseResult::PARSE_SUCCESS) - } else { - return nullptr; - } + return pval; +} + +inline bool JsonbPath::parsePath(Stream *stream,JsonbValue* value){ + + if(stream->peek() == BEGIN_ARRAY && value->type() == JsonbType::T_Array){ + return parse_array(stream); + } else if (stream->peek() == BEGIN_MEMBER && value->type() == JsonbType::T_Object){ + return parse_member(stream); + } else { + return false;//invalid json path + } +} + +inline bool JsonbPath::parse_array(Stream *stream){ + + assert(stream->peek() == BEGIN_ARRAY); + stream->skip(1); + stream->skip_whitespace(); + if (stream->exhausted()) return false; //invalid json path + + if (stream->peek() == WILDCARD) { + stream->skip(1); + if(stream->peek() == END_ARRAY){ + stream->appendLeg(WILDCARD); + return true; + } else { + return false;//invalid json path } + } + + for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->skip(1)) { + stream->appendLeg(stream->peek()); + } + + if(!stream->exhausted() && stream->peek() == END_ARRAY){ + stream->skip(1); + return true; + } else { + return false;//invalid json path + } +} + +inline bool JsonbPath::parse_member(Stream *stream){ - // skip the delimiter - if (key_path < fence) { - ++key_path; - if (key_path == fence) - // we have a trailing delimiter at the end - return nullptr; + // advance past the . + assert(stream->peek() == BEGIN_MEMBER); + stream->skip(1); + stream->skip_whitespace(); + if (stream->exhausted()) return false; //invalid json path Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (stream->exhausted()) { return false; //invalid json path } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org