github-actions[bot] commented on code in PR #25510: URL: https://github.com/apache/doris/pull/25510#discussion_r1361515833
########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; Review Comment: warning: 255 is a magic number; consider replacing it with a named constant [readability-magic-numbers] ```cpp if (src == nullptr || first_octet > 255) return false; ^ ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; Review Comment: warning: 8 is a magic number; consider replacing it with a named constant [readability-magic-numbers] ```cpp offset -= 8; ^ ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) { + if (eof()) return false; + + int64_t value = 0; + size_t len = 0; + while (is_numeric_ascii(*src) && len <= 3) { + value = value * 10 + (*src - '0'); Review Comment: warning: 10 is a magic number; consider replacing it with a named constant [readability-magic-numbers] ```cpp value = value * 10 + (*src - '0'); ^ ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) { + if (eof()) return false; + + int64_t value = 0; + size_t len = 0; + while (is_numeric_ascii(*src) && len <= 3) { + value = value * 10 + (*src - '0'); + ++len; + ++src; + if (eof()) break; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (eof()) { break; } ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) { + if (eof()) return false; + + int64_t value = 0; + size_t len = 0; + while (is_numeric_ascii(*src) && len <= 3) { + value = value * 10 + (*src - '0'); + ++len; + ++src; + if (eof()) break; + } + if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) return false; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) { return false; } ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; Review Comment: warning: 24 is a magic number; consider replacing it with a named constant [readability-magic-numbers] ```cpp int offset = 24; ^ ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) { Review Comment: warning: 8 is a magic number; consider replacing it with a named constant [readability-magic-numbers] ```cpp for (; true; offset -= 8, ++src) { ^ ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (src == nullptr || first_octet > 255) { return false; } ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) { + if (eof()) return false; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (eof()) { return false; } ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) { + if (eof()) return false; + + int64_t value = 0; + size_t len = 0; + while (is_numeric_ascii(*src) && len <= 3) { + value = value * 10 + (*src - '0'); + ++len; + ++src; + if (eof()) break; + } + if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) return false; + result |= value << offset; + + if (offset == 0) break; + } + + memcpy(dst, &result, sizeof(result)); + return true; +} + +/// returns pointer to the right after parsed sequence or null on failed parsing +inline const char* parseIPv4(const char* src, const char* end, unsigned char* dst) { + if (parseIPv4( + src, [&src, end]() { return src == end; }, dst)) + return src; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion src, [&src, end]() { return src == end; }, dst)) { return src; } ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) { + if (eof()) return false; + + int64_t value = 0; + size_t len = 0; + while (is_numeric_ascii(*src) && len <= 3) { + value = value * 10 + (*src - '0'); + ++len; + ++src; + if (eof()) break; + } + if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) return false; + result |= value << offset; + + if (offset == 0) break; + } + + memcpy(dst, &result, sizeof(result)); + return true; +} + +/// returns pointer to the right after parsed sequence or null on failed parsing +inline const char* parseIPv4(const char* src, const char* end, unsigned char* dst) { + if (parseIPv4( + src, [&src, end]() { return src == end; }, dst)) + return src; + return nullptr; +} + +/// returns true if whole buffer was parsed successfully +inline bool parseIPv4whole(const char* src, const char* end, unsigned char* dst) { + return parseIPv4(src, end, dst) == end; +} + +/// returns pointer to the right after parsed sequence or null on failed parsing +inline const char* parseIPv4(const char* src, unsigned char* dst) { + if (parseIPv4( + src, []() { return false; }, dst)) + return src; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion src, []() { return false; }, dst)) { return src; } ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) { + if (eof()) return false; + + int64_t value = 0; + size_t len = 0; + while (is_numeric_ascii(*src) && len <= 3) { + value = value * 10 + (*src - '0'); + ++len; + ++src; + if (eof()) break; + } + if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) return false; + result |= value << offset; + + if (offset == 0) break; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (offset == 0) { break; } ``` ########## be/src/vec/common/format_ip.h: ########## @@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, uint8_t mask_tail_o formatIPv4(src, 4, dst, mask_tail_octets, mask_string); } +/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. + * + * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, + * which should be long enough. + * That is "127.0.0.1" becomes 0x7f000001. + * + * In case of failure doesn't modify buffer pointed by `dst`. + * + * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) + * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. + * To parse strings use overloads below. + * + * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. + * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. + * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. + * @param first_octet - preparsed first octet + * @return - true if parsed successfully, false otherwise. + */ +template <typename T, typename EOFfunction> + requires(std::is_same<typename std::remove_cv<T>::type, char>::value) +inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t first_octet = -1) { + if (src == nullptr || first_octet > 255) return false; + + int64_t result = 0; + int offset = 24; + if (first_octet >= 0) { + result |= first_octet << offset; + offset -= 8; + } + + for (; true; offset -= 8, ++src) { + if (eof()) return false; + + int64_t value = 0; + size_t len = 0; + while (is_numeric_ascii(*src) && len <= 3) { + value = value * 10 + (*src - '0'); + ++len; + ++src; + if (eof()) break; + } + if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) return false; Review Comment: warning: 255 is a magic number; consider replacing it with a named constant [readability-magic-numbers] ```cpp if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) return false; ^ ``` ########## be/src/vec/functions/function_ip.h: ########## @@ -117,4 +118,132 @@ class FunctionIPv4NumToString : public IFunction { argument.name, get_name()); } }; + +enum class IPStringToNumExceptionMode : uint8_t { Throw, Default, Null }; + +static inline bool tryParseIPv4(const char* pos, Int64& result_value) { + return parseIPv4whole(pos, reinterpret_cast<unsigned char*>(&result_value)); +} + +template <IPStringToNumExceptionMode exception_mode, typename ToColumn> +ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray<UInt8>* null_map = nullptr) { + const ColumnString* column_string = check_and_get_column<ColumnString>(column.get()); + + if (!column_string) { + throw Exception(ErrorCode::INVALID_ARGUMENT, + "Illegal column {} of argument of function {}, expected String", + column->get_name()); + } + + size_t column_size = column_string->size(); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container* vec_null_map_to = nullptr; + + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) { + col_null_map_to = ColumnUInt8::create(column_size, false); + vec_null_map_to = &col_null_map_to->get_data(); + } + + auto col_res = ToColumn::create(); + + auto& vec_res = col_res->get_data(); + vec_res.resize(column_size); + + const ColumnString::Chars& vec_src = column_string->get_chars(); + const ColumnString::Offsets& offsets_src = column_string->get_offsets(); + size_t prev_offset = 0; + + for (size_t i = 0; i < vec_res.size(); ++i) { + if (null_map && (*null_map)[i]) { + vec_res[i] = 0; + prev_offset = offsets_src[i]; + if constexpr (exception_mode == IPStringToNumExceptionMode::Null) + (*vec_null_map_to)[i] = true; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if constexpr (exception_mode == IPStringToNumExceptionMode::Null) { (*vec_null_map_to)[i] = true; } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org