github-actions[bot] commented on code in PR #25510:
URL: https://github.com/apache/doris/pull/25510#discussion_r1361515833


##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;

Review Comment:
   warning: 255 is a magic number; consider replacing it with a named constant 
[readability-magic-numbers]
   ```cpp
       if (src == nullptr || first_octet > 255) return false;
                                           ^
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;

Review Comment:
   warning: 8 is a magic number; consider replacing it with a named constant 
[readability-magic-numbers]
   ```cpp
           offset -= 8;
                     ^
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;
+    }
+
+    for (; true; offset -= 8, ++src) {
+        if (eof()) return false;
+
+        int64_t value = 0;
+        size_t len = 0;
+        while (is_numeric_ascii(*src) && len <= 3) {
+            value = value * 10 + (*src - '0');

Review Comment:
   warning: 10 is a magic number; consider replacing it with a named constant 
[readability-magic-numbers]
   ```cpp
               value = value * 10 + (*src - '0');
                               ^
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;
+    }
+
+    for (; true; offset -= 8, ++src) {
+        if (eof()) return false;
+
+        int64_t value = 0;
+        size_t len = 0;
+        while (is_numeric_ascii(*src) && len <= 3) {
+            value = value * 10 + (*src - '0');
+            ++len;
+            ++src;
+            if (eof()) break;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
               if (eof()) { break;
   }
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;
+    }
+
+    for (; true; offset -= 8, ++src) {
+        if (eof()) return false;
+
+        int64_t value = 0;
+        size_t len = 0;
+        while (is_numeric_ascii(*src) && len <= 3) {
+            value = value * 10 + (*src - '0');
+            ++len;
+            ++src;
+            if (eof()) break;
+        }
+        if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) 
return false;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != 
'.'))) { return false;
   }
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;

Review Comment:
   warning: 24 is a magic number; consider replacing it with a named constant 
[readability-magic-numbers]
   ```cpp
       int offset = 24;
                    ^
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;
+    }
+
+    for (; true; offset -= 8, ++src) {

Review Comment:
   warning: 8 is a magic number; consider replacing it with a named constant 
[readability-magic-numbers]
   ```cpp
       for (; true; offset -= 8, ++src) {
                              ^
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       if (src == nullptr || first_octet > 255) { return false;
   }
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;
+    }
+
+    for (; true; offset -= 8, ++src) {
+        if (eof()) return false;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (eof()) { return false;
   }
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;
+    }
+
+    for (; true; offset -= 8, ++src) {
+        if (eof()) return false;
+
+        int64_t value = 0;
+        size_t len = 0;
+        while (is_numeric_ascii(*src) && len <= 3) {
+            value = value * 10 + (*src - '0');
+            ++len;
+            ++src;
+            if (eof()) break;
+        }
+        if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) 
return false;
+        result |= value << offset;
+
+        if (offset == 0) break;
+    }
+
+    memcpy(dst, &result, sizeof(result));
+    return true;
+}
+
+/// returns pointer to the right after parsed sequence or null on failed 
parsing
+inline const char* parseIPv4(const char* src, const char* end, unsigned char* 
dst) {
+    if (parseIPv4(
+                src, [&src, end]() { return src == end; }, dst))
+        return src;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
                   src, [&src, end]() { return src == end; }, dst)) {
           return src;
   }
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;
+    }
+
+    for (; true; offset -= 8, ++src) {
+        if (eof()) return false;
+
+        int64_t value = 0;
+        size_t len = 0;
+        while (is_numeric_ascii(*src) && len <= 3) {
+            value = value * 10 + (*src - '0');
+            ++len;
+            ++src;
+            if (eof()) break;
+        }
+        if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) 
return false;
+        result |= value << offset;
+
+        if (offset == 0) break;
+    }
+
+    memcpy(dst, &result, sizeof(result));
+    return true;
+}
+
+/// returns pointer to the right after parsed sequence or null on failed 
parsing
+inline const char* parseIPv4(const char* src, const char* end, unsigned char* 
dst) {
+    if (parseIPv4(
+                src, [&src, end]() { return src == end; }, dst))
+        return src;
+    return nullptr;
+}
+
+/// returns true if whole buffer was parsed successfully
+inline bool parseIPv4whole(const char* src, const char* end, unsigned char* 
dst) {
+    return parseIPv4(src, end, dst) == end;
+}
+
+/// returns pointer to the right after parsed sequence or null on failed 
parsing
+inline const char* parseIPv4(const char* src, unsigned char* dst) {
+    if (parseIPv4(
+                src, []() { return false; }, dst))
+        return src;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
                   src, []() { return false; }, dst)) {
           return src;
   }
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;
+    }
+
+    for (; true; offset -= 8, ++src) {
+        if (eof()) return false;
+
+        int64_t value = 0;
+        size_t len = 0;
+        while (is_numeric_ascii(*src) && len <= 3) {
+            value = value * 10 + (*src - '0');
+            ++len;
+            ++src;
+            if (eof()) break;
+        }
+        if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) 
return false;
+        result |= value << offset;
+
+        if (offset == 0) break;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (offset == 0) { break;
   }
   ```
   



##########
be/src/vec/common/format_ip.h:
##########
@@ -95,4 +97,82 @@ inline void formatIPv4(const unsigned char* src, char*& dst, 
uint8_t mask_tail_o
     formatIPv4(src, 4, dst, mask_tail_octets, mask_string);
 }
 
+/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing 
IPv4 string.
+ *
+ * Parses the input string `src` and stores binary host-endian value into 
buffer pointed by `dst`,
+ * which should be long enough.
+ * That is "127.0.0.1" becomes 0x7f000001.
+ *
+ * In case of failure doesn't modify buffer pointed by `dst`.
+ *
+ * WARNING - this function is adapted to work with ReadBuffer, where src is 
the position reference (ReadBuffer::position())
+ *           and eof is the ReadBuffer::eof() - therefore algorithm below does 
not rely on buffer's continuity.
+ *           To parse strings use overloads below.
+ *
+ * @param src         - iterator (reference to pointer) over input string - 
warning - continuity is not guaranteed.
+ * @param eof         - function returning true if iterator reached the end - 
warning - can break iterator's continuity.
+ * @param dst         - where to put output bytes, expected to be non-null and 
at least IPV4_BINARY_LENGTH-long.
+ * @param first_octet - preparsed first octet
+ * @return            - true if parsed successfully, false otherwise.
+ */
+template <typename T, typename EOFfunction>
+    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
+inline bool parseIPv4(T*& src, EOFfunction eof, unsigned char* dst, int64_t 
first_octet = -1) {
+    if (src == nullptr || first_octet > 255) return false;
+
+    int64_t result = 0;
+    int offset = 24;
+    if (first_octet >= 0) {
+        result |= first_octet << offset;
+        offset -= 8;
+    }
+
+    for (; true; offset -= 8, ++src) {
+        if (eof()) return false;
+
+        int64_t value = 0;
+        size_t len = 0;
+        while (is_numeric_ascii(*src) && len <= 3) {
+            value = value * 10 + (*src - '0');
+            ++len;
+            ++src;
+            if (eof()) break;
+        }
+        if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != '.'))) 
return false;

Review Comment:
   warning: 255 is a magic number; consider replacing it with a named constant 
[readability-magic-numbers]
   ```cpp
           if (len == 0 || value > 255 || (offset > 0 && (eof() || *src != 
'.'))) return false;
                                   ^
   ```
   



##########
be/src/vec/functions/function_ip.h:
##########
@@ -117,4 +118,132 @@ class FunctionIPv4NumToString : public IFunction {
                 argument.name, get_name());
     }
 };
+
+enum class IPStringToNumExceptionMode : uint8_t { Throw, Default, Null };
+
+static inline bool tryParseIPv4(const char* pos, Int64& result_value) {
+    return parseIPv4whole(pos, reinterpret_cast<unsigned 
char*>(&result_value));
+}
+
+template <IPStringToNumExceptionMode exception_mode, typename ToColumn>
+ColumnPtr convertToIPv4(ColumnPtr column, const PaddedPODArray<UInt8>* 
null_map = nullptr) {
+    const ColumnString* column_string = 
check_and_get_column<ColumnString>(column.get());
+
+    if (!column_string) {
+        throw Exception(ErrorCode::INVALID_ARGUMENT,
+                        "Illegal column {} of argument of function {}, 
expected String",
+                        column->get_name());
+    }
+
+    size_t column_size = column_string->size();
+
+    ColumnUInt8::MutablePtr col_null_map_to;
+    ColumnUInt8::Container* vec_null_map_to = nullptr;
+
+    if constexpr (exception_mode == IPStringToNumExceptionMode::Null) {
+        col_null_map_to = ColumnUInt8::create(column_size, false);
+        vec_null_map_to = &col_null_map_to->get_data();
+    }
+
+    auto col_res = ToColumn::create();
+
+    auto& vec_res = col_res->get_data();
+    vec_res.resize(column_size);
+
+    const ColumnString::Chars& vec_src = column_string->get_chars();
+    const ColumnString::Offsets& offsets_src = column_string->get_offsets();
+    size_t prev_offset = 0;
+
+    for (size_t i = 0; i < vec_res.size(); ++i) {
+        if (null_map && (*null_map)[i]) {
+            vec_res[i] = 0;
+            prev_offset = offsets_src[i];
+            if constexpr (exception_mode == IPStringToNumExceptionMode::Null)
+                (*vec_null_map_to)[i] = true;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
               if constexpr (exception_mode == 
IPStringToNumExceptionMode::Null) {
                   (*vec_null_map_to)[i] = true;
   }
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to