[GitHub] [flink] KurtYoung commented on a change in pull request #8689: [FLINK-12802][table-runtime-blink] Reducing the Code of BinaryString

GitBox Fri, 14 Jun 2019 01:58:55 -0700

KurtYoung commented on a change in pull request #8689: [FLINK-12802][table-runtime-blink] Reducing the Code of BinaryString
URL: https://github.com/apache/flink/pull/8689#discussion_r293718259


 ##########
 File path: flink-table/flink-table-runtime-blink/src/main/java/org/apache/flink/table/dataformat/BinaryString.java
 ##########
 @@ -654,531 +498,211 @@ private BinaryString trimSlow() {
        }
 
        /**
-        * Walk each character of current string from both ends, remove the 
character if it
-        * is in trim string.
-        * Return the new substring which both ends trim characters have been 
removed.
+        * Returns the index within this string of the first occurrence of the
+        * specified substring, starting at the specified index.
         *
-        * @param trimStr the trim string
-        * @return A subString which both ends trim characters have been 
removed.
+        * @param   str         the substring to search for.
+        * @param   fromIndex   the index from which to start the search.
+        * @return  the index of the first occurrence of the specified 
substring,
+        *          starting at the specified index,
+        *          or {@code -1} if there is no such occurrence.
         */
-       public BinaryString trim(BinaryString trimStr) {
-               if (trimStr == null) {
-                       return null;
-               }
-               return trimLeft(trimStr).trimRight(trimStr);
-       }
-
-       public BinaryString trimLeft() {
+       public int indexOf(BinaryString str, int fromIndex) {
                ensureMaterialized();
+               str.ensureMaterialized();
+               if (str.sizeInBytes == 0) {
+                       return 0;
+               }
                if (inFirstSegment()) {
-                       int s = 0;
-                       // skip all of the space (0x20) in the left side
-                       while (s < this.sizeInBytes && getByteOneSegment(s) == 
0x20) {
-                               s++;
-                       }
-                       if (s == this.sizeInBytes) {
-                               // empty string
-                               return EMPTY_UTF8;
-                       } else {
-                               return copyBinaryStringInOneSeg(s, 
this.sizeInBytes - 1);
+                       // position in byte
+                       int byteIdx = 0;
+                       // position is char
+                       int charIdx = 0;
+                       while (byteIdx < sizeInBytes && charIdx < fromIndex) {
+                               byteIdx += 
numBytesForFirstByte(getByteOneSegment(byteIdx));
+                               charIdx++;
                        }
+                       do {
+                               if (byteIdx + str.sizeInBytes > sizeInBytes) {
+                                       return -1;
+                               }
+                               if (SegmentsUtil.equals(segments, offset + 
byteIdx,
+                                               str.segments, str.offset, 
str.sizeInBytes)) {
+                                       return charIdx;
+                               }
+                               byteIdx += 
numBytesForFirstByte(getByteOneSegment(byteIdx));
+                               charIdx++;
+                       } while (byteIdx < sizeInBytes);
+
+                       return -1;
                } else {
-                       return trimLeftSlow();
+                       return indexOfMultiSegs(str, fromIndex);
                }
        }
 
-       private BinaryString trimLeftSlow() {
-               int s = 0;
+       private int indexOfMultiSegs(BinaryString str, int fromIndex) {
+               // position in byte
+               int byteIdx = 0;
+               // position is char
+               int charIdx = 0;
                int segSize = segments[0].size();
-               SegmentAndOffset front = firstSegmentAndOffset(segSize);
-               // skip all of the space (0x20) in the left side
-               while (s < this.sizeInBytes && front.value() == 0x20) {
-                       s++;
-                       front.nextByte(segSize);
-               }
-               if (s == this.sizeInBytes) {
-                       // empty string
-                       return EMPTY_UTF8;
-               } else {
-                       return copyBinaryString(s, this.sizeInBytes - 1);
+               SegmentAndOffset index = firstSegmentAndOffset(segSize);
+               while (byteIdx < sizeInBytes && charIdx < fromIndex) {
+                       int charBytes = numBytesForFirstByte(index.value());
+                       byteIdx += charBytes;
+                       charIdx++;
+                       index.skipBytes(charBytes, segSize);
                }
+               do {
+                       if (byteIdx + str.sizeInBytes > sizeInBytes) {
+                               return -1;
+                       }
+                       if (SegmentsUtil.equals(segments, offset + byteIdx,
+                                       str.segments, str.offset, 
str.sizeInBytes)) {
+                               return charIdx;
+                       }
+                       int charBytes = 
numBytesForFirstByte(index.segment.get(index.offset));
+                       byteIdx += charBytes;
+                       charIdx++;
+                       index.skipBytes(charBytes, segSize);
+               } while (byteIdx < sizeInBytes);
+
+               return -1;
        }
 
        /**
-        * Walk each character of current string from left end, remove the 
character if it
-        * is in trim string. Stops at the first character which is not in trim 
string.
-        * Return the new substring.
+        * Converts all of the characters in this {@code BinaryString} to upper 
case.
         *
-        * @param trimStr the trim string
-        * @return A subString which removes all of the character from the left 
side that is in
-        * trim string.
+        * @return the {@code BinaryString}, converted to uppercase.
         */
-       public BinaryString trimLeft(BinaryString trimStr) {
-               ensureMaterialized();
-               if (trimStr == null) {
-                       return null;
+       public BinaryString toUpperCase() {
+               if (javaObject != null) {
+                       return javaToUpperCase();
                }
-               trimStr.ensureMaterialized();
-               if (trimStr.isSpaceString()) {
-                       return trimLeft();
+               if (sizeInBytes == 0) {
+                       return EMPTY_UTF8;
                }
-               if (inFirstSegment()) {
-                       int searchIdx = 0;
-                       while (searchIdx < this.sizeInBytes) {
-                               int charBytes = 
numBytesForFirstByte(getByteOneSegment(searchIdx));
-                               BinaryString currentChar = 
copyBinaryStringInOneSeg(searchIdx,
-                                       searchIdx + charBytes - 1);
-                               // try to find the matching for the character 
in the trimString characters.
-                               if (trimStr.contains(currentChar)) {
-                                       searchIdx += charBytes;
-                               } else {
-                                       break;
-                               }
+               int size = segments[0].size();
+               SegmentAndOffset segmentAndOffset = startSegmentAndOffset(size);
+               byte[] bytes = new byte[sizeInBytes];
+               bytes[0] = (byte) 
Character.toTitleCase(segmentAndOffset.value());
+               for (int i = 0; i < sizeInBytes; i++) {
+                       byte b = segmentAndOffset.value();
+                       if (numBytesForFirstByte(b) != 1) {
+                               // fallback
+                               return javaToUpperCase();
                        }
-                       // empty string
-                       if (searchIdx >= sizeInBytes) {
-                               return EMPTY_UTF8;
-                       } else {
-                               return copyBinaryStringInOneSeg(searchIdx, 
sizeInBytes - 1);
+                       int upper = Character.toUpperCase((int) b);
+                       if (upper > 127) {
+                               // fallback
+                               return javaToUpperCase();
                        }
-               } else {
-                       return trimLeftSlow(trimStr);
+                       bytes[i] = (byte) upper;
+                       segmentAndOffset.nextByte(size);
                }
+               return fromBytes(bytes);
        }
 
-       private BinaryString trimLeftSlow(BinaryString trimStr) {
-               int searchIdx = 0;
-               int segSize = segments[0].size();
-               SegmentAndOffset front = firstSegmentAndOffset(segSize);
-               while (searchIdx < this.sizeInBytes) {
-                       int charBytes = numBytesForFirstByte(front.value());
-                       BinaryString currentChar = copyBinaryString(searchIdx, 
searchIdx + charBytes - 1);
-                       if (trimStr.contains(currentChar)) {
-                               searchIdx += charBytes;
-                               front.skipBytes(charBytes, segSize);
-                       } else {
-                               break;
-                       }
+       private BinaryString javaToUpperCase() {
+               return fromString(toString().toUpperCase());
+       }
+
+       /**
+        * Converts all of the characters in this {@code BinaryString} to lower 
case.
+        *
+        * @return the {@code BinaryString}, converted to lowercase.
+        */
+       public BinaryString toLowerCase() {
+               if (javaObject != null) {
+                       return javaToLowerCase();
                }
-               if (searchIdx == this.sizeInBytes) {
-                       // empty string
+               if (sizeInBytes == 0) {
                        return EMPTY_UTF8;
-               } else {
-                       return copyBinaryString(searchIdx, this.sizeInBytes - 
1);
                }
-       }
-
-       public BinaryString trimRight() {
-               ensureMaterialized();
-               if (inFirstSegment()) {
-                       int e = sizeInBytes - 1;
-                       // skip all of the space (0x20) in the right side
-                       while (e >= 0 && getByteOneSegment(e) == 0x20) {
-                               e--;
-                       }
-
-                       if (e < 0) {
-                               // empty string
-                               return EMPTY_UTF8;
-                       } else {
-                               return copyBinaryStringInOneSeg(0, e);
-                       }
-               } else {
-                       return trimRightSlow();
-               }
-       }
-
-       private BinaryString trimRightSlow() {
-               int e = sizeInBytes - 1;
-               int segSize = segments[0].size();
-               SegmentAndOffset behind = lastSegmentAndOffset(segSize);
-               // skip all of the space (0x20) in the right side
-               while (e >= 0 && behind.value() == 0x20) {
-                       e--;
-                       behind.previousByte(segSize);
-               }
-
-               if (e < 0) {
-                       // empty string
-                       return EMPTY_UTF8;
-               } else {
-                       return copyBinaryString(0, e);
-               }
-       }
-
-       /**
-        * Walk each character of current string from right end, remove the 
character if it
-        * is in trim string. Stops at the first character which is not in trim 
string.
-        * Return the new substring.
-        *
-        * @param trimStr the trim string
-        * @return A subString which removes all of the character from the 
right side that is in
-        * trim string.
-        */
-       public BinaryString trimRight(BinaryString trimStr) {
-               ensureMaterialized();
-               if (trimStr == null) {
-                       return null;
-               }
-               trimStr.ensureMaterialized();
-               if (trimStr.isSpaceString()) {
-                       return trimRight();
-               }
-               if (inFirstSegment()) {
-                       int charIdx = 0;
-                       int byteIdx = 0;
-                       // each element in charLens is length of character in 
the source string
-                       int[] charLens = new int[sizeInBytes];
-                       // each element in charStartPos is start position of 
first byte in the source string
-                       int[] charStartPos = new int[sizeInBytes];
-                       while (byteIdx < sizeInBytes) {
-                               charStartPos[charIdx] = byteIdx;
-                               charLens[charIdx] = 
numBytesForFirstByte(getByteOneSegment(byteIdx));
-                               byteIdx += charLens[charIdx];
-                               charIdx++;
-                       }
-                       // searchIdx points to the first character which is not 
in trim string from the right
-                       // end.
-                       int searchIdx = sizeInBytes - 1;
-                       charIdx -= 1;
-                       while (charIdx >= 0) {
-                               BinaryString currentChar = 
copyBinaryStringInOneSeg(
-                                       charStartPos[charIdx],
-                                       charStartPos[charIdx] + 
charLens[charIdx] - 1);
-                               if (trimStr.contains(currentChar)) {
-                                       searchIdx -= charLens[charIdx];
-                               } else {
-                                       break;
-                               }
-                               charIdx--;
-                       }
-                       if (searchIdx < 0) {
-                               // empty string
-                               return EMPTY_UTF8;
-                       } else {
-                               return copyBinaryStringInOneSeg(0, searchIdx);
+               int size = segments[0].size();
+               SegmentAndOffset segmentAndOffset = startSegmentAndOffset(size);
+               byte[] bytes = new byte[sizeInBytes];
+               bytes[0] = (byte) 
Character.toTitleCase(segmentAndOffset.value());
+               for (int i = 0; i < sizeInBytes; i++) {
+                       byte b = segmentAndOffset.value();
+                       if (numBytesForFirstByte(b) != 1) {
+                               // fallback
+                               return javaToLowerCase();
                        }
-               } else {
-                       return trimRightSlow(trimStr);
-               }
-       }
-
-       private BinaryString trimRightSlow(BinaryString trimStr) {
-               int charIdx = 0;
-               int byteIdx = 0;
-               int segSize = segments[0].size();
-               SegmentAndOffset index = firstSegmentAndOffset(segSize);
-               // each element in charLens is length of character in the 
source string
-               int[] charLens = new int[sizeInBytes];
-               // each element in charStartPos is start position of first byte 
in the source string
-               int[] charStartPos = new int[sizeInBytes];
-               while (byteIdx < sizeInBytes) {
-                       charStartPos[charIdx] = byteIdx;
-                       int charBytes = numBytesForFirstByte(index.value());
-                       charLens[charIdx] = charBytes;
-                       byteIdx += charBytes;
-                       charIdx++;
-                       index.skipBytes(charBytes, segSize);
-               }
-               // searchIdx points to the first character which is not in trim 
string from the right
-               // end.
-               int searchIdx = sizeInBytes - 1;
-               charIdx -= 1;
-               while (charIdx >= 0) {
-                       BinaryString currentChar = copyBinaryString(
-                               charStartPos[charIdx],
-                               charStartPos[charIdx] + charLens[charIdx] - 1);
-                       if (trimStr.contains(currentChar)) {
-                               searchIdx -= charLens[charIdx];
-                       } else {
-                               break;
+                       int lower = Character.toLowerCase((int) b);
+                       if (lower > 127) {
+                               // fallback
+                               return javaToLowerCase();
                        }
-                       charIdx--;
-               }
-               if (searchIdx < 0) {
-                       // empty string
-                       return EMPTY_UTF8;
-               } else {
-                       return copyBinaryString(0, searchIdx);
+                       bytes[i] = (byte) lower;
+                       segmentAndOffset.nextByte(size);
                }
+               return fromBytes(bytes);
        }
 
-       public BinaryString trim(boolean leading, boolean trailing, 
BinaryString seek) {
-               ensureMaterialized();
-               if (seek == null) {
-                       return null;
-               }
-               if (leading && trailing) {
-                       return trim(seek);
-               } else if (leading) {
-                       return trimLeft(seek);
-               } else if (trailing) {
-                       return trimRight(seek);
-               } else {
-                       return this;
-               }
+       private BinaryString javaToLowerCase() {
+               return fromString(toString().toLowerCase());
        }
 
-       /**
-        * Parse target string as key-value string and
-        * return the value matches key name.
-        * If accept any null arguments, return null.
-        * example:
-        * keyvalue('k1=v1;k2=v2', ';', '=', 'k2') = 'v2'
-        * keyvalue('k1:v1,k2:v2', ',', ':', 'k3') = NULL
-        *
-        * @param split1  separator between key-value tuple.
-        * @param split2  separator between key and value.
-        * @param keyName name of the key whose value you want return.
-        *
-        * @return target value.
-        */
-       public BinaryString keyValue(byte split1, byte split2, BinaryString 
keyName) {
-               ensureMaterialized();
-               if (keyName == null || keyName.getSizeInBytes() == 0) {
-                       return null;
-               }
-               if (inFirstSegment() && keyName.inFirstSegment()) {
-                       // position in byte
-                       int byteIdx = 0;
-                       // position of last split1
-                       int lastSplit1Idx = -1;
-                       while (byteIdx < sizeInBytes) {
-                               // If find next split1 in str, process current 
kv
-                               if (segments[0].get(offset + byteIdx) == 
split1) {
-                                       int currentKeyIdx = lastSplit1Idx + 1;
-                                       // If key of current kv is keyName, 
return the value directly
-                                       BinaryString value = 
findValueOfKey(split2, keyName, currentKeyIdx, byteIdx);
-                                       if (value != null) {
-                                               return value;
-                                       }
-                                       lastSplit1Idx = byteIdx;
-                               }
-                               byteIdx++;
-                       }
-                       // process the string which is not ends with split1
-                       int currentKeyIdx = lastSplit1Idx + 1;
-                       return findValueOfKey(split2, keyName, currentKeyIdx, 
sizeInBytes);
-               } else {
-                       return keyValueSlow(split1, split2, keyName);
-               }
-       }
+       // 
------------------------------------------------------------------------------------------
+       // Internal methods on BinaryString
+       // 
------------------------------------------------------------------------------------------
 
-       private BinaryString findValueOfKey(
-               byte split,
-               BinaryString keyName,
-               int start,
-               int end) {
-               int keyNameLen = keyName.sizeInBytes;
-               for (int idx = start; idx < end; idx++) {
-                       if (segments[0].get(offset + idx) == split) {
-                               if (idx == start + keyNameLen &&
-                                       
segments[0].equalTo(keyName.segments[0], offset + start,
-                                               keyName.offset, keyNameLen)) {
-                                       int valueIdx = idx + 1;
-                                       int valueLen = end - valueIdx;
-                                       byte[] bytes = new byte[valueLen];
-                                       segments[0].get(offset + valueIdx, 
bytes, 0, valueLen);
-                                       return fromBytes(bytes, 0, valueLen);
-                               } else {
-                                       return null;
-                               }
-                       }
-               }
-               return null;
+       byte getByteOneSegment(int i) {
 
 Review comment:
   Should `getByteOneSegment` be private?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

[GitHub] [flink] KurtYoung commented on a change in pull request #8689: [FLINK-12802][table-runtime-blink] Reducing the Code of BinaryString

Reply via email to