Jackie-Jiang commented on code in PR #12502:
URL: https://github.com/apache/pinot/pull/12502#discussion_r1503469768
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -639,64 +636,17 @@ public static void
addColumnMinMaxValueInfo(PropertiesConfiguration properties,
}
/**
- * Helper method to get the valid value for setting min/max. Returns {@code
null} if the value is not supported in
- * {@link PropertiesConfiguration}, e.g. contains character with surrogate.
+ * Helper method to get the valid value for setting min/max. Returns {@code
null} if the value is too long (longer
+ * than 512 characters), or is not supported in {@link
PropertiesConfiguration}, e.g. contains character with
+ * surrogate.
*/
@Nullable
- private static String getValidPropertyValue(String value, boolean isMax,
DataType storedType) {
- String valueWithinLengthLimit = getValueWithinLengthLimit(value, isMax,
storedType);
- return storedType == DataType.STRING ?
CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(
- valueWithinLengthLimit) : valueWithinLengthLimit;
- }
-
- /**
- * Returns the original string if its length is within the allowed limit. If
the string's length exceeds the limit,
- * returns a truncated version of the string with maintaining min or max
value.
- */
- @VisibleForTesting
- static String getValueWithinLengthLimit(String value, boolean isMax,
DataType storedType) {
- int length = value.length();
- if (length <= METADATA_PROPERTY_LENGTH_LIMIT) {
- return value;
- }
- switch (storedType) {
- case STRING:
- if (isMax) {
- int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT - 1;
- // determining the index for the character having value less than
'\uFFFF'
- while (trimIndexValue < length && value.charAt(trimIndexValue) ==
'\uFFFF') {
- trimIndexValue++;
- }
- if (trimIndexValue == length) {
- return value;
- } else {
- // assigning the '\uFFFF' to make the value max.
- return value.substring(0, trimIndexValue) + '\uFFFF';
- }
- } else {
- return value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT);
- }
- case BYTES:
- if (isMax) {
- byte[] valueInByteArray = BytesUtils.toBytes(value);
- int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT / 2 - 1;
- // determining the index for the byte having value less than 0xFF
- while (trimIndexValue < valueInByteArray.length &&
valueInByteArray[trimIndexValue] == (byte) 0xFF) {
- trimIndexValue++;
- }
- if (trimIndexValue == valueInByteArray.length) {
- return value;
- } else {
- byte[] shortByteValue = Arrays.copyOf(valueInByteArray,
trimIndexValue + 1);
- shortByteValue[trimIndexValue] = (byte) 0xFF; // assigning the
0xFF to make the value max.
- return BytesUtils.toHexString(shortByteValue);
- }
- } else {
- return
BytesUtils.toHexString(Arrays.copyOf(BytesUtils.toBytes(value),
(METADATA_PROPERTY_LENGTH_LIMIT / 2)));
- }
- default:
- throw new IllegalStateException("Unsupported stored type for property
value length reduction: " + storedType);
+ private static String getValidPropertyValue(String value, DataType
storedType) {
+ if (value.length() > METADATA_PROPERTY_LENGTH_LIMIT) {
Review Comment:
When the value is over the length limit, with current code, we pick a
closest value within the length limit as the approximation
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]