abhioncbr commented on code in PR #12502:
URL: https://github.com/apache/pinot/pull/12502#discussion_r1503454225
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -639,64 +636,17 @@ public static void
addColumnMinMaxValueInfo(PropertiesConfiguration properties,
}
/**
- * Helper method to get the valid value for setting min/max. Returns {@code
null} if the value is not supported in
- * {@link PropertiesConfiguration}, e.g. contains character with surrogate.
+ * Helper method to get the valid value for setting min/max. Returns {@code
null} if the value is too long (longer
+ * than 512 characters), or is not supported in {@link
PropertiesConfiguration}, e.g. contains character with
+ * surrogate.
*/
@Nullable
- private static String getValidPropertyValue(String value, boolean isMax,
DataType storedType) {
- String valueWithinLengthLimit = getValueWithinLengthLimit(value, isMax,
storedType);
- return storedType == DataType.STRING ?
CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(
- valueWithinLengthLimit) : valueWithinLengthLimit;
- }
-
- /**
- * Returns the original string if its length is within the allowed limit. If
the string's length exceeds the limit,
- * returns a truncated version of the string with maintaining min or max
value.
- */
- @VisibleForTesting
- static String getValueWithinLengthLimit(String value, boolean isMax,
DataType storedType) {
- int length = value.length();
- if (length <= METADATA_PROPERTY_LENGTH_LIMIT) {
- return value;
- }
- switch (storedType) {
- case STRING:
- if (isMax) {
- int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT - 1;
- // determining the index for the character having value less than
'\uFFFF'
- while (trimIndexValue < length && value.charAt(trimIndexValue) ==
'\uFFFF') {
- trimIndexValue++;
- }
- if (trimIndexValue == length) {
- return value;
- } else {
- // assigning the '\uFFFF' to make the value max.
- return value.substring(0, trimIndexValue) + '\uFFFF';
- }
- } else {
- return value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT);
- }
- case BYTES:
- if (isMax) {
- byte[] valueInByteArray = BytesUtils.toBytes(value);
- int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT / 2 - 1;
- // determining the index for the byte having value less than 0xFF
- while (trimIndexValue < valueInByteArray.length &&
valueInByteArray[trimIndexValue] == (byte) 0xFF) {
- trimIndexValue++;
- }
- if (trimIndexValue == valueInByteArray.length) {
- return value;
- } else {
- byte[] shortByteValue = Arrays.copyOf(valueInByteArray,
trimIndexValue + 1);
- shortByteValue[trimIndexValue] = (byte) 0xFF; // assigning the
0xFF to make the value max.
- return BytesUtils.toHexString(shortByteValue);
- }
- } else {
- return
BytesUtils.toHexString(Arrays.copyOf(BytesUtils.toBytes(value),
(METADATA_PROPERTY_LENGTH_LIMIT / 2)));
- }
- default:
- throw new IllegalStateException("Unsupported stored type for property
value length reduction: " + storedType);
+ private static String getValidPropertyValue(String value, DataType
storedType) {
+ if (value.length() > METADATA_PROPERTY_LENGTH_LIMIT) {
+ return null;
}
+ return storedType == DataType.STRING ?
CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(value)
Review Comment:
When we did this at that moment we were using commons-configuration1 library
for propertiesConfiguration and there were certain challenges with it for
special character support.
We have moved to commons-configuration2 recently and we can double check
that whether it's still an issue or not.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]