jadami10 commented on code in PR #12502:
URL: https://github.com/apache/pinot/pull/12502#discussion_r1503435128


##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -124,227 +131,251 @@ private List<String> getColumnsToAddMinMaxValue() {
   }
 
   private boolean needAddColumnMinMaxValueForColumn(String columnName) {
-    ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
+    return 
needAddColumnMinMaxValueForColumn(_segmentMetadata.getColumnMetadataFor(columnName));
+  }
+
+  private boolean needAddColumnMinMaxValueForColumn(ColumnMetadata 
columnMetadata) {
     return columnMetadata.getMinValue() == null && 
columnMetadata.getMaxValue() == null
         && !columnMetadata.isMinMaxValueInvalid();
   }
 
-  private void addColumnMinMaxValueForColumn(String columnName)
-      throws Exception {
-    // Skip column with min/max value already set
+  private void addColumnMinMaxValueForColumn(String columnName) {
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue() 
!= null) {
+    if (!needAddColumnMinMaxValueForColumn(columnMetadata)) {
       return;
     }
+    try {
+      if (columnMetadata.hasDictionary()) {
+        addColumnMinMaxValueWithDictionary(columnMetadata);
+      } else {
+        addColumnMinMaxValueWithoutDictionary(columnMetadata);
+      }
+      _minMaxValueAdded = true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while generating min/max value for 
column: {} in segment: {}, continuing without "

Review Comment:
   this is the part that's definitely more informed and thorough than my fix. 
So any errors here are now logged rather than throwing. thank you



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -639,64 +636,17 @@ public static void 
addColumnMinMaxValueInfo(PropertiesConfiguration properties,
   }
 
   /**
-   * Helper method to get the valid value for setting min/max. Returns {@code 
null} if the value is not supported in
-   * {@link PropertiesConfiguration}, e.g. contains character with surrogate.
+   * Helper method to get the valid value for setting min/max. Returns {@code 
null} if the value is too long (longer
+   * than 512 characters), or is not supported in {@link 
PropertiesConfiguration}, e.g. contains character with
+   * surrogate.
    */
   @Nullable
-  private static String getValidPropertyValue(String value, boolean isMax, 
DataType storedType) {
-    String valueWithinLengthLimit = getValueWithinLengthLimit(value, isMax, 
storedType);
-    return storedType == DataType.STRING ? 
CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(
-        valueWithinLengthLimit) : valueWithinLengthLimit;
-  }
-
-  /**
-   * Returns the original string if its length is within the allowed limit. If 
the string's length exceeds the limit,
-   * returns a truncated version of the string with maintaining min or max 
value.
-   */
-  @VisibleForTesting
-  static String getValueWithinLengthLimit(String value, boolean isMax, 
DataType storedType) {
-    int length = value.length();
-    if (length <= METADATA_PROPERTY_LENGTH_LIMIT) {
-      return value;
-    }
-    switch (storedType) {
-      case STRING:
-        if (isMax) {
-          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT - 1;
-          // determining the index for the character having value less than 
'\uFFFF'
-          while (trimIndexValue < length && value.charAt(trimIndexValue) == 
'\uFFFF') {
-            trimIndexValue++;
-          }
-          if (trimIndexValue == length) {
-            return value;
-          } else {
-            // assigning the '\uFFFF' to make the value max.
-            return value.substring(0, trimIndexValue) + '\uFFFF';
-          }
-        } else {
-          return value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT);
-        }
-      case BYTES:
-        if (isMax) {
-          byte[] valueInByteArray = BytesUtils.toBytes(value);
-          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT / 2 - 1;
-          // determining the index for the byte having value less than 0xFF
-          while (trimIndexValue < valueInByteArray.length && 
valueInByteArray[trimIndexValue] == (byte) 0xFF) {
-            trimIndexValue++;
-          }
-          if (trimIndexValue == valueInByteArray.length) {
-            return value;
-          } else {
-            byte[] shortByteValue = Arrays.copyOf(valueInByteArray, 
trimIndexValue + 1);
-            shortByteValue[trimIndexValue] = (byte) 0xFF; // assigning the 
0xFF to make the value max.
-            return BytesUtils.toHexString(shortByteValue);
-          }
-        } else {
-          return 
BytesUtils.toHexString(Arrays.copyOf(BytesUtils.toBytes(value), 
(METADATA_PROPERTY_LENGTH_LIMIT / 2)));
-        }
-      default:
-        throw new IllegalStateException("Unsupported stored type for property 
value length reduction: " + storedType);
+  private static String getValidPropertyValue(String value, DataType 
storedType) {
+    if (value.length() > METADATA_PROPERTY_LENGTH_LIMIT) {
+      return null;
     }
+    return storedType == DataType.STRING ? 
CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(value)

Review Comment:
   what's the issue with keeping special characters in? does 
`PropertiesConfiguration` not support them?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -124,227 +131,251 @@ private List<String> getColumnsToAddMinMaxValue() {
   }
 
   private boolean needAddColumnMinMaxValueForColumn(String columnName) {
-    ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
+    return 
needAddColumnMinMaxValueForColumn(_segmentMetadata.getColumnMetadataFor(columnName));
+  }
+
+  private boolean needAddColumnMinMaxValueForColumn(ColumnMetadata 
columnMetadata) {
     return columnMetadata.getMinValue() == null && 
columnMetadata.getMaxValue() == null
         && !columnMetadata.isMinMaxValueInvalid();
   }
 
-  private void addColumnMinMaxValueForColumn(String columnName)
-      throws Exception {
-    // Skip column with min/max value already set
+  private void addColumnMinMaxValueForColumn(String columnName) {
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue() 
!= null) {
+    if (!needAddColumnMinMaxValueForColumn(columnMetadata)) {
       return;
     }
+    try {
+      if (columnMetadata.hasDictionary()) {
+        addColumnMinMaxValueWithDictionary(columnMetadata);
+      } else {
+        addColumnMinMaxValueWithoutDictionary(columnMetadata);
+      }
+      _minMaxValueAdded = true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while generating min/max value for 
column: {} in segment: {}, continuing without "
+          + "persisting them", columnName, _segmentMetadata.getName(), e);
+    }
+  }
 
+  private void addColumnMinMaxValueWithDictionary(ColumnMetadata 
columnMetadata)
+      throws IOException {
+    try (Dictionary dictionary = getDictionaryForColumn(columnMetadata)) {
+      SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnMetadata.getColumnName(),

Review Comment:
   nice simplification here!



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -124,227 +131,251 @@ private List<String> getColumnsToAddMinMaxValue() {
   }
 
   private boolean needAddColumnMinMaxValueForColumn(String columnName) {
-    ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
+    return 
needAddColumnMinMaxValueForColumn(_segmentMetadata.getColumnMetadataFor(columnName));
+  }
+
+  private boolean needAddColumnMinMaxValueForColumn(ColumnMetadata 
columnMetadata) {
     return columnMetadata.getMinValue() == null && 
columnMetadata.getMaxValue() == null
         && !columnMetadata.isMinMaxValueInvalid();
   }
 
-  private void addColumnMinMaxValueForColumn(String columnName)
-      throws Exception {
-    // Skip column with min/max value already set
+  private void addColumnMinMaxValueForColumn(String columnName) {
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue() 
!= null) {
+    if (!needAddColumnMinMaxValueForColumn(columnMetadata)) {
       return;
     }
+    try {
+      if (columnMetadata.hasDictionary()) {
+        addColumnMinMaxValueWithDictionary(columnMetadata);
+      } else {
+        addColumnMinMaxValueWithoutDictionary(columnMetadata);
+      }
+      _minMaxValueAdded = true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while generating min/max value for 
column: {} in segment: {}, continuing without "
+          + "persisting them", columnName, _segmentMetadata.getName(), e);
+    }
+  }
 
+  private void addColumnMinMaxValueWithDictionary(ColumnMetadata 
columnMetadata)
+      throws IOException {
+    try (Dictionary dictionary = getDictionaryForColumn(columnMetadata)) {
+      SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnMetadata.getColumnName(),
+          dictionary.getInternal(0), 
dictionary.getInternal(dictionary.length() - 1),
+          columnMetadata.getDataType().getStoredType());
+    }
+  }
+
+  private Dictionary getDictionaryForColumn(ColumnMetadata columnMetadata)
+      throws IOException {
+    String columnName = columnMetadata.getColumnName();
+    DataType dataType = columnMetadata.getDataType();
+    PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.dictionary());
+    int length = columnMetadata.getCardinality();
+    switch (dataType.getStoredType()) {
+      case INT:
+        return new IntDictionary(dictionaryBuffer, length);
+      case LONG:
+        return new LongDictionary(dictionaryBuffer, length);
+      case FLOAT:
+        return new FloatDictionary(dictionaryBuffer, length);
+      case DOUBLE:
+        return new DoubleDictionary(dictionaryBuffer, length);
+      case BIG_DECIMAL:
+        return new BigDecimalDictionary(dictionaryBuffer, length, 
columnMetadata.getColumnMaxLength());
+      case STRING:
+        return new StringDictionary(dictionaryBuffer, length, 
columnMetadata.getColumnMaxLength());
+      case BYTES:
+        return new BytesDictionary(dictionaryBuffer, length, 
columnMetadata.getColumnMaxLength());
+      default:
+        throw new IllegalStateException("Unsupported data type: " + dataType + 
" for column: " + columnName);
+    }
+  }
+
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  private void addColumnMinMaxValueWithoutDictionary(ColumnMetadata 
columnMetadata)
+      throws IOException {
+    String columnName = columnMetadata.getColumnName();
     DataType dataType = columnMetadata.getDataType();
     DataType storedType = dataType.getStoredType();
-    if (columnMetadata.hasDictionary()) {
-      PinotDataBuffer dictionaryBuffer = 
_segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary());
-      int length = columnMetadata.getCardinality();
-      switch (storedType) {
-        case INT:
-          try (IntDictionary intDictionary = new 
IntDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                intDictionary.getStringValue(0), 
intDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case LONG:
-          try (LongDictionary longDictionary = new 
LongDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                longDictionary.getStringValue(0), 
longDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case FLOAT:
-          try (FloatDictionary floatDictionary = new 
FloatDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                floatDictionary.getStringValue(0), 
floatDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case DOUBLE:
-          try (DoubleDictionary doubleDictionary = new 
DoubleDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                doubleDictionary.getStringValue(0), 
doubleDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case STRING:
-          try (StringDictionary stringDictionary = new 
StringDictionary(dictionaryBuffer, length,
-              columnMetadata.getColumnMaxLength())) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                stringDictionary.getStringValue(0), 
stringDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case BYTES:
-          try (BytesDictionary bytesDictionary = new 
BytesDictionary(dictionaryBuffer, length,
-              columnMetadata.getColumnMaxLength())) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                bytesDictionary.getStringValue(0), 
bytesDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        default:
-          throw new IllegalStateException("Unsupported data type: " + dataType 
+ " for column: " + columnName);
-      }
-    } else {
-      // setting min/max for non-dictionary columns.
+    boolean isSingleValue = columnMetadata.isSingleValue();
+    PinotDataBuffer rawIndexBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.forward());
+    try (ForwardIndexReader rawIndexReader = 
ForwardIndexReaderFactory.createRawIndexReader(rawIndexBuffer, storedType,
+        isSingleValue); ForwardIndexReaderContext readerContext = 
rawIndexReader.createContext()) {
       int numDocs = columnMetadata.getTotalDocs();
-      PinotDataBuffer rawIndexBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.forward());
-      boolean isSingleValue = 
_segmentMetadata.getSchema().getFieldSpecFor(columnName).isSingleValueField();
-      try (
-          ForwardIndexReader rawIndexReader = 
ForwardIndexReaderFactory.createRawIndexReader(rawIndexBuffer, storedType,
-              isSingleValue); ForwardIndexReaderContext readerContext = 
rawIndexReader.createContext()) {
-        switch (storedType) {
-          case INT: {
-            int min = Integer.MAX_VALUE;
-            int max = Integer.MIN_VALUE;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                int value = rawIndexReader.getInt(docId, readerContext);
+      Object minValue;
+      Object maxValue;
+      switch (storedType) {
+        case INT: {
+          int min = Integer.MAX_VALUE;
+          int max = Integer.MIN_VALUE;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              int value = rawIndexReader.getInt(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              int[] values = rawIndexReader.getIntMV(docId, readerContext);
+              for (int value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                int[] values = rawIndexReader.getIntMV(docId, readerContext);
-                for (int value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case LONG: {
-            long min = Long.MAX_VALUE;
-            long max = Long.MIN_VALUE;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                long value = rawIndexReader.getLong(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case LONG: {
+          long min = Long.MAX_VALUE;
+          long max = Long.MIN_VALUE;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              long value = rawIndexReader.getLong(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              long[] values = rawIndexReader.getLongMV(docId, readerContext);
+              for (long value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                long[] values = rawIndexReader.getLongMV(docId, readerContext);
-                for (long value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case FLOAT: {
-            float min = Float.POSITIVE_INFINITY;
-            float max = Float.NEGATIVE_INFINITY;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                float value = rawIndexReader.getFloat(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case FLOAT: {
+          float min = Float.POSITIVE_INFINITY;
+          float max = Float.NEGATIVE_INFINITY;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              float value = rawIndexReader.getFloat(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              float[] values = rawIndexReader.getFloatMV(docId, readerContext);
+              for (float value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                float[] values = rawIndexReader.getFloatMV(docId, 
readerContext);
-                for (float value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case DOUBLE: {
-            double min = Double.POSITIVE_INFINITY;
-            double max = Double.NEGATIVE_INFINITY;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                double value = rawIndexReader.getDouble(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case DOUBLE: {
+          double min = Double.POSITIVE_INFINITY;
+          double max = Double.NEGATIVE_INFINITY;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              double value = rawIndexReader.getDouble(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              double[] values = rawIndexReader.getDoubleMV(docId, 
readerContext);
+              for (double value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                double[] values = rawIndexReader.getDoubleMV(docId, 
readerContext);
-                for (double value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case STRING: {
-            String min = null;
-            String max = null;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                String value = rawIndexReader.getString(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case BIG_DECIMAL: {
+          Preconditions.checkState(isSingleValue, "Unsupported multi-value 
BIG_DECIMAL column: %s", columnName);
+          BigDecimal min = null;
+          BigDecimal max = null;
+          for (int docId = 1; docId < numDocs; docId++) {

Review Comment:
   why is this starting at `docId` 1 instead of 0?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java:
##########
@@ -639,64 +636,17 @@ public static void 
addColumnMinMaxValueInfo(PropertiesConfiguration properties,
   }
 
   /**
-   * Helper method to get the valid value for setting min/max. Returns {@code 
null} if the value is not supported in
-   * {@link PropertiesConfiguration}, e.g. contains character with surrogate.
+   * Helper method to get the valid value for setting min/max. Returns {@code 
null} if the value is too long (longer
+   * than 512 characters), or is not supported in {@link 
PropertiesConfiguration}, e.g. contains character with
+   * surrogate.
    */
   @Nullable
-  private static String getValidPropertyValue(String value, boolean isMax, 
DataType storedType) {
-    String valueWithinLengthLimit = getValueWithinLengthLimit(value, isMax, 
storedType);
-    return storedType == DataType.STRING ? 
CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(
-        valueWithinLengthLimit) : valueWithinLengthLimit;
-  }
-
-  /**
-   * Returns the original string if its length is within the allowed limit. If 
the string's length exceeds the limit,
-   * returns a truncated version of the string with maintaining min or max 
value.
-   */
-  @VisibleForTesting
-  static String getValueWithinLengthLimit(String value, boolean isMax, 
DataType storedType) {
-    int length = value.length();
-    if (length <= METADATA_PROPERTY_LENGTH_LIMIT) {
-      return value;
-    }
-    switch (storedType) {
-      case STRING:
-        if (isMax) {
-          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT - 1;
-          // determining the index for the character having value less than 
'\uFFFF'
-          while (trimIndexValue < length && value.charAt(trimIndexValue) == 
'\uFFFF') {
-            trimIndexValue++;
-          }
-          if (trimIndexValue == length) {
-            return value;
-          } else {
-            // assigning the '\uFFFF' to make the value max.
-            return value.substring(0, trimIndexValue) + '\uFFFF';
-          }
-        } else {
-          return value.substring(0, METADATA_PROPERTY_LENGTH_LIMIT);
-        }
-      case BYTES:
-        if (isMax) {
-          byte[] valueInByteArray = BytesUtils.toBytes(value);
-          int trimIndexValue = METADATA_PROPERTY_LENGTH_LIMIT / 2 - 1;
-          // determining the index for the byte having value less than 0xFF
-          while (trimIndexValue < valueInByteArray.length && 
valueInByteArray[trimIndexValue] == (byte) 0xFF) {
-            trimIndexValue++;
-          }
-          if (trimIndexValue == valueInByteArray.length) {
-            return value;
-          } else {
-            byte[] shortByteValue = Arrays.copyOf(valueInByteArray, 
trimIndexValue + 1);
-            shortByteValue[trimIndexValue] = (byte) 0xFF; // assigning the 
0xFF to make the value max.
-            return BytesUtils.toHexString(shortByteValue);
-          }
-        } else {
-          return 
BytesUtils.toHexString(Arrays.copyOf(BytesUtils.toBytes(value), 
(METADATA_PROPERTY_LENGTH_LIMIT / 2)));
-        }
-      default:
-        throw new IllegalStateException("Unsupported stored type for property 
value length reduction: " + storedType);
+  private static String getValidPropertyValue(String value, DataType 
storedType) {
+    if (value.length() > METADATA_PROPERTY_LENGTH_LIMIT) {

Review Comment:
   is the truncation what's causing incorrectness issue?



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -124,227 +131,251 @@ private List<String> getColumnsToAddMinMaxValue() {
   }
 
   private boolean needAddColumnMinMaxValueForColumn(String columnName) {
-    ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
+    return 
needAddColumnMinMaxValueForColumn(_segmentMetadata.getColumnMetadataFor(columnName));
+  }
+
+  private boolean needAddColumnMinMaxValueForColumn(ColumnMetadata 
columnMetadata) {
     return columnMetadata.getMinValue() == null && 
columnMetadata.getMaxValue() == null
         && !columnMetadata.isMinMaxValueInvalid();
   }
 
-  private void addColumnMinMaxValueForColumn(String columnName)
-      throws Exception {
-    // Skip column with min/max value already set
+  private void addColumnMinMaxValueForColumn(String columnName) {
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue() 
!= null) {
+    if (!needAddColumnMinMaxValueForColumn(columnMetadata)) {
       return;
     }
+    try {
+      if (columnMetadata.hasDictionary()) {
+        addColumnMinMaxValueWithDictionary(columnMetadata);
+      } else {
+        addColumnMinMaxValueWithoutDictionary(columnMetadata);
+      }
+      _minMaxValueAdded = true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while generating min/max value for 
column: {} in segment: {}, continuing without "
+          + "persisting them", columnName, _segmentMetadata.getName(), e);
+    }
+  }
 
+  private void addColumnMinMaxValueWithDictionary(ColumnMetadata 
columnMetadata)
+      throws IOException {
+    try (Dictionary dictionary = getDictionaryForColumn(columnMetadata)) {
+      SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnMetadata.getColumnName(),
+          dictionary.getInternal(0), 
dictionary.getInternal(dictionary.length() - 1),
+          columnMetadata.getDataType().getStoredType());
+    }
+  }
+
+  private Dictionary getDictionaryForColumn(ColumnMetadata columnMetadata)
+      throws IOException {
+    String columnName = columnMetadata.getColumnName();
+    DataType dataType = columnMetadata.getDataType();
+    PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.dictionary());
+    int length = columnMetadata.getCardinality();
+    switch (dataType.getStoredType()) {
+      case INT:
+        return new IntDictionary(dictionaryBuffer, length);
+      case LONG:
+        return new LongDictionary(dictionaryBuffer, length);
+      case FLOAT:
+        return new FloatDictionary(dictionaryBuffer, length);
+      case DOUBLE:
+        return new DoubleDictionary(dictionaryBuffer, length);
+      case BIG_DECIMAL:
+        return new BigDecimalDictionary(dictionaryBuffer, length, 
columnMetadata.getColumnMaxLength());
+      case STRING:
+        return new StringDictionary(dictionaryBuffer, length, 
columnMetadata.getColumnMaxLength());
+      case BYTES:
+        return new BytesDictionary(dictionaryBuffer, length, 
columnMetadata.getColumnMaxLength());
+      default:
+        throw new IllegalStateException("Unsupported data type: " + dataType + 
" for column: " + columnName);
+    }
+  }
+
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  private void addColumnMinMaxValueWithoutDictionary(ColumnMetadata 
columnMetadata)
+      throws IOException {
+    String columnName = columnMetadata.getColumnName();
     DataType dataType = columnMetadata.getDataType();
     DataType storedType = dataType.getStoredType();
-    if (columnMetadata.hasDictionary()) {
-      PinotDataBuffer dictionaryBuffer = 
_segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary());
-      int length = columnMetadata.getCardinality();
-      switch (storedType) {
-        case INT:
-          try (IntDictionary intDictionary = new 
IntDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                intDictionary.getStringValue(0), 
intDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case LONG:
-          try (LongDictionary longDictionary = new 
LongDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                longDictionary.getStringValue(0), 
longDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case FLOAT:
-          try (FloatDictionary floatDictionary = new 
FloatDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                floatDictionary.getStringValue(0), 
floatDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case DOUBLE:
-          try (DoubleDictionary doubleDictionary = new 
DoubleDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                doubleDictionary.getStringValue(0), 
doubleDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case STRING:
-          try (StringDictionary stringDictionary = new 
StringDictionary(dictionaryBuffer, length,
-              columnMetadata.getColumnMaxLength())) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                stringDictionary.getStringValue(0), 
stringDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case BYTES:
-          try (BytesDictionary bytesDictionary = new 
BytesDictionary(dictionaryBuffer, length,
-              columnMetadata.getColumnMaxLength())) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                bytesDictionary.getStringValue(0), 
bytesDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        default:
-          throw new IllegalStateException("Unsupported data type: " + dataType 
+ " for column: " + columnName);
-      }
-    } else {
-      // setting min/max for non-dictionary columns.
+    boolean isSingleValue = columnMetadata.isSingleValue();
+    PinotDataBuffer rawIndexBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.forward());
+    try (ForwardIndexReader rawIndexReader = 
ForwardIndexReaderFactory.createRawIndexReader(rawIndexBuffer, storedType,
+        isSingleValue); ForwardIndexReaderContext readerContext = 
rawIndexReader.createContext()) {
       int numDocs = columnMetadata.getTotalDocs();
-      PinotDataBuffer rawIndexBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.forward());
-      boolean isSingleValue = 
_segmentMetadata.getSchema().getFieldSpecFor(columnName).isSingleValueField();
-      try (
-          ForwardIndexReader rawIndexReader = 
ForwardIndexReaderFactory.createRawIndexReader(rawIndexBuffer, storedType,
-              isSingleValue); ForwardIndexReaderContext readerContext = 
rawIndexReader.createContext()) {
-        switch (storedType) {
-          case INT: {
-            int min = Integer.MAX_VALUE;
-            int max = Integer.MIN_VALUE;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                int value = rawIndexReader.getInt(docId, readerContext);
+      Object minValue;
+      Object maxValue;
+      switch (storedType) {
+        case INT: {
+          int min = Integer.MAX_VALUE;
+          int max = Integer.MIN_VALUE;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              int value = rawIndexReader.getInt(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              int[] values = rawIndexReader.getIntMV(docId, readerContext);
+              for (int value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                int[] values = rawIndexReader.getIntMV(docId, readerContext);
-                for (int value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case LONG: {
-            long min = Long.MAX_VALUE;
-            long max = Long.MIN_VALUE;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                long value = rawIndexReader.getLong(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case LONG: {
+          long min = Long.MAX_VALUE;
+          long max = Long.MIN_VALUE;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              long value = rawIndexReader.getLong(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              long[] values = rawIndexReader.getLongMV(docId, readerContext);
+              for (long value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                long[] values = rawIndexReader.getLongMV(docId, readerContext);
-                for (long value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case FLOAT: {
-            float min = Float.POSITIVE_INFINITY;
-            float max = Float.NEGATIVE_INFINITY;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                float value = rawIndexReader.getFloat(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case FLOAT: {
+          float min = Float.POSITIVE_INFINITY;
+          float max = Float.NEGATIVE_INFINITY;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              float value = rawIndexReader.getFloat(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              float[] values = rawIndexReader.getFloatMV(docId, readerContext);
+              for (float value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                float[] values = rawIndexReader.getFloatMV(docId, 
readerContext);
-                for (float value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case DOUBLE: {
-            double min = Double.POSITIVE_INFINITY;
-            double max = Double.NEGATIVE_INFINITY;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                double value = rawIndexReader.getDouble(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case DOUBLE: {
+          double min = Double.POSITIVE_INFINITY;
+          double max = Double.NEGATIVE_INFINITY;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              double value = rawIndexReader.getDouble(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              double[] values = rawIndexReader.getDoubleMV(docId, 
readerContext);
+              for (double value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                double[] values = rawIndexReader.getDoubleMV(docId, 
readerContext);
-                for (double value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case STRING: {
-            String min = null;
-            String max = null;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                String value = rawIndexReader.getString(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case BIG_DECIMAL: {
+          Preconditions.checkState(isSingleValue, "Unsupported multi-value 
BIG_DECIMAL column: %s", columnName);

Review Comment:
   is this generally true in Pinot or just too complicated to handle here for 
now?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to