Jackie-Jiang commented on code in PR #12502:
URL: https://github.com/apache/pinot/pull/12502#discussion_r1503471374


##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java:
##########
@@ -124,227 +131,251 @@ private List<String> getColumnsToAddMinMaxValue() {
   }
 
   private boolean needAddColumnMinMaxValueForColumn(String columnName) {
-    ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
+    return 
needAddColumnMinMaxValueForColumn(_segmentMetadata.getColumnMetadataFor(columnName));
+  }
+
+  private boolean needAddColumnMinMaxValueForColumn(ColumnMetadata 
columnMetadata) {
     return columnMetadata.getMinValue() == null && 
columnMetadata.getMaxValue() == null
         && !columnMetadata.isMinMaxValueInvalid();
   }
 
-  private void addColumnMinMaxValueForColumn(String columnName)
-      throws Exception {
-    // Skip column with min/max value already set
+  private void addColumnMinMaxValueForColumn(String columnName) {
     ColumnMetadata columnMetadata = 
_segmentMetadata.getColumnMetadataFor(columnName);
-    if (columnMetadata.getMinValue() != null || columnMetadata.getMaxValue() 
!= null) {
+    if (!needAddColumnMinMaxValueForColumn(columnMetadata)) {
       return;
     }
+    try {
+      if (columnMetadata.hasDictionary()) {
+        addColumnMinMaxValueWithDictionary(columnMetadata);
+      } else {
+        addColumnMinMaxValueWithoutDictionary(columnMetadata);
+      }
+      _minMaxValueAdded = true;
+    } catch (Exception e) {
+      LOGGER.error("Caught exception while generating min/max value for 
column: {} in segment: {}, continuing without "
+          + "persisting them", columnName, _segmentMetadata.getName(), e);
+    }
+  }
 
+  private void addColumnMinMaxValueWithDictionary(ColumnMetadata 
columnMetadata)
+      throws IOException {
+    try (Dictionary dictionary = getDictionaryForColumn(columnMetadata)) {
+      SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnMetadata.getColumnName(),
+          dictionary.getInternal(0), 
dictionary.getInternal(dictionary.length() - 1),
+          columnMetadata.getDataType().getStoredType());
+    }
+  }
+
+  private Dictionary getDictionaryForColumn(ColumnMetadata columnMetadata)
+      throws IOException {
+    String columnName = columnMetadata.getColumnName();
+    DataType dataType = columnMetadata.getDataType();
+    PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.dictionary());
+    int length = columnMetadata.getCardinality();
+    switch (dataType.getStoredType()) {
+      case INT:
+        return new IntDictionary(dictionaryBuffer, length);
+      case LONG:
+        return new LongDictionary(dictionaryBuffer, length);
+      case FLOAT:
+        return new FloatDictionary(dictionaryBuffer, length);
+      case DOUBLE:
+        return new DoubleDictionary(dictionaryBuffer, length);
+      case BIG_DECIMAL:
+        return new BigDecimalDictionary(dictionaryBuffer, length, 
columnMetadata.getColumnMaxLength());
+      case STRING:
+        return new StringDictionary(dictionaryBuffer, length, 
columnMetadata.getColumnMaxLength());
+      case BYTES:
+        return new BytesDictionary(dictionaryBuffer, length, 
columnMetadata.getColumnMaxLength());
+      default:
+        throw new IllegalStateException("Unsupported data type: " + dataType + 
" for column: " + columnName);
+    }
+  }
+
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  private void addColumnMinMaxValueWithoutDictionary(ColumnMetadata 
columnMetadata)
+      throws IOException {
+    String columnName = columnMetadata.getColumnName();
     DataType dataType = columnMetadata.getDataType();
     DataType storedType = dataType.getStoredType();
-    if (columnMetadata.hasDictionary()) {
-      PinotDataBuffer dictionaryBuffer = 
_segmentWriter.getIndexFor(columnName, StandardIndexes.dictionary());
-      int length = columnMetadata.getCardinality();
-      switch (storedType) {
-        case INT:
-          try (IntDictionary intDictionary = new 
IntDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                intDictionary.getStringValue(0), 
intDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case LONG:
-          try (LongDictionary longDictionary = new 
LongDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                longDictionary.getStringValue(0), 
longDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case FLOAT:
-          try (FloatDictionary floatDictionary = new 
FloatDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                floatDictionary.getStringValue(0), 
floatDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case DOUBLE:
-          try (DoubleDictionary doubleDictionary = new 
DoubleDictionary(dictionaryBuffer, length)) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                doubleDictionary.getStringValue(0), 
doubleDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case STRING:
-          try (StringDictionary stringDictionary = new 
StringDictionary(dictionaryBuffer, length,
-              columnMetadata.getColumnMaxLength())) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                stringDictionary.getStringValue(0), 
stringDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        case BYTES:
-          try (BytesDictionary bytesDictionary = new 
BytesDictionary(dictionaryBuffer, length,
-              columnMetadata.getColumnMaxLength())) {
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName,
-                bytesDictionary.getStringValue(0), 
bytesDictionary.getStringValue(length - 1), storedType);
-          }
-          break;
-        default:
-          throw new IllegalStateException("Unsupported data type: " + dataType 
+ " for column: " + columnName);
-      }
-    } else {
-      // setting min/max for non-dictionary columns.
+    boolean isSingleValue = columnMetadata.isSingleValue();
+    PinotDataBuffer rawIndexBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.forward());
+    try (ForwardIndexReader rawIndexReader = 
ForwardIndexReaderFactory.createRawIndexReader(rawIndexBuffer, storedType,
+        isSingleValue); ForwardIndexReaderContext readerContext = 
rawIndexReader.createContext()) {
       int numDocs = columnMetadata.getTotalDocs();
-      PinotDataBuffer rawIndexBuffer = _segmentWriter.getIndexFor(columnName, 
StandardIndexes.forward());
-      boolean isSingleValue = 
_segmentMetadata.getSchema().getFieldSpecFor(columnName).isSingleValueField();
-      try (
-          ForwardIndexReader rawIndexReader = 
ForwardIndexReaderFactory.createRawIndexReader(rawIndexBuffer, storedType,
-              isSingleValue); ForwardIndexReaderContext readerContext = 
rawIndexReader.createContext()) {
-        switch (storedType) {
-          case INT: {
-            int min = Integer.MAX_VALUE;
-            int max = Integer.MIN_VALUE;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                int value = rawIndexReader.getInt(docId, readerContext);
+      Object minValue;
+      Object maxValue;
+      switch (storedType) {
+        case INT: {
+          int min = Integer.MAX_VALUE;
+          int max = Integer.MIN_VALUE;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              int value = rawIndexReader.getInt(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              int[] values = rawIndexReader.getIntMV(docId, readerContext);
+              for (int value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                int[] values = rawIndexReader.getIntMV(docId, readerContext);
-                for (int value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case LONG: {
-            long min = Long.MAX_VALUE;
-            long max = Long.MIN_VALUE;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                long value = rawIndexReader.getLong(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case LONG: {
+          long min = Long.MAX_VALUE;
+          long max = Long.MIN_VALUE;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              long value = rawIndexReader.getLong(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              long[] values = rawIndexReader.getLongMV(docId, readerContext);
+              for (long value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                long[] values = rawIndexReader.getLongMV(docId, readerContext);
-                for (long value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case FLOAT: {
-            float min = Float.POSITIVE_INFINITY;
-            float max = Float.NEGATIVE_INFINITY;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                float value = rawIndexReader.getFloat(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case FLOAT: {
+          float min = Float.POSITIVE_INFINITY;
+          float max = Float.NEGATIVE_INFINITY;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              float value = rawIndexReader.getFloat(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              float[] values = rawIndexReader.getFloatMV(docId, readerContext);
+              for (float value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                float[] values = rawIndexReader.getFloatMV(docId, 
readerContext);
-                for (float value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case DOUBLE: {
-            double min = Double.POSITIVE_INFINITY;
-            double max = Double.NEGATIVE_INFINITY;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                double value = rawIndexReader.getDouble(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case DOUBLE: {
+          double min = Double.POSITIVE_INFINITY;
+          double max = Double.NEGATIVE_INFINITY;
+          if (isSingleValue) {
+            for (int docId = 0; docId < numDocs; docId++) {
+              double value = rawIndexReader.getDouble(docId, readerContext);
+              min = Math.min(min, value);
+              max = Math.max(max, value);
+            }
+          } else {
+            for (int docId = 0; docId < numDocs; docId++) {
+              double[] values = rawIndexReader.getDoubleMV(docId, 
readerContext);
+              for (double value : values) {
                 min = Math.min(min, value);
                 max = Math.max(max, value);
               }
-            } else {
-              for (int docId = 0; docId < numDocs; docId++) {
-                double[] values = rawIndexReader.getDoubleMV(docId, 
readerContext);
-                for (double value : values) {
-                  min = Math.min(min, value);
-                  max = Math.max(max, value);
-                }
-              }
             }
-            
SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, 
columnName, String.valueOf(min),
-                String.valueOf(max), storedType);
-            break;
           }
-          case STRING: {
-            String min = null;
-            String max = null;
-            if (isSingleValue) {
-              for (int docId = 0; docId < numDocs; docId++) {
-                String value = rawIndexReader.getString(docId, readerContext);
+          minValue = min;
+          maxValue = max;
+          break;
+        }
+        case BIG_DECIMAL: {
+          Preconditions.checkState(isSingleValue, "Unsupported multi-value 
BIG_DECIMAL column: %s", columnName);

Review Comment:
   MV is only allowed within `DIMENSION`, and it is very rare to configure 
`BIG_DECIMAL` as dimension. In the future we can consider adding MV support, 
but as of now we don't have APIs to read MV BigDecimal values



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to