This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 760e952616 Add support for creating raw derived columns during segment
reload (#13037)
760e952616 is described below
commit 760e952616b79e455536bbf958edb84c8d0d3472
Author: Yash Mayya <[email protected]>
AuthorDate: Thu May 9 13:33:49 2024 +0530
Add support for creating raw derived columns during segment reload (#13037)
* Add support for creating raw derived columns during segment reload
* Remove createDictionary check for default value columns'
ColumnIndexCreationInfo; convert arrays of primitive wrapper values to
primitive arrays for MV raw index
* Minor updates
* Fix compilation error caused by rebase on
31ae6a32aa426a6b9364b75e774a5e80c37eb336
* Refactor index creation to use ForwardIndexCreatorFactory for both
dictionary and raw cases
* Extract common forward index creator logic into a separate method
---
.../tests/OfflineClusterIntegrationTest.java | 19 +-
.../defaultcolumn/BaseDefaultColumnHandler.java | 202 +++++++++++++++------
.../defaultcolumn/V3DefaultColumnHandler.java | 14 +-
.../index/loader/SegmentPreProcessorTest.java | 26 ++-
.../src/test/resources/data/newColumnsSchema1.json | 5 +
5 files changed, 206 insertions(+), 60 deletions(-)
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index 8de4713aed..a90f3ca48c 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -1501,6 +1501,7 @@ public class OfflineClusterIntegrationTest extends
BaseClusterIntegrationTestSet
* <li>"NewAddedDerivedMVStringDimension", DIMENSION, STRING, multi-value,
split(DestCityName, ', ')</li>
* <li>"NewAddedDerivedDivAirportSeqIDs", DIMENSION, INT, multi-value,
DivAirportSeqIDs</li>
* <li>"NewAddedDerivedDivAirportSeqIDsString", DIMENSION, STRING,
multi-value, DivAirportSeqIDs</li>
+ * <li>"NewAddedRawDerivedStringDimension", DIMENSION, STRING,
single-value, "null"</li>
* </ul>
*/
@Test(dependsOnMethods = "testAggregateMetadataAPI", dataProvider =
"useBothQueryEngines")
@@ -1513,7 +1514,7 @@ public class OfflineClusterIntegrationTest extends
BaseClusterIntegrationTestSet
reloadWithExtraColumns();
JsonNode queryResponse = postQuery(SELECT_STAR_QUERY);
assertEquals(queryResponse.get("totalDocs").asLong(), numTotalDocs);
-
assertEquals(queryResponse.get("resultTable").get("dataSchema").get("columnNames").size(),
100);
+
assertEquals(queryResponse.get("resultTable").get("dataSchema").get("columnNames").size(),
101);
testNewAddedColumns();
testExpressionOverride();
@@ -1593,6 +1594,7 @@ public class OfflineClusterIntegrationTest extends
BaseClusterIntegrationTestSet
schema.addField(new DimensionFieldSpec("NewAddedDerivedMVStringDimension",
DataType.STRING, false));
schema.addField(new DimensionFieldSpec("NewAddedDerivedDivAirportSeqIDs",
DataType.INT, false));
schema.addField(new
DimensionFieldSpec("NewAddedDerivedDivAirportSeqIDsString", DataType.STRING,
false));
+ schema.addField(new
DimensionFieldSpec("NewAddedRawDerivedStringDimension", DataType.STRING, true));
addSchema(schema);
TableConfig tableConfig = getOfflineTableConfig();
@@ -1602,10 +1604,13 @@ public class OfflineClusterIntegrationTest extends
BaseClusterIntegrationTestSet
new TransformConfig("NewAddedDerivedSVBooleanDimension",
"ActualElapsedTime > 0"),
new TransformConfig("NewAddedDerivedMVStringDimension",
"split(DestCityName, ', ')"),
new TransformConfig("NewAddedDerivedDivAirportSeqIDs",
"DivAirportSeqIDs"),
- new TransformConfig("NewAddedDerivedDivAirportSeqIDsString",
"DivAirportSeqIDs"));
+ new TransformConfig("NewAddedDerivedDivAirportSeqIDsString",
"DivAirportSeqIDs"),
+ new TransformConfig("NewAddedRawDerivedStringDimension",
"reverse(DestCityName)"));
IngestionConfig ingestionConfig = new IngestionConfig();
ingestionConfig.setTransformConfigs(transformConfigs);
tableConfig.setIngestionConfig(ingestionConfig);
+ // Ensure that we can reload segments with a new raw derived column
+
tableConfig.getIndexingConfig().getNoDictionaryColumns().add("NewAddedRawDerivedStringDimension");
List<FieldConfig> fieldConfigList = tableConfig.getFieldConfigList();
assertNotNull(fieldConfigList);
fieldConfigList.add(
@@ -1623,12 +1628,14 @@ public class OfflineClusterIntegrationTest extends
BaseClusterIntegrationTestSet
// Verify the index sizes
JsonNode columnIndexSizeMap = JsonUtils.stringToJsonNode(sendGetRequest(
getControllerBaseApiUrl() +
"/tables/mytable/metadata?columns=DivAirportSeqIDs"
- +
"&columns=NewAddedDerivedDivAirportSeqIDs&columns=NewAddedDerivedDivAirportSeqIDsString"))
+ +
"&columns=NewAddedDerivedDivAirportSeqIDs&columns=NewAddedDerivedDivAirportSeqIDsString"
+ + "&columns=NewAddedRawDerivedStringDimension"))
.get("columnIndexSizeMap");
- assertEquals(columnIndexSizeMap.size(), 3);
+ assertEquals(columnIndexSizeMap.size(), 4);
JsonNode originalColumnIndexSizes =
columnIndexSizeMap.get("DivAirportSeqIDs");
JsonNode derivedColumnIndexSizes =
columnIndexSizeMap.get("NewAddedDerivedDivAirportSeqIDs");
JsonNode derivedStringColumnIndexSizes =
columnIndexSizeMap.get("NewAddedDerivedDivAirportSeqIDsString");
+ JsonNode derivedRawStringColumnIndex =
columnIndexSizeMap.get("NewAddedRawDerivedStringDimension");
// Derived int column should have the same dictionary size as the original
column
double originalColumnDictionarySize =
originalColumnIndexSizes.get("dictionary").asDouble();
@@ -1639,6 +1646,9 @@ public class OfflineClusterIntegrationTest extends
BaseClusterIntegrationTestSet
double derivedColumnForwardIndexSize =
derivedColumnIndexSizes.get("forward_index").asDouble();
assertTrue(derivedColumnForwardIndexSize <
originalColumnIndexSizes.get("forward_index").asDouble());
assertEquals(derivedStringColumnIndexSizes.get("forward_index").asDouble(),
derivedColumnForwardIndexSize);
+
+ assertTrue(derivedRawStringColumnIndex.has("forward_index"));
+ assertFalse(derivedRawStringColumnIndex.has("dictionary"));
}
private void reloadWithMissingColumns()
@@ -1647,6 +1657,7 @@ public class OfflineClusterIntegrationTest extends
BaseClusterIntegrationTestSet
TableConfig tableConfig = getOfflineTableConfig();
tableConfig.setIngestionConfig(null);
tableConfig.setFieldConfigList(getFieldConfigs());
+
tableConfig.getIndexingConfig().getNoDictionaryColumns().remove("NewAddedRawDerivedStringDimension");
updateTableConfig(tableConfig);
// Need to first delete then add the schema because removing columns is
backward-incompatible change
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
index 67e92fd63a..e75ff0923e 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java
@@ -30,16 +30,15 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.configuration2.PropertiesConfiguration;
+import org.apache.commons.lang.ArrayUtils;
import org.apache.pinot.common.function.FunctionUtils;
import org.apache.pinot.common.utils.PinotDataType;
import org.apache.pinot.segment.local.function.FunctionEvaluator;
import org.apache.pinot.segment.local.function.FunctionEvaluatorFactory;
import
org.apache.pinot.segment.local.segment.creator.impl.SegmentColumnarIndexCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.SegmentDictionaryCreator;
-import
org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueEntryDictForwardIndexCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator;
-import
org.apache.pinot.segment.local.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.nullvalue.NullValueVectorCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.stats.BytesColumnPredIndexStatsCollector;
@@ -49,6 +48,7 @@ import
org.apache.pinot.segment.local.segment.creator.impl.stats.IntColumnPreInd
import
org.apache.pinot.segment.local.segment.creator.impl.stats.LongColumnPreIndexStatsCollector;
import
org.apache.pinot.segment.local.segment.creator.impl.stats.StringColumnPreIndexStatsCollector;
import
org.apache.pinot.segment.local.segment.index.dictionary.DictionaryIndexType;
+import
org.apache.pinot.segment.local.segment.index.forward.ForwardIndexCreatorFactory;
import org.apache.pinot.segment.local.segment.index.forward.ForwardIndexPlugin;
import org.apache.pinot.segment.local.segment.index.forward.ForwardIndexType;
import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
@@ -57,8 +57,8 @@ import
org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
import org.apache.pinot.segment.spi.ColumnMetadata;
import org.apache.pinot.segment.spi.SegmentMetadata;
import org.apache.pinot.segment.spi.V1Constants;
-import org.apache.pinot.segment.spi.compression.DictIdCompressionType;
import org.apache.pinot.segment.spi.creator.ColumnIndexCreationInfo;
+import org.apache.pinot.segment.spi.creator.IndexCreationContext;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
import org.apache.pinot.segment.spi.index.FieldIndexConfigs;
import org.apache.pinot.segment.spi.index.ForwardIndexConfig;
@@ -383,15 +383,6 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
argumentsMetadata.add(columnMetadata);
}
- // TODO: Support raw derived column
- if (_indexLoadingConfig.getNoDictionaryColumns().contains(column)) {
- LOGGER.warn("Skip creating raw derived column: {}", column);
- if (errorOnFailure) {
- throw new UnsupportedOperationException(String.format("Failed to
create raw derived column: %s", column));
- }
- return false;
- }
-
// TODO: Support forward index disabled derived column
if
(_indexLoadingConfig.getForwardIndexDisabledColumns().contains(column)) {
LOGGER.warn("Skip creating forward index disabled derived column:
{}", column);
@@ -487,7 +478,7 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
new ColumnIndexCreationInfo(columnStatistics,
true/*createDictionary*/, false, true/*isAutoGenerated*/,
defaultValue/*defaultNullValue*/);
- // Create dictionary.
+ // We always create a dictionary for default value columns.
// We will have only one value in the dictionary.
int dictionaryElementSize;
try (SegmentDictionaryCreator creator = new
SegmentDictionaryCreator(fieldSpec, _indexDir, false)) {
@@ -563,7 +554,6 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
* Helper method to create the V1 indices (dictionary and forward index) for
a column with derived values.
* TODO:
* - Support chained derived column
- * - Support raw derived column
* - Support forward index disabled derived column
*/
private void createDerivedColumnV1Indices(String column, FunctionEvaluator
functionEvaluator,
@@ -595,6 +585,7 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
}
}
+ boolean createDictionary =
!_indexLoadingConfig.getNoDictionaryColumns().contains(column);
FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
StatsCollectorConfig statsCollectorConfig =
new StatsCollectorConfig(_indexLoadingConfig.getTableConfig(),
_schema, null);
@@ -621,7 +612,8 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
}
statsCollector.seal();
indexCreationInfo =
- new ColumnIndexCreationInfo(statsCollector, true, false, true,
fieldSpec.getDefaultNullValue());
+ new ColumnIndexCreationInfo(statsCollector, createDictionary,
false, true,
+ fieldSpec.getDefaultNullValue());
break;
}
case LONG: {
@@ -644,7 +636,8 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
}
statsCollector.seal();
indexCreationInfo =
- new ColumnIndexCreationInfo(statsCollector, true, false, true,
fieldSpec.getDefaultNullValue());
+ new ColumnIndexCreationInfo(statsCollector, createDictionary,
false, true,
+ fieldSpec.getDefaultNullValue());
break;
}
case FLOAT: {
@@ -667,7 +660,8 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
}
statsCollector.seal();
indexCreationInfo =
- new ColumnIndexCreationInfo(statsCollector, true, false, true,
fieldSpec.getDefaultNullValue());
+ new ColumnIndexCreationInfo(statsCollector, createDictionary,
false, true,
+ fieldSpec.getDefaultNullValue());
break;
}
case DOUBLE: {
@@ -690,7 +684,8 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
}
statsCollector.seal();
indexCreationInfo =
- new ColumnIndexCreationInfo(statsCollector, true, false, true,
fieldSpec.getDefaultNullValue());
+ new ColumnIndexCreationInfo(statsCollector, createDictionary,
false, true,
+ fieldSpec.getDefaultNullValue());
break;
}
case BIG_DECIMAL: {
@@ -705,7 +700,8 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
}
statsCollector.seal();
indexCreationInfo =
- new ColumnIndexCreationInfo(statsCollector, true, false, true,
fieldSpec.getDefaultNullValue());
+ new ColumnIndexCreationInfo(statsCollector, createDictionary,
false, true,
+ fieldSpec.getDefaultNullValue());
break;
}
case STRING: {
@@ -727,7 +723,7 @@ public abstract class BaseDefaultColumnHandler implements
DefaultColumnHandler {
statsCollector.collect(value);
}
statsCollector.seal();
- indexCreationInfo = new ColumnIndexCreationInfo(statsCollector, true,
+ indexCreationInfo = new ColumnIndexCreationInfo(statsCollector,
createDictionary,
_indexLoadingConfig.getVarLengthDictionaryColumns().contains(column), true,
fieldSpec.getDefaultNullValue());
break;
@@ -757,57 +753,157 @@ public abstract class BaseDefaultColumnHandler
implements DefaultColumnHandler {
} else {
useVarLengthDictionary =
_indexLoadingConfig.getVarLengthDictionaryColumns().contains(column);
}
- indexCreationInfo = new ColumnIndexCreationInfo(statsCollector,
true, useVarLengthDictionary, true,
- new ByteArray((byte[]) fieldSpec.getDefaultNullValue()));
+ indexCreationInfo = new ColumnIndexCreationInfo(statsCollector,
createDictionary, useVarLengthDictionary,
+ true, new ByteArray((byte[]) fieldSpec.getDefaultNullValue()));
break;
}
default:
throw new IllegalStateException();
}
- // Create dictionary
- try (SegmentDictionaryCreator dictionaryCreator = new
SegmentDictionaryCreator(fieldSpec, _indexDir,
- indexCreationInfo.isUseVarLengthDictionary())) {
-
dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray());
+ if (createDictionary) {
+ createDerivedColumnForwardIndexWithDictionary(column, fieldSpec,
outputValues, indexCreationInfo);
+ } else {
+ createDerivedColumnForwardIndexWithoutDictionary(column, fieldSpec,
outputValues, indexCreationInfo);
+ }
+ } finally {
+ for (ValueReader valueReader : valueReaders) {
+ valueReader.close();
+ }
+ }
+ }
+
+ /**
+ * Helper method to create the dictionary and forward indices for a column
with derived values.
+ */
+ private void createDerivedColumnForwardIndexWithDictionary(String column,
FieldSpec fieldSpec, Object[] outputValues,
+ ColumnIndexCreationInfo indexCreationInfo) throws Exception {
+
+ // Create dictionary
+ try (SegmentDictionaryCreator dictionaryCreator = new
SegmentDictionaryCreator(fieldSpec, _indexDir,
+ indexCreationInfo.isUseVarLengthDictionary())) {
+
dictionaryCreator.build(indexCreationInfo.getSortedUniqueElementsArray());
+
+ int numDocs = outputValues.length;
- // Create forward index
- int cardinality = indexCreationInfo.getDistinctValueCount();
+ // Create forward index
+ boolean isSingleValue = fieldSpec.isSingleValueField();
+
+ try (ForwardIndexCreator forwardIndexCreator
+ = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs,
column, true)) {
if (isSingleValue) {
- try (ForwardIndexCreator forwardIndexCreator =
indexCreationInfo.isSorted()
- ? new SingleValueSortedForwardIndexCreator(_indexDir, column,
cardinality)
- : new SingleValueUnsortedForwardIndexCreator(_indexDir, column,
cardinality, numDocs)) {
- for (int i = 0; i < numDocs; i++) {
-
forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValues[i]));
- }
+ for (Object outputValue : outputValues) {
+
forwardIndexCreator.putDictId(dictionaryCreator.indexOfSV(outputValue));
}
} else {
- DictIdCompressionType dictIdCompressionType = null;
- FieldIndexConfigs fieldIndexConfig =
_indexLoadingConfig.getFieldIndexConfig(column);
- if (fieldIndexConfig != null) {
- ForwardIndexConfig forwardIndexConfig =
fieldIndexConfig.getConfig(new ForwardIndexPlugin().getIndexType());
- if (forwardIndexConfig != null) {
- dictIdCompressionType =
forwardIndexConfig.getDictIdCompressionType();
- }
- }
- try (ForwardIndexCreator forwardIndexCreator = dictIdCompressionType
== DictIdCompressionType.MV_ENTRY_DICT
- ? new MultiValueEntryDictForwardIndexCreator(_indexDir, column,
cardinality, numDocs)
- : new MultiValueUnsortedForwardIndexCreator(_indexDir, column,
cardinality, numDocs,
- indexCreationInfo.getTotalNumberOfEntries())) {
- for (int i = 0; i < numDocs; i++) {
-
forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValues[i]));
- }
+ for (Object outputValue : outputValues) {
+
forwardIndexCreator.putDictIdMV(dictionaryCreator.indexOfMV(outputValue));
}
}
-
// Add the column metadata
SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties,
column, indexCreationInfo, numDocs,
fieldSpec, true, dictionaryCreator.getNumBytesPerEntry());
}
- } finally {
- for (ValueReader valueReader : valueReaders) {
- valueReader.close();
+ }
+ }
+
+ /**
+ * Helper method to create a forward index for a raw encoded column with
derived values.
+ */
+ private void createDerivedColumnForwardIndexWithoutDictionary(String column,
FieldSpec fieldSpec,
+ Object[] outputValues, ColumnIndexCreationInfo indexCreationInfo)
+ throws Exception {
+
+ // Create forward index
+ int numDocs = outputValues.length;
+ boolean isSingleValue = fieldSpec.isSingleValueField();
+
+ try (ForwardIndexCreator forwardIndexCreator
+ = getForwardIndexCreator(fieldSpec, indexCreationInfo, numDocs,
column, false)) {
+ if (isSingleValue) {
+ for (Object outputValue : outputValues) {
+ switch (fieldSpec.getDataType().getStoredType()) {
+ // Casts are safe here because we've already done the conversion
in createDerivedColumnV1Indices
+ case INT:
+ forwardIndexCreator.putInt((int) outputValue);
+ break;
+ case LONG:
+ forwardIndexCreator.putLong((long) outputValue);
+ break;
+ case FLOAT:
+ forwardIndexCreator.putFloat((float) outputValue);
+ break;
+ case DOUBLE:
+ forwardIndexCreator.putDouble((double) outputValue);
+ break;
+ case BIG_DECIMAL:
+ forwardIndexCreator.putBigDecimal((BigDecimal) outputValue);
+ break;
+ case STRING:
+ forwardIndexCreator.putString((String) outputValue);
+ break;
+ case BYTES:
+ forwardIndexCreator.putBytes((byte[]) outputValue);
+ break;
+ default:
+ throw new IllegalStateException();
+ }
+ }
+ } else {
+ for (Object outputValue : outputValues) {
+ switch (fieldSpec.getDataType().getStoredType()) {
+ // Casts are safe here because we've already done the conversion
in createDerivedColumnV1Indices
+ case INT:
+ forwardIndexCreator.putIntMV(ArrayUtils.toPrimitive((Integer[])
outputValue));
+ break;
+ case LONG:
+ forwardIndexCreator.putLongMV(ArrayUtils.toPrimitive((Long[])
outputValue));
+ break;
+ case FLOAT:
+ forwardIndexCreator.putFloatMV(ArrayUtils.toPrimitive((Float[])
outputValue));
+ break;
+ case DOUBLE:
+
forwardIndexCreator.putDoubleMV(ArrayUtils.toPrimitive((Double[]) outputValue));
+ break;
+ case STRING:
+ forwardIndexCreator.putStringMV((String[]) outputValue);
+ break;
+ case BYTES:
+ forwardIndexCreator.putBytesMV((byte[][]) outputValue);
+ break;
+ default:
+ throw new IllegalStateException();
+ }
+ }
}
}
+
+ // Add the column metadata
+ SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties,
column, indexCreationInfo, numDocs,
+ fieldSpec, false, 0);
+ }
+
+ private ForwardIndexCreator getForwardIndexCreator(FieldSpec fieldSpec,
ColumnIndexCreationInfo indexCreationInfo,
+ int numDocs, String column, boolean hasDictionary) throws Exception {
+
+ IndexCreationContext indexCreationContext = IndexCreationContext.builder()
+ .withIndexDir(_indexDir)
+ .withFieldSpec(fieldSpec)
+ .withColumnIndexCreationInfo(indexCreationInfo)
+ .withTotalDocs(numDocs)
+ .withDictionary(hasDictionary)
+ .build();
+
+ ForwardIndexConfig forwardIndexConfig = null;
+ FieldIndexConfigs fieldIndexConfig =
_indexLoadingConfig.getFieldIndexConfig(column);
+ if (fieldIndexConfig != null) {
+ forwardIndexConfig = fieldIndexConfig.getConfig(new
ForwardIndexPlugin().getIndexType());
+ }
+ if (forwardIndexConfig == null) {
+ forwardIndexConfig = new ForwardIndexConfig(false, null, null, null,
null, null);
+ }
+
+ return ForwardIndexCreatorFactory.createIndexCreator(indexCreationContext,
forwardIndexConfig);
}
@SuppressWarnings("rawtypes")
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
index 4f8dc7e8be..7fa10155ee 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/defaultcolumn/V3DefaultColumnHandler.java
@@ -65,27 +65,39 @@ public class V3DefaultColumnHandler extends
BaseDefaultColumnHandler {
boolean forwardIndexDisabled = !isSingleValue &&
isForwardIndexDisabled(column);
File forwardIndexFile = null;
File invertedIndexFile = null;
+
if (isSingleValue) {
forwardIndexFile = new File(_indexDir, column +
V1Constants.Indexes.SORTED_SV_FORWARD_INDEX_FILE_EXTENSION);
if (!forwardIndexFile.exists()) {
forwardIndexFile = new File(_indexDir, column +
V1Constants.Indexes.UNSORTED_SV_FORWARD_INDEX_FILE_EXTENSION);
}
+ if (!forwardIndexFile.exists()) {
+ forwardIndexFile = new File(_indexDir, column +
V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
+ }
} else {
if (forwardIndexDisabled) {
// An inverted index is created instead of forward index for
multi-value columns with forward index disabled
+ // Note that we don't currently support creation of forward index
disabled derived columns
invertedIndexFile = new File(_indexDir, column +
V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
} else {
forwardIndexFile = new File(_indexDir, column +
V1Constants.Indexes.UNSORTED_MV_FORWARD_INDEX_FILE_EXTENSION);
+ if (!forwardIndexFile.exists()) {
+ forwardIndexFile = new File(_indexDir, column +
V1Constants.Indexes.RAW_MV_FORWARD_INDEX_FILE_EXTENSION);
+ }
}
}
+
if (forwardIndexFile != null) {
LoaderUtils.writeIndexToV3Format(_segmentWriter, column,
forwardIndexFile, StandardIndexes.forward());
}
if (invertedIndexFile != null) {
LoaderUtils.writeIndexToV3Format(_segmentWriter, column,
invertedIndexFile, StandardIndexes.inverted());
}
+
File dictionaryFile = new File(_indexDir, column +
V1Constants.Dict.FILE_EXTENSION);
- LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile,
StandardIndexes.dictionary());
+ if (dictionaryFile.exists()) {
+ LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile,
StandardIndexes.dictionary());
+ }
File nullValueVectorFile = new File(_indexDir, column +
V1Constants.Indexes.NULLVALUE_VECTOR_FILE_EXTENSION);
if (nullValueVectorFile.exists()) {
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
index acdc679256..80178b3fda 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.segment.local.segment.index.loader;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import java.io.File;
@@ -141,6 +142,7 @@ public class SegmentPreProcessorTest {
private static final String NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME =
"newBooleanSVDimension";
private static final String NEW_INT_SV_DIMENSION_COLUMN_NAME =
"newIntSVDimension";
private static final String NEW_STRING_MV_DIMENSION_COLUMN_NAME =
"newStringMVDimension";
+ private static final String NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME =
"newRawStringSVDimension";
private static final String NEW_HLL_BYTE_METRIC_COLUMN_NAME =
"newHLLByteMetric";
private static final String NEW_TDIGEST_BYTE_METRIC_COLUMN_NAME =
"newTDigestByteMetric";
@@ -1101,9 +1103,13 @@ public class SegmentPreProcessorTest {
Collections.emptyList());
IngestionConfig ingestionConfig = new IngestionConfig();
ingestionConfig.setTransformConfigs(
- Collections.singletonList(new
TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)")));
+ ImmutableList.of(
+ new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME,
"plus(column1, 1)"),
+ new TransformConfig(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME,
"reverse(column3)")
+ ));
_tableConfig.setIngestionConfig(ingestionConfig);
_indexLoadingConfig.addInvertedIndexColumns(NEW_COLUMN_INVERTED_INDEX);
+
_indexLoadingConfig.addNoDictionaryColumns(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME);
checkUpdateDefaultColumns();
// Try to use the third schema and update default value again.
@@ -1148,9 +1154,13 @@ public class SegmentPreProcessorTest {
IngestionConfig ingestionConfig = new IngestionConfig();
ingestionConfig.setTransformConfigs(
- Collections.singletonList(new
TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME, "plus(column1, 1)")));
+ ImmutableList.of(
+ new TransformConfig(NEW_INT_SV_DIMENSION_COLUMN_NAME,
"plus(column1, 1)"),
+ new TransformConfig(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME,
"reverse(column3)")
+ ));
_tableConfig.setIngestionConfig(ingestionConfig);
_indexLoadingConfig.addInvertedIndexColumns(NEW_COLUMN_INVERTED_INDEX);
+
_indexLoadingConfig.addNoDictionaryColumns(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME);
checkUpdateDefaultColumns();
// Try to use the third schema and update default value again.
@@ -1257,6 +1267,15 @@ public class SegmentPreProcessorTest {
assertEquals(columnMetadata.getMinValue(), (int)
originalColumnMetadata.getMinValue() + 1);
assertEquals(columnMetadata.getMaxValue(), (int)
originalColumnMetadata.getMaxValue() + 1);
+ columnMetadata =
segmentMetadata.getColumnMetadataFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME);
+ assertEquals(columnMetadata.getFieldSpec(),
+
_newColumnsSchema1.getFieldSpecFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME));
+ assertTrue(columnMetadata.isAutoGenerated());
+ originalColumnMetadata = segmentMetadata.getColumnMetadataFor("column3");
+ assertEquals(columnMetadata.getCardinality(),
originalColumnMetadata.getCardinality());
+ assertEquals(columnMetadata.getBitsPerElement(),
originalColumnMetadata.getBitsPerElement());
+ assertEquals(columnMetadata.getTotalNumberOfEntries(),
originalColumnMetadata.getTotalNumberOfEntries());
+
// Check dictionary and forward index exist.
try (SegmentDirectory segmentDirectory =
SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader()
.load(_indexDir.toURI(),
@@ -1276,6 +1295,9 @@ public class SegmentPreProcessorTest {
assertTrue(reader.hasIndexFor(NEW_INT_SV_DIMENSION_COLUMN_NAME,
StandardIndexes.forward()));
assertTrue(reader.hasIndexFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME,
StandardIndexes.dictionary()));
assertTrue(reader.hasIndexFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME,
StandardIndexes.forward()));
+ // Dictionary shouldn't be created for raw derived column
+ assertFalse(reader.hasIndexFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME,
StandardIndexes.dictionary()));
+ assertTrue(reader.hasIndexFor(NEW_RAW_STRING_SV_DIMENSION_COLUMN_NAME,
StandardIndexes.forward()));
assertTrue(reader.hasIndexFor(NEW_INT_METRIC_COLUMN_NAME,
StandardIndexes.nullValueVector()));
assertTrue(reader.hasIndexFor(NEW_LONG_METRIC_COLUMN_NAME,
StandardIndexes.nullValueVector()));
diff --git a/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json
b/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json
index b6ef01c991..32ba7dcf58 100644
--- a/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json
+++ b/pinot-segment-local/src/test/resources/data/newColumnsSchema1.json
@@ -32,6 +32,11 @@
"dataType": "STRING",
"name": "newStringMVDimension",
"singleValueField": false
+ },
+ {
+ "dataType": "STRING",
+ "name": "newRawStringSVDimension",
+ "defaultNullValue": "null"
}
]
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]