This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push:
new 9797e4c Refine Kylin Config (#1726)
9797e4c is described below
commit 9797e4c339d041c344a25a0b92c3763bd5b0e184
Author: Xiaoxiang Yu <[email protected]>
AuthorDate: Mon Aug 16 08:40:58 2021 +0800
Refine Kylin Config (#1726)
* Add deprecated notice for some maven modules
* remove duplicated log
* refine kylin config
* refine sample cube desc
* Fix config entry rename
* add switch for sparder canary tool
---
build/bin/kylin.sh | 4 +-
.../org/apache/kylin/common/KylinConfigBase.java | 546 ++++++++++++++-------
.../java/org/apache/kylin/common/QueryContext.java | 2 -
.../apache/kylin/common/annotation/ConfigTag.java | 32 +-
.../src/main/resources/kylin-defaults.properties | 52 +-
core-dictionary/DEPRECATED_MODULE | 19 +
datasource-sdk/DEPRECATED_MODULE | 19 +
docker/conf/kylin/kylin.properties | 2 +-
docker/dockerfile/standalone/conf/bin/kylin.sh | 4 +-
.../standalone/conf/kylin/kylin.properties | 2 +-
engine-flink/DEPRECATED_MODULE | 19 +
engine-mr/DEPRECATED_MODULE | 19 +
engine-spark/DEPRECATED_MODULE | 19 +
.../template/cube_desc/kylin_sales_cube.json | 9 +-
.../test_case_data/file_prunning/kylin.properties | 2 +-
examples/test_case_data/localmeta/kylin.properties | 2 +-
.../test_case_data/parquet_test/kylin.properties | 2 +-
examples/test_case_data/sandbox/kylin.properties | 2 +-
kylin-it/DEPRECATED_MODULE | 19 +
.../kylin/engine/spark/job/CubeBuildJob.java | 2 -
.../kylin/engine/spark/utils/JobMetricsUtils.scala | 24 +-
.../org/apache/spark/sql/SparderContext.scala | 2 +-
source-jdbc/DEPRECATED_MODULE | 19 +
source-kafka/DEPRECATED_MODULE | 19 +
storage-hbase/DEPRECATED_MODULE | 19 +
storage-stream/DEPRECATED_MODULE | 19 +
stream-coordinator/DEPRECATED_MODULE | 19 +
stream-core/DEPRECATED_MODULE | 19 +
stream-receiver/DEPRECATED_MODULE | 19 +
stream-source-kafka/DEPRECATED_MODULE | 19 +
30 files changed, 711 insertions(+), 244 deletions(-)
diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh
index 9498f0b..a110c8c 100755
--- a/build/bin/kylin.sh
+++ b/build/bin/kylin.sh
@@ -212,7 +212,7 @@ function prepareFairScheduler() {
<pool name="query_pushdown">
<schedulingMode>FAIR</schedulingMode>
<weight>1</weight>
- <minShare>1</minShare>
+ <minShare>0</minShare>
</pool>
<pool name="heavy_tasks">
<schedulingMode>FAIR</schedulingMode>
@@ -227,7 +227,7 @@ function prepareFairScheduler() {
<pool name="vip_tasks">
<schedulingMode>FAIR</schedulingMode>
<weight>15</weight>
- <minShare>1</minShare>
+ <minShare>0</minShare>
</pool>
</allocations>
EOL
diff --git
a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 8915669..6246b94 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -566,42 +566,52 @@ public abstract class KylinConfigBase implements
Serializable {
// DICTIONARY & SNAPSHOT
//
============================================================================
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isUseForestTrieDictionary() {
return
Boolean.parseBoolean(getOptional("kylin.dictionary.use-forest-trie", TRUE));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public long getTrieDictionaryForestMaxTrieSizeMB() {
return
Integer.parseInt(getOptional("kylin.dictionary.forest-trie-max-mb", "500"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public long getCachedDictMaxEntrySize() {
return Long.parseLong(getOptional("kylin.dictionary.max-cache-entry",
"3000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getCachedDictMaxSize() {
return Integer.parseInt(getOptional("kylin.dictionary.max-cache-size",
"-1"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isGrowingDictEnabled() {
return
Boolean.parseBoolean(this.getOptional("kylin.dictionary.growing-enabled",
FALSE));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isDictResuable() {
return
Boolean.parseBoolean(this.getOptional("kylin.dictionary.resuable", FALSE));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public long getCachedDictionaryMaxEntrySize() {
return
Long.parseLong(getOptional("kylin.dictionary.cached-dict-max-cache-entry",
"50000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getAppendDictEntrySize() {
return
Integer.parseInt(getOptional("kylin.dictionary.append-entry-size", "10000000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getAppendDictMaxVersions() {
return
Integer.parseInt(getOptional("kylin.dictionary.append-max-versions", "3"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getAppendDictVersionTTL() {
return
Integer.parseInt(getOptional("kylin.dictionary.append-version-ttl",
"259200000"));
}
@@ -670,43 +680,6 @@ public abstract class KylinConfigBase implements
Serializable {
return getOptional("kylin.dictionary.mr-hive.table.suffix",
"_global_dict");
}
- //
============================================================================
- // Distributed/Spark Global dictionary
- // Add wiki link here
- //
============================================================================
-
-
- public int getGlobalDictV2MinHashPartitions() {
- return
Integer.parseInt(getOptional("kylin.dictionary.globalV2-min-hash-partitions",
"10"));
- }
-
- public int getGlobalDictV2ThresholdBucketSize() {
- return
Integer.parseInt(getOptional("kylin.dictionary.globalV2-threshold-bucket-size",
"500000"));
- }
-
- public int getDictionarySliceEvicationThreshold() {
- return
Integer.parseInt(getOptional("kylin.dictionary.slice.eviction.threshold", "5"));
- }
-
- public double getGlobalDictV2InitLoadFactor() {
- return
Double.parseDouble(getOptional("kylin.dictionary.globalV2-init-load-factor",
"0.5"));
- }
-
- public double getGlobalDictV2BucketOverheadFactor() {
- return
Double.parseDouble(getOptional("kylin.dictionary.globalV2-bucket-overhead-factor",
"1.5"));
- }
-
- public int getGlobalDictV2MaxVersions() {
- return
Integer.parseInt(getOptional("kylin.dictionary.globalV2-max-versions", "3"));
- }
-
- public long getGlobalDictV2VersionTTL() {
- return
Long.parseLong(getOptional("kylin.dictionary.globalV2-version-ttl",
"259200000"));
- }
-
- public boolean isCheckGlobalDictV2() {
- return
Boolean.parseBoolean(getOptional("kylin.dictionary.globalV2-check", "true"));
- }
//
============================================================================
// CUBE
@@ -1106,7 +1079,7 @@ public abstract class KylinConfigBase implements
Serializable {
/**
* was for route to hive, not used any more
*/
- @Deprecated
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getHiveUrl() {
return getOptional("kylin.source.hive.connection-url", "");
}
@@ -1114,7 +1087,7 @@ public abstract class KylinConfigBase implements
Serializable {
/**
* was for route to hive, not used any more
*/
- @Deprecated
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getHiveUser() {
return getOptional("kylin.source.hive.connection-user", "");
}
@@ -1122,7 +1095,7 @@ public abstract class KylinConfigBase implements
Serializable {
/**
* was for route to hive, not used any more
*/
- @Deprecated
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getHivePassword() {
return getOptional("kylin.source.hive.connection-password", "");
}
@@ -1444,7 +1417,7 @@ public abstract class KylinConfigBase implements
Serializable {
return
Integer.parseInt(this.getOptional("kylin.storage.hbase.max-visit-scanrange",
"1000000"));
}
- @ConfigTag(ConfigTag.Tag.UNCATEGORIZED)
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getDefaultIGTStorage() {
return getOptional("kylin.storage.hbase.gtstorage",
"org.apache.kylin.storage.hbase.cube.v2.CubeHBaseEndpointRPC");
@@ -1515,7 +1488,6 @@ public abstract class KylinConfigBase implements
Serializable {
return
Integer.parseInt(getOptional("kylin.storage.hbase.replication-scope", "0"));
}
- @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean cleanStorageAfterDelOperation() {
return
Boolean.parseBoolean(getOptional("kylin.storage.clean-after-delete-operation",
FALSE));
}
@@ -1540,11 +1512,6 @@ public abstract class KylinConfigBase implements
Serializable {
return Integer.parseInt(getOptional("kylin.engine.default", "6"));
}
-
- public String getSparkStandaloneMasterWebUI() {
- return getOptional("kylin.engine.spark.standalone.master.httpUrl", "");
- }
-
public String getKylinJobJarPath() {
final String jobJar = getOptional(KYLIN_ENGINE_MR_JOB_JAR);
if (StringUtils.isNotEmpty(jobJar)) {
@@ -1683,89 +1650,95 @@ public abstract class KylinConfigBase implements
Serializable {
}
//
============================================================================
- // ENGINE.SPARK
+ // ENGINE.SPARK (DEPRECATED)
//
============================================================================
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getHadoopConfDir() {
return getOptional("kylin.env.hadoop-conf-dir", "");
}
- /**
- * Get the sparder app name, default value is: 'sparder_on_localhost-7070'
- */
- public String getSparderAppName() {
- String customSparderAppName =
getOptional("kylin.query.sparder-context.app-name", "");
- if (StringUtils.isEmpty(customSparderAppName)) {
- customSparderAppName =
- "sparder_on_" + getServerRestAddress().replaceAll(":",
"-");
- }
- return customSparderAppName;
- }
-
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getSparkAdditionalJars() {
return getOptional("kylin.engine.spark.additional-jars", "");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getFlinkAdditionalJars() {
return getOptional("kylin.engine.flink.additional-jars", "");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public float getSparkRDDPartitionCutMB() {
return
Float.parseFloat(getOptional("kylin.engine.spark.rdd-partition-cut-mb",
"10.0"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public float getFlinkPartitionCutMB() {
return
Float.parseFloat(getOptional("kylin.engine.flink.partition-cut-mb", "10.0"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getSparkMinPartition() {
return
Integer.parseInt(getOptional("kylin.engine.spark.min-partition", "1"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getFlinkMinPartition() {
return
Integer.parseInt(getOptional("kylin.engine.flink.min-partition", "1"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getSparkMaxPartition() {
return
Integer.parseInt(getOptional("kylin.engine.spark.max-partition", "5000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getFlinkMaxPartition() {
return
Integer.parseInt(getOptional("kylin.engine.flink.max-partition", "5000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getSparkStorageLevel() {
return getOptional("kylin.engine.spark.storage-level",
"MEMORY_AND_DISK_SER");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isSparkSanityCheckEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.engine.spark.sanity-check-enabled",
FALSE));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isSparkFactDistinctEnable() {
return
Boolean.parseBoolean(getOptional("kylin.engine.spark-fact-distinct", "false"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isSparkUHCDictionaryEnable() {
return
Boolean.parseBoolean(getOptional("kylin.engine.spark-udc-dictionary", "false"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isSparkCardinalityEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.engine.spark-cardinality", "false"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getSparkOutputMaxSize() {
return
Integer.valueOf(getOptional("kylin.engine.spark.output.max-size", "10485760"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isSparkDimensionDictionaryEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.engine.spark-dimension-dictionary",
"false"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isFlinkSanityCheckEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.engine.flink.sanity-check-enabled",
FALSE));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isSparCreateHiveTableViaSparkEnable() {
return
Boolean.parseBoolean(getOptional("kylin.engine.spark-create-table-enabled",
"false"));
}
@@ -1774,26 +1747,32 @@ public abstract class KylinConfigBase implements
Serializable {
// ENGINE.LIVY
//
============================================================================
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isLivyEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.engine.livy-conf.livy-enabled", FALSE));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getLivyRestApiBacktick() {
return getOptional("kylin.engine.livy.backtick.quote", "");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getLivyUrl() {
return getOptional("kylin.engine.livy-conf.livy-url");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public Map<String, String> getLivyKey() {
return getPropertiesByPrefix("kylin.engine.livy-conf.livy-key.");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public Map<String, String> getLivyArr() {
return getPropertiesByPrefix("kylin.engine.livy-conf.livy-arr.");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public Map<String, String> getLivyMap() {
return getPropertiesByPrefix("kylin.engine.livy-conf.livy-map.");
}
@@ -1802,6 +1781,7 @@ public abstract class KylinConfigBase implements
Serializable {
// QUERY
//
============================================================================
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isDictionaryEnumeratorEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.query.enable-dict-enumerator", FALSE));
}
@@ -1871,10 +1851,6 @@ public abstract class KylinConfigBase implements
Serializable {
return
Boolean.parseBoolean(getOptional("kylin.query.stream-aggregate-enabled", TRUE));
}
- public String getProjectQuerySparkPool() {
- return getOptional("kylin.query.spark.pool", null);
- }
-
public boolean isProjectIsolationEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.storage.project-isolation-enable",
TRUE));
}
@@ -1908,11 +1884,13 @@ public abstract class KylinConfigBase implements
Serializable {
return
Integer.parseInt(getOptional("kylin.query.project-concurrent-running-threshold",
"0"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public long getQueryMaxScanBytes() {
long value = Long.parseLong(getOptional("kylin.query.max-scan-bytes",
"0"));
return value > 0 ? value : Long.MAX_VALUE;
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public long getQueryMaxReturnRows() {
return
Integer.parseInt(this.getOptional("kylin.query.max-return-rows", "5000000"));
}
@@ -2456,7 +2434,7 @@ public abstract class KylinConfigBase implements
Serializable {
}
//
============================================================================
- // jdbc metadata resource store
+ // Jdbc metadata resource store
//
============================================================================
public String getMetadataDialect() {
@@ -2492,151 +2470,189 @@ public abstract class KylinConfigBase implements
Serializable {
//
============================================================================
// Realtime streaming
//
============================================================================
+
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getStreamingStoreClass() {
return getOptional("kylin.stream.store.class",
"org.apache.kylin.stream.core.storage.columnar.ColumnarSegmentStore");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getStreamingBasicCuboidJobDFSBlockSize() {
return getOptional("kylin.stream.job.dfs.block.size",
String.valueOf(16 * 1024 * 1024));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getStreamingIndexPath() {
return getOptional("kylin.stream.index.path", "stream_index");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingCubeConsumerTasksNum() {
return
Integer.parseInt(getOptional("kylin.stream.cube-num-of-consumer-tasks", "3"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingCubeWindowInSecs() {
return Integer.parseInt(getOptional("kylin.stream.cube.window",
"3600"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingCubeDurationInSecs() {
return Integer.parseInt(getOptional("kylin.stream.cube.duration",
"7200"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingCubeMaxDurationInSecs() {
return Integer.parseInt(getOptional("kylin.stream.cube.duration.max",
"43200"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingCheckPointFileMaxNum() {
return
Integer.parseInt(getOptional("kylin.stream.checkpoint.file.max.num", "5"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingCheckPointIntervalsInSecs() {
return
Integer.parseInt(getOptional("kylin.stream.index.checkpoint.intervals", "300"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingIndexMaxRows() {
return Integer.parseInt(getOptional("kylin.stream.index.maxrows",
"50000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingMaxImmutableSegments() {
return
Integer.parseInt(getOptional("kylin.stream.immutable.segments.max.num", "100"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isStreamingConsumeFromLatestOffsets() {
return
Boolean.parseBoolean(getOptional("kylin.stream.consume.offsets.latest",
"true"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getStreamingNode() {
return getOptional("kylin.stream.node", null);
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public Map<String, String> getStreamingNodeProperties() {
return getPropertiesByPrefix("kylin.stream.node");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getStreamingMetadataStoreType() {
return getOptional("kylin.stream.metadata.store.type", "zk");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getStreamingSegmentRetentionPolicy() {
return getOptional("kylin.stream.segment.retention.policy",
"fullBuild");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getStreamingAssigner() {
return getOptional("kylin.stream.assigner", "DefaultAssigner");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getCoordinatorHttpClientTimeout() {
return
Integer.parseInt(getOptional("kylin.stream.coordinator.client.timeout.millsecond",
"5000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getReceiverHttpClientTimeout() {
return
Integer.parseInt(getOptional("kylin.stream.receiver.client.timeout.millsecond",
"5000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingReceiverHttpMaxThreads() {
return
Integer.parseInt(getOptional("kylin.stream.receiver.http.max.threads", "200"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingReceiverHttpMinThreads() {
return
Integer.parseInt(getOptional("kylin.stream.receiver.http.min.threads", "10"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingReceiverQueryCoreThreads() {
return
Integer.parseInt(getOptional("kylin.stream.receiver.query-core-threads", "50"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingReceiverQueryMaxThreads() {
return
Integer.parseInt(getOptional("kylin.stream.receiver.query-max-threads", "200"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingReceiverUseThreadsPerQuery() {
return
Integer.parseInt(getOptional("kylin.stream.receiver.use-threads-per-query",
"8"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingRPCHttpConnTimeout() {
return
Integer.parseInt(getOptional("kylin.stream.rpc.http.connect.timeout", "10000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingRPCHttpReadTimeout() {
return
Integer.parseInt(getOptional("kylin.stream.rpc.http.read.timeout", "60000"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isStreamingBuildAdditionalCuboids() {
return
Boolean.parseBoolean(getOptional("kylin.stream.build.additional.cuboids",
"false"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public Map<String, String>
getStreamingSegmentRetentionPolicyProperties(String policyName) {
return getPropertiesByPrefix("kylin.stream.segment.retention.policy."
+ policyName + ".");
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingMaxFragmentsInSegment() {
return
Integer.parseInt(getOptional("kylin.stream.segment-max-fragments", "50"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingMinFragmentsInSegment() {
return
Integer.parseInt(getOptional("kylin.stream.segment-min-fragments", "15"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public int getStreamingMaxFragmentSizeInMb() {
return
Integer.parseInt(getOptional("kylin.stream.max-fragment-size-mb", "300"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isStreamingFragmentsAutoMergeEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.stream.fragments-auto-merge-enable",
"true"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isStreamingConcurrentScanEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.stream.segment.concurrent.scan",
"false"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isStreamingStandAloneMode() {
return
Boolean.parseBoolean(getOptional("kylin.stream.stand-alone.mode", "false"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isNewCoordinatorEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.stream.new.coordinator-enabled",
"true"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getLocalStorageImpl() {
return getOptional("kylin.stream.settled.storage", null);
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getStreamMetrics() {
return getOptional("kylin.stream.metrics.option", "");
}
@@ -2644,10 +2660,12 @@ public abstract class KylinConfigBase implements
Serializable {
/**
* whether to print encode integer value for count distinct string value,
only for debug/test purpose
*/
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isPrintRealtimeDictEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.stream.print-realtime-dict-enabled",
"false"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public long getStreamMetricsInterval() {
return Long.parseLong(getOptional("kylin.stream.metrics.interval",
"5"));
}
@@ -2655,14 +2673,17 @@ public abstract class KylinConfigBase implements
Serializable {
/**
* whether realtime query should add timezone offset by kylin's
web-timezone, please refer to KYLIN-4010 for detail
*/
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getStreamingDerivedTimeTimezone() {
return (getOptional("kylin.stream.event.timezone", ""));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public boolean isAutoResubmitDiscardJob() {
return
Boolean.parseBoolean(getOptional("kylin.stream.auto-resubmit-after-discard-enabled",
"true"));
}
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public String getHiveDatabaseLambdaCube() {
return this.getOptional("kylin.stream.hive.database-for-lambda-cube",
DEFAULT);
}
@@ -2691,8 +2712,9 @@ public abstract class KylinConfigBase implements
Serializable {
return
Integer.parseInt(getOptional("kylin.tool.health-check.stale-job-threshold-days",
"30"));
}
+
//
============================================================================
- // Kylin 4.x related
+ // Kylin 4.X Build Engine
//
============================================================================
public String getKylinParquetJobJarPath() {
@@ -2710,6 +2732,7 @@ public abstract class KylinConfigBase implements
Serializable {
/**
* Use https://github.com/spektom/spark-flamegraph for Spark profile
*/
+ @ConfigTag(ConfigTag.Tag.DEBUG_HACK)
public String getSparkSubmitCmd() {
return getOptional("kylin.engine.spark-cmd", null);
}
@@ -2724,32 +2747,6 @@ public abstract class KylinConfigBase implements
Serializable {
return getOptional("kylin.engine.spark.build-class-name",
"org.apache.kylin.engine.spark.job.CubeBuildJob");
}
- public Boolean getSparkEngineTaskImpactInstanceEnabled() {
- return
Boolean.parseBoolean(getOptional("kylin.engine.spark.task-impact-instance-enabled",
"true"));
- }
-
- public int getSparkEngineTaskCoreFactor() {
- return
Integer.parseInt(getOptional("kylin.engine.spark.task-core-factor", "3"));
- }
-
- public int getSparkEngineDriverMemoryBase() {
- return Integer.parseInt(getOptional("kylin.engine.driver-memory-base",
"1024"));
- }
-
- public boolean isTrackingUrlIpAddressEnabled() {
- return
Boolean.valueOf(this.getOptional("kylin.job.tracking-url-ip-address-enabled",
TRUE));
- }
-
- //Auto adjust the memory of driver
- public int[] getSparkEngineDriverMemoryStrategy() {
- String[] dft = {"2", "20", "100"};
- return getOptionalIntArray("kylin.engine.driver-memory-strategy", dft);
- }
-
- public int getSparkEngineDriverMemoryMaximum() {
- return
Integer.parseInt(getOptional("kylin.engine.driver-memory-maximum", "4096"));
- }
-
public StorageURL getJobTmpMetaStoreUrl(String project, String jobId) {
Map<String, String> params = new HashMap<>();
params.put("path", getJobTmpDir(project) + getNestedPath(jobId) +
"meta");
@@ -2857,10 +2854,11 @@ public abstract class KylinConfigBase implements
Serializable {
return
Integer.valueOf(getOptional("kylin.storage.columnar.dfs-replication", "3"));
}
- public boolean isAutoSetSparkConf() {
- return Boolean.parseBoolean(getOptional("kylin.spark-conf.auto.prior",
"true"));
- }
-
+ /**
+ * Read-Write separation deployment for Kylin 4
+ * Please check
https://cwiki.apache.org/confluence/display/KYLIN/Read-Write+Separation+Deployment+for+Kylin+4.0
+ */
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
public String getBuildConf() {
return getOptional("kylin.engine.submit-hadoop-conf-dir", "");
}
@@ -2884,6 +2882,88 @@ public abstract class KylinConfigBase implements
Serializable {
return Integer.parseInt(getOptional("kylin.engine.max-retry-time",
"3"));
}
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+ public int getSnapshotShardSizeMB() {
+ return Integer.parseInt(getOptional("kylin.snapshot.shard-size-mb",
"128"));
+ }
+
+ /**
+ * Whether to calculate cuboid statistics for each segment, which is
needed for cube planner phase two
+ */
+ @ConfigTag(ConfigTag.Tag.NOT_CLEAR)
+ public boolean isSegmentStatisticsEnabled() {
+ return
Boolean.parseBoolean(this.getOptional("kylin.engine.segment-statistics-enabled",
"false"));
+ }
+
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+ public String getParentDatasetStorageLevel() {
+ return
getOptional("kylin.engine.spark.cache-parent-dataset-storage-level", "NONE");
+ }
+
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+ public int getMaxParentDatasetPersistCount() {
+ return
Integer.parseInt(getOptional("kylin.engine.spark.cache-parent-dataset-count",
"1"));
+ }
+
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+ public boolean isBuildBaseCuboid() {
+ return
Boolean.valueOf(getOptional("kylin.engine.build-base-cuboid-enabled", TRUE));
+ }
+
+ public boolean isTrackingUrlIpAddressEnabled() {
+ return
Boolean.valueOf(this.getOptional("kylin.job.tracking-url-ip-address-enabled",
TRUE));
+ }
+
+ //
============================================================================
+ // Kylin 4.X Spark resources automatic adjustment strategy configuration
+ //
============================================================================
+
+ /**
+ * <pre>
+ * For a CubeBuildJob and CubeMergeJob, it is important to allocate enough
and proper resources(cpu/memory), including following config entries mainly:
+ * - spark.driver.memory
+ * - spark.executor.memory
+ * - spark.executor.cores
+ * - spark.executor.memoryOverhead
+ * - spark.executor.instances
+ * - spark.sql.shuffle.partitions
+ *
+ * When `kylin.spark-conf.auto.prior` is set to true, Kylin will try to
adjust above config entries according to:
+ * - Count of cuboids to be built
+ * - (Max)Size of fact table
+ * - Available resources in the current resource manager's queue
+ *
+ * Users can still override individual config entries via
`kylin.engine.spark-conf.XXX` at the cube level.
+ * Check detail at
https://cwiki.apache.org/confluence/display/KYLIN/How+to+improve+cube+building+and+query+performance
+ * </pre>
+ *
+ */
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+ public boolean isAutoSetSparkConf() {
+ return Boolean.parseBoolean(getOptional("kylin.spark-conf.auto.prior",
"true"));
+ }
+
+ public Boolean getSparkEngineTaskImpactInstanceEnabled() {
+ return
Boolean.parseBoolean(getOptional("kylin.engine.spark.task-impact-instance-enabled",
"true"));
+ }
+
+ public int getSparkEngineTaskCoreFactor() {
+ return
Integer.parseInt(getOptional("kylin.engine.spark.task-core-factor", "3"));
+ }
+
+ public int getSparkEngineDriverMemoryBase() {
+ return Integer.parseInt(getOptional("kylin.engine.driver-memory-base",
"1024"));
+ }
+
+ public int[] getSparkEngineDriverMemoryStrategy() {
+ String[] dft = {"2", "20", "100"};
+ return getOptionalIntArray("kylin.engine.driver-memory-strategy", dft);
+ }
+
+ public int getSparkEngineDriverMemoryMaximum() {
+ return
Integer.parseInt(getOptional("kylin.engine.driver-memory-maximum", "4096"));
+ }
+
public double getSparkEngineRetryMemoryGradient() {
return
Double.parseDouble(getOptional("kylin.engine.retry-memory-gradient", "1.5"));
}
@@ -2908,13 +2988,133 @@ public abstract class KylinConfigBase implements
Serializable {
return getOptional("kylin.engine.executor-instance-strategy",
"100,2,500,3,1000,4");
}
- public int getSnapshotShardSizeMB() {
- return Integer.parseInt(getOptional("kylin.snapshot.shard-size-mb",
"128"));
+
+ //
============================================================================
+ // Kylin 4.X Global dictionary
+ // Wiki :
https://cwiki.apache.org/confluence/display/KYLIN/Global+Dictionary+on+Spark
+ //
============================================================================
+
+
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+ public int getGlobalDictV2MinHashPartitions() {
+ return
Integer.parseInt(getOptional("kylin.dictionary.globalV2-min-hash-partitions",
"10"));
+ }
+
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+ public int getGlobalDictV2ThresholdBucketSize() {
+ return
Integer.parseInt(getOptional("kylin.dictionary.globalV2-threshold-bucket-size",
"500000"));
+ }
+
+ public int getDictionarySliceEvicationThreshold() {
+ return
Integer.parseInt(getOptional("kylin.dictionary.slice.eviction.threshold", "5"));
+ }
+
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public double getGlobalDictV2InitLoadFactor() {
+ return
Double.parseDouble(getOptional("kylin.dictionary.globalV2-init-load-factor",
"0.5"));
+ }
+
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public double getGlobalDictV2BucketOverheadFactor() {
+ return
Double.parseDouble(getOptional("kylin.dictionary.globalV2-bucket-overhead-factor",
"1.5"));
+ }
+
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public int getGlobalDictV2MaxVersions() {
+ return
Integer.parseInt(getOptional("kylin.dictionary.globalV2-max-versions", "3"));
+ }
+
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public long getGlobalDictV2VersionTTL() {
+ return
Long.parseLong(getOptional("kylin.dictionary.globalV2-version-ttl",
"259200000"));
+ }
+
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public boolean isCheckGlobalDictV2() {
+ return
Boolean.parseBoolean(getOptional("kylin.dictionary.globalV2-check-enabled",
"true"));
+ }
+
+ /**
+ * Detect dataset skew in dictionary encode step.
+ */
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public boolean detectDataSkewInDictEncodingEnabled() {
+ return
Boolean.valueOf(getOptional("kylin.dictionary.detect-data-skew-sample-enabled",
FALSE));
+ }
+
+ /**
+ * In some data skew cases, the repartition step during dictionary
encoding will be slow.
+ * We can choose to sample from the dataset to detect skew. This
configuration is used to set the sample rate.
+ */
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public double sampleRateInEncodingSkewDetection() {
+ return
Double.valueOf(getOptional("kylin.dictionary.detect-data-skew-sample-rate",
"0.1"));
+ }
+
+ /*
+ * In KYLIN4, dictionaries are hashed into several buckets, column data
are repartitioned by the same hash algorithm
+ * during encoding step too. In data skew cases, the repartition step will
be very slow. Kylin will automatically
+ * sample from the source to detect skewed data and repartition these
skewed data to random partitions.
+ * This configuration is used to set the skew data threshold, valued from
0 to 1.
+ * e.g.
+ * if you set this value to 0.05, every value that accounts for more than
5% of the total will be regarded
+ * as skewed data; as a result, there can be no more than 20
distinct skewed values
+ * */
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public double skewPercentageThreshHold() {
+ return
Double.valueOf(getOptional("kylin.dictionary.detect-data-skew-percentage-threshold",
"0.05"));
+ }
+
+ /***
+ * Global dictionary will be split into several buckets. To encode a
column to int value more
+ * efficiently, source dataset will be repartitioned by the to-be encoded
column to the same
+ * amount of partitions as the dictionary's bucket size.
+ *
+ * This sometimes has a side effect, because repartitioning by a single
column is more likely to cause
+ * serious data skew, so that one task takes the majority of the time when
building the first layer of cuboids.
+ *
+ * In this case, you can try repartitioning the encoded dataset
by all
+ * RowKey columns to avoid data skew. The repartition size defaults to the
max bucket
+ * size of all dictionaries, but you can also set a different value
with this option:
+ * 'kylin.engine.spark.repartition.dataset.after.encode.num'
+ */
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public boolean rePartitionEncodedDatasetWithRowKey() {
+ return
Boolean.valueOf(getOptional("kylin.engine.spark.repartition.dataset.after.encode-enabled",
FALSE));
+ }
+
+ @ConfigTag({ConfigTag.Tag.CUBE_LEVEL})
+ public int getRepartitionNumAfterEncode() {
+ return
Integer.valueOf(getOptional("kylin.engine.spark.repartition.dataset.after.encode.num",
"0"));
+ }
+
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
+ public String getSparkStandaloneMasterWebUI() {
+ return getOptional("kylin.engine.spark.standalone.master.httpUrl", "");
+ }
+
+
+ //
============================================================================
+ // Kylin 4.X Query Engine (SparderContext)
+ //
============================================================================
+
+ /**
+ * Set the sparder app name, default value is:
'sparder_on_${hostname}-${port}'
+ */
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
+ public String getSparderAppName() {
+ String customSparderAppName =
getOptional("kylin.query.sparder-context.app-name", "");
+ if (StringUtils.isEmpty(customSparderAppName)) {
+ customSparderAppName =
+ "sparder_on_" + getServerRestAddress().replaceAll(":",
"-");
+ }
+ return customSparderAppName;
}
/**
* driver memory that can be used by join(mostly BHJ)
*/
+ @ConfigTag(ConfigTag.Tag.DEPRECATED)
public double getJoinMemoryFraction() {
return
Double.parseDouble(getOptional("kylin.query.spark-engine.join-memory-fraction",
"0.3"));
}
@@ -2932,7 +3132,6 @@ public abstract class KylinConfigBase implements
Serializable {
return getLogPropertyFile("spark-executor-log4j.properties");
}
-
private String getLogPropertyFile(String filename) {
if (isDevEnv()) {
return Paths.get(getKylinHomeWithoutWarn(),
@@ -2943,30 +3142,59 @@ public abstract class KylinConfigBase implements
Serializable {
}
}
+ /**
+ * <pre>
+ * SparderContext will try to set `spark.sql.shuffle.partitions` for each
query according to bytes to scan
+ * 1. set to -1 to let the query engine decide it automatically; to be specific, it
is
+ * ${total bytes of all files after pruned by FilePruner} /
KylinConfigBase#getQueryPartitionSplitSizeMB
+ * 2. other positive integer to set a fixed value.
+ * </pre>
+ *
+ * @see KylinConfigBase#getQueryPartitionSplitSizeMB
+ */
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
+ public int getSparkSqlShufflePartitions() {
+ return
Integer.parseInt(getOptional("kylin.query.spark-engine.spark-sql-shuffle-partitions",
+ "-1"));
+ }
+
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
public int getQueryPartitionSplitSizeMB() {
return
Integer.parseInt(getOptional("kylin.query.spark-engine.partition-split-size-mb",
"64"));
}
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
+ public boolean isAutoSetPushDownPartitions() {
+ return Boolean
+
.parseBoolean(this.getOptional("kylin.query.pushdown.auto-set-shuffle-partitions-enabled",
"true"));
+ }
+
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
+ public int getBaseShufflePartitionSize() {
+ return
Integer.parseInt(this.getOptional("kylin.query.pushdown.base-shuffle-partition-size",
"48"));
+ }
+
/**
* The max size in mb handled per task when using shard by column,
* if the sharding size exceeds this value, it will fall back to
non-sharding read RDD
*/
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
public int getMaxShardingSizeMBPerTask() {
return
Integer.parseInt(getOptional("kylin.query.spark-engine.max-sharding-size-mb",
"64"));
}
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
public boolean isShardingJoinOptEnabled() {
return
Boolean.parseBoolean(getOptional("kylin.query.spark-engine.expose-sharding-trait",
"true"));
}
- public int getSparkSqlShufflePartitions() {
- return
Integer.parseInt(getOptional("kylin.query.spark-engine.spark-sql-shuffle-partitions",
- "-1"));
- }
-
+ /**
+ * Set proper resources(cpu and memory) for SparderContext
+ */
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
public Map<String, String> getQuerySparkConf() {
return getPropertiesByPrefix("kylin.query.spark-conf.");
}
@@ -2986,6 +3214,7 @@ public abstract class KylinConfigBase implements
Serializable {
return Integer.parseInt(getOptional("kylin.query.bitmap-upper-bound",
"10000000"));
}
+ @ConfigTag(ConfigTag.Tag.CUBE_LEVEL)
public boolean needReplaceAggWhenExactlyMatched() {
return
Boolean.parseBoolean(getOptional("kylin.query.need-replace-exactly-agg",
"true"));
}
@@ -3045,26 +3274,50 @@ public abstract class KylinConfigBase implements
Serializable {
}
}
- public boolean isAutoSetPushDownPartitions() {
- return Boolean
-
.parseBoolean(this.getOptional("kylin.query.pushdown.auto-set-shuffle-partitions-enabled",
"true"));
- }
-
public String getJobOutputStorePath(String project, String jobId) {
return getSparkLogDir(project) + getNestedPath(jobId) +
"execute_output.json";
}
- public int getBaseShufflePartitionSize() {
- return
Integer.parseInt(this.getOptional("kylin.query.pushdown.base-shuffle-partition-size",
"48"));
+ /**
+ * <pre>
+ * The fair scheduler of Apache Spark supports grouping jobs into pools,
and setting different scheduling options (e.g. weight) for each pool. This can
be useful to create a “high-priority” pool for more important query jobs.
+ *
+ * The query engine of Kylin 4 supports setting the pool for a query at project level and
thread level, and it has built-in pools ($KYLIN_HOME/conf/fairscheduler.xml):
+ *
+ * - lightweight_tasks are queries which do not require all available cpu
cores
+ * - heavy_tasks are queries which require all available cpu cores
+ * - query_pushdown are queries which are not answered by a cube
+ *
+ * Please check following link for detail.
+ * -
http://spark.apache.org/docs/latest/job-scheduling.html#scheduling-within-an-application
+ * -
https://cwiki.apache.org/confluence/display/KYLIN/Use+different+spark+pool+for+different+query
+ * </pre>
+ */
+ @ConfigTag({ConfigTag.Tag.PROJECT_LEVEL, ConfigTag.Tag.THREAD_LEVEL})
+ public String getProjectQuerySparkPool() {
+ return getOptional("kylin.query.spark.pool", null);
}
+ /**
+ * Whether or not to start SparderContext when the query server starts
(default: false).
+ */
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
public boolean isAutoStartSparder() {
- return
Boolean.parseBoolean(this.getOptional("kylin.query.auto-sparder-context",
"false"));
+ return
Boolean.parseBoolean(this.getOptional("kylin.query.auto-sparder-context-enabled",
"false"));
+ }
+
+ /**
+ * whether to enable sparder monitor function
+ */
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
+ public boolean isSparderCanaryEnabled() {
+ return
Boolean.parseBoolean(this.getOptional("kylin.canary.sparder-context-canary-enabled",
TRUE));
}
/**
* Sparder is considered unavailable when the check task is unresponsive
for more than this time
*/
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
public int getSparderCanaryErrorResponseMs() {
return
Integer.parseInt(this.getOptional("kylin.canary.sparder-context-error-response-ms",
"3000"));
}
@@ -3072,6 +3325,7 @@ public abstract class KylinConfigBase implements
Serializable {
/**
* The maximum number of restart sparder when sparder is not available
*/
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
public int getThresholdToRestartSparder() {
return
Integer.parseInt(this.getOptional("kylin.canary.sparder-context-threshold-to-restart-spark",
"3"));
}
@@ -3079,16 +3333,11 @@ public abstract class KylinConfigBase implements
Serializable {
/**
* Time period between two sparder health checks
*/
+ @ConfigTag(ConfigTag.Tag.GLOBAL_LEVEL)
public int getSparderCanaryPeriodMinutes() {
return
Integer.parseInt(this.getOptional("kylin.canary.sparder-context-period-min",
"3"));
}
- /**
- * If we should calculate cuboid statistics for each segment, which is
needed for cube planner phase two
- */
- public boolean isSegmentStatisticsEnabled() {
- return
Boolean.parseBoolean(this.getOptional("kylin.engine.segment-statistics-enabled",
"false"));
- }
//
============================================================================
// Spark with Kerberos
@@ -3154,65 +3403,4 @@ public abstract class KylinConfigBase implements
Serializable {
public String getKerberosPrincipal() {
return getOptional("kylin.kerberos.principal");
}
-
- public String getParentDatasetStorageLevel() {
- return getOptional("kylin.engine.spark.parent-dataset.storage.level",
"NONE");
- }
-
- public int getMaxParentDatasetPersistCount() {
- return
Integer.parseInt(getOptional("kylin.engine.spark.parent-dataset.max.persist.count",
"1"));
- }
-
- public int getRepartitionNumAfterEncode() {
- return
Integer.valueOf(getOptional("kylin.engine.spark.dataset.repartition.num.after.encoding",
"0"));
- }
-
- /***
- * Global dictionary will be split into several buckets. To encode a
column to int value more
- * efficiently, source dataset will be repartitioned by the to-be encoded
column to the same
- * amount of partitions as the dictionary's bucket size.
- *
- * It sometimes bring side effect, because repartitioning by a single
column is more likely to cause
- * serious data skew, causing one task takes the majority of time in first
layer's cuboid building.
- *
- * When faced with this case, you can try repartitioning encoded dataset
by all
- * RowKey columns to avoid data skew. The repartition size is default to
max bucket
- * size of all dictionaries, but you can also set to other flexible value
by this option:
- * 'kylin.engine.spark.dataset.repartition.num.after.encoding'
- ***/
- public boolean rePartitionEncodedDatasetWithRowKey() {
- return
Boolean.valueOf(getOptional("kylin.engine.spark.repartition.encoded.dataset",
"false"));
- }
-
- /*
- * Detect dataset skew in dictionary encode step.
- * */
- public boolean detectDataSkewInDictEncodingEnabled() {
- return
Boolean.valueOf(getOptional("kylin.dictionary.detect.data.skew.in.encoding",
"false"));
- }
-
- /*
- * In some data skew cases, the repartition step during dictionary encoding
will be slow.
- * We can choose to sample from the dataset to detect skewed. This
configuration is used to set the sample rate.
- * */
- public double sampleRateInEncodingSkewDetection() {
- return
Double.valueOf(getOptional("kylin.dictionary.detect.data.skew.sample.rate",
"0.1"));
- }
-
- /*
- * In KYLIN4, dictionaries are hashed into several buckets, column data are
repartitioned by the same hash algorithm
- * during encoding step too. In data skew cases, the repartition step will
be very slow. Kylin will automatically
- * sample from the source to detect skewed data and repartition these
skewed data to random partitions.
- * This configuration is used to set the skew data threshhold, valued from
0 to 1.
- * e.g.
- * if you set this value to 0.05, for each value that takes up more than
5% percent of the total will be regarded
- * as skew data, as a result the skewed data will be no more than 20
records
- * */
- public double skewPercentageThreshHold() {
- return
Double.valueOf(getOptional("kylin.dictionary.data.skew.percentage.threshhold",
"0.05"));
- }
-
- public boolean isBuildBaseCuboid() {
- return Boolean.valueOf(getOptional("kylin.engine.cuboid.build.base",
"true"));
- }
}
diff --git
a/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
b/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
index 16999e9..53f67f9 100644
--- a/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
+++ b/core-common/src/main/java/org/apache/kylin/common/QueryContext.java
@@ -214,12 +214,10 @@ public class QueryContext {
return scanTime.addAndGet(time);
}
- @Clarification(priority = Clarification.Priority.MAJOR, msg = "remove
this")
public void addQueryStopListener(QueryStopListener listener) {
this.stopListeners.add(listener);
}
- @Clarification(priority = Clarification.Priority.MAJOR, msg = "Maybe we
can change spark.scheduler.pool to implement resource isolation.")
public void setHighPriorityQuery(boolean highPriorityQuery) {
isHighPriorityQuery = highPriorityQuery;
}
diff --git
a/core-common/src/main/java/org/apache/kylin/common/annotation/ConfigTag.java
b/core-common/src/main/java/org/apache/kylin/common/annotation/ConfigTag.java
index 53a6967..e82fa49 100644
---
a/core-common/src/main/java/org/apache/kylin/common/annotation/ConfigTag.java
+++
b/core-common/src/main/java/org/apache/kylin/common/annotation/ConfigTag.java
@@ -34,11 +34,17 @@ public @interface ConfigTag {
Tag[] value();
enum Tag {
+
+ // =============== basic classification ===============
+
/**
- * Indicate this property will be removed soon.
+ * Out of date, indicate this property will be removed soon, most of
them are from Kylin 3.X.
*/
DEPRECATED,
+ /**
+ * Not well tested/supported for now.
+ */
NOT_CLEAR,
/**
@@ -47,28 +53,36 @@ public @interface ConfigTag {
NOT_IMPLEMENTED,
/**
- * To be categorized
+ * For hacker or developer, not for user
*/
- UNCATEGORIZED,
+ DEBUG_HACK,
+
+ // =============== configuration level ===============
/**
- * For hacker or developer
+ * Support thread/local level configuration (BackdoorToggles)
*/
- DEBUG_HACK,
+ THREAD_LEVEL,
/**
- * Support cube level configuration
+ * Support cube level configuration (CubeInstance)
*/
CUBE_LEVEL,
/**
- * Support project level configuration
+ * Support project level configuration (ProjectInstance)
*/
PROJECT_LEVEL,
/**
- * Only support global configuration
+ * Only supports global level configuration; requires restarting the Kylin
instance (kylin.properties)
*/
- GLOBAL_LEVEL
+ GLOBAL_LEVEL,
+
+ // =============== importance level ===============
+
+ MAJOR,
+
+ MINOR
}
}
\ No newline at end of file
diff --git a/core-common/src/main/resources/kylin-defaults.properties
b/core-common/src/main/resources/kylin-defaults.properties
index 1009ee6..384466f 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -87,26 +87,9 @@ kylin.server.external-acl-provider=
# Default time filter for job list, 0->current day, 1->last one day, 2->last
one week, 3->last one year, 4->all
kylin.web.default-time-filter=1
-### SOURCE ###
-# Define how to access to hive metadata
-# When user deploy kylin on AWS EMR and Glue is used as external metadata, use
gluecatalog instead
-kylin.source.hive.metadata-type=hcatalog
-
-# Hive client, valid value [cli, beeline]
-kylin.source.hive.client=cli
-
-# Absolute path to beeline shell, can be set to spark beeline instead of the
default hive beeline on PATH
-kylin.source.hive.beeline-shell=beeline
-
-# Hive database name for putting the intermediate flat tables
-kylin.source.hive.database-for-flat-table=default
-
### STORAGE ###
-# The storage for final cube file in hbase
-kylin.storage.url=hbase
-
-# clean real storage after delete operation
+# clean real storage after purge operation
# if you want to delete the real storage like htable of deleting segment, you
can set it to true
kylin.storage.clean-after-delete-operation=false
@@ -136,18 +119,6 @@ kylin.job.scheduler.default=0
kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler
kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor
-
-# 'auto', 'inmem', 'layer' or 'random' for testing
-kylin.cube.algorithm=layer
-
-# A smaller threshold prefers layer, a larger threshold prefers in-mem
-kylin.cube.algorithm.layer-or-inmem-threshold=7
-
-# auto use inmem algorithm:
-# 1, cube planner optimize job
-# 2, no source record
-kylin.cube.algorithm.inmem-auto-optimize=true
-
kylin.cube.aggrgroup.max-combination=32768
kylin.cube.cubeplanner.enabled=false
@@ -160,11 +131,6 @@ kylin.cube.cubeplanner.algorithm-threshold-genetic=23
### QUERY ###
-# Controls the maximum number of bytes a query is allowed to scan storage.
-# The default value 0 means no limit.
-# The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per
coprocessor.
-kylin.query.max-scan-bytes=0
-
kylin.query.cache-enabled=true
kylin.query.cache-threshold-scan-count=10240
kylin.query.cache-threshold-duration=2000
@@ -235,12 +201,19 @@ kylin.security.saml.context-server-name=hostname
kylin.security.saml.context-server-port=443
kylin.security.saml.context-path=/kylin
+##################################
### SPARK BUILD ENGINE CONFIGS ###
# Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit
# This must contain site xmls of core, yarn, hive, and hbase in one folder
#kylin.env.hadoop-conf-dir=/etc/hadoop/conf
+# Switch to spark resources automatic adjustment strategy
+kylin.spark-conf.auto.prior=true
+
+# Read-Write separation deployment for Kylin 4, please check
https://cwiki.apache.org/confluence/display/KYLIN/Read-Write+Separation+Deployment+for+Kylin+4.0
+#kylin.engine.submit-hadoop-conf-dir=
+
# Spark conf (default is in spark/conf/spark-defaults.conf)
kylin.engine.spark-conf.spark.master=yarn
kylin.engine.spark-conf.spark.submit.deployMode=client
@@ -269,11 +242,16 @@
kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dfile.encoding=UTF-8 -D
#kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
kylin.engine.spark-conf.spark.driver.extraJavaOptions=-XX:+CrashOnOutOfMemoryError
-### SPARK QUERY ENGINE CONFIGS (a.k.a. Sparder Context) ###
+##################################
+### SPARK QUERY ENGINE CONFIGS ###
+
# Enlarge cores and memory to improve query performance in production env,
please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X
+#Whether or not to start SparderContext when query server start
+kylin.query.auto-sparder-context-enabled=false
+#kylin.query.sparder-context.app-name=
+
kylin.query.spark-conf.spark.master=yarn
-#kylin.query.spark-conf.spark.submit.deployMode=client
kylin.query.spark-conf.spark.driver.cores=1
kylin.query.spark-conf.spark.driver.memory=4G
kylin.query.spark-conf.spark.driver.memoryOverhead=1G
diff --git a/core-dictionary/DEPRECATED_MODULE
b/core-dictionary/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/core-dictionary/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/datasource-sdk/DEPRECATED_MODULE b/datasource-sdk/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/datasource-sdk/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/docker/conf/kylin/kylin.properties
b/docker/conf/kylin/kylin.properties
index 7054d83..a537d46 100644
--- a/docker/conf/kylin/kylin.properties
+++ b/docker/conf/kylin/kylin.properties
@@ -332,7 +332,7 @@ kylin.storage.columnar.shard-countdistinct-rowcount=1000000
kylin.storage.columnar.repartition-threshold-size-mb=128
kylin.storage.columnar.shard-size-mb=128
-kylin.query.auto-sparder-context=true
+kylin.query.auto-sparder-context-enabled=true
kylin.query.spark-conf.spark.master=yarn
kylin.query.spark-conf.spark.driver.memory=512M
kylin.query.spark-conf.spark.driver.memoryOverhead=512M
diff --git a/docker/dockerfile/standalone/conf/bin/kylin.sh
b/docker/dockerfile/standalone/conf/bin/kylin.sh
index 9498f0b..a110c8c 100755
--- a/docker/dockerfile/standalone/conf/bin/kylin.sh
+++ b/docker/dockerfile/standalone/conf/bin/kylin.sh
@@ -212,7 +212,7 @@ function prepareFairScheduler() {
<pool name="query_pushdown">
<schedulingMode>FAIR</schedulingMode>
<weight>1</weight>
- <minShare>1</minShare>
+ <minShare>0</minShare>
</pool>
<pool name="heavy_tasks">
<schedulingMode>FAIR</schedulingMode>
@@ -227,7 +227,7 @@ function prepareFairScheduler() {
<pool name="vip_tasks">
<schedulingMode>FAIR</schedulingMode>
<weight>15</weight>
- <minShare>1</minShare>
+ <minShare>0</minShare>
</pool>
</allocations>
EOL
diff --git a/docker/dockerfile/standalone/conf/kylin/kylin.properties
b/docker/dockerfile/standalone/conf/kylin/kylin.properties
index 280b846..b555a4a 100644
--- a/docker/dockerfile/standalone/conf/kylin/kylin.properties
+++ b/docker/dockerfile/standalone/conf/kylin/kylin.properties
@@ -336,7 +336,7 @@ kylin.storage.columnar.shard-countdistinct-rowcount=1000000
kylin.storage.columnar.repartition-threshold-size-mb=128
kylin.storage.columnar.shard-size-mb=128
-kylin.query.auto-sparder-context=true
+kylin.query.auto-sparder-context-enabled=true
kylin.query.sparder-context.app-name=sparder_on_docker
kylin.query.spark-conf.spark.master=yarn
kylin.query.spark-conf.spark.driver.memory=512M
diff --git a/engine-flink/DEPRECATED_MODULE b/engine-flink/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/engine-flink/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/engine-mr/DEPRECATED_MODULE b/engine-mr/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/engine-mr/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/engine-spark/DEPRECATED_MODULE b/engine-spark/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/engine-spark/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/examples/sample_cube/template/cube_desc/kylin_sales_cube.json
b/examples/sample_cube/template/cube_desc/kylin_sales_cube.json
index 4c970c5..e344444 100644
--- a/examples/sample_cube/template/cube_desc/kylin_sales_cube.json
+++ b/examples/sample_cube/template/cube_desc/kylin_sales_cube.json
@@ -306,7 +306,14 @@
"storage_type" : %default_storage_type%,
"override_kylin_properties" : {
"kylin.cube.aggrgroup.is-mandatory-only-valid" : "true",
- "kylin.engine.spark.rdd-partition-cut-mb" : "500"
+ "kylin.engine.build-base-cuboid-enabled": "false",
+ "kylin.query.spark.pool": "vip_tasks",
+ "kylin.storage.columnar.shard-countdistinct-rowcount" : "2500",
+ "kylin.query.spark-engine.spark-sql-shuffle-partitions" : "4",
+ "kylin.dictionary.globalV2-min-hash-partitions" : "3",
+ "kylin.dictionary.globalV2-threshold-bucket-size": "1000",
+ "kylin.engine.spark.cache-parent-dataset-count" : "5",
+ "kylin.engine.spark.cache-parent-dataset-storage-level" :
"MEMORY_AND_DISK_SER"
},
"cuboid_black_list" : [ ],
"parent_forward" : 3,
diff --git a/examples/test_case_data/file_prunning/kylin.properties
b/examples/test_case_data/file_prunning/kylin.properties
index 679ef64..1d8f345 100644
--- a/examples/test_case_data/file_prunning/kylin.properties
+++ b/examples/test_case_data/file_prunning/kylin.properties
@@ -157,4 +157,4 @@ kylin.source.jdbc.user=
kylin.source.jdbc.pass=
-kylin.query.auto-sparder-context=false
\ No newline at end of file
+kylin.query.auto-sparder-context-enabled=false
\ No newline at end of file
diff --git a/examples/test_case_data/localmeta/kylin.properties
b/examples/test_case_data/localmeta/kylin.properties
index 4b015c4..cbcd3c2 100644
--- a/examples/test_case_data/localmeta/kylin.properties
+++ b/examples/test_case_data/localmeta/kylin.properties
@@ -159,7 +159,7 @@ kylin.source.jdbc.connection-url=jdbc:h2:mem:db
kylin.source.jdbc.user=
kylin.source.jdbc.pass=
-kylin.query.auto-sparder-context=false
+kylin.query.auto-sparder-context-enabled=false
kylin.metrics.query-cache.expire-seconds=5
kylin.metrics.query-cache.max-entries=2
diff --git a/examples/test_case_data/parquet_test/kylin.properties
b/examples/test_case_data/parquet_test/kylin.properties
index 96ee3c5..b78591d 100644
--- a/examples/test_case_data/parquet_test/kylin.properties
+++ b/examples/test_case_data/parquet_test/kylin.properties
@@ -156,4 +156,4 @@ kylin.source.jdbc.connection-url=jdbc:h2:mem:db
kylin.source.jdbc.user=
kylin.source.jdbc.pass=
-kylin.query.auto-sparder-context=false
\ No newline at end of file
+kylin.query.auto-sparder-context-enabled=false
\ No newline at end of file
diff --git a/examples/test_case_data/sandbox/kylin.properties
b/examples/test_case_data/sandbox/kylin.properties
index 8179b6f..ca2f7f4 100644
--- a/examples/test_case_data/sandbox/kylin.properties
+++ b/examples/test_case_data/sandbox/kylin.properties
@@ -230,4 +230,4 @@ kylin.engine.flink-conf.yarn.nodelabel=
kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer
-kylin.query.auto-sparder-context=false
\ No newline at end of file
+kylin.query.auto-sparder-context-enabled=false
\ No newline at end of file
diff --git a/kylin-it/DEPRECATED_MODULE b/kylin-it/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/kylin-it/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/CubeBuildJob.java
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/CubeBuildJob.java
index 5b6500f..1262ad5 100644
---
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/CubeBuildJob.java
+++
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/job/CubeBuildJob.java
@@ -467,8 +467,6 @@ public class CubeBuildJob extends SparkApplication {
JobMetrics metrics = JobMetricsUtils.collectMetrics(queryExecutionId);
long rowCount = metrics.getMetrics(Metrics.CUBOID_ROWS_CNT());
if (rowCount == -1) {
- infos.recordAbnormalLayouts(layoutId, "'Job metrics seems null,
use count() to collect cuboid rows.'");
- logger.debug("Can not get cuboid row cnt, use count() to collect
cuboid rows.");
long cuboidRowCnt = dataset.count();
layout.setRows(cuboidRowCnt);
// record the row count of cuboid
diff --git
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/utils/JobMetricsUtils.scala
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/utils/JobMetricsUtils.scala
index 7b6e558..dfb0d19 100644
---
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/utils/JobMetricsUtils.scala
+++
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/utils/JobMetricsUtils.scala
@@ -40,9 +40,13 @@ object JobMetricsUtils extends Logging {
if (execution != null) {
metrics = collectOutputRows(execution.executedPlan)
logInfo(s"Collect output rows successfully. $metrics")
- } else {
- logInfo(s"Collect output rows failed.")
}
+
+ // The else branch below is commented out, because collection always fails when using
Apache Spark.
+
+ // else {
+ // logDebug(s"Collect output rows failed.")
+ //}
metrics
}
@@ -86,7 +90,21 @@ object JobMetricsUtils extends Logging {
rowMetrics
}
- // to get actual QueryExecution when write parquet, more info in issue #8212
+ /**
+ * When using a custom Spark that sends an event containing the QueryExecution
belonging to a specific N_EXECUTION_ID_KEY,
+ * Kylin can cache the QueryExecution object in QueryExecutionCache and
collect metrics such as bytes/row count for a cuboid.
+ *
+ override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
+ case e: PostQueryExecutionForKylin =>
+ val nExecutionId =
e.localProperties.getProperty(QueryExecutionCache.N_EXECUTION_ID_KEY, "")
+ if (nExecutionId != "" && e.queryExecution != null) {
+ QueryExecutionCache.setQueryExecution(nExecutionId,
e.queryExecution)
+ } else {
+ logWarning("executionIdStr is null, can't get QueryExecution from
SQLExecution.")
+ }
+ case _ => // Ignore
+ }
+ */
def registerListener(ss: SparkSession): Unit = {
sparkListener = new SparkListener {
diff --git
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
index c6916d2..5ae1961 100644
---
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
+++
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
@@ -205,7 +205,7 @@ object SparderContext extends Logging {
initializingThread.join()
}
- if (System.getProperty("spark.local") ne "true") {
+ if (System.getProperty("spark.local") != "true" &&
KylinConfig.getInstanceFromEnv.isSparderCanaryEnabled) {
//monitor sparder
SparderContextCanary.init()
}
diff --git a/source-jdbc/DEPRECATED_MODULE b/source-jdbc/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/source-jdbc/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/source-kafka/DEPRECATED_MODULE b/source-kafka/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/source-kafka/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/storage-hbase/DEPRECATED_MODULE b/storage-hbase/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/storage-hbase/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/storage-stream/DEPRECATED_MODULE b/storage-stream/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/storage-stream/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/stream-coordinator/DEPRECATED_MODULE
b/stream-coordinator/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/stream-coordinator/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/stream-core/DEPRECATED_MODULE b/stream-core/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/stream-core/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/stream-receiver/DEPRECATED_MODULE
b/stream-receiver/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/stream-receiver/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file
diff --git a/stream-source-kafka/DEPRECATED_MODULE
b/stream-source-kafka/DEPRECATED_MODULE
new file mode 100644
index 0000000..b593cf2
--- /dev/null
+++ b/stream-source-kafka/DEPRECATED_MODULE
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+This maven module is deprecated and may be removed in the future.
\ No newline at end of file