This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this
push:
new df42555 KYLIN-4843 Support INTERSECT_COUNT/INTERSECT_VALUE function
for Kylin 4
df42555 is described below
commit df425556757d0654800d96aa0b11728e8adcc4a6
Author: Zhichao Zhang <[email protected]>
AuthorDate: Wed Dec 16 23:45:11 2020 +0800
KYLIN-4843 Support INTERSECT_COUNT/INTERSECT_VALUE function for Kylin 4
---
.../org/apache/kylin/common/KylinConfigBase.java | 7 +++
.../apache/kylin/metadata/model/FunctionDesc.java | 1 +
.../{query08.sql.disabled => query08.sql} | 3 +-
...e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv.crc | Bin 0 -> 12 bytes
...01bae9-c5b0-4314-b056-3545887f035b-c000.csv.crc | Bin 12 -> 0 bytes
...0-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv | 2 +
...0-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv | 2 -
...c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv.crc | Bin 0 -> 12 bytes
...c2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv.crc | Bin 12 -> 0 bytes
...0-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv | 3 ++
...0-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv | 3 --
...535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv.crc | Bin 0 -> 12 bytes
...b552df-f11a-41ed-8467-4713a29d2dd2-c000.csv.crc | Bin 12 -> 0 bytes
...0-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv | 28 +++++++++++
...0-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv | 28 -----------
...a75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv.crc | Bin 0 -> 12 bytes
...fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv.crc | Bin 12 -> 0 bytes
...0-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv | 2 +
...0-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv | 2 -
...ed9048-d2e2-414a-8588-9ed588204a97-c000.csv.crc | Bin 0 -> 12 bytes
...a313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv.crc | Bin 12 -> 0 bytes
...0-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv | 1 +
...0-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv | 1 -
.../query/sql_intersect_value/query00.sql | 32 +++++++++++++
.../query00.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes
...6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv.crc | Bin 0 -> 12 bytes
.../query00.sql.expected/_SUCCESS | 0
...0-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv | 2 +
.../query01.sql} | 12 ++++-
.../query01.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes
...3c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv.crc | Bin 0 -> 12 bytes
.../query01.sql.expected/_SUCCESS | 0
...0-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv | 3 ++
.../query03.sql} | 53 +++++++++------------
.../query03.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes
...c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv.crc | Bin 0 -> 12 bytes
.../query03.sql.expected/_SUCCESS | 0
...0-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv | 28 +++++++++++
.../query/sql_intersect_value/query04.sql | 33 +++++++++++++
.../query04.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes
...2bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv.crc | Bin 0 -> 12 bytes
.../query04.sql.expected/_SUCCESS | 0
...0-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv | 2 +
.../query05.sql} | 8 +++-
.../query05.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes
...3de84f-1d22-4777-8876-908c30a97290-c000.csv.crc | Bin 0 -> 12 bytes
.../query05.sql.expected/_SUCCESS | 0
...0-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv | 1 +
.../query06.sql} | 12 ++++-
.../query06.sql.expected/._SUCCESS.crc | Bin 0 -> 8 bytes
...e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv.crc | Bin 0 -> 12 bytes
.../query06.sql.expected/_SUCCESS | 0
...0-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv | 2 +
.../org/apache/spark/sql/KylinFunctions.scala | 15 ++++--
.../spark/sql/udaf/IntersectBitmapCounter.scala | 6 +--
.../org/apache/spark/sql/udaf/IntersectCount.scala | 46 ++++++++++++++++--
.../kylin/query/runtime/plans/AggregatePlan.scala | 32 +++++++++----
.../org/apache/spark/sql/SparkOperation.scala | 1 -
.../kylin/engine/spark2/NBuildAndQueryTest.java | 5 +-
.../apache/kylin/engine/spark2/NExecAndComp.java | 11 +++--
60 files changed, 287 insertions(+), 100 deletions(-)
diff --git
a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 8f2bf8e..000ad80 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -2874,6 +2874,13 @@ public abstract class KylinConfigBase implements
Serializable {
}
/**
+ * the maximum number of returned values for intersect_value function
+ */
+ public int getBitmapValuesUpperBound() {
+ return Integer.parseInt(getOptional("kylin.query.bitmap-upper-bound",
"10000000"));
+ }
+
+ /**
* Used to upload user-defined log4j configuration
*/
public String sparkUploadFiles() {
diff --git
a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
index c35ad2f..3f8496b 100644
---
a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
+++
b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
@@ -63,6 +63,7 @@ public class FunctionDesc implements Serializable {
public static final String FUNC_COUNT = "COUNT";
public static final String FUNC_COUNT_DISTINCT = "COUNT_DISTINCT";
public static final String FUNC_INTERSECT_COUNT = "INTERSECT_COUNT";
+ public static final String FUNC_INTERSECT_VALUE = "INTERSECT_VALUE";
public static final String FUNC_GROUPING = "GROUPING";
public static final String FUNC_PERCENTILE = "PERCENTILE_APPROX";
public static final Set<String> BUILT_IN_AGGREGATIONS = Sets.newHashSet();
diff --git a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
b/kylin-it/src/test/resources/query/sql_derived/query08.sql
similarity index 92%
copy from kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
copy to kylin-it/src/test/resources/query/sql_derived/query08.sql
index c4e7d0c..d975b1e 100644
--- a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
+++ b/kylin-it/src/test/resources/query/sql_derived/query08.sql
@@ -16,4 +16,5 @@
-- limitations under the License.
--
-select distinct leaf_categ_id, lstg_site_id from test_kylin_fact
+select distinct leaf_categ_id, lstg_site_id from test_kylin_fact
+;{"scanRowCount":10000,"scanBytes":0,"scanFiles":1,"cuboidId":[2097151]}
\ No newline at end of file
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv.crc
new file mode 100644
index 0000000..5ea0498
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv.crc
deleted file mode 100644
index 9500b67..0000000
Binary files
a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/.part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv.crc
and /dev/null differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv
new file mode 100644
index 0000000..8549d01
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-24e31c51-c35a-4d88-9eea-e3ef825967d8-c000.csv
@@ -0,0 +1,2 @@
+2012-06-23,16,25,17,1,0,0,92,98
+2013-12-22,13,16,21,0,0,0,96,100
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv
deleted file mode 100644
index d688146..0000000
---
a/kylin-it/src/test/resources/query/sql_intersect_count/query00.sql.expected/part-00000-a101bae9-c5b0-4314-b056-3545887f035b-c000.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-2013-12-22,96,96,96,96,96,96,96,100
-2012-06-23,92,92,92,92,92,92,92,98
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv.crc
new file mode 100644
index 0000000..0c55ec2
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv.crc
deleted file mode 100644
index b63189b..0000000
Binary files
a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/.part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv.crc
and /dev/null differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv
new file mode 100644
index 0000000..b43f3fe
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-09c77c16-39dd-489d-9078-2a12bb25ef3f-c000.csv
@@ -0,0 +1,3 @@
+2012-01-01,10,0,0,0,0
+2012-01-02,0,11,0,0,0
+2012-01-03,0,0,12,0,0
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv
deleted file mode 100644
index 8514dd2..0000000
---
a/kylin-it/src/test/resources/query/sql_intersect_count/query01.sql.expected/part-00000-5cc2c54f-b702-47d8-a246-d3eebe73eb20-c000.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-2012-01-03,12,12,12,12,12
-2012-01-01,10,10,10,10,10
-2012-01-02,11,11,11,11,11
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv.crc
new file mode 100644
index 0000000..8e80af5
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv.crc
deleted file mode 100644
index 9b3f176..0000000
Binary files
a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/.part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv.crc
and /dev/null differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv
new file mode 100644
index 0000000..7866262
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-0c535c4a-649e-4a27-8ee3-f839e858e96b-c000.csv
@@ -0,0 +1,28 @@
+9426,1
+10866,1
+13987,1
+26262,1
+32996,1
+62179,1
+67698,1
+95672,1
+152801,1
+164261,1
+11554,0
+20865,0
+24541,0
+43479,0
+44079,0
+156614,0
+161567,0
+1161,0
+1504,0
+15115,0
+61323,0
+66767,0
+95173,0
+99985,0
+106246,0
+139973,0
+148324,0
+166013,0
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv
deleted file mode 100644
index f3e2ce2..0000000
---
a/kylin-it/src/test/resources/query/sql_intersect_count/query03.sql.expected/part-00000-6eb552df-f11a-41ed-8467-4713a29d2dd2-c000.csv
+++ /dev/null
@@ -1,28 +0,0 @@
-24541,1
-67698,1
-32996,1
-44079,1
-1504,1
-13987,2
-10866,1
-62179,1
-43479,1
-148324,1
-11554,2
-156614,1
-166013,1
-139973,1
-26262,1
-1161,1
-20865,1
-66767,1
-9426,1
-164261,1
-106246,1
-152801,3
-15115,1
-99985,1
-61323,1
-95672,1
-95173,2
-161567,1
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv.crc
new file mode 100644
index 0000000..2294938
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv.crc
deleted file mode 100644
index 0645ce5..0000000
Binary files
a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/.part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv.crc
and /dev/null differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv
new file mode 100644
index 0000000..9351364
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-0ba75ff1-920d-4161-a3ad-080d3e71ce25-c000.csv
@@ -0,0 +1,2 @@
+2012-06-23,16,25,17,1,0,0,0,92,98
+2013-12-22,13,16,21,0,0,0,0,96,100
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv
deleted file mode 100644
index ba9e47b..0000000
---
a/kylin-it/src/test/resources/query/sql_intersect_count/query04.sql.expected/part-00000-87fdae83-1182-4f41-b9ca-3431c034bdc4-c000.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-2013-12-22,96,96,96,96,96,96,96,96,100
-2012-06-23,92,92,92,92,92,92,92,92,98
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv.crc
new file mode 100644
index 0000000..c738131
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv.crc
deleted file mode 100644
index c73e8c9..0000000
Binary files
a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/.part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv.crc
and /dev/null differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv
new file mode 100644
index 0000000..2d51ee2
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-a3ed9048-d2e2-414a-8588-9ed588204a97-c000.csv
@@ -0,0 +1 @@
+10,0
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv
deleted file mode 100644
index 3ff9442..0000000
---
a/kylin-it/src/test/resources/query/sql_intersect_count/query05.sql.expected/part-00000-fba313d2-0743-46d8-bcee-b36afdeeaf29-c000.csv
+++ /dev/null
@@ -1 +0,0 @@
-33,33
diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql
b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql
new file mode 100644
index 0000000..51ecd7e
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql
@@ -0,0 +1,32 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select
+week_beg_dt as week,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name,
array['FP-GTC']) as a,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name,
array['Auction']) as b,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name,
array['Others']) as c,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC',
'Auction']) as ab,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC',
'Others']) as ac,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC',
'Auction', 'Others']) as abc,
+count(distinct TEST_COUNT_DISTINCT_BITMAP) as sellers,
+count(*) as cnt
+from test_kylin_fact left join edw.test_cal_dt on test_kylin_fact.cal_dt =
edw.test_cal_dt.CAL_DT
+where week_beg_dt in (DATE '2013-12-22', DATE '2012-06-23')
+group by week_beg_dt
+;{"scanRowCount":10018,"scanBytes":0,"scanFiles":2,"cuboidId":[276480]}
\ No newline at end of file
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/._SUCCESS.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/._SUCCESS.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/.part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/.part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv.crc
new file mode 100644
index 0000000..040ab65
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/.part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/_SUCCESS
b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv
new file mode 100644
index 0000000..0751172
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_value/query00.sql.expected/part-00000-3a6bc9a2-0d5a-4839-94e1-d2ec55db7c1b-c000.csv
@@ -0,0 +1,2 @@
+2012-06-23,"[44,235,257,284,341,363,419,719,780,854,856,862,892,927,957,989]","[4,19,34,55,151,153,182,210,278,288,293,317,360,424,461,513,524,550,675,678,679,760,791,854,972]","[15,28,34,128,161,174,217,270,441,532,589,630,770,876,925,931,961]",[854],"","",92,98
+2013-12-22,"[72,115,188,237,417,483,486,555,638,741,746,891,959]","[58,68,155,246,293,296,307,309,341,470,505,604,627,655,811,919]","[55,101,261,281,298,310,354,399,405,541,553,559,571,606,623,632,689,700,758,767,820]","","","",96,100
diff --git a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql
similarity index 54%
copy from kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
copy to kylin-it/src/test/resources/query/sql_intersect_value/query01.sql
index c4e7d0c..f9af7ab 100644
--- a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
+++ b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql
@@ -15,5 +15,13 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-
-select distinct leaf_categ_id, lstg_site_id from test_kylin_fact
+select CAL_DT,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01']) as
first_day,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-02']) as
second_day,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-03']) as
third_day,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT,
array['2012-01-01','2012-01-02']) as retention_oneday,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT,
array['2012-01-01','2012-01-02','2012-01-03']) as retention_twoday
+from test_kylin_fact
+where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03')
+group by CAL_DT
+;{"scanRowCount":731,"scanBytes":0,"scanFiles":1,"cuboidId":[262144]}
\ No newline at end of file
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/._SUCCESS.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/._SUCCESS.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/.part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/.part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv.crc
new file mode 100644
index 0000000..2aba606
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/.part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/_SUCCESS
b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv
new file mode 100644
index 0000000..767187a
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_value/query01.sql.expected/part-00000-513c8db3-f109-43f8-b8bc-c02ff5f0405c-c000.csv
@@ -0,0 +1,3 @@
+2012-01-01,"[245,246,247,326,430,520,610,709,809,810]","","","",""
+2012-01-02,"","[1,2,121,327,328,329,431,611,612,811,898]","","",""
+2012-01-03,"","","[122,123,124,248,249,330,432,521,613,614,710,711]","",""
diff --git a/kylin-it/src/test/resources/query/sql_extended_column/query00.sql
b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql
similarity index 63%
rename from kylin-it/src/test/resources/query/sql_extended_column/query00.sql
rename to kylin-it/src/test/resources/query/sql_intersect_value/query03.sql
index 530572e..cd5efdd 100644
--- a/kylin-it/src/test/resources/query/sql_extended_column/query00.sql
+++ b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql
@@ -1,30 +1,23 @@
---
--- Licensed to the Apache Software Foundation (ASF) under one
--- or more contributor license agreements. See the NOTICE file
--- distributed with this work for additional information
--- regarding copyright ownership. The ASF licenses this file
--- to you under the Apache License, Version 2.0 (the
--- "License"); you may not use this file except in compliance
--- with the License. You may obtain a copy of the License at
---
--- http://www.apache.org/licenses/LICENSE-2.0
---
--- Unless required by applicable law or agreed to in writing, software
--- distributed under the License is distributed on an "AS IS" BASIS,
--- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--- See the License for the specific language governing permissions and
--- limitations under the License.
---
-
-SELECT
-
-TEST_ORDER.ORDER_ID
-,TEST_EXTENDED_COLUMN
-
-FROM TEST_KYLIN_FACT as TEST_KYLIN_FACT
-INNER JOIN TEST_ORDER as TEST_ORDER
-ON TEST_KYLIN_FACT.ORDER_ID = TEST_ORDER.ORDER_ID
-INNER JOIN TEST_CATEGORY_GROUPINGS as TEST_CATEGORY_GROUPINGS
-ON TEST_KYLIN_FACT.LEAF_CATEG_ID = TEST_CATEGORY_GROUPINGS.LEAF_CATEG_ID AND
TEST_KYLIN_FACT.LSTG_SITE_ID = TEST_CATEGORY_GROUPINGS.SITE_ID
-
-group by TEST_ORDER.ORDER_ID,TEST_EXTENDED_COLUMN
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+select LEAF_CATEG_ID,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array[date'2012-01-01'])
as first_day
+from test_kylin_fact
+where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03')
+group by LEAF_CATEG_ID
+;{"scanRowCount":9562,"scanBytes":0,"scanFiles":1,"cuboidId":[507904]}
\ No newline at end of file
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/._SUCCESS.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/._SUCCESS.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/.part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/.part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv.crc
new file mode 100644
index 0000000..ccffb72
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/.part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/_SUCCESS
b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv
new file mode 100644
index 0000000..888cd05
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_value/query03.sql.expected/part-00000-77c6dcd8-9969-4209-bd1e-b5906fe175c6-c000.csv
@@ -0,0 +1,28 @@
+9426,[610]
+10866,[326]
+13987,[247]
+26262,[246]
+32996,[430]
+62179,[810]
+67698,[709]
+95672,[520]
+152801,[809]
+164261,[245]
+11554,""
+20865,""
+24541,""
+43479,""
+44079,""
+156614,""
+161567,""
+1161,""
+1504,""
+15115,""
+61323,""
+66767,""
+95173,""
+99985,""
+106246,""
+139973,""
+148324,""
+166013,""
diff --git a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql
b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql
new file mode 100644
index 0000000..c9429fa
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql
@@ -0,0 +1,33 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+select
+week_beg_dt as week,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name,
array['FP-GTC']) as a,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name,
array['Auction']) as b,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name,
array['Others']) as c,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC',
'Auction']) as ab,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC',
'Others']) as ac,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC',
'Auction', 'Others']) as abc,
+intersect_value( TEST_COUNT_DISTINCT_BITMAP, lstg_format_name,
array['FP-GTC|Auction', 'Others']) as a_or_b_and_c,
+count(distinct TEST_COUNT_DISTINCT_BITMAP) as sellers,
+count(*) as cnt
+from test_kylin_fact left join edw.test_cal_dt on test_kylin_fact.cal_dt =
edw.test_cal_dt.CAL_DT
+where week_beg_dt in (DATE '2013-12-22', DATE '2012-06-23')
+group by week_beg_dt
+;{"scanRowCount":10018,"scanBytes":0,"scanFiles":2,"cuboidId":[276480]}
\ No newline at end of file
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/._SUCCESS.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/._SUCCESS.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/.part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/.part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv.crc
new file mode 100644
index 0000000..e101351
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/.part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/_SUCCESS
b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv
new file mode 100644
index 0000000..ddf4299
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_value/query04.sql.expected/part-00000-892bf65a-af47-44ab-84b2-56abc974a2b0-c000.csv
@@ -0,0 +1,2 @@
+2012-06-23,"[44,235,257,284,341,363,419,719,780,854,856,862,892,927,957,989]","[4,19,34,55,151,153,182,210,278,288,293,317,360,424,461,513,524,550,675,678,679,760,791,854,972]","[15,28,34,128,161,174,217,270,441,532,589,630,770,876,925,931,961]",[854],"","","",92,98
+2013-12-22,"[72,115,188,237,417,483,486,555,638,741,746,891,959]","[58,68,155,246,293,296,307,309,341,470,505,604,627,655,811,919]","[55,101,261,281,298,310,354,399,405,541,553,559,571,606,623,632,689,700,758,767,820]","","","","",96,100
diff --git a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql
similarity index 69%
copy from kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
copy to kylin-it/src/test/resources/query/sql_intersect_value/query05.sql
index c4e7d0c..d887272 100644
--- a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
+++ b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql
@@ -15,5 +15,9 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
-
-select distinct leaf_categ_id, lstg_site_id from test_kylin_fact
+select
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01']) as
first_day,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, CAL_DT, array['2012-01-01',
'2012-01-02']) as first_and_second_day
+from test_kylin_fact
+where CAL_DT in ('2012-01-01','2012-01-02','2012-01-03')
+;{"scanRowCount":731,"scanBytes":0,"scanFiles":1,"cuboidId":[262144]}
\ No newline at end of file
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/._SUCCESS.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/._SUCCESS.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/.part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/.part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv.crc
new file mode 100644
index 0000000..2b3fd41
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/.part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/_SUCCESS
b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv
new file mode 100644
index 0000000..9286fbf
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_value/query05.sql.expected/part-00000-bf3de84f-1d22-4777-8876-908c30a97290-c000.csv
@@ -0,0 +1 @@
+"[245,246,247,326,430,520,610,709,809,810]",""
diff --git a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql
similarity index 52%
rename from kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
rename to kylin-it/src/test/resources/query/sql_intersect_value/query06.sql
index c4e7d0c..689d980 100644
--- a/kylin-it/src/test/resources/query/sql_derived/query08.sql.disabled
+++ b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql
@@ -16,4 +16,14 @@
-- limitations under the License.
--
-select distinct leaf_categ_id, lstg_site_id from test_kylin_fact
+select
+week_beg_dt as week,
+intersect_count(TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC'])
as a_cnt,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC'])
as a_value,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, lstg_format_name, array['FP-GTC',
'Auction']) as ab,
+intersect_value(TEST_COUNT_DISTINCT_BITMAP, lstg_format_name,
array['FP-GTC|Auction', 'Others']) as a_or_b_and_c,
+count(distinct TEST_COUNT_DISTINCT_BITMAP) as sellers
+from test_kylin_fact left join edw.test_cal_dt on test_kylin_fact.cal_dt =
edw.test_cal_dt.CAL_DT
+where week_beg_dt in (DATE '2013-12-22', DATE '2012-06-23')
+group by week_beg_dt
+;{"scanRowCount":10018,"scanBytes":0,"scanFiles":2,"cuboidId":[276480]}
\ No newline at end of file
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/._SUCCESS.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/._SUCCESS.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/.part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv.crc
b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/.part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv.crc
new file mode 100644
index 0000000..5e55dfd
Binary files /dev/null and
b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/.part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv.crc
differ
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/_SUCCESS
b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git
a/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv
b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv
new file mode 100644
index 0000000..7881113
--- /dev/null
+++
b/kylin-it/src/test/resources/query/sql_intersect_value/query06.sql.expected/part-00000-49e47126-09a8-4b4c-b022-a3c537ab5c04-c000.csv
@@ -0,0 +1,2 @@
+2012-06-23,16,"[44,235,257,284,341,363,419,719,780,854,856,862,892,927,957,989]",[854],"",92
+2013-12-22,13,"[72,115,188,237,417,483,486,555,638,741,746,891,959]","","",96
diff --git
a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/KylinFunctions.scala
b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/KylinFunctions.scala
index 330ded2..7b645fd 100644
---
a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/KylinFunctions.scala
+++
b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/KylinFunctions.scala
@@ -21,7 +21,7 @@ import
org.apache.kylin.engine.spark.common.util.KylinDateTimeUtils
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext,
ExprCode}
-import org.apache.spark.sql.types.{AbstractDataType, DataType, DateType,
IntegerType}
+import org.apache.spark.sql.types._
import org.apache.spark.sql.catalyst.expressions.{BinaryExpression,
DictEncode, Expression, ExpressionInfo, ExpressionUtils,
ImplicitCastInputTypes, In, KylinAddMonths, Like, Literal, RoundBase,
SplitPart, Sum0, TimestampAdd, TimestampDiff, Truncate, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction
import org.apache.spark.sql.udaf.{ApproxCountDistinct, IntersectCount,
PreciseCountDistinct}
@@ -68,11 +68,18 @@ object KylinFunctions {
def approx_count_distinct(column: Column, precision: Int): Column =
Column(ApproxCountDistinct(column.expr, precision).toAggregateExpression())
- def intersect_count(columns: Column*): Column = {
+ def intersect_count(upperBound: Int, columns: Column*): Column = {
require(columns.size == 3, s"Input columns size ${columns.size} don't
equal to 3.")
val expressions = columns.map(_.expr)
- Column(IntersectCount(expressions.apply(0), expressions.apply(1),
expressions.apply(2))
- .toAggregateExpression())
+ Column(IntersectCount(expressions.apply(0), expressions.apply(1),
expressions.apply(2),
+ LongType, upperBound).toAggregateExpression())
+ }
+
+ def intersect_value(upperBound: Int, columns: Column*): Column = {
+ require(columns.size == 3, s"Input columns size ${columns.size} don't
equal to 3.")
+ val expressions = columns.map(_.expr)
+ Column(IntersectCount(expressions.apply(0), expressions.apply(1),
expressions.apply(2),
+ StringType, upperBound).toAggregateExpression())
}
def sum0(e: Column): Column = withAggregateFunction {
diff --git
a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectBitmapCounter.scala
b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectBitmapCounter.scala
index 53436ad..0ed2551 100644
---
a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectBitmapCounter.scala
+++
b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectBitmapCounter.scala
@@ -59,15 +59,15 @@ class IntersectBitmapCounter() {
}
}
- def result(filterSize: Int): Long = {
+ def result(filterSize: Int): Roaring64NavigableMap = {
if (_map.size() != filterSize || _map.size() == 0) {
- 0
+ new Roaring64NavigableMap()
} else {
val bitmap = _map.asScala.values.reduce { (a, b) =>
a.and(b)
a
}
- bitmap.getLongCardinality
+ bitmap
}
}
}
diff --git
a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectCount.scala
b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectCount.scala
index 36e27f6..d05ab3e 100644
---
a/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectCount.scala
+++
b/kylin-spark-project/kylin-spark-common/src/main/scala/org/apache/spark/sql/udaf/IntersectCount.scala
@@ -20,18 +20,22 @@ package org.apache.spark.sql.udaf
import com.esotericsoftware.kryo.KryoException
import com.esotericsoftware.kryo.io.{Input, KryoDataInput, KryoDataOutput,
Output}
+import org.apache.commons.lang3.StringUtils
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import
org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate,
TypedImperativeAggregate}
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
import org.apache.spark.sql.catalyst.util.GenericArrayData
-import org.apache.spark.sql.types.{DataType, LongType}
+import org.apache.spark.sql.types.{ArrayType, DataType, LongType, StringType}
+import org.apache.spark.unsafe.types.UTF8String
import org.roaringbitmap.longlong.Roaring64NavigableMap
import scala.collection.JavaConverters._
@SerialVersionUID(1)
-case class IntersectCount(child1: Expression, child2: Expression, child3:
Expression, mutableAggBufferOffset: Int = 0,
+case class IntersectCount(child1: Expression, child2: Expression, child3:
Expression,
+ returnDataType: DataType, upperBound: Int = 10000000,
+ mutableAggBufferOffset: Int = 0,
inputAggBufferOffset: Int = 0)
extends TypedImperativeAggregate[IntersectBitmapCounter] with Serializable
with Logging {
@@ -41,7 +45,8 @@ case class IntersectCount(child1: Expression, child2:
Expression, child3: Expres
override def update(counter: IntersectBitmapCounter, input: InternalRow):
IntersectBitmapCounter = {
if (filters == null) {
- filters =
child3.eval(input).asInstanceOf[GenericArrayData].array.map(filter => filter ->
filter.toString).toMap
+ filters = child3.eval(input).asInstanceOf[GenericArrayData]
+ .array.map(filter => filter -> filter.toString).toMap
}
val bitmap = child1.eval(input).asInstanceOf[Array[Byte]]
val key = child2.eval(input)
@@ -59,7 +64,38 @@ case class IntersectCount(child1: Expression, child2:
Expression, child3: Expres
}
override def eval(counter: IntersectBitmapCounter): Any = {
-
counter.result(child3.asInstanceOf[Literal].value.asInstanceOf[GenericArrayData].array.distinct.length)
+ val map = counter.result(
+
child3.asInstanceOf[Literal].value.asInstanceOf[GenericArrayData].array.distinct.length)
+ dataType match {
+ // for intersect_count
+ case LongType => map.getLongCardinality
+ // for intersect_value
+ case StringType =>
+ val intCardinality = map.getIntCardinality
+ if (intCardinality > upperBound) {
+ throw new UnsupportedOperationException(s"Cardinality of the bitmap
is greater than " +
+ s"configured upper bound(${upperBound})")
+ }
+ val result = new StringBuffer("")
+ if (intCardinality > 0) {
+ result.append("[").append(StringUtils.join(map.iterator(),
",")).append("]");
+ }
+ UTF8String.fromString(result.toString)
+ case ArrayType(LongType, false) =>
+ val cardinality = map.getIntCardinality
+ if (cardinality > upperBound) {
+ throw new UnsupportedOperationException(s"Cardinality of the bitmap
is greater than " +
+ s"configured upper bound(${upperBound})")
+ }
+ val longs = new Array[Long](cardinality)
+ var id = 0
+ val iterator = map.iterator()
+ while (iterator.hasNext) {
+ longs(id) = iterator.next()
+ id += 1
+ }
+ new GenericArrayData(longs)
+ }
}
var array: Array[Byte] = _
@@ -126,7 +162,7 @@ case class IntersectCount(child1: Expression, child2:
Expression, child3: Expres
override def nullable: Boolean = false
- override def dataType: DataType = LongType
+ override def dataType: DataType = returnDataType
override def children: Seq[Expression] = child1 :: child2 :: child3 :: Nil
}
diff --git
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala
index 4907af3..ab36cfe 100644
---
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala
+++
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/kylin/query/runtime/plans/AggregatePlan.scala
@@ -21,6 +21,7 @@ import org.apache.calcite.DataContext
import org.apache.calcite.rel.core.Aggregate
import org.apache.calcite.rel.core.AggregateCall
import org.apache.calcite.sql.SqlKind
+import org.apache.kylin.common.KylinConfig
import org.apache.kylin.metadata.model.FunctionDesc
import org.apache.kylin.query.relnode.{KylinAggregateCall, OLAPAggregateRel}
import org.apache.kylin.query.runtime.RuntimeHelper
@@ -112,21 +113,34 @@ object AggregatePlan extends LogEx {
val columnName = argNames.map(col)
val registeredFuncName =
RuntimeHelper.registerSingleByColName(funcName, dataType)
val aggName = SchemaProcessor.replaceToAggravateSchemaName(index,
funcName, hash, argNames: _*)
- if (funcName == "COUNT_DISTINCT") {
+ if (funcName == FunctionDesc.FUNC_COUNT_DISTINCT) {
if (dataType.getName == "hllc") {
org.apache.spark.sql.KylinFunctions
.approx_count_distinct(columnName.head, dataType.getPrecision)
.alias(aggName)
- } else {
+ } else if
(call.getAggregation().getName.equalsIgnoreCase(FunctionDesc.FUNC_COUNT_DISTINCT))
{
+ // execute count distinct precisely
KylinFunctions.precise_count_distinct(columnName.head).alias(aggName)
+ } else {
+ // for intersect_count and intersect_value function
+ require(columnName.size == 3, s"Input columns size
${columnName.size} don't equal to 3.")
+ val columns = columnName.zipWithIndex.map {
+ case (column: Column, 2) =>
column.cast(ArrayType.apply(schema.fields.apply(call.getArgList.get(1)).dataType))
+ case (column: Column, _) => column
+ }
+ val upperBound =
KylinConfig.getInstanceFromEnv.getBitmapValuesUpperBound
+ if
(call.getAggregation().getName.equalsIgnoreCase(FunctionDesc.FUNC_INTERSECT_COUNT))
{
+ KylinFunctions.intersect_count(upperBound, columns.toList: _*)
+ .alias(SchemaProcessor
+ .replaceToAggravateSchemaName(index,
FunctionDesc.FUNC_INTERSECT_COUNT, hash,
+ argNames: _*))
+ } else {
+ KylinFunctions.intersect_value(upperBound, columns.toList: _*)
+ .alias(SchemaProcessor
+ .replaceToAggravateSchemaName(index,
FunctionDesc.FUNC_INTERSECT_VALUE, hash,
+ argNames: _*))
+ }
}
- } else if
(funcName.equalsIgnoreCase(FunctionDesc.FUNC_INTERSECT_COUNT)) {
- require(columnName.size == 3, s"Input columns size
${columnName.size} don't equal to 3.")
- val columns = columnName.zipWithIndex.map {
- case (column: Column, 2) =>
column.cast(ArrayType.apply(schema.fields.apply(call.getArgList.get(1)).dataType))
- case (column: Column, _) => column
- }
- KylinFunctions.intersect_count(columns.toList: _*).alias(aggName)
} else {
callUDF(registeredFuncName, columnName.toList: _*).alias(aggName)
}
diff --git
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparkOperation.scala
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparkOperation.scala
index 44791df..800caae 100644
---
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparkOperation.scala
+++
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparkOperation.scala
@@ -21,7 +21,6 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.plans.logical.GroupingSets
import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.{Column, DataFrame, Row, SparderContext}
object SparkOperation {
diff --git
a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
index ce27ff2..c061135 100644
---
a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
+++
b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NBuildAndQueryTest.java
@@ -169,13 +169,13 @@ public class NBuildAndQueryTest extends
LocalWithSparkSessionTest {
// Not support yet
//tasks.add(new QueryCallable(CompareLevel.NONE, joinType,
"sql_expression"));
- //tasks.add(new QueryCallable(CompareLevel.NONE, joinType,
"sql_extended_column"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType,
"sql_function"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType,
"sql_grouping"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType,
"sql_h2"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType,
"sql_hive"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType,
"sql_intersect_count"));
+ tasks.add(new QueryCallable(CompareLevel.SAME, joinType,
"sql_intersect_value"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType,
"sql_join"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType,
"sql_like"));
tasks.add(new QueryCallable(CompareLevel.SAME, joinType,
"sql_lookup"));
@@ -338,7 +338,8 @@ public class NBuildAndQueryTest extends
LocalWithSparkSessionTest {
} else {
List<Quadruple<String, String,
NExecAndComp.ITQueryMetrics, List<String>>> queries =
NExecAndComp.fetchQueries2(KYLIN_SQL_BASE_DIR +
File.separator + sqlFolder);
- NExecAndComp.execAndCompareNew2(queries, getProject(),
compareLevel, joinType, null);
+ NExecAndComp.execAndCompareNew2(queries, getProject(),
compareLevel, joinType,
+ null);
}
} catch (Throwable th) {
logger.error("Query fail on: {}", identity);
diff --git
a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
index 707a906..b570965 100644
---
a/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
+++
b/kylin-spark-project/kylin-spark-test/src/test/java/org/apache/kylin/engine/spark2/NExecAndComp.java
@@ -228,8 +228,10 @@ public class NExecAndComp {
}
// generate results and save them into csv file
try {
- queryResult.getFirst().repartition(1)
- .write().option("header",
false).csv(genResultsCSVFile(query.getFirst()));
+ queryResult.getFirst().repartition(1).write()
+ .option("header", false)
+ .option("nullValue", "\"-\"")
+ .csv(genResultsFiles(query.getFirst()));
} catch (JsonProcessingException e) {
logger.error("Write results as csv file error: ", e);
}
@@ -249,7 +251,8 @@ public class NExecAndComp {
String csvDataPathStr = query.getFirst() + ".expected";
if(new File(csvDataPathStr).exists()) {
logger.debug("Use expected dataset for {}", sql);
- sparkResult =
KylinSparkEnv.getSparkSession().read().csv(csvDataPathStr);
+ sparkResult = KylinSparkEnv.getSparkSession().read()
+ .option("nullValue", "\"-\"").csv(csvDataPathStr);
} else {
sparkResult = queryWithSpark(prj, sql, query.getFirst(),
query.getFourth());
}
@@ -512,7 +515,7 @@ public class NExecAndComp {
return parameters;
}
- public static String genResultsCSVFile(String sqlFileName) throws
IOException {
+ public static String genResultsFiles(String sqlFileName) throws
IOException {
String resultsFielName = sqlFileName + ".expected";
File resultsFile = new File(resultsFielName);
if (resultsFile.exists()) {