[
https://issues.apache.org/jira/browse/IMPALA-14200?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
weihua zhang updated IMPALA-14200:
----------------------------------
Description:
{code:sql}
CREATE TABLE dept(
deptno INT,
dname string,
loc string
);
CREATE TABLE emp(
empno INT ,
ename string,
job string,
age INT,
mgr INT,
hiredate DATE,
sal double,
comm double,
deptno INT,
email string,
create_datetime TIMESTAMP,
upsert_time TIMESTAMP
);
INSERT INTO DEPT VALUES (10, 'ACCOUNTING', 'NEW YORK');
INSERT INTO DEPT VALUES (20, 'RESEARCH', 'DALLAS');
INSERT INTO DEPT VALUES (30, 'SALES', 'CHICAGO');
INSERT INTO DEPT VALUES (40, 'OPERATIONS', 'BOSTON');
INSERT INTO EMP VALUES (7369, 'SMITH', 'CLERK', 30, 7902, '1980-12-17', 800,
NULL, 20, 'smith@calcite', '2020-01-01 18:35:40', '2020-01-01 18:35:40');
INSERT INTO EMP VALUES (7499, 'ALLEN', 'SALESMAN', 24, 7698, '1981-02-20',
1600, 300, 30, 'allen@calcite', '2018-04-09 09:00:00', '2018-04-09 09:00:00');
INSERT INTO EMP VALUES (7521, 'WARD', 'SALESMAN', 41, 7698, '1981-02-22', 1250,
500, 30, 'ward@calcite', '2019-11-16 10:26:40', '2019-11-16 10:26:40');
INSERT INTO EMP VALUES (7566, 'JONES', 'MANAGER', 28, 7839, '1981-02-04', 2975,
NULL, 20, 'jones@calcite', '2015-03-09 22:16:30', '2015-03-09 22:16:30');
INSERT INTO EMP VALUES (7654, 'MARTIN', 'SALESMAN', 27, 7698, '1981-09-28',
1250, 1400, 30, 'martin@calcite', '2018-09-02 12:12:56', '2018-09-02
12:12:56');
INSERT INTO EMP VALUES (7698, 'BLAKE', 'MANAGER', 38, 7839, '1981-01-05', 2850,
NULL, 30, 'blake@calcite', '2018-06-01 14:45:00', '2018-06-01 14:45:00');
INSERT INTO EMP VALUES (7782, 'CLARK', 'MANAGER', 32, 7839, '1981-06-09', 2450,
NULL, 10, NULL, '2019-09-30 02:14:56', '2019-09-30 02:14:56');
INSERT INTO EMP VALUES (7788, 'SCOTT', 'ANALYST', 45, 7566, '1987-04-19', 3000,
NULL, 20, 'scott@calcite', '2019-07-28 12:12:12', '2019-07-28 12:12:12');
INSERT INTO EMP VALUES (7839, 'KING', 'PRESIDENT', 22, NULL, '1981-11-17',
5000, NULL, 10, 'king@calcite', '2019-06-08 15:15:15', '2019-06-08 15:15:15');
INSERT INTO EMP VALUES (7844, 'TURNER', 'SALESMAN', 54, 7698, '1981-09-08',
1500, 0, 30, 'turner@calcite', '2017-08-17 22:01:37', '2017-08-17 22:01:37');
INSERT INTO EMP VALUES (7876, 'ADAMS', 'CLERK', 35, 7788, '1987-05-23', 1100,
NULL, 20, 'adams@calcite', NULL, '2017-08-18 23:11:06');
INSERT INTO EMP VALUES (7900, 'JAMES', 'CLERK', 40, 7698, '1981-12-03', 950,
NULL, 30, 'james@calcite', '2020-01-02 12:19:00', '2020-01-02 12:19:00');
INSERT INTO EMP VALUES (7902, 'FORD', 'ANALYST', 28, 7566, '1981-12-03', 3000,
NULL, 20, 'ford@calcite', '2019-05-29 00:00:00', '2019-05-29 00:00:00');
INSERT INTO EMP VALUES (7934, 'MILLER', 'CLERK', 32, 7782, '1982-01-23', 1300,
NULL, 10, NULL, '2016-09-02 23:15:01', '2016-09-02 23:15:01');
{code}
{code:sql}
select
count(*) filter (where empno in (select deptno from emp))
from emp;
{code}
maybe equal to:
{code:sql}
SELECT
SUM(CASE WHEN empno IN (SELECT deptno FROM emp) THEN 1 ELSE 0 END) AS
count_matching
FROM emp;
{code}
{noformat}
LogicalAggregate(group=[{}], EXPR$0=[COUNT() FILTER $0]), id = 7123
// <----------- here
LogicalProject($f0=[AND(IS NOT NULL($13), IS NOT NULL($0))]), id = 7122
LogicalJoin(condition=[=($0, $12)], joinType=[left]), id = 7111
LogicalTableScan(table=[[db1, emp]]), id = 7050
LogicalProject(DEPTNO=[$0], i=[true]), id = 7120
LogicalAggregate(group=[{0}]), id = 7117
LogicalProject(DEPTNO=[$8]), id = 7107
LogicalTableScan(table=[[db1, emp]]), id = 7050
{noformat}
was:
{code:sql}
CREATE TABLE dept(
deptno INT,
dname string,
loc string
);
CREATE TABLE emp(
empno INT ,
ename string,
job string,
age INT,
mgr INT,
hiredate DATE,
sal double,
comm double,
deptno INT,
email string,
create_datetime TIMESTAMP,
upsert_time TIMESTAMP
);
INSERT INTO DEPT VALUES (10, 'ACCOUNTING', 'NEW YORK');
INSERT INTO DEPT VALUES (20, 'RESEARCH', 'DALLAS');
INSERT INTO DEPT VALUES (30, 'SALES', 'CHICAGO');
INSERT INTO DEPT VALUES (40, 'OPERATIONS', 'BOSTON');
INSERT INTO EMP VALUES (7369, 'SMITH', 'CLERK', 30, 7902, '1980-12-17', 800,
NULL, 20, 'smith@calcite', '2020-01-01 18:35:40', '2020-01-01 18:35:40');
INSERT INTO EMP VALUES (7499, 'ALLEN', 'SALESMAN', 24, 7698, '1981-02-20',
1600, 300, 30, 'allen@calcite', '2018-04-09 09:00:00', '2018-04-09 09:00:00');
INSERT INTO EMP VALUES (7521, 'WARD', 'SALESMAN', 41, 7698, '1981-02-22', 1250,
500, 30, 'ward@calcite', '2019-11-16 10:26:40', '2019-11-16 10:26:40');
INSERT INTO EMP VALUES (7566, 'JONES', 'MANAGER', 28, 7839, '1981-02-04', 2975,
NULL, 20, 'jones@calcite', '2015-03-09 22:16:30', '2015-03-09 22:16:30');
INSERT INTO EMP VALUES (7654, 'MARTIN', 'SALESMAN', 27, 7698, '1981-09-28',
1250, 1400, 30, 'martin@calcite', '2018-09-02 12:12:56', '2018-09-02
12:12:56');
INSERT INTO EMP VALUES (7698, 'BLAKE', 'MANAGER', 38, 7839, '1981-01-05', 2850,
NULL, 30, 'blake@calcite', '2018-06-01 14:45:00', '2018-06-01 14:45:00');
INSERT INTO EMP VALUES (7782, 'CLARK', 'MANAGER', 32, 7839, '1981-06-09', 2450,
NULL, 10, NULL, '2019-09-30 02:14:56', '2019-09-30 02:14:56');
INSERT INTO EMP VALUES (7788, 'SCOTT', 'ANALYST', 45, 7566, '1987-04-19', 3000,
NULL, 20, 'scott@calcite', '2019-07-28 12:12:12', '2019-07-28 12:12:12');
INSERT INTO EMP VALUES (7839, 'KING', 'PRESIDENT', 22, NULL, '1981-11-17',
5000, NULL, 10, 'king@calcite', '2019-06-08 15:15:15', '2019-06-08 15:15:15');
INSERT INTO EMP VALUES (7844, 'TURNER', 'SALESMAN', 54, 7698, '1981-09-08',
1500, 0, 30, 'turner@calcite', '2017-08-17 22:01:37', '2017-08-17 22:01:37');
INSERT INTO EMP VALUES (7876, 'ADAMS', 'CLERK', 35, 7788, '1987-05-23', 1100,
NULL, 20, 'adams@calcite', NULL, '2017-08-18 23:11:06');
INSERT INTO EMP VALUES (7900, 'JAMES', 'CLERK', 40, 7698, '1981-12-03', 950,
NULL, 30, 'james@calcite', '2020-01-02 12:19:00', '2020-01-02 12:19:00');
INSERT INTO EMP VALUES (7902, 'FORD', 'ANALYST', 28, 7566, '1981-12-03', 3000,
NULL, 20, 'ford@calcite', '2019-05-29 00:00:00', '2019-05-29 00:00:00');
INSERT INTO EMP VALUES (7934, 'MILLER', 'CLERK', 32, 7782, '1982-01-23', 1300,
NULL, 10, NULL, '2016-09-02 23:15:01', '2016-09-02 23:15:01');
{code}
{code:sql}
select
count(*) filter (where empno in (select deptno from emp))
from emp;
{code}
maybe equal to:
{code:sql}
SELECT
SUM(CASE WHEN empno IN (SELECT deptno FROM emp) THEN 1 ELSE 0 END) AS
count_matching
FROM emp;
{code}
{noformat}
LogicalAggregate(group=[{}], EXPR$0=[COUNT() FILTER $0]), id = 7123
LogicalProject($f0=[AND(IS NOT NULL($13), IS NOT NULL($0))]), id = 7122
LogicalJoin(condition=[=($0, $12)], joinType=[left]), id = 7111
LogicalTableScan(table=[[db1, emp]]), id = 7050
LogicalProject(DEPTNO=[$0], i=[true]), id = 7120
LogicalAggregate(group=[{0}]), id = 7117
LogicalProject(DEPTNO=[$8]), id = 7107
LogicalTableScan(table=[[db1, emp]]), id = 7050
{noformat}
> Calcite Planner: Impala needs to implement a converter that transforms
> COUNT() FILTER into a form that Impala can handle.
> -------------------------------------------------------------------------------------------------------------------------
>
> Key: IMPALA-14200
> URL: https://issues.apache.org/jira/browse/IMPALA-14200
> Project: IMPALA
> Issue Type: Sub-task
> Reporter: weihua zhang
> Priority: Major
>
> {code:sql}
> CREATE TABLE dept(
> deptno INT,
> dname string,
> loc string
> );
> CREATE TABLE emp(
> empno INT ,
> ename string,
> job string,
> age INT,
> mgr INT,
> hiredate DATE,
> sal double,
> comm double,
> deptno INT,
> email string,
> create_datetime TIMESTAMP,
> upsert_time TIMESTAMP
> );
> INSERT INTO DEPT VALUES (10, 'ACCOUNTING', 'NEW YORK');
> INSERT INTO DEPT VALUES (20, 'RESEARCH', 'DALLAS');
> INSERT INTO DEPT VALUES (30, 'SALES', 'CHICAGO');
> INSERT INTO DEPT VALUES (40, 'OPERATIONS', 'BOSTON');
> INSERT INTO EMP VALUES (7369, 'SMITH', 'CLERK', 30, 7902, '1980-12-17', 800,
> NULL, 20, 'smith@calcite', '2020-01-01 18:35:40', '2020-01-01 18:35:40');
> INSERT INTO EMP VALUES (7499, 'ALLEN', 'SALESMAN', 24, 7698, '1981-02-20',
> 1600, 300, 30, 'allen@calcite', '2018-04-09 09:00:00', '2018-04-09
> 09:00:00');
> INSERT INTO EMP VALUES (7521, 'WARD', 'SALESMAN', 41, 7698, '1981-02-22',
> 1250, 500, 30, 'ward@calcite', '2019-11-16 10:26:40', '2019-11-16 10:26:40');
> INSERT INTO EMP VALUES (7566, 'JONES', 'MANAGER', 28, 7839, '1981-02-04',
> 2975, NULL, 20, 'jones@calcite', '2015-03-09 22:16:30', '2015-03-09
> 22:16:30');
> INSERT INTO EMP VALUES (7654, 'MARTIN', 'SALESMAN', 27, 7698, '1981-09-28',
> 1250, 1400, 30, 'martin@calcite', '2018-09-02 12:12:56', '2018-09-02
> 12:12:56');
> INSERT INTO EMP VALUES (7698, 'BLAKE', 'MANAGER', 38, 7839, '1981-01-05',
> 2850, NULL, 30, 'blake@calcite', '2018-06-01 14:45:00', '2018-06-01
> 14:45:00');
> INSERT INTO EMP VALUES (7782, 'CLARK', 'MANAGER', 32, 7839, '1981-06-09',
> 2450, NULL, 10, NULL, '2019-09-30 02:14:56', '2019-09-30 02:14:56');
> INSERT INTO EMP VALUES (7788, 'SCOTT', 'ANALYST', 45, 7566, '1987-04-19',
> 3000, NULL, 20, 'scott@calcite', '2019-07-28 12:12:12', '2019-07-28
> 12:12:12');
> INSERT INTO EMP VALUES (7839, 'KING', 'PRESIDENT', 22, NULL, '1981-11-17',
> 5000, NULL, 10, 'king@calcite', '2019-06-08 15:15:15', '2019-06-08
> 15:15:15');
> INSERT INTO EMP VALUES (7844, 'TURNER', 'SALESMAN', 54, 7698, '1981-09-08',
> 1500, 0, 30, 'turner@calcite', '2017-08-17 22:01:37', '2017-08-17 22:01:37');
> INSERT INTO EMP VALUES (7876, 'ADAMS', 'CLERK', 35, 7788, '1987-05-23', 1100,
> NULL, 20, 'adams@calcite', NULL, '2017-08-18 23:11:06');
> INSERT INTO EMP VALUES (7900, 'JAMES', 'CLERK', 40, 7698, '1981-12-03', 950,
> NULL, 30, 'james@calcite', '2020-01-02 12:19:00', '2020-01-02 12:19:00');
> INSERT INTO EMP VALUES (7902, 'FORD', 'ANALYST', 28, 7566, '1981-12-03',
> 3000, NULL, 20, 'ford@calcite', '2019-05-29 00:00:00', '2019-05-29
> 00:00:00');
> INSERT INTO EMP VALUES (7934, 'MILLER', 'CLERK', 32, 7782, '1982-01-23',
> 1300, NULL, 10, NULL, '2016-09-02 23:15:01', '2016-09-02 23:15:01');
> {code}
> {code:sql}
> select
> count(*) filter (where empno in (select deptno from emp))
> from emp;
> {code}
> maybe equal to:
> {code:sql}
> SELECT
> SUM(CASE WHEN empno IN (SELECT deptno FROM emp) THEN 1 ELSE 0 END) AS
> count_matching
> FROM emp;
> {code}
> {noformat}
> LogicalAggregate(group=[{}], EXPR$0=[COUNT() FILTER $0]), id = 7123
> // <----------- here
> LogicalProject($f0=[AND(IS NOT NULL($13), IS NOT NULL($0))]), id = 7122
> LogicalJoin(condition=[=($0, $12)], joinType=[left]), id = 7111
> LogicalTableScan(table=[[db1, emp]]), id = 7050
> LogicalProject(DEPTNO=[$0], i=[true]), id = 7120
> LogicalAggregate(group=[{0}]), id = 7117
> LogicalProject(DEPTNO=[$8]), id = 7107
> LogicalTableScan(table=[[db1, emp]]), id = 7050
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]