[
https://issues.apache.org/jira/browse/CALCITE-2907?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16789363#comment-16789363
]
Lai Zhou commented on CALCITE-2907:
-----------------------------------
[~danny0405]
But this SQL query works, and the final result is correct.
{code:java}
-- Per (user_id, order_id, product_id) group: the number of distinct phone
-- values and the number of non-null phone values.
-- Note: the alias seg3stddev holds a plain count(phone) in this query.
SELECT user_id,
order_id,
product_id,
count(DISTINCT phone) AS contact_count,
count(phone) AS seg3stddev
FROM dw_risk__mygravitation_v_snap_contacts_contacts
GROUP BY user_id,
order_id,
product_id
{code}
I'm analyzing the execution code that Calcite generated for the SQL above; it may be
helpful to you:
{code:java}
// Accumulator record for the first aggregation stage in bind(): a single long
// field, f0, which the first accumulator adder there increments as a counter.
// Note: the class declares compareTo(...) but only implements
// java.io.Serializable, not java.lang.Comparable.
public static class Record1_0 implements java.io.Serializable {
// Counter value; the accumulator initializer in bind() seeds it with 0L.
public long f0;
public Record1_0() {}
// Two records are equal when their f0 values are equal.
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof Record1_0)) {
return false;
}
return this.f0 == ((Record1_0) o).f0;
}
// Hash derived from f0 only, so it agrees with equals().
public int hashCode() {
int h = 0;
h = org.apache.calcite.runtime.Utilities.hash(h, this.f0);
return h;
}
// Orders records by f0; returns 0 when the f0 values compare as equal.
public int compareTo(Record1_0 that) {
final int c;
c = org.apache.calcite.runtime.Utilities.compare(this.f0, that.f0);
if (c != 0) {
return c;
}
return 0;
}
// Renders the record as "{f0=<value>}".
public String toString() {
return "{f0=" + this.f0 + "}";
}
}
// Accumulator record for the second aggregation stage in bind(). It carries the
// state of two aggregate calls: f0 for the first, and the pair (f1, f2) for the
// second.
// Note: the class declares compareTo(...) but only implements
// java.io.Serializable, not java.lang.Comparable.
public static class Record3_1 implements java.io.Serializable {
// Counter incremented by the first adder in bind(); seeded with 0L.
public long f0;
// Running value folded through SqlFunctions.lesser(...) by the second adder in
// bind(); seeded with Long.MAX_VALUE (9223372036854775807L).
public long f1;
// Set to true once the second adder has folded at least one row into f1; the
// result selector in bind() emits null instead of f1 while this is false.
public boolean f2;
public Record3_1() {}
// Two records are equal when all three fields are equal.
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof Record3_1)) {
return false;
}
return this.f0 == ((Record3_1) o).f0 && this.f1 == ((Record3_1) o).f1 &&
this.f2 == ((Record3_1) o).f2;
}
// Hash combines f0, f1 and f2, in that order.
public int hashCode() {
int h = 0;
h = org.apache.calcite.runtime.Utilities.hash(h, this.f0);
h = org.apache.calcite.runtime.Utilities.hash(h, this.f1);
h = org.apache.calcite.runtime.Utilities.hash(h, this.f2);
return h;
}
// Compares field by field (f0, then f1, then f2) and returns the first
// non-zero comparison result, or 0 if all fields compare as equal.
public int compareTo(Record3_1 that) {
int c;
c = org.apache.calcite.runtime.Utilities.compare(this.f0, that.f0);
if (c != 0) {
return c;
}
c = org.apache.calcite.runtime.Utilities.compare(this.f1, that.f1);
if (c != 0) {
return c;
}
c = org.apache.calcite.runtime.Utilities.compare(this.f2, that.f2);
if (c != 0) {
return c;
}
return 0;
}
// Renders the record as "{f0=<v>, f1=<v>, f2=<v>}".
public String toString() {
return "{f0=" + this.f0 + ", f1=" + this.f1 + ", f2=" + this.f2 + "}";
}
}
// Builds the Enumerable that evaluates the query against the given DataContext.
//
// The pipeline visible below has five steps:
//   1. child:             project four values out of each table row.
//   2. _inputEnumerable0: first aggregation (groupByMultiple over two key
//                         selectors) producing a count plus a grouping bitmask.
//   3. child0:            append two boolean columns derived from that bitmask.
//   4. _inputEnumerable1: second aggregation over the three leading key columns,
//                         with each aggregate restricted by one of the booleans.
//   5. returned value:    final projection that unboxes the last column.
//
// root: the DataContext whose root schema supplies the table.
// Returns: an Enumerable whose elements are Object[] rows of five values.
public org.apache.calcite.linq4j.Enumerable bind(final
org.apache.calcite.DataContext root) {
// Source: the table's rows, read as Object[].
final org.apache.calcite.linq4j.Enumerable _inputEnumerable =
org.apache.calcite.schema.Schemas.queryable(root, root.getRootSchema(),
java.lang.Object[].class,
"DW_RISK__MYGRAVITATION_V_SNAP_CONTACTS_CONTACTS").asEnumerable();
// Step 1: projection. Delegates iteration to the source enumerator and
// reshapes each row in current().
final org.apache.calcite.linq4j.AbstractEnumerable child = new
org.apache.calcite.linq4j.AbstractEnumerable(){
public org.apache.calcite.linq4j.Enumerator enumerator() {
return new org.apache.calcite.linq4j.Enumerator(){
public final org.apache.calcite.linq4j.Enumerator inputEnumerator =
_inputEnumerable.enumerator();
public void reset() {
inputEnumerator.reset();
}
public boolean moveNext() {
return inputEnumerator.moveNext();
}
public void close() {
inputEnumerator.close();
}
public Object current() {
final Object[] current = (Object[]) inputEnumerator.current();
// Output row: source columns 4, 3 and 5, followed by the UDF applied to the
// string form of source column 8 (a null column is passed to the UDF as null).
// A new UDF instance is constructed on every call.
// NOTE(review): presumably columns 4/3/5 are user_id/order_id/product_id and
// column 8 is phone - confirm against the table schema.
return new Object[] {
current[4],
current[3],
current[5],
new
com.enniu.cloud.services.dscompute.udfs.UenniuSecurityAesDecrypt().call(current[8]
== null ? (String) null : current[8].toString())};
}
};
}
};
// Step 2 setup: accumulator adders for the first aggregation.
java.util.List accumulatorAdders = new java.util.LinkedList();
// Adder 1: increment the counter when the projected value in[3] is non-null.
accumulatorAdders.add(new org.apache.calcite.linq4j.function.Function2() {
public Record1_0 apply(Record1_0 acc, Object[] in) {
if (!(in[3] == null || in[3].toString() == null)) {
acc.f0++;
}
return acc;
}
public Record1_0 apply(Object acc, Object in) {
return apply(
(Record1_0) acc,
(Object[]) in);
}
}
);
// Adder 2: no-op; returns the accumulator unchanged.
accumulatorAdders.add(new org.apache.calcite.linq4j.function.Function2() {
public Record1_0 apply(Record1_0 acc, Object[] in) {
return acc;
}
public Record1_0 apply(Object acc, Object in) {
return apply(
(Record1_0) acc,
(Object[]) in);
}
}
);
// Accumulator initializer: a fresh Record1_0 with f0 = 0.
org.apache.calcite.adapter.enumerable.AggregateLambdaFactory lambdaFactory =
new org.apache.calcite.adapter.enumerable.SequencedAdderAggregateLambdaFactory(
new org.apache.calcite.linq4j.function.Function0() {
public Object apply() {
long a0s0;
a0s0 = 0L;
Record1_0 record0;
record0 = new Record1_0();
record0.f0 = a0s0;
return record0;
}
}
,
accumulatorAdders);
// Step 2: first aggregation, grouped under two key selectors. Each key is the
// four projected values followed by four boolean flags.
final org.apache.calcite.linq4j.Enumerable _inputEnumerable0 =
org.apache.calcite.linq4j.EnumerableDefaults.groupByMultiple(child,
java.util.Arrays.asList(new org.apache.calcite.linq4j.function.Function1() {
// Key selector 1: all four values are part of the key; all flags are false.
public java.util.List apply(Object[] a0) {
return org.apache.calcite.runtime.FlatLists.copyOf(new Comparable[] {
a0[0] == null ? (String) null : a0[0].toString(),
(Long) a0[1],
(Integer) a0[2],
a0[3] == null ? (String) null : a0[3].toString(),
false,
false,
false,
false});
}
public Object apply(Object a0) {
return apply(
(Object[]) a0);
}
}
, new org.apache.calcite.linq4j.function.Function1() {
// Key selector 2: the fourth value is replaced by null and the last flag is
// true, so rows are grouped by the first three values only.
public java.util.List apply(Object[] a0) {
return org.apache.calcite.runtime.FlatLists.copyOf(new Comparable[] {
a0[0] == null ? (String) null : a0[0].toString(),
(Long) a0[1],
(Integer) a0[2],
null,
false,
false,
false,
true});
}
public Object apply(Object a0) {
return apply(
(Object[]) a0);
}
}
), lambdaFactory.accumulatorInitializer(), lambdaFactory.accumulatorAdder(),
lambdaFactory.resultSelector(new org.apache.calcite.linq4j.function.Function2()
{
// Result row (six values): the four key values (each nulled out when its
// flag is set), the count acc.f0, and a bitmask built from the four flags
// (8, 4, 2, 1). With the two key selectors above the bitmask is 0 or 1.
public Object[] apply(org.apache.calcite.runtime.FlatLists.ComparableList key,
Record1_0 acc) {
return new Object[] {
org.apache.calcite.runtime.SqlFunctions.toBoolean(key.get(4)) ? (String) null :
key.get(0) == null ? (String) null : key.get(0).toString(),
org.apache.calcite.runtime.SqlFunctions.toBoolean(key.get(5)) ? (Long) null :
(Long) key.get(1),
org.apache.calcite.runtime.SqlFunctions.toBoolean(key.get(6)) ? (Integer) null
: (Integer) key.get(2),
org.apache.calcite.runtime.SqlFunctions.toBoolean(key.get(7)) ? (String) null :
key.get(3) == null ? (String) null : key.get(3).toString(),
acc.f0,
( org.apache.calcite.runtime.SqlFunctions.toBoolean(key.get(4)) ? 8L : 0L) +
(org.apache.calcite.runtime.SqlFunctions.toBoolean(key.get(5)) ? 4L : 0L) +
(org.apache.calcite.runtime.SqlFunctions.toBoolean(key.get(6)) ? 2L : 0L) +
(org.apache.calcite.runtime.SqlFunctions.toBoolean(key.get(7)) ? 1L : 0L)};
}
public Object[] apply(Object key, Object acc) {
return apply(
(org.apache.calcite.runtime.FlatLists.ComparableList) key,
(Record1_0) acc);
}
}
));
// Step 3: projection over the first aggregation's output. Keeps columns 0-4
// and appends two booleans computed from the bitmask in column 5.
final org.apache.calcite.linq4j.AbstractEnumerable child0 = new
org.apache.calcite.linq4j.AbstractEnumerable(){
public org.apache.calcite.linq4j.Enumerator enumerator() {
return new org.apache.calcite.linq4j.Enumerator(){
public final org.apache.calcite.linq4j.Enumerator inputEnumerator =
_inputEnumerable0.enumerator();
public void reset() {
inputEnumerator.reset();
}
public boolean moveNext() {
return inputEnumerator.moveNext();
}
public void close() {
inputEnumerator.close();
}
public Object current() {
final Object[] current = (Object[]) inputEnumerator.current();
final long inp5_ = org.apache.calcite.runtime.SqlFunctions.toLong(current[5]);
// Column 5 of the output: result of the Hive "=" UDF on (inp5_, 0);
// column 6: result of the same UDF on (inp5_, 1).
// NOTE(review): presumably these are plain equality tests, i.e. "bitmask = 0"
// marks rows from key selector 1 and "bitmask = 1" rows from key selector 2.
return new Object[] {
current[0],
current[1],
current[2],
current[3],
current[4],
org.apache.calcite.runtime.SqlFunctions.toBoolean(org.apache.calcite.hivesql.function.ObjectConvertUtil.castBoolean(org.apache.calcite.hivesql.function.HiveUDFImplementor.callGenericUDF(org.apache.calcite.hivesql.function.HiveUDFImplementor.newGenericUDF("=",
org.apache.calcite.sql.SqlSyntax.BINARY), new Object[] {
inp5_,
0}, new org.apache.calcite.hivesql.function.RelDataTypeHolder[] {
new org.apache.calcite.hivesql.function.RelDataTypeHolder(
org.apache.calcite.sql.type.SqlTypeName.BIGINT),
new org.apache.calcite.hivesql.function.RelDataTypeHolder(
org.apache.calcite.sql.type.SqlTypeName.INTEGER,
true,
0)}))),
org.apache.calcite.runtime.SqlFunctions.toBoolean(org.apache.calcite.hivesql.function.ObjectConvertUtil.castBoolean(org.apache.calcite.hivesql.function.HiveUDFImplementor.callGenericUDF(org.apache.calcite.hivesql.function.HiveUDFImplementor.newGenericUDF("=",
org.apache.calcite.sql.SqlSyntax.BINARY), new Object[] {
inp5_,
1}, new org.apache.calcite.hivesql.function.RelDataTypeHolder[] {
new org.apache.calcite.hivesql.function.RelDataTypeHolder(
org.apache.calcite.sql.type.SqlTypeName.BIGINT),
new org.apache.calcite.hivesql.function.RelDataTypeHolder(
org.apache.calcite.sql.type.SqlTypeName.INTEGER,
true,
1)})))};
}
};
}
};
// Step 4 setup: accumulator adders for the second aggregation.
java.util.List accumulatorAdders0 = new java.util.LinkedList();
// Adder 1: count rows whose value in[3] is non-null, restricted to rows where
// the boolean in[5] is true. Each such row is one distinct non-null value
// within its group, which presumably implements count(DISTINCT ...).
accumulatorAdders0.add(new org.apache.calcite.linq4j.function.Function2() {
public Record3_1 apply(Record3_1 acc, Object[] in) {
if (!(in[3] == null || in[3].toString() == null) &&
org.apache.calcite.runtime.SqlFunctions.toBoolean(in[5])) {
acc.f0++;
}
return acc;
}
public Record3_1 apply(Object acc, Object in) {
return apply(
(Record3_1) acc,
(Object[]) in);
}
}
);
// Adder 2: for rows where the boolean in[6] is true, mark the accumulator as
// populated and fold the first-stage count in[4] into f1 through lesser(...).
// NOTE(review): this looks like the MIN(...) call the issue asks about. The
// first stage should emit one such row per three-column group, so the fold
// would simply carry that row's count through - confirm.
accumulatorAdders0.add(new org.apache.calcite.linq4j.function.Function2() {
public Record3_1 apply(Record3_1 acc, Object[] in) {
if (org.apache.calcite.runtime.SqlFunctions.toBoolean(in[6])) {
acc.f2 = true;
acc.f1 = org.apache.calcite.runtime.SqlFunctions.lesser(acc.f1,
org.apache.calcite.runtime.SqlFunctions.toLong(in[4]));
}
return acc;
}
public Record3_1 apply(Object acc, Object in) {
return apply(
(Record3_1) acc,
(Object[]) in);
}
}
);
// Accumulator initializer: f0 = 0, f1 = Long.MAX_VALUE, f2 = false.
org.apache.calcite.adapter.enumerable.AggregateLambdaFactory lambdaFactory0 =
new org.apache.calcite.adapter.enumerable.SequencedAdderAggregateLambdaFactory(
new org.apache.calcite.linq4j.function.Function0() {
public Object apply() {
long a0s0;
a0s0 = 0L;
long a1s0;
boolean a1s1;
a1s1 = false;
a1s0 = 9223372036854775807L;
Record3_1 record0;
record0 = new Record3_1();
record0.f0 = a0s0;
record0.f1 = a1s0;
record0.f2 = a1s1;
return record0;
}
}
,
accumulatorAdders0);
// Step 4: second aggregation, keyed on the first three columns only.
final org.apache.calcite.linq4j.Enumerable _inputEnumerable1 =
child0.groupBy(new org.apache.calcite.linq4j.function.Function1() {
public java.util.List apply(Object[] a0) {
return org.apache.calcite.runtime.FlatLists.of(a0[0] == null ? (String) null :
a0[0].toString(), (Long) a0[1], (Integer) a0[2]);
}
public Object apply(Object a0) {
return apply(
(Object[]) a0);
}
}
, lambdaFactory0.accumulatorInitializer(), lambdaFactory0.accumulatorAdder(),
lambdaFactory0.resultSelector(new
org.apache.calcite.linq4j.function.Function2() {
// Result row (five values): the three key values, the counter f0, and f1 as a
// boxed Long, or null when adder 2 never fired for the group (f2 is false).
public Object[] apply(org.apache.calcite.runtime.FlatLists.ComparableList key,
Record3_1 acc) {
return new Object[] {
key.get(0) == null ? (String) null : key.get(0).toString(),
(Long) key.get(1),
(Integer) key.get(2),
acc.f0,
acc.f2 ? Long.valueOf(acc.f1) : (Long) null};
}
public Object[] apply(Object key, Object acc) {
return apply(
(org.apache.calcite.runtime.FlatLists.ComparableList) key,
(Record3_1) acc);
}
}
));
// Step 5: final projection. Passes columns 0-3 through and unboxes column 4.
return new org.apache.calcite.linq4j.AbstractEnumerable(){
public org.apache.calcite.linq4j.Enumerator enumerator() {
return new org.apache.calcite.linq4j.Enumerator(){
public final org.apache.calcite.linq4j.Enumerator inputEnumerator =
_inputEnumerable1.enumerator();
public void reset() {
inputEnumerator.reset();
}
public boolean moveNext() {
return inputEnumerator.moveNext();
}
public void close() {
inputEnumerator.close();
}
public Object current() {
final Object[] current = (Object[]) inputEnumerator.current();
// longValue() on column 4 throws NullPointerException if the second
// aggregation emitted null there (the acc.f2 == false case).
return new Object[] {
current[0],
current[1],
current[2],
current[3],
( (Long) current[4]).longValue()};
}
};
}
};
}
// Reports the row type of this generated plan: every row is an Object[].
public Class getElementType() {
return java.lang.Object[].class;
}
{code}
> AggregateExpandDistinctAggregatesRule produces a wrong relational algebra
> -------------------------------------------------------------------------
>
> Key: CALCITE-2907
> URL: https://issues.apache.org/jira/browse/CALCITE-2907
> Project: Calcite
> Issue Type: Bug
> Components: core
> Affects Versions: 1.18.0
> Reporter: Lai Zhou
> Priority: Major
>
> In my use case,
> an Aggregate that contains a distinct call was improperly converted to an
> incorrect relational algebra expression.
>
> {code:java}
> SELECT user_id,
> order_id,
> product_id,
> count(DISTINCT secured_libs.u51decrypt(phone)) AS
> contact_count,
> stddev_pop(secured_libs.u51decrypt(phone)) AS
> seg3stddev,
> entropy(secured_libs.u51decrypt(phone)) AS
> seg3entropy
> FROM dw_risk__mygravitation_v_snap_contacts_contacts
> GROUP BY user_id,
> order_id,
> product_id
> {code}
>
>
> After digging into the code, I found the following at line 444 of
> AggregateExpandDistinctAggregatesRule.java:
>
> {code:java}
> int x = groupCount;
> final List<AggregateCall> newCalls = new ArrayList<>();
> for (AggregateCall aggCall : aggregate.getAggCallList()) {
> final int newFilterArg;
> final List<Integer> newArgList;
> final SqlAggFunction aggregation;
> if (!aggCall.isDistinct()) {
> aggregation = SqlStdOperatorTable.MIN;
> newArgList = ImmutableIntList.of(x++);
> newFilterArg = filters.get(aggregate.getGroupSet());
> } else {
> {code}
> The non-distinct aggregate calls `stddev_pop` and `entropy` were converted to
> SqlStdOperatorTable.MIN (in fact, every non-distinct aggregate call that reaches
> this branch is converted to SqlStdOperatorTable.MIN).
> I don't understand how this works.
> I guess someone made a faulty assumption here. [~julianhyde], can someone
> help me ASAP?
> It’s very important for my business.
>
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)