This is an automated email from the ASF dual-hosted git repository.
dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 7e92f80a1 IMPALA-13407: Codegen fails with struct in TOP-N
7e92f80a1 is described below
commit 7e92f80a1bcf4d1f3adc0008f81f5d193eec82c2
Author: Daniel Becker <[email protected]>
AuthorDate: Mon Sep 30 15:53:46 2024 +0200
IMPALA-13407: Codegen fails with struct in TOP-N
The following query succeeded with codegen disabled but failed with
codegen enabled:
use functional_parquet;
select str, alltypes from complextypes_structs order by str limit 4;
The root cause is the handling of CHARs in
CodegenAnyVal::StructChildToReadWriteInfo(). The code assumed that CHARs
could be handled the same way as STRINGs and VARCHARs but this is
incorrect. This patch fixes it by adding CHAR-specific handling.
Testing:
- added test queries to top-n-complex.test and also sort-complex.test
for consistency
- cleanup: moved one test with a LIMIT clause from
sort-complex.test to top-n-complex.test; also brought the two test
files in sync by adding counterparts to queries with and without
LIMIT clauses in the other file where they were missing.
Change-Id: I25c3ad73dea062a0de2402c20f39097625fbb2c8
Reviewed-on: http://gerrit.cloudera.org:8080/21865
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/codegen/codegen-anyval.cc | 23 ++++---
.../queries/QueryTest/sort-complex.test | 19 +++++-
.../queries/QueryTest/top-n-complex.test | 70 ++++++++++++++++++++++
3 files changed, 99 insertions(+), 13 deletions(-)
diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc
index b98f9e31c..de46375f0 100644
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -1065,29 +1065,28 @@ void CodegenAnyVal::StructChildToReadWriteInfo(
// 'AnyVal's). As this code deals with values in 'AnyVal's, for BOOLEANS we
use i8,
// which is the LLVM type corresponding to 'bool', and for other types we
use the slot
// type.
- llvm::Type* child_type = type.type == TYPE_BOOLEAN ?
+ llvm::Type* child_type = type.type == TYPE_BOOLEAN || type.type == TYPE_CHAR
?
codegen->i8_type() : slot_type;
llvm::Value* cast_child_ptr = builder->CreateBitCast(child_ptr,
child_type->getPointerTo(), "cast_child_ptr");
switch (type.type) {
- case TYPE_STRING:
- case TYPE_VARCHAR:
case TYPE_CHAR: {
+ llvm::Value* len = codegen->GetI32Constant(type.len);
+ read_write_info->SetPtrAndLen(cast_child_ptr, len);
+ break;
+ }
+ case TYPE_STRING:
+ case TYPE_VARCHAR: {
llvm::Function* str_ptr_fn = codegen->GetFunction(
IRFunction::STRING_VALUE_PTR, false);
llvm::Value* ptr = builder->CreateCall(str_ptr_fn,
llvm::ArrayRef<llvm::Value*>({cast_child_ptr}), "ptr");
- llvm::Value* len;
- if (type.type == TYPE_CHAR) {
- len = codegen->GetI32Constant(type.len);
- } else {
- llvm::Function* str_len_fn = codegen->GetFunction(
- IRFunction::STRING_VALUE_LEN, false);
- len = builder->CreateCall(str_len_fn,
- llvm::ArrayRef<llvm::Value*>({cast_child_ptr}), "len");
- }
+ llvm::Function* str_len_fn = codegen->GetFunction(
+ IRFunction::STRING_VALUE_LEN, false);
+ llvm::Value* len = builder->CreateCall(str_len_fn,
+ llvm::ArrayRef<llvm::Value*>({cast_child_ptr}), "len");
read_write_info->SetPtrAndLen(ptr, len);
break;
}
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
b/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
index 68b84bbd1..5ecdbf820 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
@@ -201,14 +201,31 @@ from (
arr.inner_struct2.str str,
row_number() over (order by arr.inner_struct2.str) as row_no
from collection_struct_mix t, t.arr_contains_nested_struct arr
-) res limit 4;
+) res;
---- RESULTS
1,'four spaceship captains',2
2,'lots of soju distilleries',105
3,'more spaceship captains',20
4,'very few distilleries',104
+5,'NULL',NULL
+6,'NULL',NULL
---- TYPES
BIGINT,STRING, SMALLINT
+
+====
+---- QUERY
+# Regression test for # IMPALA-13407
+select id, str, alltypes, tiny_struct, small_struct from complextypes_structs
order by str;
+---- RESULTS
+5,'fifth item','NULL','{"b":false}','{"i":98765,"s":"abcde f"}'
+1,'first
item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01
08:19:04","s1":"some string","s2":"another
str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL'
+4,'fourth
item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2023-12-31
23:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d
","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}'
+2,'second
item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11
10:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab
","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
+6,'sixth
item','{"ti":127,"si":100,"i":234732212,"bi":664233223342,"b":true,"f":34.56000137329102,"do":99523423.33,"da":"1985-11-19","ts":"2020-09-15
01:11:22","s1":"string1","s2":"string2","c1":"z","c2":"
","vc":"cv","de1":346,"de2":6235.600}','NULL','{"i":null,"s":null}'
+3,'third
item','{"ti":null,"si":null,"i":null,"bi":null,"b":null,"f":null,"do":null,"da":null,"ts":null,"s1":null,"s2":null,"c1":null,"c2":null,"vc":null,"de1":null,"de2":null}','{"b":true}','{"i":98765,"s":null}'
+---- TYPES
+INT,STRING,STRING,STRING,STRING
+
====
---- QUERY
# Sorting is not supported yet for collections within structs: IMPALA-12160.
Test with a
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
b/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
index 07e5e8cc1..ccf615823 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
@@ -146,6 +146,76 @@ select id, arr_contains_nested_struct from
collection_struct_mix order by id lim
2,'[{"inner_struct1":null,"inner_struct2":{"str":"very few
distilleries","l":128},"small":104},{"inner_struct1":{"str":"a few soju
distilleries","l":28},"inner_struct2":{"str":"lots of soju
distilleries","l":228},"small":105},null]'
---- TYPES
INT,STRING
+
+====
+---- QUERY
+# Regression test for IMPALA-13272
+select
+ row_no
+from (
+ select
+ arr.small,
+ row_number() over (order by arr.inner_struct1.str) as row_no
+ from collection_struct_mix t, t.arr_contains_nested_struct arr
+) res limit 4;
+---- RESULTS
+1
+2
+3
+4
+---- TYPES
+BIGINT
+====
+---- QUERY
+# Regression test for IMPALA-13272
+select
+ row_no, str
+from (
+ select
+ arr.small,
+ arr.inner_struct1.str str,
+ row_number() over (order by arr.inner_struct1.str) as row_no
+ from collection_struct_mix t, t.arr_contains_nested_struct arr
+) res limit 4;
+---- RESULTS
+1,''
+2,'a few soju distilleries'
+3,'NULL'
+4,'NULL'
+---- TYPES
+BIGINT,STRING
+====
+---- QUERY
+# Regression test for IMPALA-13272
+select
+ row_no, str, small
+from (
+ select
+ arr.small,
+ arr.inner_struct2.str str,
+ row_number() over (order by arr.inner_struct2.str) as row_no
+ from collection_struct_mix t, t.arr_contains_nested_struct arr
+) res limit 4;
+---- RESULTS
+1,'four spaceship captains',2
+2,'lots of soju distilleries',105
+3,'more spaceship captains',20
+4,'very few distilleries',104
+---- TYPES
+BIGINT,STRING, SMALLINT
+
+====
+---- QUERY
+# Regression test for IMPALA-13407
+select id, str, alltypes, tiny_struct, small_struct from complextypes_structs
order by str limit 4;
+---- RESULTS
+5,'fifth item','NULL','{"b":false}','{"i":98765,"s":"abcde f"}'
+1,'first
item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01
08:19:04","s1":"some string","s2":"another
str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL'
+4,'fourth
item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2023-12-31
23:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d
","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}'
+2,'second
item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11
10:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab
","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
+---- TYPES
+INT,STRING,STRING,STRING,STRING
+
====
---- QUERY
# Sorting is not supported yet for collections within structs: IMPALA-12160.
Test with a