This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 7e92f80a1 IMPALA-13407: Codegen fails with struct in TOP-N
7e92f80a1 is described below

commit 7e92f80a1bcf4d1f3adc0008f81f5d193eec82c2
Author: Daniel Becker <[email protected]>
AuthorDate: Mon Sep 30 15:53:46 2024 +0200

    IMPALA-13407: Codegen fails with struct in TOP-N
    
    The following query succeeded with codegen disabled but failed with
    codegen enabled:
    
      use functional_parquet;
      select str, alltypes from complextypes_structs order by str limit 4;
    
    The root cause is the handling of CHARs in
    CodegenAnyVal::StructChildToReadWriteInfo(). The code assumed that CHARs
    could be handled the same way as STRINGs and VARCHARs but this is
    incorrect. This patch fixes it by adding CHAR-specific handling.
    
    Testing:
     - added test queries to top-n-complex.test and also sort-complex.test
       for consistency
     - cleanup: moved one test with a LIMIT clause from
       sort-complex.test to top-n-complex.test; also brought the two test
       files in sync by adding counterparts to queries with and without
       LIMIT clauses in the other file where they were missing.
    
    Change-Id: I25c3ad73dea062a0de2402c20f39097625fbb2c8
    Reviewed-on: http://gerrit.cloudera.org:8080/21865
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/codegen/codegen-anyval.cc                   | 23 ++++---
 .../queries/QueryTest/sort-complex.test            | 19 +++++-
 .../queries/QueryTest/top-n-complex.test           | 70 ++++++++++++++++++++++
 3 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc
index b98f9e31c..de46375f0 100644
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -1065,29 +1065,28 @@ void CodegenAnyVal::StructChildToReadWriteInfo(
   // 'AnyVal's). As this code deals with values in 'AnyVal's, for BOOLEANS we 
use i8,
   // which is the LLVM type corresponding to 'bool', and for other types we 
use the slot
   // type.
-  llvm::Type* child_type = type.type == TYPE_BOOLEAN ?
+  llvm::Type* child_type = type.type == TYPE_BOOLEAN || type.type == TYPE_CHAR 
?
     codegen->i8_type() : slot_type;
   llvm::Value* cast_child_ptr = builder->CreateBitCast(child_ptr,
       child_type->getPointerTo(), "cast_child_ptr");
 
   switch (type.type) {
-    case TYPE_STRING:
-    case TYPE_VARCHAR:
     case TYPE_CHAR: {
+      llvm::Value* len = codegen->GetI32Constant(type.len);
+      read_write_info->SetPtrAndLen(cast_child_ptr, len);
+      break;
+    }
+    case TYPE_STRING:
+    case TYPE_VARCHAR: {
       llvm::Function* str_ptr_fn = codegen->GetFunction(
             IRFunction::STRING_VALUE_PTR, false);
       llvm::Value* ptr = builder->CreateCall(str_ptr_fn,
             llvm::ArrayRef<llvm::Value*>({cast_child_ptr}), "ptr");
 
-      llvm::Value* len;
-      if (type.type == TYPE_CHAR) {
-        len = codegen->GetI32Constant(type.len);
-      } else {
-        llvm::Function* str_len_fn = codegen->GetFunction(
-            IRFunction::STRING_VALUE_LEN, false);
-        len = builder->CreateCall(str_len_fn,
-            llvm::ArrayRef<llvm::Value*>({cast_child_ptr}), "len");
-      }
+      llvm::Function* str_len_fn = codegen->GetFunction(
+          IRFunction::STRING_VALUE_LEN, false);
+      llvm::Value* len = builder->CreateCall(str_len_fn,
+          llvm::ArrayRef<llvm::Value*>({cast_child_ptr}), "len");
       read_write_info->SetPtrAndLen(ptr, len);
       break;
     }
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test 
b/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
index 68b84bbd1..5ecdbf820 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
@@ -201,14 +201,31 @@ from (
     arr.inner_struct2.str str,
     row_number() over (order by arr.inner_struct2.str) as row_no
   from collection_struct_mix t, t.arr_contains_nested_struct arr
-) res limit 4;
+) res;
 ---- RESULTS
 1,'four spaceship captains',2
 2,'lots of soju distilleries',105
 3,'more spaceship captains',20
 4,'very few distilleries',104
+5,'NULL',NULL
+6,'NULL',NULL
 ---- TYPES
 BIGINT,STRING, SMALLINT
+
+====
+---- QUERY
+# Regression test for # IMPALA-13407
+select id, str, alltypes, tiny_struct, small_struct from complextypes_structs 
order by str;
+---- RESULTS
+5,'fifth item','NULL','{"b":false}','{"i":98765,"s":"abcde f"}'
+1,'first 
item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01
 08:19:04","s1":"some string","s2":"another 
str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL'
+4,'fourth 
item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2023-12-31
 23:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d  
","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}'
+2,'second 
item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11
 10:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab 
","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
+6,'sixth 
item','{"ti":127,"si":100,"i":234732212,"bi":664233223342,"b":true,"f":34.56000137329102,"do":99523423.33,"da":"1985-11-19","ts":"2020-09-15
 01:11:22","s1":"string1","s2":"string2","c1":"z","c2":"   
","vc":"cv","de1":346,"de2":6235.600}','NULL','{"i":null,"s":null}'
+3,'third 
item','{"ti":null,"si":null,"i":null,"bi":null,"b":null,"f":null,"do":null,"da":null,"ts":null,"s1":null,"s2":null,"c1":null,"c2":null,"vc":null,"de1":null,"de2":null}','{"b":true}','{"i":98765,"s":null}'
+---- TYPES
+INT,STRING,STRING,STRING,STRING
+
 ====
 ---- QUERY
 # Sorting is not supported yet for collections within structs: IMPALA-12160. 
Test with a
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test 
b/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
index 07e5e8cc1..ccf615823 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
@@ -146,6 +146,76 @@ select id, arr_contains_nested_struct from 
collection_struct_mix order by id lim
 2,'[{"inner_struct1":null,"inner_struct2":{"str":"very few 
distilleries","l":128},"small":104},{"inner_struct1":{"str":"a few soju 
distilleries","l":28},"inner_struct2":{"str":"lots of soju 
distilleries","l":228},"small":105},null]'
 ---- TYPES
 INT,STRING
+
+====
+---- QUERY
+# Regression test for IMPALA-13272
+select
+  row_no
+from (
+  select
+    arr.small,
+    row_number() over (order by arr.inner_struct1.str) as row_no
+  from collection_struct_mix t, t.arr_contains_nested_struct arr
+) res limit 4;
+---- RESULTS
+1
+2
+3
+4
+---- TYPES
+BIGINT
+====
+---- QUERY
+# Regression test for IMPALA-13272
+select
+  row_no, str
+from (
+  select
+    arr.small,
+    arr.inner_struct1.str str,
+    row_number() over (order by arr.inner_struct1.str) as row_no
+  from collection_struct_mix t, t.arr_contains_nested_struct arr
+) res limit 4;
+---- RESULTS
+1,''
+2,'a few soju distilleries'
+3,'NULL'
+4,'NULL'
+---- TYPES
+BIGINT,STRING
+====
+---- QUERY
+# Regression test for IMPALA-13272
+select
+  row_no, str, small
+from (
+  select
+    arr.small,
+    arr.inner_struct2.str str,
+    row_number() over (order by arr.inner_struct2.str) as row_no
+  from collection_struct_mix t, t.arr_contains_nested_struct arr
+) res limit 4;
+---- RESULTS
+1,'four spaceship captains',2
+2,'lots of soju distilleries',105
+3,'more spaceship captains',20
+4,'very few distilleries',104
+---- TYPES
+BIGINT,STRING, SMALLINT
+
+====
+---- QUERY
+# Regression test for IMPALA-13407
+select id, str, alltypes, tiny_struct, small_struct from complextypes_structs 
order by str limit 4;
+---- RESULTS
+5,'fifth item','NULL','{"b":false}','{"i":98765,"s":"abcde f"}'
+1,'first 
item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01
 08:19:04","s1":"some string","s2":"another 
str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL'
+4,'fourth 
item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2023-12-31
 23:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d  
","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}'
+2,'second 
item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11
 10:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab 
","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
+---- TYPES
+INT,STRING,STRING,STRING,STRING
+
 ====
 ---- QUERY
 # Sorting is not supported yet for collections within structs: IMPALA-12160. 
Test with a

Reply via email to