[ https://issues.apache.org/jira/browse/HIVE-25874?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zoltan Haindrich updated HIVE-25874: ------------------------------------ Description: time is spent at resizing vectors around [here|https://github.com/apache/hive/blob/200c0bf1feb259f4d95bf065a2ab38fe684383da/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java#L252] or in some other "ensureSize" method {code:java} create table t as select named_struct('id',13,'str','string','nest',named_struct('id',12,'str','string','arr',array('value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value'))) s; -- go up to 1M rows insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; -- insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; set hive.fetch.task.conversion=none; select count(1) from t; --explain select s .id from t where s .nest .id > 0; {code} interestingly; the issue is not present: * for a query not looking into the nested struct * and in case the struct with the array is at the top level {code} select count(1) from t; --explain select s .id from t where s -- .nest .id > 0; {code} was: {code:java} create table t as select named_struct('id',13,'str','string','nest',named_struct('id',12,'str','string','arr',array('value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value'))) s; -- go up to 1M rows insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; -- insert into table t select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t union all select * from t; set hive.fetch.task.conversion=none; select count(1) from t; --explain select s .id from t where s .nest .id > 0; {code} interestingly; the issue is not present: * for a query not looking into the nested struct * and in case the struct with the array is at the top level {code} select count(1) from t; --explain select s .id from t where s -- .nest .id > 0; {code} > Slow filter evaluation of nest struct fields in vectorized executions > --------------------------------------------------------------------- > > Key: HIVE-25874 > URL: https://issues.apache.org/jira/browse/HIVE-25874 > Project: Hive > Issue Type: Improvement > Reporter: Zoltan Haindrich > Priority: Major > > time is spent at resizing vectors around > [here|https://github.com/apache/hive/blob/200c0bf1feb259f4d95bf065a2ab38fe684383da/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java#L252] > or in some other "ensureSize" method > {code:java} > create table t as > select > named_struct('id',13,'str','string','nest',named_struct('id',12,'str','string','arr',array('value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value','value'))) > s; > -- go up to 1M rows > insert into table t select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t; > insert into table t select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t; > insert into table t select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t; > insert into table t select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t; > insert into table t select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t; > -- insert into table t select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t union all select * from t union all select * from t union all > select * from t; > set hive.fetch.task.conversion=none; > select count(1) from t; > --explain > select s > .id from t > where > s > .nest > .id > 0; > {code} > interestingly; the issue is not present: > * for a query not looking into the nested struct > * and in case the struct with the array is at the top level > {code} > select count(1) from t; > --explain > select s > .id from t > where > s > -- .nest > .id > 0; > {code} -- This message was sent by Atlassian Jira (v8.20.1#820001)