[ 
https://issues.apache.org/jira/browse/HIVE-10729?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14578856#comment-14578856
 ] 

Greg Senia commented on HIVE-10729:
-----------------------------------

Here is the query, followed by the DESCRIBE FORMATTED output for the source
table; it shows the array<string> column (ce_notes) that seems to be the cause...

drop table debug.ct_gsd_events1_test;
create table debug.ct_gsd_events1_test
as select  a.*,
b.svcrqst_id,
b.svcrqct_cds,
b.svcrtyp_cd,
b.cmpltyp_cd,
b.sum_reason_cd as src,
b.cnctmd_cd,
b.notes
from ctm.ct_gsd_events a
inner join
mbr.gsd_service_request b
on a.contact_event_id = b.cnctevn_id;


hive> describe formatted ctm.ct_gsd_events;
OK
# col_name              data_type               comment             
                 
hmoid                   string                                      
cumb_id_no              int                                         
mbrind_id               string                                      
contact_event_id        string                                      
ce_create_dt            string                                      
ce_end_dt               string                                      
contact_type            string                                      
cnctevs_cd              string                                      
contact_mode            string                                      
cntvnst_stts_cd         string                                      
total_transfers         int                                         
ce_notes                array<string>                               
                 
# Detailed Table Information             
Database:               ctm                      
Owner:                  LOAD_USER                  
CreateTime:             Fri May 29 09:41:58 EDT 2015     
LastAccessTime:         UNKNOWN                  
Protect Mode:           None                     
Retention:              0                        
Location:               hdfs://xhadnnm1p.example.com:8020/apps/hive/warehouse/ctm.db/ct_gsd_events
   
Table Type:             MANAGED_TABLE            
Table Parameters:                
        COLUMN_STATS_ACCURATE   true                
        numFiles                154                 
        numRows                 0                   
        rawDataSize             0                   
        totalSize               5464108             
        transient_lastDdlTime   1432906919          
                 
# Storage Information            
SerDe Library:          org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe      
 
InputFormat:            org.apache.hadoop.mapred.TextInputFormat         
OutputFormat:           org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed:             No                       
Num Buckets:            -1                       
Bucket Columns:         []                       
Sort Columns:           []                       
Storage Desc Params:             
        serialization.format    1                   
Time taken: 2.968 seconds, Fetched: 42 row(s)

> Query fails when selecting complex columns from a joined table (Tez map join
> only)
> -------------------------------------------------------------------------------
>
>                 Key: HIVE-10729
>                 URL: https://issues.apache.org/jira/browse/HIVE-10729
>             Project: Hive
>          Issue Type: Bug
>          Components: Query Processor
>    Affects Versions: 1.2.0
>            Reporter: Selina Zhang
>            Assignee: Selina Zhang
>         Attachments: HIVE-10729.1.patch, HIVE-10729.2.patch
>
>
> When a map join happens and the projection columns include complex data types,
> the query will fail.
> Steps to reproduce:
> {code:sql}
> hive> set hive.auto.convert.join;
> hive.auto.convert.join=true
> hive> desc foo;
> a                     array<int>
> hive> select * from foo;
> [1,2]
> hive> desc src_int;
> key                   int
> value                 string
> hive> select * from src_int where key=2;
> 2        val_2
> hive> select * from foo join src_int src  on src.key = foo.a[1];
> {code}
> The query will fail with the following stack trace:
> {noformat}
> Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray cannot be cast to [Ljava.lang.Object;
>       at 
> org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector.getList(StandardListObjectInspector.java:111)
>       at 
> org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:314)
>       at 
> org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:262)
>       at 
> org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:246)
>       at 
> org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:50)
>       at 
> org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:692)
>       at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:837)
>       at 
> org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
>       at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:837)
>       at 
> org.apache.hadoop.hive.ql.exec.CommonJoinOperator.internalForward(CommonJoinOperator.java:644)
>       at 
> org.apache.hadoop.hive.ql.exec.CommonJoinOperator.genAllOneUniqueJoinObject(CommonJoinOperator.java:676)
>       at 
> org.apache.hadoop.hive.ql.exec.CommonJoinOperator.checkAndGenObject(CommonJoinOperator.java:754)
>       at 
> org.apache.hadoop.hive.ql.exec.MapJoinOperator.process(MapJoinOperator.java:386)
>       ... 23 more
> {noformat}
> A similar error occurs when the projection columns include a map:
> {code:sql}
> hive> CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC;
> hive> INSERT OVERWRITE TABLE test SELECT 1, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1;
> hive> select * from src join test where src.key=test.a;
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to