Rajat Jain created HIVE-11396: --------------------------------- Summary: Hive on Tez does not work well with Sequence Files with different keys Key: HIVE-11396 URL: https://issues.apache.org/jira/browse/HIVE-11396 Project: Hive Issue Type: Bug Reporter: Rajat Jain Assignee: Hitesh Shah
{code} hive> create external table foo (a string) partitioned by (p string) stored as sequencefile location 'hdfs:///user/hive/foo' # A useless file with some text in hdfs hive> create external table tmp_foo (a string) location 'hdfs:///tmp/random_data' hive> insert overwrite table foo partition (p = '1') select * from tmp_foo {code} After this step, {{foo}} contains one partition with a text file. Now use this Java program to generate the second sequence file (but with a different key class) {code} import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import java.io.IOException; public class SequenceFileWriter { public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = new Job(conf); job.setJobName("Convert Text"); job.setJarByClass(Mapper.class); job.setMapperClass(Mapper.class); job.setReducerClass(Reducer.class); // increase if you need sorting or a special number of files job.setNumReduceTasks(0); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job, new Path("/tmp/random_data")); SequenceFileOutputFormat.setOutputPath(job, new Path("/user/hive/foo/p=2/")); // submit and wait for completion job.waitForCompletion(true); } } {code} Now run {{select count(*) from foo;}}. It passes with MapReduce, but fails with Tez with the following error: {code} hive> set hive.execution.engine=tez; hive> select count(*) from foo; Status: Failed Vertex failed, vertexName=Map 1, vertexId=vertex_1438013895843_0007_1_00, diagnostics=[Task failed, taskId=task_1438013895843_0007_1_00_000000, diagnostics=[TaskAttempt 0 failed, info=[Error: Failure while running task:java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.IOException: java.io.IOException: While processing file hdfs://localhost:9000/user/hive/foo/p=2/part-m-00000. wrong key class: org.apache.hadoop.io.BytesWritable is not class org.apache.hadoop.io.LongWritable at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:171) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:137) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:337) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:179) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:171) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1635) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:171) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:167) at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.IOException: java.io.IOException: While processing file hdfs://localhost:9000/user/hive/foo/p=2/part-m-00000. wrong key class: org.apache.hadoop.io.BytesWritable is not class org.apache.hadoop.io.LongWritable at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:71) at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:290) at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:148) ... 14 more Caused by: java.io.IOException: java.io.IOException: While processing file hdfs://localhost:9000/user/hive/foo/p=2/part-m-00000. wrong key class: org.apache.hadoop.io.BytesWritable is not class org.apache.hadoop.io.LongWritable at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderNextException(HiveIOExceptionHandlerChain.java:121) at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderNextException(HiveIOExceptionHandlerUtil.java:77) at org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:372) at org.apache.hadoop.hive.ql.io.HiveRecordReader.doNext(HiveRecordReader.java:79) at org.apache.hadoop.hive.ql.io.HiveRecordReader.doNext(HiveRecordReader.java:33) at org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:118) at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.next(TezGroupedSplitsInputFormat.java:137) at org.apache.tez.mapreduce.lib.MRReaderMapred.next(MRReaderMapred.java:113) at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:61) ... 16 more Caused by: java.io.IOException: While processing file hdfs://localhost:9000/user/hive/foo/p=2/part-m-00000. wrong key class: org.apache.hadoop.io.BytesWritable is not class org.apache.hadoop.io.LongWritable at org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.handleExceptionWhenReadNext(HiveContextAwareRecordReader.java:386) at org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:368) ... 22 more Caused by: java.io.IOException: wrong key class: org.apache.hadoop.io.BytesWritable is not class org.apache.hadoop.io.LongWritable at org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:2484) at org.apache.hadoop.mapred.SequenceFileRecordReader.next(SequenceFileRecordReader.java:82) at org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:359) ... 22 more {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)