I have spent a few days trying to get past what I feel is a trivial issue: committing Hive MapReduce results to a Cassandra CF using the Hive overwrite-table function. I have reviewed a few online examples and written my procedure a few different ways, always getting the same exception.
I have attached log files and CF/Hive table definitions. Any assistance would be appreciated! BTW - I am using a bundled Hive installation (DataStax Enterprise 1.0). Thanks, Brian Hive scripts: CREATE EXTERNAL TABLE LineItemHive(key STRING, month STRING, day STRING, dayOfMonth STRING, dayOfWeek STRING, year STRING, msgType STRING,sender STRING,receiver STRING,billToState STRING,shipToState STRING,quantity INT,price FLOAT,totalPrice FLOAT,firstLineItem STRING, shipToLocation STRING, billToLocation STRING) STORED BY 'org.apache.hadoop.hive.cassandra.CassandraStorageHandler' WITH SERDEPROPERTIES ( "cassandra.columns.mapping" = ":key,month ,day ,dayOfMonth,dayOfWeek,year,msgType,sender,receiver,billToState,shipToState,quantity,price,totalPrice,firstLineItem,shipToLocation,billToLocation","cassandra.cf.name"="LineItemColumnFamily") TBLPROPERTIES ("cassandra.ks.name"="xifStore" ); CREATE EXTERNAL TABLE StateTotals(row_key string, total int) STORED BY 'org.apache.hadoop.hive.cassandra.CassandraStorageHandler' WITH SERDEPROPERTIES ("cassandra.ks.name" = "xifStore"); ******THIS LEADS TO EXCEPTION******* INSERT OVERWRITE TABLE StateTotals SELECT billToState,SUM(price) from LineItemHive GROUP BY billToState; hive> describe StateTotals; OK row_key string from deserializer total int from deserializer Time taken: 0.303 seconds CQL: cqlsh:xifStore> describe columnfamily LineItemColumnFamily; CREATE COLUMNFAMILY LineItemColumnFamily ( KEY blob PRIMARY KEY ) WITH comment='' AND comparator=text AND row_cache_provider='SerializingCacheProvider' AND key_cache_size=200000.000000 AND row_cache_size=0.000000 AND read_repair_chance=1.000000 AND gc_grace_seconds=864000 AND default_validation=blob AND min_compaction_threshold=4 AND max_compaction_threshold=32 AND row_cache_save_period_in_seconds=0 AND key_cache_save_period_in_seconds=14400 AND replication_on_write=True; cqlsh:xifStore> describe columnfamily 
StateTotals; CREATE COLUMNFAMILY StateTotals ( KEY blob PRIMARY KEY ) WITH comment='' AND comparator=blob AND row_cache_provider='SerializingCacheProvider' AND key_cache_size=200000.000000 AND row_cache_size=0.000000 AND read_repair_chance=1.000000 AND gc_grace_seconds=864000 AND default_validation=blob AND min_compaction_threshold=4 AND max_compaction_threshold=32 AND row_cache_save_period_in_seconds=0 AND key_cache_save_period_in_seconds=14400 AND replication_on_write=True; Exception: Console: Kill Command = /usr/bin/dse hadoop job -Dmapred.job.tracker=10.86.165.248:8012 -kill job_201202072146_0001 2012-02-07 22:05:17,787 Stage-0 map = 0%, reduce = 0% 2012-02-07 22:05:20,880 Stage-0 map = 25%, reduce = 0% 2012-02-07 22:05:24,931 Stage-0 map = 75%, reduce = 0% 2012-02-07 22:05:25,939 Stage-0 map = 100%, reduce = 0% 2012-02-07 22:05:31,970 Stage-0 map = 100%, reduce = 33% 2012-02-07 22:05:43,035 Stage-0 map = 100%, reduce = 0% 2012-02-07 22:05:52,076 Stage-0 map = 100%, reduce = 25% 2012-02-07 22:06:04,156 Stage-0 map = 100%, reduce = 0% 2012-02-07 22:06:13,193 Stage-0 map = 100%, reduce = 33% 2012-02-07 22:06:25,248 Stage-0 map = 100%, reduce = 0% 2012-02-07 22:06:34,287 Stage-0 map = 100%, reduce = 25% 2012-02-07 22:06:45,336 Stage-0 map = 100%, reduce = 0% 2012-02-07 22:06:47,345 Stage-0 map = 100%, reduce = 100% Ended Job = job_201202072146_0001 with errors FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask Hive.log 2012-02-07 22:05:01,996 WARN mapred.JobClient (JobClient.java:copyAndConfigureFiles(624)) - Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same. 
2012-02-07 22:06:47,349 ERROR exec.MapRedTask (SessionState.java:printError(343)) - Ended Job = job_201202072146_0001 with errors system.log INFO [IPC Server handler 7 on 8012] 2012-02-07 22:06:34,628 TaskInProgress.java (line 551) Error from attempt_201202072146_0001_r_000000_3: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{"_col0":"TX"},"value":{"_col0":0.0},"alias":0} at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:268) at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:519) at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:420) at org.apache.hadoop.mapred.Child$4.run(Child.java:272) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:396) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1059) at org.apache.hadoop.mapred.Child.main(Child.java:266) Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row (tag=0) {"key":{"_col0":"TX"},"value":{"_col0":0.0},"alias":0} at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:256) ... 7 more Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.GroupByOperator.processOp(GroupByOperator.java:737) at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471) at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:247) ... 
7 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.serde2.lazy.LazyUtils.writePrimitiveUTF8(LazyUtils.java:207) at org.apache.hadoop.hive.cassandra.serde.TableMapping.serialize(TableMapping.java:130) at org.apache.hadoop.hive.cassandra.serde.TableMapping.serializeToBytes(TableMapping.java:114) at org.apache.hadoop.hive.cassandra.serde.TableMapping.serializeToBytes(TableMapping.java:79) at org.apache.hadoop.hive.cassandra.serde.TableMapping.getWritable(TableMapping.java:54) at org.apache.hadoop.hive.cassandra.serde.AbstractColumnSerDe.serialize(AbstractColumnSerDe.java:154) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:553) at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:744) at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:84) at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:744) at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:84) at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:744) at org.apache.hadoop.hive.ql.exec.GroupByOperator.forward(GroupByOperator.java:959) at org.apache.hadoop.hive.ql.exec.GroupByOperator.processAggr(GroupByOperator.java:798) at org.apache.hadoop.hive.ql.exec.GroupByOperator.processOp(GroupByOperator.java:724) ... 9 more WARN [JVM Runner jvm_201202072146_0001_r_1427928906 spawned.] 2012-02-07 22:06:42,226 DefaultTaskController.java (line 137) Exit code from task is : 65 INFO [JVM Runner jvm_201202072146_0001_r_1427928906 spawned.] 2012-02-07 22:06:42,226 DefaultTaskController.java (line 138) Output from DefaultTaskController's launchTask follows: INFO [JVM Runner jvm_201202072146_0001_r_1427928906 spawned.] 
2012-02-07 22:06:42,226 TaskController.java (line 262) INFO [JVM Runner jvm_201202072146_0001_r_1427928906 spawned.] 2012-02-07 22:06:42,227 JvmManager.java (line 510) JVM : jvm_201202072146_0001_r_1427928906 exited with exit code 65. Number of tasks it ran: 0 WARN [Thread-81] 2012-02-07 22:06:42,227 TaskRunner.java (line 270) attempt_201202072146_0001_r_000000_3 : Child Error java.io.IOException: Task process exit with nonzero status of 65. at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:258) INFO [IPC Server handler 2 on 8012] 2012-02-07 22:06:42,642 TaskInProgress.java (line 551) Error from attempt_201202072146_0001_r_000000_3: java.lang.Throwable: Child Error at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:271) Caused by: java.io.IOException: Task process exit with nonzero status of 65. at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:258) Source Code (LazyUtils): public static void writePrimitiveUTF8(OutputStream out, Object o, PrimitiveObjectInspector oi, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException { switch (oi.getPrimitiveCategory()) { case BOOLEAN: { boolean b = ((BooleanObjectInspector) oi).get(o); if (b) { out.write(trueBytes, 0, trueBytes.length); } else { out.write(falseBytes, 0, falseBytes.length); } break; } case BYTE: { LazyInteger.writeUTF8(out, 
((ByteObjectInspector) oi).get(o)); break; } case SHORT: { LazyInteger.writeUTF8(out, ((ShortObjectInspector) oi).get(o)); break; } case INT: { LazyInteger.writeUTF8(out, ((IntObjectInspector) oi).get(o)); break; } case LONG: { LazyLong.writeUTF8(out, ((LongObjectInspector) oi).get(o)); break; } case FLOAT: { float f = ((FloatObjectInspector) oi).get(o); ByteBuffer b = Text.encode(String.valueOf(f)); out.write(b.array(), 0, b.limit()); break; } case DOUBLE: { double d = ((DoubleObjectInspector) oi).get(o); ByteBuffer b = Text.encode(String.valueOf(d)); out.write(b.array(), 0, b.limit()); break; } case STRING: { Text t = ((StringObjectInspector) oi).getPrimitiveWritableObject(o); //*** Is t being assigned with null? // NPE? 
writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); break; } default: { throw new RuntimeException("Hive internal error."); } } }