[ https://issues.apache.org/jira/browse/HIVE-21720?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ShrekerNil updated HIVE-21720:
------------------------------
Description: 
I'm new to Hive. When I used Flume to sink data into Hive, this error occurred: {color:#f79232}partition spec is invalid; field collection does not exist or is empty{color}

{code}
2019-05-11 09:50:31,183 (hive-shive-call-runner-0) [ERROR - org.apache.hadoop.hive.ql.exec.DDLTask.failed(DDLTask.java:512)] org.apache.hadoop.hive.ql.metadata.HiveException: partition spec is invalid; field collection does not exist or is empty
	at org.apache.hadoop.hive.ql.metadata.Partition.createMetaPartitionObject(Partition.java:130)
	at org.apache.hadoop.hive.ql.metadata.Hive.convertAddSpecToMetaPartition(Hive.java:1662)
	at org.apache.hadoop.hive.ql.metadata.Hive.createPartitions(Hive.java:1638)
	at org.apache.hadoop.hive.ql.exec.DDLTask.addPartitions(DDLTask.java:900)
	at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:339)
	at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:160)
	at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:88)
	at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1638)
	at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1397)
	at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1183)
	at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1049)
	at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1039)
	at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.runDDL(HiveEndPoint.java:404)
	at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.createPartitionIfNotExists(HiveEndPoint.java:372)
	at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:276)
	at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:243)
	at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnectionImpl(HiveEndPoint.java:180)
	at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:157)
	at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:110)
	at org.apache.flume.sink.hive.HiveWriter$8.call(HiveWriter.java:379)
	at org.apache.flume.sink.hive.HiveWriter$8.call(HiveWriter.java:376)
	at org.apache.flume.sink.hive.HiveWriter$11.call(HiveWriter.java:428)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
{code}

This error follows the one above: {color:#f79232}HiveWriter$ConnectException: Failed connecting to EndPoint \{metaStoreUri='thrift://172.25.48.146:9083', database='test', table='user_increase', partitionVals=[, 190511] }{color}

{code}
org.apache.flume.sink.hive.HiveWriter$ConnectException: Failed connecting to EndPoint {metaStoreUri='thrift://172.25.48.146:9083', database='test', table='user_increase', partitionVals=[, 190511] }
	at org.apache.flume.sink.hive.HiveWriter.<init>(HiveWriter.java:99)
	at org.apache.flume.sink.hive.HiveSink.getOrCreateWriter(HiveSink.java:343)
	at org.apache.flume.sink.hive.HiveSink.drainOneBatch(HiveSink.java:295)
	at org.apache.flume.sink.hive.HiveSink.process(HiveSink.java:253)
	at org.apache.flume.sink.DefaultSinkProcessor.process(DefaultSinkProcessor.java:67)
	at org.apache.flume.SinkRunner$PollingRunner.run(SinkRunner.java:145)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hive.hcatalog.streaming.StreamingException: partition values=[, 190511].
Unable to get path for end point: [, 190511]
	at org.apache.hive.hcatalog.streaming.AbstractRecordWriter.getPathForEndPoint(AbstractRecordWriter.java:162)
	at org.apache.hive.hcatalog.streaming.AbstractRecordWriter.<init>(AbstractRecordWriter.java:66)
	at org.apache.hive.hcatalog.streaming.StrictJsonWriter.<init>(StrictJsonWriter.java:49)
	at org.apache.flume.sink.hive.HiveJsonSerializer.createRecordWriter(HiveJsonSerializer.java:54)
	at org.apache.flume.sink.hive.HiveWriter.<init>(HiveWriter.java:89)
	... 6 more
Caused by: NoSuchObjectException(message:partition values=[, 190511])
	at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_result$get_partition_resultStandardScheme.read(ThriftHiveMetastore.java:56077)
	at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_result$get_partition_resultStandardScheme.read(ThriftHiveMetastore.java:56045)
	at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partition_result.read(ThriftHiveMetastore.java:55976)
	at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:78)
	at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partition(ThriftHiveMetastore.java:1776)
	at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partition(ThriftHiveMetastore.java:1761)
	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getPartition(HiveMetaStoreClient.java:1112)
	at org.apache.hive.hcatalog.streaming.AbstractRecordWriter.getPathForEndPoint(AbstractRecordWriter.java:157)
	... 10 more
{code}

*Hive table creation statement:*
{code:sql}
CREATE TABLE IF NOT EXISTS user_increase(
  platform STRING COMMENT "platform info - ios android",
  system STRING COMMENT "system info - platform + version",
  brand STRING COMMENT "phone brand",
  model STRING COMMENT "phone model",
  content STRING COMMENT "json string of statistics"
) COMMENT "wechat user increase data in sharing"
PARTITIONED BY (collection STRING COMMENT "a flag to distinct statistics catalog in common flow",
  create_time STRING COMMENT "timestamp when data just reported")
CLUSTERED BY (platform) INTO 2 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS ORC
TBLPROPERTIES ('transactional'='true');
{code}

*Flume configuration:*
{code}
# The configuration file needs to define the sources,
# the channels and the sinks.
# Sources, channels and sinks are defined per agent,
# in this case called 'agent'

# source: skafka  sink: shive  channel: k2h
agent.sources = skafka
agent.sinks = shive
agent.channels = k2h

# skafka --> k2h
agent.sources.skafka.channels = k2h
# k2h --> shive
agent.sinks.shive.channel = k2h

# describe the source
agent.sources.skafka.type = org.apache.flume.source.kafka.KafkaSource
agent.sources.skafka.batchSize = 5000
agent.sources.skafka.batchDurationMillis = 2000
agent.sources.skafka.kafka.bootstrap.servers = 172.25.48.176:9092
agent.sources.skafka.kafka.topics = soft-device-statis

# use a channel which buffers events in memory
agent.channels.k2h.type = memory
agent.channels.k2h.capacity = 1000
agent.channels.k2h.transactionCapacity = 100

# sink type
agent.sinks.shive.type = hive
agent.sinks.shive.hive.metastore = thrift://172.25.48.146:9083
agent.sinks.shive.hive.database = test
agent.sinks.shive.hive.table = user_increase
agent.sinks.shive.hive.partition = %{platform},%Y%m%d
agent.sinks.shive.useLocalTimeStamp = true
agent.sinks.shive.round = true
agent.sinks.shive.roundValue = 10
agent.sinks.shive.roundUnit = minute
agent.sinks.shive.serializer = json
{code}

Any ideas? Thanks.
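One more clue I noticed while staring at the traces: the failing endpoint shows partitionVals=[, 190511], i.e. the first partition value — the one the sink maps from %\{platform} onto the collection column — is empty. So my guess is that the Kafka events arrive without a platform header. (I also notice the trace shows 190511 while %Y%m%d should produce 20190511, so the config I pasted may not exactly match what was running.) If the missing header is the cause, a Flume static interceptor might work around it; a minimal sketch, where the android value is only a hypothetical placeholder and not from my real setup:

{code}
# Hypothetical workaround sketch: attach a default 'platform' header to every
# event from the Kafka source so %{platform} never expands to an empty string.
agent.sources.skafka.interceptors = i1
agent.sources.skafka.interceptors.i1.type = static
agent.sources.skafka.interceptors.i1.key = platform
# placeholder value; the real producer should set the actual platform
agent.sources.skafka.interceptors.i1.value = android
# keep the header if the producer already set one
agent.sources.skafka.interceptors.i1.preserveExisting = true
{code}

To check which partitions were actually created on the Hive side, I assume something like this should work:

{code:sql}
-- list the existing partitions of the target table
USE test;
SHOW PARTITIONS user_increase;
{code}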
> HiveException: partition spec is invalid; field <partition> does not exist or is empty
> --------------------------------------------------------------------------------------
>
>                 Key: HIVE-21720
>                 URL: https://issues.apache.org/jira/browse/HIVE-21720
>             Project: Hive
>          Issue Type: Bug
>         Environment: apache-flume-1.7.0-bin
>            Reporter: ShrekerNil
>            Priority: Major
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)