[ https://issues.apache.org/jira/browse/HIVE-24797?focusedWorklogId=555141&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-555141 ]
ASF GitHub Bot logged work on HIVE-24797:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 20/Feb/21 11:11
            Start Date: 20/Feb/21 11:11
    Worklog Time Spent: 10m
      Work Description: wangyum commented on pull request #1994:
URL: https://github.com/apache/hive/pull/1994#issuecomment-782608079

Full stack trace:
```
21/02/19 01:43:22,745 WARN [HiveServer2-Background-Pool: Thread-162871] avro.AvroSerDe:195 : Encountered exception determining schema. Returning signal schema to indicate problem
org.apache.avro.AvroTypeException: Invalid default for field USERACTIONS: null not a {"type":"array","items":"string"}
    at org.apache.avro.Schema.validateDefault(Schema.java:1571)
    at org.apache.avro.Schema.access$500(Schema.java:87)
    at org.apache.avro.Schema$Field.<init>(Schema.java:544)
    at org.apache.avro.Schema.parse(Schema.java:1678)
    at org.apache.avro.Schema$Parser.parse(Schema.java:1425)
    at org.apache.avro.Schema$Parser.parse(Schema.java:1396)
    at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFor(AvroSerdeUtils.java:287)
    at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFromFS(AvroSerdeUtils.java:170)
    at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.determineSchemaOrThrowException(AvroSerdeUtils.java:139)
    at org.apache.hadoop.hive.serde2.avro.AvroSerDe.determineSchemaOrReturnErrorSchema(AvroSerDe.java:187)
    at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:107)
    at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:83)
    at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:533)
    at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:493)
    at org.apache.hadoop.hive.ql.metadata.Partition.getDeserializer(Partition.java:225)
    at org.apache.spark.sql.hive.HadoopTableReader.$anonfun$makeRDDForPartitionedTable$1(TableReader.scala:146)
    at scala.collection.immutable.Stream.map(Stream.scala:418)
    at org.apache.spark.sql.hive.HadoopTableReader.makeRDDForPartitionedTable(TableReader.scala:145)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec.$anonfun$doExecute$2(HiveTableScanExec.scala:211)
    at org.apache.spark.util.Utils$.withDummyCallSite(Utils.scala:2469)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec.doExecute(HiveTableScanExec.scala:211)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:181)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:177)
    at org.apache.spark.sql.execution.ProjectExec.doExecute(basicPhysicalOperators.scala:79)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:181)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:219)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:216)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:177)
    at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:323)
    at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:465)
    at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:451)
    at org.apache.spark.sql.execution.SparkPlan.executeTakeAsIterator(SparkPlan.scala:543)
    at org.apache.spark.sql.execution.CollectLimitExec.executeCollectAsIterator(limit.scala:50)
    at org.apache.spark.sql.Dataset.collectIteratorFromPlan(Dataset.scala:3659)
    at org.apache.spark.sql.Dataset.$anonfun$collectAsIterator$1(Dataset.scala:2951)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3641)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:105)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:165)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:67)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3639)
    at org.apache.spark.sql.Dataset.collectAsIterator(Dataset.scala:2951)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$executeForResults$1(SparkExecuteStatementOperation.scala:481)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.executeResultInShareGroup(SparkExecuteStatementOperation.scala:521)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.executeForResults(SparkExecuteStatementOperation.scala:481)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$3(SparkExecuteStatementOperation.scala:329)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withRetry(SparkExecuteStatementOperation.scala:419)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:322)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.$anonfun$run$1(SparkExecuteStatementOperation.scala:225)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties(SparkOperation.scala:78)
    at org.apache.spark.sql.hive.thriftserver.SparkOperation.withLocalProperties$(SparkOperation.scala:62)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:57)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:225)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1$$anon$2.run(SparkExecuteStatementOperation.scala:220)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
    at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$1.run(SparkExecuteStatementOperation.scala:236)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
```
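The Avro portion of the trace reproduces outside Hive with only avro on the classpath. Below is a minimal sketch: the record and field names mirror the trace above, while the class name DefaultValidationRepro is made up for illustration and is not part of any patch.

```java
import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;

public class DefaultValidationRepro {
    // Same shape as the failing field in the trace: a non-nullable array
    // field declaring "default": null, which Avro rejects when default
    // values are validated (the default Schema.Parser behavior in 1.10.1).
    private static final String SCHEMA_JSON =
        "{\"type\":\"record\",\"name\":\"EventData\",\"fields\":["
        + "{\"name\":\"USERACTIONS\","
        + "\"type\":{\"type\":\"array\",\"items\":\"string\"},"
        + "\"default\":null}]}";

    public static void main(String[] args) {
        try {
            new Schema.Parser().parse(SCHEMA_JSON);
        } catch (AvroTypeException e) {
            // Prints the same "Invalid default for field USERACTIONS:
            // null not a {...}" message seen at the top of the trace.
            System.out.println(e.getMessage());
        }
    }
}
```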
Issue Time Tracking
-------------------

    Worklog Id:     (was: 555141)
    Time Spent: 1.5h  (was: 1h 20m)

> Disable validate default values when parsing Avro schemas
> ----------------------------------------------------------
>
>                 Key: HIVE-24797
>                 URL: https://issues.apache.org/jira/browse/HIVE-24797
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Yuming Wang
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 1.5h
>  Remaining Estimate: 0h
>
> Upgrading Avro to 1.10.1 makes this schema fail to parse, because newer Avro versions validate field default values while parsing:
> {code:json}
> {
>   "type": "record",
>   "name": "EventData",
>   "doc": "event data",
>   "fields": [
>     {"name": "ARRAY_WITH_DEFAULT", "type": {"type": "array", "items": "string"}, "default": null }
>   ]
> }
> {code}
> {noformat}
> org.apache.avro.AvroTypeException: Invalid default for field ARRAY_WITH_DEFAULT: null not a {"type":"array","items":"string"}
>     at org.apache.avro.Schema.validateDefault(Schema.java:1571)
>     at org.apache.avro.Schema.access$500(Schema.java:87)
>     at org.apache.avro.Schema$Field.<init>(Schema.java:544)
>     at org.apache.avro.Schema.parse(Schema.java:1678)
>     at org.apache.avro.Schema$Parser.parse(Schema.java:1425)
>     at org.apache.avro.Schema$Parser.parse(Schema.java:1396)
>     at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFor(AvroSerdeUtils.java:287)
>     at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFromFS(AvroSerdeUtils.java:170)
>     at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.determineSchemaOrThrowException(AvroSerdeUtils.java:139)
>     at org.apache.hadoop.hive.serde2.avro.AvroSerDe.determineSchemaOrReturnErrorSchema(AvroSerDe.java:187)
>     at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:107)
>     at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:83)
>     at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:533)
>     at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:493)
>     at org.apache.hadoop.hive.ql.metadata.Partition.getDeserializer(Partition.java:225)
> {noformat}
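For reference, Avro's Schema.Parser exposes a switch that matches the issue title. A minimal sketch of parsing with default-value validation turned off follows; whether the Hive patch wires this flag through AvroSerdeUtils.getSchemaFor in exactly this way is an assumption drawn from the title, not confirmed in this thread.

{code:java}
import org.apache.avro.Schema;

public class ParseWithoutDefaultValidation {
    public static void main(String[] args) {
        String json =
            "{\"type\":\"record\",\"name\":\"EventData\",\"fields\":["
            + "{\"name\":\"ARRAY_WITH_DEFAULT\","
            + "\"type\":{\"type\":\"array\",\"items\":\"string\"},"
            + "\"default\":null}]}";

        // setValidateDefaults(false) skips the Schema.validateDefault()
        // check seen at the top of the stack trace, so the schema parses
        // as it did before the Avro upgrade.
        Schema schema = new Schema.Parser()
            .setValidateDefaults(false)
            .parse(json);
        System.out.println(schema.getField("ARRAY_WITH_DEFAULT").schema());
    }
}
{code}

Note the trade-off: with validation off, the invalid default is only surfaced later if something actually reads it, which is presumably why the parser validates eagerly by default from Avro 1.9 onwards.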