Hi,

This sounds like a bug. It works if I put an *arbitrary LIMIT* on the insert:

INSERT INTO TABLE test.randomData
SELECT
       ID
     , CLUSTERED
     , SCATTERED
     , RANDOMISED
     , RANDOM_STRING
     , SMALL_VC
     , PADDING
FROM tmp
LIMIT 10000000

This works fine on Spark version 2.11.12! Can someone please test and
confirm this?
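For reference, this is roughly how the workaround looks end to end from
spark-shell (a minimal sketch; the tmp temporary view and the
test.randomData table are the ones from the example quoted below, and the
10000000 figure is arbitrary, it just has to be at least the number of rows
in tmp):

// Same INSERT as above, with an explicit LIMIT added as a workaround.
val sqltext =
  """
  INSERT INTO TABLE test.randomData
  SELECT
         ID
       , CLUSTERED
       , SCATTERED
       , RANDOMISED
       , RANDOM_STRING
       , SMALL_VC
       , PADDING
  FROM tmp
  LIMIT 10000000
  """

// spark.sql executes INSERT commands eagerly, so this runs the insert.
spark.sql(sqltext)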
Thanks,

Mich

LinkedIn *https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw*

*Disclaimer:* Use it at your own risk. Any and all responsibility for any
loss, damage or destruction of data or any other property which may arise
from relying on this email's technical content is explicitly disclaimed.
The author will in no case be liable for any monetary damages arising from
such loss, damage or destruction.


On Sat, 19 Dec 2020 at 20:27, Mich Talebzadeh <mich.talebza...@gmail.com> wrote:

> Hi,
>
> Upgraded Spark from 2.11.12 to Spark 3.0.1
>
> Hive version 3.1.1 and Hadoop version 3.1.1
>
> The following used to work with Spark 2.11.12
>
> scala> sqltext = s"""
>      | INSERT INTO TABLE ${fullyQualifiedTableName}
>      | SELECT
>      |        ID
>      |      , CLUSTERED
>      |      , SCATTERED
>      |      , RANDOMISED
>      |      , RANDOM_STRING
>      |      , SMALL_VC
>      |      , PADDING
>      | FROM tmp
>      | """
>
> INSERT INTO TABLE test.randomData
> SELECT
>        ID
>      , CLUSTERED
>      , SCATTERED
>      , RANDOMISED
>      , RANDOM_STRING
>      , SMALL_VC
>      , PADDING
> FROM tmp
>
> scala> spark.sql(sqltext)
>
> But with Spark 3.0.1 it fails throwing the following error
>
> java.lang.ArrayIndexOutOfBoundsException: 0
>   at org.apache.spark.sql.types.StructType.apply(StructType.scala:414)
>   at org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning$$anonfun$apply$4.$anonfun$applyOrElse$3(objects.scala:224)
>   at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at scala.collection.TraversableLike.map(TraversableLike.scala:238)
>   at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
>   at scala.collection.immutable.List.map(List.scala:298)
>   at org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning$$anonfun$apply$4.applyOrElse(objects.scala:223)
>   at org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning$$anonfun$apply$4.applyOrElse(objects.scala:211)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$1(TreeNode.scala:309)
>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:72)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:309)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown(AnalysisHelper.scala:149)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown$(AnalysisHelper.scala:147)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDown$3(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$mapChildren$1(TreeNode.scala:399)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:237)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:397)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:350)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:314)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown(AnalysisHelper.scala:149)
>   at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown$(AnalysisHelper.scala:147)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
>   at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:298)
>   at org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning$.apply(objects.scala:211)
>   at org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning$.apply(objects.scala:121)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:149)
>   at scala.collection.IndexedSeqOptimized.foldLeft(IndexedSeqOptimized.scala:60)
>   at scala.collection.IndexedSeqOptimized.foldLeft$(IndexedSeqOptimized.scala:68)
>   at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:38)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:146)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:138)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:138)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:116)
>   at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:116)
>   at org.apache.spark.sql.execution.QueryExecution.$anonfun$optimizedPlan$1(QueryExecution.scala:82)
>   at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
>   at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:133)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
>   at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:133)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:82)
>   at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:79)
>   at org.apache.spark.sql.execution.QueryExecution.$anonfun$writePlans$4(QueryExecution.scala:197)
>   at org.apache.spark.sql.catalyst.plans.QueryPlan$.append(QueryPlan.scala:381)
>   at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$writePlans(QueryExecution.scala:197)
>   at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:207)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:95)
>   at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
>   at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>   at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3616)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
>   at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
>   at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:607)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
>   at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:602)
>   ... 47 elided
>
> Sounds like some incompatibility issue here?
>
> Thanks,
>
> Mich
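PS. Since the trace above points at the ObjectSerializerPruning optimizer
rule, one diagnostic that may be worth a try (a sketch only; I have not
verified this on 3.0.1, and not every rule can be excluded) is to exclude
that rule via spark.sql.optimizer.excludedRules and re-run the original
INSERT without the LIMIT:

// Exclude the optimizer rule named in the stack trace for this session, then retry.
spark.conf.set(
  "spark.sql.optimizer.excludedRules",
  "org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning")

// Original INSERT, no LIMIT.
spark.sql(
  """
  INSERT INTO TABLE test.randomData
  SELECT ID, CLUSTERED, SCATTERED, RANDOMISED, RANDOM_STRING, SMALL_VC, PADDING
  FROM tmp
  """)

If that makes the plain INSERT go through, it narrows the regression down to
that rule; either way it is a workaround, not a fix.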