Ismail, MERGE INTO is supported through our SQL extensions, so you'll need to enable them to get it working: http://iceberg.apache.org/spark-configuration/#sql-extensions
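For reference, here is a minimal sketch of what that looks like when building the session in Java. The catalog name "local", the Hadoop catalog type, and the warehouse path below are placeholders for illustration (not taken from your project), so adjust them to whatever catalog you are actually using:

    import org.apache.spark.sql.SparkSession;

    // Enable the Iceberg SQL extensions so MERGE INTO, DELETE FROM, and the
    // extra DDL statements are handled by Iceberg instead of failing in Spark.
    SparkSession spark = SparkSession.builder()
        .master("local[2]")
        .appName("iceberg-merge-example")
        .config("spark.sql.extensions",
            "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
        // Example catalog: a Hadoop catalog named "local"; any Iceberg catalog type works.
        .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog")
        .config("spark.sql.catalog.local.type", "hadoop")
        .config("spark.sql.catalog.local.warehouse", "/tmp/iceberg-warehouse")
        .getOrCreate();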
Also, we found during the 0.11.0 release vote that Spark 3.1.1 has changes that break the extensions. Spark 3.1 has not been released yet, so we went ahead with the 0.11.0 release and will follow up with fixes for the issues hit in 3.1. For now, I recommend using Spark 3.0.1 if you want to try out the new DDL, DELETE, or MERGE syntax.

rb

On Fri, Jan 29, 2021 at 11:09 AM ismail simsek <ismailxsim...@gmail.com> wrote:

> Hi all,
>
> Congratulations to everyone on the new 0.11.0 release. I'm trying to create an
> SCD 2 table using the new MERGE INTO feature, but I'm getting a "MERGE INTO
> TABLE is not supported temporarily." error and can't see what is wrong.
>
> I'm using Spark 3.1.1 and Iceberg 0.11.0.
>
> Full code is here:
> https://github.com/ismailsimsek/iceberg-examples/blob/master/src/main/java/IcebergSCD2.java
> pom file:
> https://github.com/ismailsimsek/iceberg-examples/blob/master/pom.xml
>
>     String merge = "MERGE INTO default.scd2_table t \n" +
>             "USING (SELECT customer_id, name, effective_date FROM default.stg_scd2_table) s \n" +
>             "ON s.customer_id = t.customer_id \n" +
>             "WHEN MATCHED \n" +
>             "  THEN UPDATE SET t.current = false, t.effective_date = s.effective_date \n" +
>             "WHEN NOT MATCHED THEN \n" +
>             "  INSERT (customer_id, name, current, effective_date, end_date) \n" +
>             "  VALUES (s.customer_id, s.name, true, s.effective_date, null)" +
>             ";";
>     spark.sql(merge);
>
> Exception message:
>
> 13975 [shutdown-hook-0] INFO org.apache.spark.SparkContext - Invoking stop() from shutdown hook
> Exception in thread "main" java.lang.UnsupportedOperationException: MERGE INTO TABLE is not supported temporarily.
>         at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:718)
>         at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63)
>         at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
>         at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
>         at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
>         at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
>         at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:67)
>         at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
>         at scala.collection.TraversableOnce.$anonfun$foldLeft$1(TraversableOnce.scala:162)
>         at scala.collection.TraversableOnce.$anonfun$foldLeft$1$adapted(TraversableOnce.scala:162)
>         at scala.collection.Iterator.foreach(Iterator.scala:941)
>         at scala.collection.Iterator.foreach$(Iterator.scala:941)
>         at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
>         at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:162)
>         at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:160)
>         at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1429)
>         at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
>         at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
>         at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
>         at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
>         at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:67)
>         at org.apache.spark.sql.execution.QueryExecution$.createSparkPlan(QueryExecution.scala:391)
>         at org.apache.spark.sql.execution.QueryExecution.$anonfun$sparkPlan$1(QueryExecution.scala:104)
>         at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
>         at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:143)
>         at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
>         at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:143)
>         at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:104)
>         at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:97)
>         at org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:117)
>         at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
>         at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:143)
>         at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
>         at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:143)
>         at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:117)
>         at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:110)
>         at org.apache.spark.sql.execution.QueryExecution.$anonfun$simpleString$2(QueryExecution.scala:161)
>         at org.apache.spark.sql.execution.ExplainUtils$.processPlan(ExplainUtils.scala:115)
>         at org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:161)
>         at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:206)
>         at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:175)
>         at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:98)
>         at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>         at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>         at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
>         at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>         at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
>         at org.apache.spark.sql.Dataset.<init>(Dataset.scala:228)
>         at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
>         at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
>         at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
>         at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:615)
>         at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
>         at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:610)
>         at IcebergSCD2.run(IcebergSCD2.java:46)
>         at IcebergSCD2.main(IcebergSCD2.java:12)

--
Ryan Blue
Software Engineer
Netflix