andygrove opened a new issue, #1754:
URL: https://github.com/apache/datafusion-comet/issues/1754

   ### Describe the bug
   
   Repro:
   
   ```
     ignore("read map[struct, struct] from parquet") {
       assume(usingDataSourceExec(conf))
   
       withTempPath { dir =>
         // create input file with Comet disabled
         withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
           val df = spark
             .range(5)
             .withColumn("id2", col("id"))
             .withColumn("id3", col("id"))
             // Spark does not allow null as a key but does allow null as a
             // value, and the entire map may be null
             .select(
               when(col("id") > 1, map(struct(col("id"), col("id2"), 
col("id3")), when(col("id") > 2,
                 struct(col("id"), col("id2"), col("id3"))))).alias("map1"))
           df.write.parquet(dir.toString())
         }
   
         Seq("", "parquet").foreach { v1List =>
           withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
             val df = spark.read.parquet(dir.toString())
             df.createOrReplaceTempView("tbl")
             if (v1List.isEmpty) {
               checkSparkAnswer(df.select("map1"))
             } else {
               checkSparkAnswerAndOperator(df.select("map1"))
             }
             // we fall back to Spark for map_keys and map_values
             checkSparkAnswer(df.select(map_keys(col("map1"))))
             checkSparkAnswer(df.select(map_values(col("map1"))))
             checkSparkAnswer(spark.sql("SELECT map_keys(map1).id2 FROM tbl"))
             checkSparkAnswer(spark.sql("SELECT map_values(map1).id2 FROM tbl"))
           }
         }
       }
     }
   ```
   
   
   Error:
   
   ```
   Invalid argument error: column types must match schema types, 
   
   expected:
   
   Map(Field { name: "entries", data_type: Struct([Field { name: "key", 
data_type: Struct([Field { name: "id2", data_type: Int64 }]) }, Field { name: 
"value", data_type: Struct([Field { name: "id", data_type: Int64 }, Field { 
name: "id2", data_type: Int64 }, Field { name: "id3", data_type: Int64 }]) }]) 
}, false) 
   
   but found 
   
   Map(Field { name: "key_value", data_type: Struct([Field { name: "key", 
data_type: Struct([Field { name: "id", data_type: Int64 }, Field { name: "id2", 
data_type: Int64 }, Field { name: "id3", data_type: Int64 }]) }, Field { name: 
"value", data_type: Struct([Field { name: "id", data_type: Int64 }, Field { 
name: "id2", data_type: Int64 }, Field { name: "id3", data_type: Int64 }]) }]) 
}, false) at column index 0
   ```
   
   ### Steps to reproduce
   
   _No response_
   
   ### Expected behavior
   
   _No response_
   
   ### Additional context
   
   _No response_


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to