wayneguow commented on code in PR #47506: URL: https://github.com/apache/spark/pull/47506#discussion_r1694406012
########## sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala: ########## @@ -1739,6 +1739,32 @@ abstract class CSVSuite Row(1, Date.valueOf("1983-08-04"), null) :: Nil) } + test("SPARK-49016: Queries from raw CSV files are disallowed when the referenced columns only" + + " include the internal corrupt record column") { + val schema = new StructType() + .add("a", IntegerType) + .add("b", DateType) + .add("_corrupt_record", StringType) + + // negative cases + checkError( + exception = intercept[AnalysisException] { + spark.read.schema(schema).csv(testFile(valueMalformedFile)) + .select("_corrupt_record").collect() Review Comment: @HyukjinKwon Thanks for your explanation. Do you think we should change the documentation to remove the mention of CSV and keep only JSON, to avoid misunderstandings? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org