Never mind, there is already a Jira open for this: https://issues.apache.org/jira/browse/SPARK-16698
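Until that lands, one possible workaround is to skip schema inference and pass an explicit schema whose field names contain no dots, so the file source never has to resolve the dotted header. A rough, untested sketch against your test.csv (params_url_s is just a dot-free stand-in I picked for params.url_s):

import org.apache.spark.sql.types.{StringType, StructField, StructType}

// Dot-free names chosen up front; with header set to "true" the header row
// in the file is still skipped, and these names are used instead.
val schema = StructType(Seq(
  StructField("flag_s", StringType),
  StructField("params_url_s", StringType)
))

val csvDF = spark.read
  .format("com.databricks.spark.csv")
  .option("header", "true")
  .schema(schema)
  .load("test.csv")

csvDF.take(1)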
On Fri, Aug 5, 2016 at 5:33 PM, Kiran Chitturi <kiran.chitt...@lucidworks.com> wrote:

> Hi,
>
> During our upgrade to 2.0.0, we found this issue with one of our failing
> tests.
>
> Any csv/json file that contains field names with dots is unreadable
> using DataFrames.
>
> My sample csv file:
>
>> flag_s,params.url_s
>> test,http://www.google.com
>
> In spark-shell, I ran the following code:
>
>> scala> val csvDF = spark.read.format("com.databricks.spark.csv").option("header", "true").option("inferSchema", "true").load("test.csv")
>> csvDF: org.apache.spark.sql.DataFrame = [flag_s: string, params.url_s: string]
>>
>> scala> csvDF.take(1)
>> org.apache.spark.sql.AnalysisException: Unable to resolve params.url_s given [flag_s, params.url_s];
>>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolve$1$$anonfun$apply$5.apply(LogicalPlan.scala:134)
>>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolve$1$$anonfun$apply$5.apply(LogicalPlan.scala:134)
>>   at scala.Option.getOrElse(Option.scala:121)
>>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolve$1.apply(LogicalPlan.scala:133)
>>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolve$1.apply(LogicalPlan.scala:129)
>>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>>   at scala.collection.Iterator$class.foreach(Iterator.scala:893)
>>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
>>   at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
>>   at org.apache.spark.sql.types.StructType.foreach(StructType.scala:95)
>>   at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
>>   at org.apache.spark.sql.types.StructType.map(StructType.scala:95)
>>   at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolve(LogicalPlan.scala:129)
>>   at org.apache.spark.sql.execution.datasources.FileSourceStrategy$.apply(FileSourceStrategy.scala:87)
>>   at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:60)
>>   at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:60)
>>   at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
>>   at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
>>   at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:61)
>>   at org.apache.spark.sql.execution.SparkPlanner.plan(SparkPlanner.scala:47)
>>   at org.apache.spark.sql.execution.SparkPlanner$$anonfun$plan$1$$anonfun$apply$1.applyOrElse(SparkPlanner.scala:51)
>>   at org.apache.spark.sql.execution.SparkPlanner$$anonfun$plan$1$$anonfun$apply$1.applyOrElse(SparkPlanner.scala:48)
>>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:301)
>>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:301)
>>   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69)
>>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:300)
>>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298)
>>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298)
>>   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:321)
>>   at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179)
>>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:319)
>>   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:298)
>>   at org.apache.spark.sql.execution.SparkPlanner$$anonfun$plan$1.apply(SparkPlanner.scala:48)
>>   at org.apache.spark.sql.execution.SparkPlanner$$anonfun$plan$1.apply(SparkPlanner.scala:48)
>>   at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
>>   at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:78)
>>   at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:76)
>>   at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:83)
>>   at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:83)
>>   at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2558)
>>   at org.apache.spark.sql.Dataset.head(Dataset.scala:1924)
>>   at org.apache.spark.sql.Dataset.take(Dataset.scala:2139)
>>   ... 48 elided
>>
>> scala>
>
> The same happens for json files too. Is this a known issue in 2.0.0?
>
> Removing the field with dots from the csv/json file fixes the issue :)
>
> Thanks,
>
> --
> Kiran Chitturi

--
Kiran Chitturi
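P.S. Even once SPARK-16698 is fixed, a column whose name contains a dot generally has to be referenced with backticks in the string API, since a bare dot is parsed as a struct-field access. A small sketch, assuming a build where the dotted read itself succeeds:

// Backticks keep the dot as part of the column name rather than a
// nested-field accessor.
csvDF.select("`params.url_s`").show()

// Renaming right after the read avoids the quoting issue downstream;
// params_url_s is just an example replacement name.
val renamed = csvDF.withColumnRenamed("params.url_s", "params_url_s")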