hey guys
I am at AmpCamp 2014 at UCB right now :-)
Funny Issue...
This code works in spark-shell but throws a strange exception when I run it from IntelliJ.
CODE
====
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
sqlContext.setConf("spark.sql.parquet.binaryAsString", "true")
val wikiData = sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet")
wikiData.registerTempTable("wikiData")
sqlContext.sql("SELECT username, COUNT(*) AS cnt FROM wikiData WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10").collect().foreach(println)
RESULTS
========
[Waacstats,2003]
[Cydebot,949]
[BattyBot,939]
[Yobot,890]
[Addbot,853]
[Monkbot,668]
[ChrisGualtieri,438]
[RjwilmsiBot,387]
[OccultZone,377]
[ClueBot NG,353]
INTELLIJ CODE
=============
object ParquetSql {
/**
 * Runs the "top 10 usernames by row count" query against the wiki Parquet data set on a
 * local Spark master and prints each result row to standard output.
 *
 * NOTE(review): the IncompatibleClassChangeError reported for this program is raised inside
 * Spark's own classes (ParquetFilters), not in this method. That usually points to spark-core
 * and spark-sql jars of different versions on the IDE classpath; verify the project
 * dependencies, since no change to this method can fix it.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val sconf = new SparkConf().setMaster("local").setAppName("MedicalSideFx-NamesFoodSql")
  val sc = new SparkContext(sconf)
  try {
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    // Per the Spark SQL Parquet options, this makes binary columns be read as strings.
    sqlContext.setConf("spark.sql.parquet.binaryAsString", "true")

    val wikiData =
      sqlContext.parquetFile("/Users/sansub01/mycode/knowledge/spark_ampcamp_2014/data/wiki_parquet")
    wikiData.registerTempTable("wikiData")

    // A plain "..." literal cannot span source lines, so the query is built by concatenation;
    // the resulting SQL text is the single-line query used in the shell session.
    val query =
      "SELECT username, COUNT(*) AS cnt FROM wikiData " +
        "WHERE username <> '' GROUP BY username ORDER BY cnt DESC LIMIT 10"
    val results = sqlContext.sql(query)
    results.collect().foreach(println)
  } finally {
    // Always release the local Spark context, even when the query fails.
    sc.stop()
  }
}
}
INTELLIJ ERROR
==============
Exception in thread "main" java.lang.IncompatibleClassChangeError: Found interface org.apache.spark.serializer.Serializer, but class was expected
    at org.apache.spark.sql.parquet.ParquetFilters$.serializeFilterExpressions(ParquetFilters.scala:244)
    at org.apache.spark.sql.parquet.ParquetTableScan.execute(ParquetTableOperations.scala:109)
    at org.apache.spark.sql.execution.Filter.execute(basicOperators.scala:57)
    at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151)
    at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
    at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126)
    at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:48)
    at org.apache.spark.sql.execution.Exchange$$anonfun$execute$1.apply(Exchange.scala:45)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
    at org.apache.spark.sql.execution.Exchange.execute(Exchange.scala:44)
    at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:151)
    at org.apache.spark.sql.execution.Aggregate$$anonfun$execute$1.apply(Aggregate.scala:127)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:46)
    at org.apache.spark.sql.execution.Aggregate.execute(Aggregate.scala:126)
    at org.apache.spark.sql.execution.TakeOrdered.executeCollect(basicOperators.scala:171)
    at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:438)
    at org.medicalsidefx.common.utils.ParquetSql$.main(ParquetSql.scala:18)
    at org.medicalsidefx.common.utils.ParquetSql.main(ParquetSql.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)