Hey everyone,

We're running Zeppelin 0.7.0 and have just cut over to Spark 2 (Scala 2.11) via the CDH parcel (SPARK2-2.0.0.cloudera1-1.cdh5.7.0.p0.113931).
Running a simple job throws "Caused by: java.lang.ClassNotFoundException: $anonfun$1". It appears that at execution time on the YARN hosts, the native CDH Spark 1.5 jars are loaded before the new Spark 2 jars. I've tried using spark.yarn.archive to point at the Spark 2 jars in HDFS, as well as various other Spark options, none of which seems to make a difference. Any suggestions you can offer are appreciated.

Thanks,
Rob

------------------------

%spark
val taxonomy = sc.textFile("/user/user1/data/")
  .map(l => l.split("\t"))

%spark
taxonomy.first

org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 7, data08.hadoop.prod.ostk.com, executor 2): java.lang.ClassNotFoundException: $anonfun$1
    at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:82)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:86)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException: $anonfun$1
    at java.lang.ClassLoader.findClass(ClassLoader.java:530)
    at org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.scala:26)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:34)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:30)
    at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:77)
    ... 30 more

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1669)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1624)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1613)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1893)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1906)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1919)
    at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1318)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1292)
    at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1332)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
    at org.apache.spark.rdd.RDD.first(RDD.scala:1331)
    ... 37 elided
Caused by: java.lang.ClassNotFoundException: $anonfun$1
    at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:82)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:86)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    ... 1 more
Caused by: java.lang.ClassNotFoundException: $anonfun$1
    at java.lang.ClassLoader.findClass(ClassLoader.java:530)
    at org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.scala:26)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:34)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:30)
    at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:77)
    ... 30 more
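
In case it helps with diagnosis, below is the kind of sanity check I can run from a notebook paragraph to see which Spark the interpreter actually picked up and whether spark.yarn.archive made it into the runtime conf. This is just a sketch: sc is the SparkContext Zeppelin injects, and spark.yarn.archive is the standard Spark property name; nothing here is specific to our setup.

%spark
// Which Spark is the Zeppelin interpreter really running? If this prints
// 1.5.x rather than 2.0.x, the old CDH jars are still winning on the classpath.
println(s"Spark version: ${sc.version}")

// Did spark.yarn.archive actually land in the conf? getOption returns None
// instead of throwing if the property was never applied.
println(s"spark.yarn.archive = ${sc.getConf.getOption("spark.yarn.archive")}")

Of course, sc.version only tells me about the driver side; the mismatch could still be executor-only on the YARN hosts.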