Hey Everyone,

We're running Zeppelin 0.7.0.  We've just cut over to Spark 2 (Scala 2.11)
via the CDH parcel (SPARK2-2.0.0.cloudera1-1.cdh5.7.0.p0.113931).

Running a simple job throws a "Caused by: java.lang.ClassNotFoundException:
$anonfun$1". It appears that at execution time on the YARN hosts, the
native CDH Spark 1.5 jars are loaded before the new Spark 2 jars. I've
tried using spark.yarn.archive to point at the Spark 2 jars in HDFS, as
well as other Spark options, but none of it seems to make a difference.
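
For reference, here is roughly what I set in the Spark interpreter
properties (the archive name and HDFS path below are just examples, not
our literal values):

    # zip of the Spark 2 parcel's jars directory, uploaded to HDFS
    spark.yarn.archive  hdfs:///user/spark/spark2-jars.zip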


Any suggestions you can offer are appreciated.

Thanks,

Rob

------------------------


%spark
// load the tab-delimited files and split each line into fields
val taxonomy = sc.textFile("/user/user1/data/")
                 .map(l => l.split("\t"))

%spark
taxonomy.first


org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 7, data08.hadoop.prod.ostk.com, executor 2): java.lang.ClassNotFoundException: $anonfun$1
at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:82)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException: $anonfun$1
at java.lang.ClassLoader.findClass(ClassLoader.java:530)
at org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.scala:26)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:34)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:30)
at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:77)
... 30 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1624)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1613)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1893)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1906)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1919)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1318)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
at org.apache.spark.rdd.RDD.take(RDD.scala:1292)
at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1332)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
at org.apache.spark.rdd.RDD.first(RDD.scala:1331)
... 37 elided
Caused by: java.lang.ClassNotFoundException: $anonfun$1
at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:82)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
... 1 more
Caused by: java.lang.ClassNotFoundException: $anonfun$1
at java.lang.ClassLoader.findClass(ClassLoader.java:530)
at org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.scala:26)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:34)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:30)
at org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:77)
... 30 more
