Build.sbt
name := "spark" version := "1.0" scalaVersion := "2.11.7" libraryDependencies ++= Seq( "org.apache.spark" % "spark-core_2.11" % "1.5.1", "org.apache.spark" % "spark-streaming_2.11" % "1.5.1", "org.apache.spark" % "spark-mllib_2.11" % "1.5.1" From: Бобров Виктор [mailto:[email protected]] Sent: Thursday, December 10, 2015 2:54 PM To: 'Harsh J' <[email protected]> Cc: [email protected] Subject: RE: Can't filter Spark – 1.5.1, ty for help. import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ import org.apache.spark.SparkConf import scala.io.Source object SimpleApp { def main(args: Array[String]) { var A = scala.collection.mutable.Map[Array[String], Int]() val filename = "C:\\Users\\bobrov\\IdeaProjects\\spark\\file\\spark1.txt" for((line, i) <- Source.fromFile(filename).getLines().zipWithIndex){ val lst = line.split(" ") A += (lst -> i) } def filter1(tp: ((Array[String], Int), (Array[String], Int))): Boolean= { tp._1._2 < tp._2._2 } val conf = new SparkConf().setMaster("spark://web01:7077").setAppName("Simple Application") val sc = new SparkContext(conf) val mail_rdd = sc.parallelize(A.toSeq).cache() val step1 = mail_rdd.cartesian(mail_rdd) val step2 = step1.filter(filter1) //step1.collect().foreach(println) } } From: Harsh J [ <mailto:[email protected]> mailto:[email protected]] Sent: Thursday, December 10, 2015 2:50 PM To: Бобров Виктор < <mailto:[email protected]> [email protected]>; Ndjido Ardo Bar < <mailto:[email protected]> [email protected]> Cc: <mailto:[email protected]> [email protected] Subject: Re: Can't filter Are you sure you do not have any messages preceding the trace, such as one quoting which class is found to be missing? That'd be helpful to see and suggest what may (exactly) be going wrong. It appears similar to https://issues.apache.org/jira/browse/SPARK-8368, but I cannot tell for certain because I don't know if your code uses the SparkSQL features. Also, what version is your Spark running? 
I am able to run your program without a problem in Spark 1.5.x (with a sample Seq). On Thu, Dec 10, 2015 at 5:01 PM Бобров Виктор <[email protected] <mailto:[email protected]> > wrote: 0 = {StackTraceElement@7132} "com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.ClassReader.a(Unknown Source)" 1 = {StackTraceElement@7133} "com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.ClassReader.<init>(Unknown Source)" 2 = {StackTraceElement@7134} "org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:40)" 3 = {StackTraceElement@7135} "org.apache.spark.util.ClosureCleaner$.getInnerClosureClasses(ClosureCleaner.scala:81)" 4 = {StackTraceElement@7136} "org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:187)" 5 = {StackTraceElement@7137} "org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:122)" 6 = {StackTraceElement@7138} "org.apache.spark.SparkContext.clean(SparkContext.scala:2030)" 7 = {StackTraceElement@7139} "org.apache.spark.rdd.RDD$$anonfun$filter$1.apply(RDD.scala:331)" 8 = {StackTraceElement@7140} "org.apache.spark.rdd.RDD$$anonfun$filter$1.apply(RDD.scala:330)" 9 = {StackTraceElement@7141} "org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)" 10 = {StackTraceElement@7142} "org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)" 11 = {StackTraceElement@7143} "org.apache.spark.rdd.RDD.withScope(RDD.scala:306)" 12 = {StackTraceElement@7144} "org.apache.spark.rdd.RDD.filter(RDD.scala:330)" 13 = {StackTraceElement@7145} "SimpleApp$GeneratedEvaluatorClass$44$1.invoke(FileToCompile0.scala:30)" 14 = {StackTraceElement@7146} "SimpleApp$.main(test1.scala:26)" 15 = {StackTraceElement@7147} "SimpleApp.main(test1.scala)" From: Ndjido Ardo Bar [mailto:[email protected] <mailto:[email protected]> ] Sent: Thursday, December 10, 2015 2:20 PM To: Бобров Виктор <[email protected] <mailto:[email protected]> > Cc: [email protected] 
<mailto:[email protected]> Subject: Re: Can't filter Please send your call stack with the full description of the exception. On 10 Dec 2015, at 12:10, Бобров Виктор <[email protected] <mailto:[email protected]> > wrote: Hi, I can’t filter my RDD. def filter1(tp: ((Array[String], Int), (Array[String], Int))): Boolean= { tp._1._2 > tp._2._2 } val mail_rdd = sc.parallelize(A.toSeq).cache() val step1 = mail_rdd.cartesian(mail_rdd) val step2 = step1.filter(filter1) I get the error “Class not found”. What am I doing wrong? Thanks for the help.
