I configured 4 PCs with spark-1.2.0-bin-hadoop2.4.tgz as a Spark Standalone cluster. On the master PC I executed:

./sbin/start-master.sh
./sbin/start-slaves.sh   (4 instances)
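(Side note: my understanding is that sbin/start-slaves.sh launches one worker on each host listed in conf/slaves, over passwordless SSH from the master, so the per-node spark-class commands below are my manual alternative. For reference, a minimal sketch with my hostnames:

# conf/slaves on the master -- one worker host per line
datanode1
datanode2
secondarymaster

# then, on the master: start the master plus one worker per listed host,
# and attach the shell to the cluster
./sbin/start-all.sh
./bin/spark-shell --master spark://master:7077
)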
On the datanode1, datanode2, and secondarymaster PCs I executed:

./bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077

When I go to http://localhost:8080 I see this:
<http://apache-spark-user-list.1001560.n3.nabble.com/file/n21396/Screen_Shot_2015-01-28_at_2.png>

But when I run a script I get errors like the ones below, and I don't understand why they occur:
---------------------------------------------------------------------------------------------------------------------
:load Recommender/Recommender.scala
Loading Recommender/Recommender.scala...
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating
import java.io._
import scala.util.control.Breaks._
import scala.collection.mutable.ListBuffer
data: org.apache.spark.rdd.RDD[String] = data/cut/ratings.txt MappedRDD[819] at textFile at <console>:51
users: org.apache.spark.rdd.RDD[String] = data/cut/user.txt MappedRDD[821] at textFile at <console>:51
ratings: org.apache.spark.rdd.RDD[org.apache.spark.mllib.recommendation.Rating] = MappedRDD[822] at map at <console>:55
rank: Int = 10
numIterations: Int = 20
15/01/28 02:13:18 ERROR TaskSetManager: Task 1 in stage 105.0 failed 4 times; aborting job
org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 105.0 failed 4 times, most recent failure: Lost task 1.3 in stage 105.0 (TID 212, datanode2): java.io.FileNotFoundException: File file:/home/sparkuser/spark-1.2.0/spark-1.2.0-bin-hadoop2.4/data/cut/ratings.txt does not exist
        at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:511)
        at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:724)
        at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:501)
        at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:397)
        at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:137)
        at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:339)
        at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:764)
        at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:108)
        at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:67)
        at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:233)
        at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:210)
        at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:99)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
        at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
        at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
        at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        at org.apache.spark.scheduler.Task.run(Task.scala:56)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:662)
Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
        at scala.Option.foreach(Option.scala:236)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420)
        at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375)
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
        at akka.actor.ActorCell.invoke(ActorCell.scala:487)
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
        at akka.dispatch.Mailbox.run(Mailbox.scala:220)
        at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393)
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
usersProducts: org.apache.spark.rdd.RDD[(Int, Int)] = MappedRDD[1203] at map at <console>:57
<console>:60: error: not found: value model
       model.predict(usersProducts).map { case Rating(user, product, rate) =>
       ^
<console>:59: error: not found: value predictions
       }.join(predictions)
              ^
<console>:49: error: not found: value ratesAndPreds
       val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
                 ^
numofrecommends: Int = 6
15/01/28 02:13:19 ERROR TaskSetManager: Task 0 in stage 150.0 failed 4 times; aborting job
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 150.0 failed 4 times, most recent failure: Lost task 0.3 in stage 150.0 (TID 217, datanode1): java.io.FileNotFoundException: File file:/home/sparkuser/spark-1.2.0/spark-1.2.0-bin-hadoop2.4/data/cut/ratings.txt does not exist
        at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:511)
        at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:724)
        at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:501)
        at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:397)
        at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:137)
        at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:339)
        at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:764)
        at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:108)
        at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:67)
        at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:233)
        at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:210)
        at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:99)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
        at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
        at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
        at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        at org.apache.spark.scheduler.Task.run(Task.scala:56)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:744)
Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1214)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1203)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1202)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1202)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:696)
        at scala.Option.foreach(Option.scala:236)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:696)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1420)
        at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor.aroundReceive(DAGScheduler.scala:1375)
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
        at akka.actor.ActorCell.invoke(ActorCell.scala:487)
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
        at akka.dispatch.Mailbox.run(Mailbox.scala:220)
        at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:393)
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
warning: there were 1 deprecation warning(s); re-run with -deprecation for details
user_map: scala.collection.immutable.Map[Int,String] = Map(2163 -> Fiona Griffin, 8607 -> S.GONZÁLEZ, 645 -> MARINA IOANNOU MAKE UP ARTIST, 892 -> Έλια Αζά, 10822 -> Stelios Agathoklis, 5385 -> Christina Panayiotou, 5659 -> nataly kaimaklioti, 9929 -> ΕΥΗ ΓΕΩΡΓΙΟΥ, 11866 -> andreas,
12928 -> vasiliki mpasdani, 2199 -> wajahat bajwa, 3021 -> Dimitris Gavriilidis, 5437 -> Tonia Ioannou, 1322 -> Ερατώ Βασιλείου, 5509 -> xrusa kortari, 5686 -> Nicos Zeniou, 1036 -> Tasos Demetriou, 9982 -> ULTIMATE SOLUTIONS, 2822 -> FOTIS ARTEMIOU, 10007 -> Christos Stavrinou, 9131 -> Litsa DanceFighter, 13052 -> Allison Susan, 6085 -> YMF Associates Ltd, 3873 -> Zymaras Carpets & Ceilings LTD, 10917 -> constantina, 4188 -> Γιάννης Γιακαλάρας, 1586 -> Loukia Metaxa, 1501 -> christina christodoulou, 4201 -...
writer: java.io.PrintWriter = java.io.PrintWriter@3c825831
<console>:60: error: not found: value rating_data
       for( a <- 0 to rating_data.length-1){
                      ^
recommend_file: org.apache.spark.rdd.RDD[String] = data/cut/recommends.txt MappedRDD[1212] at textFile at <console>:55
writer: java.io.PrintWriter = java.io.PrintWriter@3c0b53c4
recommend_file_update: Array[scala.collection.mutable.ListBuffer[String]] = Array()
<console>:50: error: not found: value MSE
       println("Mean Squared Error = " + MSE)
---------------------------------------------------------------------------------------------------------------------
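From the traces, the failing path in both jobs is file:/home/sparkuser/spark-1.2.0/spark-1.2.0-bin-hadoop2.4/data/cut/ratings.txt, and the failures happen on the workers (datanode2 and datanode1). My understanding is that sc.textFile with a plain local path makes every executor read the file from its own local disk, so the file would have to exist at that exact path on every worker, not only on the master. The later REPL errors (not found: value model, predictions, ratesAndPreds, rating_data, MSE) look like fallout: once the job behind ALS.train aborts, model is never defined, so every subsequent line that uses it fails to compile.

Judging by the names in the output (ratings, rank = 10, numIterations = 20, usersProducts, predictions, ratesAndPreds, MSE), my script follows the standard MLlib ALS example. Is the right fix to put the input somewhere every node can reach, along the lines of the sketch below? (The hdfs://master:9000 URI and the "::" field separator are assumptions; copying ratings.txt to the identical local path on every worker should also work.)

import org.apache.spark.mllib.recommendation.{ALS, Rating}

// Read from a filesystem that every worker can reach.
// NOTE: the namenode address master:9000 is an assumption -- adjust to the cluster,
// or keep the file:/ path and copy the file to the same location on every worker.
val data = sc.textFile("hdfs://master:9000/data/cut/ratings.txt")

// Parse each line into a Rating; the "::" separator is an assumption about ratings.txt.
val ratings = data.map(_.split("::") match {
  case Array(user, product, rate) => Rating(user.toInt, product.toInt, rate.toDouble)
})

// Same hyperparameters as in the output above.
val rank = 10
val numIterations = 20
val model = ALS.train(ratings, rank, numIterations, 0.01)

// Predict on the observed (user, product) pairs and compute the mean squared error.
val usersProducts = ratings.map { case Rating(user, product, rate) => (user, product) }
val predictions = model.predict(usersProducts).map {
  case Rating(user, product, rate) => ((user, product), rate)
}
val ratesAndPreds = ratings.map {
  case Rating(user, product, rate) => ((user, product), rate)
}.join(predictions)
val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
  val err = r1 - r2
  err * err
}.mean()
println("Mean Squared Error = " + MSE)

Or is something else wrong with my standalone setup?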