Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel).
16/08/05 15:37:57 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
16/08/05 15:37:59 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
16/08/05 15:38:01 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)
java.net.SocketException: Connection reset by peer: socket write error
    at java.net.SocketOutputStream.socketWrite0(Native Method)
    at java.net.SocketOutputStream.socketWrite(Unknown Source)
    at java.net.SocketOutputStream.write(Unknown Source)
    at java.io.BufferedOutputStream.flushBuffer(Unknown Source)
    at java.io.BufferedOutputStream.write(Unknown Source)
    at java.io.DataOutputStream.write(Unknown Source)
    at java.io.FilterOutputStream.write(Unknown Source)
    at org.apache.spark.api.python.PythonRDD$.writeUTF(PythonRDD.scala:674)
    at org.apache.spark.api.python.PythonRDD$.org$apache$spark$api$python$PythonRDD$$write$1(PythonRDD.scala:494)
    at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:504)
    at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:504)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at org.apache.spark.api.python.PythonRDD$.writeIteratorToStream(PythonRDD.scala:504)
    at org.apache.spark.api.python.PythonRunner$WriterThread$$anonfun$run$3.apply(PythonRDD.scala:328)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1857)
    at org.apache.spark.api.python.PythonRunner$WriterThread.run(PythonRDD.scala:269)
16/08/05 15:38:01 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.net.SocketException: Connection reset by peer: socket write error
    at java.net.SocketOutputStream.socketWrite0(Native Method)
    at java.net.SocketOutputStream.socketWrite(Unknown Source)
    at java.net.SocketOutputStream.write(Unknown Source)
    at java.io.BufferedOutputStream.flushBuffer(Unknown Source)
    at java.io.BufferedOutputStream.write(Unknown Source)
    at java.io.DataOutputStream.write(Unknown Source)
    at java.io.FilterOutputStream.write(Unknown Source)
    at org.apache.spark.api.python.PythonRDD$.writeUTF(PythonRDD.scala:674)
    at org.apache.spark.api.python.PythonRDD$.org$apache$spark$api$python$PythonRDD$$write$1(PythonRDD.scala:494)
    at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:504)
    at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:504)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at org.apache.spark.api.python.PythonRDD$.writeIteratorToStream(PythonRDD.scala:504)
    at org.apache.spark.api.python.PythonRunner$WriterThread$$anonfun$run$3.apply(PythonRDD.scala:328)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1857)
    at org.apache.spark.api.python.PythonRunner$WriterThread.run(PythonRDD.scala:269)
16/08/05 15:38:01 ERROR TaskSetManager: Task 0 in stage 0.0 failed 1 times; aborting job
Traceback (most recent call last):
  File "C:/workspacecode/pyspark/pyspark/churn/test.py", line 11, in <module>
    print rdd.first()
  File "C:\spark-2.0.0-bin-hadoop2.6\python\pyspark\rdd.py", line 1328, in first
    rs = self.take(1)
  File "C:\spark-2.0.0-bin-hadoop2.6\python\pyspark\rdd.py", line 1310, in take
    res = self.context.runJob(self, takeUpToNumLeft, p)
  File "C:\spark-2.0.0-bin-hadoop2.6\python\pyspark\context.py", line 941, in runJob
    port = self._jvm.PythonRDD.runJob(self._jsc.sc(), mappedRDD._jrdd, partitions)
  File "C:\spark-2.0.0-bin-hadoop2.6\python\lib\py4j-0.10.1-src.zip\py4j\java_gateway.py", line 933, in __call__
  File "C:\spark-2.0.0-bin-hadoop2.6\python\lib\py4j-0.10.1-src.zip\py4j\protocol.py", line 312, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.runJob.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.net.SocketException: Connection reset by peer: socket write error
    at java.net.SocketOutputStream.socketWrite0(Native Method)
    at java.net.SocketOutputStream.socketWrite(Unknown Source)
    at java.net.SocketOutputStream.write(Unknown Source)
    at java.io.BufferedOutputStream.flushBuffer(Unknown Source)
    at java.io.BufferedOutputStream.write(Unknown Source)
    at java.io.DataOutputStream.write(Unknown Source)
    at java.io.FilterOutputStream.write(Unknown Source)
    at org.apache.spark.api.python.PythonRDD$.writeUTF(PythonRDD.scala:674)
    at org.apache.spark.api.python.PythonRDD$.org$apache$spark$api$python$PythonRDD$$write$1(PythonRDD.scala:494)
    at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:504)
    at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:504)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at org.apache.spark.api.python.PythonRDD$.writeIteratorToStream(PythonRDD.scala:504)
    at org.apache.spark.api.python.PythonRunner$WriterThread$$anonfun$run$3.apply(PythonRDD.scala:328)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1857)
    at org.apache.spark.api.python.PythonRunner$WriterThread.run(PythonRDD.scala:269)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1450)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1438)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1437)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1437)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1659)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1618)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1607)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1871)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1884)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1897)
    at org.apache.spark.api.python.PythonRDD$.runJob(PythonRDD.scala:441)
    at org.apache.spark.api.python.PythonRDD.runJob(PythonRDD.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
    at java.lang.reflect.Method.invoke(Unknown Source)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:280)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:211)
    at java.lang.Thread.run(Unknown Source)
Caused by: java.net.SocketException: Connection reset by peer: socket write error
    at java.net.SocketOutputStream.socketWrite0(Native Method)
    at java.net.SocketOutputStream.socketWrite(Unknown Source)
    at java.net.SocketOutputStream.write(Unknown Source)
    at java.io.BufferedOutputStream.flushBuffer(Unknown Source)
    at java.io.BufferedOutputStream.write(Unknown Source)
    at java.io.DataOutputStream.write(Unknown Source)
    at java.io.FilterOutputStream.write(Unknown Source)
    at org.apache.spark.api.python.PythonRDD$.writeUTF(PythonRDD.scala:674)
    at org.apache.spark.api.python.PythonRDD$.org$apache$spark$api$python$PythonRDD$$write$1(PythonRDD.scala:494)
    at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:504)
    at org.apache.spark.api.python.PythonRDD$$anonfun$writeIteratorToStream$1.apply(PythonRDD.scala:504)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at org.apache.spark.api.python.PythonRDD$.writeIteratorToStream(PythonRDD.scala:504)
    at org.apache.spark.api.python.PythonRunner$WriterThread$$anonfun$run$3.apply(PythonRDD.scala:328)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1857)
    at org.apache.spark.api.python.PythonRunner$WriterThread.run(PythonRDD.scala:269)

Process finished with exit code 1

2016-08-05 15:35 GMT+02:00 pseudo oduesp <pseudo20...@gmail.com>:

> Hi,
>
> I configured PyCharm as described on Stack Overflow, with SPARK_HOME and
> HADOOP_CONF_DIR set, and downloaded winutils to use it with the prebuilt
> version of Spark 2.0 (PySpark 2.0).
>
> I get the error below; could you help me find a solution? Thanks.
>
> C:\Users\AppData\Local\Continuum\Anaconda2\python.exe C:/workspacecode/pyspark/pyspark/churn/test.py --master local[*]
>
> Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
> Setting default log level to "WARN".
> To adjust logging level use sc.setLogLevel(newLevel).
> 16/08/05 15:32:33 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
> 16/08/05 15:32:35 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
> Traceback (most recent call last):
>   File "C:/workspacecode/pyspark/pyspark/churn/test.py", line 11, in <module>
>     print rdd.first()
>   File "C:\spark-2.0.0-bin-hadoop2.6\python\pyspark\rdd.py", line 1328, in first
>     rs = self.take(1)
>   File "C:\spark-2.0.0-bin-hadoop2.6\python\pyspark\rdd.py", line 1280, in take
>     totalParts = self.getNumPartitions()
>   File "C:\spark-2.0.0-bin-hadoop2.6\python\pyspark\rdd.py", line 356, in getNumPartitions
>     return self._jrdd.partitions().size()
>   File "C:\spark-2.0.0-bin-hadoop2.6\python\lib\py4j-0.10.1-src.zip\py4j\java_gateway.py", line 933, in __call__
>   File "C:\spark-2.0.0-bin-hadoop2.6\python\lib\py4j-0.10.1-src.zip\py4j\protocol.py", line 312, in get_return_value
> py4j.protocol.Py4JJavaError: An error occurred while calling o21.partitions.
> : org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: file:/C:workspacecode/rapexp1412.csv
>     at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:285)
>     at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
>     at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:313)
>     at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:200)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:248)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:246)
>     at scala.Option.getOrElse(Option.scala:121)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:246)
>     at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:248)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:246)
>     at scala.Option.getOrElse(Option.scala:121)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:246)
>     at org.apache.spark.api.java.JavaRDDLike$class.partitions(JavaRDDLike.scala:60)
>     at org.apache.spark.api.java.AbstractJavaRDDLike.partitions(JavaRDDLike.scala:45)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
>     at java.lang.reflect.Method.invoke(Unknown Source)
>     at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
>     at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>     at py4j.Gateway.invoke(Gateway.java:280)
>     at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
>     at py4j.commands.CallCommand.execute(CallCommand.java:79)
>     at py4j.GatewayConnection.run(GatewayConnection.java:211)
>     at java.lang.T
>
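The root cause is visible in the quoted InvalidInputException: the URI came out as file:/C:workspacecode/rapexp1412.csv, with no slash after the drive letter, so Hadoop cannot resolve it as a local path. On Windows, local paths passed to textFile() are safest in the file:///C:/... form. A minimal sketch of what test.py presumably needs; the CSV location is taken from the log above, while the app name is an assumption:

# Minimal sketch (Python 2, matching the tracebacks above), assuming the
# CSV really lives at C:\workspacecode\rapexp1412.csv.
from pyspark import SparkConf, SparkContext

conf = SparkConf().setMaster("local[*]").setAppName("churn-test")  # app name is illustrative
sc = SparkContext(conf=conf)

# Note the three slashes after "file:" and the slash after "C:"; the quoted
# log shows "file:/C:workspacecode/..." (missing slash), which Hadoop
# reports as a nonexistent input path.
rdd = sc.textFile("file:///C:/workspacecode/rapexp1412.csv")
print rdd.first()

The newer "Connection reset by peer: socket write error" traces at the top of the thread are raised by Spark's Python writer thread when the worker process dies or closes its socket mid-write; on Windows this is a generic symptom that often hides the real failure, so it is worth confirming the input path and the winutils setup before chasing the socket error itself.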
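On the configuration side, the first message mentions SPARK_HOME, HADOOP_CONF_DIR, and winutils. A hedged sketch of doing the equivalent at the top of the script; only C:\spark-2.0.0-bin-hadoop2.6 and the py4j-0.10.1-src.zip name come from the tracebacks above, while the C:\hadoop location is an assumption to adapt:

import os
import sys

# Point Spark at the prebuilt 2.0 distribution seen in the tracebacks.
os.environ["SPARK_HOME"] = r"C:\spark-2.0.0-bin-hadoop2.6"

# On Windows, Hadoop looks for winutils.exe under %HADOOP_HOME%\bin.
os.environ["HADOOP_HOME"] = r"C:\hadoop"  # assumed location; adjust to wherever winutils lives

# Make the bundled pyspark and py4j importable when running from PyCharm.
sys.path.insert(0, r"C:\spark-2.0.0-bin-hadoop2.6\python")
sys.path.insert(0, r"C:\spark-2.0.0-bin-hadoop2.6\python\lib\py4j-0.10.1-src.zip")

Note also that --master local[*] in the quoted command line is passed to plain python.exe, not to spark-submit, so it has no effect unless test.py parses it itself; set the master in code (as in the sketch above) or launch the script through spark-submit.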