Your HDFS path passed to the Spark job is incorrect. A file:/// URI points at the local filesystem, so Hadoop parses "127.0.0.1:8020" as a directory name, and the ':' in that name is what produces the "Relative path in absolute URI" error. Use the hdfs:// scheme instead, as Nirmal suggested below.
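
As a minimal sketch of what the read/write calls could look like with a
proper HDFS URI (the class name here is made up; the host, port, and paths
are just the ones quoted in this thread):

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    public class HdfsPathExample {
        public static void main(String[] args) {
            SparkConf conf = new SparkConf().setAppName("hdfs-path-example");
            JavaSparkContext sc = new JavaSparkContext(conf);

            // Correct: hdfs scheme, NameNode host:port, then an absolute path.
            JavaRDD<String> input =
                    sc.textFile("hdfs://127.0.0.1:8020/user/cloudera/inputs/");

            // By contrast, "file:///127.0.0.1:8020/..." addresses the local
            // filesystem, turns "127.0.0.1:8020" into a directory name, and
            // the ':' in that name raises the URISyntaxException seen below.
            input.saveAsTextFile(
                    "hdfs://127.0.0.1:8020/user/cloudera/outputs/output_spark");

            sc.stop();
        }
    }

The spark-submit command itself can stay as it is; only the URIs used inside
the job need to change.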

On 8 June 2015 at 16:24, Nirmal Fernando <nir...@wso2.com> wrote:

> The HDFS path should be something like:
> hdfs://127.0.0.1:8020/user/cloudera/inputs/
>
> On Mon, Jun 8, 2015 at 4:15 PM, Pa Rö <paul.roewer1...@googlemail.com>
> wrote:
>
>> Hello,
>>
>> I submit my Spark job with the following parameters:
>>
>> ./spark-1.1.0-bin-hadoop2.4/bin/spark-submit \
>>   --class mgm.tp.bigdata.ma_spark.SparkMain \
>>   --master spark://quickstart.cloudera:7077 \
>>   ma-spark.jar \
>>   1000
>>
>> and get the following exceptions:
>>
>> java.io.IOException: Mkdirs failed to create file:/127.0.0.1:8020/user/cloudera/outputs/output_spark
>>     at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:438)
>>     at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
>>     at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:906)
>>     at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:887)
>>     at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:784)
>>     at mgm.tp.bigdata.ma_spark.Helper.writeCenterHistory(Helper.java:35)
>>     at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:100)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>     at java.lang.reflect.Method.invoke(Method.java:606)
>>     at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>> java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>>     at org.apache.hadoop.fs.Path.initialize(Path.java:206)
>>     at org.apache.hadoop.fs.Path.<init>(Path.java:172)
>>     at org.apache.hadoop.fs.Path.<init>(Path.java:94)
>>     at org.apache.hadoop.fs.Globber.glob(Globber.java:211)
>>     at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1642)
>>     at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:257)
>>     at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
>>     at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:304)
>>     at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>     at scala.Option.getOrElse(Option.scala:120)
>>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>     at scala.Option.getOrElse(Option.scala:120)
>>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>     at scala.Option.getOrElse(Option.scala:120)
>>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>     at org.apache.spark.rdd.FilteredRDD.getPartitions(FilteredRDD.scala:29)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>     at scala.Option.getOrElse(Option.scala:120)
>>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1135)
>>     at org.apache.spark.rdd.RDD.count(RDD.scala:904)
>>     at org.apache.spark.rdd.RDD.takeSample(RDD.scala:401)
>>     at org.apache.spark.api.java.JavaRDDLike$class.takeSample(JavaRDDLike.scala:426)
>>     at org.apache.spark.api.java.JavaRDD.takeSample(JavaRDD.scala:32)
>>     at org.apache.spark.api.java.JavaRDDLike$class.takeSample(JavaRDDLike.scala:422)
>>     at org.apache.spark.api.java.JavaRDD.takeSample(JavaRDD.scala:32)
>>     at mgm.tp.bigdata.ma_spark.SparkMain.kmeans(SparkMain.java:123)
>>     at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:102)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>     at java.lang.reflect.Method.invoke(Method.java:606)
>>     at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>> Caused by: java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>>     at java.net.URI.checkPath(URI.java:1804)
>>     at java.net.URI.<init>(URI.java:752)
>>     at org.apache.hadoop.fs.Path.initialize(Path.java:203)
>>     ... 43 more
>> Exception in thread "main" java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>>     at org.apache.hadoop.fs.Path.initialize(Path.java:206)
>>     at org.apache.hadoop.fs.Path.<init>(Path.java:172)
>>     at org.apache.hadoop.fs.Path.<init>(Path.java:94)
>>     at org.apache.hadoop.fs.Globber.glob(Globber.java:211)
>>     at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1642)
>>     at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:257)
>>     at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
>>     at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:304)
>>     at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>     at scala.Option.getOrElse(Option.scala:120)
>>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>     at scala.Option.getOrElse(Option.scala:120)
>>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>     at scala.Option.getOrElse(Option.scala:120)
>>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>     at org.apache.spark.rdd.FilteredRDD.getPartitions(FilteredRDD.scala:29)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>     at scala.Option.getOrElse(Option.scala:120)
>>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>     at scala.Option.getOrElse(Option.scala:120)
>>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1135)
>>     at org.apache.spark.rdd.RDD.foreach(RDD.scala:759)
>>     at org.apache.spark.api.java.JavaRDDLike$class.foreach(JavaRDDLike.scala:297)
>>     at org.apache.spark.api.java.JavaPairRDD.foreach(JavaPairRDD.scala:44)
>>     at mgm.tp.bigdata.ma_spark.SparkMain.saveResults(SparkMain.java:216)
>>     at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:108)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>     at java.lang.reflect.Method.invoke(Method.java:606)
>>     at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>> Caused by: java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>>     at java.net.URI.checkPath(URI.java:1804)
>>     at java.net.URI.<init>(URI.java:752)
>>     at org.apache.hadoop.fs.Path.initialize(Path.java:203)
>>     ... 45 more
>>
>> I set my path like this:
>> file:///127.0.0.1:8020/user/cloudera/inputs/
>> (the NameNode of Hadoop)
>>
>> How must I set the path to HDFS?
>>
>> Best regards,
>> Paul
>>
>
>
>
> --
>
> Thanks & regards,
> Nirmal
>
> Associate Technical Lead - Data Technologies Team, WSO2 Inc.
> Mobile: +94715779733
> Blog: http://nirmalfdo.blogspot.com/
>
>
>
