Your HDFS path to the Spark job is incorrect.
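Both URISyntaxException traces below come from the same mistake: the path is given as file:///127.0.0.1:8020/..., and since the file:// scheme carries no host:port, Hadoop parses "127.0.0.1:8020" as a relative path segment. Use the hdfs:// scheme with the NameNode host and port instead. A minimal sketch of what the fixed driver code could look like (Java, Spark 1.1 Java API; the class name PathFixSketch and the count() call are illustrative, only the paths and app setup mirror the thread):

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    public class PathFixSketch {
        public static void main(String[] args) {
            JavaSparkContext sc = new JavaSparkContext(
                    new SparkConf().setAppName("ma-spark"));

            // Wrong: "file:///127.0.0.1:8020/user/cloudera/inputs/"
            //   file:// has no authority component here, so "127.0.0.1:8020"
            //   becomes a relative path segment and Hadoop throws
            //   URISyntaxException: Relative path in absolute URI.

            // Right: hdfs://<namenode-host>:<port>/<absolute-path>
            JavaRDD<String> input =
                    sc.textFile("hdfs://127.0.0.1:8020/user/cloudera/inputs/");
            System.out.println("records: " + input.count());

            sc.stop();
        }
    }
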
On 8 June 2015 at 16:24, Nirmal Fernando <nir...@wso2.com> wrote:

> HDFS path should be something like:
> hdfs://127.0.0.1:8020/user/cloudera/inputs/
>
> On Mon, Jun 8, 2015 at 4:15 PM, Pa Rö <paul.roewer1...@googlemail.com>
> wrote:
>
>> Hello,
>>
>> I submit my Spark job with the following parameters:
>>
>> ./spark-1.1.0-bin-hadoop2.4/bin/spark-submit \
>>   --class mgm.tp.bigdata.ma_spark.SparkMain \
>>   --master spark://quickstart.cloudera:7077 \
>>   ma-spark.jar \
>>   1000
>>
>> and get the following exceptions:
>>
>> java.io.IOException: Mkdirs failed to create
>> file:/127.0.0.1:8020/user/cloudera/outputs/output_spark
>>   at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:438)
>>   at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
>>   at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:906)
>>   at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:887)
>>   at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:784)
>>   at mgm.tp.bigdata.ma_spark.Helper.writeCenterHistory(Helper.java:35)
>>   at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:100)
>>   at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>   at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>>   at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>   at java.lang.reflect.Method.invoke(Method.java:606)
>>   at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>>   at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>>   at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>>
>> java.lang.IllegalArgumentException: java.net.URISyntaxException:
>> Relative path in absolute URI: 127.0.0.1:8020
>>   at org.apache.hadoop.fs.Path.initialize(Path.java:206)
>>   at org.apache.hadoop.fs.Path.<init>(Path.java:172)
>>   at org.apache.hadoop.fs.Path.<init>(Path.java:94)
>>   at org.apache.hadoop.fs.Globber.glob(Globber.java:211)
>>   at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1642)
>>   at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:257)
>>   at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
>>   at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:304)
>>   at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>   at scala.Option.getOrElse(Option.scala:120)
>>   at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>   at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>   at scala.Option.getOrElse(Option.scala:120)
>>   at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>   at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>   at scala.Option.getOrElse(Option.scala:120)
>>   at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>   at org.apache.spark.rdd.FilteredRDD.getPartitions(FilteredRDD.scala:29)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>   at scala.Option.getOrElse(Option.scala:120)
>>   at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:1135)
>>   at org.apache.spark.rdd.RDD.count(RDD.scala:904)
>>   at org.apache.spark.rdd.RDD.takeSample(RDD.scala:401)
>>   at org.apache.spark.api.java.JavaRDDLike$class.takeSample(JavaRDDLike.scala:426)
>>   at org.apache.spark.api.java.JavaRDD.takeSample(JavaRDD.scala:32)
>>   at org.apache.spark.api.java.JavaRDDLike$class.takeSample(JavaRDDLike.scala:422)
>>   at org.apache.spark.api.java.JavaRDD.takeSample(JavaRDD.scala:32)
>>   at mgm.tp.bigdata.ma_spark.SparkMain.kmeans(SparkMain.java:123)
>>   at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:102)
>>   at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>   at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>>   at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>   at java.lang.reflect.Method.invoke(Method.java:606)
>>   at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>>   at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>>   at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>> Caused by: java.net.URISyntaxException: Relative path in absolute URI:
>> 127.0.0.1:8020
>>   at java.net.URI.checkPath(URI.java:1804)
>>   at java.net.URI.<init>(URI.java:752)
>>   at org.apache.hadoop.fs.Path.initialize(Path.java:203)
>>   ... 43 more
>>
>> Exception in thread "main" java.lang.IllegalArgumentException:
>> java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>>   at org.apache.hadoop.fs.Path.initialize(Path.java:206)
>>   at org.apache.hadoop.fs.Path.<init>(Path.java:172)
>>   at org.apache.hadoop.fs.Path.<init>(Path.java:94)
>>   at org.apache.hadoop.fs.Globber.glob(Globber.java:211)
>>   at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1642)
>>   at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:257)
>>   at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
>>   at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:304)
>>   at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>   at scala.Option.getOrElse(Option.scala:120)
>>   at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>   at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>   at scala.Option.getOrElse(Option.scala:120)
>>   at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>   at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>   at scala.Option.getOrElse(Option.scala:120)
>>   at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>   at org.apache.spark.rdd.FilteredRDD.getPartitions(FilteredRDD.scala:29)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>   at scala.Option.getOrElse(Option.scala:120)
>>   at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>   at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>>   at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>>   at scala.Option.getOrElse(Option.scala:120)
>>   at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>>   at org.apache.spark.SparkContext.runJob(SparkContext.scala:1135)
>>   at org.apache.spark.rdd.RDD.foreach(RDD.scala:759)
>>   at org.apache.spark.api.java.JavaRDDLike$class.foreach(JavaRDDLike.scala:297)
>>   at org.apache.spark.api.java.JavaPairRDD.foreach(JavaPairRDD.scala:44)
>>   at mgm.tp.bigdata.ma_spark.SparkMain.saveResults(SparkMain.java:216)
>>   at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:108)
>>   at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>   at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>>   at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>   at java.lang.reflect.Method.invoke(Method.java:606)
>>   at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>>   at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>>   at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>> Caused by: java.net.URISyntaxException: Relative path in absolute URI:
>> 127.0.0.1:8020
>>   at java.net.URI.checkPath(URI.java:1804)
>>   at java.net.URI.<init>(URI.java:752)
>>   at org.apache.hadoop.fs.Path.initialize(Path.java:203)
>>   ... 45 more
>>
>> I set my path like:
>> file:///127.0.0.1:8020/user/cloudera/inputs/
>> (the Hadoop NameNode)
>>
>> How must I set the path to HDFS?
>>
>> Best regards,
>> Paul
>
>
> --
> Thanks & regards,
> Nirmal
>
> Associate Technical Lead - Data Technologies Team, WSO2 Inc.
> Mobile: +94715779733
> Blog: http://nirmalfdo.blogspot.com/
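One more note on the first trace: the "Mkdirs failed" in Helper.writeCenterHistory is the output-side variant of the same problem. With a file:/ path the job falls through to the local ChecksumFileSystem and tries to create a directory literally named "127.0.0.1:8020" on local disk. A hedged sketch of one way to write the output to HDFS instead; the real body of writeCenterHistory isn't shown in this thread, so everything here except the method name and the paths is an assumption:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class Helper {
        // Illustrative reconstruction, not the poster's actual code.
        static void writeCenterHistory(String history) throws Exception {
            // Bind explicitly to the NameNode named in the URI instead of
            // relying on the default (local) filesystem of a bare Configuration.
            FileSystem fs = FileSystem.get(
                    URI.create("hdfs://127.0.0.1:8020"), new Configuration());
            Path out = new Path("/user/cloudera/outputs/output_spark");
            FSDataOutputStream os = fs.create(out, true); // overwrite if present
            try {
                os.writeUTF(history);
            } finally {
                os.close();
            }
        }
    }
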