Ok, digging a bit into Spark i think i got it:

sc.newAPIHadoopFile("s3n://missingPattern/*", EmptiableTextInputFormat.class, LongWritable.class, Text.class, sc.hadoopConfiguration())
    .map(new Function<Tuple2<LongWritable, Text>, String>() {
        @Override
        public String call(Tuple2<LongWritable, Text> arg0) throws Exception {
            return arg0._2.toString();
        }
    })
    .count();

And the EmptiableTextInputFormat:

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.InvalidInputException;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

/**
 * A {@link TextInputFormat} that tolerates input path patterns matching no
 * files (e.g. an S3 glob with zero hits). Instead of letting
 * {@link InvalidInputException} propagate and fail the job, it reports an
 * empty split list so the job simply processes zero records.
 */
public class EmptiableTextInputFormat extends TextInputFormat {

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        try {
            return super.getSplits(context);
        } catch (InvalidInputException e) {
            // No files matched the input pattern — deliberately treat this
            // as empty input rather than a failure.
            return Collections.emptyList();
        }
    }
}

Thanks !
-- View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Ignoring-S3-0-files-exception-tp6101p6252.html Sent from the Apache Spark User List mailing list archive at Nabble.com.