The Snappy native library was installed as part of Hadoop (CDH 5.1.0); it is located in /usr/lib/hadoop/lib/:
-rwxr-xr-x 1 hdfs hadoop 23904 Jul 12 14:11 libsnappy.so.1.1.3 lrwxrwxrwx 1 hdfs hadoop 18 Aug 11 20:56 libsnappy.so.1 -> libsnappy.so.1.1.3 lrwxrwxrwx 1 hdfs hadoop 18 Aug 11 20:56 libsnappy.so -> libsnappy.so.1.1.3 Chen On Sat, Sep 20, 2014 at 9:31 AM, Ted Yu <yuzhih...@gmail.com> wrote: > Can you tell us how you installed native snappy ? > > See D.3.1.5 in: > http://hbase.apache.org/book.html#snappy.compression.installation > > Cheers > > On Sat, Sep 20, 2014 at 2:12 AM, Chen Song <chen.song...@gmail.com> wrote: > >> I have seen below exception from spark 1.1.0. Any insights on the snappy >> exception? >> >> 14/09/18 16:45:11 ERROR executor.Executor: Exception in task 763.1 in >> stage 6.0 (TID 17035) >> java.io.IOException: PARSING_ERROR(2) >> at >> org.xerial.snappy.SnappyNative.throw_error(SnappyNative.java:78) >> at org.xerial.snappy.SnappyNative.uncompressedLength(Native >> Method) >> at org.xerial.snappy.Snappy.uncompressedLength(Snappy.java:545) >> at >> org.xerial.snappy.SnappyInputStream.readFully(SnappyInputStream.java:125) >> at >> org.xerial.snappy.SnappyInputStream.readHeader(SnappyInputStream.java:88) >> at >> org.xerial.snappy.SnappyInputStream.<init>(SnappyInputStream.java:58) >> at >> org.apache.spark.io.SnappyCompressionCodec.compressedInputStream(CompressionCodec.scala:128) >> at >> org.apache.spark.storage.BlockManager.wrapForCompression(BlockManager.scala:1036) >> at org.apache.spark.storage.BlockManager.org >> $apache$spark$storage$BlockManager$$getIterator$1(BlockManager.scala:1071) >> at >> org.apache.spark.storage.BlockManager$$anonfun$dataDeserialize$1.apply(BlockManager.scala:1085) >> at >> org.apache.spark.storage.BlockManager$$anonfun$dataDeserialize$1.apply(BlockManager.scala:1085) >> at >> org.apache.spark.storage.BlockManager$LazyProxyIterator$1.proxy$lzycompute(BlockManager.scala:1081) >> at >> org.apache.spark.storage.BlockManager$LazyProxyIterator$1.proxy(BlockManager.scala:1081) >> at >> 
org.apache.spark.storage.BlockManager$LazyProxyIterator$1.hasNext(BlockManager.scala:1082) >> at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) >> at >> org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:30) >> at >> org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39) >> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) >> at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) >> at >> org.apache.spark.util.collection.ExternalAppendOnlyMap.insertAll(ExternalAppendOnlyMap.scala:137) >> at >> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$5.apply(CoGroupedRDD.scala:159) >> at >> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$5.apply(CoGroupedRDD.scala:158) >> at >> scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772) >> at >> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) >> at >> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) >> at >> scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771) >> at >> org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:158) >> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) >> at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) >> at >> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31) >> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) >> at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) >> at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) >> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) >> at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) >> at >> org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33) >> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) >> at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) >> at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:87) >> at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) >> at org.apache.spark.rdd.RDD.iterator(RDD.scala:229) >> at >> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68) >> at >> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) >> at org.apache.spark.scheduler.Task.run(Task.scala:54) >> at >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177) >> at >> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) >> at >> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) >> at java.lang.Thread.run(Thread.java:745) >> >> -- >> Chen Song >> >> > -- Chen Song