Hi, I run Spark to write data to HBase, but it fails with a NoSuchMethodError:

15/10/23 18:45:21 WARN TaskSetManager: Lost task 0.0 in stage 1.0 (TID 1, dn18-formal.i.nease.net): java.lang.NoSuchMethodError: com.google.common.io.ByteStreams.limit(Ljava/io/InputStream;J)Ljava/io/InputStream;
I found a guava.jar under the hadoop/hbase dir and its version is 12.0, but com.google.common.io.ByteStreams.limit was only added in Guava 14.0, so the NoSuchMethodError occurs. I tried running spark-submit with --jars, but got the same error. I also tried adding

configuration.set("spark.executor.extraClassPath", "/home/ljh")
configuration.set("spark.driver.userClassPathFirst", "true")

to my code, still the same. How can I solve this? How can I remove the guava.jar under hadoop/hbase from the classpath? Why does it not use the guava.jar under the spark dir?

Here is my code (imports added for completeness):

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{HBaseAdmin, HTable, Put}
import org.apache.hadoop.hbase.util.Bytes

rdd.foreach({ res =>
  val configuration = HBaseConfiguration.create()
  configuration.set("hbase.zookeeper.property.clientPort", "2181")
  configuration.set("hbase.zookeeper.quorum", "ip.66")
  configuration.set("hbase.master", "ip:60000")
  configuration.set("spark.executor.extraClassPath", "/home/ljh")
  configuration.set("spark.driver.userClassPathFirst", "true")
  val hadmin = new HBaseAdmin(configuration)
  configuration.clear()
  configuration.addResource("/home/hadoop/conf/core-default.xml")
  configuration.addResource("/home/hadoop/conf/core-site.xml")
  configuration.addResource("/home/hadoop/conf/mapred-default.xml")
  configuration.addResource("/home/hadoop/conf/mapred-site.xml")
  configuration.addResource("/home/hadoop/conf/yarn-default.xml")
  configuration.addResource("/home/hadoop/conf/yarn-site.xml")
  configuration.addResource("/home/hadoop/conf/hdfs-default.xml")
  configuration.addResource("/home/hadoop/conf/hdfs-site.xml")
  configuration.addResource("/home/hadoop/conf/hbase-default.xml")
  configuration.addResource("/home/ljhn1829/hbase-site.xml")
  val table = new HTable(configuration, "ljh_test2")
  val put = new Put(Bytes.toBytes(res.toKey()))
  put.add(Bytes.toBytes("basic"), Bytes.toBytes("name"),
    Bytes.toBytes(res.totalCount + "\t" + res.positiveCount))
  table.put(put)
  table.flushCommits()
})
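One thing I am unsure about in the code above: spark.executor.extraClassPath and spark.driver.userClassPathFirst are Spark properties, so I suspect setting them on the HBase Configuration inside foreach has no effect. My understanding is that they need to go on the SparkConf before the SparkContext is created (or be passed via --conf to spark-submit). Here is an untested sketch of what I mean; the guava jar path is just a made-up example:

import org.apache.spark.{SparkConf, SparkContext}

val sparkConf = new SparkConf()
  .setAppName("WriteToHBase")
  // Put a newer Guava on the executor classpath (hypothetical path):
  .set("spark.executor.extraClassPath", "/home/ljh/guava-14.0.1.jar")
  // Experimental flags that load user classes before the cluster's:
  .set("spark.executor.userClassPathFirst", "true")
  .set("spark.driver.userClassPathFirst", "true")
val sc = new SparkContext(sparkConf)

Is that the right way to pass these, or do they only take effect from spark-submit?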
And here is the full error message:

15/10/23 19:06:42 WARN TaskSetManager: Lost task 0.0 in stage 1.0 (TID 1, gdc-dn126-formal.i.nease.net): java.lang.NoSuchMethodError: com.google.common.io.ByteStreams.limit(Ljava/io/InputStream;J)Ljava/io/InputStream;
    at org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.nextBatchStream(ExternalAppendOnlyMap.scala:420)
    at org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.<init>(ExternalAppendOnlyMap.scala:392)
    at org.apache.spark.util.collection.ExternalAppendOnlyMap.spill(ExternalAppendOnlyMap.scala:207)
    at org.apache.spark.util.collection.ExternalAppendOnlyMap.spill(ExternalAppendOnlyMap.scala:63)
    at org.apache.spark.util.collection.Spillable$class.maybeSpill(Spillable.scala:83)
    at org.apache.spark.util.collection.ExternalAppendOnlyMap.maybeSpill(ExternalAppendOnlyMap.scala:63)
    at org.apache.spark.util.collection.ExternalAppendOnlyMap.insertAll(ExternalAppendOnlyMap.scala:129)
    at org.apache.spark.Aggregator.combineValuesByKey(Aggregator.scala:60)
    at org.apache.spark.shuffle.hash.HashShuffleReader.read(HashShuffleReader.scala:46)
    at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:90)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
15/10/23 19:06:42 INFO TaskSetManager: Starting task 0.1 in stage 1.0 (TID 2, gdc-dn166-formal.i.nease.net, PROCESS_LOCAL, 1277 bytes)
15/10/23 19:06:42 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on gdc-dn166-formal.i.nease.net:3838 (size: 3.2 KB, free: 1060.3 MB)
15/10/23 19:06:42 ERROR YarnScheduler: Lost executor 1 on gdc-dn126-formal.i.nease.net: remote Rpc client disassociated
15/10/23 19:06:42 WARN ReliableDeliverySupervisor: Association with remote system [akka.tcp://sparkexecu...@gdc-dn126-formal.i.nease.net:1656] has failed, address is now gated for [5000] ms. Reason is: [Disassociated].
15/10/23 19:06:42 INFO TaskSetManager: Re-queueing tasks for 1 from TaskSet 1.0
15/10/23 19:06:42 INFO DAGScheduler: Executor lost: 1 (epoch 1)
15/10/23 19:06:42 INFO BlockManagerMasterEndpoint: Trying to remove executor 1 from BlockManagerMaster.
15/10/23 19:06:42 INFO BlockManagerMasterEndpoint: Removing block manager BlockManagerId(1, gdc-dn126-formal.i.nease.net, 44635)
15/10/23 19:06:42 INFO BlockManagerMaster: Removed 1 successfully in removeExecutor
15/10/23 19:06:42 INFO ShuffleMapStage: ShuffleMapStage 0 is now unavailable on executor 1 (0/1, false)
15/10/23 19:06:42 INFO MapOutputTrackerMasterEndpoint: Asked to send map output locations for shuffle 1 to gdc-dn166-formal.i.nease.net:28595
15/10/23 19:06:42 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 1 is 84 bytes
15/10/23 19:06:42 WARN TaskSetManager: Lost task 0.1 in stage 1.0 (TID 2, gdc-dn166-formal.i.nease.net): FetchFailed(null, shuffleId=1, mapId=-1, reduceId=0, message=
org.apache.spark.shuffle.MetadataFetchFailedException: Missing an output location for shuffle 1
    at org.apache.spark.MapOutputTracker$$anonfun$org$apache$spark$MapOutputTracker$$convertMapStatuses$1.apply(MapOutputTracker.scala:389)
    at org.apache.spark.MapOutputTracker$$anonfun$org$apache$spark$MapOutputTracker$$convertMapStatuses$1.apply(MapOutputTracker.scala:386)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
    at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
    at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
    at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:108)
    at org.apache.spark.MapOutputTracker$.org$apache$spark$MapOutputTracker$$convertMapStatuses(MapOutputTracker.scala:385)
    at org.apache.spark.MapOutputTracker.getServerStatuses(MapOutputTracker.scala:172)
    at org.apache.spark.shuffle.hash.BlockStoreShuffleFetcher$.fetch(BlockStoreShuffleFetcher.scala:42)
    at org.apache.spark.shuffle.hash.HashShuffleReader.read(HashShuffleReader.scala:40)
    at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:90)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
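To figure out which guava.jar the executors actually load, I plan to run a small diagnostic inside a task, something like this (just a sketch using standard JDK reflection; rdd is the same RDD as above):

import com.google.common.io.ByteStreams

rdd.foreachPartition { _ =>
  // Ask the JVM where the ByteStreams class was loaded from; this should
  // point at whichever guava.jar won on the executor classpath. The result
  // goes to the executor's stdout log.
  val source = classOf[ByteStreams].getProtectionDomain.getCodeSource
  val location = if (source != null) source.getLocation else null
  println(s"Guava loaded from: $location")
}

If that still prints the 12.0 jar from the hadoop/hbase dir, is there any way, short of shading Guava inside my own jar, to make the executors prefer Guava 14?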