Hi,

I am getting the error below when running Spark SQL jobs. It is thrown
after roughly 80% of the tasks have completed. Any solution? These are the
relevant settings I am using:

spark.storage.memoryFraction=0.4
spark.sql.shuffle.partitions=2000
spark.default.parallelism=100
#spark.eventLog.enabled=false
#spark.scheduler.revive.interval=1s
spark.driver.memory=8g
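
For completeness, here is a rough sketch of how the same settings could be
applied through SparkConf (illustrative only, not my exact job; Spark 1.x
API assumed, and the app name/query are placeholders):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object SqlJobSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("sql-job")                        // placeholder name
      .set("spark.storage.memoryFraction", "0.4")
      .set("spark.sql.shuffle.partitions", "2000")
      .set("spark.default.parallelism", "100")
      // Note: spark.driver.memory only takes effect if set before the driver
      // JVM starts (spark-submit --driver-memory or spark-defaults.conf),
      // not when set programmatically here.

    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // ... run the SQL that fails at ~80% of tasks ...
    sc.stop()
  }
}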


java.lang.OutOfMemoryError: GC overhead limit exceeded
        at java.util.ArrayList.subList(ArrayList.java:955)
        at java.lang.String.split(String.java:2311)
        at sun.net.util.IPAddressUtil.textToNumericFormatV4(IPAddressUtil.java:47)
        at java.net.InetAddress.getAllByName(InetAddress.java:1129)
        at java.net.InetAddress.getAllByName(InetAddress.java:1098)
        at java.net.InetAddress.getByName(InetAddress.java:1048)
        at org.apache.hadoop.net.NetUtils.normalizeHostName(NetUtils.java:562)
        at org.apache.hadoop.net.NetUtils.normalizeHostNames(NetUtils.java:579)
        at org.apache.hadoop.net.CachedDNSToSwitchMapping.resolve(CachedDNSToSwitchMapping.java:109)
        at org.apache.hadoop.yarn.util.RackResolver.coreResolve(RackResolver.java:101)
        at org.apache.hadoop.yarn.util.RackResolver.resolve(RackResolver.java:81)
        at org.apache.spark.scheduler.cluster.YarnScheduler.getRackForHost(YarnScheduler.scala:37)
        at org.apache.spark.scheduler.TaskSetManager.dequeueTask(TaskSetManager.scala:380)
        at org.apache.spark.scheduler.TaskSetManager.resourceOffer(TaskSetManager.scala:433)
        at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$org$apache$spark$scheduler$TaskSchedulerImpl$$resourceOfferSingleTaskSet$1.apply$mcVI$sp(TaskSchedulerImpl.scala:276)
        at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
        at org.apache.spark.scheduler.TaskSchedulerImpl.org$apache$spark$scheduler$TaskSchedulerImpl$$resourceOfferSingleTaskSet(TaskSchedulerImpl.scala:271)
        at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$resourceOffers$4$$anonfun$apply$9.apply(TaskSchedulerImpl.scala:357)
        at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$resourceOffers$4$$anonfun$apply$9.apply(TaskSchedulerImpl.scala:355)
        at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
        at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
        at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$resourceOffers$4.apply(TaskSchedulerImpl.scala:355)
        at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$resourceOffers$4.apply(TaskSchedulerImpl.scala:352)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
        at org.apache.spark.scheduler.TaskSchedulerImpl.resourceOffers(TaskSchedulerImpl.scala:352)
        at org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint.org$apache$spark$scheduler$cluster$CoarseGrainedSchedulerBackend$DriverEndpoint$$makeOffers(CoarseGrainedSchedulerBackend.scala:222)