scala:161)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102)
        at org.apache.spark.scheduler.Task.run(Task.scala:53)
        at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213)
        at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:49)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178)
        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:662)

--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/distinct-on-huge-dataset-tp3025p3084.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
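For anyone hitting the same thing: the OutOfMemoryError above is thrown inside a
ShuffleMapTask, so one common workaround is to give the distinct (or the counting
job that follows it) many more output partitions, so each shuffle task holds a
smaller slice of the data. A minimal sketch; the master, input path, and partition
count below are illustrative placeholders, not values from this thread:

    import org.apache.spark.SparkContext

    // Illustrative only: master, input path and partition count are placeholders.
    val sc = new SparkContext("local[4]", "distinct-sketch")
    val lines = sc.textFile("hdfs:///data/huge-input.txt")

    // distinct(numPartitions) shuffles into that many partitions; more partitions
    // means each ShuffleMapTask buffers fewer unique values at once.
    val uniqueCount = lines.distinct(2000).count()
    println("unique lines: " + uniqueCount)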
Yes, there was an error in the data; after fixing it, count fails with an Out of
Memory Error.
--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/distinct-on-huge-dataset-tp3025p3051.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
But I was wrong: map also fails on the big file, and setting spark.shuffle.spill
doesn't help. Map fails with the same error.
--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/distinct-on-huge-dataset-tp3025p3039.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
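For context, spark.shuffle.spill is the 0.9-era switch that lets shuffle
aggregation spill to disk instead of holding everything in memory. A minimal
sketch of how it is usually set via SparkConf; the master, app name, and the
memoryFraction value are illustrative assumptions, not from this thread:

    import org.apache.spark.{SparkConf, SparkContext}

    // Illustrative configuration sketch for Spark 0.9.x.
    val conf = new SparkConf()
      .setMaster("local[4]")                       // placeholder master
      .setAppName("shuffle-spill-sketch")          // placeholder app name
      .set("spark.shuffle.spill", "true")          // allow shuffle data to spill to disk
      .set("spark.shuffle.memoryFraction", "0.2")  // spill earlier by shrinking the in-memory buffer

    val sc = new SparkContext(conf)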
Yes, that helped; at least it was able to advance a bit further.
--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/distinct-on-huge-dataset-tp3025p3038.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
I mean everything works with the small file. With the huge file, only count and
map work; distinct doesn't.
--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/distinct-on-huge-dataset-tp3025p3034.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
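That split makes sense: count and a plain map never shuffle, while distinct is
essentially a reduceByKey under the hood, so it is the first step that has to
materialize shuffle data. A rough sketch of the difference, reusing the sc from
the sketches above (the input path is a placeholder):

    import org.apache.spark.SparkContext._   // for reduceByKey on pair RDDs

    val lines = sc.textFile("hdfs:///data/huge-input.txt")   // placeholder path

    // No shuffle: each partition is streamed through once.
    val total   = lines.count()
    val lengths = lines.map(_.length)

    // distinct() behaves roughly like this; the reduceByKey step is a shuffle,
    // which is where the huge file runs out of memory.
    val unique = lines
      .map(x => (x, 1))
      .reduceByKey((a, _) => a)
      .map(_._1)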
Yes, it works with the smaller file; it can count and map, but not distinct.
--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/distinct-on-huge-dataset-tp3025p3033.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
It's 0.9.0
--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/distinct-on-huge-dataset-tp3025p3027.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
        at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
        at java.lang.Thread.run(Thread.java:662)

--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/distinct-on-huge-dataset-tp3025.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.