Hi,
I've been trying to use com.twitter.chill.MeatLocker to serialize a
third-party class, org.ahocorasick.trie.Trie, so I can use it inside a
Spark closure. So far I'm having no luck: I still get the dreaded
"Task not serializable" error for that class. Am I doing something
obviously wrong?
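
My (possibly mistaken) understanding of the MeatLocker pattern is that you
box the non-serializable value on the driver and only unbox it with .get
inside the closure, roughly along these lines (a generic, untested sketch;
the names below are just placeholders I made up):

val boxed = MeatLocker(someNonSerializableThing)
rdd.map(x => boxed.get.doSomethingWith(x))

If that understanding is off, that may well be my problem.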

Below is the test code that fails (run from the spark-shell):

import com.twitter.chill.MeatLocker
import org.ahocorasick.trie.Trie

// Wrap a function in a MeatLocker; the returned closure captures the
// locker and delegates to the boxed function via locker.get.
def genMapper[A, B](f: A => B): A => B = {
  val locker = com.twitter.chill.MeatLocker(f)
  x => locker.get.apply(x)
}

// Build the Aho-Corasick trie (third-party, not java.io.Serializable)
// on the driver.
val myTrie = new Trie().onlyWholeWords()

myTrie.addKeyword("foo")
myTrie.addKeyword("bar")

val samples = sc.parallelize(Array("foo word", "bar word", "baz word"))

// This map/foreach captures myTrie in the closure and fails with
// Task not serializable.
samples.map(t => myTrie.parseText(t)).foreach(println)
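
For completeness: genMapper is shown above but not actually applied in the
map call. The two variants I have in mind for applying MeatLocker are the
following (untested sketches as written here; boxedTrie is just a name I'm
using for illustration), either routing the closure through genMapper:

samples.map(genMapper((t: String) => myTrie.parseText(t))).foreach(println)

or boxing the Trie itself and only unboxing it inside the closure:

val boxedTrie = MeatLocker(myTrie)
samples.map(t => boxedTrie.get.parseText(t)).foreach(println)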

And the error:
scala> samples.map(t => myTrie.parseText(t)).foreach(println)
14/08/11 23:53:40 INFO SparkContext: Starting job: foreach at <console>:19
14/08/11 23:53:40 INFO DAGScheduler: Got job 0 (foreach at <console>:19) with 8 output partitions (allowLocal=false)
14/08/11 23:53:40 INFO DAGScheduler: Final stage: Stage 0(foreach at <console>:19)
14/08/11 23:53:40 INFO DAGScheduler: Parents of final stage: List()
14/08/11 23:53:40 INFO DAGScheduler: Missing parents: List()
14/08/11 23:53:40 INFO DAGScheduler: Submitting Stage 0 (MappedRDD[1] at map at <console>:19), which has no missing parents
14/08/11 23:53:40 INFO DAGScheduler: Failed to run foreach at <console>:19
org.apache.spark.SparkException: Job aborted due to stage failure: Task not serializable: java.io.NotSerializableException: org.ahocorasick.trie.Trie
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1049)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1033)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1031)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1031)
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitMissingTasks(DAGScheduler.scala:772)
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:715)
        at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:699)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1203)
        at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
        at akka.actor.ActorCell.invoke(ActorCell.scala:456)
        at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
        at akka.dispatch.Mailbox.run(Mailbox.scala:219)
        at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
        at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
        at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
        at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
        at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)