Is it permissible to use a custom class (as opposed to e.g. the built-in String or Int) for the key in groupByKey? It doesn't seem to be working for me on Spark 0.9.0/Scala 2.10.3:
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

class C(val s: String) extends Serializable {
  override def equals(o: Any) =
    if (o.isInstanceOf[C]) o.asInstanceOf[C].s == s else false
  override def toString = s
}

object SimpleApp {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "Simple App", null, null)
    val r1 = sc.parallelize(Array((new C("a"), 11), (new C("a"), 12)))
    println("r1=" + r1.groupByKey.collect.mkString(";"))
    val r2 = sc.parallelize(Array(("a", 11), ("a", 12)))
    println("r2=" + r2.groupByKey.collect.mkString(";"))
  }
}

Output
======

r1=(a,ArrayBuffer(11));(a,ArrayBuffer(12))
r2=(a,ArrayBuffer(11, 12))
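My guess (and it is only a guess) is that the hash-based grouping behind groupByKey also depends on hashCode, and C only overrides equals. Below is a minimal sketch of what I assume the key class would need to look like, with hashCode overridden to agree with equals:

class C(val s: String) extends Serializable {
  override def equals(o: Any) = o match {
    case that: C => that.s == s
    case _       => false
  }
  // Assumption: hashCode must be consistent with equals so that
  // equal keys hash to the same partition/bucket during grouping.
  override def hashCode = s.hashCode
  override def toString = s
}

Is overriding hashCode like this the right approach, or is something else required to use a custom class as a key?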