In Scala, if you override .equals() you also need to override .hashCode(), just like in Java:
http://www.scala-lang.org/api/2.10.3/index.html#scala.AnyRef

I suspect that if your .hashCode() simply delegates to the hash code of s, you'd be good (sketch below, after your quoted message).

On Tue, May 13, 2014 at 3:30 PM, Michael Malak <michaelma...@yahoo.com> wrote:

> Is it permissible to use a custom class (as opposed to e.g. the built-in
> String or Int) for the key in groupByKey? It doesn't seem to be working for
> me on Spark 0.9.0/Scala 2.10.3:
>
> import org.apache.spark.SparkContext
> import org.apache.spark.SparkContext._
>
> class C(val s: String) extends Serializable {
>   override def equals(o: Any) =
>     if (o.isInstanceOf[C]) o.asInstanceOf[C].s == s else false
>   override def toString = s
> }
>
> object SimpleApp {
>   def main(args: Array[String]) {
>     val sc = new SparkContext("local", "Simple App", null, null)
>     val r1 = sc.parallelize(Array((new C("a"), 11), (new C("a"), 12)))
>     println("r1=" + r1.groupByKey.collect.mkString(";"))
>     val r2 = sc.parallelize(Array(("a", 11), ("a", 12)))
>     println("r2=" + r2.groupByKey.collect.mkString(";"))
>   }
> }
>
>
> Output
> ======
> r1=(a,ArrayBuffer(11));(a,ArrayBuffer(12))
> r2=(a,ArrayBuffer(11, 12))
>
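For what it's worth, here's an untested sketch of what I mean, based on your original class (same SparkContext constructor as in your example):

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

// Same class as before, but hashCode now delegates to s.hashCode,
// so keys that are equal also hash to the same bucket during the
// groupByKey shuffle.
class C(val s: String) extends Serializable {
  override def equals(o: Any) = o match {
    case that: C => that.s == s
    case _       => false
  }
  override def hashCode = s.hashCode
  override def toString = s
}

object SimpleApp {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "Simple App", null, null)
    val r1 = sc.parallelize(Array((new C("a"), 11), (new C("a"), 12)))
    // With consistent equals/hashCode, both pairs should now group
    // under a single key, i.e. something like:
    // r1=(a,ArrayBuffer(11, 12))
    println("r1=" + r1.groupByKey.collect.mkString(";"))
  }
}

Alternatively, making C a case class would give you a structural equals/hashCode for free.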