Thank you, David! It works.

import org.apache.spark.util.StatCounter

val a = ordersRDD.join(ordersRDD)
  .map { case ((partnerid, itemid), ((matchedida, pricea), (matchedidb, priceb))) =>
    // Convert before dividing to avoid integer truncation; guard against division by zero
    ((matchedida, matchedidb), if (priceb > 0) pricea.toDouble / priceb else 0.0)
  }
  .groupByKey()
  .mapValues(values => StatCounter(values))

a.take(5).foreach(println)

Output:

((2383,2465),(count: 4, mean: 0.883642, stdev: 0.086068, max: 0.933333, min: 0.734568))
((2600,6786),(count: 4, mean: 2.388889, stdev: 0.559094, max: 3.148148, min: 1.574074))
((2375,2606),(count: 6, mean: 0.693981, stdev: 0.305744, max: 1.125000, min: 0.453704))
((6780,2475),(count: 2, mean: 0.827549, stdev: 0.150991, max: 0.978541, min: 0.676558))
((2475,2606),(count: 7, mean: 3.975737, stdev: 3.356274, max: 9.628572, min: 0.472222))
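One scaling note: groupByKey ships every ratio for a key to a single executor before the statistics are computed. A sketch of the same per-key statistics with aggregateByKey, which merges StatCounter accumulators incrementally instead; ratioPairs is a hypothetical name for the ((matchedida, matchedidb), ratio) pair RDD produced by the map step above:

import org.apache.spark.util.StatCounter

// ratioPairs: RDD[((Int, Int), Double)] -- stands in for the map output above
val stats = ratioPairs.aggregateByKey(new StatCounter())(
  (acc, ratio) => acc.merge(ratio),  // fold one Double into the per-partition accumulator
  (acc1, acc2) => acc1.merge(acc2)   // combine accumulators across partitions
)

stats.take(5).foreach(println)

StatCounter.merge mutates and returns the accumulator, so no per-key collection is ever materialized; the zero value is deserialized fresh for each partition, so reusing the mutable StatCounter is safe here.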
val a = ordersRDD.join(ordersRDD).map{case((partnerid, itemid),((matchedida, pricea), (matchedidb, priceb))) => ((matchedida, matchedidb), (if(priceb > 0) (pricea/priceb).toDouble else 0.toDouble))} .groupByKey .mapValues( value => org.apache.spark.util.StatCounter(value)) .take(5) .foreach(println) output: ((2383,2465),(count: 4, mean: 0.883642, stdev: 0.086068, max: 0.933333, min: 0.734568)) ((2600,6786),(count: 4, mean: 2.388889, stdev: 0.559094, max: 3.148148, min: 1.574074)) ((2375,2606),(count: 6, mean: 0.693981, stdev: 0.305744, max: 1.125000, min: 0.453704)) ((6780,2475),(count: 2, mean: 0.827549, stdev: 0.150991, max: 0.978541, min: 0.676558)) ((2475,2606),(count: 7, mean: 3.975737, stdev: 3.356274, max: 9.628572, min: 0.472222)) -- View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Computing-mean-and-standard-deviation-by-key-tp11192p14068.html Sent from the Apache Spark User List mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org