I tried the sample code in both Zeppelin and spark-shell, and got the same error in each. Please try the following code as a workaround; it refers to every Spark type by its fully qualified name.
import org.apache.spark.sql.expressions.MutableAggregationBuffer import org.apache.spark.sql.expressions.UserDefinedAggregateFunction /** Spark SQL user-defined aggregate function that computes the geometric mean of a Double column. Every Spark type is written with its fully qualified name instead of relying on the import statements. */ class GeometricMean extends org.apache.spark.sql.expressions.UserDefinedAggregateFunction { /** One input column named "value" of DoubleType. */ def inputSchema: org.apache.spark.sql.types.StructType = org.apache.spark.sql.types.StructType(org.apache.spark.sql.types.StructField("value", org.apache.spark.sql.types.DoubleType) :: Nil) /** Aggregation buffer: slot 0 is "count" (LongType), slot 1 is "product" (DoubleType). */ def bufferSchema: org.apache.spark.sql.types.StructType = org.apache.spark.sql.types.StructType( org.apache.spark.sql.types.StructField("count", org.apache.spark.sql.types.LongType) :: org.apache.spark.sql.types.StructField("product", org.apache.spark.sql.types.DoubleType) :: Nil ) /** The result type is DoubleType. */ def dataType: org.apache.spark.sql.types.DataType = org.apache.spark.sql.types.DoubleType def deterministic: Boolean = true /** Starts with a count of 0 and a product of 1.0, the multiplicative identity. */ def initialize(buffer: org.apache.spark.sql.expressions.MutableAggregationBuffer): Unit = { buffer(0) = 0L buffer(1) = 1.0 } /** Folds one input row into the buffer: increments the count and multiplies the product by the row's first field. NOTE(review): null inputs are not filtered here -- confirm how they should be handled. */ def update(buffer: org.apache.spark.sql.expressions.MutableAggregationBuffer,input: org.apache.spark.sql.Row): Unit = { buffer(0) = buffer.getAs[Long](0) + 1 buffer(1) = buffer.getAs[Double](1) * input.getAs[Double](0) } /** Combines two partial buffers by adding their counts and multiplying their products. */ def merge(buffer1: org.apache.spark.sql.expressions.MutableAggregationBuffer, buffer2: org.apache.spark.sql.Row): Unit = { buffer1(0) = buffer1.getAs[Long](0) + buffer2.getAs[Long](0) buffer1(1) = buffer1.getAs[Double](1) * buffer2.getAs[Double](1) } /** Returns product ^ (1 / count); 1.toDouble makes the division floating-point. NOTE(review): a count of 0 gives an infinite exponent -- confirm the intended result for empty groups. */ def evaluate(buffer: org.apache.spark.sql.Row): Any = { math.pow(buffer.getDouble(1), 1.toDouble / buffer.getLong(0)) } } Thanks, moon On Sat, Oct 29, 2016 at 9:16 AM Manjunath, Kiran <kiman...@akamai.com> wrote: > I am trying the below sample code > > > > Code > > > > %spark > > import org.apache.spark.sql.expressions.MutableAggregationBuffer > > import org.apache.spark.sql.expressions.UserDefinedAggregateFunction > > > > > > class GeometricMean extends UserDefinedAggregateFunction { > > def inputSchema: org.apache.spark.sql.types.StructType = > > 
StructType(StructField("value", DoubleType) :: Nil) > > def bufferSchema: StructType = StructType( > > StructField("count", LongType) :: > > StructField("product", DoubleType) :: Nil > > ) > > def dataType: DataType = DoubleType > > def deterministic: Boolean = true > > def initialize(buffer: MutableAggregationBuffer): Unit = { > > buffer(0) = 0L > > buffer(1) = 1.0 > > } > > def update(buffer: MutableAggregationBuffer,input: Row): Unit = { > > buffer(0) = buffer.getAs[Long](0) + 1 > > buffer(1) = buffer.getAs[Double](1) * input.getAs[Double](0) > > } > > def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = { > > buffer1(0) = buffer1.getAs[Long](0) + buffer2.getAs[Long](0) > > buffer1(1) = buffer1.getAs[Double](1) * buffer2.getAs[Double](1) > > } > > def evaluate(buffer: Row): Any = { > > math.pow(buffer.getDouble(1), 1.toDouble / buffer.getLong(0)) > > } > > } > > > > > > Error > > > > import org.apache.spark.sql.expressions.MutableAggregationBuffer > > import org.apache.spark.sql.expressions.UserDefinedAggregateFunction > > <console>:11: error: not found: type UserDefinedAggregateFunction > > class GeometricMean extends UserDefinedAggregateFunction { > > ^ > > > > > > Is this a known problem? > > Any workaround for this? > > > > Regards, > > Kiran >