This is an automated email from the ASF dual-hosted git repository. jmalkin pushed a commit to branch kll_functions in repository https://gitbox.apache.org/repos/asf/datasketches-spark.git
commit 517f55ab22639e2900e24c7924284bd0799d9eb6 Author: Jon <[email protected]> AuthorDate: Wed Mar 5 23:34:09 2025 -0800 move kll expressions into separate files --- .../kll/expressions/KllDoublesSketchGetMax.scala | 82 ++++++++++++++++ .../kll/expressions/KllDoublesSketchGetMin.scala | 82 ++++++++++++++++ ...sions.scala => KllDoublesSketchGetPmfCdf.scala} | 109 +-------------------- 3 files changed, 165 insertions(+), 108 deletions(-) diff --git a/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchGetMax.scala b/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchGetMax.scala new file mode 100644 index 0000000..5b9ba74 --- /dev/null +++ b/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchGetMax.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.datasketches.kll.expressions + +import org.apache.spark.sql.catalyst.expressions.{Expression, + ExpressionDescription, + UnaryExpression, + ExpectsInputTypes, + NullIntolerant} +import org.apache.spark.sql.types.{AbstractDataType, DataType, DoubleType} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeBlock, CodegenContext, ExprCode} +import org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType +import org.apache.datasketches.kll.KllDoublesSketch +import org.apache.datasketches.memory.Memory + +@ExpressionDescription( + usage = """ + _FUNC_(expr) - Returns the maximum value seem by the sketch given the binary representation + of a Datasketches KllDoublesSketch. """, + examples = """ + Examples: + > SELECT _FUNC_(kll_sketch_agg(col)) FROM VALUES (1.0), (2.0), (3.0) tab(col); + 3.0 + """ + //group = "misc_funcs", +) +case class KllDoublesSketchGetMax(sketchExpr: Expression) + extends UnaryExpression + with ExpectsInputTypes + with NullIntolerant { + + override def child: Expression = sketchExpr + + override protected def withNewChildInternal(newChild: Expression): KllDoublesSketchGetMax = { + copy(sketchExpr = newChild) + } + + override def prettyName: String = "kll_sketch_double_get_max" + + override def inputTypes: Seq[AbstractDataType] = Seq(KllDoublesSketchType) + + override def dataType: DataType = DoubleType + + override def nullSafeEval(input: Any): Any = { + val bytes = input.asInstanceOf[Array[Byte]] + val sketch = KllDoublesSketch.wrap(Memory.wrap(bytes)) + sketch.getMaxItem + } + + override protected def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: String => String): ExprCode = { + val sketchEval = child.genCode(ctx) + val sketch = ctx.freshName("sketch") + + val code = + s""" + |${sketchEval.code} + |final org.apache.datasketches.kll.KllDoublesSketch $sketch = org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType.wrap(${sketchEval.value}); + |final double ${ev.value} = $sketch.getMaxItem(); + |final boolean ${ev.isNull} = ${sketchEval.isNull}; + """.stripMargin + ev.copy(code = CodeBlock(Seq(code), Seq.empty)) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + nullSafeCodeGen(ctx, ev, c => s"($c)") + } +} diff --git a/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchGetMin.scala b/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchGetMin.scala new file mode 100644 index 0000000..cd88000 --- /dev/null +++ b/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchGetMin.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.datasketches.kll.expressions + +import org.apache.spark.sql.catalyst.expressions.{Expression, + ExpressionDescription, + UnaryExpression, + ExpectsInputTypes, + NullIntolerant} +import org.apache.spark.sql.types.{AbstractDataType, DataType, DoubleType} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeBlock, CodegenContext, ExprCode} +import org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType +import org.apache.datasketches.kll.KllDoublesSketch +import org.apache.datasketches.memory.Memory + +@ExpressionDescription( + usage = """ + _FUNC_(expr) - Returns the minimum value seem by the sketch given the binary representation + of a Datasketches KllDoublesSketch. """, + examples = """ + Examples: + > SELECT _FUNC_(kll_sketch_agg(col)) FROM VALUES (1.0), (2.0), (3.0) tab(col); + 1.0 + """ + //group = "misc_funcs", +) +case class KllDoublesSketchGetMin(sketchExpr: Expression) + extends UnaryExpression + with ExpectsInputTypes + with NullIntolerant { + + override def child: Expression = sketchExpr + + override protected def withNewChildInternal(newChild: Expression): KllDoublesSketchGetMin = { + copy(sketchExpr = newChild) + } + + override def prettyName: String = "kll_sketch_double_get_min" + + override def inputTypes: Seq[AbstractDataType] = Seq(KllDoublesSketchType) + + override def dataType: DataType = DoubleType + + override def nullSafeEval(input: Any): Any = { + val bytes = input.asInstanceOf[Array[Byte]] + val sketch = KllDoublesSketch.wrap(Memory.wrap(bytes)) + sketch.getMinItem + } + + override protected def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: String => String): ExprCode = { + val sketchEval = child.genCode(ctx) + val sketch = ctx.freshName("sketch") + + val code = + s""" + |${sketchEval.code} + |final org.apache.datasketches.kll.KllDoublesSketch $sketch = org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType.wrap(${sketchEval.value}); + |final double ${ev.value} = $sketch.getMinItem(); + |final boolean ${ev.isNull} = ${sketchEval.isNull}; + """.stripMargin + ev.copy(code = CodeBlock(Seq(code), Seq.empty)) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + nullSafeCodeGen(ctx, ev, c => s"($c)") + } +} diff --git a/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchExpressions.scala b/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchGetPmfCdf.scala similarity index 74% rename from src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchExpressions.scala rename to src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchGetPmfCdf.scala index 087a9cb..60d43ff 100644 --- a/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchExpressions.scala +++ b/src/main/scala/org/apache/spark/sql/datasketches/kll/expressions/KllDoublesSketchGetPmfCdf.scala @@ -24,119 +24,13 @@ import org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType import org.apache.spark.sql.types.{AbstractDataType, ArrayType, BooleanType, DataType, DoubleType} import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ExpectsInputTypes, ImplicitCastInputTypes} -import org.apache.spark.sql.catalyst.expressions.{UnaryExpression, TernaryExpression} +import org.apache.spark.sql.catalyst.expressions.TernaryExpression import org.apache.spark.sql.catalyst.expressions.{Literal, NullIntolerant, RuntimeReplaceable} import org.apache.spark.sql.catalyst.expressions.codegen.{CodeBlock, CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.catalyst.trees.TernaryLike import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -@ExpressionDescription( - usage = """ - _FUNC_(expr) - Returns the minimum value seem by the sketch given the binary representation - of a Datasketches KllDoublesSketch. """, - examples = """ - Examples: - > SELECT _FUNC_(kll_sketch_agg(col)) FROM VALUES (1.0), (2.0), (3.0) tab(col); - 1.0 - """ - //group = "misc_funcs", -) -case class KllDoublesSketchGetMin(sketchExpr: Expression) - extends UnaryExpression - with ExpectsInputTypes - with NullIntolerant { - - override def child: Expression = sketchExpr - - override protected def withNewChildInternal(newChild: Expression): KllDoublesSketchGetMin = { - copy(sketchExpr = newChild) - } - - override def prettyName: String = "kll_sketch_double_get_min" - - override def inputTypes: Seq[AbstractDataType] = Seq(KllDoublesSketchType) - - override def dataType: DataType = DoubleType - - override def nullSafeEval(input: Any): Any = { - val bytes = input.asInstanceOf[Array[Byte]] - val sketch = KllDoublesSketch.wrap(Memory.wrap(bytes)) - sketch.getMinItem - } - - override protected def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: String => String): ExprCode = { - val sketchEval = child.genCode(ctx) - val sketch = ctx.freshName("sketch") - - val code = - s""" - |${sketchEval.code} - |final org.apache.datasketches.kll.KllDoublesSketch $sketch = org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType.wrap(${sketchEval.value}); - |final double ${ev.value} = $sketch.getMinItem(); - |final boolean ${ev.isNull} = ${sketchEval.isNull}; - """.stripMargin - ev.copy(code = CodeBlock(Seq(code), Seq.empty)) - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, c => s"($c)") - } -} - -@ExpressionDescription( - usage = """ - _FUNC_(expr) - Returns the maximum value seem by the sketch given the binary representation - of a Datasketches KllDoublesSketch. """, - examples = """ - Examples: - > SELECT _FUNC_(kll_sketch_agg(col)) FROM VALUES (1.0), (2.0), (3.0) tab(col); - 3.0 - """ - //group = "misc_funcs", -) -case class KllDoublesSketchGetMax(sketchExpr: Expression) - extends UnaryExpression - with ExpectsInputTypes - with NullIntolerant { - - override def child: Expression = sketchExpr - - override protected def withNewChildInternal(newChild: Expression): KllDoublesSketchGetMax = { - copy(sketchExpr = newChild) - } - - override def prettyName: String = "kll_sketch_double_get_max" - - override def inputTypes: Seq[AbstractDataType] = Seq(KllDoublesSketchType) - - override def dataType: DataType = DoubleType - - override def nullSafeEval(input: Any): Any = { - val bytes = input.asInstanceOf[Array[Byte]] - val sketch = KllDoublesSketch.wrap(Memory.wrap(bytes)) - sketch.getMaxItem - } - - override protected def nullSafeCodeGen(ctx: CodegenContext, ev: ExprCode, f: String => String): ExprCode = { - val sketchEval = child.genCode(ctx) - val sketch = ctx.freshName("sketch") - - val code = - s""" - |${sketchEval.code} - |final org.apache.datasketches.kll.KllDoublesSketch $sketch = org.apache.spark.sql.datasketches.kll.types.KllDoublesSketchType.wrap(${sketchEval.value}); - |final double ${ev.value} = $sketch.getMaxItem(); - |final boolean ${ev.isNull} = ${sketchEval.isNull}; - """.stripMargin - ev.copy(code = CodeBlock(Seq(code), Seq.empty)) - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, c => s"($c)") - } -} - @ExpressionDescription( usage = """ _FUNC_(expr, expr, isInclusive) - Returns an approximation to the PMF @@ -205,7 +99,6 @@ case class KllDoublesSketchGetCdf(sketchExpr: Expression, } } - /** * Returns the PMF and CDF of the given quantile search criteria. * --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
