Michael-J-Ward commented on code in PR #880:
URL: https://github.com/apache/datafusion-python/pull/880#discussion_r1777760825
##########
docs/source/user-guide/common-operations/udf-and-udfa.rst:
##########
@@ -57,30 +126,122 @@ Additionally the
:py:func:`~datafusion.udf.AggregateUDF.udaf` function allows yo
Interface of a user-defined accumulation.
"""
def __init__(self):
- self._sum = pyarrow.scalar(0.0)
+ self._sum = 0.0
- def update(self, values: pyarrow.Array) -> None:
- # not nice since pyarrow scalars can't be summed yet. This breaks
on `None`
- self._sum = pyarrow.scalar(self._sum.as_py() +
pyarrow.compute.sum(values).as_py())
+ def update(self, values_a: pyarrow.Array, values_b: pyarrow.Array) ->
None:
+ self._sum = self._sum + pyarrow.compute.sum(values_a).as_py() -
pyarrow.compute.sum(values_b).as_py()
def merge(self, states: List[pyarrow.Array]) -> None:
- # not nice since pyarrow scalars can't be summed yet. This breaks
on `None`
- self._sum = pyarrow.scalar(self._sum.as_py() +
pyarrow.compute.sum(states[0]).as_py())
+ self._sum = self._sum + pyarrow.compute.sum(states[0]).as_py()
def state(self) -> pyarrow.Array:
- return pyarrow.array([self._sum.as_py()])
+ return pyarrow.array([self._sum])
def evaluate(self) -> pyarrow.Scalar:
- return self._sum
+ return pyarrow.scalar(self._sum)
ctx = datafusion.SessionContext()
df = ctx.from_pydict(
{
- "a": [1, 2, 3],
- "b": [4, 5, 6],
+ "a": [4, 5, 6],
+ "b": [1, 2, 3],
}
)
- my_udaf = udaf(MyAccumulator, pyarrow.float64(), pyarrow.float64(),
[pyarrow.float64()], 'stable')
+ my_udaf = udaf(MyAccumulator, [pyarrow.float64(), pyarrow.float64()],
pyarrow.float64(), [pyarrow.float64()], 'stable')
+
+ df.aggregate([], [my_udaf(col("a"), col("b")).alias("col_diff")])
+
+Window Functions
+----------------
+
+To implement a User-Defined Window Function (UDWF) you must call the
+:py:func:`~datafusion.udf.WindowUDF.udwf` function using a class that
implements the abstract
+class :py:class:`~datafusion.udf.WindowEvaluator`.
+
+There are three methods of evaluation of UDWFs.
+
+- ``evaluate`` is the simplest case, where you are given an array and are
expected to calculate the
+ value for a single row of that array. It is also the least performant
option.
+- ``evaluate_all`` computes the values for all rows for an input array at a
single time.
+- ``evaluate_all_with_rank`` computes the values for all rows, but you only
have the rank
+ information for the rows.
+
+Which methods you implement depends on which of these options are set.
+
+.. list-table:: Title
Review Comment:
```suggestion
.. list-table::
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]