Michael-J-Ward commented on code in PR #880:
URL: https://github.com/apache/datafusion-python/pull/880#discussion_r1777760825


##########
docs/source/user-guide/common-operations/udf-and-udfa.rst:
##########
@@ -57,30 +126,122 @@ Additionally the 
:py:func:`~datafusion.udf.AggregateUDF.udaf` function allows yo
         Interface of a user-defined accumulation.
         """
         def __init__(self):
-            self._sum = pyarrow.scalar(0.0)
+            self._sum = 0.0
 
-        def update(self, values: pyarrow.Array) -> None:
-            # not nice since pyarrow scalars can't be summed yet. This breaks 
on `None`
-            self._sum = pyarrow.scalar(self._sum.as_py() + 
pyarrow.compute.sum(values).as_py())
+        def update(self, values_a: pyarrow.Array, values_b: pyarrow.Array) -> 
None:
+            self._sum = self._sum + pyarrow.compute.sum(values_a).as_py() - 
pyarrow.compute.sum(values_b).as_py()
 
         def merge(self, states: List[pyarrow.Array]) -> None:
-            # not nice since pyarrow scalars can't be summed yet. This breaks 
on `None`
-            self._sum = pyarrow.scalar(self._sum.as_py() + 
pyarrow.compute.sum(states[0]).as_py())
+            self._sum = self._sum + pyarrow.compute.sum(states[0]).as_py()
 
         def state(self) -> pyarrow.Array:
-            return pyarrow.array([self._sum.as_py()])
+            return pyarrow.array([self._sum])
 
         def evaluate(self) -> pyarrow.Scalar:
-            return self._sum
+            return pyarrow.scalar(self._sum)
 
     ctx = datafusion.SessionContext()
     df = ctx.from_pydict(
         {
-            "a": [1, 2, 3],
-            "b": [4, 5, 6],
+            "a": [4, 5, 6],
+            "b": [1, 2, 3],
         }
     )
 
-    my_udaf = udaf(MyAccumulator, pyarrow.float64(), pyarrow.float64(), 
[pyarrow.float64()], 'stable')
+    my_udaf = udaf(MyAccumulator, [pyarrow.float64(), pyarrow.float64()], 
pyarrow.float64(), [pyarrow.float64()], 'stable')
+
+    df.aggregate([], [my_udaf(col("a"), col("b")).alias("col_diff")])
+
+Window Functions
+----------------
+
+To implement a User-Defined Window Function (UDWF) you must call the
+:py:func:`~datafusion.udf.WindowUDF.udwf` function using a class that 
implements the abstract
+class :py:class:`~datafusion.udf.WindowEvaluator`.
+
+There are three methods of evaluation of UDWFs.
+
+- ``evaluate`` is the simplest case, where you are given an array and are 
expected to calculate the
+  value for a single row of that array. However, it is also 
the least performant.
+- ``evaluate_all`` computes the values for all rows for an input array at a 
single time.
+- ``evaluate_all_with_rank`` computes the values for all rows, but you only 
have the rank
+  information for the rows.
+
+Which methods you implement depends on which of these options are set.
+
+.. list-table:: Title

Review Comment:
   ```suggestion
   .. list-table::
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to