This is an automated email from the ASF dual-hosted git repository.
zclllyybb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push:
new 6e9c20cc2ac [Enhancement](udf) Support volatility property for UDF
(#3909)
6e9c20cc2ac is described below
commit 6e9c20cc2ac42ab22b331754daec3886c1baa6f6
Author: linrrarity <[email protected]>
AuthorDate: Thu Jun 4 22:05:55 2026 +0800
[Enhancement](udf) Support volatility property for UDF (#3909)
## Versions
- [x] dev
- [x] 4.x
- [ ] 3.x
- [ ] 2.1 or older (not covered by version/language sync gate)
## Languages
- [x] Chinese
- [x] English
- [ ] Japanese candidate translation needed
## Docs Checklist
- [ ] Checked by AI
- [ ] Test Cases Built
- [ ] Updated required version and language counterparts, or explained
why not
- [ ] If only one language changed, confirmed whether source/translation
counterparts need sync
---
docs/query-data/udf/java-user-defined-function.md | 15 +-
.../query-data/udf/python-user-defined-function.md | 153 ++++++++++++++-------
.../sql-statements/function/CREATE-FUNCTION.md | 97 ++++++++++++-
.../query-data/udf/java-user-defined-function.md | 15 +-
.../query-data/udf/python-user-defined-function.md | 153 ++++++++++++++-------
.../sql-statements/function/CREATE-FUNCTION.md | 97 ++++++++++++-
.../query-data/udf/java-user-defined-function.md | 15 +-
.../query-data/udf/python-user-defined-function.md | 153 ++++++++++++++-------
.../sql-statements/function/CREATE-FUNCTION.md | 97 ++++++++++++-
.../query-data/udf/java-user-defined-function.md | 15 +-
.../query-data/udf/python-user-defined-function.md | 153 ++++++++++++++-------
.../sql-statements/function/CREATE-FUNCTION.md | 97 ++++++++++++-
12 files changed, 832 insertions(+), 228 deletions(-)
diff --git a/docs/query-data/udf/java-user-defined-function.md
b/docs/query-data/udf/java-user-defined-function.md
index 96e1fe99e7c..fc704ab8aec 100644
--- a/docs/query-data/udf/java-user-defined-function.md
+++ b/docs/query-data/udf/java-user-defined-function.md
@@ -137,7 +137,8 @@ When writing a UDF in Java, the main entry point must be
the `evaluate` function
"file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
"symbol"="org.apache.doris.udf.AddOne",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -348,7 +349,8 @@ public void destroy(State state) {
"file"="file:///pathTo/java-udaf.jar",
"symbol"="org.apache.doris.udf.SimpleDemo",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -390,7 +392,8 @@ void reset(State state)
"file"="file:///pathTo/java-udaf.jar",
"symbol"="org.apache.doris.udf.SimpleDemo",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -435,7 +438,8 @@ Like UDF, UDTF requires you to implement the `evaluate`
method, but the return v
"file"="file:///pathTo/java-udtf.jar",
"symbol"="org.apache.doris.udf.demo.UDTFStringTest",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -531,7 +535,8 @@ The steps are as follows:
"file"="file:///pathTo/FunctionUdf.jar",
"symbol"="org.apache.doris.udf.FunctionUdf",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
diff --git a/docs/query-data/udf/python-user-defined-function.md
b/docs/query-data/udf/python-user-defined-function.md
index e80d511cbd9..6547eb76131 100644
--- a/docs/query-data/udf/python-user-defined-function.md
+++ b/docs/query-data/udf/python-user-defined-function.md
@@ -81,7 +81,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "entry_function_name",
"runtime_version" = "python_version",
- "always_nullable" = "true|false"
+ "always_nullable" = "true|false",
+ "volatility" = "immutable|stable|volatile"
)
AS $$
def entry_function_name(param1, param2, ...):
@@ -100,7 +101,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(a, b):
@@ -120,7 +122,8 @@ RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s1, s2):
@@ -294,7 +297,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.add_three_numbers",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_reverse(STRING)
@@ -304,7 +308,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.reverse_string",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_is_prime(INT)
@@ -314,7 +319,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.is_prime",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -332,7 +338,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.add_three_numbers",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_reverse(STRING)
@@ -342,7 +349,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.reverse_string",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_is_prime(INT)
@@ -352,7 +360,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.is_prime",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -369,7 +378,8 @@ PROPERTIES (
"file" = "file:///path/to/my_udf.zip",
"symbol" = "my_udf.math_ops.multiply_by_two",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -412,6 +422,7 @@ DROP FUNCTION IF EXISTS py_is_prime(INT);
| `file` | No | - | Path to the Python `.zip` package. Required only in module
mode. Supports three protocols:<br/>• `file://`: local file system path<br/>•
`http://`: HTTP remote download<br/>• `https://`: HTTPS remote download |
| `runtime_version` | Yes | - | Python runtime version, such as `"3.10.12"`.
The complete version number is required |
| `always_nullable` | No | `true` | Whether the function always returns a
nullable result |
+| `volatility` | No | `volatile` | Function volatility. Valid values are
`immutable`, `stable`, and `volatile`.<br/>`immutable`: the same input always
produces the same output. Most deterministic UDFs should use this value for
better plan optimization.<br/>`stable`: the result is stable within one SQL
statement but may change between statements, similar to `now()`. SQL cache and
materialized view rewrite are disabled.<br/>`volatile`: the result may change
on each call, similar to `random [...]
#### Runtime Version Notes
@@ -479,7 +490,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(a, b):
@@ -530,7 +542,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "add",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -553,7 +566,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "to_upper",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -576,7 +590,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "sqrt",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -616,7 +631,8 @@ PROPERTIES (
"type"="PYTHON_UDF",
"symbol"="py_mixed_vector_add_impl",
"always_nullable"="true",
- "runtime_version"="3.12.11"
+ "runtime_version"="3.12.11",
+ "volatility"="immutable"
)
AS $$
import pandas as pd
@@ -702,7 +718,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(arr):
@@ -726,7 +743,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(arr):
@@ -751,7 +769,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(m):
@@ -775,7 +794,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(m, key):
@@ -800,7 +820,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s):
@@ -827,7 +848,8 @@ RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(email):
@@ -853,7 +875,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s1, s2):
@@ -890,7 +913,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
from datetime import datetime
@@ -919,7 +943,8 @@ RETURNS BOOLEAN
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(id_card):
@@ -1156,7 +1181,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "SumUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class SumUDAF:
@@ -1223,7 +1249,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "AvgUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class AvgUDAF:
@@ -1430,7 +1457,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.VarianceUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE AGGREGATE FUNCTION py_stddev(DOUBLE)
@@ -1440,7 +1468,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.StdDevUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE AGGREGATE FUNCTION py_median(DOUBLE)
@@ -1450,7 +1479,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.MedianUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -1532,6 +1562,7 @@ DROP FUNCTION IF EXISTS py_variance(DOUBLE);
| `file` | No | - | Path to the Python `.zip` package. Required only in module
mode. Supports three protocols:<br/>• `file://`: local file system path<br/>•
`http://`: HTTP remote download<br/>• `https://`: HTTPS remote download |
| `runtime_version` | Yes | - | Python runtime version, such as `"3.10.12"` |
| `always_nullable` | No | `true` | Whether the function always returns a
nullable result |
+| `volatility` | No | `immutable` | Function volatility. Valid values are
`immutable`, `stable`, and `volatile`.<br/>`immutable`: the same input always
produces the same output. Most deterministic UDFs should use this value for
better plan optimization.<br/>`stable`: the result is stable within one SQL
statement but may change between statements, similar to `now()`. SQL cache and
materialized view rewrite are disabled.<br/>`volatile`: the result may change
on each call, similar to `rando [...]
#### runtime_version Notes
@@ -1574,7 +1605,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "RunningSumUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class RunningSumUDAF:
@@ -1650,7 +1682,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "PercentileUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class PercentileUDAF:
@@ -1745,7 +1778,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "CollectSetUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class CollectSetUDAF:
@@ -2060,7 +2094,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "split_string_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def split_string_udtf(text, delimiter):
@@ -2096,7 +2131,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "generate_series_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def generate_series_udtf(start, end):
@@ -2148,7 +2184,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "duplicate_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def duplicate_udtf(text, n):
@@ -2182,7 +2219,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "cartesian_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def cartesian_udtf(list1, list2):
@@ -2222,7 +2260,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "explode_json_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import json
@@ -2385,7 +2424,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.split_lines_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_extract_emails(STRING)
@@ -2395,7 +2435,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.extract_emails_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_parse_json(STRING)
@@ -2405,7 +2446,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.parse_json_object_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_expand_json(STRING)
@@ -2415,7 +2457,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.expand_json_array_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_ngram(STRING, INT)
@@ -2425,7 +2468,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.ngram_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -2534,6 +2578,7 @@ CREATE TABLES FUNCTION py_split(STRING, STRING) ...;
| `file` | No | - | Path to the Python `.zip` package. Required only in module
mode. Supports three protocols:<br/>• `file://`: local file system path<br/>•
`http://`: HTTP remote download<br/>• `https://`: HTTPS remote download |
| `runtime_version` | Yes | - | Python runtime version, such as `"3.10.12"` |
| `always_nullable` | No | `true` | Whether the function always returns a
nullable result |
+| `volatility` | No | `immutable` | Function volatility. Valid values are
`immutable`, `stable`, and `volatile`.<br/>`immutable`: the same input always
produces the same output. Most deterministic UDFs should use this value for
better plan optimization.<br/>`stable`: the result is stable within one SQL
statement but may change between statements, similar to `now()`. SQL cache and
materialized view rewrite are disabled.<br/>`volatile`: the result may change
on each call, similar to `rando [...]
#### runtime_version Notes
@@ -2565,7 +2610,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "parse_csv_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def parse_csv_udtf(csv_data):
@@ -2608,7 +2654,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "date_range_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
from datetime import datetime, timedelta
@@ -2656,7 +2703,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "tokenize_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import re
@@ -2699,7 +2747,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "parse_url_params_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
from urllib.parse import urlparse, parse_qs
@@ -2745,7 +2794,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "expand_ip_range_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def expand_ip_range_udtf(start_ip, end_ip):
@@ -3102,7 +3152,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.12.11", -- Must specify the complete version
number to match Python 3.12.11
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(x):
@@ -3204,7 +3255,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.9.18", -- Must specify the complete version number
to match Python 3.9.18
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(text):
@@ -3218,7 +3270,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.12.11", -- Must specify the complete version
number to match Python 3.12.11
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import numpy as np
diff --git a/docs/sql-manual/sql-statements/function/CREATE-FUNCTION.md
b/docs/sql-manual/sql-statements/function/CREATE-FUNCTION.md
index 1de8ba51cb1..095fbf35099 100644
--- a/docs/sql-manual/sql-statements/function/CREATE-FUNCTION.md
+++ b/docs/sql-manual/sql-statements/function/CREATE-FUNCTION.md
@@ -75,6 +75,10 @@ CREATE [ GLOBAL ]
> - `symbol`: Indicates the class name containing the UDF class. This
> parameter is mandatory.
> - `type`: Indicates the UDF call type. The default is Native. Use JAVA_UDF
> when using a Java UDF.
> - `always_nullable`: Indicates whether the UDF result may contain NULL
> values. This is an optional parameter with a default value of true.
+> - `volatility`: Indicates how stable the function result is. This is an
optional parameter. The default value is `volatile` for scalar UDFs, and
`immutable` for UDAF/UDTF. Valid values are:
+> - `immutable`: The same input always produces the same output. Most
deterministic UDFs should be marked as `immutable` so that the optimizer can
apply more plan optimizations.
+> - `stable`: The result is stable within a single SQL statement but may
change between statements, similar to `now()`. SQL cache and materialized view
rewrite are disabled for this type of function.
+> - `volatile`: The result may change on each call, similar to `random()`.
SQL cache, materialized view rewrite, and many optimizer rewrite rules are
disabled for this type of function.
## Access Control Requirements
@@ -91,7 +95,8 @@ To execute this command, the user must have `ADMIN_PRIV`
privileges.
"file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
"symbol"="org.apache.doris.udf.AddOne",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -135,4 +140,92 @@ To execute this command, the user must have `ADMIN_PRIV`
privileges.
```sql
CREATE GLOBAL ALIAS FUNCTION id_masking(INT) WITH PARAMETER(id) AS
CONCAT(LEFT(id, 3), '****', RIGHT(id, 4));
- ```
\ No newline at end of file
+ ```
+
+6. Create a volatile Python UDF. Functions such as `uuid.uuid4()` that depend
on randomness should keep the default `volatility = volatile` and must not be
incorrectly marked as `immutable`.
+
+ ```sql
+ CREATE TABLE cte_uuid_seed (id INT) ENGINE=OLAP DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ("replication_num" = "1");
+ INSERT INTO cte_uuid_seed VALUES (1),(2),(3);
+
+ DROP FUNCTION IF EXISTS py_uuid_token(INT);
+ CREATE FUNCTION py_uuid_token(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_uuid_token_impl",
+ "always_nullable" = "false",
+ "runtime_version" = "3.12.11",
+ "volatility" = "volatile"
+ )
+ AS $$
+import uuid
+def py_uuid_token_impl(x):
+ return f"{x}-{uuid.uuid4()}"
+$$;
+
+ SET enable_cte_materialize = true;
+ SET inline_cte_referenced_threshold = 10;
+
+ WITH cte AS (SELECT id, py_uuid_token(id) AS token FROM cte_uuid_seed)
+ SELECT id, COUNT(DISTINCT token) AS distinct_tokens
+ FROM (SELECT id, token FROM cte UNION ALL SELECT id, token FROM cte) u
+ GROUP BY id ORDER BY id;
+ ```
+
+ Correct result:
+
+ ```text
+ +------+-----------------+
+ | id | distinct_tokens |
+ +------+-----------------+
+ | 1 | 1 |
+ | 2 | 1 |
+ | 3 | 1 |
+ +------+-----------------+
+ ```
+
+ For this function, the following definition is incorrect:
+
+ ```sql
+ DROP FUNCTION IF EXISTS py_uuid_token(INT);
+ CREATE FUNCTION py_uuid_token(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_uuid_token_impl",
+ "always_nullable" = "false",
+ "runtime_version" = "3.12.11",
+ "volatility" = "immutable"
+ )
+ AS $$
+import uuid
+def py_uuid_token_impl(x):
+ return f"{x}-{uuid.uuid4()}"
+$$;
+ ```
+
+ Run the same query again:
+
+ ```sql
+ WITH cte AS (SELECT id, py_uuid_token(id) AS token FROM cte_uuid_seed)
+ SELECT id, COUNT(DISTINCT token) AS distinct_tokens
+ FROM (SELECT id, token FROM cte UNION ALL SELECT id, token FROM cte) u
+ GROUP BY id ORDER BY id;
+ ```
+
+ Incorrect result:
+
+ ```text
+ +------+-----------------+
+ | id | distinct_tokens |
+ +------+-----------------+
+ | 1 | 2 |
+ | 2 | 2 |
+ | 3 | 2 |
+ +------+-----------------+
+ ```
+
+ Why this is wrong:
+ Because `py_uuid_token` is volatile, each call to `uuid.uuid4()` generates
a new value. If the function is incorrectly marked as `volatility = immutable`,
the optimizer may treat repeated references as safe to rewrite and may choose a
plan that evaluates the UDF separately on both sides of `UNION ALL`. As a
result, the same `id` can produce two different `token` values, and
`COUNT(DISTINCT token)` changes from `1` to `2`.
\ No newline at end of file
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/udf/java-user-defined-function.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/udf/java-user-defined-function.md
index 3311653e554..c1d0a8b9852 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/udf/java-user-defined-function.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/udf/java-user-defined-function.md
@@ -137,7 +137,8 @@ insert into test_table values (6, 666.66, "d,e");
"file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
"symbol"="org.apache.doris.udf.AddOne",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -348,7 +349,8 @@ public void destroy(State state) {
"file"="file:///pathTo/java-udaf.jar",
"symbol"="org.apache.doris.udf.SimpleDemo",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -390,7 +392,8 @@ void reset(State state)
"file"="file:///pathTo/java-udaf.jar",
"symbol"="org.apache.doris.udf.SimpleDemo",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -435,7 +438,8 @@ UDTF 与 UDF 一样需要用户实现 `evaluate` 方法,但 UDTF 的返回值
"file"="file:///pathTo/java-udtf.jar",
"symbol"="org.apache.doris.udf.demo.UDTFStringTest",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -531,7 +535,8 @@ public class FunctionUdf {
"file"="file:///pathTo/FunctionUdf.jar",
"symbol"="org.apache.doris.udf.FunctionUdf",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/udf/python-user-defined-function.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/udf/python-user-defined-function.md
index d35d539cd0b..efa4c74c606 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/udf/python-user-defined-function.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/udf/python-user-defined-function.md
@@ -81,7 +81,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "entry_function_name",
"runtime_version" = "python_version",
- "always_nullable" = "true|false"
+ "always_nullable" = "true|false",
+ "volatility" = "immutable|stable|volatile"
)
AS $$
def entry_function_name(param1, param2, ...):
@@ -100,7 +101,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(a, b):
@@ -120,7 +122,8 @@ RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s1, s2):
@@ -294,7 +297,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.add_three_numbers",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_reverse(STRING)
@@ -304,7 +308,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.reverse_string",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_is_prime(INT)
@@ -314,7 +319,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.is_prime",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -332,7 +338,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.add_three_numbers",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_reverse(STRING)
@@ -342,7 +349,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.reverse_string",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_is_prime(INT)
@@ -352,7 +360,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.is_prime",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -369,7 +378,8 @@ PROPERTIES (
"file" = "file:///path/to/my_udf.zip",
"symbol" = "my_udf.math_ops.multiply_by_two",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -412,6 +422,7 @@ DROP FUNCTION IF EXISTS py_is_prime(INT);
| `file` | 否 | - | Python `.zip` 包路径,仅模块模式需要。支持三种协议:<br/>• `file://` ——
本地文件系统路径<br/>• `http://` —— HTTP 远程下载<br/>• `https://` —— HTTPS 远程下载 |
| `runtime_version` | 是 | - | Python 运行时版本,如 `"3.10.12"`,需填写完整的版本号 |
| `always_nullable` | 否 | `true` | 是否总是返回可空结果 |
+| `volatility` | 否 | `volatile` | 函数易变性。可选值为
`immutable`、`stable`、`volatile`。<br/>`immutable`:相同输入始终产生相同输出。大部分确定性 UDF 建议设置为
`immutable`,这样计划优化效果更好。<br/>`stable`:在同一条 SQL 语句内结果稳定,但不同语句之间可能变化,类似
`now()`。这类函数会禁用 SQL Cache 和物化视图改写。<br/>`volatile`:每次调用结果都可能变化,类似
`random()`。这类函数会禁用 SQL Cache、物化视图改写以及很多优化器改写规则。 |
#### 运行时版本说明
@@ -479,7 +490,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(a, b):
@@ -530,7 +542,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "add",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -553,7 +566,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "to_upper",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -576,7 +590,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "sqrt",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -616,7 +631,8 @@ PROPERTIES (
"type"="PYTHON_UDF",
"symbol"="py_mixed_vector_add_impl",
"always_nullable"="true",
- "runtime_version"="3.12.11"
+ "runtime_version"="3.12.11",
+ "volatility"="immutable"
)
AS $$
import pandas as pd
@@ -702,7 +718,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(arr):
@@ -726,7 +743,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(arr):
@@ -751,7 +769,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(m):
@@ -775,7 +794,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(m, key):
@@ -800,7 +820,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s):
@@ -827,7 +848,8 @@ RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(email):
@@ -853,7 +875,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s1, s2):
@@ -890,7 +913,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
from datetime import datetime
@@ -919,7 +943,8 @@ RETURNS BOOLEAN
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(id_card):
@@ -1156,7 +1181,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "SumUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class SumUDAF:
@@ -1223,7 +1249,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "AvgUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class AvgUDAF:
@@ -1430,7 +1457,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.VarianceUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE AGGREGATE FUNCTION py_stddev(DOUBLE)
@@ -1440,7 +1468,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.StdDevUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE AGGREGATE FUNCTION py_median(DOUBLE)
@@ -1450,7 +1479,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.MedianUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -1532,6 +1562,7 @@ DROP FUNCTION IF EXISTS py_variance(DOUBLE);
| `file` | 否 | - | Python `.zip` 包路径,仅模块模式需要。支持三种协议:<br/>• `file://` ——
本地文件系统路径<br/>• `http://` —— HTTP 远程下载<br/>• `https://` —— HTTPS 远程下载 |
| `runtime_version` | 是 | - | Python 运行时版本,如 `"3.10.12"` |
| `always_nullable` | 否 | `true` | 是否总是返回可空结果 |
+| `volatility` | 否 | `immutable` | 函数易变性。可选值为
`immutable`、`stable`、`volatile`。<br/>`immutable`:相同输入始终产生相同输出。大部分确定性 UDF 建议设置为
`immutable`,这样计划优化效果更好。<br/>`stable`:在同一条 SQL 语句内结果稳定,但不同语句之间可能变化,类似
`now()`。这类函数会禁用 SQL Cache 和物化视图改写。<br/>`volatile`:每次调用结果都可能变化,类似
`random()`。这类函数会禁用 SQL Cache、物化视图改写以及很多优化器改写规则。 |
#### runtime_version 说明
@@ -1574,7 +1605,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "RunningSumUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class RunningSumUDAF:
@@ -1650,7 +1682,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "PercentileUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class PercentileUDAF:
@@ -1745,7 +1778,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "CollectSetUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class CollectSetUDAF:
@@ -2060,7 +2094,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "split_string_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def split_string_udtf(text, delimiter):
@@ -2096,7 +2131,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "generate_series_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def generate_series_udtf(start, end):
@@ -2148,7 +2184,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "duplicate_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def duplicate_udtf(text, n):
@@ -2182,7 +2219,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "cartesian_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def cartesian_udtf(list1, list2):
@@ -2222,7 +2260,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "explode_json_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import json
@@ -2385,7 +2424,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.split_lines_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_extract_emails(STRING)
@@ -2395,7 +2435,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.extract_emails_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_parse_json(STRING)
@@ -2405,7 +2446,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.parse_json_object_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_expand_json(STRING)
@@ -2415,7 +2457,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.expand_json_array_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_ngram(STRING, INT)
@@ -2425,7 +2468,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.ngram_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -2534,6 +2578,7 @@ CREATE TABLES FUNCTION py_split(STRING, STRING) ...;
| `file` | 否 | - | Python `.zip` 包路径,仅模块模式需要。支持三种协议:<br/>• `file://` ——
本地文件系统路径<br/>• `http://` —— HTTP 远程下载<br/>• `https://` —— HTTPS 远程下载 |
| `runtime_version` | 是 | - | Python 运行时版本,如 `"3.10.12"` |
| `always_nullable` | 否 | `true` | 是否总是返回可空结果 |
+| `volatility` | 否 | `immutable` | 函数易变性。可选值为
`immutable`、`stable`、`volatile`。<br/>`immutable`:相同输入始终产生相同输出。大部分确定性 UDF 建议设置为
`immutable`,这样计划优化效果更好。<br/>`stable`:在同一条 SQL 语句内结果稳定,但不同语句之间可能变化,类似
`now()`。这类函数会禁用 SQL Cache 和物化视图改写。<br/>`volatile`:每次调用结果都可能变化,类似
`random()`。这类函数会禁用 SQL Cache、物化视图改写以及很多优化器改写规则。 |
#### runtime_version 说明
@@ -2565,7 +2610,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "parse_csv_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def parse_csv_udtf(csv_data):
@@ -2608,7 +2654,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "date_range_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
from datetime import datetime, timedelta
@@ -2656,7 +2703,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "tokenize_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import re
@@ -2699,7 +2747,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "parse_url_params_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
from urllib.parse import urlparse, parse_qs
@@ -2745,7 +2794,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "expand_ip_range_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def expand_ip_range_udtf(start_ip, end_ip):
@@ -3102,7 +3152,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.12.11", -- 必须指定完整版本号,匹配 Python 3.12.11
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(x):
@@ -3204,7 +3255,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.9.18", -- 必须指定完整版本号,匹配 Python 3.9.18
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(text):
@@ -3218,7 +3270,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.12.11", -- 必须指定完整版本号,匹配 Python 3.12.11
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import numpy as np
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-statements/function/CREATE-FUNCTION.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-statements/function/CREATE-FUNCTION.md
index f398af2dce1..3e222218bd7 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-statements/function/CREATE-FUNCTION.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/sql-manual/sql-statements/function/CREATE-FUNCTION.md
@@ -74,6 +74,10 @@ CREATE [ GLOBAL ]
> - `symbol`: 表示的是包含 UDF 类的类名。这个参数是必须设定的
> - `type`: 表示的 UDF 调用类型,默认为 Native,使用 Java UDF 时传 JAVA_UDF。
> - `always_nullable`:表示的 UDF 返回结果中是否有可能出现 NULL 值,是可选参数,默认值为 true。
+> - `volatility`:表示函数结果的稳定性,是可选参数。标量 UDF 的默认值为 `volatile`,UDAF/UDTF 的默认值为
`immutable`。可选值为:
+> - `immutable`:相同输入始终产生相同输出。大部分确定性 UDF 建议设置为 `immutable`,这样优化器可以应用更多计划优化。
+> - `stable`:在同一条 SQL 语句内结果稳定,但不同语句之间可能变化,类似 `now()`。这类函数会禁用 SQL Cache
和物化视图改写。
+> - `volatile`:每次调用结果都可能变化,类似 `random()`。这类函数会禁用 SQL Cache、物化视图改写以及很多优化器改写规则。
## 权限控制
@@ -88,7 +92,8 @@ CREATE [ GLOBAL ]
"file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
"symbol"="org.apache.doris.udf.AddOne",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -124,4 +129,92 @@ CREATE [ GLOBAL ]
```sql
CREATE GLOBAL ALIAS FUNCTION id_masking(INT) WITH PARAMETER(id) AS
CONCAT(LEFT(id, 3), '****', RIGHT(id, 4));
- ```
\ No newline at end of file
+ ```
+
+6. 创建一个 volatile Python UDF。像 `uuid.uuid4()` 这类依赖随机数的函数,应保持 `volatility` 的默认值
`volatile`,不要错误标记为 `immutable`。
+
+ ```sql
+ CREATE TABLE cte_uuid_seed (id INT) ENGINE=OLAP DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ("replication_num" = "1");
+ INSERT INTO cte_uuid_seed VALUES (1),(2),(3);
+
+ DROP FUNCTION IF EXISTS py_uuid_token(INT);
+ CREATE FUNCTION py_uuid_token(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_uuid_token_impl",
+ "always_nullable" = "false",
+ "runtime_version" = "3.12.11",
+ "volatility" = "volatile"
+ )
+ AS $$
+import uuid
+def py_uuid_token_impl(x):
+ return f"{x}-{uuid.uuid4()}"
+$$;
+
+ SET enable_cte_materialize = true;
+ SET inline_cte_referenced_threshold = 10;
+
+ WITH cte AS (SELECT id, py_uuid_token(id) AS token FROM cte_uuid_seed)
+ SELECT id, COUNT(DISTINCT token) AS distinct_tokens
+ FROM (SELECT id, token FROM cte UNION ALL SELECT id, token FROM cte) u
+ GROUP BY id ORDER BY id;
+ ```
+
+ 正确结果:
+
+ ```text
+ +------+-----------------+
+ | id | distinct_tokens |
+ +------+-----------------+
+ | 1 | 1 |
+ | 2 | 1 |
+ | 3 | 1 |
+ +------+-----------------+
+ ```
+
+ 对于上述函数,不应写成下面这样:
+
+ ```sql
+ DROP FUNCTION IF EXISTS py_uuid_token(INT);
+ CREATE FUNCTION py_uuid_token(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_uuid_token_impl",
+ "always_nullable" = "false",
+ "runtime_version" = "3.12.11",
+ "volatility" = "immutable"
+ )
+ AS $$
+import uuid
+def py_uuid_token_impl(x):
+ return f"{x}-{uuid.uuid4()}"
+$$;
+ ```
+
+ 重新执行同一条查询:
+
+ ```sql
+ WITH cte AS (SELECT id, py_uuid_token(id) AS token FROM cte_uuid_seed)
+ SELECT id, COUNT(DISTINCT token) AS distinct_tokens
+ FROM (SELECT id, token FROM cte UNION ALL SELECT id, token FROM cte) u
+ GROUP BY id ORDER BY id;
+ ```
+
+ 错误结果:
+
+ ```text
+ +------+-----------------+
+ | id | distinct_tokens |
+ +------+-----------------+
+ | 1 | 2 |
+ | 2 | 2 |
+ | 3 | 2 |
+ +------+-----------------+
+ ```
+
+ 错误原因:
+ `py_uuid_token` 是 volatile 函数,`uuid.uuid4()` 每次调用都会生成新值。如果错误地将它标记为
`volatility = immutable`,优化器可能会把重复引用视为可安全改写,并选择在 `UNION ALL` 两侧分别执行 UDF
的计划。这样同一个 `id` 会生成两个不同的 `token`,`COUNT(DISTINCT token)` 就会从 `1` 变成 `2`。
\ No newline at end of file
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/query-data/udf/java-user-defined-function.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/query-data/udf/java-user-defined-function.md
index 3311653e554..c1d0a8b9852 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/query-data/udf/java-user-defined-function.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/query-data/udf/java-user-defined-function.md
@@ -137,7 +137,8 @@ insert into test_table values (6, 666.66, "d,e");
"file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
"symbol"="org.apache.doris.udf.AddOne",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -348,7 +349,8 @@ public void destroy(State state) {
"file"="file:///pathTo/java-udaf.jar",
"symbol"="org.apache.doris.udf.SimpleDemo",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -390,7 +392,8 @@ void reset(State state)
"file"="file:///pathTo/java-udaf.jar",
"symbol"="org.apache.doris.udf.SimpleDemo",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -435,7 +438,8 @@ UDTF 与 UDF 一样需要用户实现 `evaluate` 方法,但 UDTF 的返回值
"file"="file:///pathTo/java-udtf.jar",
"symbol"="org.apache.doris.udf.demo.UDTFStringTest",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -531,7 +535,8 @@ public class FunctionUdf {
"file"="file:///pathTo/FunctionUdf.jar",
"symbol"="org.apache.doris.udf.FunctionUdf",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/query-data/udf/python-user-defined-function.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/query-data/udf/python-user-defined-function.md
index 9f402640451..b6a8842ba5b 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/query-data/udf/python-user-defined-function.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/query-data/udf/python-user-defined-function.md
@@ -81,7 +81,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "entry_function_name",
"runtime_version" = "python_version",
- "always_nullable" = "true|false"
+ "always_nullable" = "true|false",
+ "volatility" = "immutable|stable|volatile"
)
AS $$
def entry_function_name(param1, param2, ...):
@@ -100,7 +101,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(a, b):
@@ -120,7 +122,8 @@ RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s1, s2):
@@ -293,7 +296,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.add_three_numbers",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_reverse(STRING)
@@ -303,7 +307,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.reverse_string",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_is_prime(INT)
@@ -313,7 +318,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.is_prime",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -331,7 +337,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.add_three_numbers",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_reverse(STRING)
@@ -341,7 +348,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.reverse_string",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_is_prime(INT)
@@ -351,7 +359,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.is_prime",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -368,7 +377,8 @@ PROPERTIES (
"file" = "file:///path/to/my_udf.zip",
"symbol" = "my_udf.math_ops.multiply_by_two",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -411,6 +421,7 @@ DROP FUNCTION IF EXISTS py_is_prime(INT);
| `file` | 否 | - | Python `.zip` 包路径,仅模块模式需要。支持三种协议:<br/>• `file://` ——
本地文件系统路径<br/>• `http://` —— HTTP 远程下载<br/>• `https://` —— HTTPS 远程下载 |
| `runtime_version` | 是 | - | Python 运行时版本,如 `"3.10.12"`,需填写完整的版本号 |
| `always_nullable` | 否 | `true` | 是否总是返回可空结果 |
+| `volatility` | 否 | `volatile` | 函数易变性,自 4.1.2 起支持。可选值为
`immutable`、`stable`、`volatile`。<br/>`immutable`:相同输入始终产生相同输出。大部分确定性 UDF 建议设置为
`immutable`,这样计划优化效果更好。<br/>`stable`:在同一条 SQL 语句内结果稳定,但不同语句之间可能变化,类似
`now()`。这类函数会禁用 SQL Cache 和物化视图改写。<br/>`volatile`:每次调用结果都可能变化,类似
`random()`。这类函数会禁用 SQL Cache、物化视图改写以及很多优化器改写规则。 |
#### 运行时版本说明
@@ -478,7 +489,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(a, b):
@@ -529,7 +541,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "add",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -552,7 +565,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "to_upper",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -575,7 +589,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "sqrt",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -615,7 +630,8 @@ PROPERTIES (
"type"="PYTHON_UDF",
"symbol"="py_mixed_vector_add_impl",
"always_nullable"="true",
- "runtime_version"="3.12.11"
+ "runtime_version"="3.12.11",
+ "volatility"="immutable"
)
AS $$
import pandas as pd
@@ -701,7 +717,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(arr):
@@ -725,7 +742,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(arr):
@@ -750,7 +768,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(m):
@@ -774,7 +793,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(m, key):
@@ -799,7 +819,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s):
@@ -826,7 +847,8 @@ RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(email):
@@ -852,7 +874,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s1, s2):
@@ -889,7 +912,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
from datetime import datetime
@@ -918,7 +942,8 @@ RETURNS BOOLEAN
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(id_card):
@@ -1155,7 +1180,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "SumUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class SumUDAF:
@@ -1222,7 +1248,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "AvgUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class AvgUDAF:
@@ -1428,7 +1455,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.VarianceUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE AGGREGATE FUNCTION py_stddev(DOUBLE)
@@ -1438,7 +1466,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.StdDevUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE AGGREGATE FUNCTION py_median(DOUBLE)
@@ -1448,7 +1477,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.MedianUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -1530,6 +1560,7 @@ DROP FUNCTION IF EXISTS py_variance(DOUBLE);
| `file` | 否 | - | Python `.zip` 包路径,仅模块模式需要。支持三种协议:<br/>• `file://` ——
本地文件系统路径<br/>• `http://` —— HTTP 远程下载<br/>• `https://` —— HTTPS 远程下载 |
| `runtime_version` | 是 | - | Python 运行时版本,如 `"3.10.12"` |
| `always_nullable` | 否 | `true` | 是否总是返回可空结果 |
+| `volatility` | 否 | `immutable` | 函数易变性,自 4.1.2 起支持。可选值为
`immutable`、`stable`、`volatile`。<br/>`immutable`:相同输入始终产生相同输出。大部分确定性 UDF 建议设置为
`immutable`,这样计划优化效果更好。<br/>`stable`:在同一条 SQL 语句内结果稳定,但不同语句之间可能变化,类似
`now()`。这类函数会禁用 SQL Cache 和物化视图改写。<br/>`volatile`:每次调用结果都可能变化,类似
`random()`。这类函数会禁用 SQL Cache、物化视图改写以及很多优化器改写规则。 |
#### runtime_version 说明
@@ -1572,7 +1603,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "RunningSumUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class RunningSumUDAF:
@@ -1648,7 +1680,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "PercentileUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class PercentileUDAF:
@@ -1743,7 +1776,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "CollectSetUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class CollectSetUDAF:
@@ -2058,7 +2092,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "split_string_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def split_string_udtf(text, delimiter):
@@ -2094,7 +2129,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "generate_series_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def generate_series_udtf(start, end):
@@ -2146,7 +2182,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "duplicate_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def duplicate_udtf(text, n):
@@ -2180,7 +2217,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "cartesian_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def cartesian_udtf(list1, list2):
@@ -2220,7 +2258,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "explode_json_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import json
@@ -2382,7 +2421,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.split_lines_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_extract_emails(STRING)
@@ -2392,7 +2432,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.extract_emails_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_parse_json(STRING)
@@ -2402,7 +2443,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.parse_json_object_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_expand_json(STRING)
@@ -2412,7 +2454,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.expand_json_array_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_ngram(STRING, INT)
@@ -2422,7 +2465,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.ngram_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -2531,6 +2575,7 @@ CREATE TABLES FUNCTION py_split(STRING, STRING) ...;
| `file` | 否 | - | Python `.zip` 包路径,仅模块模式需要。支持三种协议:<br/>• `file://` ——
本地文件系统路径<br/>• `http://` —— HTTP 远程下载<br/>• `https://` —— HTTPS 远程下载 |
| `runtime_version` | 是 | - | Python 运行时版本,如 `"3.10.12"` |
| `always_nullable` | 否 | `true` | 是否总是返回可空结果 |
+| `volatility` | 否 | `immutable` | 函数易变性,自 4.1.2 起支持。可选值为
`immutable`、`stable`、`volatile`。<br/>`immutable`:相同输入始终产生相同输出。大部分确定性 UDF 建议设置为
`immutable`,这样计划优化效果更好。<br/>`stable`:在同一条 SQL 语句内结果稳定,但不同语句之间可能变化,类似
`now()`。这类函数会禁用 SQL Cache 和物化视图改写。<br/>`volatile`:每次调用结果都可能变化,类似
`random()`。这类函数会禁用 SQL Cache、物化视图改写以及很多优化器改写规则。 |
#### runtime_version 说明
@@ -2562,7 +2607,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "parse_csv_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def parse_csv_udtf(csv_data):
@@ -2605,7 +2651,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "date_range_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
from datetime import datetime, timedelta
@@ -2653,7 +2700,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "tokenize_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import re
@@ -2696,7 +2744,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "parse_url_params_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
from urllib.parse import urlparse, parse_qs
@@ -2742,7 +2791,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "expand_ip_range_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def expand_ip_range_udtf(start_ip, end_ip):
@@ -3099,7 +3149,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.12.11", -- 必须指定完整版本号,匹配 Python 3.12.11
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(x):
@@ -3201,7 +3252,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.9.18", -- 必须指定完整版本号,匹配 Python 3.9.18
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(text):
@@ -3215,7 +3267,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.12.11", -- 必须指定完整版本号,匹配 Python 3.12.11
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import numpy as np
diff --git
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/sql-statements/function/CREATE-FUNCTION.md
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/sql-statements/function/CREATE-FUNCTION.md
index f398af2dce1..c7f95c64bde 100644
---
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/sql-statements/function/CREATE-FUNCTION.md
+++
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/sql-manual/sql-statements/function/CREATE-FUNCTION.md
@@ -74,6 +74,10 @@ CREATE [ GLOBAL ]
> - `symbol`: 表示的是包含 UDF 类的类名。这个参数是必须设定的
> - `type`: 表示的 UDF 调用类型,默认为 Native,使用 Java UDF 时传 JAVA_UDF。
> - `always_nullable`:表示的 UDF 返回结果中是否有可能出现 NULL 值,是可选参数,默认值为 true。
+> - `volatility`:表示函数结果的稳定性,自 4.1.2 起支持。该参数是可选参数。标量 UDF 的默认值为
`volatile`,UDAF/UDTF 的默认值为 `immutable`。可选值为:
+> - `immutable`:相同输入始终产生相同输出。大部分确定性 UDF 建议设置为 `immutable`,这样优化器可以应用更多计划优化。
+> - `stable`:在同一条 SQL 语句内结果稳定,但不同语句之间可能变化,类似 `now()`。这类函数会禁用 SQL Cache
和物化视图改写。
+> - `volatile`:每次调用结果都可能变化,类似 `random()`。这类函数会禁用 SQL Cache、物化视图改写以及很多优化器改写规则。
## 权限控制
@@ -88,7 +92,8 @@ CREATE [ GLOBAL ]
"file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
"symbol"="org.apache.doris.udf.AddOne",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -124,4 +129,92 @@ CREATE [ GLOBAL ]
```sql
CREATE GLOBAL ALIAS FUNCTION id_masking(INT) WITH PARAMETER(id) AS
CONCAT(LEFT(id, 3), '****', RIGHT(id, 4));
- ```
\ No newline at end of file
+ ```
+
+6. 创建一个 volatile Python UDF。像 `uuid.uuid4()` 这类依赖随机数的函数,应保持 `volatility` 的默认值
`volatile`,不要错误标记为 `immutable`。
+
+ ```sql
+ CREATE TABLE cte_uuid_seed (id INT) ENGINE=OLAP DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ("replication_num" = "1");
+ INSERT INTO cte_uuid_seed VALUES (1),(2),(3);
+
+ DROP FUNCTION IF EXISTS py_uuid_token(INT);
+ CREATE FUNCTION py_uuid_token(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_uuid_token_impl",
+ "always_nullable" = "false",
+ "runtime_version" = "3.12.11",
+ "volatility" = "volatile"
+ )
+ AS $$
+import uuid
+def py_uuid_token_impl(x):
+ return f"{x}-{uuid.uuid4()}"
+$$;
+
+ SET enable_cte_materialize = true;
+ SET inline_cte_referenced_threshold = 10;
+
+ WITH cte AS (SELECT id, py_uuid_token(id) AS token FROM cte_uuid_seed)
+ SELECT id, COUNT(DISTINCT token) AS distinct_tokens
+ FROM (SELECT id, token FROM cte UNION ALL SELECT id, token FROM cte) u
+ GROUP BY id ORDER BY id;
+ ```
+
+ 正确结果:
+
+ ```text
+ +------+-----------------+
+ | id | distinct_tokens |
+ +------+-----------------+
+ | 1 | 1 |
+ | 2 | 1 |
+ | 3 | 1 |
+ +------+-----------------+
+ ```
+
+ 对于上述函数,不应写成下面这样:
+
+ ```sql
+ DROP FUNCTION IF EXISTS py_uuid_token(INT);
+ CREATE FUNCTION py_uuid_token(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_uuid_token_impl",
+ "always_nullable" = "false",
+ "runtime_version" = "3.12.11",
+ "volatility" = "immutable"
+ )
+ AS $$
+import uuid
+def py_uuid_token_impl(x):
+ return f"{x}-{uuid.uuid4()}"
+$$;
+ ```
+
+ 重新执行同一条查询:
+
+ ```sql
+ WITH cte AS (SELECT id, py_uuid_token(id) AS token FROM cte_uuid_seed)
+ SELECT id, COUNT(DISTINCT token) AS distinct_tokens
+ FROM (SELECT id, token FROM cte UNION ALL SELECT id, token FROM cte) u
+ GROUP BY id ORDER BY id;
+ ```
+
+ 错误结果:
+
+ ```text
+ +------+-----------------+
+ | id | distinct_tokens |
+ +------+-----------------+
+ | 1 | 2 |
+ | 2 | 2 |
+ | 3 | 2 |
+ +------+-----------------+
+ ```
+
+ 错误原因:
+ `py_uuid_token` 是 volatile 函数,`uuid.uuid4()` 每次调用都会生成新值。如果错误地将它标记为
`volatility = immutable`,优化器可能会把重复引用视为可安全改写,并选择在 `UNION ALL` 两侧分别执行 UDF
的计划。这样同一个 `id` 会生成两个不同的 `token`,`COUNT(DISTINCT token)` 就会从 `1` 变成 `2`。
\ No newline at end of file
diff --git
a/versioned_docs/version-4.x/query-data/udf/java-user-defined-function.md
b/versioned_docs/version-4.x/query-data/udf/java-user-defined-function.md
index 96e1fe99e7c..fc704ab8aec 100644
--- a/versioned_docs/version-4.x/query-data/udf/java-user-defined-function.md
+++ b/versioned_docs/version-4.x/query-data/udf/java-user-defined-function.md
@@ -137,7 +137,8 @@ When writing a UDF in Java, the main entry point must be
the `evaluate` function
"file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
"symbol"="org.apache.doris.udf.AddOne",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -348,7 +349,8 @@ public void destroy(State state) {
"file"="file:///pathTo/java-udaf.jar",
"symbol"="org.apache.doris.udf.SimpleDemo",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -390,7 +392,8 @@ void reset(State state)
"file"="file:///pathTo/java-udaf.jar",
"symbol"="org.apache.doris.udf.SimpleDemo",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -435,7 +438,8 @@ Like UDF, UDTF requires you to implement the `evaluate`
method, but the return v
"file"="file:///pathTo/java-udtf.jar",
"symbol"="org.apache.doris.udf.demo.UDTFStringTest",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -531,7 +535,8 @@ The steps are as follows:
"file"="file:///pathTo/FunctionUdf.jar",
"symbol"="org.apache.doris.udf.FunctionUdf",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
diff --git
a/versioned_docs/version-4.x/query-data/udf/python-user-defined-function.md
b/versioned_docs/version-4.x/query-data/udf/python-user-defined-function.md
index 463dd549d0f..de2303edd9d 100644
--- a/versioned_docs/version-4.x/query-data/udf/python-user-defined-function.md
+++ b/versioned_docs/version-4.x/query-data/udf/python-user-defined-function.md
@@ -81,7 +81,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "entry_function_name",
"runtime_version" = "python_version",
- "always_nullable" = "true|false"
+ "always_nullable" = "true|false",
+ "volatility" = "immutable|stable|volatile"
)
AS $$
def entry_function_name(param1, param2, ...):
@@ -100,7 +101,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(a, b):
@@ -120,7 +122,8 @@ RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s1, s2):
@@ -293,7 +296,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.add_three_numbers",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_reverse(STRING)
@@ -303,7 +307,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.reverse_string",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_is_prime(INT)
@@ -313,7 +318,8 @@ PROPERTIES (
"file" = "file:///path/to/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.is_prime",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -331,7 +337,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.add_three_numbers",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_reverse(STRING)
@@ -341,7 +348,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.reverse_string",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE FUNCTION py_is_prime(INT)
@@ -351,7 +359,8 @@ PROPERTIES (
"file" = "https://your-storage.com/udf/python_udf_scalar_ops.zip",
"symbol" = "python_udf_scalar_ops.is_prime",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -368,7 +377,8 @@ PROPERTIES (
"file" = "file:///path/to/my_udf.zip",
"symbol" = "my_udf.math_ops.multiply_by_two",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -411,6 +421,7 @@ DROP FUNCTION IF EXISTS py_is_prime(INT);
| `file` | No | - | Path to the Python `.zip` package. Required only in module
mode. Supports three protocols:<br/>• `file://`: local file system path<br/>•
`http://`: HTTP remote download<br/>• `https://`: HTTPS remote download |
| `runtime_version` | Yes | - | Python runtime version, such as `"3.10.12"`.
The complete version number is required |
| `always_nullable` | No | `true` | Whether the function always returns a
nullable result |
+| `volatility` | No | `volatile` | Function volatility, supported since 4.1.2.
Valid values are `immutable`, `stable`, and `volatile`.<br/>`immutable`: the
same input always produces the same output. Most deterministic UDFs should use
this value for better plan optimization.<br/>`stable`: the result is stable
within one SQL statement but may change between statements, similar to `now()`.
SQL cache and materialized view rewrite are disabled.<br/>`volatile`: the
result may change on each c [...]
#### Runtime Version Notes
@@ -478,7 +489,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(a, b):
@@ -529,7 +541,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "add",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -552,7 +565,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "to_upper",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -575,7 +589,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "sqrt",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import pandas as pd
@@ -615,7 +630,8 @@ PROPERTIES (
"type"="PYTHON_UDF",
"symbol"="py_mixed_vector_add_impl",
"always_nullable"="true",
- "runtime_version"="3.12.11"
+ "runtime_version"="3.12.11",
+ "volatility"="immutable"
)
AS $$
import pandas as pd
@@ -701,7 +717,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(arr):
@@ -725,7 +742,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(arr):
@@ -750,7 +768,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(m):
@@ -774,7 +793,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(m, key):
@@ -799,7 +819,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s):
@@ -826,7 +847,8 @@ RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(email):
@@ -852,7 +874,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(s1, s2):
@@ -889,7 +912,8 @@ RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
from datetime import datetime
@@ -918,7 +942,8 @@ RETURNS BOOLEAN
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
- "runtime_version" = "3.10.12"
+ "runtime_version" = "3.10.12",
+ "volatility" = "immutable"
)
AS $$
def evaluate(id_card):
@@ -1155,7 +1180,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "SumUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class SumUDAF:
@@ -1222,7 +1248,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "AvgUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class AvgUDAF:
@@ -1428,7 +1455,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.VarianceUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE AGGREGATE FUNCTION py_stddev(DOUBLE)
@@ -1438,7 +1466,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.StdDevUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE AGGREGATE FUNCTION py_median(DOUBLE)
@@ -1448,7 +1477,8 @@ PROPERTIES (
"file" = "file:///path/to/stats_udaf.zip",
"symbol" = "stats_udaf.MedianUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -1530,6 +1560,7 @@ DROP FUNCTION IF EXISTS py_variance(DOUBLE);
| `file` | No | - | Path to the Python `.zip` package. Required only in module
mode. Supports three protocols:<br/>• `file://`: local file system path<br/>•
`http://`: HTTP remote download<br/>• `https://`: HTTPS remote download |
| `runtime_version` | Yes | - | Python runtime version, such as `"3.10.12"` |
| `always_nullable` | No | `true` | Whether the function always returns a
nullable result |
+| `volatility` | No | `immutable` | Function volatility, supported since
4.1.2. Valid values are `immutable`, `stable`, and `volatile`.<br/>`immutable`:
the same input always produces the same output. Most deterministic UDFs should
use this value for better plan optimization.<br/>`stable`: the result is stable
within one SQL statement but may change between statements, similar to `now()`.
SQL cache and materialized view rewrite are disabled.<br/>`volatile`: the
result may change on each [...]
#### runtime_version Notes
@@ -1572,7 +1603,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "RunningSumUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class RunningSumUDAF:
@@ -1648,7 +1680,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "PercentileUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class PercentileUDAF:
@@ -1743,7 +1776,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "CollectSetUDAF",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
class CollectSetUDAF:
@@ -2058,7 +2092,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "split_string_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def split_string_udtf(text, delimiter):
@@ -2094,7 +2129,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "generate_series_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def generate_series_udtf(start, end):
@@ -2146,7 +2182,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "duplicate_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def duplicate_udtf(text, n):
@@ -2180,7 +2217,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "cartesian_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def cartesian_udtf(list1, list2):
@@ -2220,7 +2258,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "explode_json_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import json
@@ -2382,7 +2421,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.split_lines_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_extract_emails(STRING)
@@ -2392,7 +2432,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.extract_emails_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_parse_json(STRING)
@@ -2402,7 +2443,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.parse_json_object_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_expand_json(STRING)
@@ -2412,7 +2454,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.expand_json_array_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
CREATE TABLES FUNCTION py_ngram(STRING, INT)
@@ -2422,7 +2465,8 @@ PROPERTIES (
"file" = "file:///path/to/text_udtf.zip",
"symbol" = "text_udtf.ngram_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
);
```
@@ -2531,6 +2575,7 @@ CREATE TABLES FUNCTION py_split(STRING, STRING) ...;
| `file` | No | - | Path to the Python `.zip` package. Required only in module
mode. Supports three protocols:<br/>• `file://`: local file system path<br/>•
`http://`: HTTP remote download<br/>• `https://`: HTTPS remote download |
| `runtime_version` | Yes | - | Python runtime version, such as `"3.10.12"` |
| `always_nullable` | No | `true` | Whether the function always returns a
nullable result |
+| `volatility` | No | `immutable` | Function volatility, supported since
4.1.2. Valid values are `immutable`, `stable`, and `volatile`.<br/>`immutable`:
the same input always produces the same output. Most deterministic UDFs should
use this value for better plan optimization.<br/>`stable`: the result is stable
within one SQL statement but may change between statements, similar to `now()`.
SQL cache and materialized view rewrite are disabled.<br/>`volatile`: the
result may change on each [...]
#### runtime_version Notes
@@ -2562,7 +2607,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "parse_csv_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def parse_csv_udtf(csv_data):
@@ -2605,7 +2651,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "date_range_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
from datetime import datetime, timedelta
@@ -2653,7 +2700,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "tokenize_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import re
@@ -2696,7 +2744,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "parse_url_params_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
from urllib.parse import urlparse, parse_qs
@@ -2742,7 +2791,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "expand_ip_range_udtf",
"runtime_version" = "3.10.12",
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def expand_ip_range_udtf(start_ip, end_ip):
@@ -3099,7 +3149,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.12.11", -- Must specify the complete version
number to match Python 3.12.11
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(x):
@@ -3201,7 +3252,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.9.18", -- Must specify the complete version number
to match Python 3.9.18
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
def evaluate(text):
@@ -3215,7 +3267,8 @@ PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "3.12.11", -- Must specify the complete version
number to match Python 3.12.11
- "always_nullable" = "true"
+ "always_nullable" = "true",
+ "volatility" = "immutable"
)
AS $$
import numpy as np
diff --git
a/versioned_docs/version-4.x/sql-manual/sql-statements/function/CREATE-FUNCTION.md
b/versioned_docs/version-4.x/sql-manual/sql-statements/function/CREATE-FUNCTION.md
index 1de8ba51cb1..68e9e5f77b1 100644
---
a/versioned_docs/version-4.x/sql-manual/sql-statements/function/CREATE-FUNCTION.md
+++
b/versioned_docs/version-4.x/sql-manual/sql-statements/function/CREATE-FUNCTION.md
@@ -75,6 +75,10 @@ CREATE [ GLOBAL ]
> - `symbol`: Indicates the class name containing the UDF class. This
> parameter is mandatory.
> - `type`: Indicates the UDF call type. The default is Native. Use JAVA_UDF
> when using a Java UDF.
> - `always_nullable`: Indicates whether the UDF result may contain NULL
> values. This is an optional parameter with a default value of true.
+> - `volatility`: Indicates how stable the function result is. This parameter
is supported since 4.1.2. It is optional. The default value is `volatile` for
scalar UDFs, and `immutable` for UDAF/UDTF. Valid values are:
+> - `immutable`: The same input always produces the same output. Most
deterministic UDFs should be marked as `immutable` so that the optimizer can
apply more plan optimizations.
+> - `stable`: The result is stable within a single SQL statement but may
change between statements, similar to `now()`. SQL cache and materialized view
rewrite are disabled for this type of function.
+> - `volatile`: The result may change on each call, similar to `random()`.
SQL cache, materialized view rewrite, and many optimizer rewrite rules are
disabled for this type of function.
## Access Control Requirements
@@ -91,7 +95,8 @@ To execute this command, the user must have `ADMIN_PRIV`
privileges.
"file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
"symbol"="org.apache.doris.udf.AddOne",
"always_nullable"="true",
- "type"="JAVA_UDF"
+ "type"="JAVA_UDF",
+ "volatility"="immutable"
);
```
@@ -135,4 +140,92 @@ To execute this command, the user must have `ADMIN_PRIV`
privileges.
```sql
CREATE GLOBAL ALIAS FUNCTION id_masking(INT) WITH PARAMETER(id) AS
CONCAT(LEFT(id, 3), '****', RIGHT(id, 4));
- ```
\ No newline at end of file
+ ```
+
+6. Create a volatile Python UDF. Functions such as `uuid.uuid4()` that depend
on randomness should keep the default `volatility = volatile` and must not be
incorrectly marked as `immutable`.
+
+ ```sql
+ CREATE TABLE cte_uuid_seed (id INT) ENGINE=OLAP DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ("replication_num" = "1");
+ INSERT INTO cte_uuid_seed VALUES (1),(2),(3);
+
+ DROP FUNCTION IF EXISTS py_uuid_token(INT);
+ CREATE FUNCTION py_uuid_token(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_uuid_token_impl",
+ "always_nullable" = "false",
+ "runtime_version" = "3.12.11",
+ "volatility" = "volatile"
+ )
+ AS $$
+import uuid
+def py_uuid_token_impl(x):
+ return f"{x}-{uuid.uuid4()}"
+$$;
+
+ SET enable_cte_materialize = true;
+ SET inline_cte_referenced_threshold = 10;
+
+ WITH cte AS (SELECT id, py_uuid_token(id) AS token FROM cte_uuid_seed)
+ SELECT id, COUNT(DISTINCT token) AS distinct_tokens
+ FROM (SELECT id, token FROM cte UNION ALL SELECT id, token FROM cte) u
+ GROUP BY id ORDER BY id;
+ ```
+
+ Correct result:
+
+ ```text
+ +------+-----------------+
+ | id | distinct_tokens |
+ +------+-----------------+
+ | 1 | 1 |
+ | 2 | 1 |
+ | 3 | 1 |
+ +------+-----------------+
+ ```
+
+ For this function, the following definition is incorrect:
+
+ ```sql
+ DROP FUNCTION IF EXISTS py_uuid_token(INT);
+ CREATE FUNCTION py_uuid_token(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_uuid_token_impl",
+ "always_nullable" = "false",
+ "runtime_version" = "3.12.11",
+ "volatility" = "immutable"
+ )
+ AS $$
+import uuid
+def py_uuid_token_impl(x):
+ return f"{x}-{uuid.uuid4()}"
+$$;
+ ```
+
+ Run the same query again:
+
+ ```sql
+ WITH cte AS (SELECT id, py_uuid_token(id) AS token FROM cte_uuid_seed)
+ SELECT id, COUNT(DISTINCT token) AS distinct_tokens
+ FROM (SELECT id, token FROM cte UNION ALL SELECT id, token FROM cte) u
+ GROUP BY id ORDER BY id;
+ ```
+
+ Incorrect result:
+
+ ```text
+ +------+-----------------+
+ | id | distinct_tokens |
+ +------+-----------------+
+ | 1 | 2 |
+ | 2 | 2 |
+ | 3 | 2 |
+ +------+-----------------+
+ ```
+
+ Why this is wrong:
+ Because `py_uuid_token` is volatile, each call to `uuid.uuid4()` generates
a new value. If the function is incorrectly marked as `volatility = immutable`,
the optimizer may treat repeated references as safe to rewrite and may choose a
plan that evaluates the UDF separately on both sides of `UNION ALL`. As a
result, the same `id` can produce two different `token` values, and
`COUNT(DISTINCT token)` changes from `1` to `2`.
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]