Omega359 commented on code in PR #12861:
URL: https://github.com/apache/datafusion/pull/12861#discussion_r1796994434


##########
datafusion/functions-aggregate/src/bool_and_or.rs:
##########
@@ -172,6 +175,34 @@ impl AggregateUDFImpl for BoolAnd {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_bool_and_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_bool_and_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns true if all non-null input values are true, otherwise 
false.",
+            )
+            .with_syntax_example("bool_and(expression)")
+            .with_sql_example(r#"```sql
+> SELECT bool_and(column_name) FROM table_name;
++----------------------------+
+| bool_and(column_name)       |
++----------------------------+
+| true                        |
++----------------------------+
+```"#)
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   We can likely just use the `with_standard_argument(..)` builder option here
   and cut down on boilerplate text.



##########
datafusion/functions-aggregate/src/count.rs:
##########
@@ -324,6 +326,41 @@ impl AggregateUDFImpl for Count {
         }
         None
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_count_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_count_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the number of non-null values in the specified 
column. To include null values in the total count, use `count(*)`.",
+            )
+            .with_syntax_example("count(expression)")
+            .with_sql_example(r#"```sql
+> SELECT count(column_name) FROM table_name;
++-----------------------+
+| count(column_name)     |
++-----------------------+
+| 100                   |
++-----------------------+
+
+> SELECT count(*) FROM table_name;
++------------------+
+| count(*)         |
++------------------+
+| 120              |
++------------------+
+```"#)
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/first_last.rs:
##########
@@ -165,6 +166,35 @@ impl AggregateUDFImpl for FirstValue {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Reversed(last_value_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_first_value_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_first_value_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the first element in an aggregation group according 
to the requested ordering. If no ordering is given, returns an arbitrary 
element from the group.",
+            )
+            .with_syntax_example("first_value(expression [ORDER BY 
expression])")
+            .with_sql_example(r#"```sql
+> SELECT first_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| first_value(column_name ORDER BY other_column)|
++-----------------------------------------------+
+| first_element                                 |
++-----------------------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/min_max.rs:
##########
@@ -321,6 +324,35 @@ impl AggregateUDFImpl for Max {
     fn value_from_stats(&self, statistics_args: &StatisticsArgs) -> 
Option<ScalarValue> {
         self.value_from_statistics(statistics_args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_max_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_max_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the maximum value in the specified column.",
+            )
+            .with_syntax_example("max(expression)")
+            .with_sql_example(r#"```sql
+> SELECT max(column_name) FROM table_name;
++----------------------+
+| max(column_name)      |
++----------------------+
+| 150                  |
++----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/sum.rs:
##########
@@ -233,6 +236,35 @@ impl AggregateUDFImpl for Sum {
     fn order_sensitivity(&self) -> AggregateOrderSensitivity {
         AggregateOrderSensitivity::Insensitive
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_sum_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_sum_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the sum of all values in the specified column.",
+            )
+            .with_syntax_example("sum(expression)")
+            .with_sql_example(r#"```sql
+> SELECT sum(column_name) FROM table_name;
++-----------------------+
+| sum(column_name)       |
++-----------------------+
+| 12345                 |
++-----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/average.rs:
##########
@@ -235,6 +237,36 @@ impl AggregateUDFImpl for Avg {
         }
         coerce_avg_type(self.name(), arg_types)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_avg_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_avg_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the average of numeric values in the specified 
column.",
+            )
+            .with_syntax_example("avg(expression)")
+            .with_sql_example(r#"```sql
+> SELECT avg(column_name) FROM table_name;
++---------------------------+
+| avg(column_name)           |
++---------------------------+
+| 42.75                      |
++---------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("Aliases: ", "`mean`")

Review Comment:
   This shouldn't be necessary: the code that generates the documentation
   already injects any UDF aliases, if they exist, into the generated docs.



##########
datafusion/functions-aggregate/src/bool_and_or.rs:
##########
@@ -293,6 +324,32 @@ impl AggregateUDFImpl for BoolOr {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_bool_or_doc())
+    }
+}
+
+fn get_bool_or_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns true if any non-null input value is true, otherwise 
false.",
+            )
+            .with_syntax_example("bool_or(expression)")
+            .with_sql_example(r#"```sql
+> SELECT bool_or(column_name) FROM table_name;
++----------------------------+
+| bool_or(column_name)        |
++----------------------------+
+| true                        |
++----------------------------+
+```"#)
+            .with_standard_argument("expression", "Expression to operate on. 
Can be a constant, column, or function, and any combination of arithmetic 
operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/min_max.rs:
##########
@@ -1119,7 +1151,35 @@ impl AggregateUDFImpl for Min {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_min_doc())
+    }
+}
+
+fn get_min_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the minimum value in the specified column.",
+            )
+            .with_syntax_example("min(expression)")
+            .with_sql_example(r#"```sql
+> SELECT min(column_name) FROM table_name;
++----------------------+
+| min(column_name)      |
++----------------------+
+| 12                   |
++----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/first_last.rs:
##########
@@ -466,6 +496,33 @@ impl AggregateUDFImpl for LastValue {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Reversed(first_value_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_last_value_doc())
+    }
+}
+
+fn get_last_value_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the last element in an aggregation group according to 
the requested ordering. If no ordering is given, returns an arbitrary element 
from the group.",
+            )
+            .with_syntax_example("last_value(expression [ORDER BY 
expression])")
+            .with_sql_example(r#"```sql
+> SELECT last_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| last_value(column_name ORDER BY other_column) |
++-----------------------------------------------+
+| last_element                                  |
++-----------------------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/median.rs:
##########
@@ -152,6 +153,35 @@ impl AggregateUDFImpl for Median {
     fn aliases(&self) -> &[String] {
         &[]
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_median_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_median_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the median value in the specified column.",
+            )
+            .with_syntax_example("median(expression)")
+            .with_sql_example(r#"```sql
+> SELECT median(column_name) FROM table_name;
++----------------------+
+| median(column_name)   |
++----------------------+
+| 45.5                 |
++----------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/nth_value.rs:
##########
@@ -161,6 +162,40 @@ impl AggregateUDFImpl for NthValueAgg {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Reversed(nth_value_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_nth_value_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_nth_value_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the nth value in a group of values.",
+            )
+            .with_syntax_example("nth_value(expression, n ORDER BY 
expression)")
+            .with_sql_example(r#"```sql
+> SELECT dept_id, salary, NTH_VALUE(salary, 2) OVER (PARTITION BY dept_id 
ORDER BY salary ASC) AS second_salary_by_dept
+  FROM employee;
++---------+--------+-------------------------+
+| dept_id | salary | second_salary_by_dept   |
++---------+--------+-------------------------+
+| 1       | 30000  | NULL                    |
+| 1       | 40000  | 40000                   |
+| 1       | 50000  | 40000                   |
+| 2       | 35000  | NULL                    |
+| 2       | 45000  | 45000                   |
++---------+--------+-------------------------+
+```"#)
+            .with_standard_argument("expression", "The column or expression to 
retrieve the nth value from.")

Review Comment:
   ```suggestion
               .with_argument("expression", "The column or expression to 
retrieve the nth value from. Can be a constant, column, or function, and any 
combination of operators.")
   ```



##########
datafusion/functions-aggregate/src/grouping.rs:
##########
@@ -94,4 +98,37 @@ impl AggregateUDFImpl for Grouping {
             "physical plan is not yet implemented for GROUPING aggregate 
function"
         )
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_grouping_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_grouping_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns 1 if the data is aggregated across the specified 
column, or 0 if it is not aggregated in the result set.",
+            )
+            .with_syntax_example("grouping(expression)")
+            .with_sql_example(r#"```sql
+> SELECT column_name, GROUPING(column_name) AS group_column
+  FROM table_name
+  GROUP BY GROUPING SETS ((column_name), ());
++-------------+-------------+
+| column_name | group_column |
++-------------+-------------+
+| value1      | 0           |
+| value2      | 0           |
+| NULL        | 1           |
++-------------+-------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to evaluate whether data 
is aggregated across the specified column. Can be a constant, column, or 
function.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/stddev.rs:
##########
@@ -132,6 +134,35 @@ impl AggregateUDFImpl for Stddev {
     ) -> Result<Box<dyn GroupsAccumulator>> {
         Ok(Box::new(StddevGroupsAccumulator::new(StatsType::Sample)))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_stddev_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_stddev_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the standard deviation of a set of numbers.",
+            )
+            .with_syntax_example("stddev(expression)")
+            .with_sql_example(r#"```sql
+> SELECT stddev(column_name) FROM table_name;
++----------------------+
+| stddev(column_name)   |
++----------------------+
+| 12.34                |
++----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



##########
datafusion/functions-aggregate/src/stddev.rs:
##########
@@ -228,6 +259,33 @@ impl AggregateUDFImpl for StddevPop {
             StatsType::Population,
         )))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_stddev_pop_doc())
+    }
+}
+
+fn get_stddev_pop_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the population standard deviation of a set of 
numbers.",
+            )
+            .with_syntax_example("stddev_pop(expression)")
+            .with_sql_example(r#"```sql
+> SELECT stddev_pop(column_name) FROM table_name;
++--------------------------+
+| stddev_pop(column_name)   |
++--------------------------+
+| 10.56                    |
++--------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")

Review Comment:
   ```suggestion
               .with_standard_argument("expression", "The")
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to