cloud-fan commented on code in PR #48748: URL: https://github.com/apache/spark/pull/48748#discussion_r1843597510
########## sql/core/src/test/resources/sql-tests/results/listagg.sql.out: ########## @@ -0,0 +1,436 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMP VIEW df AS +SELECT * FROM (VALUES ('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), (NULL, NULL)) AS t(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMP VIEW df2 AS +SELECT * FROM (VALUES (1, true), (2, false), (3, false)) AS t(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT listagg(b) FROM df GROUP BY a +-- !query schema +struct<listagg(b, NULL):string> +-- !query output +NULL +bc +cd + + +-- !query +SELECT string_agg(b) FROM df GROUP BY a +-- !query schema +struct<string_agg(b, NULL):string> +-- !query output +NULL +bc +cd + + +-- !query +SELECT listagg(b, NULL) FROM df GROUP BY a +-- !query schema +struct<listagg(b, NULL):string> +-- !query output +NULL +bc +cd + + +-- !query +SELECT listagg(b) FROM df WHERE 1 != 1 +-- !query schema +struct<listagg(b, NULL):string> +-- !query output +NULL + + +-- !query +SELECT listagg(b, '|') FROM df GROUP BY a +-- !query schema +struct<listagg(b, |):string> +-- !query output +NULL +b|c +c|d + + +-- !query +SELECT listagg(a) FROM df +-- !query schema +struct<listagg(a, NULL):string> +-- !query output +aabb + + +-- !query +SELECT listagg(DISTINCT a) FROM df +-- !query schema +struct<listagg(DISTINCT a, NULL):string> +-- !query output +ab + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a) FROM df +-- !query schema +struct<listagg(a, NULL) WITHIN GROUP (ORDER BY a ASC NULLS FIRST):string> +-- !query output +aabb + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a DESC) FROM df +-- !query schema +struct<listagg(a, NULL) WITHIN GROUP (ORDER BY a DESC NULLS LAST):string> +-- !query output +bbaa + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a DESC) OVER (PARTITION BY b) FROM df +-- !query schema +struct<listagg(a, NULL) WITHIN GROUP (ORDER BY a DESC NULLS LAST) OVER (PARTITION BY b ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):string> +-- !query output +NULL +a +b +ba +ba + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b) FROM df +-- !query schema +struct<listagg(a, NULL) WITHIN GROUP (ORDER BY b ASC NULLS FIRST):string> +-- !query output +aabb + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC) FROM df +-- !query schema +struct<listagg(a, NULL) WITHIN GROUP (ORDER BY b DESC NULLS LAST):string> +-- !query output +baba + + +-- !query +SELECT listagg(a, '|') WITHIN GROUP (ORDER BY b DESC) FROM df +-- !query schema +struct<listagg(a, |) WITHIN GROUP (ORDER BY b DESC NULLS LAST):string> +-- !query output +b|a|b|a + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC, a ASC) FROM df +-- !query schema +struct<listagg(a, NULL) WITHIN GROUP (ORDER BY b DESC NULLS LAST, a ASC NULLS FIRST):string> +-- !query output +baba + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC, a DESC) FROM df +-- !query schema +struct<listagg(a, NULL) WITHIN GROUP (ORDER BY b DESC NULLS LAST, a DESC NULLS LAST):string> +-- !query output +bbaa + + +-- !query +SELECT listagg(c1) FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query schema +struct<listagg(c1, NULL):binary> +-- !query output +ޭ�� + + +-- !query +SELECT listagg(c1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query schema +struct<listagg(c1, NULL):binary> +-- !query output +ޭ�� + + +-- !query +SELECT listagg(c1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query schema +struct<listagg(c1, X'42'):binary> +-- !query output +ޭB�� + + +-- !query +SELECT listagg(a), listagg(b, ',') FROM df2 +-- !query schema +struct<listagg(a, NULL):string,listagg(b, ,):string> +-- !query output +123 true,false,false + + +-- !query +SELECT listagg(c1) FROM (VALUES (ARRAY['a', 'b'])) AS t(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "','", + "hint" : "" + } +} + + +-- !query +SELECT listagg(c1, ', ') FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.DATA_DIFF_TYPES", + "sqlState" : "42K09", + "messageParameters" : { + "dataType" : "(\"BINARY\" or \"STRING\")", + "functionName" : "`listagg`", + "sqlExpr" : "\"listagg(c1, , )\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "listagg(c1, ', ')" + } ] +} + + +-- !query +SELECT listagg(b, a) FROM df GROUP BY a +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"a\"", + "inputName" : "`delimiter`", + "inputType" : "\"STRING\"", + "sqlExpr" : "\"listagg(b, a)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 20, + "fragment" : "listagg(b, a)" + } ] +} + + +-- !query +SELECT listagg(a) OVER (ORDER BY a) FROM df +-- !query schema +struct<listagg(a, NULL) OVER (ORDER BY a ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):string> +-- !query output +NULL +aa +aa +aabb +aabb + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC", + "sqlState" : "42601", + "messageParameters" : { + "aggFunc" : "\"listagg(a, NULL, a)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 61, + "fragment" : "listagg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a)" + } ] +} + + +-- !query +SELECT string_agg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC", + "sqlState" : "42601", + "messageParameters" : { + "aggFunc" : "\"listagg(a, NULL, a)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 64, + "fragment" : "string_agg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a)" + } ] +} + + +-- !query +SELECT listagg(DISTINCT a) OVER (ORDER BY a) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DISTINCT_WINDOW_FUNCTION_UNSUPPORTED", + "sqlState" : "0A000", + "messageParameters" : { + "windowExpr" : "\"listagg(DISTINCT a, NULL) OVER (ORDER BY a ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 44, + "fragment" : "listagg(DISTINCT a) OVER (ORDER BY a)" + } ] +} + + +-- !query +SELECT listagg(DISTINCT a) WITHIN GROUP (ORDER BY b) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "FUNCTION_AND_ORDER_EXPRESSION_MISMATCH", + "sqlState" : "42822", + "messageParameters" : { + "functionArgs" : "\"a\"", + "functionName" : "`listagg`", + "orderExpr" : "\"b\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 52, + "fragment" : "listagg(DISTINCT a) WITHIN GROUP (ORDER BY b)" + } ] +} + + +-- !query +SELECT listagg(DISTINCT a) WITHIN GROUP (ORDER BY a, b) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "FUNCTION_AND_ORDER_EXPRESSION_MISMATCH", + "sqlState" : "42822", + "messageParameters" : { + "functionArgs" : "\"a\"", + "functionName" : "`listagg`", + "orderExpr" : "\"a\", \"b\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 55, + "fragment" : "listagg(DISTINCT a) WITHIN GROUP (ORDER BY a, b)" + } ] +} + + +-- !query +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query schema +struct<listagg(c1, NULL) WITHIN GROUP (ORDER BY collate(c1, utf8_binary) ASC NULLS FIRST):string> +-- !query output +ABab + + +-- !query +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query schema +struct<listagg(c1, NULL) WITHIN GROUP (ORDER BY collate(c1, utf8_lcase) ASC NULLS FIRST):string> +-- !query output +aAbB + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query schema +struct<listagg(DISTINCT collate(c1, utf8_binary), NULL):string> +-- !query output +aAbB + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query schema +struct<listagg(DISTINCT collate(c1, utf8_lcase), NULL):string collate UTF8_LCASE> +-- !query output +ab + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('B'), ('b'), ('A')) AS t(c1) +-- !query schema +struct<listagg(DISTINCT collate(c1, utf8_lcase), NULL) WITHIN GROUP (ORDER BY collate(c1, utf8_lcase) ASC NULLS FIRST):string collate UTF8_LCASE> +-- !query output +aB + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('b'), ('A'), ('B')) AS t(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "FUNCTION_AND_ORDER_EXPRESSION_MISMATCH", Review Comment: so the contract is, `listagg(expr1) WITHIN GROUP (ORDER BY expr2)`, expr1 and expr2 must be the same? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org