alamb commented on code in PR #16625:
URL: https://github.com/apache/datafusion/pull/16625#discussion_r2176095740


##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -623,8 +668,11 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator {
         let mut partition_ordering_values = vec![];
 
         // Existing values should be merged also.
-        partition_values.push(self.values.clone().into());
-        partition_ordering_values.push(self.ordering_values.clone().into());
+        if !self.is_input_pre_ordered {

Review Comment:
   does this have the effect of sorting all inputs even when `ARRAY_AGG` didn't 
explicitly call for ordering?



##########
datafusion/sqllogictest/test_files/aggregate.slt:
##########
@@ -232,9 +282,8 @@ physical_plan
 01)AggregateExec: mode=Final, gby=[], aggr=[array_agg(agg_order.c1) ORDER BY 
[agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]]
 02)--CoalescePartitionsExec
 03)----AggregateExec: mode=Partial, gby=[], aggr=[array_agg(agg_order.c1) 
ORDER BY [agg_order.c2 DESC NULLS FIRST, agg_order.c3 ASC NULLS LAST]]
-04)------SortExec: expr=[c2@1 DESC, c3@2 ASC NULLS LAST], 
preserve_partitioning=[true]
-05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-06)----------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_agg_multi_order.csv]]}, 
projection=[c1, c2, c3], file_type=csv, has_header=true
+04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1

Review Comment:
   I wonder if we should benchmark this -- sorting by `ScalarValue` is likely a 
lot less efficient than using the fast array sort / etc that is done in SortExec



##########
datafusion/sqllogictest/test_files/aggregate.slt:
##########
@@ -206,6 +206,56 @@ query error Execution error: In an aggregate with 
DISTINCT, ORDER BY expressions
 SELECT array_agg(DISTINCT c13 ORDER BY c13, c12)
 FROM aggregate_test_100
 
+query ?? rowsort
+with tbl as (SELECT * FROM (VALUES ('xxx', 'yyy'), ('xxx', 'yyy'), ('xxx2', 
'yyy2')) AS t(x, y))
+select
+    array_agg(x order by x) as x_agg,
+    array_agg(y order by y) as y_agg
+from tbl
+group by all
+----
+[xxx, xxx, xxx2] [yyy, yyy, yyy2]
+
+query ??
+SELECT
+    (SELECT array_agg(c12 ORDER BY c12) FROM aggregate_test_100),
+    (SELECT array_agg(c13 ORDER BY c13) FROM aggregate_test_100)
+----
+[0.01479305307777301, 0.02182578039211991, 0.03968347085780355, 
0.04429073092078406, 0.047343434291126085, 0.04893135681998029, 
0.0494924465469434, 0.05573662213439634, 0.05636955101974106, 
0.061029375346466685, 0.07260475960924484, 0.09465635123783445, 
0.12357539988406441, 0.152498292971736, 0.16301110515739792, 
0.1640882545084913, 0.1754261586710173, 0.17592486905979987, 
0.17909035118828576, 0.18628859265874176, 0.19113293583306745, 
0.2145232647388039, 0.21535402343780985, 0.24899794314659673, 
0.2537253407987472, 0.2667177795079635, 0.27159190516490006, 
0.2739938529235548, 0.28534428578703896, 0.2944158618048994, 0.296036538664718, 
0.3051364088814128, 0.30585375151301186, 0.3114712539863804, 
0.3231750610081745, 0.32869374687050157, 0.33639590659276175, 
0.3600766362333053, 0.36936304600612724, 0.38870280983958583, 
0.39144436569161134, 0.40342283197779727, 0.4094218353587008, 
0.40975383525297016, 0.42073125331890115, 0.4273123318932347, 
0.42950521730777025, 0.4830878559436823, 0.508
 1765563442366, 0.5437595540422571, 0.5590205548347534, 0.5593249815276734, 
0.5603062368164834, 0.560333188635217, 0.5614503754617461, 0.565352842229935, 
0.574210838214554, 0.5759450483859969, 0.5773498217058918, 0.5991138115095911, 
0.6009475544728957, 0.6108938307533, 0.6316565296547284, 0.6404495093354053, 
0.6405262429561641, 0.6425694115212065, 0.658671129040488, 0.6668423897406515, 
0.6864391962767343, 0.7035635283169166, 0.7325106678655877, 0.7328050041291218, 
0.7614304100703713, 0.7631239070049998, 0.7670021786149205, 0.7697753383420857, 
0.7764360990307122, 0.7784918983501654, 0.7973920072996036, 0.819715865079681, 
0.8506721053047003, 0.8813167497816289, 0.8824879447595726, 0.9185813970744787, 
0.9231889896940375, 0.9237877978193884, 0.9255031346434324, 0.9293883502480845, 
0.9294097332465232, 0.9463098243875633, 0.946325164889271, 0.9491397432856566, 
0.9567595541247681, 0.9706712283358269, 0.9723580396501548, 0.9748360509016578, 
0.9800193410444061, 0.980809631269599, 0.9915178286
 51004, 0.9965400387585364] [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm, 
0keZ5G8BffGwgF2RwQD59TFzMStxCB, 0og6hSkhbX8AC1ktFS4kounvTzy8Vo, 
1aOcrEGd0cOqZe2I5XBOm0nDcwtBZO, 2T3wSlHdEmASmO0xcXHnndkKEt6bz8, 
3BEOHQsMEFZ58VcNTOJYShTBpAPzbt, 4HX6feIvmNXBN7XGqgO4YVBkhu8GDI, 
4JznSdBajNWhu4hRQwjV1FjTTxY68i, 52mKlRE3aHCBZtjECq6sY9OqVf8Dze, 
56MZa5O1hVtX4c5sbnCfxuX5kDChqI, 6FPJlLAcaQ5uokyOWZ9HGdLZObFvOZ, 
6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW, 6oIXZuIPIqEoPBvFmbt2Nxy3tryGUE, 
6x93sxYioWuq5c9Kkk8oTAAORM7cH0, 802bgTGl6Bk5TlkPYYTxp5JkKyaYUA, 
8LIh0b6jmDGm87BmIyjdxNIpX4ugjD, 90gAtmGEeIqUTbo1ZrxCvWtsseukXC, 
9UbObCsVkmYpJGcGrgfK90qOnwb2Lj, AFGCj7OWlEB5QfniEFgonMq90Tq5uH, 
ALuRhobVWbnQTTWZdSOk0iVe8oYFhW, Amn2K87Db5Es3dFQO9cw9cvpAM6h35, 
AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz, BJqx5WokrmrrezZA0dUbleMYkG5U2O, 
BPtQMxnuSPpxMExYV9YkDa6cAN7GP3, BsM5ZAYifRh5Lw3Y8X1r53I0cTJnfE, 
C2GT5KVyOPZpgKVl110TyZO0NcJ434, DuJNG8tufSqW0ZstHqWj3aGvFLMg4A, 
EcCuckwsF3gV1Ecgmh5v4KM8g1ozif, ErJFw6hzZ5fmI5r8bhE4JzlscnhKZU, 
F7NSTjWvQJyBburN7CXRUlbgp2dIrA, Fi4rJeTQq
 4eXj8Lxg3Hja5hBVTVV5u, H5j5ZHy1FGesOAHjkQEDYCucbpKWRu, 
HKSMQ9nTnwXCJIte1JrM1dtYnDtJ8g, IWl0G3ZlMNf7WT8yjIB49cx7MmYOmr, 
IZTkHMLvIKuiLjhDjYMmIHxh166we4, Ig1QcuKsjHXkproePdERo2w0mYzIqd, 
JHNgc2UCaiXOdmkxwDDyGhRlO0mnBQ, JN0VclewmjwYlSl8386MlWv5rEhWCz, 
JafwVLSVk5AVoXFuzclesQ000EE2k1, KJFcmTVjdkCMv94wYCtfHMFhzyRsmH, 
Ktb7GQ0N1DrxwkCkEUsTaIXk0xYinn, Ld2ej8NEv5zNcqU60FwpHeZKBhfpiV, 
LiEBxds3X0Uw0lxiYjDqrkAaAwoiIW, MXhhH1Var3OzzJCtI9VNyYvA0q8UyJ, 
MeSTAXq8gVxVjbEjgkvU9YLte0X9uE, NEhyk8uIx4kEULJGa8qIyFjjBcP2G6, 
O66j6PaYuZhEUtqV6fuU7TyjM2WxC5, OF7fQ37GzaZ5ikA2oMyvleKtgnLjXh, 
OPwBqCEK5PWTjWaiOyL45u2NLTaDWv, Oq6J4Rx6nde0YlhOIJkFsX2MsSvAQ0, 
Ow5PGpfTm4dXCfTDsXAOTatXRoAydR, QEHVvcP8gxI6EMJIrvcnIhgzPNjIvv, 
QJYm7YRA3YetcBHI5wkMZeLXVmfuNy, QYlaIAnJA6r8rlAb6f59wcxvcPcWFf, 
RilTlL1tKkPOUFuzmLydHAVZwv1OGl, Sfx0vxv1skzZWT1PqVdoRDdO6Sb6xH, 
TTQUwpMNSXZqVBKAFvXu7OlWvKXJKX, TtDKUZxzVxsq758G6AWPSYuZgVgbcl, 
VDhtJkYjAYPykCgOU9x3v7v3t4SO1a, VY0zXmXeksCT8BzvpzpPLbmU9Kp9Y4, 
Vp3gmWunM5A7wOC9YW2JroFqTWjvTi, WHmjWk2AY4c6m7
 DA4GitUx6nmb1yYS, XemNcT1xp61xcM1Qz3wZ1VECCnq06O, 
Z2sWcQr0qyCJRMHDpRy3aQr7PkHtkK, aDxBtor7Icd9C5hnTvvw5NrIre740e, 
akiiY5N0I44CMwEnBL6RTBk7BRkxEj, b3b9esRhTzFEawbs6XhpKnD9ojutHB, 
bgK1r6v3BCTh0aejJUhkA1Hn6idXGp, cBGc0kSm32ylBDnxogG727C0uhZEYZ, 
cq4WSAIFwx3wwTUS5bp1wCe71R6U5I, dVdvo6nUD5FgCgsbOZLds28RyGTpnx, 
e2Gh6Ov8XkXoFdJWhl0EjwEHlMDYyG, f9ALCzwDAKmdu7Rk2msJaB1wxe5IBX, 
fuyvs0w7WsKSlXqJ1e6HFSoLmx03AG, gTpyQnEODMcpsPnJMZC66gh33i3m0b, 
gpo8K5qtYePve6jyPt6xgJx4YOVjms, gxfHWUF8XgY2KdFxigxvNEXe2V2XMl, 
i6RQVXKUh7MzuGMDaNclUYnFUAireU, ioEncce3mPOXD2hWhpZpCPWGATG6GU, 
jQimhdepw3GKmioWUlVSWeBVRKFkY3, l7uwDoTepWwnAP0ufqtHJS3CRi7RfP, 
lqhzgLsXZ8JhtpeeUWWNbMz8PHI705, m6jD0LBIQWaMfenwRCTANI9eOdyyto, 
mhjME0zBHbrK6NMkytMTQzOssOa1gF, mzbkwXKrPeZnxg2Kn1LRF5hYSsmksS, 
nYVJnVicpGRqKZibHyBAmtmzBXAFfT, oHJMNvWuunsIMIWFnYG31RCfkOo2V7, 
oLZ21P2JEDooxV1pU31cIxQHEeeoLu, okOkcWflkNXIy4R8LzmySyY1EC3sYd, 
pLk3i59bZwd5KBZrI1FiweYTd5hteG, pTeu0WMjBRTaNRT15rLCuEh3tBJVc5, 
qnPOOmslCJaT45buUisMRnM0rc77EK, t6fQUjJejPcjc04wHvH
 TPe55S65B4V, ukOiFGGFnQJDHFgZxHMpvhD3zybF0M, ukyD7b0Efj7tNlFSRmzZ0IqkEzg2a8, 
waIGbOGl1PM6gnzZ4uuZt4E2yDWRHs, wwXqSGKLyBQyPkonlzBNYUJTCo4LRS, 
xipQ93429ksjNcXPX5326VSg1xJZcW, y7C453hRWd4E7ImjNDWlpexB8nUqjh, 
ydkwycaISlYSlEq3TlkS2m15I2pcp8]
+
+query ??
+SELECT
+    array_agg(c12 ORDER BY c12),
+    array_agg(c13 ORDER BY c13)
+FROM aggregate_test_100
+----
+[0.01479305307777301, 0.02182578039211991, 0.03968347085780355, 
0.04429073092078406, 0.047343434291126085, 0.04893135681998029, 
0.0494924465469434, 0.05573662213439634, 0.05636955101974106, 
0.061029375346466685, 0.07260475960924484, 0.09465635123783445, 
0.12357539988406441, 0.152498292971736, 0.16301110515739792, 
0.1640882545084913, 0.1754261586710173, 0.17592486905979987, 
0.17909035118828576, 0.18628859265874176, 0.19113293583306745, 
0.2145232647388039, 0.21535402343780985, 0.24899794314659673, 
0.2537253407987472, 0.2667177795079635, 0.27159190516490006, 
0.2739938529235548, 0.28534428578703896, 0.2944158618048994, 0.296036538664718, 
0.3051364088814128, 0.30585375151301186, 0.3114712539863804, 
0.3231750610081745, 0.32869374687050157, 0.33639590659276175, 
0.3600766362333053, 0.36936304600612724, 0.38870280983958583, 
0.39144436569161134, 0.40342283197779727, 0.4094218353587008, 
0.40975383525297016, 0.42073125331890115, 0.4273123318932347, 
0.42950521730777025, 0.4830878559436823, 0.508
 1765563442366, 0.5437595540422571, 0.5590205548347534, 0.5593249815276734, 
0.5603062368164834, 0.560333188635217, 0.5614503754617461, 0.565352842229935, 
0.574210838214554, 0.5759450483859969, 0.5773498217058918, 0.5991138115095911, 
0.6009475544728957, 0.6108938307533, 0.6316565296547284, 0.6404495093354053, 
0.6405262429561641, 0.6425694115212065, 0.658671129040488, 0.6668423897406515, 
0.6864391962767343, 0.7035635283169166, 0.7325106678655877, 0.7328050041291218, 
0.7614304100703713, 0.7631239070049998, 0.7670021786149205, 0.7697753383420857, 
0.7764360990307122, 0.7784918983501654, 0.7973920072996036, 0.819715865079681, 
0.8506721053047003, 0.8813167497816289, 0.8824879447595726, 0.9185813970744787, 
0.9231889896940375, 0.9237877978193884, 0.9255031346434324, 0.9293883502480845, 
0.9294097332465232, 0.9463098243875633, 0.946325164889271, 0.9491397432856566, 
0.9567595541247681, 0.9706712283358269, 0.9723580396501548, 0.9748360509016578, 
0.9800193410444061, 0.980809631269599, 0.9915178286
 51004, 0.9965400387585364] [0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm, 
0keZ5G8BffGwgF2RwQD59TFzMStxCB, 0og6hSkhbX8AC1ktFS4kounvTzy8Vo, 
1aOcrEGd0cOqZe2I5XBOm0nDcwtBZO, 2T3wSlHdEmASmO0xcXHnndkKEt6bz8, 
3BEOHQsMEFZ58VcNTOJYShTBpAPzbt, 4HX6feIvmNXBN7XGqgO4YVBkhu8GDI, 
4JznSdBajNWhu4hRQwjV1FjTTxY68i, 52mKlRE3aHCBZtjECq6sY9OqVf8Dze, 
56MZa5O1hVtX4c5sbnCfxuX5kDChqI, 6FPJlLAcaQ5uokyOWZ9HGdLZObFvOZ, 
6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW, 6oIXZuIPIqEoPBvFmbt2Nxy3tryGUE, 
6x93sxYioWuq5c9Kkk8oTAAORM7cH0, 802bgTGl6Bk5TlkPYYTxp5JkKyaYUA, 
8LIh0b6jmDGm87BmIyjdxNIpX4ugjD, 90gAtmGEeIqUTbo1ZrxCvWtsseukXC, 
9UbObCsVkmYpJGcGrgfK90qOnwb2Lj, AFGCj7OWlEB5QfniEFgonMq90Tq5uH, 
ALuRhobVWbnQTTWZdSOk0iVe8oYFhW, Amn2K87Db5Es3dFQO9cw9cvpAM6h35, 
AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz, BJqx5WokrmrrezZA0dUbleMYkG5U2O, 
BPtQMxnuSPpxMExYV9YkDa6cAN7GP3, BsM5ZAYifRh5Lw3Y8X1r53I0cTJnfE, 
C2GT5KVyOPZpgKVl110TyZO0NcJ434, DuJNG8tufSqW0ZstHqWj3aGvFLMg4A, 
EcCuckwsF3gV1Ecgmh5v4KM8g1ozif, ErJFw6hzZ5fmI5r8bhE4JzlscnhKZU, 
F7NSTjWvQJyBburN7CXRUlbgp2dIrA, Fi4rJeTQq
 4eXj8Lxg3Hja5hBVTVV5u, H5j5ZHy1FGesOAHjkQEDYCucbpKWRu, 
HKSMQ9nTnwXCJIte1JrM1dtYnDtJ8g, IWl0G3ZlMNf7WT8yjIB49cx7MmYOmr, 
IZTkHMLvIKuiLjhDjYMmIHxh166we4, Ig1QcuKsjHXkproePdERo2w0mYzIqd, 
JHNgc2UCaiXOdmkxwDDyGhRlO0mnBQ, JN0VclewmjwYlSl8386MlWv5rEhWCz, 
JafwVLSVk5AVoXFuzclesQ000EE2k1, KJFcmTVjdkCMv94wYCtfHMFhzyRsmH, 
Ktb7GQ0N1DrxwkCkEUsTaIXk0xYinn, Ld2ej8NEv5zNcqU60FwpHeZKBhfpiV, 
LiEBxds3X0Uw0lxiYjDqrkAaAwoiIW, MXhhH1Var3OzzJCtI9VNyYvA0q8UyJ, 
MeSTAXq8gVxVjbEjgkvU9YLte0X9uE, NEhyk8uIx4kEULJGa8qIyFjjBcP2G6, 
O66j6PaYuZhEUtqV6fuU7TyjM2WxC5, OF7fQ37GzaZ5ikA2oMyvleKtgnLjXh, 
OPwBqCEK5PWTjWaiOyL45u2NLTaDWv, Oq6J4Rx6nde0YlhOIJkFsX2MsSvAQ0, 
Ow5PGpfTm4dXCfTDsXAOTatXRoAydR, QEHVvcP8gxI6EMJIrvcnIhgzPNjIvv, 
QJYm7YRA3YetcBHI5wkMZeLXVmfuNy, QYlaIAnJA6r8rlAb6f59wcxvcPcWFf, 
RilTlL1tKkPOUFuzmLydHAVZwv1OGl, Sfx0vxv1skzZWT1PqVdoRDdO6Sb6xH, 
TTQUwpMNSXZqVBKAFvXu7OlWvKXJKX, TtDKUZxzVxsq758G6AWPSYuZgVgbcl, 
VDhtJkYjAYPykCgOU9x3v7v3t4SO1a, VY0zXmXeksCT8BzvpzpPLbmU9Kp9Y4, 
Vp3gmWunM5A7wOC9YW2JroFqTWjvTi, WHmjWk2AY4c6m7
 DA4GitUx6nmb1yYS, XemNcT1xp61xcM1Qz3wZ1VECCnq06O, 
Z2sWcQr0qyCJRMHDpRy3aQr7PkHtkK, aDxBtor7Icd9C5hnTvvw5NrIre740e, 
akiiY5N0I44CMwEnBL6RTBk7BRkxEj, b3b9esRhTzFEawbs6XhpKnD9ojutHB, 
bgK1r6v3BCTh0aejJUhkA1Hn6idXGp, cBGc0kSm32ylBDnxogG727C0uhZEYZ, 
cq4WSAIFwx3wwTUS5bp1wCe71R6U5I, dVdvo6nUD5FgCgsbOZLds28RyGTpnx, 
e2Gh6Ov8XkXoFdJWhl0EjwEHlMDYyG, f9ALCzwDAKmdu7Rk2msJaB1wxe5IBX, 
fuyvs0w7WsKSlXqJ1e6HFSoLmx03AG, gTpyQnEODMcpsPnJMZC66gh33i3m0b, 
gpo8K5qtYePve6jyPt6xgJx4YOVjms, gxfHWUF8XgY2KdFxigxvNEXe2V2XMl, 
i6RQVXKUh7MzuGMDaNclUYnFUAireU, ioEncce3mPOXD2hWhpZpCPWGATG6GU, 
jQimhdepw3GKmioWUlVSWeBVRKFkY3, l7uwDoTepWwnAP0ufqtHJS3CRi7RfP, 
lqhzgLsXZ8JhtpeeUWWNbMz8PHI705, m6jD0LBIQWaMfenwRCTANI9eOdyyto, 
mhjME0zBHbrK6NMkytMTQzOssOa1gF, mzbkwXKrPeZnxg2Kn1LRF5hYSsmksS, 
nYVJnVicpGRqKZibHyBAmtmzBXAFfT, oHJMNvWuunsIMIWFnYG31RCfkOo2V7, 
oLZ21P2JEDooxV1pU31cIxQHEeeoLu, okOkcWflkNXIy4R8LzmySyY1EC3sYd, 
pLk3i59bZwd5KBZrI1FiweYTd5hteG, pTeu0WMjBRTaNRT15rLCuEh3tBJVc5, 
qnPOOmslCJaT45buUisMRnM0rc77EK, t6fQUjJejPcjc04wHvH
 TPe55S65B4V, ukOiFGGFnQJDHFgZxHMpvhD3zybF0M, ukyD7b0Efj7tNlFSRmzZ0IqkEzg2a8, 
waIGbOGl1PM6gnzZ4uuZt4E2yDWRHs, wwXqSGKLyBQyPkonlzBNYUJTCo4LRS, 
xipQ93429ksjNcXPX5326VSg1xJZcW, y7C453hRWd4E7ImjNDWlpexB8nUqjh, 
ydkwycaISlYSlEq3TlkS2m15I2pcp8]
+
+query ?? rowsort
+with tbl as (SELECT * FROM (VALUES ('xxx', 'yyy'), ('xxx', 'yyy'), ('xxx2', 
'yyy2')) AS t(x, y))
+select
+    array_agg(distinct x order by x) as x_agg,

Review Comment:
   These columns already look distinct to me  -- maybe testing with another 
column would be better to show that distinct is actually working as designed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to