This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 8fe926d387 test: Add `datafusion-cli` `fair` and `unbounded` 
memory-pool test coverage (#20565)
8fe926d387 is described below

commit 8fe926d3871a011bdca5040c9cac043759339b46
Author: Eren Avsarogullari <[email protected]>
AuthorDate: Sat Mar 7 04:04:07 2026 -0800

    test: Add `datafusion-cli` `fair` and `unbounded` memory-pool test coverage 
(#20565)
    
    ## Which issue does this PR close?
    - Closes #20564.
    
    ## Rationale for this change
    Currently, the `datafusion-cli` submodule has no test coverage for
    `--mem-pool-type = fair / unbounded` (the default pool type is `greedy`;
    the `unbounded` memory pool is used when neither `--memory-limit` nor
    `--mem-pool-type` is set). Existing test cases use the `greedy` memory
    pool by setting `--memory-limit`. This PR covers the `--mem-pool-type =
    fair / unbounded` cases together with `--memory-limit` and
    `--top-memory-consumers`. Also, `datafusion-cli` is a client used by
    end users, so extending test coverage of its exposed features is useful
    for functional verification and long-term maintenance.
    
    **Case1:** `fair` memory pool usage by `datafusion-cli` when
    `top-memory-consumers = 0` (Top Memory Consumers will not be listed when
    memory is exhausted)
    ```
    program: datafusion-cli
      args:
        - "--memory-limit"
        - 10M
        - "--mem-pool-type"
        - fair
        - "--command"
        - "select * from generate_series(1,500000) as t1(v1) order by v1;"
        - "--top-memory-consumers"
        - "0"
    ```
    
    **Case2:** `fair` memory pool usage by `datafusion-cli` when
    `top-memory-consumers > 0` (Top Memory Consumers will be listed when
    memory is exhausted)
    ```
    program: datafusion-cli
      args:
        - "--memory-limit"
        - 10M
        - "--mem-pool-type"
        - fair
        - "--command"
        - "select * from generate_series(1,500000) as t1(v1) order by v1;"
        - "--top-memory-consumers"
        - "2"
    ```
    
    **Case3:** `unbounded` memory pool usage by `datafusion-cli`
    ```
    program: datafusion-cli
      args:
        - "--maxrows"
        - "10"
        - "--command"
        - "select * from generate_series(1,500000) as t1(v1) order by v1;"
    ```
    
    ## What changes are included in this PR?
    Explained in the section above.
    
    ## Are these changes tested?
    Yes, new integration tests are added and they pass locally:
    ```
    test test_cli_top_memory_consumers_with_mem_pool_type::case_1 ... ok
    test test_cli_top_memory_consumers_with_mem_pool_type::case_2 ... ok
    test test_cli_with_unbounded_memory_pool::case_1 ... ok
    ```
    
    ## Are there any user-facing changes?
    No
---
 datafusion-cli/tests/cli_integration.rs            | 49 +++++++++++++++++++++-
 ...mory_consumers_with_mem_pool_type@no_track.snap | 23 ++++++++++
 [email protected] | 26 ++++++++++++
 .../[email protected]    | 36 ++++++++++++++++
 4 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/datafusion-cli/tests/cli_integration.rs 
b/datafusion-cli/tests/cli_integration.rs
index 99fc2d527e..7bc45693a8 100644
--- a/datafusion-cli/tests/cli_integration.rs
+++ b/datafusion-cli/tests/cli_integration.rs
@@ -20,6 +20,7 @@ use std::process::Command;
 use rstest::rstest;
 
 use async_trait::async_trait;
+use insta::internals::SettingsBindDropGuard;
 use insta::{Settings, glob};
 use insta_cmd::{assert_cmd_snapshot, get_cargo_bin};
 use std::path::PathBuf;
@@ -215,6 +216,42 @@ fn test_cli_top_memory_consumers<'a>(
     #[case] snapshot_name: &str,
     #[case] top_memory_consumers: impl IntoIterator<Item = &'a str>,
 ) {
+    let _bound = bind_to_settings(snapshot_name);
+
+    let mut cmd = cli();
+    let sql = "select * from generate_series(1,500000) as t1(v1) order by v1;";
+    cmd.args(["--memory-limit", "10M", "--command", sql]);
+    cmd.args(top_memory_consumers);
+
+    assert_cmd_snapshot!(cmd);
+}
+
+#[rstest]
+#[case("no_track", ["--top-memory-consumers", "0"])]
+#[case("top2", ["--top-memory-consumers", "2"])]
+#[test]
+fn test_cli_top_memory_consumers_with_mem_pool_type<'a>(
+    #[case] snapshot_name: &str,
+    #[case] top_memory_consumers: impl IntoIterator<Item = &'a str>,
+) {
+    let _bound = bind_to_settings(snapshot_name);
+
+    let mut cmd = cli();
+    let sql = "select * from generate_series(1,500000) as t1(v1) order by v1;";
+    cmd.args([
+        "--memory-limit",
+        "10M",
+        "--mem-pool-type",
+        "fair",
+        "--command",
+        sql,
+    ]);
+    cmd.args(top_memory_consumers);
+
+    assert_cmd_snapshot!(cmd);
+}
+
+fn bind_to_settings(snapshot_name: &str) -> SettingsBindDropGuard {
     let mut settings = make_settings();
 
     settings.set_snapshot_suffix(snapshot_name);
@@ -232,12 +269,20 @@ fn test_cli_top_memory_consumers<'a>(
         "Resources exhausted: Failed to allocate",
     );
 
+    settings.bind_to_scope()
+}
+
+#[test]
+fn test_cli_with_unbounded_memory_pool() {
+    let mut settings = make_settings();
+
+    settings.set_snapshot_suffix("default");
+
     let _bound = settings.bind_to_scope();
 
     let mut cmd = cli();
     let sql = "select * from generate_series(1,500000) as t1(v1) order by v1;";
-    cmd.args(["--memory-limit", "10M", "--command", sql]);
-    cmd.args(top_memory_consumers);
+    cmd.args(["--maxrows", "10", "--command", sql]);
 
     assert_cmd_snapshot!(cmd);
 }
diff --git 
a/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@no_track.snap
 
b/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@no_track.snap
new file mode 100644
index 0000000000..25267ea161
--- /dev/null
+++ 
b/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@no_track.snap
@@ -0,0 +1,23 @@
+---
+source: datafusion-cli/tests/cli_integration.rs
+info:
+  program: datafusion-cli
+  args:
+    - "--memory-limit"
+    - 10M
+    - "--mem-pool-type"
+    - fair
+    - "--command"
+    - "select * from generate_series(1,500000) as t1(v1) order by v1;"
+    - "--top-memory-consumers"
+    - "0"
+---
+success: false
+exit_code: 1
+----- stdout -----
+[CLI_VERSION]
+Error: Not enough memory to continue external sort. Consider increasing the 
memory limit config: 'datafusion.runtime.memory_limit', or decreasing the 
config: 'datafusion.execution.sort_spill_reservation_bytes'.
+caused by
+Resources exhausted: Failed to allocate
+
+----- stderr -----
diff --git 
a/datafusion-cli/tests/snapshots/[email protected]
 
b/datafusion-cli/tests/snapshots/[email protected]
new file mode 100644
index 0000000000..6515050047
--- /dev/null
+++ 
b/datafusion-cli/tests/snapshots/[email protected]
@@ -0,0 +1,26 @@
+---
+source: datafusion-cli/tests/cli_integration.rs
+info:
+  program: datafusion-cli
+  args:
+    - "--memory-limit"
+    - 10M
+    - "--mem-pool-type"
+    - fair
+    - "--command"
+    - "select * from generate_series(1,500000) as t1(v1) order by v1;"
+    - "--top-memory-consumers"
+    - "2"
+---
+success: false
+exit_code: 1
+----- stdout -----
+[CLI_VERSION]
+Error: Not enough memory to continue external sort. Consider increasing the 
memory limit config: 'datafusion.runtime.memory_limit', or decreasing the 
config: 'datafusion.execution.sort_spill_reservation_bytes'.
+caused by
+Resources exhausted: Additional allocation failed for ExternalSorter[0] with 
top memory consumers (across reservations) as:
+  Consumer(can spill: bool) consumed XB, peak XB,
+  Consumer(can spill: bool) consumed XB, peak XB.
+Error: Failed to allocate 
+
+----- stderr -----
diff --git 
a/datafusion-cli/tests/snapshots/[email protected] 
b/datafusion-cli/tests/snapshots/[email protected]
new file mode 100644
index 0000000000..7bdcd63dc7
--- /dev/null
+++ b/datafusion-cli/tests/snapshots/[email protected]
@@ -0,0 +1,36 @@
+---
+source: datafusion-cli/tests/cli_integration.rs
+info:
+  program: datafusion-cli
+  args:
+    - "--maxrows"
+    - "10"
+    - "--command"
+    - "select * from generate_series(1,500000) as t1(v1) order by v1;"
+---
+success: true
+exit_code: 0
+----- stdout -----
+[CLI_VERSION]
++----+
+| v1 |
++----+
+| 1  |
+| 2  |
+| 3  |
+| 4  |
+| 5  |
+| 6  |
+| 7  |
+| 8  |
+| 9  |
+| 10 |
+| .  |
+| .  |
+| .  |
++----+
+500000 row(s) fetched. (First 10 displayed. Use --maxrows to adjust)
+[ELAPSED]
+
+
+----- stderr -----


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to