This is an automated email from the ASF dual-hosted git repository. skrawcz pushed a commit to branch stefan/while-dfs in repository https://gitbox.apache.org/repos/asf/hamilton.git
commit 97770850cd89b8f7b2e9079e9e23afe9e60e3a79 Author: Stefan Krawczyk <[email protected]> AuthorDate: Mon Feb 16 23:27:28 2026 -0800 While loop DFS implementation This should help someone create a really large and deep graph. This needs more testing/eyes to think about impact. --- hamilton/graph.py | 22 ++++++++++++++-------- tests/test_graph.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/hamilton/graph.py b/hamilton/graph.py index 11b6f55f..d807a6ea 100644 --- a/hamilton/graph.py +++ b/hamilton/graph.py @@ -1090,13 +1090,19 @@ class FunctionGraph: nodes = set() user_nodes = set() - def dfs_traverse(node: node.Node): - nodes.add(node) - for n in next_nodes_fn(node): - if n not in nodes: - dfs_traverse(n) - if node.user_defined: - user_nodes.add(node) + def dfs_traverse_iterative(start_node: node.Node): + """Iterative DFS to avoid recursion depth limits with large DAGs.""" + stack = [start_node] + while stack: + n = stack.pop() + if n in nodes: + continue + nodes.add(n) + if n.user_defined: + user_nodes.add(n) + for next_n in next_nodes_fn(n): + if next_n not in nodes: + stack.append(next_n) missing_vars = [] for var in starting_nodes: @@ -1107,7 +1113,7 @@ class FunctionGraph: # if it's not in the runtime inputs, it's a properly missing variable missing_vars.append(var) continue # collect all missing final variables - dfs_traverse(self.nodes[var]) + dfs_traverse_iterative(self.nodes[var]) if missing_vars: missing_vars_str = ",\n".join(missing_vars) raise ValueError(f"Unknown nodes [{missing_vars_str}] requested. Check for typos?") diff --git a/tests/test_graph.py b/tests/test_graph.py index 48027e7b..de02135e 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -17,6 +17,7 @@ import inspect import pathlib +import sys import uuid from itertools import permutations from typing import List @@ -1479,3 +1480,41 @@ def test_display_name_list_value_uses_first_element(): assert "First Name" in dot_string # The function name should NOT appear since display_name is set assert "<b>node_with_list_display_name</b>" not in dot_string + + +def test_get_upstream_nodes_large_chain_no_recursion_error(): + """Regression test: get_upstream_nodes with only final_node on a large chain DAG. + + A recursive DFS would exceed Python's recursion limit (~1000) when traversing + a long dependency chain from a single final node. This test verifies that + the iterative DFS in directional_dfs_traverse handles large DAGs correctly. + + Chain size is chosen to exceed recursion limit: 1200 nodes > 1000. + """ + from hamilton import ad_hoc_utils + from hamilton import function_modifiers as fm + + def step(prev: float) -> float: + """Single step in a linear chain.""" + return prev + 1.0 + + # Build a linear chain: node_0 -> node_1 -> ... -> node_N + chain_size = sys.getrecursionlimit() + 200 # Exceeds recursion limit + config = {} + for i in range(chain_size): + prev = f"node_{i - 1}" if i > 0 else 0.0 + config[f"node_{i}"] = { + "prev": fm.source(prev) if i > 0 else fm.value(0.0), + } + decorated = fm.parameterize(**config)(step) + module = ad_hoc_utils.create_temporary_module(decorated, module_name="large_chain") + + fg = graph.FunctionGraph.from_modules(module, config={}) + final_node = f"node_{chain_size - 1}" + + # This would raise RecursionError with recursive DFS + nodes, user_nodes = fg.get_upstream_nodes([final_node]) + + assert len(nodes) == chain_size + assert len(user_nodes) == 0 + assert all(fg.nodes[f"node_{i}"] in nodes for i in range(chain_size))
