This is an automated email from the ASF dual-hosted git repository.

asoare pushed a commit to branch alexandrusoare/feat/date-parse-improvements
in repository https://gitbox.apache.org/repos/asf/superset.git
commit e46d3284721aeb88386ceed1c2bcccf75db7a146 Author: alexandrusoare <[email protected]> AuthorDate: Tue Jan 13 15:29:35 2026 +0200 feat(dates): adding handling for first of --- superset/utils/date_parser.py | 115 +++++++++++++++++++++++++++- tests/unit_tests/utils/date_parser_tests.py | 67 ++++++++++++++++ 2 files changed, 178 insertions(+), 4 deletions(-) diff --git a/superset/utils/date_parser.py b/superset/utils/date_parser.py index 4fbe79e472..0f874d8e8a 100644 --- a/superset/utils/date_parser.py +++ b/superset/utils/date_parser.py @@ -53,6 +53,12 @@ ParserElement.enable_packrat() logger = logging.getLogger(__name__) +# Mapping of ordinal words to their numeric values for date expressions +ORDINAL_MAP: dict[str, int] = { + "first": 1, + "1st": 1, +} + def parse_human_datetime(human_readable: str) -> datetime: """Returns ``datetime.datetime`` from human readable strings""" @@ -228,6 +234,67 @@ def handle_end_of(base_expression: str, unit: str) -> str: raise ValueError(f"Invalid unit for 'end of': {unit}") +def handle_nth_of( + ordinal: str, + subunit: str | None, + scope: str | None, + unit: str, + relative_start: str | None, +) -> str: + """ + Handles "first" time expressions like "first of the month" or + "first week of this year". + + This handler returns either a single date expression or a range expression + depending on whether a subunit is provided. 
+ + Args: + ordinal: The ordinal word or number ("first", "1st") + subunit: The smaller time unit ("week", "day", "month") or None + scope: Time scope ("this", "last", "next", "prior", "previous") or None + (defaults to "this") + unit: The larger time unit ("month", "year", "quarter", "week") + relative_start: Optional user-provided base time + + Returns: + - Single date expression if subunit is None (e.g., "first of the month") + - Range expression "since : until" if subunit is provided + (e.g., "first week of year") + + Examples: + >>> handle_nth_of("first", None, "this", "month", None) + "DATETRUNC(DATETIME('today'), month)" + + >>> handle_nth_of("first", "week", "this", "year", None) + "DATETRUNC(..., year) : DATEADD(DATETRUNC(..., year), 1, week)" + """ + # Convert ordinal to number + n = ORDINAL_MAP.get(ordinal.lower(), int(ordinal) if ordinal.isdigit() else 1) + + relative_base = get_relative_base(unit, relative_start) + effective_scope = scope.lower() if scope else "this" + + # Get the start of the larger unit with scope applied + base_expr = handle_scope_and_unit(effective_scope, "", unit, relative_base) + start_of_unit = f"DATETRUNC({base_expr}, {unit.lower()})" + + if subunit is None: + # "first of the month" -> single date (first day of the unit) + return start_of_unit + else: + # "first week of the year" -> range + # Start: beginning of unit + (n-1) subunits + if n == 1: + range_start = start_of_unit + else: + range_start = f"DATEADD({start_of_unit}, {n - 1}, {subunit.lower()})" + + # End: start + 1 subunit + range_end = f"DATEADD({range_start}, 1, {subunit.lower()})" + + return f"{range_start} : {range_end}" + + def handle_modifier_and_unit( modifier: str, scope: str, delta: str, unit: str, relative_base: str ) -> str: @@ -303,7 +370,7 @@ def handle_scope_and_unit(scope: str, delta: str, unit: str, relative_base: str) _delta = int(delta) if delta else 1 if scope.lower() == "this": return f"DATETIME('{relative_base}')" - elif scope.lower() in ["last", 
"prior"]: + elif scope.lower() in ["last", "prior", "previous"]: return f"DATEADD(DATETIME('{relative_base}'), -{_delta}, {unit})" elif scope.lower() == "next": return f"DATEADD(DATETIME('{relative_base}'), {_delta}, {unit})" @@ -415,13 +482,31 @@ def get_since_until( # pylint: disable=too-many-arguments,too-many-locals,too-m ): time_range = "DATETRUNC(DATEADD(DATETIME('today'), 0, YEAR), YEAR) : DATETRUNC(DATEADD(DATETIME('today'), 1, YEAR), YEAR)" # noqa: E501 + # Handle "first [subunit] of [scope] [unit]" patterns that produce a range + # e.g., "first week of this year" -> returns start of year to end of first week + # e.g., "first month of this quarter" -> returns start of first month to end + # Note: "day" is NOT included as a subunit here because "first day of X" should + # return a single date, not a range. Those are handled in time_range_lookup below. + if time_range and separator not in time_range: + nth_subunit_pattern = ( + r"^(first|1st)\s+" + r"(week|month|quarter)\s+of\s+" + r"(?:(this|last|next|prior|previous)\s+)?" + r"(?:the\s+)?" 
+ r"(week|month|quarter|year)$" + ) + match = re.search(nth_subunit_pattern, time_range, re.IGNORECASE) + if match: + ordinal, subunit, scope, unit = match.groups() + time_range = handle_nth_of(ordinal, subunit, scope, unit, relative_start) + if time_range and separator in time_range: time_range_lookup = [ ( r"^(start of|beginning of|end of)\s+" - r"(this|last|next|prior)\s+" + r"(this|last|next|prior|previous)\s+" r"([0-9]+)?\s*" - r"(day|week|month|quarter|year)s?$", # Matches phrases like "start of next month" # noqa: E501 + r"(day|week|month|quarter|year)s?$", # Matches "start of next month", "end of last year" # noqa: E501 lambda modifier, scope, delta, unit: handle_modifier_and_unit( modifier, scope, @@ -431,7 +516,29 @@ def get_since_until( # pylint: disable=too-many-arguments,too-many-locals,too-m ), ), ( - r"^(this|last|next|prior)\s+" + # Pattern for "first of [scope] [unit]" - single date + # e.g., "first of this month", "first of last year" + r"^(first|1st)\s+" + r"(?:day\s+)?of\s+" + r"(this|last|next|prior|previous)\s+" + r"(day|week|month|quarter|year)s?$", + lambda ordinal, scope, unit: handle_nth_of( + ordinal, None, scope, unit, relative_start + ), + ), + ( + # Pattern for "first of the [unit]" - single date with default scope + # e.g., "first of the month", "first day of the year" + r"^(first|1st)\s+" + r"(?:day\s+)?of\s+" + r"(?:the\s+)?" 
+ r"(week|month|quarter|year)$", + lambda ordinal, unit: handle_nth_of( + ordinal, None, None, unit, relative_start + ), + ), + ( + r"^(this|last|next|prior|previous)\s+" r"([0-9]+)?\s*" r"(second|minute|day|week|month|quarter|year)s?$", # Matches "next 5 days" or "last 2 weeks" # noqa: E501 lambda scope, delta, unit: handle_scope_and_unit( diff --git a/tests/unit_tests/utils/date_parser_tests.py b/tests/unit_tests/utils/date_parser_tests.py index c5e6d512d3..de23123988 100644 --- a/tests/unit_tests/utils/date_parser_tests.py +++ b/tests/unit_tests/utils/date_parser_tests.py @@ -611,3 +611,70 @@ def test_date_range_migration() -> None: field = "10 years ago" assert not re.search(DateRangeMigration.x_dateunit, field) + + +@patch("superset.utils.date_parser.parse_human_datetime", mock_parse_human_datetime) +def test_first_of_with_explicit_scope() -> None: + """Test 'first of [scope] [unit]' expressions that return a single date.""" + result = get_since_until("first of this month : ") + assert result == (datetime(2016, 11, 1), None) + + result = get_since_until("first of last month : ") + assert result == (datetime(2016, 10, 1), None) + + result = get_since_until("first of next month : ") + assert result == (datetime(2016, 12, 1), None) + + result = get_since_until("first of prior month : ") + assert result == (datetime(2016, 10, 1), None) + + result = get_since_until("first of previous month : ") + assert result == (datetime(2016, 10, 1), None) + + result = get_since_until("first day of this year : ") + assert result == (datetime(2016, 1, 1), None) + + result = get_since_until("first day of last year : ") + assert result == (datetime(2015, 1, 1), None) + + result = get_since_until("first day of this week : ") + assert result == (datetime(2016, 11, 7), None) + + +@patch("superset.utils.date_parser.parse_human_datetime", mock_parse_human_datetime) +def test_first_of_with_default_scope() -> None: + """Test 'first of the [unit]' expressions that default to 'this'.""" + 
result = get_since_until("first of the month : ") + assert result == (datetime(2016, 11, 1), None) + + result = get_since_until("first of the year : ") + assert result == (datetime(2016, 1, 1), None) + + result = get_since_until("first day of the month : ") + assert result == (datetime(2016, 11, 1), None) + + result = get_since_until("first day of the week : ") + assert result == (datetime(2016, 11, 7), None) + + +@patch("superset.utils.date_parser.parse_human_datetime", mock_parse_human_datetime) +def test_first_subunit_of_with_explicit_scope() -> None: + """Test 'first [subunit] of [scope] [unit]' expressions that return a range.""" + result = get_since_until("first week of this year") + assert result == (datetime(2016, 1, 1), datetime(2016, 1, 8)) + + result = get_since_until("first month of this quarter") + assert result == (datetime(2016, 10, 1), datetime(2016, 11, 1)) + + result = get_since_until("first week of last month") + assert result == (datetime(2016, 10, 1), datetime(2016, 10, 8)) + + +@patch("superset.utils.date_parser.parse_human_datetime", mock_parse_human_datetime) +def test_first_subunit_of_with_default_scope() -> None: + """Test 'first [subunit] of the [unit]' expressions that default to 'this'.""" + result = get_since_until("first week of the year") + assert result == (datetime(2016, 1, 1), datetime(2016, 1, 8)) + + result = get_since_until("first month of the quarter") + assert result == (datetime(2016, 10, 1), datetime(2016, 11, 1))
