msyavuz commented on code in PR #34213:
URL: https://github.com/apache/superset/pull/34213#discussion_r2217298198


##########
superset/utils/core.py:
##########
@@ -1503,6 +1538,67 @@ def get_column_names_from_metrics(metrics: list[Metric]) 
-> list[str]:
     return [col for col in map(get_column_name_from_metric, metrics) if col]
 
 
+def map_sql_type_to_inferred_type(sql_type: Optional[str]) -> str:
+    """
+    Map a SQL type to a type string recognized by pandas' `infer_objects` 
method.
+
+    If the SQL type is not recognized, the function will return "string" as the
+    default type.
+
+    :param sql_type: SQL type to map
+    :return: string type recognized by pandas
+    """
+    if not sql_type:
+        return "string"  # If no SQL type is provided, return "string" as 
default
+
+    # Use regular expressions to check the SQL type. The first match is 
returned.
+    for pattern, inferred_type in TYPE_MAPPING.items():
+        if re.search(pattern, sql_type, re.IGNORECASE):
+            return inferred_type
+
+    return "string"  # If no match is found, return "string" as default
+
+
+def get_metric_type_from_column(column: Any, datasource: BaseDatasource | 
Query) -> str:
+    """
+    Determine the metric type from a given column in a datasource.
+
+    This function checks if the specified column is a metric in the provided
+    datasource. If it is, it extracts the SQL expression associated with the
+    metric and attempts to identify the aggregation operation used within
+    the expression (e.g., SUM, COUNT, etc.). It then maps the operation to
+    a corresponding GenericDataType.
+
+    :param column: The column name or identifier to check.
+    :param datasource: The datasource containing metrics to search within.
+    :return: The inferred metric type as a string, or an empty string if the
+             column is not a metric or no valid operation is found.
+    """
+
+    from superset.connectors.sqla.models import SqlMetric
+
+    metric: SqlMetric = next(
+        (metric for metric in datasource.metrics if metric.metric_name == 
column),
+        SqlMetric(metric_name=""),
+    )
+
+    if metric.metric_name == "":
+        return ""
+
+    expression: str = metric.expression
+
+    match = re.match(
+        r"(SUM|AVG|COUNT|COUNT_DISTINCT|MIN|MAX|FIRST|LAST)\((.*)\)", 
expression
+    )
+
+    if match:
+        operation = match.group(1)
+        return MetricMapType.get(operation, "")

Review Comment:
   ```suggestion
           return METRIC_MAP_TYPE.get(operation, "")
   ```



##########
superset/utils/core.py:
##########
@@ -120,6 +129,32 @@
 
 ADHOC_FILTERS_REGEX = re.compile("^adhoc_filters")
 
+TYPE_MAPPING = {
+    r"INT": "integer",
+    r"CHAR|TEXT|VARCHAR": "string",
+    r"DECIMAL|NUMERIC|FLOAT|DOUBLE": "floating",
+    r"BOOL": "boolean",
+    r"DATE|TIME": "datetime64",
+}
+
+MetricMapType = {

Review Comment:
   ```suggestion
   METRIC_MAP_TYPE = {
   ```



##########
superset/utils/core.py:
##########
@@ -120,6 +129,32 @@
 
 ADHOC_FILTERS_REGEX = re.compile("^adhoc_filters")
 
+TYPE_MAPPING = {
+    r"INT": "integer",

Review Comment:
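   Should we precompile these patterns with `re.compile` (using `re.IGNORECASE`) here, rather than passing the raw pattern strings to `re.search` on every call to `map_sql_type_to_inferred_type`?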



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to