Re: [PR] feat(mcp): MCP Service POC - Phase 1 [superset]

via GitHub Wed, 06 Aug 2025 07:48:33 -0700


geido commented on code in PR #33976:
URL: https://github.com/apache/superset/pull/33976#discussion_r2257177916



##########
superset/cli/mcp.py:
##########
@@ -0,0 +1,366 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""CLI module for MCP service"""
+
+import os
+import secrets
+import sys
+from pathlib import Path
+from typing import Optional
+
+import click
+from colorama import Fore, Style
+from flask import current_app
+from flask.cli import with_appcontext
+
+from superset import db, security_manager
+from superset.mcp_service.server import run_server
+
+
+@click.group()
+def mcp() -> None:
+    """Model Context Protocol service commands"""
+    pass
+
+
+@mcp.command()
+@click.option("--host", default="127.0.0.1", help="Host to bind to")
+@click.option("--port", default=5008, help="Port to bind to")
+@click.option("--debug", is_flag=True, help="Enable debug mode")
+@click.option("--sql-debug", is_flag=True, help="Enable SQL query logging")
+def run(host: str, port: int, debug: bool, sql_debug: bool) -> None:
+    """Run the MCP service"""
+    if sql_debug:
+        os.environ["SQLALCHEMY_DEBUG"] = "1"
+        click.echo("🔍 SQL Debug mode enabled")
+
+    run_server(host=host, port=port, debug=debug)
+
+
+@mcp.command()
+@click.option("--force", is_flag=True, help="Force setup even if configuration exists")
+@click.option("--skip-config", is_flag=True, help="Skip configuration file setup")
+@click.option("--skip-examples", is_flag=True, help="Skip loading example datasets")
+@click.option("--api-key", help="Anthropic API key for MCP service")
+@with_appcontext
+def setup(
+    force: bool, skip_config: bool, skip_examples: bool, api_key: Optional[str]
+) -> None:
+    """Set up MCP service for Apache Superset"""
+    click.echo(f"{Fore.CYAN}=== Apache Superset MCP Service Setup ==={Style.RESET_ALL}")
+    click.echo()
+
+    # Check if already set up
+    config_path = Path("superset_config.py")
+
+    # 1. Configuration file setup
+    if not skip_config:
+        if config_path.exists() and not force:
+            click.echo(
+                f"{Fore.YELLOW}⚠️  superset_config.py already exists{Style.RESET_ALL}"
+            )
+            if click.confirm("Do you want to check/add missing MCP settings?"):
+                _update_config_file(config_path, api_key)
+            else:
+                click.echo("Keeping existing configuration")
+        else:
+            _create_config_file(config_path, api_key)
+
+    # 2. Database check
+    try:
+        # Check if database is initialized
+        with db.engine.connect() as conn:
+            conn.execute("SELECT COUNT(*) FROM ab_user")
+            click.echo(f"{Fore.GREEN}✓ Database already initialized{Style.RESET_ALL}")
+
+            # Check for admin user
+            admin_exists = security_manager.find_user(username="admin")
+            if admin_exists:
+                click.echo(f"{Fore.GREEN}✓ Admin user already exists{Style.RESET_ALL}")
+            else:
+                if click.confirm("Create admin user (admin/admin)?"):
+                    _create_admin_user()
+    except Exception:
+        click.echo(
+            f"{Fore.YELLOW}Database not initialized. "
+            f"Run 'superset db upgrade' and 'superset init' first.{Style.RESET_ALL}"
+        )
+        sys.exit(1)
+
+    # 3. Example data
+    if not skip_examples:
+        if click.confirm("Load example datasets?"):
+            click.echo("Loading example datasets...")
+            from superset.cli.examples import load_examples
+
+            load_examples()
+            click.echo(f"{Fore.GREEN}✓ Example datasets loaded{Style.RESET_ALL}")
+
+    # 4. Verify Superset configuration
+    _verify_superset_config()
+
+    # 5. Show final instructions
+    click.echo()
+    click.echo(f"{Fore.GREEN}=== Setup Complete! ==={Style.RESET_ALL}")
+    click.echo()
+    click.echo("To start Superset:")
+    click.echo(
+        "  1. In terminal 1: superset run -p 8088 --with-threads --reload --debugger"
+    )
+    click.echo("  2. In terminal 2: cd superset-frontend && npm run dev-server")
+    click.echo("  3. Open http://localhost:8088")
+    click.echo("  4. Login with admin/admin")
+    click.echo()
+    click.echo("To start MCP service:")
+    click.echo("  superset mcp run")
+    click.echo()
+    if _check_csrf_issue():
+        click.echo(
+            f"{Fore.YELLOW}Note: If you encounter login loops, "
+            f"edit superset_config.py and set:{Style.RESET_ALL}"
+        )
+        click.echo("  WTF_CSRF_ENABLED = False")
+
+
+def _create_config_file(config_path: Path, api_key: Optional[str]) -> None:
+    """Create a new superset_config.py file"""
+    click.echo("Creating new superset_config.py...")
+
+    config_content = f"""# Apache Superset Configuration
+SECRET_KEY = '{secrets.token_urlsafe(42)}'
+
+# Session configuration for local development
+SESSION_COOKIE_HTTPONLY = True
+SESSION_COOKIE_SECURE = False
+SESSION_COOKIE_SAMESITE = 'Lax'
+SESSION_COOKIE_NAME = 'superset_session'
+PERMANENT_SESSION_LIFETIME = 86400
+
+# CSRF Protection (disable if login loop occurs)
+WTF_CSRF_ENABLED = True
+WTF_CSRF_TIME_LIMIT = None
+
+# MCP Service Configuration
+MCP_ADMIN_USERNAME = 'admin'
+MCP_DEV_USERNAME = 'admin'
+SUPERSET_WEBSERVER_ADDRESS = 'http://localhost:8088'
+
+# WebDriver Configuration for screenshots
+WEBDRIVER_BASEURL = 'http://localhost:8088/'
+WEBDRIVER_BASEURL_USER_FRIENDLY = WEBDRIVER_BASEURL
+"""
+
+    if api_key or click.confirm("Do you have an Anthropic API key for MCP service?"):
+        if not api_key:
+            api_key = click.prompt("Enter your Anthropic API key", hide_input=True)
+        config_content += (
+            f"\n# Anthropic API Configuration\nANTHROPIC_API_KEY = '{api_key}'\n"
+        )
+
+    config_path.write_text(config_content)
+    click.echo(f"{Fore.GREEN}✓ Created superset_config.py{Style.RESET_ALL}")
+
+
+def _update_config_file(config_path: Path, api_key: Optional[str]) -> None:
+    """Update existing config file with missing settings"""
+    content = config_path.read_text()
+    updated = False
+
+    # Check for missing settings
+    if "SECRET_KEY" not in content:
+        click.echo("Adding SECRET_KEY...")
+        content = f"SECRET_KEY = '{secrets.token_urlsafe(42)}'\n" + content
+        updated = True
+
+    if "MCP_ADMIN_USERNAME" not in content:
+        click.echo("Adding MCP configuration...")
+        content += "\n# MCP Service Configuration\n"
+        content += "MCP_ADMIN_USERNAME = 'admin'\n"
+        content += "MCP_DEV_USERNAME = 'admin'\n"
+        content += "SUPERSET_WEBSERVER_ADDRESS = 'http://localhost:8088'\n"
+        updated = True
+
+    if "WEBDRIVER_BASEURL" not in content:
+        click.echo("Adding WebDriver configuration...")
+        content += "\n# WebDriver Configuration for screenshots\n"
+        content += "WEBDRIVER_BASEURL = 'http://localhost:8088/'\n"
+        content += "WEBDRIVER_BASEURL_USER_FRIENDLY = WEBDRIVER_BASEURL\n"
+        updated = True
+
+    # Handle API key
+    if "ANTHROPIC_API_KEY" in content:
+        click.echo(
+            f"{Fore.GREEN}✓ Anthropic API key already configured{Style.RESET_ALL}"
+        )
+        if api_key or click.confirm("Update Anthropic API key?"):
+            if not api_key:
+                api_key = click.prompt(
+                    "Enter your new Anthropic API key", hide_input=True
+                )
+            # Remove old key
+            lines = content.split("\n")
+            content = "\n".join(
+                line for line in lines if "ANTHROPIC_API_KEY" not in line
+            )
+            content += (
+                f"\n# Anthropic API Configuration\nANTHROPIC_API_KEY = '{api_key}'\n"
+            )
+            updated = True
+    else:
+        if api_key or click.confirm(
+            "Do you have an Anthropic API key for MCP service?"
+        ):
+            if not api_key:
+                api_key = click.prompt("Enter your Anthropic API key", hide_input=True)
+            content += (
+                f"\n# Anthropic API Configuration\nANTHROPIC_API_KEY = '{api_key}'\n"
+            )
+            updated = True
+
+    if updated:
+        config_path.write_text(content)
+        click.echo(f"{Fore.GREEN}✓ Configuration updated{Style.RESET_ALL}")
+
+
+def _create_admin_user() -> None:
+    """Create admin user"""
+    from superset.utils.decorators import transaction
+
+    click.echo("Creating admin user...")
+    click.echo("Username: admin")
+    click.echo("Password: admin")
+
+    @transaction()
+    def create_user() -> bool:
+        admin_user = security_manager.add_user(
+            username="admin",
+            first_name="Admin",
+            last_name="User",
+            email="admin@localhost",
+            role=security_manager.find_role("Admin"),
+            password="admin",  # noqa: S106
+        )
+        return bool(admin_user)
+
+    if create_user():
+        click.echo(f"{Fore.GREEN}✓ Admin user created{Style.RESET_ALL}")
+    else:
+        click.echo(f"{Fore.RED}Failed to create admin user{Style.RESET_ALL}")
+
+
+def _check_csrf_issue() -> bool:

Review Comment:
   I am curious about the implications here



##########
superset/cli/mcp.py:
##########
@@ -0,0 +1,366 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""CLI module for MCP service"""
+
+import os
+import secrets
+import sys
+from pathlib import Path
+from typing import Optional
+
+import click
+from colorama import Fore, Style
+from flask import current_app
+from flask.cli import with_appcontext
+
+from superset import db, security_manager
+from superset.mcp_service.server import run_server
+
+
+@click.group()
+def mcp() -> None:
+    """Model Context Protocol service commands"""
+    pass
+
+
+@mcp.command()
+@click.option("--host", default="127.0.0.1", help="Host to bind to")
+@click.option("--port", default=5008, help="Port to bind to")
+@click.option("--debug", is_flag=True, help="Enable debug mode")
+@click.option("--sql-debug", is_flag=True, help="Enable SQL query logging")
+def run(host: str, port: int, debug: bool, sql_debug: bool) -> None:
+    """Run the MCP service"""
+    if sql_debug:
+        os.environ["SQLALCHEMY_DEBUG"] = "1"
+        click.echo("🔍 SQL Debug mode enabled")
+
+    run_server(host=host, port=port, debug=debug)
+
+
+@mcp.command()
+@click.option("--force", is_flag=True, help="Force setup even if configuration exists")
+@click.option("--skip-config", is_flag=True, help="Skip configuration file setup")
+@click.option("--skip-examples", is_flag=True, help="Skip loading example datasets")
+@click.option("--api-key", help="Anthropic API key for MCP service")
+@with_appcontext
+def setup(
+    force: bool, skip_config: bool, skip_examples: bool, api_key: Optional[str]
+) -> None:
+    """Set up MCP service for Apache Superset"""
+    click.echo(f"{Fore.CYAN}=== Apache Superset MCP Service Setup ==={Style.RESET_ALL}")
+    click.echo()
+
+    # Check if already set up
+    config_path = Path("superset_config.py")
+
+    # 1. Configuration file setup
+    if not skip_config:
+        if config_path.exists() and not force:
+            click.echo(
+                f"{Fore.YELLOW}⚠️  superset_config.py already exists{Style.RESET_ALL}"
+            )
+            if click.confirm("Do you want to check/add missing MCP settings?"):
+                _update_config_file(config_path, api_key)
+            else:
+                click.echo("Keeping existing configuration")
+        else:
+            _create_config_file(config_path, api_key)
+
+    # 2. Database check
+    try:
+        # Check if database is initialized
+        with db.engine.connect() as conn:
+            conn.execute("SELECT COUNT(*) FROM ab_user")
+            click.echo(f"{Fore.GREEN}✓ Database already initialized{Style.RESET_ALL}")
+
+            # Check for admin user
+            admin_exists = security_manager.find_user(username="admin")
+            if admin_exists:
+                click.echo(f"{Fore.GREEN}✓ Admin user already exists{Style.RESET_ALL}")
+            else:
+                if click.confirm("Create admin user (admin/admin)?"):
+                    _create_admin_user()
+    except Exception:
+        click.echo(
+            f"{Fore.YELLOW}Database not initialized. "
+            f"Run 'superset db upgrade' and 'superset init' first.{Style.RESET_ALL}"
+        )
+        sys.exit(1)
+
+    # 3. Example data
+    if not skip_examples:
+        if click.confirm("Load example datasets?"):
+            click.echo("Loading example datasets...")
+            from superset.cli.examples import load_examples
+
+            load_examples()
+            click.echo(f"{Fore.GREEN}✓ Example datasets loaded{Style.RESET_ALL}")
+
+    # 4. Verify Superset configuration
+    _verify_superset_config()
+
+    # 5. Show final instructions
+    click.echo()
+    click.echo(f"{Fore.GREEN}=== Setup Complete! ==={Style.RESET_ALL}")
+    click.echo()
+    click.echo("To start Superset:")
+    click.echo(
+        "  1. In terminal 1: superset run -p 8088 --with-threads --reload --debugger"
+    )
+    click.echo("  2. In terminal 2: cd superset-frontend && npm run dev-server")
+    click.echo("  3. Open http://localhost:8088")
+    click.echo("  4. Login with admin/admin")
+    click.echo()
+    click.echo("To start MCP service:")
+    click.echo("  superset mcp run")
+    click.echo()
+    if _check_csrf_issue():
+        click.echo(
+            f"{Fore.YELLOW}Note: If you encounter login loops, "
+            f"edit superset_config.py and set:{Style.RESET_ALL}"
+        )
+        click.echo("  WTF_CSRF_ENABLED = False")
+
+
+def _create_config_file(config_path: Path, api_key: Optional[str]) -> None:
+    """Create a new superset_config.py file"""
+    click.echo("Creating new superset_config.py...")
+
+    config_content = f"""# Apache Superset Configuration
+SECRET_KEY = '{secrets.token_urlsafe(42)}'
+
+# Session configuration for local development
+SESSION_COOKIE_HTTPONLY = True
+SESSION_COOKIE_SECURE = False
+SESSION_COOKIE_SAMESITE = 'Lax'
+SESSION_COOKIE_NAME = 'superset_session'
+PERMANENT_SESSION_LIFETIME = 86400
+
+# CSRF Protection (disable if login loop occurs)
+WTF_CSRF_ENABLED = True
+WTF_CSRF_TIME_LIMIT = None
+
+# MCP Service Configuration
+MCP_ADMIN_USERNAME = 'admin'
+MCP_DEV_USERNAME = 'admin'
+SUPERSET_WEBSERVER_ADDRESS = 'http://localhost:8088'
+
+# WebDriver Configuration for screenshots
+WEBDRIVER_BASEURL = 'http://localhost:8088/'
+WEBDRIVER_BASEURL_USER_FRIENDLY = WEBDRIVER_BASEURL
+"""
+
+    if api_key or click.confirm("Do you have an Anthropic API key for MCP service?"):
+        if not api_key:
+            api_key = click.prompt("Enter your Anthropic API key", hide_input=True)
+        config_content += (
+            f"\n# Anthropic API Configuration\nANTHROPIC_API_KEY = '{api_key}'\n"
+        )
+
+    config_path.write_text(config_content)
+    click.echo(f"{Fore.GREEN}✓ Created superset_config.py{Style.RESET_ALL}")
+
+
+def _update_config_file(config_path: Path, api_key: Optional[str]) -> None:
+    """Update existing config file with missing settings"""
+    content = config_path.read_text()
+    updated = False
+
+    # Check for missing settings
+    if "SECRET_KEY" not in content:
+        click.echo("Adding SECRET_KEY...")
+        content = f"SECRET_KEY = '{secrets.token_urlsafe(42)}'\n" + content
+        updated = True
+
+    if "MCP_ADMIN_USERNAME" not in content:
+        click.echo("Adding MCP configuration...")
+        content += "\n# MCP Service Configuration\n"
+        content += "MCP_ADMIN_USERNAME = 'admin'\n"
+        content += "MCP_DEV_USERNAME = 'admin'\n"
+        content += "SUPERSET_WEBSERVER_ADDRESS = 'http://localhost:8088'\n"
+        updated = True
+
+    if "WEBDRIVER_BASEURL" not in content:
+        click.echo("Adding WebDriver configuration...")
+        content += "\n# WebDriver Configuration for screenshots\n"
+        content += "WEBDRIVER_BASEURL = 'http://localhost:8088/'\n"
+        content += "WEBDRIVER_BASEURL_USER_FRIENDLY = WEBDRIVER_BASEURL\n"
+        updated = True
+
+    # Handle API key
+    if "ANTHROPIC_API_KEY" in content:

Review Comment:
   Are we planning to take an opinionated stance on using Anthropic in this 
case? Alternatively, should we consider implementing a utility to detect which 
provider is available and has been selected? I realize this would add some 
complexity, but I wanted to raise it here for discussion.



##########
superset/mcp_service/chart/prompts/create_chart_guided.py:
##########
@@ -0,0 +1,107 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Chart prompts for visualization guidance
+"""
+
+import logging
+
+from superset.mcp_service.auth import mcp_auth_hook
+from superset.mcp_service.mcp_app import mcp
+
+logger = logging.getLogger(__name__)
+
+
+@mcp.prompt("create_chart_guided")
+@mcp_auth_hook
+async def create_chart_guided_prompt(

Review Comment:
   Great prompt!



##########
superset/mcp_service/chart/preview_utils.py:
##########
@@ -0,0 +1,561 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Preview utilities for chart generation without saving.
+
+This module provides utilities for generating chart previews
+from form data without requiring a saved chart object.
+"""
+
+import logging
+from typing import Any, Dict, List
+
+from superset.commands.chart.data.get_data_command import ChartDataCommand
+from superset.mcp_service.chart.schemas import (
+    ASCIIPreview,
+    ChartError,
+    TablePreview,
+    VegaLitePreview,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def generate_preview_from_form_data(
+    form_data: Dict[str, Any], dataset_id: int, preview_format: str
+) -> Any:
+    """
+    Generate preview from form data without a saved chart.
+
+    Args:
+        form_data: Chart configuration form data
+        dataset_id: Dataset ID
+        preview_format: Preview format (ascii, table, etc.)
+
+    Returns:
+        Preview object or ChartError
+    """
+    try:
+        # Execute query to get data
+        from superset.connectors.sqla.models import SqlaTable
+        from superset.extensions import db
+
+        dataset = db.session.query(SqlaTable).get(dataset_id)
+        if not dataset:
+            return ChartError(
+                error=f"Dataset {dataset_id} not found", error_type="DatasetNotFound"
+            )
+
+        # Create query context from form data using factory
+        from superset.common.query_context_factory import QueryContextFactory
+
+        factory = QueryContextFactory()
+        query_context_obj = factory.create(
+            datasource={"id": dataset_id, "type": "table"},
+            queries=[
+                {
+                    "columns": form_data.get("columns", []),
+                    "metrics": form_data.get("metrics", []),
+                    "orderby": form_data.get("orderby", []),
+                    "row_limit": form_data.get("row_limit", 100),
+                    "filters": form_data.get("adhoc_filters", []),
+                    "time_range": form_data.get("time_range", "No filter"),
+                }
+            ],
+            form_data=form_data,
+        )
+
+        # Execute query
+        command = ChartDataCommand(query_context_obj)
+        result = command.run()
+
+        if not result or not result.get("queries"):
+            return ChartError(
+                error="No data returned from query", error_type="EmptyResult"
+            )
+
+        query_result = result["queries"][0]
+        data = query_result.get("data", [])
+
+        # Generate preview based on format
+        if preview_format == "ascii":
+            return _generate_ascii_preview_from_data(data, form_data)
+        elif preview_format == "table":
+            return _generate_table_preview_from_data(data, form_data)
+        elif preview_format == "vega_lite":
+            return _generate_vega_lite_preview_from_data(data, form_data)
+        else:
+            return ChartError(
+                error=f"Unsupported preview format: {preview_format}",
+                error_type="UnsupportedFormat",
+            )
+
+    except Exception as e:
+        logger.error(f"Preview generation from form data failed: {e}")
+        return ChartError(
+            error=f"Failed to generate preview: {str(e)}", error_type="PreviewError"
+        )
+
+
+def _generate_ascii_preview_from_data(
+    data: List[Dict[str, Any]], form_data: Dict[str, Any]
+) -> ASCIIPreview:
+    """Generate ASCII preview from raw data."""
+    viz_type = form_data.get("viz_type", "table")
+
+    # Handle different chart types
+    if viz_type in ["bar", "dist_bar", "column"]:
+        content = _generate_safe_ascii_bar_chart(data)
+    elif viz_type in ["line", "area"]:
+        content = _generate_safe_ascii_line_chart(data)
+    elif viz_type == "pie":
+        content = _generate_safe_ascii_pie_chart(data)
+    else:
+        content = _generate_safe_ascii_table(data)
+
+    return ASCIIPreview(
+        ascii_content=content, width=80, height=20, supports_color=False
+    )
+
+
+def _calculate_column_widths(
+    display_columns: List[str], data: List[Dict[str, Any]]
+) -> Dict[str, int]:
+    """Calculate optimal width for each column."""
+    column_widths = {}
+    for col in display_columns:
+        # Start with column name length
+        max_width = len(str(col))
+
+        # Check data values to determine width
+        for row in data[:20]:  # Sample first 20 rows
+            val = row.get(col, "")
+            if isinstance(val, float):
+                val_str = f"{val:.2f}"
+            elif isinstance(val, int):
+                val_str = str(val)
+            else:
+                val_str = str(val)
+            max_width = max(max_width, len(val_str))
+
+        # Set reasonable bounds
+        column_widths[col] = min(max(max_width, 8), 25)
+    return column_widths
+
+
+def _format_value(val: Any, width: int) -> str:
+    """Format a value based on its type."""
+    if isinstance(val, float):
+        if abs(val) >= 1000000:
+            val_str = f"{val:.2e}"  # Scientific notation for large numbers
+        elif abs(val) >= 1000:
+            val_str = f"{val:,.2f}"  # Thousands separator
+        else:
+            val_str = f"{val:.2f}"
+    elif isinstance(val, int):
+        if abs(val) >= 1000:
+            val_str = f"{val:,}"  # Thousands separator
+        else:
+            val_str = str(val)
+    elif val is None:
+        val_str = "NULL"
+    else:
+        val_str = str(val)
+
+    # Truncate if too long
+    if len(val_str) > width:
+        val_str = val_str[: width - 2] + ".."
+    return val_str
+
+
+def _generate_table_preview_from_data(
+    data: List[Dict[str, Any]], form_data: Dict[str, Any]
+) -> TablePreview:
+    """Generate table preview from raw data with improved formatting."""
+    if not data:
+        return TablePreview(
+            table_data="No data available", row_count=0, supports_sorting=False
+        )
+
+    # Get columns
+    columns = list(data[0].keys()) if data else []
+
+    # Determine optimal column widths and how many columns to show
+    max_columns = 8  # Show more columns than before
+    display_columns = columns[:max_columns]
+
+    # Calculate optimal width for each column
+    column_widths = _calculate_column_widths(display_columns, data)
+
+    # Format table with proper alignment
+    lines = ["Table Preview", "=" * 80]
+
+    # Header with dynamic width
+    header_parts = []
+    separator_parts = []
+    for col in display_columns:
+        width = column_widths[col]
+        col_name = str(col)
+        if len(col_name) > width:
+            col_name = col_name[: width - 2] + ".."
+        header_parts.append(f"{col_name:<{width}}")
+        separator_parts.append("-" * width)
+
+    lines.append(" | ".join(header_parts))
+    lines.append("-+-".join(separator_parts))
+
+    # Data rows with proper formatting
+    rows_shown = min(len(data), 15)  # Show more rows
+    for row in data[:rows_shown]:
+        row_parts = []
+        for col in display_columns:
+            width = column_widths[col]
+            val = row.get(col, "")
+            val_str = _format_value(val, width)
+            row_parts.append(f"{val_str:<{width}}")
+        lines.append(" | ".join(row_parts))
+
+    # Summary information
+    if len(data) > rows_shown:
+        lines.append(f"... and {len(data) - rows_shown} more rows")
+
+    if len(columns) > max_columns:
+        lines.append(f"... and {len(columns) - max_columns} more columns")
+
+    lines.append("")
+    lines.append(f"Total: {len(data)} rows × {len(columns)} columns")
+
+    return TablePreview(
+        table_data="\n".join(lines), row_count=len(data), supports_sorting=True
+    )
+
+
+def _generate_safe_ascii_bar_chart(data: List[Dict[str, Any]]) -> str:
+    """Generate ASCII bar chart with proper error handling."""
+    if not data:
+        return "No data available for bar chart"
+
+    lines = ["ASCII Bar Chart", "=" * 50]
+
+    # Extract values safely
+    values = []
+    labels = []
+
+    for row in data[:10]:
+        label = None
+        value = None
+
+        for _, val in row.items():
+            if isinstance(val, (int, float)) and not _is_nan(val) and value is None:
+                value = val
+            elif isinstance(val, str) and label is None:
+                label = val
+
+        if value is not None:
+            values.append(value)
+            labels.append(label or f"Item {len(values)}")
+
+    if not values:
+        return "No numeric data found for bar chart"
+
+    # Generate bars
+    max_val = max(values)
+    if max_val == 0:
+        return "All values are zero"
+
+    for label, value in zip(labels, values, strict=False):
+        bar_length = int((value / max_val) * 30)
+        bar = "█" * bar_length
+        lines.append(f"{label[:10]:>10} |{bar:<30} {value:.2f}")
+
+    return "\n".join(lines)
+
+
+def _generate_safe_ascii_line_chart(data: List[Dict[str, Any]]) -> str:
+    """Generate ASCII line chart with proper NaN handling."""
+    if not data:
+        return "No data available for line chart"
+
+    lines = ["ASCII Line Chart", "=" * 50]
+    values = _extract_numeric_values_safe(data)
+
+    if not values:
+        return "No valid numeric data found for line chart"
+
+    range_str = _format_range_display(values)
+    lines.append(range_str)
+
+    sparkline = _generate_sparkline_safe(values)
+    lines.append(sparkline)
+
+    return "\n".join(lines)
+
+
+def _extract_numeric_values_safe(data: List[Dict[str, Any]]) -> List[float]:
+    """Extract numeric values safely from data."""
+    values = []
+    for row in data[:20]:
+        for _, val in row.items():
+            if isinstance(val, (int, float)) and not _is_nan(val):
+                values.append(val)
+                break
+    return values
+
+
+def _format_range_display(values: List[float]) -> str:
+    """Format range display safely."""
+    min_val = min(values)
+    max_val = max(values)
+
+    if _is_nan(min_val) or _is_nan(max_val):
+        return "Range: Unable to calculate"
+    else:
+        return f"Range: {min_val:.2f} to {max_val:.2f}"
+
+
+def _generate_sparkline_safe(values: List[float]) -> str:
+    """Generate sparkline from values."""
+    if not values:
+        return ""
+
+    min_val = min(values)
+
+    if (max_val := max(values)) != min_val:
+        sparkline = ""
+        for val in values:
+            normalized = (val - min_val) / (max_val - min_val)
+            if normalized < 0.2:
+                sparkline += "▁"
+            elif normalized < 0.4:
+                sparkline += "▂"
+            elif normalized < 0.6:
+                sparkline += "▄"
+            elif normalized < 0.8:
+                sparkline += "▆"
+            else:
+                sparkline += "█"
+        return sparkline
+    else:
+        return "─" * len(values)  # Flat line if all values are same
+
+
+def _generate_safe_ascii_pie_chart(data: List[Dict[str, Any]]) -> str:
+    """Generate ASCII pie chart representation."""
+    if not data:
+        return "No data available for pie chart"
+
+    lines = ["ASCII Pie Chart", "=" * 50]
+
+    # Extract values and labels
+    values = []
+    labels = []
+
+    for row in data[:8]:  # Limit slices
+        label = None
+        value = None
+
+        for _, val in row.items():
+            if isinstance(val, (int, float)) and not _is_nan(val) and value is None:
+                value = val
+            elif isinstance(val, str) and label is None:
+                label = val
+
+        if value is not None and value > 0:
+            values.append(value)
+            labels.append(label or f"Slice {len(values)}")
+
+    if not values:
+        return "No valid data for pie chart"
+
+    # Calculate percentages
+    total = sum(values)
+    if total == 0:
+        return "Total is zero"
+
+    for label, value in zip(labels, values, strict=False):
+        percentage = (value / total) * 100
+        bar_length = int(percentage / 3)  # Scale to fit
+        bar = "●" * bar_length
+        lines.append(f"{label[:15]:>15}: {bar} {percentage:.1f}%")
+
+    return "\n".join(lines)
+
+
+def _generate_safe_ascii_table(data: List[Dict[str, Any]]) -> str:
+    """Generate ASCII table with safe formatting."""
+    if not data:
+        return "No data available"
+
+    lines = ["Data Table", "=" * 50]
+
+    # Get columns
+    columns = list(data[0].keys()) if data else []
+
+    # Format header
+    header = " | ".join(str(col)[:10] for col in columns[:5])
+    lines.append(header)
+    lines.append("-" * len(header))
+
+    # Format rows
+    for row in data[:10]:
+        row_str = " | ".join(str(row.get(col, ""))[:10] for col in columns[:5])
+        lines.append(row_str)
+
+    if len(data) > 10:
+        lines.append(f"... {len(data) - 10} more rows")
+
+    return "\n".join(lines)
+
+
+def _is_nan(value: Any) -> bool:
+    """Check if a value is NaN."""
+    try:
+        import math
+
+        return math.isnan(float(value))
+    except (ValueError, TypeError):
+        return False
+
+
+def _generate_vega_lite_preview_from_data(  # noqa: C901
+    data: List[Dict[str, Any]], form_data: Dict[str, Any]
+) -> VegaLitePreview:

Review Comment:
   Lovely



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] feat(mcp): MCP Service POC - Phase 1 [superset]

Reply via email to