korbit-ai[bot] commented on code in PR #34561: URL: https://github.com/apache/superset/pull/34561#discussion_r2255147109
########## superset/utils/screenshot_utils.py: ########## @@ -0,0 +1,179 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import io +import logging +from typing import TYPE_CHECKING + +from PIL import Image + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + try: + from playwright.sync_api import Page + except ImportError: + Page = None + + +def combine_screenshot_tiles(screenshot_tiles: list[bytes]) -> bytes: + """ + Combine multiple screenshot tiles into a single vertical image. + + Args: + screenshot_tiles: List of screenshot bytes in PNG format + + Returns: + Combined screenshot as bytes + """ + if not screenshot_tiles: + return b"" + + if len(screenshot_tiles) == 1: + return screenshot_tiles[0] + + try: + # Open all images + images = [Image.open(io.BytesIO(tile)) for tile in screenshot_tiles] + + # Calculate total dimensions + total_width = max(img.width for img in images) + total_height = sum(img.height for img in images) + + # Create combined image + combined = Image.new("RGB", (total_width, total_height), "white") + + # Paste each tile + y_offset = 0 + for img in images: + combined.paste(img, (0, y_offset)) + y_offset += img.height + + # Convert back to bytes + output = io.BytesIO() + combined.save(output, format="PNG") + return output.getvalue() + + except Exception as e: + logger.exception(f"Failed to combine screenshot tiles: {e}") + # Return the first tile as fallback + return screenshot_tiles[0] + + +def take_tiled_screenshot( + page: "Page", element_name: str, viewport_height: int = 2000 +) -> bytes | None: + """ + Take a tiled screenshot of a large dashboard by scrolling and capturing sections. + + Args: + page: Playwright page object + element_name: CSS class name of the element to screenshot + viewport_height: Height of each tile in pixels + + Returns: + Combined screenshot bytes or None if failed + """ + try: + # Get the target element + element = page.locator(f".{element_name}") + element.wait_for(timeout=30000) # 30 second timeout + + # Get dashboard dimensions and position + element_info = page.evaluate(f"""() => {{ + const el = document.querySelector(".{element_name}"); + const rect = el.getBoundingClientRect(); + return {{ + height: el.scrollHeight, + top: rect.top + window.scrollY, + left: rect.left + window.scrollX, + width: el.scrollWidth + }}; + }}""") Review Comment: ### Redundant JavaScript evaluations <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? Multiple separate JavaScript evaluations are performed for similar DOM queries, causing unnecessary browser-Python communication overhead. ###### Why this matters Each evaluation requires a context switch between Python and the browser's JavaScript engine, adding latency to the screenshot process. ###### Suggested change ∙ *Feature Preview* Combine related JavaScript operations into a single evaluation call: ```python element_info = page.evaluate(f"""() => {{ const el = document.querySelector(".{element_name}"); const initialRect = el.getBoundingClientRect(); const dimensions = {{ height: el.scrollHeight, width: el.scrollWidth, positions: [] }}; // Calculate all positions in one go for(let i = 0; i < numTiles; i++) {{ // Add position calculations here }} return dimensions; }}""") ``` ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/aab3a102-fced-4767-99ba-b3676caf871e/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/aab3a102-fced-4767-99ba-b3676caf871e?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/aab3a102-fced-4767-99ba-b3676caf871e?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/aab3a102-fced-4767-99ba-b3676caf871e?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/aab3a102-fced-4767-99ba-b3676caf871e) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:1ee10e21-f9a3-4cff-bbae-e63b96d65b11 --> [](1ee10e21-f9a3-4cff-bbae-e63b96d65b11) ########## superset/utils/screenshot_utils.py: ########## @@ -0,0 +1,179 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import io +import logging +from typing import TYPE_CHECKING + +from PIL import Image + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + try: + from playwright.sync_api import Page + except ImportError: + Page = None + + +def combine_screenshot_tiles(screenshot_tiles: list[bytes]) -> bytes: + """ + Combine multiple screenshot tiles into a single vertical image. + + Args: + screenshot_tiles: List of screenshot bytes in PNG format + + Returns: + Combined screenshot as bytes + """ + if not screenshot_tiles: + return b"" + + if len(screenshot_tiles) == 1: + return screenshot_tiles[0] + + try: + # Open all images + images = [Image.open(io.BytesIO(tile)) for tile in screenshot_tiles] Review Comment: ### Memory-intensive batch image loading <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? Loading all image tiles into memory simultaneously using a list comprehension could consume significant memory for large dashboards with many tiles. ###### Why this matters For dashboards with many tiles, this approach could lead to memory spikes and potential out-of-memory errors on systems with limited resources. ###### Suggested change ∙ *Feature Preview* Process images sequentially or in smaller batches. Consider using a generator pattern: ```python def process_tiles(tiles): for tile in tiles: yield Image.open(io.BytesIO(tile)) ``` ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/01ab87c1-4822-44d8-9bf0-aa61280d437e/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/01ab87c1-4822-44d8-9bf0-aa61280d437e?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/01ab87c1-4822-44d8-9bf0-aa61280d437e?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/01ab87c1-4822-44d8-9bf0-aa61280d437e?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/01ab87c1-4822-44d8-9bf0-aa61280d437e) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:d99f1898-5814-4296-8bb0-ce100568353d --> [](d99f1898-5814-4296-8bb0-ce100568353d) ########## superset/utils/screenshot_utils.py: ########## @@ -0,0 +1,179 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import io +import logging +from typing import TYPE_CHECKING + +from PIL import Image + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + try: + from playwright.sync_api import Page + except ImportError: + Page = None + + +def combine_screenshot_tiles(screenshot_tiles: list[bytes]) -> bytes: + """ + Combine multiple screenshot tiles into a single vertical image. + + Args: + screenshot_tiles: List of screenshot bytes in PNG format + + Returns: + Combined screenshot as bytes + """ + if not screenshot_tiles: + return b"" + + if len(screenshot_tiles) == 1: + return screenshot_tiles[0] + + try: + # Open all images + images = [Image.open(io.BytesIO(tile)) for tile in screenshot_tiles] + + # Calculate total dimensions + total_width = max(img.width for img in images) + total_height = sum(img.height for img in images) + + # Create combined image + combined = Image.new("RGB", (total_width, total_height), "white") + + # Paste each tile + y_offset = 0 + for img in images: + combined.paste(img, (0, y_offset)) + y_offset += img.height + + # Convert back to bytes + output = io.BytesIO() + combined.save(output, format="PNG") + return output.getvalue() + + except Exception as e: + logger.exception(f"Failed to combine screenshot tiles: {e}") + # Return the first tile as fallback + return screenshot_tiles[0] + + +def take_tiled_screenshot( + page: "Page", element_name: str, viewport_height: int = 2000 +) -> bytes | None: + """ + Take a tiled screenshot of a large dashboard by scrolling and capturing sections. + + Args: + page: Playwright page object + element_name: CSS class name of the element to screenshot + viewport_height: Height of each tile in pixels + + Returns: + Combined screenshot bytes or None if failed + """ + try: + # Get the target element + element = page.locator(f".{element_name}") + element.wait_for(timeout=30000) # 30 second timeout + + # Get dashboard dimensions and position + element_info = page.evaluate(f"""() => {{ + const el = document.querySelector(".{element_name}"); Review Comment: ### JavaScript injection vulnerability in element selector <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? Direct interpolation of user input (element_name) into JavaScript code which could allow for XSS attacks if element_name is not properly validated. ###### Why this matters An attacker could inject malicious JavaScript code through the element_name parameter that would be executed in the browser context, potentially compromising the application or user data. ###### Suggested change ∙ *Feature Preview* Use Playwright's built-in selector escaping mechanisms or validate element_name to ensure it only contains valid CSS class characters: ```python # Add validation before use if not re.match(r'^[a-zA-Z0-9_-]+$', element_name): raise ValueError('Invalid element_name') ``` ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/43395f2f-79b6-4f48-858e-f50810436b13/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/43395f2f-79b6-4f48-858e-f50810436b13?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/43395f2f-79b6-4f48-858e-f50810436b13?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/43395f2f-79b6-4f48-858e-f50810436b13?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/43395f2f-79b6-4f48-858e-f50810436b13) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:6fdffe6e-27d0-4867-9426-928ab7fcc2ea --> [](6fdffe6e-27d0-4867-9426-928ab7fcc2ea) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
