korbit-ai[bot] commented on code in PR #35014:
URL: https://github.com/apache/superset/pull/35014#discussion_r2321504807


##########
superset/reports/notifications/email.py:
##########
@@ -132,19 +133,124 @@ def _get_content(self) -> EmailContent:
             attributes=ALLOWED_ATTRIBUTES,
         )
 
-        # Strip malicious HTML from embedded data, allowing only table elements
-        if self._content.embedded_data is not None:
+        pdf_data = None
+        html_table = ""
+
+        # Check if the report format is PDF and embedded data is available
+        # Assuming self._content.report_format exists and holds the report 
format string
+        if hasattr(self._content, 'report_format') and \
+           self._content.report_format == "PDF" and \
+           self._content.embedded_data is not None:
             df = self._content.embedded_data
-            # pylint: disable=no-member
-            html_table = nh3.clean(
-                df.to_html(na_rep="", index=True, escape=True),
-                # pandas will escape the HTML in cells already, so passing
-                # more allowed tags here will not work
-                tags=TABLE_TAGS,
-                attributes=ALLOWED_TABLE_ATTRIBUTES,
-            )
-        else:
+            report_name_val = self._name # Renamed to avoid clash with CSS 
variable name
+            generation_date_val = self.now.strftime('%Y-%m-%d %H:%M:%S UTC')
+
+            # Retrieve PDF export configurations
+            pdf_headers_footers_enabled = 
app.config.get("PDF_EXPORT_HEADERS_FOOTERS_ENABLED", True)
+            pdf_header_template = app.config.get("PDF_EXPORT_HEADER_TEMPLATE", 
"Report: {report_name} - Page {page_number} of {total_pages}")
+            pdf_footer_template = app.config.get("PDF_EXPORT_FOOTER_TEMPLATE", 
"Generated: {generation_date}")
+            pdf_page_size = app.config.get("PDF_EXPORT_PAGE_SIZE", "A4")
+            pdf_orientation = app.config.get("PDF_EXPORT_ORIENTATION", 
"portrait")
+
+            # Prepare header and footer content based on templates and config
+            header_content_str = ""
+            footer_content_str = ""
+
+            if pdf_headers_footers_enabled:
+                # Sanitize report_name_val for CSS content (simple escaping 
for quotes)
+                css_report_name = report_name_val.replace("\\", 
"\\\\").replace("\"", "\\\"").replace("\n", "\\A")
+                css_generation_date = generation_date_val.replace("\\", 
"\\\\").replace("\"", "\\\"").replace("\n", "\\A")
+
+                # For header: replace {report_name}, keep {page_number} and 
{total_pages} for CSS counters
+                header_content_str = 
pdf_header_template.replace("{report_name}", css_report_name)
+                header_content_str = 
header_content_str.replace("{page_number}", "counter(page)")
+                header_content_str = 
header_content_str.replace("{total_pages}", "counter(pages)")
+
+                # For footer: replace {generation_date} and {report_name}
+                footer_content_str = 
pdf_footer_template.replace("{generation_date}", css_generation_date)
+                footer_content_str = 
footer_content_str.replace("{report_name}", css_report_name)
+
+
+            pdf_html_content = f"""
+            <html>
+            <head>
+                <meta charset="UTF-8">
+                <style>
+                    :root {{
+                        /* Keeping these for potential use in body styles if 
needed */
+                        --report-name-var: "{report_name_val.replace('"', 
'&quot;').replace("'", "&apos;")}";
+                        --generation-date-var: "{generation_date_val}";
+                    }}
+                </style>
+            </head>
+            <body>
+                <div class="report-description">{description}</div>
+                <br>
+                {df.to_html(na_rep="", index=True, escape=False)}

Review Comment:
   ### HTML escaping disabled in DataFrame rendering <sub>![category 
Security](https://img.shields.io/badge/Security-e11d48)</sub>
   
   <details>
     <summary>Tell me more</summary>
   
   ###### What is the issue?
   The DataFrame's to_html method is called with escape=False when generating 
PDF content, which could allow HTML injection attacks if the data contains 
malicious content.
   
   
   ###### Why this matters
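   With escaping disabled, cell values are inserted verbatim into the HTML used 
to generate the PDF, so malicious data in the DataFrame could inject arbitrary 
HTML/JavaScript into the report, potentially leading to XSS attacks when the 
PDF is rendered or viewed.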
   
   ###### Suggested change ∙ *Feature Preview*
   Pass `escape=True` to `DataFrame.to_html` so cell values are HTML-escaped, as 
the previous implementation did:
   ```python
   df.to_html(na_rep="", index=True, escape=True)
   ```
   
   
   ###### Provide feedback to improve future suggestions
   [![Nice 
Catch](https://img.shields.io/badge/👍%20Nice%20Catch-71BC78)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d339fed7-bf10-40ce-9166-0a3f128a5bdf/upvote)
 
[![Incorrect](https://img.shields.io/badge/👎%20Incorrect-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d339fed7-bf10-40ce-9166-0a3f128a5bdf?what_not_true=true)
  [![Not in 
Scope](https://img.shields.io/badge/👎%20Out%20of%20PR%20scope-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d339fed7-bf10-40ce-9166-0a3f128a5bdf?what_out_of_scope=true)
 [![Not in coding 
standard](https://img.shields.io/badge/👎%20Not%20in%20our%20standards-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d339fed7-bf10-40ce-9166-0a3f128a5bdf?what_not_in_standard=true)
 
[![Other](https://img.shields.io/badge/👎%20Other-white)](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/d339fed7-bf10-40ce-9166-0a3f128a5bdf)
   </details>
   
   <sub>
   
   💬 Looking for more details? Reply to this comment to chat with Korbit.
   </sub>
   
   <!--- korbi internal id:418a6187-2e6b-4be0-aa8f-2bd96ed1b80e -->
   
   
   [](418a6187-2e6b-4be0-aa8f-2bd96ed1b80e)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to