https://gcc.gnu.org/g:25910642d367097d19ea90fb2b54e2858ddd74b9

commit r16-6931-g25910642d367097d19ea90fb2b54e2858ddd74b9
Author: David Malcolm <[email protected]>
Date:   Tue Jan 20 12:06:12 2026 -0500

    sarif-replay: skip "sarif:/" embedded links [PR123056]
    
    PR sarif-replay/123056 notes that when using sarif-replay to generate
    HTML from a .sarif file containing an embedded "sarif:/" link we get
    bogus output containing SGR codes.
    
    The links in question come from GCC's sarif output for cross-referencing
    event IDs within an execution path.
    
    These links are JSON pointers.  I experimented with properly supporting
    the JSON Pointer spec (RFC 6901) within GCC, and I have a partially
    working implementation which parses JSON pointers here, and, where
    appropriate, reconstructs the pertinent event ID.
    
    However, that feels too invasive to be pushing in stage 4.  Hence for
    GCC 16, this patch simply skips the link part of "sarif:/" links in
    sarif-replay, avoiding corrupt output, deferring the more ambitious
    round-tripping fix to GCC 17.
    
    gcc/ChangeLog:
            PR sarif-replay/123056
            * libsarifreplay.cc (struct embedded_link): Move decl earlier.
            (sarif_replayer::append_embeddded_link): New.
            (sarif_replayer::make_plain_text_within_result_message): Move the
            link-replay logic to the above, and skip the link part of
            intra-sarif links.
    
    gcc/testsuite/ChangeLog:
            PR sarif-replay/123056
            * sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif:
            New test.
            * sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py:
            New test script.
            * sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py:
            New test script.
    
    Signed-off-by: David Malcolm <[email protected]>

Diff:
---
 gcc/libsarifreplay.cc                              |  39 ++++--
 .../3.11.6-embedded-links-pr123056.sarif           | 139 +++++++++++++++++++++
 .../embedded-links-pr123056-check-html.py          |  25 ++++
 ...mbedded-links-pr123056-check-sarif-roundtrip.py |  14 +++
 4 files changed, 204 insertions(+), 13 deletions(-)

diff --git a/gcc/libsarifreplay.cc b/gcc/libsarifreplay.cc
index 01d3a975f62f..20a0aec3c7fc 100644
--- a/gcc/libsarifreplay.cc
+++ b/gcc/libsarifreplay.cc
@@ -292,6 +292,12 @@ public:
   libgdiagnostics::message_buffer m_label;
 };
 
+struct embedded_link
+{
+  std::string text;
+  std::string destination;
+};
+
 using id_map = std::map<std::string, const json::string *>;
 
 class sarif_replayer
@@ -724,6 +730,10 @@ private:
     return sub;
   }
 
+  void
+  append_embeddded_link (libgdiagnostics::message_buffer &result,
+                        const embedded_link &link);
+
   /* The manager to replay the SARIF files to.  */
   libgdiagnostics::manager m_output_mgr;
 
@@ -1499,12 +1509,6 @@ maybe_consume_placeholder (const char *&iter_src, unsigned *out_arg_idx)
   return false;
 }
 
-struct embedded_link
-{
-  std::string text;
-  std::string destination;
-};
-
 /*  If ITER_SRC starts with an embedded link as per §3.11.6, advance ITER_SRC
     to immediately beyond the link, and return the link.
 
@@ -1578,6 +1582,21 @@ maybe_consume_embedded_link (const char *&iter_src)
   return std::make_unique<embedded_link> (std::move (result));
 }
 
+void
+sarif_replayer::append_embeddded_link (libgdiagnostics::message_buffer &result,
+                                      const embedded_link &link)
+{
+  /* We can't yet decode intra-sarif links, so simply use their text.  */
+  if (!strncmp (link.destination.c_str (), "sarif:/", strlen ("sarif:/")))
+    {
+      result += link.text.c_str ();
+      return;
+    }
+  result.begin_url (link.destination.c_str ());
+  result += link.text.c_str ();
+  result.end_url ();
+}
+
 /* Lookup the plain text string within a result.message (§3.27.11),
    and substitute for any placeholders (§3.11.5) and handle any
    embedded links (§3.11.6).
@@ -1662,13 +1681,7 @@ make_plain_text_within_result_message (const json::object *tool_component_obj,
            }
        }
       else if (auto link = maybe_consume_embedded_link (iter_src))
-       {
-         result.begin_url (link->destination.c_str ());
-         result += link->text.c_str ();
-         result.end_url ();
-         /* TODO: potentially could try to convert
-            intra-sarif links into event ids.  */
-       }
+       append_embeddded_link (result, *link);
       else
        {
          result += ch;
diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif
new file mode 100644
index 000000000000..cbf5f4af64b4
--- /dev/null
+++ b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif
@@ -0,0 +1,139 @@
+/* { dg-additional-options "-fdiagnostics-add-output=experimental-html:file=3.11.6-embedded-links-pr123056.sarif.html,javascript=no" } */
+/* { dg-additional-options "-fdiagnostics-add-output=sarif:file=3.11.6-embedded-links-pr123056.sarif.roundtrip.sarif" } */
+
+{"$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json",
+ "version": "2.1.0",
+ "runs": [{"tool": {"driver": {"name": "GNU C23",
+                               "fullName": "GNU C23 (GCC) version 16.0.1 
20260114 (experimental) (x86_64-pc-linux-gnu)",
+                               "version": "16.0.1 20260114 (experimental)",
+                               "informationUri": "https://gcc.gnu.org/gcc-16/",
+                               "rules": [{"id": "-Wanalyzer-malloc-leak",
+                                          "helpUri": 
"https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-malloc-leak"}]}},
+           "taxonomies": [{"name": "CWE",
+                           "version": "4.7",
+                           "organization": "MITRE",
+                           "shortDescription": {"text": "The MITRE Common 
Weakness Enumeration"},
+                           "taxa": [{"id": "401",
+                                     "helpUri": 
"https://cwe.mitre.org/data/definitions/401.html"}]}],
+           "invocations": [{"arguments": ["./cc1",
+                                          "-quiet",
+                                          "-iprefix",
+                                          
"/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/../lib/gcc/x86_64-pc-linux-gnu/16.0.1/",
+                                          "-isystem",
+                                          "./include",
+                                          "-isystem",
+                                          "./include-fixed",
+                                          "pr123056.c",
+                                          "-quiet",
+                                          "-dumpbase",
+                                          "pr123056.c",
+                                          "-dumpbase-ext",
+                                          ".c",
+                                          "-mtune=generic",
+                                          "-march=x86-64",
+                                          "-fanalyzer",
+                                          "-fdiagnostics-add-output=sarif",
+                                          
"-fdiagnostics-add-output=experimental-html",
+                                          "-o",
+                                          "pr123056.s"],
+                            "workingDirectory": {"uri": 
"/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc"},
+                            "startTimeUtc": "2026-01-16T17:43:19Z",
+                            "executionSuccessful": true,
+                            "toolExecutionNotifications": [],
+                            "endTimeUtc": "2026-01-16T17:43:19Z"}],
+           "originalUriBaseIds": {"PWD": {"uri": 
"file:///home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/"}},
+           "artifacts": [{"location": {"uri": "pr123056.c",
+                                       "uriBaseId": "PWD"},
+                          "sourceLanguage": "c",
+                          "contents": {"text": "void test (void)\n{\n  void *p 
= __builtin_malloc (1024);\n}\n"},
+                          "roles": ["analysisTarget",
+                                    "tracedFile"]}],
+           "results": [{"ruleId": "-Wanalyzer-malloc-leak",
+                        "taxa": [{"id": "401",
+                                  "toolComponent": {"name": "cwe"}}],
+                        "properties": {"gcc/analyzer/saved_diagnostic/sm": 
"malloc",
+                                       "gcc/analyzer/saved_diagnostic/ploc": 
{"enode": 5},
+                                       "gcc/analyzer/saved_diagnostic/var": 
"p_3",
+                                       "gcc/analyzer/saved_diagnostic/sval": 
"&HEAP_ALLOCATED_REGION(14)",
+                                       "gcc/analyzer/saved_diagnostic/state": 
"unchecked ({free})",
+                                       "gcc/analyzer/saved_diagnostic/idx": 0,
+                                       
"gcc/analyzer/saved_diagnostic/duplicates": [{"properties": 
{"gcc/analyzer/saved_diagnostic/sm": "malloc",
+                                                                               
                     "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5},
+                                                                               
                     "gcc/analyzer/saved_diagnostic/var": "p_3",
+                                                                               
                     "gcc/analyzer/saved_diagnostic/sval": 
"&HEAP_ALLOCATED_REGION(14)",
+                                                                               
                     "gcc/analyzer/saved_diagnostic/state": "unchecked 
({free})",
+                                                                               
                     "gcc/analyzer/saved_diagnostic/idx": 1,
+                                                                               
                     "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}}],
+                                       "gcc/analyzer/pending_diagnostic/kind": 
"malloc_leak"},
+                        "level": "warning",
+                        "message": {"text": "leak of ‘p’"},
+                        "locations": [{"physicalLocation": 
{"artifactLocation": {"uri": "pr123056.c",
+                                                                               
  "uriBaseId": "PWD"},
+                                                            "region": 
{"startLine": 4,
+                                                                       
"startColumn": 1,
+                                                                       
"endColumn": 2},
+                                                            "contextRegion": 
{"startLine": 4,
+                                                                              
"snippet": {"text": "}\n"}}},
+                                       "logicalLocations": [{"index": 0,
+                                                             
"fullyQualifiedName": "test"}]}],
+                        "codeFlows": [{"threadFlows": [{"id": "main",
+                                                        "locations": 
[{"properties": {"gcc/analyzer/checker_event/emission_id": "(1)",
+                                                                               
       "gcc/analyzer/checker_event/kind": "state_change"},
+                                                                       
"location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
+                                                                               
                                               "uriBaseId": "PWD"},
+                                                                               
                          "region": {"startLine": 3,
+                                                                               
                                     "startColumn": 13,
+                                                                               
                                     "endColumn": 36},
+                                                                               
                          "contextRegion": {"startLine": 3,
+                                                                               
                                            "snippet": {"text": "  void *p = 
__builtin_malloc (1024);\n"}}},
+                                                                               
     "logicalLocations": [{"index": 0,
+                                                                               
                           "fullyQualifiedName": "test"}],
+                                                                               
     "message": {"text": "allocated here"}},
+                                                                       
"kinds": ["acquire",
+                                                                               
  "memory"],
+                                                                       
"nestingLevel": 1,
+                                                                       
"executionOrder": 1},
+                                                                      
{"properties": {"gcc/analyzer/checker_event/emission_id": "(2)",
+                                                                               
       "gcc/analyzer/checker_event/kind": "warning"},
+                                                                       
"location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
+                                                                               
                                               "uriBaseId": "PWD"},
+                                                                               
                          "region": {"startLine": 4,
+                                                                               
                                     "startColumn": 1,
+                                                                               
                                     "endColumn": 2},
+                                                                               
                          "contextRegion": {"startLine": 4,
+                                                                               
                                            "snippet": {"text": "}\n"}}},
+                                                                               
     "logicalLocations": [{"index": 0,
+                                                                               
                           "fullyQualifiedName": "test"}],
+                                                                               
     "message": {"text": "‘p’ leaks here; was allocated at 
[(1)](sarif:/runs/0/results/0/codeFlows/0/threadFlows/0/locations/0)"}},
+                                                                       
"kinds": ["danger"],
+                                                                       
"nestingLevel": 1,
+                                                                       
"executionOrder": 2}]}]}]}],
+           "logicalLocations": [{"name": "test",
+                                 "fullyQualifiedName": "test",
+                                 "decoratedName": "test",
+                                 "kind": "function",
+                                 "index": 0}]}]}
+
+/* { dg-begin-multiline-output "" }
+In function 'test':
+pr123056.c:4:1: warning: leak of ‘p’ [-Wanalyzer-malloc-leak]
+    4 | }
+      | ^
+  'test': events 1-2
+    3 |   void *p = __builtin_malloc (1024);
+      |             ^~~~~~~~~~~~~~~~~~~~~~~
+      |             |
+      |             (1) allocated here
+    4 | }
+      | ~            
+      | |
+      | (2) ‘p’ leaks here; was allocated at (1)
+   { dg-end-multiline-output "" } */
+
+/* Use a Python script to verify various properties about the generated
+   .html file:
+   { dg-final { run-html-pytest 3.11.6-embedded-links-pr123056.sarif "2.1.0-valid/embedded-links-pr123056-check-html.py" } } */
+
+/* Use a Python script to verify various properties about the *generated*
+   .sarif file:
+   { dg-final { run-sarif-pytest 3.11.6-embedded-links-pr123056.sarif.roundtrip "2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py" } } */
diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py
new file mode 100644
index 000000000000..1e8069f21774
--- /dev/null
+++ b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py
@@ -0,0 +1,25 @@
+from htmltest import *
+
+import pytest
+
+@pytest.fixture(scope='function', autouse=True)
+def html_tree():
+    return html_tree_from_env()
+
+def test_generated_html(html_tree):
+    root = html_tree.getroot ()
+    assert root.tag == make_tag('html')
+
+    head = root.find('xhtml:head', ns)
+    assert head is not None
+
+    diag = get_diag_by_index(html_tree, 0)
+
+    exec_path = diag.find("./xhtml:div[@id='execution-path']", ns)
+    assert exec_path is not None
+
+    label = exec_path.find('xhtml:label', ns)
+    assert label.text == 'Execution path with 2 events'
+
+    final_event = exec_path.find(".//xhtml:span[@id='gcc-diag-0-event-1']", ns)
+    assert final_event.text == '(2) ‘p’ leaks here; was allocated at (1)'
diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py
new file mode 100644
index 000000000000..5e45e2a0c7f5
--- /dev/null
+++ b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py
@@ -0,0 +1,14 @@
+from sarif import *
+
+import pytest
+
+@pytest.fixture(scope='function', autouse=True)
+def sarif():
+    return sarif_from_env()
+
+def test_roundtrip_of_url_in_generated_sarif(sarif):
+    result = get_result_by_index(sarif, 0)
+    assert result['level'] == 'warning'
+    assert result['message']['text'] == "leak of ‘p’"
+    assert (result['codeFlows'][0]['threadFlows'][0]['locations'][1]['location']['message']['text']
+            == "‘p’ leaks here; was allocated at (1)")

Reply via email to