https://gcc.gnu.org/g:25910642d367097d19ea90fb2b54e2858ddd74b9
commit r16-6931-g25910642d367097d19ea90fb2b54e2858ddd74b9 Author: David Malcolm <[email protected]> Date: Tue Jan 20 12:06:12 2026 -0500 sarif-replay: skip "sarif:/" embedded links [PR123056] PR sarif-replay/123056 notes that when using sarif-replay to generate HTML from a .sarif file containing an embedded "sarif:/" link we get bogus output containing SGR codes. The links in question come from GCC's sarif output for cross-referencing event IDs within an execution path. These links are JSON pointers. I experimented with propertly supporting the JSON Pointer spec (RFC 6901) within GCC, and I have a partially working implementation which parses JSON pointers here, and, where appropriate, reconstructs the pertinent event ID. However, that feels too invasive to be pushing in stage 4. Hence for GCC 16, this patch simply skips the link part of "sarif:/" links in sarif-replay, avoiding corrupt output, deferring the more ambitious round-tripping fix to GCC 17. gcc/ChangeLog: PR sarif-replay/123056 * libsarifreplay.cc (struct embedded_link): Move decl earlier. (sarif_replayer::append_embeddded_link): New. (sarif_replayer::make_plain_text_within_result_message): Move the link-replay logic to the above, and skip the link part of intra-sarif links. gcc/testsuite/ChangeLog: PR sarif-replay/123056 * sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif: New test. * sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py: New test script. * sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py: New test script. Signed-off-by: David Malcolm <[email protected]> Diff: --- gcc/libsarifreplay.cc | 39 ++++-- .../3.11.6-embedded-links-pr123056.sarif | 139 +++++++++++++++++++++ .../embedded-links-pr123056-check-html.py | 25 ++++ ...mbedded-links-pr123056-check-sarif-roundtrip.py | 14 +++ 4 files changed, 204 insertions(+), 13 deletions(-) diff --git a/gcc/libsarifreplay.cc b/gcc/libsarifreplay.cc index 01d3a975f62f..20a0aec3c7fc 100644 --- a/gcc/libsarifreplay.cc +++ b/gcc/libsarifreplay.cc @@ -292,6 +292,12 @@ public: libgdiagnostics::message_buffer m_label; }; +struct embedded_link +{ + std::string text; + std::string destination; +}; + using id_map = std::map<std::string, const json::string *>; class sarif_replayer @@ -724,6 +730,10 @@ private: return sub; } + void + append_embeddded_link (libgdiagnostics::message_buffer &result, + const embedded_link &link); + /* The manager to replay the SARIF files to. */ libgdiagnostics::manager m_output_mgr; @@ -1499,12 +1509,6 @@ maybe_consume_placeholder (const char *&iter_src, unsigned *out_arg_idx) return false; } -struct embedded_link -{ - std::string text; - std::string destination; -}; - /* If ITER_SRC starts with an embedded link as per §3.11.6, advance ITER_SRC to immediately beyond the link, and return the link. @@ -1578,6 +1582,21 @@ maybe_consume_embedded_link (const char *&iter_src) return std::make_unique<embedded_link> (std::move (result)); } +void +sarif_replayer::append_embeddded_link (libgdiagnostics::message_buffer &result, + const embedded_link &link) +{ + /* We can't yet decode intra-sarif links, so simply use their text. */ + if (!strncmp (link.destination.c_str (), "sarif:/", strlen ("sarif:/"))) + { + result += link.text.c_str (); + return; + } + result.begin_url (link.destination.c_str ()); + result += link.text.c_str (); + result.end_url (); +} + /* Lookup the plain text string within a result.message (§3.27.11), and substitute for any placeholders (§3.11.5) and handle any embedded links (§3.11.6). @@ -1662,13 +1681,7 @@ make_plain_text_within_result_message (const json::object *tool_component_obj, } } else if (auto link = maybe_consume_embedded_link (iter_src)) - { - result.begin_url (link->destination.c_str ()); - result += link->text.c_str (); - result.end_url (); - /* TODO: potentially could try to convert - intra-sarif links into event ids. */ - } + append_embeddded_link (result, *link); else { result += ch; diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif new file mode 100644 index 000000000000..cbf5f4af64b4 --- /dev/null +++ b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif @@ -0,0 +1,139 @@ +/* { dg-additional-options "-fdiagnostics-add-output=experimental-html:file=3.11.6-embedded-links-pr123056.sarif.html,javascript=no" } */ +/* { dg-additional-options "-fdiagnostics-add-output=sarif:file=3.11.6-embedded-links-pr123056.sarif.roundtrip.sarif" } */ + +{"$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json", + "version": "2.1.0", + "runs": [{"tool": {"driver": {"name": "GNU C23", + "fullName": "GNU C23 (GCC) version 16.0.1 20260114 (experimental) (x86_64-pc-linux-gnu)", + "version": "16.0.1 20260114 (experimental)", + "informationUri": "https://gcc.gnu.org/gcc-16/", + "rules": [{"id": "-Wanalyzer-malloc-leak", + "helpUri": "https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-malloc-leak"}]}}, + "taxonomies": [{"name": "CWE", + "version": "4.7", + "organization": "MITRE", + "shortDescription": {"text": "The MITRE Common Weakness Enumeration"}, + "taxa": [{"id": "401", + "helpUri": "https://cwe.mitre.org/data/definitions/401.html"}]}], + "invocations": [{"arguments": ["./cc1", + "-quiet", + "-iprefix", + "/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/../lib/gcc/x86_64-pc-linux-gnu/16.0.1/", + "-isystem", + "./include", + "-isystem", + "./include-fixed", + "pr123056.c", + "-quiet", + "-dumpbase", + "pr123056.c", + "-dumpbase-ext", + ".c", + "-mtune=generic", + "-march=x86-64", + "-fanalyzer", + "-fdiagnostics-add-output=sarif", + "-fdiagnostics-add-output=experimental-html", + "-o", + "pr123056.s"], + "workingDirectory": {"uri": "/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc"}, + "startTimeUtc": "2026-01-16T17:43:19Z", + "executionSuccessful": true, + "toolExecutionNotifications": [], + "endTimeUtc": "2026-01-16T17:43:19Z"}], + "originalUriBaseIds": {"PWD": {"uri": "file:///home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/"}}, + "artifacts": [{"location": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "sourceLanguage": "c", + "contents": {"text": "void test (void)\n{\n void *p = __builtin_malloc (1024);\n}\n"}, + "roles": ["analysisTarget", + "tracedFile"]}], + "results": [{"ruleId": "-Wanalyzer-malloc-leak", + "taxa": [{"id": "401", + "toolComponent": {"name": "cwe"}}], + "properties": {"gcc/analyzer/saved_diagnostic/sm": "malloc", + "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5}, + "gcc/analyzer/saved_diagnostic/var": "p_3", + "gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)", + "gcc/analyzer/saved_diagnostic/state": "unchecked ({free})", + "gcc/analyzer/saved_diagnostic/idx": 0, + "gcc/analyzer/saved_diagnostic/duplicates": [{"properties": {"gcc/analyzer/saved_diagnostic/sm": "malloc", + "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5}, + "gcc/analyzer/saved_diagnostic/var": "p_3", + "gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)", + "gcc/analyzer/saved_diagnostic/state": "unchecked ({free})", + "gcc/analyzer/saved_diagnostic/idx": 1, + "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}}], + "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}, + "level": "warning", + "message": {"text": "leak of ‘p’"}, + "locations": [{"physicalLocation": {"artifactLocation": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "region": {"startLine": 4, + "startColumn": 1, + "endColumn": 2}, + "contextRegion": {"startLine": 4, + "snippet": {"text": "}\n"}}}, + "logicalLocations": [{"index": 0, + "fullyQualifiedName": "test"}]}], + "codeFlows": [{"threadFlows": [{"id": "main", + "locations": [{"properties": {"gcc/analyzer/checker_event/emission_id": "(1)", + "gcc/analyzer/checker_event/kind": "state_change"}, + "location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "region": {"startLine": 3, + "startColumn": 13, + "endColumn": 36}, + "contextRegion": {"startLine": 3, + "snippet": {"text": " void *p = __builtin_malloc (1024);\n"}}}, + "logicalLocations": [{"index": 0, + "fullyQualifiedName": "test"}], + "message": {"text": "allocated here"}}, + "kinds": ["acquire", + "memory"], + "nestingLevel": 1, + "executionOrder": 1}, + {"properties": {"gcc/analyzer/checker_event/emission_id": "(2)", + "gcc/analyzer/checker_event/kind": "warning"}, + "location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "region": {"startLine": 4, + "startColumn": 1, + "endColumn": 2}, + "contextRegion": {"startLine": 4, + "snippet": {"text": "}\n"}}}, + "logicalLocations": [{"index": 0, + "fullyQualifiedName": "test"}], + "message": {"text": "‘p’ leaks here; was allocated at [(1)](sarif:/runs/0/results/0/codeFlows/0/threadFlows/0/locations/0)"}}, + "kinds": ["danger"], + "nestingLevel": 1, + "executionOrder": 2}]}]}]}], + "logicalLocations": [{"name": "test", + "fullyQualifiedName": "test", + "decoratedName": "test", + "kind": "function", + "index": 0}]}]} + +/* { dg-begin-multiline-output "" } +In function 'test': +pr123056.c:4:1: warning: leak of ‘p’ [-Wanalyzer-malloc-leak] + 4 | } + | ^ + 'test': events 1-2 + 3 | void *p = __builtin_malloc (1024); + | ^~~~~~~~~~~~~~~~~~~~~~~ + | | + | (1) allocated here + 4 | } + | ~ + | | + | (2) ‘p’ leaks here; was allocated at (1) + { dg-end-multiline-output "" } */ + +/* Use a Python script to verify various properties about the generated + .html file: + { dg-final { run-html-pytest 3.11.6-embedded-links-pr123056.sarif "2.1.0-valid/embedded-links-pr123056-check-html.py" } } */ + +/* Use a Python script to verify various properties about the *generated* + .sarif file: + { dg-final { run-sarif-pytest 3.11.6-embedded-links-pr123056.sarif.roundtrip "2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py" } } */ diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py new file mode 100644 index 000000000000..1e8069f21774 --- /dev/null +++ b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py @@ -0,0 +1,25 @@ +from htmltest import * + +import pytest + [email protected](scope='function', autouse=True) +def html_tree(): + return html_tree_from_env() + +def test_generated_html(html_tree): + root = html_tree.getroot () + assert root.tag == make_tag('html') + + head = root.find('xhtml:head', ns) + assert head is not None + + diag = get_diag_by_index(html_tree, 0) + + exec_path = diag.find("./xhtml:div[@id='execution-path']", ns) + assert exec_path is not None + + label = exec_path.find('xhtml:label', ns) + assert label.text == 'Execution path with 2 events' + + final_event = exec_path.find(".//xhtml:span[@id='gcc-diag-0-event-1']", ns) + assert final_event.text == '(2) ‘p’ leaks here; was allocated at (1)' diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py new file mode 100644 index 000000000000..5e45e2a0c7f5 --- /dev/null +++ b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py @@ -0,0 +1,14 @@ +from sarif import * + +import pytest + [email protected](scope='function', autouse=True) +def sarif(): + return sarif_from_env() + +def test_roundtrip_of_url_in_generated_sarif(sarif): + result = get_result_by_index(sarif, 0) + assert result['level'] == 'warning' + assert result['message']['text'] == "leak of ‘p’" + assert (result['codeFlows'][0]['threadFlows'][0]['locations'][1]['location']['message']['text'] + == "‘p’ leaks here; was allocated at (1)")
