This work-in-progress hacks up the file_cache code in input.cc so that it can have an optional path remapper, which can map e.g. paths in a .sarif file to paths relative to that .sarif file, so that the sarif-replayer can locate and display sources.
gcc/ChangeLog: * input.cc (file_cache::get_file_path): Replace with... (file_cache::get_original_file_path): ...this, and... (file_cache::get_remapped_file_path): ...this. (file_cache::create): Split "file_path" param into "original_file_path" and "remapped_file_path". (file_cache::m_file_path): Replace with... (file_cache::m_original_file_path): ...this, and... (file_cache::m_remapped_file_path): ...this. (total_lines_num): Rename file_path to original_file_path. (file_cache::lookup_file): Rename file_path to remapped_file_path; update cache lookup to use remapped file path. (file_cache::remap_file): New. (file_cache::set_path_remapper): New. (diagnostics_file_cache_set_path_remapper): New. (file_cache_slot::evict): Update for split into a pair of paths. (file_cache::evicted_cache_tab_entry): Likewise. (file_cache::add_file): Likewise. (file_cache_slot::create): Likewise. (file_cache::file_cache): Initialize m_remapper (file_cache::~file_cache): Delete m_remapper. (file_cache::lookup_or_add_file): Remap the file path. (file_cache_slot::file_cache_slot): Update for changes to fields. (file_cache_slot::~file_cache_slot): Free the m_remapped_file_path. * input.h (class path_remapper): New. (file_cache::set_path_remapper): New decl. (file_cache::lookup_file): Update decl. (file_cache::add_file): Update decl. (file_cache::remap_file): New decl. (file_cache::m_remapper): New field. (diagnostics_file_cache_set_path_remapper): New decl. * sarif/sarif-replay.cc (class sarif_path_remapper): New class. (sarif_replayer::emit_sarif_as_diagnostics): Use it. gcc/testsuite/ChangeLog: * sarif/tutorial-example-foo.sarif: Fix the line numbers. Update the expected multiline output to show the source code. Signed-off-by: David Malcolm <dmalc...@redhat.com> --- gcc/input.cc | 107 +++++++++++++----- gcc/input.h | 18 ++- gcc/sarif/sarif-replay.cc | 39 +++++++ .../sarif/tutorial-example-foo.sarif | 25 +++- 4 files changed, 155 insertions(+), 34 deletions(-) diff --git a/gcc/input.cc b/gcc/input.cc index 2acbfdea4f8..a78b949faa5 100644 --- a/gcc/input.cc +++ b/gcc/input.cc @@ -55,7 +55,8 @@ public: char ** line, ssize_t *line_len); /* Accessors. */ - const char *get_file_path () const { return m_file_path; } + const char *get_original_file_path () const { return m_original_file_path; } + const char *get_remapped_file_path () const { return m_remapped_file_path; } unsigned get_use_count () const { return m_use_count; } bool missing_trailing_newline_p () const { @@ -65,7 +66,10 @@ public: void inc_use_count () { m_use_count++; } bool create (const file_cache::input_context &in_context, - const char *file_path, FILE *fp, unsigned highest_use_count); + const char *original_file_path, + char *remapped_file_path, + FILE *fp, + unsigned highest_use_count); void evict (); private: @@ -112,11 +116,14 @@ public: array. */ unsigned m_use_count; - /* The file_path is the key for identifying a particular file in + /* The m_original_file_path is the key for identifying a particular file in the cache. For libcpp-using code, the underlying buffer for this field is owned by the corresponding _cpp_file within the cpp_reader. */ - const char *m_file_path; + const char *m_original_file_path; + + // FIXME: + char *m_remapped_file_path; FILE *m_fp; @@ -310,11 +317,11 @@ diagnostic_file_cache_fini (void) equals the actual number of lines of the file. */ static size_t -total_lines_num (const char *file_path) +total_lines_num (const char *original_file_path) { size_t r = 0; location_t l = 0; - if (linemap_get_file_highest_location (line_table, file_path, &l)) + if (linemap_get_file_highest_location (line_table, original_file_path, &l)) { gcc_assert (l >= RESERVED_LOCATION_COUNT); expanded_location xloc = expand_location (l); @@ -328,16 +335,17 @@ total_lines_num (const char *file_path) cached file was found. */ file_cache_slot * -file_cache::lookup_file (const char *file_path) +file_cache::lookup_file (const char *remapped_file_path) { - gcc_assert (file_path); + gcc_assert (remapped_file_path); /* This will contain the found cached file. */ file_cache_slot *r = NULL; for (unsigned i = 0; i < num_file_slots; ++i) { file_cache_slot *c = &m_file_slots[i]; - if (c->get_file_path () && !strcmp (c->get_file_path (), file_path)) + if (c->get_remapped_file_path () + && !strcmp (c->get_remapped_file_path (), remapped_file_path)) { c->inc_use_count (); r = c; @@ -350,6 +358,27 @@ file_cache::lookup_file (const char *file_path) return r; } +// FIXME + +char * +file_cache::remap_file (const char *file_path) const +{ + if (m_remapper) + return m_remapper->remap_file (file_path); + else + return xstrdup (file_path); + // FIXME: this is probably being called too much +} + +// FIXME: + +void +file_cache::set_path_remapper (path_remapper *remapper) +{ + delete m_remapper; + m_remapper = remapper; +} + /* Purge any mention of FILENAME from the cache of files used for printing source code. For use in selftests when working with tempfiles. */ @@ -365,6 +394,15 @@ diagnostics_file_cache_forcibly_evict_file (const char *file_path) global_dc->m_file_cache->forcibly_evict_file (file_path); } +// FIXME: + +void +diagnostics_file_cache_set_path_remapper (path_remapper *remapper) +{ + diagnostic_file_cache_init (); + global_dc->m_file_cache->set_path_remapper (remapper); +} + void file_cache::forcibly_evict_file (const char *file_path) { @@ -381,7 +419,9 @@ file_cache::forcibly_evict_file (const char *file_path) void file_cache_slot::evict () { - m_file_path = NULL; + m_original_file_path = NULL; + free (m_remapped_file_path); + m_remapped_file_path = NULL; if (m_fp) fclose (m_fp); m_fp = NULL; @@ -409,10 +449,10 @@ file_cache::evicted_cache_tab_entry (unsigned *highest_use_count) for (unsigned i = 1; i < num_file_slots; ++i) { file_cache_slot *c = &m_file_slots[i]; - bool c_is_empty = (c->get_file_path () == NULL); + bool c_is_empty = (c->get_original_file_path () == NULL); if (c->get_use_count () < to_evict->get_use_count () - || (to_evict->get_file_path () && c_is_empty)) + || (to_evict->get_original_file_path () && c_is_empty)) /* We evict C because it's either an entry with a lower use count or one that is empty. */ to_evict = c; @@ -432,6 +472,7 @@ file_cache::evicted_cache_tab_entry (unsigned *highest_use_count) return to_evict; } +// FIXME: /* Create the cache used for the content of a given file to be accessed by caret diagnostic. This cache is added to an array of cache and can be retrieved by lookup_file_in_cache_tab. This @@ -439,29 +480,35 @@ file_cache::evicted_cache_tab_entry (unsigned *highest_use_count) num_file_slots files are cached. */ file_cache_slot* -file_cache::add_file (const char *file_path) +file_cache::add_file (const char *original_file_path, + char *remapped_file_path) { - - FILE *fp = fopen (file_path, "r"); + FILE *fp = fopen (remapped_file_path, "r"); if (fp == NULL) return NULL; unsigned highest_use_count = 0; file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count); - if (!r->create (in_context, file_path, fp, highest_use_count)) + if (!r->create (in_context, original_file_path, remapped_file_path, + fp, highest_use_count)) return NULL; return r; } +// FIXME: /* Populate this slot for use on FILE_PATH and FP, dropping any existing cached content within it. */ +// FIXME: take ownership of REMAPPED_FILE_PATH. bool file_cache_slot::create (const file_cache::input_context &in_context, - const char *file_path, FILE *fp, + const char *original_file_path, + char *remapped_file_path, + FILE *fp, unsigned highest_use_count) { - m_file_path = file_path; + m_original_file_path = original_file_path; + m_remapped_file_path = remapped_file_path; if (m_fp) fclose (m_fp); m_fp = fp; @@ -474,19 +521,20 @@ file_cache_slot::create (const file_cache::input_context &in_context, /* Ensure that this cache entry doesn't get evicted next time add_file_to_cache_tab is called. */ m_use_count = ++highest_use_count; - m_total_lines = total_lines_num (file_path); + m_total_lines = total_lines_num (original_file_path); m_missing_trailing_newline = true; + // FIXME: which file_path should we be using below? /* Check the input configuration to determine if we need to do any transformations, such as charset conversion or BOM skipping. */ - if (const char *input_charset = in_context.ccb (file_path)) + if (const char *input_charset = in_context.ccb (original_file_path)) { /* Need a full-blown conversion of the input charset. */ fclose (m_fp); m_fp = NULL; const cpp_converted_source cs - = cpp_get_converted_source (file_path, input_charset); + = cpp_get_converted_source (original_file_path, input_charset); if (!cs.data) return false; if (m_data) @@ -511,7 +559,8 @@ file_cache_slot::create (const file_cache::input_context &in_context, /* file_cache's ctor. */ file_cache::file_cache () -: m_file_slots (new file_cache_slot[num_file_slots]) +: m_file_slots (new file_cache_slot[num_file_slots]), + m_remapper (NULL) { initialize_input_context (nullptr, false); } @@ -521,6 +570,7 @@ file_cache::file_cache () file_cache::~file_cache () { delete[] m_file_slots; + delete m_remapper; } /* Lookup the cache used for the content of a given file accessed by @@ -529,11 +579,14 @@ file_cache::~file_cache () it. */ file_cache_slot* -file_cache::lookup_or_add_file (const char *file_path) +file_cache::lookup_or_add_file (const char *original_file_path) { - file_cache_slot *r = lookup_file (file_path); + char *remapped_file_path = remap_file (original_file_path); + file_cache_slot *r = lookup_file (remapped_file_path); if (r == NULL) - r = add_file (file_path); + r = add_file (original_file_path, remapped_file_path); + else + free (remapped_file_path); return r; } @@ -541,7 +594,8 @@ file_cache::lookup_or_add_file (const char *file_path) diagnostic. */ file_cache_slot::file_cache_slot () -: m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0), +: m_use_count (0), m_original_file_path (NULL), m_remapped_file_path (NULL), + m_fp (NULL), m_data (0), m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0), m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true) { @@ -552,6 +606,7 @@ file_cache_slot::file_cache_slot () file_cache_slot::~file_cache_slot () { + free (m_remapped_file_path); if (m_fp) { fclose (m_fp); diff --git a/gcc/input.h b/gcc/input.h index f1ae3aec95c..8539317c513 100644 --- a/gcc/input.h +++ b/gcc/input.h @@ -118,6 +118,13 @@ extern bool location_missing_trailing_newline (const char *file_path); need to be in this header. */ class file_cache_slot; +class path_remapper +{ +public: + virtual ~path_remapper () {} + virtual char *remap_file (const char *file_path) const = 0; +}; + /* A cache of source files for use when emitting diagnostics (and in a few places in the C/C++ frontends). @@ -145,15 +152,20 @@ class file_cache void initialize_input_context (diagnostic_input_charset_callback ccb, bool should_skip_bom); + void set_path_remapper (path_remapper *remapper); + private: file_cache_slot *evicted_cache_tab_entry (unsigned *highest_use_count); - file_cache_slot *add_file (const char *file_path); - file_cache_slot *lookup_file (const char *file_path); + file_cache_slot *add_file (const char *original_file_path, + char *remapped_file_path); + file_cache_slot *lookup_file (const char *remapped_file_path); + char *remap_file (const char *file_path) const; private: static const size_t num_file_slots = 16; file_cache_slot *m_file_slots; input_context in_context; + path_remapper *m_remapper; }; extern expanded_location @@ -246,6 +258,8 @@ void diagnostics_file_cache_fini (void); void diagnostics_file_cache_forcibly_evict_file (const char *file_path); +void diagnostics_file_cache_set_path_remapper (path_remapper *remapper); + class GTY(()) string_concat { public: diff --git a/gcc/sarif/sarif-replay.cc b/gcc/sarif/sarif-replay.cc index 2d5c58ead1e..b8f9c152879 100644 --- a/gcc/sarif/sarif-replay.cc +++ b/gcc/sarif/sarif-replay.cc @@ -597,6 +597,37 @@ sarif_replayer::~sarif_replayer () delete iter.second; } +// FIXME: + +class sarif_path_remapper : public path_remapper +{ +public: + sarif_path_remapper (const char *sarif_filename) + { + /* Get the directory containing SARIF_FILENAME. */ + size_t overall_len = strlen (sarif_filename); + const char *last_comp = basename (sarif_filename); + gcc_assert (last_comp); + size_t dir_len = last_comp - sarif_filename; + m_dir = (char *)xmalloc (dir_len + 1); + memcpy (m_dir, sarif_filename, dir_len); + m_dir[dir_len] = '\0'; + } + ~sarif_path_remapper () + { + free (m_dir); + } + + char *remap_file (const char *file_path) const final override + { + // TODO: what about absolute FILE_PATH ? + return concat (m_dir, file_path, NULL); + } + +private: + char *m_dir; +}; + /* Perform one pass of replay of the output file. Pass 0 captures the source locations of interest, so that we can generate line_maps. @@ -607,6 +638,14 @@ sarif_replayer::emit_sarif_as_diagnostics (json::value *jv, int pass) { m_pass = pass; + /* Remap paths on 2nd pass: that way, errors in the SARIF file get + reported directly, whereas replayed diagnostics get remapped. */ + if (pass == 1) + { + diagnostics_file_cache_set_path_remapper + (new sarif_path_remapper (m_filename)); + } + /* We expect a sarifLog object as the top-level value (SARIF v2.1.0 section 3.13). */ json::object *toplev_obj = dyn_cast <json::object *> (jv); diff --git a/gcc/testsuite/sarif/tutorial-example-foo.sarif b/gcc/testsuite/sarif/tutorial-example-foo.sarif index 8fa37ad4b42..d73bbd3b93e 100644 --- a/gcc/testsuite/sarif/tutorial-example-foo.sarif +++ b/gcc/testsuite/sarif/tutorial-example-foo.sarif @@ -25,7 +25,7 @@ "uri": "bad-eval-with-code-flow.py" }, "region": { - "startLine": 8 + "startLine": 10 } } } @@ -45,7 +45,7 @@ "uri": "bad-eval-with-code-flow.py" }, "region": { - "startLine": 3 + "startLine": 5 } } }, @@ -63,7 +63,7 @@ "uri": "bad-eval-with-code-flow.py" }, "region": { - "startLine": 4 + "startLine": 6 } } }, @@ -81,7 +81,7 @@ "uri": "bad-eval-with-code-flow.py" }, "region": { - "startLine": 38 + "startLine": 10 } } }, @@ -104,14 +104,27 @@ } /* { dg-begin-multiline-output "" } -bad-eval-with-code-flow.py:8:1: warning: Use of tainted variable 'raw_input' in the insecure function 'eval'. [PY2335] +bad-eval-with-code-flow.py:10:1: warning: Use of tainted variable 'raw_input' in the insecure function 'eval'. [PY2335] + 10 | print(eval(raw_input)) + | ^ events 1-2 | + | 5 | print("Hello, world!") + | | ^ + | | | + | | (1) + | 6 | expr = input("Expression> ") + | | ~ + | | | + | | (2) | +--> event 3 | + | 10 | print(eval(raw_input)) + | | ^ + | | | + | | (3) | { dg-end-multiline-output "" } */ // TODO: logical locations? -// TODO: fix showing the source code -- 2.26.3