The SARIF 2.1.0 spec says that although a "SARIF log file SHALL contain a serialization of the SARIF object model into the JSON format ... in the future, other serializations might be defined." (§3.1)
I've been experimenting with alternative serializations of SARIF (CBOR and JSON5 for now). To help with these experiments, this patch adds a new param "serialization" to -fdiagnostics-add-output='s "sarif" scheme. For now this must have value "json", but will be helpful for any followup patches. Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. Pushed to trunk as r16-314-ge504a59bd149f8. gcc/ChangeLog: * diagnostic-format-sarif.cc (sarif_serialization_format_json::write_to_file): New. (sarif_builder::m_formatted): Replace field with... (sarif_builder::m_serialization_format): ...this. (sarif_builder::sarif_builder): Update for field change. (sarif_builder::flush_to_file): Call m_serialization_format's write_to_file vfunc. (sarif_output_format::sarif_output_format): Replace param "formatted" with "serialization_format". (sarif_stream_output_format::sarif_stream_output_format): Likewise. (sarif_file_output_format::sarif_file_output_format): Likewise. (diagnostic_output_format_init_sarif_stderr): Make a sarif_serialization_format_json and pass it to diagnostic_output_format_init_sarif. (diagnostic_output_format_open_sarif_file): Split out into... (diagnostic_output_file::try_to_open): ...this, adding "serialization_kind" param. (diagnostic_output_format_init_sarif_file): Update for new param to diagnostic_output_format_open_sarif_file. Make a sarif_serialization_format_json and pass it to diagnostic_output_format_init_sarif. (diagnostic_output_format_init_sarif_stream): Make a sarif_serialization_format_json and pass it to diagnostic_output_format_init_sarif. (make_sarif_sink): Replace param "formatted" with "serialization". (selftest::test_make_location_object): Update for changes to sarif_builder ctor. * diagnostic-format-sarif.h (enum class sarif_serialization_kind): New. (diagnostic_output_format_open_sarif_file): Add param "serialization_kind". (class sarif_serialization_format): New. (class sarif_serialization_format_json): New. 
(make_sarif_sink): Replace param "formatted" with "serialization_format". * diagnostic-output-file.h (diagnostic_output_file::try_to_open): New decl. * diagnostic.h (enum diagnostics_output_format): Tweak comments. * doc/invoke.texi (-fdiagnostics-add-output): Add "serialization" param to sarif scheme. * libgdiagnostics.cc (sarif_sink::sarif_sink): Update for change to make_sarif_sink. * opts-diagnostic.cc (sarif_scheme_handler::make_sink): Add "serialization" param and pass it on to make_sarif_sink. --- gcc/diagnostic-format-sarif.cc | 115 ++++++++++++++++++++++++--------- gcc/diagnostic-format-sarif.h | 42 +++++++++++- gcc/diagnostic-output-file.h | 7 ++ gcc/diagnostic.h | 4 +- gcc/doc/invoke.texi | 5 ++ gcc/libgdiagnostics.cc | 3 +- gcc/opts-diagnostic.cc | 33 +++++++++- 7 files changed, 173 insertions(+), 36 deletions(-) diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc index f322991ab2e..bc6abdff5e4 100644 --- a/gcc/diagnostic-format-sarif.cc +++ b/gcc/diagnostic-format-sarif.cc @@ -634,6 +634,18 @@ private: std::vector<std::unique_ptr<sarif_result>> m_results; }; +/* Classes for abstracting away JSON vs other serialization formats. */ + +// class sarif_serialization_format_json : public sarif_serialization_format + +void +sarif_serialization_format_json::write_to_file (FILE *outf, + const json::value &top) +{ + top.dump (outf, m_formatted); + fprintf (outf, "\n"); +} + /* A class for managing SARIF output (for -fdiagnostics-format=sarif-stderr and -fdiagnostics-format=sarif-file). 
@@ -687,7 +699,7 @@ public: pretty_printer &printer, const line_maps *line_maps, const char *main_input_filename_, - bool formatted, + std::unique_ptr<sarif_serialization_format> serialization_format, const sarif_generation_options &sarif_gen_opts); ~sarif_builder (); @@ -891,7 +903,7 @@ private: int m_tabstop; - bool m_formatted; + std::unique_ptr<sarif_serialization_format> m_serialization_format; const sarif_generation_options m_sarif_gen_opts; unsigned m_next_result_idx; @@ -1561,7 +1573,7 @@ sarif_builder::sarif_builder (diagnostic_context &context, pretty_printer &printer, const line_maps *line_maps, const char *main_input_filename_, - bool formatted, + std::unique_ptr<sarif_serialization_format> serialization_format, const sarif_generation_options &sarif_gen_opts) : m_context (context), m_printer (&printer), @@ -1576,12 +1588,13 @@ sarif_builder::sarif_builder (diagnostic_context &context, m_rule_id_set (), m_rules_arr (new json::array ()), m_tabstop (context.m_tabstop), - m_formatted (formatted), + m_serialization_format (std::move (serialization_format)), m_sarif_gen_opts (sarif_gen_opts), m_next_result_idx (0), m_current_code_flow (nullptr) { gcc_assert (m_line_maps); + gcc_assert (m_serialization_format); /* Mark MAIN_INPUT_FILENAME_ as the artifact that the tool was instructed to scan. 
@@ -1823,8 +1836,7 @@ void sarif_builder::flush_to_file (FILE *outf) { std::unique_ptr<sarif_log> top = flush_to_object (); - top->dump (outf, m_formatted); - fprintf (outf, "\n"); + m_serialization_format->write_to_file (outf, *top); } /* Attempt to convert DIAG_KIND to a suitable value for the "level" @@ -3534,11 +3546,11 @@ protected: sarif_output_format (diagnostic_context &context, const line_maps *line_maps, const char *main_input_filename_, - bool formatted, + std::unique_ptr<sarif_serialization_format> serialization_format, const sarif_generation_options &sarif_gen_opts) : diagnostic_output_format (context), m_builder (context, *get_printer (), line_maps, main_input_filename_, - formatted, sarif_gen_opts), + std::move (serialization_format), sarif_gen_opts), m_buffer (nullptr) {} @@ -3552,12 +3564,11 @@ public: sarif_stream_output_format (diagnostic_context &context, const line_maps *line_maps, const char *main_input_filename_, - bool formatted, + std::unique_ptr<sarif_serialization_format> serialization_format, const sarif_generation_options &sarif_gen_opts, FILE *stream) : sarif_output_format (context, line_maps, main_input_filename_, - formatted, - sarif_gen_opts), + std::move (serialization_format), sarif_gen_opts), m_stream (stream) { } @@ -3579,11 +3590,11 @@ public: sarif_file_output_format (diagnostic_context &context, const line_maps *line_maps, const char *main_input_filename_, - bool formatted, + std::unique_ptr<sarif_serialization_format> serialization_format, const sarif_generation_options &sarif_gen_opts, diagnostic_output_file output_file) : sarif_output_format (context, line_maps, main_input_filename_, - formatted, sarif_gen_opts), + std::move (serialization_format), sarif_gen_opts), m_output_file (std::move (output_file)) { gcc_assert (m_output_file.get_open_file ()); @@ -3747,26 +3758,33 @@ diagnostic_output_format_init_sarif_stderr (diagnostic_context &context, { gcc_assert (line_maps); const sarif_generation_options sarif_gen_opts; + 
auto serialization + = std::make_unique<sarif_serialization_format_json> (formatted); diagnostic_output_format_init_sarif (context, std::make_unique<sarif_stream_output_format> (context, line_maps, main_input_filename_, - formatted, + std::move (serialization), sarif_gen_opts, stderr)); } -/* Attempt to open BASE_FILE_NAME.sarif for writing. +/* Attempt to open "BASE_FILE_NAME""EXTENSION" for writing. Return a non-null diagnostic_output_file, or return a null diagnostic_output_file and complain to CONTEXT using LINE_MAPS. */ diagnostic_output_file -diagnostic_output_format_open_sarif_file (diagnostic_context &context, - line_maps *line_maps, - const char *base_file_name) +diagnostic_output_file::try_to_open (diagnostic_context &context, + line_maps *line_maps, + const char *base_file_name, + const char *extension, + bool is_binary) { + gcc_assert (extension); + gcc_assert (extension[0] == '.'); + if (!base_file_name) { rich_location richloc (line_maps, UNKNOWN_LOCATION); @@ -3777,21 +3795,51 @@ diagnostic_output_format_open_sarif_file (diagnostic_context &context, } label_text filename = label_text::take (concat (base_file_name, - ".sarif", + extension, nullptr)); - FILE *outf = fopen (filename.get (), "w"); + FILE *outf = fopen (filename.get (), is_binary ? "wb" : "w"); if (!outf) { rich_location richloc (line_maps, UNKNOWN_LOCATION); context.emit_diagnostic_with_group (DK_ERROR, richloc, nullptr, 0, - "unable to open %qs for SARIF output: %m", + "unable to open %qs for diagnostic output: %m", filename.get ()); return diagnostic_output_file (); } return diagnostic_output_file (outf, true, std::move (filename)); } +/* Attempt to open BASE_FILE_NAME.sarif for writing JSON. + Return a non-null diagnostic_output_file, + or return a null diagnostic_output_file and complain to CONTEXT + using LINE_MAPS. 
*/ + +diagnostic_output_file +diagnostic_output_format_open_sarif_file (diagnostic_context &context, + line_maps *line_maps, + const char *base_file_name, + enum sarif_serialization_kind serialization_kind) +{ + const char *suffix; + bool is_binary; + switch (serialization_kind) + { + default: + gcc_unreachable (); + case sarif_serialization_kind::json: + suffix = ".sarif"; + is_binary = false; + break; + } + + return diagnostic_output_file::try_to_open (context, + line_maps, + base_file_name, + suffix, + is_binary); +} + /* Populate CONTEXT in preparation for SARIF output to a file named BASE_FILE_NAME.sarif. */ @@ -3807,7 +3855,10 @@ diagnostic_output_format_init_sarif_file (diagnostic_context &context, diagnostic_output_file output_file = diagnostic_output_format_open_sarif_file (context, line_maps, - base_file_name); + base_file_name, + sarif_serialization_kind::json); + auto serialization + = std::make_unique<sarif_serialization_format_json> (formatted); const sarif_generation_options sarif_gen_opts; diagnostic_output_format_init_sarif @@ -3815,7 +3866,7 @@ diagnostic_output_format_init_sarif_file (diagnostic_context &context, std::make_unique<sarif_file_output_format> (context, line_maps, main_input_filename_, - formatted, + std::move (serialization), sarif_gen_opts, std::move (output_file))); } @@ -3831,12 +3882,14 @@ diagnostic_output_format_init_sarif_stream (diagnostic_context &context, { gcc_assert (line_maps); const sarif_generation_options sarif_gen_opts; + auto serialization + = std::make_unique<sarif_serialization_format_json> (formatted); diagnostic_output_format_init_sarif (context, std::make_unique<sarif_stream_output_format> (context, line_maps, main_input_filename_, - formatted, + std::move (serialization), sarif_gen_opts, stream)); } @@ -3845,7 +3898,7 @@ std::unique_ptr<diagnostic_output_format> make_sarif_sink (diagnostic_context &context, const line_maps &line_maps, const char *main_input_filename_, - bool formatted, + 
std::unique_ptr<sarif_serialization_format> serialization, const sarif_generation_options &sarif_gen_opts, diagnostic_output_file output_file) { @@ -3853,7 +3906,7 @@ make_sarif_sink (diagnostic_context &context, = std::make_unique<sarif_file_output_format> (context, &line_maps, main_input_filename_, - formatted, + std::move (serialization), sarif_gen_opts, std::move (output_file)); sink->update_printer (); @@ -3908,7 +3961,9 @@ private: bool formatted, const sarif_generation_options &sarif_gen_opts) : sarif_output_format (context, line_maps, main_input_filename_, - formatted, sarif_gen_opts) + std::make_unique<sarif_serialization_format_json> + (formatted), + sarif_gen_opts) { } bool machine_readable_stderr_p () const final override @@ -3940,8 +3995,10 @@ test_make_location_object (const sarif_generation_options &sarif_gen_opts, test_diagnostic_context dc; pretty_printer pp; - sarif_builder builder (dc, pp, line_table, "MAIN_INPUT_FILENAME", - true, sarif_gen_opts); + sarif_builder builder + (dc, pp, line_table, "MAIN_INPUT_FILENAME", + std::make_unique<sarif_serialization_format_json> (true), + sarif_gen_opts); /* These "columns" are byte offsets, whereas later on the columns in the generated SARIF use sarif_builder::get_sarif_column and diff --git a/gcc/diagnostic-format-sarif.h b/gcc/diagnostic-format-sarif.h index 524a0c7c6b5..644625747cc 100644 --- a/gcc/diagnostic-format-sarif.h +++ b/gcc/diagnostic-format-sarif.h @@ -27,10 +27,20 @@ along with GCC; see the file COPYING3. If not see class logical_location; +/* Enum for choosing what format to serializing the generated SARIF into. 
*/ + +enum class sarif_serialization_kind +{ + json, + + num_values +}; + extern diagnostic_output_file diagnostic_output_format_open_sarif_file (diagnostic_context &context, line_maps *line_maps, - const char *base_file_name); + const char *base_file_name, + enum sarif_serialization_kind serialization_kind); extern void diagnostic_output_format_init_sarif_stderr (diagnostic_context &context, @@ -50,6 +60,34 @@ diagnostic_output_format_init_sarif_stream (diagnostic_context &context, bool formatted, FILE *stream); +/* Abstract base class for handling JSON output vs other kinds of + serialization of the json tree. */ + +class sarif_serialization_format +{ +public: + virtual ~sarif_serialization_format () {} + virtual void write_to_file (FILE *outf, + const json::value &top) = 0; +}; + +/* Concrete subclass for serializing SARIF as JSON. */ + +class sarif_serialization_format_json : public sarif_serialization_format +{ +public: + sarif_serialization_format_json (bool formatted) + : m_formatted (formatted) + { + } + void write_to_file (FILE *outf, const json::value &top) final override; + +private: + bool m_formatted; +}; + +/* Control of SARIF generation. 
*/ + enum class sarif_version { v2_1_0, @@ -73,7 +111,7 @@ extern std::unique_ptr<diagnostic_output_format> make_sarif_sink (diagnostic_context &context, const line_maps &line_maps, const char *main_input_filename_, - bool formatted, + std::unique_ptr<sarif_serialization_format> serialization_format, const sarif_generation_options &sarif_gen_opts, diagnostic_output_file output_file); diff --git a/gcc/diagnostic-output-file.h b/gcc/diagnostic-output-file.h index 2e877c998cc..a0b2e1bf459 100644 --- a/gcc/diagnostic-output-file.h +++ b/gcc/diagnostic-output-file.h @@ -91,6 +91,13 @@ public: FILE *get_open_file () const { return m_outf; } const char *get_filename () const { return m_filename.get (); } + static diagnostic_output_file + try_to_open (diagnostic_context &context, + line_maps *line_maps, + const char *base_file_name, + const char *extension, + bool binary); + private: FILE *m_outf; bool m_owned; diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h index 36f4a1c8f9f..5cde881c074 100644 --- a/gcc/diagnostic.h +++ b/gcc/diagnostic.h @@ -72,10 +72,10 @@ enum diagnostics_output_format /* JSON-based output, to a file. */ DIAGNOSTICS_OUTPUT_FORMAT_JSON_FILE, - /* SARIF-based output, to stderr. */ + /* SARIF-based output, as JSON to stderr. */ DIAGNOSTICS_OUTPUT_FORMAT_SARIF_STDERR, - /* SARIF-based output, to a file. */ + /* SARIF-based output, to a JSON file. */ DIAGNOSTICS_OUTPUT_FORMAT_SARIF_FILE }; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 5f1c0b89c1c..d1925c98c2f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -6053,6 +6053,11 @@ Specify the filename to write the SARIF output to, potentially with a leading absolute or relative path. If not specified, it defaults to @file{@var{source}.sarif}. +@item serialization=@r{[}json@r{]} +Specify the serialization format to use when writing out the SARIF. +Currently this can only be @code{json}, but is present as an +extension point for experimenting with other serializations. 
+ @item version=@r{[}2.1@r{|}2.2-prerelease@r{]} Specify the version of SARIF to use for the output. If not specified, defaults to 2.1. @code{2.2-prerelease} uses an unofficial draft of the diff --git a/gcc/libgdiagnostics.cc b/gcc/libgdiagnostics.cc index 49524cc922d..c2eb9757d18 100644 --- a/gcc/libgdiagnostics.cc +++ b/gcc/libgdiagnostics.cc @@ -1076,10 +1076,11 @@ sarif_sink::sarif_sink (diagnostic_manager &mgr, { diagnostic_output_file output_file (dst_stream, false, label_text::borrow ("sarif_sink")); + auto serialization = std::make_unique<sarif_serialization_format_json> (true); auto inner_sink = make_sarif_sink (mgr.get_dc (), *mgr.get_line_table (), main_input_file->get_name (), - true, + std::move (serialization), sarif_gen_opts, std::move (output_file)); mgr.get_dc ().add_sink (std::move (inner_sink)); diff --git a/gcc/opts-diagnostic.cc b/gcc/opts-diagnostic.cc index b51c8a8b422..1eec0103d3b 100644 --- a/gcc/opts-diagnostic.cc +++ b/gcc/opts-diagnostic.cc @@ -434,6 +434,8 @@ sarif_scheme_handler::make_sink (const context &ctxt, const scheme_name_and_params &parsed_arg) const { label_text filename; + enum sarif_serialization_kind serialization_kind + = sarif_serialization_kind::json; enum sarif_version version = sarif_version::v2_1_0; for (auto& iter : parsed_arg.m_kvs) { @@ -444,6 +446,20 @@ sarif_scheme_handler::make_sink (const context &ctxt, filename = label_text::take (xstrdup (value.c_str ())); continue; } + if (key == "serialization") + { + static const std::array<std::pair<const char *, enum sarif_serialization_kind>, + (size_t)sarif_serialization_kind::num_values> value_names + {{{"json", sarif_serialization_kind::json}}}; + + if (!parse_enum_value<enum sarif_serialization_kind> + (ctxt, unparsed_arg, + key, value, + value_names, + serialization_kind)) + return nullptr; + continue; + } if (key == "version") { static const std::array<std::pair<const char *, enum sarif_version>, @@ -462,6 +478,7 @@ sarif_scheme_handler::make_sink (const context 
&ctxt, /* Key not found. */ auto_vec<const char *> known_keys; known_keys.safe_push ("file"); + known_keys.safe_push ("serialization"); known_keys.safe_push ("version"); ctxt.report_unknown_key (unparsed_arg, key, get_scheme_name (), known_keys); @@ -479,7 +496,8 @@ sarif_scheme_handler::make_sink (const context &ctxt, : ctxt.m_opts.x_main_input_basename); output_file = diagnostic_output_format_open_sarif_file (ctxt.m_dc, line_table, - basename); + basename, + serialization_kind); } if (!output_file) return nullptr; @@ -487,10 +505,21 @@ sarif_scheme_handler::make_sink (const context &ctxt, sarif_generation_options sarif_gen_opts; sarif_gen_opts.m_version = version; + std::unique_ptr<sarif_serialization_format> serialization_obj; + switch (serialization_kind) + { + default: + gcc_unreachable (); + case sarif_serialization_kind::json: + serialization_obj + = std::make_unique<sarif_serialization_format_json> (true); + break; + } + auto sink = make_sarif_sink (ctxt.m_dc, *line_table, ctxt.m_opts.x_main_input_filename, - true, + std::move (serialization_obj), sarif_gen_opts, std::move (output_file)); return sink; -- 2.26.3