On Sat, 2024-09-21 at 22:49 -0500, 4444-thor wrote: > From: thor <th...@protonmail.com> > > This patch allows one to dump a tree as HTML from within gdb by > invoking, > i.e, > htlml-tree tree > > gcc/ChangeLog: > * gcc/gdbhooks.py: Rudimentary dumping of GENERIC trees as html > through > one new python function (jsonNodeToHtml) and one new gdb > command > (html-tree). There is also a parameter to allow html-tree to > automatically open a browser to view the HTML, but that needs a > fix > or workaround that I don't understand. > > Signed-off-by: Thor C Preimesberger <tcpreimesber...@gmail.com>
Hi Thor, thanks for the patch. I didn't try running it, but I notice that the patch is building the HTML directly by writing strings to the output file, using python f- strings, and there's no escaping of values. Hence if a value contains characters like '"', '<', or '>' the resulting HTML will be ill-formed (similar to a SQL injection attack). You probably need to use html.escape when writing string values from the JSON into the HTML; see: https://docs.python.org/3/library/html.html#html.escape Another approach would be to the HTML as a DOM tree in the python script, and then serialize that; see e.g.: https://docs.python.org/3/library/xml.etree.elementtree.html for a relatively simple API that's readily available in the Python standard library - but that would be a rewrite of jsonNodeToHtml (but probably be more robust in the long term). Hope this is helpful Dave > > --- > gcc/gdbhooks.py | 113 > ++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 113 insertions(+) > > diff --git a/gcc/gdbhooks.py b/gcc/gdbhooks.py > index 904ee28423a..fff85d738b4 100644 > --- a/gcc/gdbhooks.py > +++ b/gcc/gdbhooks.py > @@ -143,6 +143,7 @@ import os.path > import re > import sys > import tempfile > +import json > > import gdb > import gdb.printing > @@ -889,6 +890,118 @@ class DotFn(gdb.Command): > > DotFn() > > +# Quick and dirty way to turn a tree as JSON object into HTML. > +# Used in treeToHtml. > + > +def jsonNodeToHtml(node_list, html_doc): > + for node in node_list: > + id = node["addr"] > + html_doc.write("<div id=%s>" % id) > + for key, value in node.items(): > + if (key == "addr"): > + html_doc.write("addr:") > + html_doc.write(f"<a href=#{value}>") > + html_doc.write(f"{value}") > + html_doc.write(f"</a><br>") > + if (type(value) == dict): > + html_doc.write(f"{key}:") > + sub = value > + if "ref_addr" in sub.keys(): > + html_doc.write(f"<p style=\"margin-left: > 10px\">") > + subAddress = sub["ref_addr"] > + subCode = sub["tree_code"] > + html_doc.write(f"ref_addr: <a > href=#{subAddress}>{subAddress}</a><br>") > + html_doc.write(f"tree_code: {subCode}") > + html_doc.write("</p>") > + # Currently, child tree nodes that are referred to > by OMP > + # accsessors are not dumped recursively by > + # dump_generic_node_json, i.e. they have no > corresponding > + # entry in node_list. So we just read it out key- > value pairs. > + else: > + html_doc.write(f"<p style=\"margin-left: 10 > px\">") > + for key, value in sub.items(): > + html_doc.write(f"{key}: {value}<br>") > + html_doc.write("</span>") > + elif (type(value) == list): > + html_doc.write(f"{key}:<br>") > + html_doc.write(f"<p style=\"margin-left: 10px;\">") > + for i in value: > + for key, value in i.items(): > + if (key == "ref_addr"): > + html_doc.write("ref_addr:") > + html_doc.write(f"<a href=#{value}>") > + html_doc.write(f"{value}") > + html_doc.write(f"</a><br>") > + else: > + html_doc.write(f"{key}: {value}<br>") > + html_doc.write("</p>") > + elif (key != "addr"): > + html_doc.write(f"{key}: {value}<br>") > + html_doc.write("<br></div>") > + > +class GCChtml (gdb.Parameter): > + """ > + This parameter defines what program is used to view HTML files > + by the html-tree command. It will be invoked as gcc-html <html- > file>. > + """ > + def __init__(self): > + super(GCChtml, self).__init__('gcc-html', > + gdb.COMMAND_NONE, gdb.PARAM_STRING) > + self.value = "firefox" > + > +gcc_html_cmd = GCChtml() > + > +class treeToHtml (gdb.Command): > + """ > + A custom command that converts a tree to html after it is > + first parsed to JSON. The html is saved in cwd as <treename> + > ".html". > + > + TODO : It'd be nice if we then open the html with the program > specified > + by the GCChtml parameter, but there's an error thrown whenever I > try > + to do this while attached to cc1/cc1plus. > + > + Examples of use: > + (gdb) html-tree current_tree > + """ > + > + def __init__(self): > + gdb.Command.__init__(self, 'html-tree', gdb.COMMAND_USER) > + > + def invoke(self, arg, file): > + > + args = gdb.string_to_argv(arg) > + if len(args) >= 2: > + print ("Error: Too many arguments") > + return > + > + if len(args) >= 1: > + treeName = args[0] > + print(treeName) > + > + # We call a function within GCC to dump the JSON > + # and create a tempfile to store the JSON before we pass it > into our > + # Python shell. > + f = tempfile.NamedTemporaryFile(delete=False) > + filename = f.name > + gdb.execute('set $%s = fopen (\"%s\", \"w\")' % ("jsonTemp", > filename)) > + gdb.execute("call debug_dump_node_json (%s, $%s)" > + % (treeName, "jsonTemp")) > + gdb.execute("call fclose($%s)" % "jsonTemp") > + with open(filename, "r") as foobar: > + obj = json.loads(foobar.read()) > + > + # Create an html file in cwd, and dump our tree as HTML. > + htmlFile = open(treeName + ".html", "w") > + with open(htmlFile.name, "w") as _: > + jsonNodeToHtml(obj, _) > + > + print(f"HTML written to {htmlFile.name} in cwd.") > + > + # FIX : Open the HTML. > + # html_cmd = gcc_html_cmd.value > + # os.system("%s \"%s\"" % (html_cmd, htmlFile.name)) > + > +treeToHtml() > # Try and invoke the user-defined command "on-gcc-hooks-load". > Doing > # this allows users to customize the GCC extensions once they've > been > # loaded by defining the hook in their .gdbinit.