From e1537950ce1d7e99b4d9de28cd9ff91a280b2e64 Mon Sep 17 00:00:00 2001
From: John Morris <john.morris@crunchydata.com>
Date: Thu, 1 Feb 2024 15:54:41 -0800
Subject: [PATCH] Rebased doxygen doc

---
 doc/doxygen/Doxyfile.in    |  71 ++++++++++++++
 doc/doxygen/doxy_filter.l  | 193 +++++++++++++++++++++++++++++++++++++
 doc/doxygen/meson.build    |  89 +++++++++++++++++
 doc/src/sgml/docguide.sgml |   4 +
 meson.build                |   1 +
 meson_options.txt          |   2 +
 6 files changed, 360 insertions(+)
 create mode 100644 doc/doxygen/Doxyfile.in
 create mode 100644 doc/doxygen/doxy_filter.l
 create mode 100644 doc/doxygen/meson.build

diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in
new file mode 100644
index 0000000000..7e32b702be
--- /dev/null
+++ b/doc/doxygen/Doxyfile.in
@@ -0,0 +1,71 @@
+################################################################################
+#
+# This file holds Doxygen settings used for creating PostgreSQL Documentation.
+# It only shows non-default values.
+# For a list of config values and what they mean, see the
+# complete Doxyfile shipped with Doxygen.
+#
+# Values surrounded by "@" are filled in by Meson config.
+#
+# Creating DOT graphs is very slow, so graphs are disabled by default.
+# If you want to enable them, configure the build with
+#    meson configure -Ddoxygen_graphs=true
+#
+#################################################################################
+
+# PostgreSQL project info. The version number is filled in by Meson.
+PROJECT_NAME           = PostgreSQL
+PROJECT_NUMBER         = @PROJECT_NUMBER@
+PROJECT_BRIEF          = "The world's most advanced open source database"
+PROJECT_LOGO           =
+
+# Where the output files go. Both paths are filled in by Meson.
+OUTPUT_DIRECTORY       = @OUTPUT_DIRECTORY@
+CREATE_SUBDIRS         = YES
+STRIP_FROM_PATH        = @STRIP_FROM_PATH@
+
+# Output file format: HTML is generated, LaTeX is not.
+GENERATE_LATEX         = NO
+GENERATE_HTML          = YES
+GENERATE_TREEVIEW      = YES
+FULL_SIDEBAR           = YES
+
+# What the output contains.
+JAVADOC_AUTOBRIEF      = YES
+JAVADOC_BANNER         = YES
+OPTIMIZE_OUTPUT_FOR_C  = YES
+DISTRIBUTE_GROUP_DOC   = YES
+INLINE_SIMPLE_STRUCTS  = YES
+TYPEDEF_HIDES_STRUCT   = YES
+SHOW_INCLUDE_FILES     = NO
+SOURCE_BROWSER         = YES
+STRIP_CODE_COMMENTS    = NO
+
+# Performance.
+LOOKUP_CACHE_SIZE      = 3
+NUM_PROC_THREADS       = 0
+
+# Which files to process. NOTE(review): FILTER_PATTERNS entries normally have the form pattern=filter; confirm these bare patterns are honored.
+INPUT                  = @INPUT@
+RECURSIVE              = YES
+EXCLUDE_PATTERNS       = ppport.h c.h postgres.h */test/*
+EXCLUDE_SYMBOLS        = __attribute__
+INPUT_FILTER           = @INPUT_FILTER@
+FILTER_PATTERNS        = *.c *.h *.cpp
+CLANG_ASSISTED_PARSING = YES
+
+# Graphs. The settings marked @GRAPHS@ are switched together by Meson (see doxygen_graphs above).
+HAVE_DOT               = @GRAPHS@
+CLASS_GRAPH            = @GRAPHS@
+COLLABORATION_GRAPH    = @GRAPHS@
+GROUP_GRAPHS           = @GRAPHS@
+DOT_UML_DETAILS        = YES
+INCLUDE_GRAPH          = @GRAPHS@
+INCLUDED_BY_GRAPH      = @GRAPHS@
+CALL_GRAPH             = @GRAPHS@
+CALLER_GRAPH           = @GRAPHS@
+GRAPHICAL_HIERARCHY    = @GRAPHS@
+DIRECTORY_GRAPH        = @GRAPHS@
+DOT_IMAGE_FORMAT       = svg
+INTERACTIVE_SVG        = YES
+DOT_MULTI_TARGETS      = YES
diff --git a/doc/doxygen/doxy_filter.l b/doc/doxygen/doxy_filter.l
new file mode 100644
index 0000000000..2d49c98682
--- /dev/null
+++ b/doc/doxygen/doxy_filter.l
@@ -0,0 +1,193 @@
+ /* -------------------------------------------------------------------------------------------------------------
+ A Doxygen filter which annotates comments in a C file.
+
+ The goal is to take an existing, non-doxygen comment style and turn it into doxygen comments.
+ It is packaged as a filter. The commented code never gets changed, but doxygen converts
+ the comments "on the fly".
+
+ The idea is to identify all comments in the code while keeping track of a small amount of context from
+ around the comment.  The context allows us to determine:
+    - Is the comment at the beginning of the file?
+    - Is the comment at the global level in the file?
+    - Is the comment inside a struct/union/enum?
+    - Does the comment trail other statements? Or is it in front of other statements. (What about inside?)
+
+ Basically, the filter does the following:
+     - A comment at the start of a file gets the @FILE tag added.
+       The tag is necessary for doxygen to recognize C code.
+       By convention, this first comment describes the overall purpose of the file.
+    -  Doxygen comments are passed on through.
+       Doxygen comments include "/// ..." comments and C comments whose opener carries an extra '*'.
+    -  Regular comments at the global level and inside structs/enums/unions are converted into doxygen comments.
+       The assumption is these comments describe fields and entities like procedures, variables and macros.
+    -  Trailing comments are associated with the previous item.
+           "int Foo; // Comment about Foo"  becomes a doxygen comment describing Foo.
+
+ The filter doesn't handle all existing doxygen comments. For example, it doesn't recognize the grouping tags
+     "//{"  and "//}", nor does it passively echo other doxygen constructs.  Also, it has a very limited
+     understanding of C syntax. For now, it is enough to get by.
+
+ TODO? respect conditional compilation, add a File comment if none exists, be robust in not altering existing doxygen comments.
+ ------------------------------------------------------------------------------------------------------------------*/
+%option noyywrap nounput noinput
+%{
+#include <stdbool.h>
+%}
+    /* Variables used to track the context around a comment. */
+    int lines = 0; /* How many lines of code have been processed. */
+    int level = 0;  /* Level of nesting within brackets. Used to detect globals. */
+    bool trailing = false;  /* Is the comment at the end of a statement? */
+    bool complexData = false; // Are we inside a global complex data type (struct,union,enum)?
+    char *fileName;  // The name of the file we are filtering.
+
+    /* Forward reference. Process comments when they are discovered. */
+    static void scanComment(char *comment);
+
+ /* Regular expression patterns for the scanner. CharacterLiteral takes any backslash escape, so an escaped double quote in a character literal cannot open a bogus string. */
+QuotedString                            \"([^\\\"]|\\.)*\"
+CharacterLiteral                        '(\\.|[^\\'\n])+'
+Identifier                              [a-zA-Z_$]+[a-zA-Z_$0-9]*
+MultiLineComment                        [/][*][^*]*[*]+([^*/][^*]*[*]+)*[/]
+SingleLineComment                       "//".*
+Comment                                 {SingleLineComment}|{MultiLineComment}
+Define                                  ^#define(.*\\\n)*(.*)
+%%
+
+  /* Complex data types */              /* Note when we enter one at the global level. It ends at a global semicolon or at the bracket that returns to the global level. */
+struct|union|enum                       ECHO; if (level == 0) complexData = true;
+";"                                     ECHO; if (level == 0) complexData = false; trailing = true;
+
+  /* Other keywords or identifiers */   /* Pass them through so they don't get confused with keywords */
+{Identifier}                            ECHO;
+
+  /* Quoted string literals. */         /* Pass them through. */
+{QuotedString}                          ECHO;
+
+  /* Character literals */              /* Pass them through. */
+{CharacterLiteral}                      ECHO;
+
+   /* Comments. */                      /* Scan and convert to doxygen as needed. */
+{Comment}                               scanComment(yytext);
+
+  /* Multi line #define ... */          /* Skip over #defines since they may contain unbalanced braces. A comment after one is treated as trailing. */
+{Define}                                ECHO;  trailing=true;
+
+  /* New line. */                       /* Any subsequent comment is leading, and we are no longer at start of file. */
+"\n"                                    ECHO; lines++; trailing = false;  //fprintf(stderr, "  lines=%d  level=%d  complex=%d   ", lines, level, complexData);
+
+ /* Track nesting depth. */             /* Are we at the global level or not? Are we in arg list or not? */
+                                        /*   We assume well formed code, so a parenthesis will never match a brace. */
+"{"|"("                                 ECHO; level++;
+"}"|")"                                 ECHO; level--; if (level == 0) complexData = false; trailing = true;
+
+ /* Anything else */                    /* Pass it through */
+.                                       ECHO;
+
+%%
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+/*
+ * Custom banner character to be removed from comments.
+ * We'll hardcode it to suit PostgreSQL, but it should be set through a command line arg.
+ */
+char customBanner = '-';
+
+/*
+ * A doxygen "filter": a flex scanner which reads the C file named by its only argument, annotates
+ * its comments with doxygen directives and writes to stdout. Exits 0 on success, 1 on any error.
+ */
+int main(int argc, char**argv) {
+
+    /* Verify we have a single argument; the usage text names the program as it was invoked. */
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <filename>\n", argc > 0 ? argv[0] : "doxy_filter");
+        exit(1);
+    }
+
+    /* Try to open the file as stdin */
+    fileName = argv[1];
+    if (freopen(fileName, "r", stdin) == NULL) {
+        fprintf(stderr, "Unable to open file %s: %s\n", fileName, strerror(errno));
+        exit(1);
+    }
+
+    /* Now, parse the file, sending output to stdout. A failed write must not look like success. */
+    return (yylex() != 0 || fflush(stdout) != 0 || ferror(stdout)) ? 1 : 0;
+}
+
+/*
+ * Write a NUL-terminated string to stdout as is; no newline is appended.
+ */
+static void putstr(char *str) {
+    fwrite(str, sizeof(char), strlen(str), stdout);
+}
+
+
+/*
+ * Remove banners from a comment in place, where a banner is a run of two or more
+ * consecutive 'c' characters. The two characters that open the comment are never touched.
+ */
+static void removeBanner(char* comment, char c) {
+    char *writePtr = comment + 2;
+    char *readPtr = comment + 2;
+    bool twoInARow = false;
+
+    /* Scan across the comment, skipping two leading chars, moving non-banner characters forward. */
+    for (; *readPtr != '\0'; readPtr++) {
+        twoInARow = (readPtr[0] == c) && (readPtr[1] == c || twoInARow);
+        if (!twoInARow)
+            *writePtr++ = *readPtr;
+    }
+
+    /*
+     * Special case for the end of a C comment: we may have deleted its final '*'.
+     * Only text after the opener counts, so the opener's own '*' is never taken for the
+     * closing one (a comment made only of stars would otherwise lose its terminator).
+     */
+    if (comment[0] == '/' && comment[1] == '*' && (writePtr - comment < 4 || writePtr[-2] != '*')) {
+        writePtr--;
+        *writePtr++ = '*';
+        *writePtr++ = '/';
+    }
+
+    *writePtr = '\0';
+}
+
+/*
+ * Emit one comment found by the scanner.
+ *
+ * The comment is promoted to a doxygen comment when the scanning context shows it sits at
+ * the global level or inside a struct/union/enum. Banners are stripped in every case.
+ */
+static void scanComment(char *comment) {
+    const char opener[3] = { comment[0], comment[1], '\0' };  /* Either "//" or the C comment opener. */
+    const bool promote = (level == 0) || complexData;         /* TODO: but not inside a statement. */
+
+    /*
+     * Strip banners first. A banner is a special character, repeated at least twice.
+     * Stripping starts after the two opening characters, which are emitted separately below.
+     */
+    removeBanner(comment, '/');
+    removeBanner(comment, '*');
+    removeBanner(comment, customBanner);
+
+    /* Start the output with the opening characters of the comment. */
+    fputs(opener, stdout);
+    if (promote) {
+        /* Repeating the 2nd character makes this a doxygen comment (harmless if it already is one). */
+        putchar(opener[1]);
+
+        /* A comment that follows a statement on the same line is attached to that statement. */
+        if (trailing)
+            fputs("< ", stdout);
+
+        /* No newline scanned yet means start of file: add the @file tag. TODO: should it be at end of comment? */
+        if (lines == 0)
+            printf(" @file %s ", fileName);
+    }
+
+    /* Finish with everything after the opening characters, now free of banners. */
+    putstr(comment + 2);
+}
diff --git a/doc/doxygen/meson.build b/doc/doxygen/meson.build
new file mode 100644
index 0000000000..aa4e1408d4
--- /dev/null
+++ b/doc/doxygen/meson.build
@@ -0,0 +1,89 @@
+# Generate doxygen pages for PostgreSQL using "ninja doxygen"
+#
+# Doxygen pages are optional. Nothing in this script should
+# cause PostgreSQL builds to fail.
+#
+# Currently there are no explicit error messages
+#   - If doxygen is not found, the doxygen target will not be defined.
+#   - If dot is not found, no graphs will be generated.
+#   - flex is already required, so we don't check for it.
+#
+# As a future enhancement, display meaningful error messages
+# when typing "ninja doxygen". Meson can display these messages
+# using a python custom target.
+#
+# To generate graphs, install graphviz and configure with
+#     "meson configure -Ddoxygen_graphs=true"
+# Generating graphs can be slow, so they are off by default.
+#
+
+# Look up the optional tools and the graph option before defining any target.
+doxygen_cmd = find_program('doxygen', native: true, required: false)
+dot_cmd = find_program('dot', native: true, required: false)
+doxygen_graphs = get_option('doxygen_graphs')
+
+# Without doxygen there is nothing to build, so leave this directory early.
+if not doxygen_cmd.found()
+    subdir_done()
+endif
+
+# Generate the C source of the doxygen filter from its flex source, then compile it.
+doxygen_filter_c = custom_target(
+    'doxy_filter_c',
+    input: 'doxy_filter.l',
+    output: 'doxy_filter.c',
+    command: [flex_cmd],  # Add '--', '-d' to debug flex filter
+    build_by_default: false)
+doxygen_filter_exe = executable(
+    'doxy_filter',
+    sources: doxygen_filter_c,
+    build_by_default: false)
+
+# Source trees scanned by doxygen.
+doxygen_source_dirs = []
+foreach doxygen_dir : ['src', 'contrib']
+    doxygen_source_dirs += join_paths(meson.source_root(), doxygen_dir)
+endforeach
+
+# Values substituted for the "@...@" placeholders of Doxyfile.in.
+# Graphs need both the doxygen_graphs option and the dot program.
+doxygen_want_graphs = doxygen_graphs and dot_cmd.found()
+doxygen_config = configuration_data()
+doxygen_config.set('INPUT', ' '.join(doxygen_source_dirs))
+doxygen_config.set('OUTPUT_DIRECTORY', meson.current_build_dir())
+doxygen_config.set('PROJECT_NUMBER', pg_version)
+doxygen_config.set('INCLUDE_PATH', ' '.join(postgres_inc_d))  # NOTE(review): Doxyfile.in has no @INCLUDE_PATH@ placeholder, so this value looks unused
+doxygen_config.set('INPUT_FILTER', doxygen_filter_exe.full_path())
+doxygen_config.set('STRIP_FROM_PATH', meson.source_root())
+doxygen_config.set('GRAPHS', doxygen_want_graphs ? 'YES' : 'NO')
+
+# Produce the Doxyfile from its template.
+doxyfile = configure_file(
+    input: 'Doxyfile.in',
+    output: 'Doxyfile',
+    configuration: doxygen_config)
+
+# Create a target to remove old doxygen html pages; otherwise the directory
+# ends up with a confusing mix of old and new pages. To avoid a circular
+# dependency, the html path is built here rather than taken from doxygen_html.
+# It is handed to Python as an argument (sys.argv[1]) so that quotes or backslashes in the path cannot corrupt the script.
+doxygen_html_dir = join_paths(meson.current_build_dir(), 'html')
+doxygen_clean = custom_target(
+    'doxygen_clean',
+    output : 'doxygen_clean',
+    command : [python, '-c', 'import shutil, sys; shutil.rmtree(sys.argv[1], ignore_errors=True)', doxygen_html_dir],
+    build_always_stale: true,
+    build_by_default: false)
+
+# Run doxygen on the configured Doxyfile, after building the filter and removing old pages.
+doxygen_html = custom_target(
+    'html',
+    input: doxyfile,
+    output: 'html',
+    depends: [doxygen_filter_exe, doxygen_clean],
+    command: [doxygen_cmd, doxyfile],
+    build_by_default: false,
+    build_always_stale: true)
+
+# "ninja doxygen" is the user-facing name of the html target.
+alias_target('doxygen', doxygen_html)
diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml
index db4bcce56e..3b812b31ec 100644
--- a/doc/src/sgml/docguide.sgml
+++ b/doc/src/sgml/docguide.sgml
@@ -33,6 +33,10 @@
   Additionally, a number of plain-text <filename>README</filename> files can
   be found throughout the <productname>PostgreSQL</productname> source tree,
   documenting various implementation issues.
+  Doxygen output describing <productname>PostgreSQL</productname> functions
+  and data types can be found at <ulink url="https://doxygen.postgresql.org"></ulink>.
+  If desired, Doxygen output can be generated locally with the
+  <command>ninja doxygen</command> command.
  </para>
 
  <para>
diff --git a/meson.build b/meson.build
index 8ed51b6aae..8ad652a511 100644
--- a/meson.build
+++ b/meson.build
@@ -2902,6 +2902,7 @@ subdir('src/interfaces/libpq/test')
 subdir('src/interfaces/ecpg/test')
 
 subdir('doc/src/sgml')
+subdir('doc/doxygen')
 
 generated_sources_ac += {'': ['GNUmakefile']}
 
diff --git a/meson_options.txt b/meson_options.txt
index 249ecc5ffd..81cbb5bb15 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -27,6 +27,8 @@ option('krb_srvnam', type: 'string', value: 'postgres',
 option('system_tzdata', type: 'string', value: '',
   description: 'Use system time zone data in specified directory')
 
+option('doxygen_graphs', type: 'boolean', value: false,
+   description: 'Include graphs in "ninja doxygen" output. Generating graphs can be very slow.')
 
 # Defaults
 
-- 
2.33.0