From 13ed98010b398991a25dcd3c54ee8f5f8824a4cc Mon Sep 17 00:00:00 2001
From: John Morris <john.morris@crunchydata.com>
Date: Thu, 1 Feb 2024 15:54:41 -0800
Subject: [PATCH] Rebased doxygen doc

---
 doc/doxygen/Doxyfile.in    |  71 ++++++++++++++++
 doc/doxygen/doxy_filter.l  | 168 +++++++++++++++++++++++++++++++++++++
 doc/doxygen/meson.build    |  73 ++++++++++++++++
 doc/src/sgml/docguide.sgml |   4 +
 meson.build                |   1 +
 meson_options.txt          |   2 +
 6 files changed, 319 insertions(+)
 create mode 100644 doc/doxygen/Doxyfile.in
 create mode 100644 doc/doxygen/doxy_filter.l
 create mode 100644 doc/doxygen/meson.build

diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in
new file mode 100644
index 0000000000..76cf4cdd15
--- /dev/null
+++ b/doc/doxygen/Doxyfile.in
@@ -0,0 +1,71 @@
+################################################################################
+#
+# This file holds Doxygen settings used for creating PostgreSQL Documentation.
+# It only shows non-default values.
+# For a list of config values and what they mean, see the
+# complete Doxyfile shipped with Doxygen.
+#
+# Values surrounded by "@" are filled in by Meson config.
+#
+# Creating DOT graphs is very slow, so graphs are disabled by default.
+# If you want to enable them, configure the build with
+#    meson configure -Ddoxygen_graphs=true
+#
+#################################################################################
+
+# Postgres project info
+PROJECT_NAME           = PostgreSQL
+PROJECT_NUMBER         = @PROJECT_NUMBER@
+PROJECT_BRIEF          = "The world's most advanced open source database"
+PROJECT_LOGO           =
+
+# Where the output files go.
+OUTPUT_DIRECTORY       = @OUTPUT_DIRECTORY@
+CREATE_SUBDIRS         = YES
+STRIP_FROM_PATH        = @STRIP_FROM_PATH@
+
+# Output file format.
+GENERATE_LATEX         = NO
+GENERATE_HTML          = YES
+GENERATE_TREEVIEW      = YES
+FULL_SIDEBAR           = YES
+
+# What the output contains.
+JAVADOC_AUTOBRIEF      = YES
+JAVADOC_BANNER         = YES
+OPTIMIZE_OUTPUT_FOR_C  = YES
+DISTRIBUTE_GROUP_DOC   = YES
+INLINE_SIMPLE_STRUCTS  = YES
+TYPEDEF_HIDES_STRUCT   = YES
+SHOW_INCLUDE_FILES     = NO
+SOURCE_BROWSER         = YES
+STRIP_CODE_COMMENTS    = NO
+
+# Performance
+LOOKUP_CACHE_SIZE      = 3
+NUM_PROC_THREADS       = 0
+
+# Which files to process. NOTE(review): Doxygen documents FILTER_PATTERNS entries as pattern=filter; verify "*.c *.h".
+INPUT                  = @INPUT@
+RECURSIVE              = YES
+EXCLUDE_PATTERNS       = ppport.h c.h postgres.h */test/*
+EXCLUDE_SYMBOLS        = __attribute__
+INPUT_FILTER           = @INPUT_FILTER@
+FILTER_PATTERNS        = *.c *.h
+CLANG_ASSISTED_PARSING = YES
+
+# Graphs.
+HAVE_DOT               = @GRAPHS@
+CLASS_GRAPH            = @GRAPHS@
+COLLABORATION_GRAPH    = @GRAPHS@
+GROUP_GRAPHS           = @GRAPHS@
+DOT_UML_DETAILS        = YES
+INCLUDE_GRAPH          = @GRAPHS@
+INCLUDED_BY_GRAPH      = @GRAPHS@
+CALL_GRAPH             = @GRAPHS@
+CALLER_GRAPH           = @GRAPHS@
+GRAPHICAL_HIERARCHY    = @GRAPHS@
+DIRECTORY_GRAPH        = @GRAPHS@
+DOT_IMAGE_FORMAT       = svg
+INTERACTIVE_SVG        = YES
+DOT_MULTI_TARGETS      = YES
diff --git a/doc/doxygen/doxy_filter.l b/doc/doxygen/doxy_filter.l
new file mode 100644
index 0000000000..3eb7042526
--- /dev/null
+++ b/doc/doxygen/doxy_filter.l
@@ -0,0 +1,168 @@
+ /*******************************************************************************************
+ A Doxygen filter which annotates comments in a C file.
+
+ The goal is to take an existing, non-doxygen comment style and turn it into doxygen comments.
+ It is packaged as a filter. The commented code never gets changed, but doxygen converts
+ the comments "on the fly".
+
+ The idea is to identify all comments in the code while keeping track of a small amount of context from
+ around the comment.  The context allows us to determine:
+    - Is the comment at the beginning of the file?
+    - Is the comment at the global level in the file?
+    - Is the comment inside a struct/union/enum?
+    - Does the comment trail other statements? Or is it in front of other statements. (What about inside?)
+
+ Basically, the filter does the following:
+     - A comment at the start of a file gets the @file tag added.
+       The tag is necessary for doxygen to recognize C code.
+       By convention, this first comment describes the overall purpose of the file.
+    -  Doxygen comments are passed on through.
+       Doxygen comments include the "/// ..." style and the "/x** ... *x/" style (written here with an x so this comment is not closed).
+    -  Regular comments at the global level and inside structs/enums/unions are converted into doxygen comments.
+       The assumption is these comments describe fields and entities like procedures, variables and macros.
+    -  Trailing comments are associated with the previous item.
+           "int Foo; // Comment about Foo"  becomes a doxygen comment describing Foo.
+
+ The filter doesn't handle all existing doxygen comments. For example, it doesn't recognize the grouping tags
+     "//{"  and "//}", nor does it passively echo other doxygen constructs.  Also, it has a very limited
+     understanding of C syntax. For now, it is enough to get by.
+
+ TODO? respect conditional compilation, add a File comment if none exists, be robust in not altering existing doxygen comments.
+ ******************************************************************************************************************************/
+%option noyywrap nounput noinput
+%{
+#include <stdbool.h>
+%}
+    // Variables used to track the context around a comment. They are updated by the scanner rules and read by scanComment().
+    int lines = 0; // Newlines seen by the "\n" rule (those inside comments or #define lines are not counted). 0 means still on the first line.
+    int level = 0; // Nesting depth of open '{' and '(' seen so far. 0 means global (file) level.
+    bool trailing = false;  // Has a ';', '}' or ')' been seen on the current line? If so, a following comment is a trailing comment.
+    bool complexData = false; // Set by struct/union/enum at level 0; cleared by ';' at level 0 or when nesting returns to level 0.
+    char *fileName;  // The name of the file we are filtering (set from argv[1] in main, echoed in the @file tag).
+
+    // Forward reference. Called by the comment rules with the matched comment text.
+    static void scanComment(char *comment);
+
+%%
+
+  /* Complex data types */             /* Make note when we are inside one. It ends at a semicolon at level 0. */
+struct|union|enum                      ECHO; if (level == 0) complexData = true;
+";"                                    ECHO; if (level == 0) complexData = false; trailing = true;
+
+  /* Other keywords or identifiers */  /* Eat them up so they don't get confused with keywords. NOTE(review): this rule is indented, unlike its neighbours - confirm flex accepts it. */
+  [a-zA-Z_$]+[a-zA-Z_$0-9]*            ECHO;
+
+  /* Quoted string literals. */        /* Pass them through as one token, so the other rules never see their contents. */
+\"([^\\\"]|\\.)*\"                     ECHO;
+
+  /* Character literals */            /* Pass them through. Any escape is accepted ('\'', '\"', '\0', '\x41'); never spans a newline. */
+'(\\.|[^\\'\n])+'                      ECHO;
+
+   /* Single line comment. */          /* Hand the text up to (not including) the newline to scanComment. */
+"//".*                                 scanComment(yytext);
+
+   /* Multi line comment. */           /* Hand the whole comment, opener through closer, to scanComment. */
+[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/]    scanComment(yytext);
+
+  /* Multi line #define ... */         /* Pass #define lines, including backslash-continued ones, through untouched since they may contain unbalanced braces. */
+^#define(.*\\\n)*(.*)                  ECHO;
+
+  /* New line. */                      /* Any subsequent comment is leading, and we are no longer at start of file. */
+"\n"                                   ECHO; lines++; trailing = false;  //printf("  lines=%d  level=%d  complex=%d   ", lines, level, complexData);
+
+ /* Track nesting depth. */            /* Are we at the global level or not? Are we in arg list or not? */
+                                       /*   We assume well formed code, so a parenthesis will never match a brace. */
+"{"|"("                                ECHO; level++;
+"}"|")"                                ECHO; level--; if (level == 0) complexData = false; trailing = true;
+
+ /* Anything else */                   /* Pass it through one character at a time. */
+.                                      ECHO;
+
+%%
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+/*
+ * Custom banner character to be removed from comments.
+ * We'll hardcode it to suit PostgreSQL, but it should be set through a command line arg.
+ */
+char customBanner = '-';
+
+/*************************************************************************************************
+Entry point: filter the file named by the single command line argument, writing the result to stdout.
+Exits with status 1 (message on stderr) on a usage error or if the file cannot be opened; otherwise returns yylex()'s result.
+**********************************************************************************************/
+int main(int argc, char**argv) {
+
+    // Expect exactly one argument: the path of the file to filter.
+    if (argc != 2) {
+        fprintf(stderr, "Please run as DoxygenFilter <filename>\n");
+        exit(1);
+    }
+
+    // Reopen stdin on the file, and remember its name for the @file tag written by scanComment.
+    fileName = argv[1];
+    if (freopen(fileName, "r", stdin) == NULL) {
+        fprintf(stderr, "Unable to open file %s: %s\n", fileName, strerror(errno));
+        exit(1);
+    }
+
+    // Now, run the scanner over the file; the rules send the filtered text to stdout.
+    return yylex();
+}
+
+// Write str to stdout exactly as given (fputs adds no trailing newline).
+static void putstr(char *str) {
+    fputs(str, stdout);
+}
+
+
+/*******************************************************************************************************
+Remove custom banners from str in place: every run of two or more consecutive c characters is deleted; a lone c is kept.
+*********************************************************************************************************/
+static void removeCustomBanner(char* str, char c) {
+    char *writePtr = str;  // Next position to store a kept character.
+    bool twoInARow = false;  // Is the current character part of a run of c?
+
+    // Scan across the string, moving the characters we keep forward over the deleted runs.
+    for (char *readPtr=str; *readPtr != '\0'; readPtr++) {
+        twoInARow = (readPtr[0] == c) && (readPtr[1] == c || twoInARow);  // Starts a run, or continues one.
+        if (!twoInARow)
+            *writePtr++ = *readPtr;
+    }
+    *writePtr = '\0';  // Terminate the (possibly shorter) result.
+}
+
+/********************************************************************************************************
+Emit one comment on stdout, converting it to a doxygen comment when the scanner context calls for it.
+
+comment is the full comment text, starting with its two-character opener; it is modified in place.
+Reads the scanner state gathered during scanning: level, complexData, trailing, lines and fileName.
+*********************************************************************************************************/
+static void scanComment(char *comment) {
+
+    // Echo the starting part of the comment, either // or /*.
+    putchar(comment[0]); putchar(comment[1]);
+
+    // Only convert comments at global level or inside a struct/union/enum declaration. TODO: but not inside a statement.
+    if (level == 0 || complexData) {
+
+        // Ensure this is a doxygen comment by repeating the 2nd character of the opener.
+        putchar(comment[1]);  // Note: adds harmless extra char to existing doxygen comment.
+
+        // If a ';', '}' or ')' precedes the comment on this line, emit "<" to associate the doxygen comment with that stmt.
+        if (trailing)
+            putstr("< ");
+
+        // If no newline has been scanned yet (start of file), add the @file tag. TODO: should it be at end of comment?
+        if (lines == 0)
+            printf(" @file %s ", fileName);
+    }
+
+    // Remove custom banners from the comment, in place. A custom banner is some banner other than  //////  or /*******.
+    removeCustomBanner(comment, customBanner);
+
+    // Finish outputting the comment, skipping the two opener characters already written above.
+    putstr(comment+2);
+}
diff --git a/doc/doxygen/meson.build b/doc/doxygen/meson.build
new file mode 100644
index 0000000000..e5539b7854
--- /dev/null
+++ b/doc/doxygen/meson.build
@@ -0,0 +1,73 @@
+# Generate doxygen pages for PostgreSQL using "ninja doxygen"
+#
+# Doxygen pages are optional. Nothing in this script should
+# cause PostgreSQL builds to fail.
+#
+# Currently there are no explicit error messages
+#   - If doxygen is not found, the doxygen target will not be defined.
+#   - If dot is not found, no graphs will be generated.
+#   - flex is already required, so we don't check for it.
+#
+# As a future enhancement, display meaningful error messages
+# when doxygen or dot are not found. Meson does not
+# support build time error messages, but they can be displayed
+# using a python custom target.
+#
+
+# Find the doxygen command. If it is missing, skip the rest of this file so that no doxygen targets are defined.
+doxygen_cmd = find_program('doxygen', required: false)
+if not doxygen_cmd.found()
+    subdir_done()
+endif
+
+# Build the doxygen input filter: doxy_filter.l is turned into doxy_filter.c, which is compiled into doxy_filter.
+doxygen_filter_c = custom_target('doxy_filter_c',
+                                 build_by_default: false,
+                                 input: 'doxy_filter.l',
+                                 output: 'doxy_filter.c',
+                                 command: flex_cmd)  # flex_cmd is defined outside this file
+doxygen_filter_exe = executable('doxy_filter',
+                                doxygen_filter_c,
+                                build_by_default: false)
+
+# Do we want graphs? (boolean option 'doxygen_graphs', false by default)
+doxygen_graphs = get_option('doxygen_graphs')
+
+# Find the dot command. If not found, no graphs will be generated.
+dot_cmd = find_program('dot', required: false)
+
+# Point to the source code directories that doxygen should scan.
+doxygen_source_dirs = [
+                       join_paths(meson.source_root(), 'src'),
+                       join_paths(meson.source_root(), 'contrib'),
+                      ]
+
+# Configure the Doxyfile: each key set here replaces the matching @KEY@ placeholder in Doxyfile.in.
+doxygen_config = configuration_data()
+doxygen_config.set('INPUT', ' '.join(doxygen_source_dirs))
+doxygen_config.set('OUTPUT_DIRECTORY', meson.current_build_dir())
+doxygen_config.set('PROJECT_NUMBER', pg_version)
+doxygen_config.set('INCLUDE_PATH', ' '.join(postgres_inc_d))  # NOTE(review): Doxyfile.in has no @INCLUDE_PATH@ placeholder, so this value is unused.
+doxygen_config.set('INPUT_FILTER', doxygen_filter_exe.full_path())
+doxygen_config.set('STRIP_FROM_PATH', meson.source_root())
+if (doxygen_graphs and dot_cmd.found())  # graphs need both the option and the dot program
+    doxygen_config.set('GRAPHS', 'YES')
+else
+    doxygen_config.set('GRAPHS', 'NO')
+endif
+
+doxyfile = configure_file(input : 'Doxyfile.in',  # generate Doxyfile from the template using doxygen_config
+                          output : 'Doxyfile',
+                          configuration : doxygen_config)
+
+
+# Create a target that runs doxygen on the generated Doxyfile; it depends on the filter executable the Doxyfile uses as INPUT_FILTER.
+doxygen_html = custom_target('html',
+                             input : doxyfile,
+                             output: 'html',
+                             command : [doxygen_cmd, doxyfile],
+                             depends: doxygen_filter_exe,
+                             build_by_default: false)
+
+# Use "ninja doxygen" to build the doxygen pages.
+alias_target('doxygen', doxygen_html)
diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml
index db4bcce56e..3b812b31ec 100644
--- a/doc/src/sgml/docguide.sgml
+++ b/doc/src/sgml/docguide.sgml
@@ -33,6 +33,10 @@
   Additionally, a number of plain-text <filename>README</filename> files can
   be found throughout the <productname>PostgreSQL</productname> source tree,
   documenting various implementation issues.
+  Doxygen output describing <productname>PostgreSQL</productname> functions
+  and data types can be found at <ulink url="https://doxygen.postgresql.org"></ulink>.
+  If desired, Doxygen output can be generated locally with the
+  <command>ninja doxygen</command> command.
  </para>
 
  <para>
diff --git a/meson.build b/meson.build
index 8ed51b6aae..8ad652a511 100644
--- a/meson.build
+++ b/meson.build
@@ -2902,6 +2902,7 @@ subdir('src/interfaces/libpq/test')
 subdir('src/interfaces/ecpg/test')
 
 subdir('doc/src/sgml')
+subdir('doc/doxygen')
 
 generated_sources_ac += {'': ['GNUmakefile']}
 
diff --git a/meson_options.txt b/meson_options.txt
index 249ecc5ffd..81cbb5bb15 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -27,6 +27,8 @@ option('krb_srvnam', type: 'string', value: 'postgres',
 option('system_tzdata', type: 'string', value: '',
   description: 'Use system time zone data in specified directory')
 
+option('doxygen_graphs', type: 'boolean', value: false,
+   description: 'Include graphs in "ninja doxygen" output. Generating graphs can be very slow.')
 
 # Defaults
 
-- 
2.33.0

