From 1f9069cc4e0f8e1642a24d4f9053908838abc049 Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.b.smith@fujitsu.com>
Date: Wed, 2 Mar 2022 15:31:45 +1100
Subject: [PATCH v1] PG docs - Logical Replication Filtering.

This patch introduces a new PG docs page for giving an overview of what kinds
of logical replication filtering are available, and how to use them.

The main new content addition to this page is the section describing "Row Filtering".

Author: Peter Smith
Reviewed By: Greg Nancarrow
Discussion: https://www.postgresql.org/message-id/CAHut%2BPsiSQvWpEbxLK0hksjq0hRzko1_uY8EOTOx7dEfNhzeaQ%40mail.gmail.com
---
 doc/src/sgml/logical-replication.sgml    | 399 ++++++++++++++++++++++++++++++-
 doc/src/sgml/ref/create_publication.sgml |   2 +
 2 files changed, 399 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml
index 96b4886..f620faf 100644
--- a/doc/src/sgml/logical-replication.sgml
+++ b/doc/src/sgml/logical-replication.sgml
@@ -116,8 +116,10 @@
   <para>
    Publications can choose to limit the changes they produce to
    any combination of <command>INSERT</command>, <command>UPDATE</command>,
-   <command>DELETE</command>, and <command>TRUNCATE</command>, similar to how triggers are fired by
-   particular event types.  By default, all operation types are replicated.
+   <command>DELETE</command>, and <command>TRUNCATE</command> by using
+   <quote>operation filters</quote>. By default, all operation types are
+   replicated. See <xref linkend="logical-replication-operation-filtering"/>
+   for details.
   </para>
 
   <para>
@@ -317,6 +319,399 @@
   </sect2>
  </sect1>
 
+ <sect1 id="logical-replication-filtering">
+  <title>Filtering</title>
+
+   <para>
+    By default, all data from all published tables will be replicated to the
+    appropriate subscribers.
+   </para>
+
+   <para>
+    The default data replication can be reduced by using filters. A user might
+    choose to use filters for behavioral or for performance reasons.
+   </para>
+
+   <para>
+    There are 3 different ways to filter what data gets replicated.
+   </para>
+
+  <sect2 id="logical-replication-operation-filtering">
+   <title>Operation Filters</title>
+
+   <para>
+    Publications can choose to limit the changes they produce to any combination
+    of <command>INSERT</command>, <command>UPDATE</command>, <command>DELETE</command>,
+    and <command>TRUNCATE</command>, similar to how triggers are fired by
+    particular event types.
+   </para>
+
+   <para>
+    By default, all operation types are replicated.
+   </para>
+
+   <para>
+    Use the <literal>PUBLICATION ...<link linkend="sql-createpublication">
+    WITH (publish = <replaceable class="parameter">value</replaceable>)
+    </link></literal> clause to modify which operations will do replication.
+   </para>
+
+   <sect3 id="logical-replication-opf-examples">
+    <title>Examples</title>
+
+    <para>
+     Create a publication that only publishes <command>INSERT</command>
+     operations in one table:
+<programlisting>
+CREATE PUBLICATION insert_only FOR TABLE mydata WITH (publish = 'insert');
+</programlisting>
+    </para>
+
+   </sect3>
+
+  </sect2>
+
+  <sect2 id="logical-replication-row-filtering">
+   <title>Row Filters</title>
+
+   <para>
+    (This feature is available since PostgreSQL 15)
+   </para>
+
+   <para>
+    With row filtering, the decision whether to replicate row data depends on
+    the row data values. Rows that don't satisfy an optional <literal>WHERE</literal>
+    clause will be filtered out. This allows a database or set of tables to be
+    partially replicated.
+   </para>
+
+   <para>
+    Use the <literal>PUBLICATION ... FOR TABLE </literal>
+    <link linkend="sql-createpublication"><literal>
+    <replaceable class="parameter">table_name</replaceable> WHERE (
+    <replaceable class="parameter">expression</replaceable>)</literal></link>
+    to specify which rows will be replicated.
+   </para>
+
+   <para>
+    The row filtering is defined per table. A new row filter can be added simply
+    by specifying a <literal>WHERE</literal> clause after the table name. The
+    <literal>WHERE</literal> clause must be enclosed by parentheses.
+   </para>
+
+   <sect3 id="logical-replication-rf-rules">
+    <title>Row filter Rules</title>
+
+    <para>
+     If the row filter evaluates to <literal>false</literal> or
+     <literal>NULL</literal> then the row is not replicated.
+    </para>
+
+    <para>
+     Row filters are applied <emphasis>before</emphasis> publishing the changes.
+    </para>
+
+    <para>
+     The <literal>WHERE</literal> clause expression is evaluated with the same
+     role used for the replication connection.
+    </para>
+
+    <para>
+     Row filters have no effect for <command>TRUNCATE</command> commands.
+    </para>
+
+   </sect3>
+
+   <sect3 id="logical-replication-rf-transformations">
+    <title>UPDATE transformations</title>
+
+    <para>
+     Whenever an <command>UPDATE</command> is processed, the row filter
+     expression is evaluated for both the old and new row (i.e. before
+     and after the data is updated).
+    </para>
+
+    <para>
+     If both evaluations are <literal>true</literal>, it replicates the
+     <command>UPDATE</command>.
+    </para>
+
+    <para>
+     If both evaluations are <literal>false</literal>, it doesn't replicate
+     anything.
+    </para>
+
+    <para>
+     If only one of the old/new rows matches the row filter expression, the
+     <command>UPDATE</command> is transformed to <command>INSERT</command> or
+     <command>DELETE</command>, to avoid any data inconsistency, as follows:
+<synopsis>
+Case 1: old-row (no match)    new-row (no match)   -->   (drop change)
+Case 2: old-row (no match)    new row (match)      -->   INSERT
+Case 3: old-row (match)       new-row (no match)   -->   DELETE
+Case 4: old-row (match)       new row (match)      -->   UPDATE
+</synopsis>
+    </para>
+
+   </sect3>
+
+   <sect3 id="logical-replication-rf-partitions">
+    <title>Partition tables</title>
+
+    <para>
+     If the publication contains a partitioned table, the publication parameter
+     <literal>publish_via_partition_root</literal> determines which row filter
+     is used.
+     <itemizedlist>
+
+      <listitem>
+       <para>
+        If <literal>publish_via_partition_root</literal> is <literal>false</literal>
+       (default), each <emphasis>partition's</emphasis> row filter is used.
+       </para>
+      </listitem>
+
+      <listitem>
+       <para>
+        If <literal>publish_via_partition_root</literal> is <literal>true</literal>,
+        the <emphasis>root partitioned table's</emphasis> row filter is used.
+       </para>
+      </listitem>
+
+     </itemizedlist>
+    </para>
+
+   </sect3>
+
+   <sect3 id="logical-replication-rf-restrictions">
+    <title>WHERE clause Expression Restrictions</title>
+
+    <para>
+     The row filter <literal>WHERE</literal> clause for a table added to a
+     publication that publishes <command>UPDATE</command> and/or
+     <command>DELETE</command> operations must contain only columns that are
+     covered by the <quote>replica identity</quote>. The row filter
+     <literal>WHERE</literal> clause for a table added to a publication that
+     publishes <command>INSERT</command> can use any column.
+    </para>
+
+    <para>
+     The <literal>WHERE</literal> clause allows only simple expressions that
+     don't have user-defined functions, operators, non-immutable built-in
+     functions, or references to system columns.
+    </para>
+
+   </sect3>
+
+   <sect3 id="logical-replication-rf-tablesync">
+    <title>Subscriber initial table synchronization</title>
+
+    <para>
+     If you choose to do the initial table synchronization, only data that
+     satisfies the row filters is copied to the subscriber.
+    </para>
+
+    <para>
+     If the subscription has several publications in which a table has been
+     published with different <literal>WHERE</literal> clauses, rows that satisfy
+     <emphasis>any</emphasis> of the expressions will be copied. (see
+     <xref linkend="logical-replication-rf-combining"/>).
+    </para>
+
+    <note>
+     <para>
+      Publication operations of filters are ignored during initial table
+      synchronization.
+     </para>
+    </note>
+
+    <note>
+     <para>
+      If a subscriber is a pre-15 version, the initial table synchronization
+      won't use row filters even if they are defined in the publication.
+     </para>
+    </note>
+
+   </sect3>
+
+   <sect3 id="logical-replication-rf-combining">
+    <title>Combining multiple row filters for the same table</title>
+
+    <para>
+     If the subscription has several publications in which the same table has
+     been published with different row filters (for the same publish
+     operation), those expressions get OR'ed together, so that rows satisfying
+     <emphasis>any</emphasis> of the expressions will be replicated.
+   </para>
+
+   <para>
+    This means all the other filters (for the same table) become redundant if:
+    <itemizedlist>
+
+     <listitem>
+      <para>
+       one of the publications has no filter at all.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       one of the publications was created using <literal>FOR ALL TABLES</literal>.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       one of the publications was created using
+       <literal>FOR ALL TABLES IN SCHEMA</literal> and the table belongs to
+       that schema.
+      </para>
+     </listitem>
+
+    </itemizedlist>
+   </para>
+
+   </sect3>
+
+   <sect3 id="logical-replication-rf-psql">
+    <title>PSQL commands to display Row Filters</title>
+
+    <para>
+     PSQL commands <command>\dRp+</command> and <command>\d</command> will
+     display row filter information.
+    </para>
+
+   </sect3>
+
+   <sect3 id="logical-replication-rf-examples">
+    <title>Examples</title>
+
+    <para>
+     Create some tables to be used in the following examples.
+<programlisting>
+test_pub=# CREATE TABLE t1(a int, b int, c text);
+CREATE TABLE
+test_pub=# CREATE TABLE t2(d int, e int, f int);
+CREATE TABLE
+</programlisting>
+    </para>
+
+    <para>
+     Create some publications.
+    </para>
+
+    <para>
+     Notice that table <literal>t1</literal> is a member of multiple
+     publications.
+<programlisting>
+test_pub=# CREATE PUBLICATION p1 FOR TABLE t1 WHERE (a > 5 AND c IS NULL);
+CREATE PUBLICATION
+test_pub=# CREATE PUBLICATION p2 FOR TABLE t1, t2 WHERE (e = 99);
+CREATE PUBLICATION
+</programlisting>
+    </para>
+
+    <para>
+     The PSQL command <command>\dRp+</command> shows what row filters are
+     defined for the publications.
+<programlisting>
+test_pub=# \dRp+
+                               Publication p1
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t1" WHERE ((a > 5) AND (c IS NULL))
+
+                               Publication p2
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t1"
+    "public.t2" WHERE (e = 99)
+</programlisting>
+    </para>
+
+    <para>
+     The PSQL command <command>\d</command> shows what publications the table is
+     a member of, as well as that table's row filter in those publications.
+    </para>
+
+    <para>
+     Notice that table <literal>t1</literal> is a member of two publications, but
+     only has a <literal>WHERE</literal> clause for <literal>p1</literal>.
+<programlisting>
+test_pub=# \d t1
+                 Table "public.t1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Publications:
+    "p1" WHERE ((a > 5) AND (c IS NULL))
+    "p2"
+
+test_pub=# \d t2
+                 Table "public.t2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ d      | integer |           |          | 
+ e      | integer |           |          | 
+ f      | integer |           |          | 
+Publications:
+    "p2" WHERE (e = 99)
+</programlisting>
+    </para>
+
+   </sect3>
+
+  </sect2>
+
+<!--
+  <sect2 id="logical-replication-col-filtering">
+   <title>Column Filters</title>
+
+   <para>
+    (This feature is available since PostgreSQL 15)
+   </para>
+
+   <para>
+    TBA
+   </para>
+
+   <sect3 id="logical-replication-cf-examples">
+    <title>Examples</title>
+
+    <para>
+     TBA
+    </para>
+
+   </sect3>
+
+  </sect2>
+
+  <sect2 id="logical-replication-filter-combos">
+   <title>Combining Different Kinds of Filters</title>
+
+   <para>
+    TBA
+   </para>
+
+   <sect3 id="logical-replication-filter-combos-examples">
+    <title>Examples</title>
+
+    <para>
+     TBA
+    </para>
+
+   </sect3>
+
+  </sect2>
+-->
+ </sect1>
+
  <sect1 id="logical-replication-conflicts">
   <title>Conflicts</title>
 
diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
index 4979b9b..cb16e97 100644
--- a/doc/src/sgml/ref/create_publication.sgml
+++ b/doc/src/sgml/ref/create_publication.sgml
@@ -247,6 +247,8 @@ CREATE PUBLICATION <replaceable class="parameter">name</replaceable>
    <literal>publish_via_partition_root</literal> determines if it uses the
    partition's row filter (if the parameter is false, the default) or the root
    partitioned table's row filter.
+   See <xref linkend="logical-replication-row-filtering"/> for more details about 
+   row filters.
   </para>
 
   <para>
-- 
1.8.3.1

