From 48359d69107da581d004da37230f949cb8796eb8 Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.b.smith@fujitsu.com>
Date: Fri, 4 Mar 2022 14:32:33 +1100
Subject: [PATCH v3] Update the documentation for logical replication.

This patch introduces a new documentation page for describing
the "Row Filtering" feature.

Author: Peter Smith
Reviewed By: Greg Nancarrow, Aleksander Alekseev, Amit Kapila, Ajin Cherian
Discussion: https://www.postgresql.org/message-id/CAHut%2BPtnsBr59%3D_NvxXp_%3DS-em0WxyuDOQmSTuHGb4sVhkHffg%40mail.gmail.com
---
 doc/src/sgml/logical-replication.sgml    | 521 +++++++++++++++++++++++++++++++
 doc/src/sgml/ref/create_publication.sgml |   2 +
 2 files changed, 523 insertions(+)

diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml
index 96b4886..657b460 100644
--- a/doc/src/sgml/logical-replication.sgml
+++ b/doc/src/sgml/logical-replication.sgml
@@ -118,6 +118,8 @@
    any combination of <command>INSERT</command>, <command>UPDATE</command>,
    <command>DELETE</command>, and <command>TRUNCATE</command>, similar to how triggers are fired by
    particular event types.  By default, all operation types are replicated.
+   (Row filters have no effect for <command>TRUNCATE</command>. See 
+   <xref linkend="logical-replication-row-filtering"/>).
   </para>
 
   <para>
@@ -317,6 +319,525 @@
   </sect2>
  </sect1>
 
+ <sect1 id="logical-replication-row-filtering">
+  <title>Row Filters</title>
+
+  <para>
+   By default, all data from all published tables will be replicated to the
+   appropriate subscribers.
+  </para>
+
+  <para>
+   The default data replication can be reduced by using <firstterm>row filters</firstterm>.
+   A user might choose to use row filters for behavioral or for performance
+   reasons.
+  </para>
+
+  <para>
+   With row filtering, the decision whether to replicate row data depends on
+   the row data values. Rows that don't satisfy an optional <literal>WHERE</literal>
+   clause will be filtered out. This allows a database or set of tables to be
+   partially replicated.
+  </para>
+
+  <para>
+   Use the <literal>PUBLICATION ... FOR TABLE </literal>
+   <link linkend="sql-createpublication"><literal>
+   <replaceable class="parameter">table_name</replaceable> WHERE (
+   <replaceable class="parameter">expression</replaceable>)</literal></link>
+   to specify which rows will be replicated.
+  </para>
+
+  <para>
+   The row filtering is defined per table. A new row filter can be added simply
+   by specifying a <literal>WHERE</literal> clause after the table name. The
+   <literal>WHERE</literal> clause must be enclosed by parentheses.
+  </para>
+
+  <sect2 id="logical-replication-rf-rules">
+   <title>Row filter Rules</title>
+
+   <para>
+    If the row filter evaluates to <literal>false</literal> or
+    <literal>NULL</literal> then the row is not replicated.
+   </para>
+
+   <para>
+    Row filters are applied <emphasis>before</emphasis> publishing the changes.
+   </para>
+
+   <para>
+    The <literal>WHERE</literal> clause expression is evaluated with the same
+    role used for the replication connection.
+   </para>
+
+   <para>
+    Row filters have no effect for <command>TRUNCATE</command> commands.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-rf-transformations">
+   <title>UPDATE transformations</title>
+
+   <para>
+    Whenever an <command>UPDATE</command> is processed, the row filter
+    expression is evaluated for both the old and new row (i.e. before
+    and after the data is updated).
+   </para>
+
+   <para>
+    If both evaluations are <literal>true</literal>, it replicates the
+    <command>UPDATE</command>.
+   </para>
+
+   <para>
+    If both evaluations are <literal>false</literal>, it doesn't replicate
+    anything.
+   </para>
+
+   <para>
+    If only one of the old/new rows matches the row filter expression, the
+    <command>UPDATE</command> is transformed to <command>INSERT</command> or
+    <command>DELETE</command>, to avoid any data inconsistency, as follows:
+<synopsis>
+Case 1: old-row (no match)    new-row (no match)   -->   (drop change)
+Case 2: old-row (no match)    new row (match)      -->   INSERT
+Case 3: old-row (match)       new-row (no match)   -->   DELETE
+Case 4: old-row (match)       new row (match)      -->   UPDATE
+</synopsis>
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-rf-partitions">
+   <title>Partition tables</title>
+
+   <para>
+    If the publication contains a partitioned table, the publication parameter
+    <literal>publish_via_partition_root</literal> determines which row filter
+    is used.
+    <itemizedlist>
+
+     <listitem>
+      <para>
+       If <literal>publish_via_partition_root</literal> is <literal>false</literal>
+       (default), each <emphasis>partition's</emphasis> row filter is used.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       If <literal>publish_via_partition_root</literal> is <literal>true</literal>,
+       the <emphasis>root partitioned table's</emphasis> row filter is used.
+      </para>
+     </listitem>
+
+    </itemizedlist>
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-rf-restrictions">
+   <title>WHERE clause Expression Restrictions</title>
+
+   <para>
+    The row filter <literal>WHERE</literal> clause for a table added to a
+    publication that publishes <command>UPDATE</command> and/or
+    <command>DELETE</command> operations must contain only columns that are
+    covered by the <quote>replica identity</quote>. The row filter
+    <literal>WHERE</literal> clause for a table added to a publication that
+    publishes <command>INSERT</command> can use any column.
+   </para>
+
+   <para>
+    The <literal>WHERE</literal> clause allows only simple expressions that
+    don't have user-defined functions, operators, non-immutable built-in
+    functions, or references to system columns.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-rf-tablesync">
+   <title>Subscriber initial table synchronization</title>
+
+   <para>
+    If you choose to do the initial table synchronization, only data that
+    satisfies the row filters is copied to the subscriber.
+   </para>
+
+   <para>
+    If the subscription has several publications in which a table has been
+    published with different <literal>WHERE</literal> clauses, rows that satisfy
+    <emphasis>any</emphasis> of the expressions will be copied. (see
+    <xref linkend="logical-replication-rf-combining"/>).
+   </para>
+
+   <note>
+    <para>
+     Publication operations of filters are ignored during initial table
+     synchronization.
+    </para>
+   </note>
+
+   <note>
+    <para>
+     If a subscriber is a pre-15 version, the initial table synchronization
+     won't use row filters even if they are defined in the publication.
+    </para>
+   </note>
+
+  </sect2>
+
+  <sect2 id="logical-replication-rf-combining">
+   <title>Combining multiple row filters for the same table</title>
+
+   <para>
+    If the subscription has several publications in which the same table has
+    been published with different row filters (for the same publish
+    operation), those expressions get OR'ed together, so that rows satisfying
+    <emphasis>any</emphasis> of the expressions will be replicated.
+   </para>
+
+   <para>
+    This means all the other filters (for the same table) become redundant if:
+    <itemizedlist>
+
+     <listitem>
+      <para>
+       one of the publications has no filter at all.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       one of the publications was created using <literal>FOR ALL TABLES</literal>.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       one of the publications was created using
+       <literal>FOR ALL TABLES IN SCHEMA</literal> and the table belongs to
+       that schema.
+      </para>
+     </listitem>
+
+    </itemizedlist>
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-rf-psql">
+   <title>PSQL commands to display Row Filters</title>
+
+   <para>
+    PSQL commands <command>\dRp+</command> and <command>\d</command> will
+    display row filter information.
+   </para>
+
+  </sect2>
+
+  <sect2 id="logical-replication-rf-examples">
+   <title>Examples</title>
+
+   <para>
+    Create some tables to be used in the following examples.
+<programlisting>
+test_pub=# CREATE TABLE t1(a int, b int, c text, primary key(a,c));
+CREATE TABLE
+test_pub=# CREATE TABLE t2(d int primary key, e int, f int);
+CREATE TABLE
+test_pub=# CREATE TABLE t3(g int primary key, h int, i int);
+CREATE TABLE
+</programlisting>
+   </para>
+
+   <para>
+    Create some publications.
+   </para>
+
+   <para>
+    Notice that publication <literal>p2</literal> has tables with and without
+    row filters.
+   </para>
+
+   <para>
+    Notice that publication <literal>p3</literal> has row filters on multiple
+    tables.
+<programlisting>
+test_pub=# CREATE PUBLICATION p1 FOR TABLE t1 WHERE (a > 5 AND c = 'NSW');
+CREATE PUBLICATION
+test_pub=# CREATE PUBLICATION p2 FOR TABLE t1, t2 WHERE (e = 99);
+CREATE PUBLICATION
+test_pub=# CREATE PUBLICATION p3 FOR TABLE t2 WHERE (d = 10), t3 WHERE (g = 10);
+CREATE PUBLICATION
+</programlisting>
+   </para>
+
+   <para>
+    The PSQL command <command>\dRp+</command> shows what row filters are
+    defined for the publications.
+<programlisting>
+test_pub=# \dRp+
+                               Publication p1
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t1" WHERE ((a > 5) AND (c = 'NSW'::text))
+
+                               Publication p2
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t1"
+    "public.t2" WHERE (e = 99)
+
+                               Publication p3
+  Owner   | All tables | Inserts | Updates | Deletes | Truncates | Via root 
+----------+------------+---------+---------+---------+-----------+----------
+ postgres | f          | t       | t       | t       | t         | f
+Tables:
+    "public.t2" WHERE (d = 10)
+    "public.t3" WHERE (g = 10)
+</programlisting>
+   </para>
+
+   <para>
+    The PSQL command <command>\d</command> shows what publications the table is
+    a member of, as well as that table's row filter in those publications.
+   </para>
+
+   <para>
+    Notice that table <literal>t1</literal> is a member of two publications, but
+    has a <literal>WHERE</literal> clause only for <literal>p1</literal>.
+   </para>
+
+   <para>
+    Notice that table <literal>t2</literal> is a member of two publications, and
+    has a different <literal>WHERE</literal> clause for each of them.
+<programlisting>
+test_pub=# \d t1
+                 Table "public.t1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           | not null | 
+ b      | integer |           |          | 
+ c      | text    |           | not null | 
+Indexes:
+    "t1_pkey" PRIMARY KEY, btree (a, c)
+Publications:
+    "p1" WHERE ((a > 5) AND (c = 'NSW'::text))
+    "p2"
+</programlisting>
+<programlisting>
+test_pub=# \d t2
+                 Table "public.t2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ d      | integer |           | not null | 
+ e      | integer |           |          | 
+ f      | integer |           |          | 
+Indexes:
+    "t2_pkey" PRIMARY KEY, btree (d)
+Publications:
+    "p2" WHERE (e = 99)
+    "p3" WHERE (d = 10)
+</programlisting>
+<programlisting>
+test_pub=# \d t3
+                 Table "public.t3"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ g      | integer |           | not null | 
+ h      | integer |           |          | 
+ i      | integer |           |          | 
+Indexes:
+    "t3_pkey" PRIMARY KEY, btree (g)
+Publications:
+    "p3" WHERE (g = 10)
+</programlisting>
+   </para>
+
+   <para>
+    On the subscriber node, create a table <literal>t1</literal> and a
+    subscription <literal>s1</literal> to publication <literal>p1</literal>. 
+<programlisting>
+test_sub=# CREATE TABLE t1(a int, b int, c text, primary key(a,c));
+CREATE TABLE
+test_sub=# CREATE SUBSCRIPTION s1
+test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=s1'
+test_sub-# PUBLICATION p1;
+CREATE SUBSCRIPTION
+</programlisting>
+   </para>
+
+   <para>
+    <literal>INSERT</literal> some data.
+   </para>
+
+   <para>
+    Notice that only the rows satisfying the <literal>t1 WHERE</literal>
+    clause of publication <literal>p1</literal> are replicated.
+<programlisting>
+test_pub=# INSERT INTO t1 VALUES (2, 102, 'NSW');
+INSERT 0 1
+test_pub=# INSERT INTO t1 VALUES (3, 103, 'QLD');
+INSERT 0 1
+test_pub=# INSERT INTO t1 VALUES (4, 104, 'VIC');
+INSERT 0 1
+test_pub=# INSERT INTO t1 VALUES (5, 105, 'ACT');
+INSERT 0 1
+test_pub=# INSERT INTO t1 VALUES (6, 106, 'NSW');
+INSERT 0 1
+test_pub=# INSERT INTO t1 VALUES (7, 107, 'NT');
+INSERT 0 1
+test_pub=# INSERT INTO t1 VALUES (8, 108, 'QLD');
+INSERT 0 1
+test_pub=# INSERT INTO t1 VALUES (9, 109, 'NSW');
+INSERT 0 1
+</programlisting>
+<programlisting>
+test_pub=# SELECT * FROM t1; 
+ a |  b  |  c
+---+-----+-----
+ 2 | 102 | NSW
+ 3 | 103 | QLD
+ 4 | 104 | VIC
+ 5 | 105 | ACT
+ 6 | 106 | NSW
+ 7 | 107 | NT
+ 8 | 108 | QLD
+ 9 | 109 | NSW
+(8 rows)
+</programlisting>
+<programlisting>
+test_sub=# SELECT * FROM t1;
+ a |  b  |  c  
+---+-----+-----
+ 6 | 106 | NSW
+ 9 | 109 | NSW
+(2 rows)
+</programlisting>
+   </para>
+
+   <para>
+    <literal>UPDATE</literal> some data, where the old and new values both
+    satisfy the <literal>t1 WHERE</literal> clause of publication
+    <literal>p1</literal>.
+   </para>
+
+   <para>
+    Notice this replicates as a normal <literal>UPDATE</literal>.
+<programlisting>
+test_pub=# UPDATE t1 SET b = 999 WHERE a = 6;
+UPDATE 1
+</programlisting>
+<programlisting>
+test_pub=# SELECT * FROM t1;
+ a |  b  |  c  
+---+-----+-----
+ 2 | 102 | NSW
+ 3 | 103 | QLD
+ 4 | 104 | VIC
+ 5 | 105 | ACT
+ 7 | 107 | NT
+ 8 | 108 | QLD
+ 9 | 109 | NSW
+ 6 | 999 | NSW
+(8 rows)
+</programlisting>
+<programlisting>
+test_sub=# SELECT * FROM t1;
+ a |  b  |  c  
+---+-----+-----
+ 9 | 109 | NSW
+ 6 | 999 | NSW
+(2 rows)
+</programlisting>
+   </para>
+
+   <para>
+    <literal>UPDATE</literal> some data, where the old values did not satisfy
+    the <literal>t1 WHERE</literal> clause of publication <literal>p1</literal>,
+    but the new values do satisfy it.
+   </para>
+
+   <para>
+    Notice the <literal>UPDATE</literal> is transformed and replicates as an
+    <literal>INSERT</literal>.
+<programlisting>
+test_pub=# UPDATE t1 SET a = 999 WHERE a = 2;
+UPDATE 1
+</programlisting>
+<programlisting>
+test_pub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   3 | 103 | QLD
+   4 | 104 | VIC
+   5 | 105 | ACT
+   7 | 107 | NT
+   8 | 108 | QLD
+   9 | 109 | NSW
+   6 | 999 | NSW
+ 999 | 102 | NSW
+(8 rows)
+</programlisting>
+<programlisting>
+test_sub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   9 | 109 | NSW
+   6 | 999 | NSW
+ 999 | 102 | NSW
+(3 rows)
+</programlisting>
+   </para>
+
+   <para>
+    <literal>UPDATE</literal> some data, where the old values satisfied
+    the <literal>t1 WHERE</literal> clause of publication <literal>p1</literal>,
+    but the new values do not satisfy it.
+   </para>
+
+   <para>
+    Notice the <literal>UPDATE</literal> is transformed and replicates as a
+    <literal>DELETE</literal>.
+<programlisting>
+test_pub=# UPDATE t1 SET c = 'VIC' WHERE a = 9;
+UPDATE 1
+</programlisting>
+<programlisting>
+test_pub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   3 | 103 | QLD
+   4 | 104 | VIC
+   5 | 105 | ACT
+   7 | 107 | NT
+   8 | 108 | QLD
+   6 | 999 | NSW
+ 999 | 102 | NSW
+   9 | 109 | VIC
+(8 rows)
+</programlisting>
+<programlisting>
+test_sub=# SELECT * FROM t1;
+  a  |  b  |  c  
+-----+-----+-----
+   6 | 999 | NSW
+ 999 | 102 | NSW
+(2 rows)
+</programlisting>
+   </para>
+
+  </sect2>
+
+ </sect1>
+
  <sect1 id="logical-replication-conflicts">
   <title>Conflicts</title>
 
diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
index 4979b9b..cb16e97 100644
--- a/doc/src/sgml/ref/create_publication.sgml
+++ b/doc/src/sgml/ref/create_publication.sgml
@@ -247,6 +247,8 @@ CREATE PUBLICATION <replaceable class="parameter">name</replaceable>
    <literal>publish_via_partition_root</literal> determines if it uses the
    partition's row filter (if the parameter is false, the default) or the root
    partitioned table's row filter.
+   See <xref linkend="logical-replication-row-filtering"/> for more details about 
+   row filters.
   </para>
 
   <para>
-- 
1.8.3.1

