Re: [PR] [FLINK-35670][cdc-connector][postgres]Flink cdc pipeline support postgres source [flink-cdc]

via GitHub Tue, 20 May 2025 04:59:26 -0700


lvyanquan commented on code in PR #3968:
URL: https://github.com/apache/flink-cdc/pull/3968#discussion_r2097720401



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/main/java/org/apache/flink/cdc/connectors/postgres/source/PostgresDataSourceOptions.java:
##########
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.cdc.common.annotation.Experimental;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.debezium.table.DebeziumChangelogMode;
+
+import java.time.Duration;
+
+/** Configurations for {@link PostgresDataSource}. */
+@PublicEvolving
+public class PostgresDataSourceOptions {
+
+    public static final ConfigOption<String> HOSTNAME =
+            ConfigOptions.key("hostname")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("IP address or hostname of the PostgreSQL 
database server.");
+    public static final ConfigOption<Integer> PG_PORT =
+            ConfigOptions.key("port")
+                    .intType()
+                    .defaultValue(5432)
+                    .withDescription("Integer port number of the PostgreSQL 
database server.");
+
+    public static final ConfigOption<String> USERNAME =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Name of the PostgreSQL database to use when 
connecting to the PostgreSQL database server.");
+
+    public static final ConfigOption<String> PASSWORD =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the PostgreSQL 
database server.");
+    public static final ConfigOption<String> TABLES =

Review Comment:
   Add a new line between these two ConfigOption.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/main/java/org/apache/flink/cdc/connectors/postgres/source/PostgresDataSourceOptions.java:
##########
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.cdc.common.annotation.Experimental;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.debezium.table.DebeziumChangelogMode;
+
+import java.time.Duration;
+
+/** Configurations for {@link PostgresDataSource}. */
+@PublicEvolving
+public class PostgresDataSourceOptions {
+
+    public static final ConfigOption<String> HOSTNAME =
+            ConfigOptions.key("hostname")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("IP address or hostname of the PostgreSQL 
database server.");
+    public static final ConfigOption<Integer> PG_PORT =
+            ConfigOptions.key("port")
+                    .intType()
+                    .defaultValue(5432)
+                    .withDescription("Integer port number of the PostgreSQL 
database server.");
+
+    public static final ConfigOption<String> USERNAME =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Name of the PostgreSQL database to use when 
connecting to the PostgreSQL database server.");
+
+    public static final ConfigOption<String> PASSWORD =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the PostgreSQL 
database server.");
+    public static final ConfigOption<String> TABLES =
+            ConfigOptions.key("tables")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Table names of the PostgreSQL tables to monitor. 
Regular expressions are supported. "
+                                    + "It is important to note that the dot 
(.) is treated as a delimiter for database and table names. "
+                                    + "If there is a need to use a dot (.) in 
a regular expression to match any character, "
+                                    + "it is necessary to escape the dot with 
a backslash."
+                                    + "eg. db0.\\.*, db1.user_table_[0-9]+, 
db[1-2].[app|web]_order_\\.*");
+
+    public static final ConfigOption<String> DECODING_PLUGIN_NAME =
+            ConfigOptions.key("decoding.plugin.name")
+                    .stringType()
+                    .defaultValue("decoderbufs")
+                    .withDescription(
+                            "The name of the Postgres logical decoding plug-in 
installed on the server.\n"
+                                    + "Supported values are decoderbufs, 
wal2json, wal2json_rds, wal2json_streaming,\n"
+                                    + "wal2json_rds_streaming and pgoutput.");
+
+    public static final ConfigOption<String> SLOT_NAME =
+            ConfigOptions.key("slot.name")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The name of the PostgreSQL logical decoding slot 
that was created for streaming changes "
+                                    + "from a particular plug-in for a 
particular database/schema. The server uses this slot "
+                                    + "to stream events to the connector that 
you are configuring.");
+
+    public static final ConfigOption<DebeziumChangelogMode> CHANGELOG_MODE =

Review Comment:
   Unused ConfigOption.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/main/java/org/apache/flink/cdc/connectors/postgres/source/PostgresDataSourceOptions.java:
##########
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.cdc.common.annotation.Experimental;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.debezium.table.DebeziumChangelogMode;
+
+import java.time.Duration;
+
+/** Configurations for {@link PostgresDataSource}. */
+@PublicEvolving
+public class PostgresDataSourceOptions {
+
+    public static final ConfigOption<String> HOSTNAME =
+            ConfigOptions.key("hostname")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("IP address or hostname of the PostgreSQL 
database server.");
+    public static final ConfigOption<Integer> PG_PORT =
+            ConfigOptions.key("port")
+                    .intType()
+                    .defaultValue(5432)
+                    .withDescription("Integer port number of the PostgreSQL 
database server.");
+
+    public static final ConfigOption<String> USERNAME =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Name of the PostgreSQL database to use when 
connecting to the PostgreSQL database server.");
+
+    public static final ConfigOption<String> PASSWORD =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the PostgreSQL 
database server.");
+    public static final ConfigOption<String> TABLES =
+            ConfigOptions.key("tables")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Table names of the PostgreSQL tables to monitor. 
Regular expressions are supported. "
+                                    + "It is important to note that the dot 
(.) is treated as a delimiter for database and table names. "
+                                    + "If there is a need to use a dot (.) in 
a regular expression to match any character, "
+                                    + "it is necessary to escape the dot with 
a backslash."
+                                    + "eg. db0.\\.*, db1.user_table_[0-9]+, 
db[1-2].[app|web]_order_\\.*");
+
+    public static final ConfigOption<String> DECODING_PLUGIN_NAME =
+            ConfigOptions.key("decoding.plugin.name")
+                    .stringType()
+                    .defaultValue("decoderbufs")
+                    .withDescription(
+                            "The name of the Postgres logical decoding plug-in 
installed on the server.\n"
+                                    + "Supported values are decoderbufs, 
wal2json, wal2json_rds, wal2json_streaming,\n"
+                                    + "wal2json_rds_streaming and pgoutput.");
+
+    public static final ConfigOption<String> SLOT_NAME =
+            ConfigOptions.key("slot.name")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The name of the PostgreSQL logical decoding slot 
that was created for streaming changes "
+                                    + "from a particular plug-in for a 
particular database/schema. The server uses this slot "
+                                    + "to stream events to the connector that 
you are configuring.");
+
+    public static final ConfigOption<DebeziumChangelogMode> CHANGELOG_MODE =
+            ConfigOptions.key("changelog-mode")
+                    .enumType(DebeziumChangelogMode.class)
+                    .defaultValue(DebeziumChangelogMode.ALL)
+                    .withDescription(
+                            "The changelog mode used for encoding streaming 
changes.\n"
+                                    + "\"all\": Encodes changes as retract 
stream using all RowKinds. This is the default mode.\n"
+                                    + "\"upsert\": Encodes changes as upsert 
stream that describes idempotent updates on a key. It can be used for tables 
with primary keys when replica identity FULL is not an option.");
+
+    public static final ConfigOption<Boolean> 
SCAN_INCREMENTAL_SNAPSHOT_ENABLED =
+            ConfigOptions.key("scan.incremental.snapshot.enabled")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "Incremental snapshot is a new mechanism to read 
snapshot of a table. "
+                                    + "Compared to the old snapshot mechanism, 
the incremental snapshot has many advantages, including:\n"
+                                    + "(1) source can be parallel during 
snapshot reading, \n"
+                                    + "(2) source can perform checkpoints in 
the chunk granularity during snapshot reading, \n"
+                                    + "(3) source doesn't need to acquire 
global read lock before snapshot reading.");
+
+    public static final ConfigOption<String> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.key-column")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The chunk key of table snapshot, captured tables 
are split into multiple chunks by a chunk key when read the snapshot of table."
+                                    + "By default, the chunk key is the first 
column of the primary key and the chunk key is the RowId in oracle."
+                                    + "This column must be a column of the 
primary key.");
+
+    public static final ConfigOption<String> SERVER_TIME_ZONE =
+            ConfigOptions.key("server-time-zone")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The session time zone in database server. If not 
set, then "
+                                    + "ZoneId.systemDefault() is used to 
determine the server time zone.");
+
+    public static final ConfigOption<Integer> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.size")
+                    .intType()
+                    .defaultValue(8096)
+                    .withDescription(
+                            "The chunk size (number of rows) of table 
snapshot, captured tables are split into multiple chunks when read the snapshot 
of table.");
+
+    public static final ConfigOption<Integer> SCAN_SNAPSHOT_FETCH_SIZE =
+            ConfigOptions.key("scan.snapshot.fetch.size")
+                    .intType()
+                    .defaultValue(1024)
+                    .withDescription(
+                            "The maximum fetch size for per poll when read 
table snapshot.");
+
+    public static final ConfigOption<Duration> CONNECT_TIMEOUT =
+            ConfigOptions.key("connect.timeout")
+                    .durationType()
+                    .defaultValue(Duration.ofSeconds(30))
+                    .withDescription(
+                            "The maximum time that the connector should wait 
after trying to connect to the PostgreSQL database server before timing out.");
+
+    public static final ConfigOption<Integer> CONNECTION_POOL_SIZE =
+            ConfigOptions.key("connection.pool.size")
+                    .intType()
+                    .defaultValue(20)
+                    .withDescription("The connection pool size.");
+
+    public static final ConfigOption<Integer> CONNECT_MAX_RETRIES =
+            ConfigOptions.key("connect.max-retries")
+                    .intType()
+                    .defaultValue(3)
+                    .withDescription(
+                            "The max retry times that the connector should 
retry to build PostgreSQL database server connection.");
+
+    public static final ConfigOption<String> SCAN_STARTUP_MODE =
+            ConfigOptions.key("scan.startup.mode")
+                    .stringType()
+                    .defaultValue("initial")
+                    .withDescription(
+                            "Optional startup mode for PostgreSQL CDC 
consumer, valid enumerations are "
+                                    + "\"initial\", \"earliest-offset\", 
\"latest-offset\", \"timestamp\"\n"
+                                    + "or \"specific-offset\"");
+
+    public static final ConfigOption<String> SCAN_STARTUP_SPECIFIC_OFFSET_FILE 
=
+            ConfigOptions.key("scan.startup.specific-offset.file")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional binlog file name used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<Long> SCAN_STARTUP_SPECIFIC_OFFSET_POS =
+            ConfigOptions.key("scan.startup.specific-offset.pos")
+                    .longType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional binlog file position used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<String> 
SCAN_STARTUP_SPECIFIC_OFFSET_GTID_SET =
+            ConfigOptions.key("scan.startup.specific-offset.gtid-set")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional GTID set used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<Long> 
SCAN_STARTUP_SPECIFIC_OFFSET_SKIP_EVENTS =
+            ConfigOptions.key("scan.startup.specific-offset.skip-events")
+                    .longType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional number of events to skip after the 
specific starting offset");
+
+    public static final ConfigOption<Long> 
SCAN_STARTUP_SPECIFIC_OFFSET_SKIP_ROWS =

Review Comment:
   Unused ConfigOption.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/main/java/org/apache/flink/cdc/connectors/postgres/source/PostgresDataSourceOptions.java:
##########
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.cdc.common.annotation.Experimental;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.debezium.table.DebeziumChangelogMode;
+
+import java.time.Duration;
+
+/** Configurations for {@link PostgresDataSource}. */
+@PublicEvolving
+public class PostgresDataSourceOptions {
+
+    public static final ConfigOption<String> HOSTNAME =
+            ConfigOptions.key("hostname")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("IP address or hostname of the PostgreSQL 
database server.");
+    public static final ConfigOption<Integer> PG_PORT =
+            ConfigOptions.key("port")
+                    .intType()
+                    .defaultValue(5432)
+                    .withDescription("Integer port number of the PostgreSQL 
database server.");
+
+    public static final ConfigOption<String> USERNAME =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Name of the PostgreSQL database to use when 
connecting to the PostgreSQL database server.");
+
+    public static final ConfigOption<String> PASSWORD =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the PostgreSQL 
database server.");
+    public static final ConfigOption<String> TABLES =
+            ConfigOptions.key("tables")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Table names of the PostgreSQL tables to monitor. 
Regular expressions are supported. "
+                                    + "It is important to note that the dot 
(.) is treated as a delimiter for database and table names. "
+                                    + "If there is a need to use a dot (.) in 
a regular expression to match any character, "
+                                    + "it is necessary to escape the dot with 
a backslash."
+                                    + "eg. db0.\\.*, db1.user_table_[0-9]+, 
db[1-2].[app|web]_order_\\.*");
+
+    public static final ConfigOption<String> DECODING_PLUGIN_NAME =
+            ConfigOptions.key("decoding.plugin.name")
+                    .stringType()
+                    .defaultValue("decoderbufs")
+                    .withDescription(
+                            "The name of the Postgres logical decoding plug-in 
installed on the server.\n"
+                                    + "Supported values are decoderbufs, 
wal2json, wal2json_rds, wal2json_streaming,\n"
+                                    + "wal2json_rds_streaming and pgoutput.");
+
+    public static final ConfigOption<String> SLOT_NAME =
+            ConfigOptions.key("slot.name")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The name of the PostgreSQL logical decoding slot 
that was created for streaming changes "
+                                    + "from a particular plug-in for a 
particular database/schema. The server uses this slot "
+                                    + "to stream events to the connector that 
you are configuring.");
+
+    public static final ConfigOption<DebeziumChangelogMode> CHANGELOG_MODE =
+            ConfigOptions.key("changelog-mode")
+                    .enumType(DebeziumChangelogMode.class)
+                    .defaultValue(DebeziumChangelogMode.ALL)
+                    .withDescription(
+                            "The changelog mode used for encoding streaming 
changes.\n"
+                                    + "\"all\": Encodes changes as retract 
stream using all RowKinds. This is the default mode.\n"
+                                    + "\"upsert\": Encodes changes as upsert 
stream that describes idempotent updates on a key. It can be used for tables 
with primary keys when replica identity FULL is not an option.");
+
+    public static final ConfigOption<Boolean> 
SCAN_INCREMENTAL_SNAPSHOT_ENABLED =
+            ConfigOptions.key("scan.incremental.snapshot.enabled")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "Incremental snapshot is a new mechanism to read 
snapshot of a table. "
+                                    + "Compared to the old snapshot mechanism, 
the incremental snapshot has many advantages, including:\n"
+                                    + "(1) source can be parallel during 
snapshot reading, \n"
+                                    + "(2) source can perform checkpoints in 
the chunk granularity during snapshot reading, \n"
+                                    + "(3) source doesn't need to acquire 
global read lock before snapshot reading.");
+
+    public static final ConfigOption<String> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.key-column")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The chunk key of table snapshot, captured tables 
are split into multiple chunks by a chunk key when read the snapshot of table."
+                                    + "By default, the chunk key is the first 
column of the primary key and the chunk key is the RowId in oracle."
+                                    + "This column must be a column of the 
primary key.");
+
+    public static final ConfigOption<String> SERVER_TIME_ZONE =
+            ConfigOptions.key("server-time-zone")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The session time zone in database server. If not 
set, then "
+                                    + "ZoneId.systemDefault() is used to 
determine the server time zone.");
+
+    public static final ConfigOption<Integer> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.size")
+                    .intType()
+                    .defaultValue(8096)
+                    .withDescription(
+                            "The chunk size (number of rows) of table 
snapshot, captured tables are split into multiple chunks when read the snapshot 
of table.");
+
+    public static final ConfigOption<Integer> SCAN_SNAPSHOT_FETCH_SIZE =
+            ConfigOptions.key("scan.snapshot.fetch.size")
+                    .intType()
+                    .defaultValue(1024)
+                    .withDescription(
+                            "The maximum fetch size for per poll when read 
table snapshot.");
+
+    public static final ConfigOption<Duration> CONNECT_TIMEOUT =
+            ConfigOptions.key("connect.timeout")
+                    .durationType()
+                    .defaultValue(Duration.ofSeconds(30))
+                    .withDescription(
+                            "The maximum time that the connector should wait 
after trying to connect to the PostgreSQL database server before timing out.");
+
+    public static final ConfigOption<Integer> CONNECTION_POOL_SIZE =
+            ConfigOptions.key("connection.pool.size")
+                    .intType()
+                    .defaultValue(20)
+                    .withDescription("The connection pool size.");
+
+    public static final ConfigOption<Integer> CONNECT_MAX_RETRIES =
+            ConfigOptions.key("connect.max-retries")
+                    .intType()
+                    .defaultValue(3)
+                    .withDescription(
+                            "The max retry times that the connector should 
retry to build PostgreSQL database server connection.");
+
+    public static final ConfigOption<String> SCAN_STARTUP_MODE =
+            ConfigOptions.key("scan.startup.mode")
+                    .stringType()
+                    .defaultValue("initial")
+                    .withDescription(
+                            "Optional startup mode for PostgreSQL CDC 
consumer, valid enumerations are "
+                                    + "\"initial\", \"earliest-offset\", 
\"latest-offset\", \"timestamp\"\n"
+                                    + "or \"specific-offset\"");
+
+    public static final ConfigOption<String> SCAN_STARTUP_SPECIFIC_OFFSET_FILE 
=
+            ConfigOptions.key("scan.startup.specific-offset.file")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional binlog file name used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<Long> SCAN_STARTUP_SPECIFIC_OFFSET_POS =
+            ConfigOptions.key("scan.startup.specific-offset.pos")
+                    .longType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional binlog file position used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<String> 
SCAN_STARTUP_SPECIFIC_OFFSET_GTID_SET =
+            ConfigOptions.key("scan.startup.specific-offset.gtid-set")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional GTID set used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<Long> 
SCAN_STARTUP_SPECIFIC_OFFSET_SKIP_EVENTS =
+            ConfigOptions.key("scan.startup.specific-offset.skip-events")
+                    .longType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional number of events to skip after the 
specific starting offset");
+
+    public static final ConfigOption<Long> 
SCAN_STARTUP_SPECIFIC_OFFSET_SKIP_ROWS =
+            ConfigOptions.key("scan.startup.specific-offset.skip-rows")
+                    .longType()
+                    .noDefaultValue()
+                    .withDescription("Optional number of rows to skip after 
the specific offset");
+
+    public static final ConfigOption<Long> SCAN_STARTUP_TIMESTAMP_MILLIS =
+            ConfigOptions.key("scan.startup.timestamp-millis")
+                    .longType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional timestamp used in case of \"timestamp\" 
startup mode");
+
+    public static final ConfigOption<Duration> HEARTBEAT_INTERVAL =
+            ConfigOptions.key("heartbeat.interval")
+                    .durationType()
+                    .defaultValue(Duration.ofSeconds(30))
+                    .withDescription(
+                            "Optional interval of sending heartbeat event for 
tracing the latest available binlog offsets");
+
+    public static final org.apache.flink.configuration.ConfigOption<Double>
+            SPLIT_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND =
+                    org.apache.flink.configuration.ConfigOptions.key(
+                                    
"chunk-key.even-distribution.factor.upper-bound")
+                            .doubleType()
+                            .defaultValue(1000.0d)
+                            
.withFallbackKeys("split-key.even-distribution.factor.upper-bound")
+                            .withDescription(
+                                    "The upper bound of chunk key distribution 
factor. The distribution factor is used to determine whether the"
+                                            + " table is evenly distribution 
or not."
+                                            + " The table chunks would use 
evenly calculation optimization when the data distribution is even,"
+                                            + " and the query for splitting 
would happen when it is uneven."
+                                            + " The distribution factor could 
be calculated by (MAX(id) - MIN(id) + 1) / rowCount.");
+
+    public static final org.apache.flink.configuration.ConfigOption<Double>

Review Comment:
   org.apache.flink.configuration.ConfigOption => ConfigOption



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/main/java/org/apache/flink/cdc/connectors/postgres/source/PostgresDataSourceOptions.java:
##########
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.cdc.common.annotation.Experimental;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.debezium.table.DebeziumChangelogMode;
+
+import java.time.Duration;
+
+/** Configurations for {@link PostgresDataSource}. */
+@PublicEvolving
+public class PostgresDataSourceOptions {
+
+    public static final ConfigOption<String> HOSTNAME =
+            ConfigOptions.key("hostname")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("IP address or hostname of the PostgreSQL 
database server.");
+    public static final ConfigOption<Integer> PG_PORT =
+            ConfigOptions.key("port")
+                    .intType()
+                    .defaultValue(5432)
+                    .withDescription("Integer port number of the PostgreSQL 
database server.");
+
+    public static final ConfigOption<String> USERNAME =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Name of the PostgreSQL database to use when 
connecting to the PostgreSQL database server.");
+
+    public static final ConfigOption<String> PASSWORD =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the PostgreSQL 
database server.");
+    public static final ConfigOption<String> TABLES =
+            ConfigOptions.key("tables")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Table names of the PostgreSQL tables to monitor. 
Regular expressions are supported. "
+                                    + "It is important to note that the dot 
(.) is treated as a delimiter for database and table names. "
+                                    + "If there is a need to use a dot (.) in 
a regular expression to match any character, "
+                                    + "it is necessary to escape the dot with 
a backslash."
+                                    + "eg. db0.\\.*, db1.user_table_[0-9]+, 
db[1-2].[app|web]_order_\\.*");
+
+    public static final ConfigOption<String> DECODING_PLUGIN_NAME =
+            ConfigOptions.key("decoding.plugin.name")
+                    .stringType()
+                    .defaultValue("decoderbufs")
+                    .withDescription(
+                            "The name of the Postgres logical decoding plug-in 
installed on the server.\n"
+                                    + "Supported values are decoderbufs, 
wal2json, wal2json_rds, wal2json_streaming,\n"
+                                    + "wal2json_rds_streaming and pgoutput.");
+
+    public static final ConfigOption<String> SLOT_NAME =
+            ConfigOptions.key("slot.name")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The name of the PostgreSQL logical decoding slot 
that was created for streaming changes "
+                                    + "from a particular plug-in for a 
particular database/schema. The server uses this slot "
+                                    + "to stream events to the connector that 
you are configuring.");
+
+    public static final ConfigOption<DebeziumChangelogMode> CHANGELOG_MODE =
+            ConfigOptions.key("changelog-mode")
+                    .enumType(DebeziumChangelogMode.class)
+                    .defaultValue(DebeziumChangelogMode.ALL)
+                    .withDescription(
+                            "The changelog mode used for encoding streaming 
changes.\n"
+                                    + "\"all\": Encodes changes as retract 
stream using all RowKinds. This is the default mode.\n"
+                                    + "\"upsert\": Encodes changes as upsert 
stream that describes idempotent updates on a key. It can be used for tables 
with primary keys when replica identity FULL is not an option.");
+
+    public static final ConfigOption<Boolean> 
SCAN_INCREMENTAL_SNAPSHOT_ENABLED =
+            ConfigOptions.key("scan.incremental.snapshot.enabled")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "Incremental snapshot is a new mechanism to read 
snapshot of a table. "
+                                    + "Compared to the old snapshot mechanism, 
the incremental snapshot has many advantages, including:\n"
+                                    + "(1) source can be parallel during 
snapshot reading, \n"
+                                    + "(2) source can perform checkpoints in 
the chunk granularity during snapshot reading, \n"
+                                    + "(3) source doesn't need to acquire 
global read lock before snapshot reading.");
+
+    public static final ConfigOption<String> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.key-column")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The chunk key of table snapshot, captured tables 
are split into multiple chunks by a chunk key when read the snapshot of table."
+                                    + "By default, the chunk key is the first 
column of the primary key and the chunk key is the RowId in oracle."
+                                    + "This column must be a column of the 
primary key.");
+
+    public static final ConfigOption<String> SERVER_TIME_ZONE =
+            ConfigOptions.key("server-time-zone")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The session time zone in database server. If not 
set, then "
+                                    + "ZoneId.systemDefault() is used to 
determine the server time zone.");
+
+    public static final ConfigOption<Integer> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.size")
+                    .intType()
+                    .defaultValue(8096)
+                    .withDescription(
+                            "The chunk size (number of rows) of table 
snapshot, captured tables are split into multiple chunks when read the snapshot 
of table.");
+
+    public static final ConfigOption<Integer> SCAN_SNAPSHOT_FETCH_SIZE =
+            ConfigOptions.key("scan.snapshot.fetch.size")
+                    .intType()
+                    .defaultValue(1024)
+                    .withDescription(
+                            "The maximum fetch size for per poll when read 
table snapshot.");
+
+    public static final ConfigOption<Duration> CONNECT_TIMEOUT =
+            ConfigOptions.key("connect.timeout")
+                    .durationType()
+                    .defaultValue(Duration.ofSeconds(30))
+                    .withDescription(
+                            "The maximum time that the connector should wait 
after trying to connect to the PostgreSQL database server before timing out.");
+
+    public static final ConfigOption<Integer> CONNECTION_POOL_SIZE =
+            ConfigOptions.key("connection.pool.size")
+                    .intType()
+                    .defaultValue(20)
+                    .withDescription("The connection pool size.");
+
+    public static final ConfigOption<Integer> CONNECT_MAX_RETRIES =
+            ConfigOptions.key("connect.max-retries")
+                    .intType()
+                    .defaultValue(3)
+                    .withDescription(
+                            "The max retry times that the connector should 
retry to build PostgreSQL database server connection.");
+
+    public static final ConfigOption<String> SCAN_STARTUP_MODE =
+            ConfigOptions.key("scan.startup.mode")
+                    .stringType()
+                    .defaultValue("initial")
+                    .withDescription(
+                            "Optional startup mode for PostgreSQL CDC 
consumer, valid enumerations are "
+                                    + "\"initial\", \"earliest-offset\", 
\"latest-offset\", \"timestamp\"\n"
+                                    + "or \"specific-offset\"");
+
+    public static final ConfigOption<String> SCAN_STARTUP_SPECIFIC_OFFSET_FILE 
=
+            ConfigOptions.key("scan.startup.specific-offset.file")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional binlog file name used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<Long> SCAN_STARTUP_SPECIFIC_OFFSET_POS =
+            ConfigOptions.key("scan.startup.specific-offset.pos")
+                    .longType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional binlog file position used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<String> 
SCAN_STARTUP_SPECIFIC_OFFSET_GTID_SET =

Review Comment:
   Unused ConfigOption.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/main/java/org/apache/flink/cdc/connectors/postgres/source/PostgresDataSourceOptions.java:
##########
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.cdc.common.annotation.Experimental;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.debezium.table.DebeziumChangelogMode;
+
+import java.time.Duration;
+
+/** Configurations for {@link PostgresDataSource}. */
+@PublicEvolving
+public class PostgresDataSourceOptions {
+
+    public static final ConfigOption<String> HOSTNAME =
+            ConfigOptions.key("hostname")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("IP address or hostname of the PostgreSQL 
database server.");
+    public static final ConfigOption<Integer> PG_PORT =
+            ConfigOptions.key("port")
+                    .intType()
+                    .defaultValue(5432)
+                    .withDescription("Integer port number of the PostgreSQL 
database server.");
+
+    public static final ConfigOption<String> USERNAME =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Name of the PostgreSQL database to use when 
connecting to the PostgreSQL database server.");
+
+    public static final ConfigOption<String> PASSWORD =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the PostgreSQL 
database server.");
+    public static final ConfigOption<String> TABLES =
+            ConfigOptions.key("tables")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Table names of the PostgreSQL tables to monitor. 
Regular expressions are supported. "
+                                    + "It is important to note that the dot 
(.) is treated as a delimiter for database and table names. "
+                                    + "If there is a need to use a dot (.) in 
a regular expression to match any character, "
+                                    + "it is necessary to escape the dot with 
a backslash."
+                                    + "eg. db0.\\.*, db1.user_table_[0-9]+, 
db[1-2].[app|web]_order_\\.*");
+
+    public static final ConfigOption<String> DECODING_PLUGIN_NAME =
+            ConfigOptions.key("decoding.plugin.name")
+                    .stringType()
+                    .defaultValue("decoderbufs")
+                    .withDescription(
+                            "The name of the Postgres logical decoding plug-in 
installed on the server.\n"
+                                    + "Supported values are decoderbufs, 
wal2json, wal2json_rds, wal2json_streaming,\n"
+                                    + "wal2json_rds_streaming and pgoutput.");
+
+    public static final ConfigOption<String> SLOT_NAME =
+            ConfigOptions.key("slot.name")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The name of the PostgreSQL logical decoding slot 
that was created for streaming changes "
+                                    + "from a particular plug-in for a 
particular database/schema. The server uses this slot "
+                                    + "to stream events to the connector that 
you are configuring.");
+
+    public static final ConfigOption<DebeziumChangelogMode> CHANGELOG_MODE =
+            ConfigOptions.key("changelog-mode")
+                    .enumType(DebeziumChangelogMode.class)
+                    .defaultValue(DebeziumChangelogMode.ALL)
+                    .withDescription(
+                            "The changelog mode used for encoding streaming 
changes.\n"
+                                    + "\"all\": Encodes changes as retract 
stream using all RowKinds. This is the default mode.\n"
+                                    + "\"upsert\": Encodes changes as upsert 
stream that describes idempotent updates on a key. It can be used for tables 
with primary keys when replica identity FULL is not an option.");
+
+    public static final ConfigOption<Boolean> 
SCAN_INCREMENTAL_SNAPSHOT_ENABLED =

Review Comment:
   Unused ConfigOption.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/test/java/org/apache/flink/cdc/connectors/postgres/source/PostgresPipelineITCaseTest.java:
##########
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.api.common.eventtime.WatermarkStrategy;
+import org.apache.flink.api.common.restartstrategy.RestartStrategies;
+import org.apache.flink.cdc.common.data.binary.BinaryStringData;
+import org.apache.flink.cdc.common.event.CreateTableEvent;
+import org.apache.flink.cdc.common.event.DataChangeEvent;
+import org.apache.flink.cdc.common.event.Event;
+import org.apache.flink.cdc.common.event.TableId;
+import org.apache.flink.cdc.common.schema.Schema;
+import org.apache.flink.cdc.common.source.FlinkSourceProvider;
+import org.apache.flink.cdc.common.types.DataType;
+import org.apache.flink.cdc.common.types.DataTypes;
+import org.apache.flink.cdc.common.types.RowType;
+import org.apache.flink.cdc.connectors.base.options.StartupOptions;
+import org.apache.flink.cdc.connectors.postgres.PostgresTestBase;
+import 
org.apache.flink.cdc.connectors.postgres.factory.PostgresDataSourceFactory;
+import 
org.apache.flink.cdc.connectors.postgres.source.config.PostgresSourceConfigFactory;
+import org.apache.flink.cdc.connectors.postgres.testutils.UniqueDatabase;
+import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator;
+import org.apache.flink.cdc.runtime.typeutils.EventTypeInfo;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.table.planner.factories.TestValuesTableFactory;
+import org.apache.flink.util.CloseableIterator;
+
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.lifecycle.Startables;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.Stream;
+
+import static 
org.apache.flink.cdc.connectors.postgres.source.PostgresDataSourceOptions.SCHEMA_CHANGE_ENABLED;
+import static org.assertj.core.api.Assertions.assertThat;
+import static 
org.testcontainers.containers.PostgreSQLContainer.POSTGRESQL_PORT;
+
+/** Integration tests for Postgres source. */
+public class PostgresPipelineITCaseTest extends PostgresTestBase {
+    private static final Logger LOG = 
LoggerFactory.getLogger(PostgresPipelineITCaseTest.class);
+
+    private final UniqueDatabase inventoryDatabase =
+            new UniqueDatabase(
+                    POSTGRES_CONTAINER, "inventory", "inventory", TEST_USER, 
TEST_PASSWORD);
+    private final StreamExecutionEnvironment env =
+            StreamExecutionEnvironment.getExecutionEnvironment();
+
+    @BeforeClass
+    public static void startContainers() {
+        LOG.info("Starting containers...");
+        Startables.deepStart(Stream.of(POSTGRES_CONTAINER)).join();
+        LOG.info("Containers are started.");
+    }
+
+    @AfterClass
+    public static void stopContainers() {
+        LOG.info("Stopping containers...");
+        POSTGRES_CONTAINER.stop();
+        LOG.info("Containers are stopped.");
+    }
+
+    @Before
+    public void before() {
+        TestValuesTableFactory.clearAllData();
+        env.setParallelism(4);
+        env.enableCheckpointing(2000);
+        env.setRestartStrategy(RestartStrategies.noRestart());
+    }
+
+    @Test
+    public void testInitialStartupMode() throws Exception {
+        inventoryDatabase.createAndInitialize();
+        PostgresSourceConfigFactory configFactory =
+                (PostgresSourceConfigFactory)
+                        new PostgresSourceConfigFactory()
+                                .hostname(POSTGRES_CONTAINER.getHost())
+                                
.port(POSTGRES_CONTAINER.getMappedPort(POSTGRESQL_PORT))
+                                .username(TEST_USER)
+                                .password(TEST_PASSWORD)
+                                
.databaseList(inventoryDatabase.getDatabaseName())
+                                .tableList("inventory.products")
+                                .startupOptions(StartupOptions.initial())
+                                .serverTimeZone("UTC")
+                                
.includeSchemaChanges(SCHEMA_CHANGE_ENABLED.defaultValue());
+        configFactory.database(inventoryDatabase.getDatabaseName());
+        configFactory.slotName(getSlotName());
+
+        FlinkSourceProvider sourceProvider =
+                (FlinkSourceProvider)
+                        new 
PostgresDataSource(configFactory).getEventSourceProvider();
+        CloseableIterator<Event> events =
+                env.fromSource(
+                                sourceProvider.getSource(),
+                                WatermarkStrategy.noWatermarks(),
+                                PostgresDataSourceFactory.IDENTIFIER,
+                                new EventTypeInfo())
+                        .executeAndCollect();
+
+        TableId tableId = TableId.tableId("inventory", "products");
+        CreateTableEvent createTableEvent = 
getProductsCreateTableEvent(tableId);
+
+        // generate snapshot data
+        List<Event> expectedSnapshot = getSnapshotExpected(tableId);
+
+        // In this configuration, several subtasks might emit their 
corresponding CreateTableEvent
+        // to downstream. Since it is not possible to predict how many 
CreateTableEvents should we
+        // expect, we simply filter them out from expected sets, and assert 
there's at least one.
+        List<Event> actual = fetchResultsExcept(events, 
expectedSnapshot.size(), createTableEvent);
+        assertThat(actual.subList(0, expectedSnapshot.size()))
+                .containsExactlyInAnyOrder(expectedSnapshot.toArray(new 
Event[0]));
+    }
+
+    private static <T> List<T> fetchResultsExcept(Iterator<T> iter, int size, 
T sideEvent) {
+        List<T> result = new ArrayList<>(size);
+        List<T> sideResults = new ArrayList<>();
+        while (size > 0 && iter.hasNext()) {
+            T event = iter.next();
+            if (!event.equals(sideEvent)) {
+                result.add(event);
+                size--;
+            } else {
+                sideResults.add(sideEvent);
+            }
+        }
+        // Also ensure we've received at least one or many side events.
+        assertThat(sideResults).isNotEmpty();
+        return result;
+    }
+
+    private List<Event> getSnapshotExpected(TableId tableId) {
+        RowType rowType =
+                RowType.of(
+                        new DataType[] {
+                            DataTypes.INT().notNull(),
+                            DataTypes.VARCHAR(255).notNull(),
+                            DataTypes.VARCHAR(512),
+                            DataTypes.FLOAT()

Review Comment:
   It's better to add a test for all supported that described in 
PostgresTypeUtils.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/test/java/org/apache/flink/cdc/connectors/postgres/source/PostgresPipelineITCaseTest.java:
##########
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.api.common.eventtime.WatermarkStrategy;
+import org.apache.flink.api.common.restartstrategy.RestartStrategies;
+import org.apache.flink.cdc.common.data.binary.BinaryStringData;
+import org.apache.flink.cdc.common.event.CreateTableEvent;
+import org.apache.flink.cdc.common.event.DataChangeEvent;
+import org.apache.flink.cdc.common.event.Event;
+import org.apache.flink.cdc.common.event.TableId;
+import org.apache.flink.cdc.common.schema.Schema;
+import org.apache.flink.cdc.common.source.FlinkSourceProvider;
+import org.apache.flink.cdc.common.types.DataType;
+import org.apache.flink.cdc.common.types.DataTypes;
+import org.apache.flink.cdc.common.types.RowType;
+import org.apache.flink.cdc.connectors.base.options.StartupOptions;
+import org.apache.flink.cdc.connectors.postgres.PostgresTestBase;
+import 
org.apache.flink.cdc.connectors.postgres.factory.PostgresDataSourceFactory;
+import 
org.apache.flink.cdc.connectors.postgres.source.config.PostgresSourceConfigFactory;
+import org.apache.flink.cdc.connectors.postgres.testutils.UniqueDatabase;
+import org.apache.flink.cdc.runtime.typeutils.BinaryRecordDataGenerator;
+import org.apache.flink.cdc.runtime.typeutils.EventTypeInfo;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.table.planner.factories.TestValuesTableFactory;
+import org.apache.flink.util.CloseableIterator;
+
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.lifecycle.Startables;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.Stream;
+
+import static 
org.apache.flink.cdc.connectors.postgres.source.PostgresDataSourceOptions.SCHEMA_CHANGE_ENABLED;
+import static org.assertj.core.api.Assertions.assertThat;
+import static 
org.testcontainers.containers.PostgreSQLContainer.POSTGRESQL_PORT;
+
+/** Integration tests for Postgres source. */
+public class PostgresPipelineITCaseTest extends PostgresTestBase {
+    private static final Logger LOG = 
LoggerFactory.getLogger(PostgresPipelineITCaseTest.class);
+
+    private final UniqueDatabase inventoryDatabase =
+            new UniqueDatabase(
+                    POSTGRES_CONTAINER, "inventory", "inventory", TEST_USER, 
TEST_PASSWORD);
+    private final StreamExecutionEnvironment env =
+            StreamExecutionEnvironment.getExecutionEnvironment();
+
+    @BeforeClass
+    public static void startContainers() {
+        LOG.info("Starting containers...");
+        Startables.deepStart(Stream.of(POSTGRES_CONTAINER)).join();
+        LOG.info("Containers are started.");
+    }
+
+    @AfterClass
+    public static void stopContainers() {
+        LOG.info("Stopping containers...");
+        POSTGRES_CONTAINER.stop();
+        LOG.info("Containers are stopped.");
+    }
+
+    @Before
+    public void before() {
+        TestValuesTableFactory.clearAllData();
+        env.setParallelism(4);
+        env.enableCheckpointing(2000);
+        env.setRestartStrategy(RestartStrategies.noRestart());
+    }
+
+    @Test
+    public void testInitialStartupMode() throws Exception {
+        inventoryDatabase.createAndInitialize();
+        PostgresSourceConfigFactory configFactory =
+                (PostgresSourceConfigFactory)
+                        new PostgresSourceConfigFactory()
+                                .hostname(POSTGRES_CONTAINER.getHost())
+                                
.port(POSTGRES_CONTAINER.getMappedPort(POSTGRESQL_PORT))
+                                .username(TEST_USER)
+                                .password(TEST_PASSWORD)
+                                
.databaseList(inventoryDatabase.getDatabaseName())
+                                .tableList("inventory.products")
+                                .startupOptions(StartupOptions.initial())
+                                .serverTimeZone("UTC")
+                                
.includeSchemaChanges(SCHEMA_CHANGE_ENABLED.defaultValue());
+        configFactory.database(inventoryDatabase.getDatabaseName());
+        configFactory.slotName(getSlotName());
+
+        FlinkSourceProvider sourceProvider =
+                (FlinkSourceProvider)
+                        new 
PostgresDataSource(configFactory).getEventSourceProvider();
+        CloseableIterator<Event> events =
+                env.fromSource(
+                                sourceProvider.getSource(),
+                                WatermarkStrategy.noWatermarks(),
+                                PostgresDataSourceFactory.IDENTIFIER,
+                                new EventTypeInfo())
+                        .executeAndCollect();
+
+        TableId tableId = TableId.tableId("inventory", "products");
+        CreateTableEvent createTableEvent = 
getProductsCreateTableEvent(tableId);
+
+        // generate snapshot data
+        List<Event> expectedSnapshot = getSnapshotExpected(tableId);
+
+        // In this configuration, several subtasks might emit their 
corresponding CreateTableEvent
+        // to downstream. Since it is not possible to predict how many 
CreateTableEvents should we
+        // expect, we simply filter them out from expected sets, and assert 
there's at least one.
+        List<Event> actual = fetchResultsExcept(events, 
expectedSnapshot.size(), createTableEvent);
+        assertThat(actual.subList(0, expectedSnapshot.size()))
+                .containsExactlyInAnyOrder(expectedSnapshot.toArray(new 
Event[0]));
+    }
+
+    private static <T> List<T> fetchResultsExcept(Iterator<T> iter, int size, 
T sideEvent) {
+        List<T> result = new ArrayList<>(size);
+        List<T> sideResults = new ArrayList<>();
+        while (size > 0 && iter.hasNext()) {
+            T event = iter.next();
+            if (!event.equals(sideEvent)) {
+                result.add(event);
+                size--;
+            } else {
+                sideResults.add(sideEvent);
+            }
+        }
+        // Also ensure we've received at least one or many side events.
+        assertThat(sideResults).isNotEmpty();
+        return result;
+    }
+
+    private List<Event> getSnapshotExpected(TableId tableId) {
+        RowType rowType =
+                RowType.of(
+                        new DataType[] {
+                            DataTypes.INT().notNull(),
+                            DataTypes.VARCHAR(255).notNull(),
+                            DataTypes.VARCHAR(512),
+                            DataTypes.FLOAT()

Review Comment:
   It's better to add a test for Whole database synchronization or multi-tables 
synchronization too.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/main/java/org/apache/flink/cdc/connectors/postgres/source/PostgresDataSourceOptions.java:
##########
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.cdc.common.annotation.Experimental;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.debezium.table.DebeziumChangelogMode;
+
+import java.time.Duration;
+
+/** Configurations for {@link PostgresDataSource}. */
+@PublicEvolving
+public class PostgresDataSourceOptions {
+
+    public static final ConfigOption<String> HOSTNAME =
+            ConfigOptions.key("hostname")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("IP address or hostname of the PostgreSQL 
database server.");
+    public static final ConfigOption<Integer> PG_PORT =
+            ConfigOptions.key("port")
+                    .intType()
+                    .defaultValue(5432)
+                    .withDescription("Integer port number of the PostgreSQL 
database server.");
+
+    public static final ConfigOption<String> USERNAME =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Name of the PostgreSQL database to use when 
connecting to the PostgreSQL database server.");
+
+    public static final ConfigOption<String> PASSWORD =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the PostgreSQL 
database server.");
+    public static final ConfigOption<String> TABLES =
+            ConfigOptions.key("tables")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Table names of the PostgreSQL tables to monitor. 
Regular expressions are supported. "
+                                    + "It is important to note that the dot 
(.) is treated as a delimiter for database and table names. "
+                                    + "If there is a need to use a dot (.) in 
a regular expression to match any character, "
+                                    + "it is necessary to escape the dot with 
a backslash."
+                                    + "eg. db0.\\.*, db1.user_table_[0-9]+, 
db[1-2].[app|web]_order_\\.*");
+
+    public static final ConfigOption<String> DECODING_PLUGIN_NAME =
+            ConfigOptions.key("decoding.plugin.name")
+                    .stringType()
+                    .defaultValue("decoderbufs")
+                    .withDescription(
+                            "The name of the Postgres logical decoding plug-in 
installed on the server.\n"
+                                    + "Supported values are decoderbufs, 
wal2json, wal2json_rds, wal2json_streaming,\n"
+                                    + "wal2json_rds_streaming and pgoutput.");
+
+    public static final ConfigOption<String> SLOT_NAME =
+            ConfigOptions.key("slot.name")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The name of the PostgreSQL logical decoding slot 
that was created for streaming changes "
+                                    + "from a particular plug-in for a 
particular database/schema. The server uses this slot "
+                                    + "to stream events to the connector that 
you are configuring.");
+
+    public static final ConfigOption<DebeziumChangelogMode> CHANGELOG_MODE =
+            ConfigOptions.key("changelog-mode")
+                    .enumType(DebeziumChangelogMode.class)
+                    .defaultValue(DebeziumChangelogMode.ALL)
+                    .withDescription(
+                            "The changelog mode used for encoding streaming 
changes.\n"
+                                    + "\"all\": Encodes changes as retract 
stream using all RowKinds. This is the default mode.\n"
+                                    + "\"upsert\": Encodes changes as upsert 
stream that describes idempotent updates on a key. It can be used for tables 
with primary keys when replica identity FULL is not an option.");
+
+    public static final ConfigOption<Boolean> 
SCAN_INCREMENTAL_SNAPSHOT_ENABLED =
+            ConfigOptions.key("scan.incremental.snapshot.enabled")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "Incremental snapshot is a new mechanism to read 
snapshot of a table. "
+                                    + "Compared to the old snapshot mechanism, 
the incremental snapshot has many advantages, including:\n"
+                                    + "(1) source can be parallel during 
snapshot reading, \n"
+                                    + "(2) source can perform checkpoints in 
the chunk granularity during snapshot reading, \n"
+                                    + "(3) source doesn't need to acquire 
global read lock before snapshot reading.");
+
+    public static final ConfigOption<String> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.key-column")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The chunk key of table snapshot, captured tables 
are split into multiple chunks by a chunk key when read the snapshot of table."
+                                    + "By default, the chunk key is the first 
column of the primary key and the chunk key is the RowId in oracle."
+                                    + "This column must be a column of the 
primary key.");
+
+    public static final ConfigOption<String> SERVER_TIME_ZONE =

Review Comment:
   Unused ConfigOption.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-postgres/src/main/java/org/apache/flink/cdc/connectors/postgres/source/PostgresDataSourceOptions.java:
##########
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.postgres.source;
+
+import org.apache.flink.cdc.common.annotation.Experimental;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.debezium.table.DebeziumChangelogMode;
+
+import java.time.Duration;
+
+/** Configurations for {@link PostgresDataSource}. */
+@PublicEvolving
+public class PostgresDataSourceOptions {
+
+    public static final ConfigOption<String> HOSTNAME =
+            ConfigOptions.key("hostname")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("IP address or hostname of the PostgreSQL 
database server.");
+    public static final ConfigOption<Integer> PG_PORT =
+            ConfigOptions.key("port")
+                    .intType()
+                    .defaultValue(5432)
+                    .withDescription("Integer port number of the PostgreSQL 
database server.");
+
+    public static final ConfigOption<String> USERNAME =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Name of the PostgreSQL database to use when 
connecting to the PostgreSQL database server.");
+
+    public static final ConfigOption<String> PASSWORD =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the PostgreSQL 
database server.");
+    public static final ConfigOption<String> TABLES =
+            ConfigOptions.key("tables")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Table names of the PostgreSQL tables to monitor. 
Regular expressions are supported. "
+                                    + "It is important to note that the dot 
(.) is treated as a delimiter for database and table names. "
+                                    + "If there is a need to use a dot (.) in 
a regular expression to match any character, "
+                                    + "it is necessary to escape the dot with 
a backslash."
+                                    + "eg. db0.\\.*, db1.user_table_[0-9]+, 
db[1-2].[app|web]_order_\\.*");
+
+    public static final ConfigOption<String> DECODING_PLUGIN_NAME =
+            ConfigOptions.key("decoding.plugin.name")
+                    .stringType()
+                    .defaultValue("decoderbufs")
+                    .withDescription(
+                            "The name of the Postgres logical decoding plug-in 
installed on the server.\n"
+                                    + "Supported values are decoderbufs, 
wal2json, wal2json_rds, wal2json_streaming,\n"
+                                    + "wal2json_rds_streaming and pgoutput.");
+
+    public static final ConfigOption<String> SLOT_NAME =
+            ConfigOptions.key("slot.name")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The name of the PostgreSQL logical decoding slot 
that was created for streaming changes "
+                                    + "from a particular plug-in for a 
particular database/schema. The server uses this slot "
+                                    + "to stream events to the connector that 
you are configuring.");
+
+    public static final ConfigOption<DebeziumChangelogMode> CHANGELOG_MODE =
+            ConfigOptions.key("changelog-mode")
+                    .enumType(DebeziumChangelogMode.class)
+                    .defaultValue(DebeziumChangelogMode.ALL)
+                    .withDescription(
+                            "The changelog mode used for encoding streaming 
changes.\n"
+                                    + "\"all\": Encodes changes as retract 
stream using all RowKinds. This is the default mode.\n"
+                                    + "\"upsert\": Encodes changes as upsert 
stream that describes idempotent updates on a key. It can be used for tables 
with primary keys when replica identity FULL is not an option.");
+
+    public static final ConfigOption<Boolean> 
SCAN_INCREMENTAL_SNAPSHOT_ENABLED =
+            ConfigOptions.key("scan.incremental.snapshot.enabled")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "Incremental snapshot is a new mechanism to read 
snapshot of a table. "
+                                    + "Compared to the old snapshot mechanism, 
the incremental snapshot has many advantages, including:\n"
+                                    + "(1) source can be parallel during 
snapshot reading, \n"
+                                    + "(2) source can perform checkpoints in 
the chunk granularity during snapshot reading, \n"
+                                    + "(3) source doesn't need to acquire 
global read lock before snapshot reading.");
+
+    public static final ConfigOption<String> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_KEY_COLUMN =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.key-column")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The chunk key of table snapshot, captured tables 
are split into multiple chunks by a chunk key when read the snapshot of table."
+                                    + "By default, the chunk key is the first 
column of the primary key and the chunk key is the RowId in oracle."
+                                    + "This column must be a column of the 
primary key.");
+
+    public static final ConfigOption<String> SERVER_TIME_ZONE =
+            ConfigOptions.key("server-time-zone")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The session time zone in database server. If not 
set, then "
+                                    + "ZoneId.systemDefault() is used to 
determine the server time zone.");
+
+    public static final ConfigOption<Integer> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.size")
+                    .intType()
+                    .defaultValue(8096)
+                    .withDescription(
+                            "The chunk size (number of rows) of table 
snapshot, captured tables are split into multiple chunks when read the snapshot 
of table.");
+
+    public static final ConfigOption<Integer> SCAN_SNAPSHOT_FETCH_SIZE =
+            ConfigOptions.key("scan.snapshot.fetch.size")
+                    .intType()
+                    .defaultValue(1024)
+                    .withDescription(
+                            "The maximum fetch size for per poll when read 
table snapshot.");
+
+    public static final ConfigOption<Duration> CONNECT_TIMEOUT =
+            ConfigOptions.key("connect.timeout")
+                    .durationType()
+                    .defaultValue(Duration.ofSeconds(30))
+                    .withDescription(
+                            "The maximum time that the connector should wait 
after trying to connect to the PostgreSQL database server before timing out.");
+
+    public static final ConfigOption<Integer> CONNECTION_POOL_SIZE =
+            ConfigOptions.key("connection.pool.size")
+                    .intType()
+                    .defaultValue(20)
+                    .withDescription("The connection pool size.");
+
+    public static final ConfigOption<Integer> CONNECT_MAX_RETRIES =
+            ConfigOptions.key("connect.max-retries")
+                    .intType()
+                    .defaultValue(3)
+                    .withDescription(
+                            "The max retry times that the connector should 
retry to build PostgreSQL database server connection.");
+
+    public static final ConfigOption<String> SCAN_STARTUP_MODE =
+            ConfigOptions.key("scan.startup.mode")
+                    .stringType()
+                    .defaultValue("initial")
+                    .withDescription(
+                            "Optional startup mode for PostgreSQL CDC 
consumer, valid enumerations are "
+                                    + "\"initial\", \"earliest-offset\", 
\"latest-offset\", \"timestamp\"\n"
+                                    + "or \"specific-offset\"");
+
+    public static final ConfigOption<String> SCAN_STARTUP_SPECIFIC_OFFSET_FILE 
=
+            ConfigOptions.key("scan.startup.specific-offset.file")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional binlog file name used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<Long> SCAN_STARTUP_SPECIFIC_OFFSET_POS =
+            ConfigOptions.key("scan.startup.specific-offset.pos")
+                    .longType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional binlog file position used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<String> 
SCAN_STARTUP_SPECIFIC_OFFSET_GTID_SET =
+            ConfigOptions.key("scan.startup.specific-offset.gtid-set")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Optional GTID set used in case of 
\"specific-offset\" startup mode");
+
+    public static final ConfigOption<Long> 
SCAN_STARTUP_SPECIFIC_OFFSET_SKIP_EVENTS =

Review Comment:
   Unused ConfigOption.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Re: [PR] [FLINK-35670][cdc-connector][postgres]Flink cdc pipeline support postgres source [flink-cdc]

Reply via email to