Re: [PR] [FLINK-36796][pipeline-connector][oracle]add oracle pipeline connector. [flink-cdc]

via GitHub Thu, 25 Dec 2025 18:35:16 -0800


lvyanquan commented on code in PR #3995:
URL: https://github.com/apache/flink-cdc/pull/3995#discussion_r2647403630



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/main/java/org/apache/flink/cdc/connectors/oracle/source/parser/OracleAlterTableParserListener.java:
##########
@@ -0,0 +1,359 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.source.parser;
+
+import org.apache.flink.cdc.common.event.AddColumnEvent;
+import org.apache.flink.cdc.common.event.AlterColumnTypeEvent;
+import org.apache.flink.cdc.common.event.DropColumnEvent;
+import org.apache.flink.cdc.common.event.RenameColumnEvent;
+import org.apache.flink.cdc.common.event.SchemaChangeEvent;
+import org.apache.flink.cdc.common.types.DataType;
+
+import io.debezium.connector.oracle.antlr.OracleDdlParser;
+import io.debezium.ddl.parser.oracle.generated.PlSqlParser;
+import io.debezium.relational.Column;
+import io.debezium.relational.ColumnEditor;
+import io.debezium.relational.Table;
+import io.debezium.relational.TableEditor;
+import io.debezium.relational.TableId;
+import io.debezium.text.ParsingException;
+import org.antlr.v4.runtime.tree.ParseTreeListener;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import static io.debezium.antlr.AntlrDdlParser.getText;
+import static 
org.apache.flink.cdc.connectors.oracle.utils.OracleTypeUtils.fromDbzColumn;
+
+/** Parser listener that is parsing Oracle ALTER TABLE statements. */
+public class OracleAlterTableParserListener extends BaseParserListener {
+
+    private static final Logger LOGGER =
+            LoggerFactory.getLogger(
+                    
io.debezium.connector.oracle.antlr.listener.AlterTableParserListener.class);

Review Comment:
    LoggerFactory.getLogger(OracleAlterTableParserListener.class);



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/main/java/org/apache/flink/cdc/connectors/oracle/source/reader/OraclePipelineRecordEmitter.java:
##########
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.source.reader;
+
+import org.apache.flink.api.connector.source.SourceOutput;
+import org.apache.flink.cdc.common.event.CreateTableEvent;
+import org.apache.flink.cdc.common.schema.Schema;
+import org.apache.flink.cdc.connectors.base.options.StartupOptions;
+import org.apache.flink.cdc.connectors.base.source.meta.offset.OffsetFactory;
+import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitState;
+import org.apache.flink.cdc.connectors.base.source.metrics.SourceReaderMetrics;
+import 
org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceRecordEmitter;
+import org.apache.flink.cdc.connectors.oracle.source.config.OracleSourceConfig;
+import org.apache.flink.cdc.connectors.oracle.utils.OracleSchemaUtils;
+import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema;
+import org.apache.flink.connector.base.source.reader.RecordEmitter;
+
+import io.debezium.jdbc.JdbcConnection;
+import io.debezium.relational.TableId;
+import org.apache.kafka.connect.source.SourceRecord;
+
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import static 
org.apache.flink.cdc.connectors.base.source.meta.wartermark.WatermarkEvent.isLowWatermarkEvent;
+import static 
org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.getTableId;
+import static 
org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.isDataChangeRecord;
+import static 
org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.isSchemaChangeEvent;
+
+/** The {@link RecordEmitter} implementation for Oracle pipeline connector. */
+public class OraclePipelineRecordEmitter<T> extends 
IncrementalSourceRecordEmitter<T> {

Review Comment:
   We can clarify that the generic type `T` here is `Event`.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/main/java/org/apache/flink/cdc/connectors/oracle/source/OracleDataSourceOptions.java:
##########
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.source;
+
+import org.apache.flink.cdc.common.annotation.Experimental;
+import org.apache.flink.cdc.common.annotation.PublicEvolving;
+import org.apache.flink.cdc.common.configuration.ConfigOption;
+import org.apache.flink.cdc.common.configuration.ConfigOptions;
+import org.apache.flink.cdc.common.configuration.Configuration;
+
+import java.time.Duration;
+import java.util.HashMap;
+import java.util.Map;
+
+/** Configurations for {@link OracleDataSource}. */
+@PublicEvolving
+public class OracleDataSourceOptions {
+
+    public static final ConfigOption<String> JDBC_URL =
+            ConfigOptions.key("jdbc.url")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The url for oracle jdbc ,the url will be used 
preferentially,if no url is configured, then use 
\"jdbc:oracle:thin:@localhost:1521:orcl\"，but oracle 19c url is 
\"jdbc:oracle:thin:@//localhost:1521/pdb1\",so the url property is option to 
adapt to different versions of Oracle");
+
+    public static final ConfigOption<String> HOSTNAME =
+            ConfigOptions.key("hostname")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("IP address or hostname of the oracle 
database server.");
+
+    public static final ConfigOption<Integer> PORT =
+            ConfigOptions.key("port")
+                    .intType()
+                    .defaultValue(3306)
+                    .withDescription("Integer port number of the oracle 
database server.");
+
+    public static final ConfigOption<String> USERNAME =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Name of the oracle database to use when 
connecting to the oracle database server.");
+
+    public static final ConfigOption<String> PASSWORD =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the oracle 
database server.");
+
+    public static final ConfigOption<String> DATABASE =
+            ConfigOptions.key("database")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Password to use when connecting to the oracle 
database server.");
+    public static final ConfigOption<String> TABLES =
+            ConfigOptions.key("tables")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "Table names of the oracle tables to monitor. 
Regular expressions are supported. "
+                                    + "It is important to note that the dot 
(.) is treated as a delimiter for database and table names. "
+                                    + "If there is a need to use a dot (.) in 
a regular expression to match any character, "
+                                    + "it is necessary to escape the dot with 
a backslash."
+                                    + "eg. test.user_table_[0-9]+, 
test[0-9].[app|web]_order_\\.*");
+
+    public static final ConfigOption<String> SERVER_TIME_ZONE =
+            ConfigOptions.key("server-time-zone")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The session time zone in database server. If not 
set, then "
+                                    + "ZoneId.systemDefault() is used to 
determine the server time zone.");
+
+    public static final ConfigOption<Integer> 
SCAN_INCREMENTAL_SNAPSHOT_CHUNK_SIZE =
+            ConfigOptions.key("scan.incremental.snapshot.chunk.size")
+                    .intType()
+                    .defaultValue(8096)
+                    .withDescription(
+                            "The chunk size (number of rows) of table 
snapshot, captured tables are split into multiple chunks when read the snapshot 
of table.");
+
+    public static final ConfigOption<Integer> SCAN_SNAPSHOT_FETCH_SIZE =
+            ConfigOptions.key("scan.snapshot.fetch.size")
+                    .intType()
+                    .defaultValue(1024)
+                    .withDescription(
+                            "The maximum fetch size for per poll when read 
table snapshot.");
+
+    public static final ConfigOption<Duration> CONNECT_TIMEOUT =
+            ConfigOptions.key("connect.timeout")
+                    .durationType()
+                    .defaultValue(Duration.ofSeconds(30))
+                    .withDescription(
+                            "The maximum time that the connector should wait 
after trying to connect to the oracle database server before timing out.");
+
+    public static final ConfigOption<Integer> CONNECTION_POOL_SIZE =
+            ConfigOptions.key("connection.pool.size")
+                    .intType()
+                    .defaultValue(20)
+                    .withDescription("The connection pool size.");
+
+    public static final ConfigOption<Integer> CONNECT_MAX_RETRIES =
+            ConfigOptions.key("connect.max-retries")
+                    .intType()
+                    .defaultValue(3)
+                    .withDescription(
+                            "The max retry times that the connector should 
retry to build oracle database server connection.");
+
+    public static final ConfigOption<String> SCAN_STARTUP_MODE =
+            ConfigOptions.key("scan.startup.mode")
+                    .stringType()
+                    .defaultValue("initial")
+                    .withDescription(
+                            "Optional startup mode for oracle CDC consumer, 
valid enumerations are "
+                                    + "\"initial\", \"latest-offset\", 
\"snapshot\"");
+
+    public static final ConfigOption<Duration> HEARTBEAT_INTERVAL =
+            ConfigOptions.key("heartbeat.interval")
+                    .durationType()
+                    .defaultValue(Duration.ofSeconds(30))
+                    .withDescription(
+                            "Optional interval of sending heartbeat event for 
tracing the latest available binlog offsets");
+
+    @Experimental
+    public static final ConfigOption<Integer> CHUNK_META_GROUP_SIZE =
+            ConfigOptions.key("chunk-meta.group.size")
+                    .intType()
+                    .defaultValue(1000)
+                    .withDescription(
+                            "The group size of chunk meta, if the meta size 
exceeds the group size, the meta will be divided into multiple groups.");
+
+    @Experimental
+    public static final ConfigOption<Double> 
CHUNK_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND =
+            ConfigOptions.key("chunk-key.even-distribution.factor.upper-bound")
+                    .doubleType()
+                    .defaultValue(1000.0d)
+                    
.withFallbackKeys("split-key.even-distribution.factor.upper-bound")
+                    .withDescription(
+                            "The upper bound of chunk key distribution factor. 
The distribution factor is used to determine whether the"
+                                    + " table is evenly distribution or not."
+                                    + " The table chunks would use evenly 
calculation optimization when the data distribution is even,"
+                                    + " and the query oracle for splitting 
would happen when it is uneven."
+                                    + " The distribution factor could be 
calculated by (MAX(id) - MIN(id) + 1) / rowCount.");
+
+    @Experimental
+    public static final ConfigOption<Double> 
CHUNK_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND =
+            ConfigOptions.key("chunk-key.even-distribution.factor.lower-bound")
+                    .doubleType()
+                    .defaultValue(0.05d)
+                    
.withFallbackKeys("split-key.even-distribution.factor.lower-bound")
+                    .withDescription(
+                            "The lower bound of chunk key distribution factor. 
The distribution factor is used to determine whether the"
+                                    + " table is evenly distribution or not."
+                                    + " The table chunks would use evenly 
calculation optimization when the data distribution is even,"
+                                    + " and the query oracle for splitting 
would happen when it is uneven."
+                                    + " The distribution factor could be 
calculated by (MAX(id) - MIN(id) + 1) / rowCount.");
+
+    @Experimental
+    public static final ConfigOption<Boolean> 
SCAN_INCREMENTAL_CLOSE_IDLE_READER_ENABLED =
+            ConfigOptions.key("scan.incremental.close-idle-reader.enabled")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "Whether to close idle readers at the end of the 
snapshot phase. This feature depends on "
+                                    + "FLIP-147: Support Checkpoints After 
Tasks Finished. The flink version is required to be "
+                                    + "greater than or equal to 1.14 when 
enabling this feature.");
+
+    @Experimental
+    public static final ConfigOption<Boolean> SCHEMA_CHANGE_ENABLED =
+            ConfigOptions.key("schema-change.enabled")
+                    .booleanType()
+                    .defaultValue(true)
+                    .withDescription(
+                            "Whether send schema change events, by default is 
true. If set to false, the schema changes will not be sent.");

Review Comment:
   What would happen if we alter table structure but setting 
`schema-change.enabled` to false? Should we always set this parameter to true?



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/main/java/org/apache/flink/cdc/connectors/oracle/source/OracleDataSource.java:
##########
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.source;
+
+import org.apache.flink.cdc.common.annotation.VisibleForTesting;
+import org.apache.flink.cdc.common.configuration.Configuration;
+import org.apache.flink.cdc.common.source.DataSource;
+import org.apache.flink.cdc.common.source.EventSourceProvider;
+import org.apache.flink.cdc.common.source.FlinkSourceProvider;
+import org.apache.flink.cdc.common.source.MetadataAccessor;
+import org.apache.flink.cdc.connectors.oracle.source.config.OracleSourceConfig;
+import 
org.apache.flink.cdc.connectors.oracle.source.config.OracleSourceConfigFactory;
+import 
org.apache.flink.cdc.connectors.oracle.source.meta.offset.RedoLogOffsetFactory;
+import 
org.apache.flink.cdc.connectors.oracle.source.reader.OracleTableSourceReader;
+import org.apache.flink.cdc.connectors.oracle.table.OracleReadableMetaData;
+import org.apache.flink.cdc.debezium.table.DebeziumChangelogMode;
+import org.apache.flink.table.connector.source.DynamicTableSource;
+import 
org.apache.flink.table.connector.source.abilities.SupportsReadingMetadata;
+import org.apache.flink.table.types.DataType;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * A {@link DynamicTableSource} that describes how to create a Oracle redo log 
from a logical
+ * description.
+ */
+public class OracleDataSource implements DataSource, SupportsReadingMetadata {
+
+    private final OracleSourceConfig sourceConfig;
+    private final Configuration config;
+    private final OracleSourceConfigFactory configFactory;
+
+    /** Data type that describes the final output of the source. */
+    protected DataType producedDataType;
+
+    /** Metadata that is appended at the end of a physical source row. */
+    protected List<String> metadataKeys;
+
+    private final List<OracleReadableMetaData> readableMetadataList;
+
+    public OracleDataSource(
+            OracleSourceConfigFactory configFactory,
+            Configuration config,
+            List<OracleReadableMetaData> readableMetadataList) {
+        this.sourceConfig = configFactory.create(0);
+        this.config = config;
+        this.metadataKeys = Collections.emptyList();
+        this.readableMetadataList = readableMetadataList;
+        this.configFactory = configFactory;
+    }
+
+    @Override
+    public EventSourceProvider getEventSourceProvider() {
+
+        OracleDialect oracleDialect = new OracleDialect();
+        OracleEventDeserializer deserializer =
+                new OracleEventDeserializer(
+                        DebeziumChangelogMode.ALL,
+                        
config.get(OracleDataSourceOptions.SCHEMA_CHANGE_ENABLED),
+                        readableMetadataList);
+
+        RedoLogOffsetFactory offsetFactory = new RedoLogOffsetFactory();
+        OracleTableSourceReader oracleChangeEventSource =
+                new OracleTableSourceReader(
+                        configFactory, deserializer, offsetFactory, 
oracleDialect);

Review Comment:
   Can be updated to the following code to avoid `Raw use of parameterized 
class`.
   ```
           OracleEventDeserializer<Event> deserializer =
                   new OracleEventDeserializer<>(
                           DebeziumChangelogMode.ALL,
                           
config.get(OracleDataSourceOptions.SCHEMA_CHANGE_ENABLED),
                           readableMetadataList);
   
           RedoLogOffsetFactory offsetFactory = new RedoLogOffsetFactory();
           OracleTableSourceReader<Event> oracleChangeEventSource =
                   new OracleTableSourceReader<>(
                           configFactory, deserializer, offsetFactory, 
oracleDialect);
   ```



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/test/java/org/apache/flink/cdc/connectors/oracle/testutils/RecordsFormatter.java:
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.testutils;
+
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils;
+import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema;
+import org.apache.flink.cdc.debezium.table.RowDataDebeziumDeserializeSchema;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.conversion.RowRowConverter;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.table.types.utils.TypeConversions;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Collector;
+
+import org.apache.kafka.connect.source.SourceRecord;
+
+import java.time.ZoneId;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Formatter that formats the {@link SourceRecord} to String. */
+public class RecordsFormatter {

Review Comment:
   This class seems to be useless.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/main/java/org/apache/flink/cdc/connectors/oracle/source/reader/OracleTableSourceReader.java:
##########
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.source.reader;
+
+import org.apache.flink.api.connector.source.SourceReaderContext;
+import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig;
+import org.apache.flink.cdc.connectors.base.config.SourceConfig;
+import org.apache.flink.cdc.connectors.base.source.meta.split.SourceRecords;
+import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitState;
+import org.apache.flink.cdc.connectors.base.source.metrics.SourceReaderMetrics;
+import 
org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceReader;
+import org.apache.flink.cdc.connectors.oracle.source.OracleDialect;
+import org.apache.flink.cdc.connectors.oracle.source.OracleSourceBuilder;
+import org.apache.flink.cdc.connectors.oracle.source.config.OracleSourceConfig;
+import 
org.apache.flink.cdc.connectors.oracle.source.config.OracleSourceConfigFactory;
+import 
org.apache.flink.cdc.connectors.oracle.source.meta.offset.RedoLogOffsetFactory;
+import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema;
+import org.apache.flink.connector.base.source.reader.RecordEmitter;
+
+/**
+ * The basic source of Incremental Snapshot framework for JDBC datasource, it 
is based on FLIP-27
+ * and Watermark Signal Algorithm which supports parallel reading snapshot of 
table and then
+ * continue to capture data change by streaming reading.
+ */
+public class OracleTableSourceReader<T> extends 
OracleSourceBuilder.OracleIncrementalSource<T> {

Review Comment:
   We can clarify that the generic type `T` here is `Event`.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/main/java/org/apache/flink/cdc/connectors/oracle/source/reader/OraclePipelineRecordEmitter.java:
##########
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.source.reader;
+
+import org.apache.flink.api.connector.source.SourceOutput;
+import org.apache.flink.cdc.common.event.CreateTableEvent;
+import org.apache.flink.cdc.common.schema.Schema;
+import org.apache.flink.cdc.connectors.base.options.StartupOptions;
+import org.apache.flink.cdc.connectors.base.source.meta.offset.OffsetFactory;
+import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitState;
+import org.apache.flink.cdc.connectors.base.source.metrics.SourceReaderMetrics;
+import 
org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceRecordEmitter;
+import org.apache.flink.cdc.connectors.oracle.source.config.OracleSourceConfig;
+import org.apache.flink.cdc.connectors.oracle.utils.OracleSchemaUtils;
+import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema;
+import org.apache.flink.connector.base.source.reader.RecordEmitter;
+
+import io.debezium.jdbc.JdbcConnection;
+import io.debezium.relational.TableId;
+import org.apache.kafka.connect.source.SourceRecord;
+
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import static 
org.apache.flink.cdc.connectors.base.source.meta.wartermark.WatermarkEvent.isLowWatermarkEvent;
+import static 
org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.getTableId;
+import static 
org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.isDataChangeRecord;
+import static 
org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.isSchemaChangeEvent;
+
+/** The {@link RecordEmitter} implementation for Oracle pipeline connector. */
+public class OraclePipelineRecordEmitter<T> extends 
IncrementalSourceRecordEmitter<T> {
+    private static final long serialVersionUID = 1L;
+    // Used when startup mode is initial
+    private final Set<TableId> alreadySendCreateTableTables;
+    private final Map<TableId, CreateTableEvent> createTableEventCache;
+
+    // Used when startup mode is not initial
+    private boolean shouldEmitAllCreateTableEventsInSnapshotMode = true;
+    private final boolean isBounded;
+    private final OracleSourceConfig sourceConfig;
+
+    public OraclePipelineRecordEmitter(
+            DebeziumDeserializationSchema<T> debeziumDeserializationSchema,
+            SourceReaderMetrics sourceReaderMetrics,
+            boolean includeSchemaChanges,
+            OffsetFactory offsetFactory,
+            OracleSourceConfig sourceConfig) {
+        super(
+                debeziumDeserializationSchema,
+                sourceReaderMetrics,
+                includeSchemaChanges,
+                offsetFactory);
+        List<String> tableList = sourceConfig.getTableList();
+        this.sourceConfig = sourceConfig;
+        this.createTableEventCache = new HashMap<>();
+        this.alreadySendCreateTableTables = new HashSet<>();
+        try (JdbcConnection jdbc = 
OracleSchemaUtils.createOracleConnection(sourceConfig)) {
+
+            List<TableId> capturedTableIds = new ArrayList<>();
+            for (String table : tableList) {
+                TableId capturedTableId = 
TableId.parse(table.toUpperCase(Locale.ROOT));
+                capturedTableIds.add(capturedTableId);
+            }
+            for (TableId tableId : capturedTableIds) {
+                Schema schema = OracleSchemaUtils.getSchema(jdbc, tableId);
+                createTableEventCache.put(
+                        tableId,
+                        new CreateTableEvent(
+                                
org.apache.flink.cdc.common.event.TableId.tableId(
+                                        tableId.catalog(), tableId.table()),
+                                schema));
+            }
+            this.isBounded = 
StartupOptions.snapshot().equals(sourceConfig.getStartupOptions());
+        } catch (SQLException e) {
+            throw new RuntimeException("Cannot start emitter to fetch table 
schema.", e);
+        }
+    }
+
+    @Override
+    protected void processElement(
+            SourceRecord element, SourceOutput<T> output, SourceSplitState 
splitState)
+            throws Exception {
+        if (shouldEmitAllCreateTableEventsInSnapshotMode && isBounded) {
+            for (TableId tableId : createTableEventCache.keySet()) {
+                output.collect((T) createTableEventCache.get(tableId));
+                alreadySendCreateTableTables.add(
+                        TableId.parse(tableId.schema() + "." + 
tableId.table()));
+            }
+            shouldEmitAllCreateTableEventsInSnapshotMode = false;
+        } else if (isLowWatermarkEvent(element) && 
splitState.isSnapshotSplitState()) {
+            TableId tableId = 
splitState.asSnapshotSplitState().toSourceSplit().getTableId();
+            if (!alreadySendCreateTableTables.contains(tableId)) {
+                try (JdbcConnection jdbc = 
OracleSchemaUtils.createOracleConnection(sourceConfig)) {
+                    sendCreateTableEvent(jdbc, tableId, output);
+                }
+                alreadySendCreateTableTables.add(
+                        TableId.parse(tableId.schema() + "." + 
tableId.table()));

Review Comment:
   alreadySendCreateTableTables.add(new TableId(tableId.schema(), null, 
tableId.table()));



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/test/java/org/apache/flink/cdc/connectors/oracle/testutils/TestTable.java:
##########
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.testutils;
+
+import 
org.apache.flink.cdc.connectors.oracle.table.OracleDeserializationConverterFactory;
+import org.apache.flink.cdc.debezium.table.RowDataDebeziumDeserializeSchema;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.conversion.RowRowConverter;
+import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
+import org.apache.flink.table.types.logical.RowType;
+
+import org.apache.kafka.connect.source.SourceRecord;
+
+import java.util.List;
+
+/**
+ * Test utility for creating converter, formatter and deserializer of a table 
in the test database.
+ */
+public class TestTable {

Review Comment:
   This class seems to be useless.



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/main/java/org/apache/flink/cdc/connectors/oracle/source/reader/OraclePipelineRecordEmitter.java:
##########
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.source.reader;
+
+import org.apache.flink.api.connector.source.SourceOutput;
+import org.apache.flink.cdc.common.event.CreateTableEvent;
+import org.apache.flink.cdc.common.schema.Schema;
+import org.apache.flink.cdc.connectors.base.options.StartupOptions;
+import org.apache.flink.cdc.connectors.base.source.meta.offset.OffsetFactory;
+import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitState;
+import org.apache.flink.cdc.connectors.base.source.metrics.SourceReaderMetrics;
+import 
org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceRecordEmitter;
+import org.apache.flink.cdc.connectors.oracle.source.config.OracleSourceConfig;
+import org.apache.flink.cdc.connectors.oracle.utils.OracleSchemaUtils;
+import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema;
+import org.apache.flink.connector.base.source.reader.RecordEmitter;
+
+import io.debezium.jdbc.JdbcConnection;
+import io.debezium.relational.TableId;
+import org.apache.kafka.connect.source.SourceRecord;
+
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import static 
org.apache.flink.cdc.connectors.base.source.meta.wartermark.WatermarkEvent.isLowWatermarkEvent;
+import static 
org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.getTableId;
+import static 
org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.isDataChangeRecord;
+import static 
org.apache.flink.cdc.connectors.base.utils.SourceRecordUtils.isSchemaChangeEvent;
+
+/** The {@link RecordEmitter} implementation for Oracle pipeline connector. */
+public class OraclePipelineRecordEmitter<T> extends 
IncrementalSourceRecordEmitter<T> {
+    private static final long serialVersionUID = 1L;
+    // Used when startup mode is initial
+    private final Set<TableId> alreadySendCreateTableTables;
+    private final Map<TableId, CreateTableEvent> createTableEventCache;
+
+    // Used when startup mode is not initial
+    private boolean shouldEmitAllCreateTableEventsInSnapshotMode = true;
+    private final boolean isBounded;
+    private final OracleSourceConfig sourceConfig;
+
+    public OraclePipelineRecordEmitter(
+            DebeziumDeserializationSchema<T> debeziumDeserializationSchema,
+            SourceReaderMetrics sourceReaderMetrics,
+            boolean includeSchemaChanges,
+            OffsetFactory offsetFactory,
+            OracleSourceConfig sourceConfig) {
+        super(
+                debeziumDeserializationSchema,
+                sourceReaderMetrics,
+                includeSchemaChanges,
+                offsetFactory);
+        List<String> tableList = sourceConfig.getTableList();
+        this.sourceConfig = sourceConfig;
+        this.createTableEventCache = new HashMap<>();
+        this.alreadySendCreateTableTables = new HashSet<>();
+        try (JdbcConnection jdbc = 
OracleSchemaUtils.createOracleConnection(sourceConfig)) {
+
+            List<TableId> capturedTableIds = new ArrayList<>();
+            for (String table : tableList) {
+                TableId capturedTableId = 
TableId.parse(table.toUpperCase(Locale.ROOT));
+                capturedTableIds.add(capturedTableId);
+            }
+            for (TableId tableId : capturedTableIds) {
+                Schema schema = OracleSchemaUtils.getSchema(jdbc, tableId);
+                createTableEventCache.put(
+                        tableId,
+                        new CreateTableEvent(
+                                
org.apache.flink.cdc.common.event.TableId.tableId(
+                                        tableId.catalog(), tableId.table()),
+                                schema));
+            }
+            this.isBounded = 
StartupOptions.snapshot().equals(sourceConfig.getStartupOptions());
+        } catch (SQLException e) {
+            throw new RuntimeException("Cannot start emitter to fetch table 
schema.", e);
+        }
+    }
+
+    @Override
+    protected void processElement(
+            SourceRecord element, SourceOutput<T> output, SourceSplitState 
splitState)
+            throws Exception {
+        if (shouldEmitAllCreateTableEventsInSnapshotMode && isBounded) {
+            for (TableId tableId : createTableEventCache.keySet()) {
+                output.collect((T) createTableEventCache.get(tableId));
+                alreadySendCreateTableTables.add(
+                        TableId.parse(tableId.schema() + "." + 
tableId.table()));
+            }
+            shouldEmitAllCreateTableEventsInSnapshotMode = false;
+        } else if (isLowWatermarkEvent(element) && 
splitState.isSnapshotSplitState()) {
+            TableId tableId = 
splitState.asSnapshotSplitState().toSourceSplit().getTableId();
+            if (!alreadySendCreateTableTables.contains(tableId)) {
+                try (JdbcConnection jdbc = 
OracleSchemaUtils.createOracleConnection(sourceConfig)) {
+                    sendCreateTableEvent(jdbc, tableId, output);
+                }
+                alreadySendCreateTableTables.add(
+                        TableId.parse(tableId.schema() + "." + 
tableId.table()));
+            }
+        } else {
+            boolean isDataChangeRecord = isDataChangeRecord(element);
+            if (isDataChangeRecord || isSchemaChangeEvent(element)) {
+                TableId debeziumTableId = getTableId(element);
+                TableId tableId =
+                        TableId.parse(debeziumTableId.schema() + "." + 
debeziumTableId.table());

Review Comment:
   Can we use debeziumTableId directly here?



##########
flink-cdc-connect/flink-cdc-pipeline-connectors/flink-cdc-pipeline-connector-oracle/src/test/java/org/apache/flink/cdc/connectors/oracle/testutils/OracleTestUtils.java:
##########
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.cdc.connectors.oracle.testutils;
+
+import org.apache.flink.api.common.JobID;
+import 
org.apache.flink.runtime.highavailability.nonha.embedded.HaLeadershipControl;
+import org.apache.flink.runtime.minicluster.MiniCluster;
+import org.apache.flink.table.planner.factories.TestValuesTableFactory;
+
+import org.apache.commons.lang3.StringUtils;
+
+import static java.lang.String.format;
+import static org.apache.flink.util.Preconditions.checkState;
+
+/** Oracle test utilities. */
+public class OracleTestUtils {

Review Comment:
   This class seems to be useless.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [FLINK-36796][pipeline-connector][oracle]add oracle pipeline connector. [flink-cdc]

Reply via email to