timoninmaxim commented on code in PR #311: URL: https://github.com/apache/ignite-extensions/pull/311#discussion_r2221073264
########## modules/cdc-ext/src/test/java/org/apache/ignite/cdc/postgres/CdcPostgreSqlReplicationAbstractTest.java: ########## @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.cdc.postgres; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; +import java.util.Set; +import javax.sql.DataSource; +import io.zonky.test.db.postgres.embedded.EmbeddedPostgres; +import org.apache.ignite.IgniteException; +import org.apache.ignite.cache.query.FieldsQueryCursor; +import org.apache.ignite.cache.query.SqlFieldsQuery; +import org.apache.ignite.cdc.CdcConfiguration; +import org.apache.ignite.cdc.postgresql.IgniteToPostgreSqlCdcConsumer; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.IgniteInternalFuture; +import org.apache.ignite.internal.cdc.CdcMain; +import org.apache.ignite.internal.util.lang.GridAbsPredicate; +import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; + +import static org.apache.ignite.testframework.GridTestUtils.runAsync; + +/** */ +public class 
CdcPostgreSqlReplicationAbstractTest extends GridCommonAbstractTest { + /** */ + protected static final int MAX_BATCH_SIZE = 128; + + /** */ + protected static final int KEYS_CNT = 1024; + + /** */ + protected static void executeOnIgnite(IgniteEx src, String sqlText, Object... args) { + SqlFieldsQuery qry = new SqlFieldsQuery(sqlText).setArgs(args); + + try (FieldsQueryCursor<List<?>> cursor = src.context().query().querySqlFields(qry, true)) { + cursor.getAll(); + } + } + + /** */ + protected static ResultSet selectOnPostgreSql(EmbeddedPostgres postgres, String qry) { + try (Connection conn = postgres.getPostgresDatabase().getConnection()) { + PreparedStatement stmt = conn.prepareStatement(qry); + + return stmt.executeQuery(); + } + catch (SQLException e) { + throw new IgniteException(e); + } + } + + /** */ + protected static void executeOnPostgreSql(EmbeddedPostgres postgres, String qry) { + try (Connection conn = postgres.getPostgresDatabase().getConnection()) { + PreparedStatement stmt = conn.prepareStatement(qry); + + stmt.executeUpdate(); + } + catch (SQLException e) { + throw new IgniteException(e); + } + } + + /** */ + protected static boolean checkRow( + EmbeddedPostgres postgres, + String tableName, + String columnName, + String expected, + String condition + ) { + String qry = "SELECT " + columnName + " FROM " + tableName + " WHERE " + condition; + + try (ResultSet res = selectOnPostgreSql(postgres, qry)) { + if (res.next()) { + String actual = res.getString(columnName); + + return expected.equals(actual); + } + + return false; + } + catch (Exception e) { + throw new IgniteException(e); + } + } + + /** */ + protected static GridAbsPredicate waitForTablesCreatedOnPostgres(EmbeddedPostgres postgres, Set<String> caches) { + return () -> { + String sql = "SELECT EXISTS (" + + " SELECT 1 FROM information_schema.tables " + + " WHERE table_name = '%s'" + + ")"; + + for (String cache : caches) { + try (ResultSet rs = selectOnPostgreSql(postgres, 
String.format(sql, cache.toLowerCase()))) { + rs.next(); + + if (!rs.getBoolean(1)) + return false; + } + catch (SQLException e) { + log.error(e.getMessage(), e); + + throw new IgniteException(e); + } + } + + return true; + }; + } + + /** */ + protected static GridAbsPredicate waitForTableSize(EmbeddedPostgres postgres, String tableName, long expSz) { + return () -> { + try (ResultSet res = selectOnPostgreSql(postgres, "SELECT COUNT(*) FROM " + tableName)) { + res.next(); + + long cnt = res.getLong(1); + + return cnt == expSz; + } + catch (SQLException e) { + throw new IgniteException(e); + } + }; + } + + /** */ + protected IgniteToPostgreSqlCdcConsumer getCdcConsumerConfiguration() { + return new IgniteToPostgreSqlCdcConsumer() + .setMaxBatchSize(MAX_BATCH_SIZE) Review Comment: Let's add a test with batchSize = 1 to be sure nothing hangs in this case ########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcConsumer.java: ########## @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.cdc.postgresql; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import java.util.stream.Collectors; +import javax.sql.DataSource; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryType; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcConsumer; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.cdc.TypeMapping; +import org.apache.ignite.internal.processors.metric.MetricRegistryImpl; +import org.apache.ignite.internal.processors.metric.impl.AtomicLongMetric; +import org.apache.ignite.internal.util.typedef.F; +import org.apache.ignite.internal.util.typedef.internal.A; +import org.apache.ignite.internal.util.typedef.internal.CU; +import org.apache.ignite.metric.MetricRegistry; +import org.apache.ignite.resources.LoggerResource; + +/** + * This class represents a consumer component that replicates cache changes from Apache Ignite to PostgreSQL using + * Change Data Capture (CDC) mechanism. It applies events to PostgreSQL via batch-prepared SQL statements, ensuring + * efficient handling of large volumes of updates. 
+ * + * <p>Additionally, it provides methods for initializing connections, managing transactions, and performing atomic batches + * of writes.</p> + */ +public class IgniteToPostgreSqlCdcConsumer implements CdcConsumer { + /** */ + public static final String EVTS_SENT_CNT = "EventsCount"; + + /** */ + public static final String EVTS_SENT_CNT_DESC = "Count of events applied to PostgreSQL"; + + /** */ + public static final String LAST_EVT_SENT_TIME = "LastEventTime"; + + /** */ + public static final String LAST_EVT_SENT_TIME_DESC = "Timestamp of last applied event to PostgreSQL"; + + /** */ + private static final boolean DFLT_IS_ONLY_PRIMARY = true; + + /** */ + private static final long DFLT_BATCH_SIZE = 1024; + + /** */ + private static final boolean DFLT_CREATE_TABLES = false; + + /** */ + private static final boolean DFLT_AUTO_COMMIT = false; + + /** */ + private DataSource dataSrc; + + /** Collection of cache names which will be replicated to PostgreSQL. */ + private Collection<String> caches; + + /** */ + private boolean onlyPrimary = DFLT_IS_ONLY_PRIMARY; + + /** */ + private long maxBatchSize = DFLT_BATCH_SIZE; + + /** */ + private boolean createTables = DFLT_CREATE_TABLES; + + /** */ + private boolean autoCommit = DFLT_AUTO_COMMIT; + + /** Log. */ + @LoggerResource + private IgniteLogger log; + + /** Cache IDs. */ + private Set<Integer> cachesIds; + + /** Applier instance responsible for applying individual CDC events to PostgreSQL. */ + private IgniteToPostgreSqlCdcApplier applier; + + /** Count of events applied to PostgreSQL. */ + private AtomicLongMetric evtsCnt; + + /** Timestamp of last applied batch to PostgreSQL. 
*/ + private AtomicLongMetric lastEvtTs; + + /** {@inheritDoc} */ + @Override public void start(MetricRegistry reg) { + A.notNull(dataSrc, "dataSource"); + A.notEmpty(caches, "caches"); + + cachesIds = caches.stream() + .map(CU::cacheId) + .collect(Collectors.toSet()); + + applier = new IgniteToPostgreSqlCdcApplier(dataSrc, autoCommit, maxBatchSize, log); + + MetricRegistryImpl mreg = (MetricRegistryImpl)reg; + + this.evtsCnt = mreg.longMetric(EVTS_SENT_CNT, EVTS_SENT_CNT_DESC); + this.lastEvtTs = mreg.longMetric(LAST_EVT_SENT_TIME, LAST_EVT_SENT_TIME_DESC); + + if (log.isInfoEnabled()) + log.info("CDC Ignite to PostgreSQL start-up [cacheIds=" + cachesIds + ']'); + } + + /** {@inheritDoc} */ + @Override public boolean onEvents(Iterator<CdcEvent> events) { + Iterator<CdcEvent> filtered = F.iterator( + events, + F.identity(), + true, + evt -> !onlyPrimary || evt.primary(), + evt -> cachesIds.contains(evt.cacheId())); + + long evtsSent = applier.applyEvents(filtered); + + if (evtsSent > 0) { + evtsCnt.add(evtsSent); + lastEvtTs.value(System.currentTimeMillis()); + + if (log.isInfoEnabled()) + log.info("Events applied [evtsApplied=" + evtsCnt.value() + ']'); + } + + return true; + } + + /** {@inheritDoc} */ + @Override public void onTypes(Iterator<BinaryType> types) { + types.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void onMappings(Iterator<TypeMapping> mappings) { + mappings.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. 
+ }); + } + + /** {@inheritDoc} */ + @Override public void onCacheChange(Iterator<CdcCacheEvent> cacheEvents) { + Iterator<CdcCacheEvent> filtered = F.iterator( + cacheEvents, + F.identity(), + true, + evt -> cachesIds.contains(evt.cacheId())); + + long tablesCreated = applier.applyCacheEvents(filtered, createTables); + + if (tablesCreated > 0 && log.isInfoEnabled()) + log.info("Cache changes applied [tablesCreatedCnt=" + tablesCreated + ']'); + } + + /** {@inheritDoc} */ + @Override public void onCacheDestroy(Iterator<Integer> caches) { + caches.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void stop() { + + } + + /** + * Sets the datasource configuration for connecting to the PostgreSQL database. + * + * @param dataSrc Configured data source. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setDataSource(DataSource dataSrc) { + this.dataSrc = dataSrc; + + return this; + } + + /** + * Sets cache names to replicate. + * + * @param caches Cache names. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setCaches(Set<String> caches) { + this.caches = caches; + + return this; + } + + /** + * Enables/disables filtering to accept only primary-node originated events. + * + * @param onlyPrimary True to restrict replication to primary events only. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setOnlyPrimary(boolean onlyPrimary) { + this.onlyPrimary = onlyPrimary; + + return this; + } + + /** + * Sets the maximum batch size that will be submitted to PostgreSQL. + * <p> + * This setting controls how many statements are sent in a single {@link java.sql.PreparedStatement#executeBatch()} call. 
+ * <p> + * Commit behavior depends on the {@code autoCommit} setting: + * <ul> + * <li>If {@code autoCommit} is {@code true}, each batch will be committed immediately after submission.</li> + * <li>If {@code autoCommit} is {@code false}, batches accumulate and are committed by the connector after + * finishing the last WAL segment.</li> + * </ul> + * + * @param maxBatchSize Maximum number of statements per batch. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setMaxBatchSize(int maxBatchSize) { + this.maxBatchSize = maxBatchSize; Review Comment: A.ensure(maxBatchSize > 0) ########## modules/cdc-ext/src/test/java/org/apache/ignite/cdc/postgres/CdcPostgreSqlReplicationTest.java: ########## @@ -0,0 +1,679 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.cdc.postgres; + +import java.sql.ResultSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.function.IntConsumer; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import io.zonky.test.db.postgres.embedded.EmbeddedPostgres; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.IgniteCheckedException; +import org.apache.ignite.IgniteException; +import org.apache.ignite.cache.CacheAtomicityMode; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cdc.postgresql.IgniteToPostgreSqlCdcConsumer; +import org.apache.ignite.cluster.ClusterState; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.DataRegionConfiguration; +import org.apache.ignite.configuration.DataStorageConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.IgniteInternalFuture; +import org.apache.ignite.internal.IgniteInterruptedCheckedException; +import org.apache.ignite.internal.util.typedef.internal.A; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static org.apache.ignite.cache.CacheAtomicityMode.ATOMIC; +import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; +import static org.apache.ignite.testframework.GridTestUtils.waitForCondition; + +/** */ +@RunWith(Parameterized.class) +public class CdcPostgreSqlReplicationTest extends CdcPostgreSqlReplicationAbstractTest { + /** */ + private static final int BACKUP = 0; + + /** */ + private static final String CACHE_MODE = 
"PARTITIONED"; + + /** */ + @Parameterized.Parameter() + public CacheAtomicityMode atomicity; + + /** */ + @Parameterized.Parameter(1) + public boolean createTables; + + /** @return Test parameters. */ + @Parameterized.Parameters(name = "atomicity={0}, createTables={1}") + public static Collection<?> parameters() { + List<Object[]> params = new ArrayList<>(); + + for (CacheAtomicityMode atomicity : EnumSet.of(ATOMIC, TRANSACTIONAL)) { + for (boolean createTables : new boolean[] {true, false}) + params.add(new Object[] {atomicity, createTables}); + } + + return params; + } + + /** */ + protected static IgniteEx src; + + /** */ + protected static EmbeddedPostgres postgres; + + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { + IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName); + + DataRegionConfiguration dataRegionConfiguration = new DataRegionConfiguration() + .setPersistenceEnabled(true) + .setCdcEnabled(true); + + DataStorageConfiguration dataStorageConfiguration = new DataStorageConfiguration() + .setWalForceArchiveTimeout(5_000) + .setDefaultDataRegionConfiguration(dataRegionConfiguration); + + cfg.setDataStorageConfiguration(dataStorageConfiguration); + cfg.setConsistentId(igniteInstanceName); + + return cfg; + } + + /** {@inheritDoc} */ + @Override protected IgniteToPostgreSqlCdcConsumer getCdcConsumerConfiguration() { + IgniteToPostgreSqlCdcConsumer cdcCfg = super.getCdcConsumerConfiguration(); + + cdcCfg.setCreateTables(createTables); + + return cdcCfg; + } + + /** {@inheritDoc} */ + @Override protected void beforeTest() throws Exception { + cleanPersistenceDir(); + + src = startGrid(0); + + src.cluster().state(ClusterState.ACTIVE); + + postgres = EmbeddedPostgres.builder().start(); + } + + /** {@inheritDoc} */ + @Override protected void afterTest() throws Exception { + stopAllGrids(); + + cleanPersistenceDir(); + + postgres.close(); + } + + /** */ + @Test + 
public void testSingleColumnKeyDataReplicationWithPrimaryFirst() throws Exception { + testSingleColumnKeyDataReplication(false); + } + + /** */ + @Test + public void testSingleColumnKeyDataReplicationWithPrimaryLast() throws Exception { + testSingleColumnKeyDataReplication(true); + } + + /** */ + public void testSingleColumnKeyDataReplication(boolean isPrimaryLast) throws Exception { + String[] tableFields; + + String insertQry = "INSERT INTO T1 VALUES(?, ?)"; + String updateQry; + + IntConsumer insert; + IntConsumer update; + + if (isPrimaryLast) { + tableFields = new String[] {"NAME VARCHAR(20)", "ID BIGINT PRIMARY KEY"}; + + updateQry = "MERGE INTO T1 (NAME, ID) VALUES (?, ?)"; + + insert = id -> executeOnIgnite(src, insertQry, "Name" + id, id); + update = id -> executeOnIgnite(src, updateQry, id + "Name", id); + } + else { + tableFields = new String[] {"ID BIGINT PRIMARY KEY", "NAME VARCHAR(20)"}; + + updateQry = "MERGE INTO T1 (ID, NAME) VALUES (?, ?)"; + + insert = id -> executeOnIgnite(src, insertQry, id, "Name" + id); + update = id -> executeOnIgnite(src, updateQry, id, id + "Name"); + } + + createTable("T1", tableFields, null, null, null); + + Supplier<Boolean> checkInsert = () -> checkSingleColumnKeyTable(id -> "Name" + id); + + Supplier<Boolean> checkUpdate = () -> checkSingleColumnKeyTable(id -> id + "Name"); + + testDataReplication("T1", insert, checkInsert, update, checkUpdate); + } + + /** */ + private boolean checkSingleColumnKeyTable(Function<Long, String> idToName) { + String qry = "SELECT ID, NAME FROM T1"; + + try (ResultSet res = selectOnPostgreSql(postgres, qry)) { + long cnt = 0; + + long id; + String curName; + + while (res.next()) { + id = res.getLong("ID"); + curName = res.getString("NAME"); + + if (!idToName.apply(id).equals(curName)) + return false; + + cnt++; + } + + return cnt == KEYS_CNT; + } + catch (Exception e) { + throw new IgniteException(e); + } + } + + /** Replication with complex SQL key. Data inserted via SQL. 
*/ + @Test + public void testMultiColumnKeyDataReplicationWithSql() throws Exception { + IntConsumer insert = id -> executeOnIgnite( + src, + "INSERT INTO T2 (ID, SUBID, NAME, VAL) VALUES(?, ?, ?, ?)", + id, + "SUBID", + "Name" + id, + id * 42 + ); + + IntConsumer update = id -> executeOnIgnite( + src, + "MERGE INTO T2 (ID, SUBID, NAME, VAL) VALUES(?, ?, ?, ?)", + id, + "SUBID", + id + "Name", + id + 42 + ); + + testMultiColumnKeyDataReplication("T2", insert, update); + } + + /** Replication with complex SQL key. Data inserted via key-value API. */ + @Test + public void testMultiColumnKeyDataReplicationWithKeyValue() throws Exception { + IntConsumer insert = id -> src.cache("T3") + .put( + new TestKey(id, "SUBID"), + new TestVal("Name" + id, id * 42) + ); + + IntConsumer update = id -> src.cache("T3") + .put( + new TestKey(id, "SUBID"), + new TestVal(id + "Name", id + 42) + ); + + testMultiColumnKeyDataReplication("T3", insert, update); + } + + /** */ + public void testMultiColumnKeyDataReplication(String tableName, IntConsumer insert, IntConsumer update) throws Exception { + String[] tableFields = new String[] { + "ID INT NOT NULL", + "SUBID VARCHAR(15) NOT NULL", + "NAME VARCHAR", + "VAL INT" + }; + + String constraint = "PRIMARY KEY (ID, SUBID)"; + + createTable(tableName, tableFields, constraint, TestKey.class.getName(), TestVal.class.getName()); + + Supplier<Boolean> checkInsert = () -> checkMultiColumnKeyTable(tableName, id -> "Name" + id, id -> id * 42); + + Supplier<Boolean> checkUpdate = () -> checkMultiColumnKeyTable(tableName, id -> id + "Name", id -> id + 42); + + testDataReplication(tableName, insert, checkInsert, update, checkUpdate); + } + + /** */ + private boolean checkMultiColumnKeyTable( + String tableName, + Function<Integer, String> idToName, + Function<Integer, Integer> idToVal + ) { + String qry = "SELECT ID, NAME, VAL FROM " + tableName; + + try (ResultSet res = selectOnPostgreSql(postgres, qry)) { + long cnt = 0; + + int id; + String 
curName; + int curVal; + + while (res.next()) { + id = res.getInt("ID"); + curName = res.getString("NAME"); + curVal = res.getInt("VAL"); + + if (!idToVal.apply(id).equals(curVal) || !idToName.apply(id).equals(curName)) + return false; + + cnt++; + } + + return cnt == KEYS_CNT; + } + catch (Exception e) { + throw new IgniteException(e); + } + } + + /** */ + private void testDataReplication( + String tableName, + IntConsumer insert, + Supplier<Boolean> checkInsert, + IntConsumer update, + Supplier<Boolean> checkUpdate + ) throws Exception { + IgniteInternalFuture<?> fut = startCdc(Stream.of(tableName).collect(Collectors.toSet())); + + try { + IntStream.range(0, KEYS_CNT).forEach(insert); Review Comment: Do you have tests for ranges with duplicated keys? ########## modules/cdc-ext/src/test/java/org/apache/ignite/cdc/postgres/CdcPostgreSqlReplicationAbstractTest.java: ########## @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.cdc.postgres; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; +import java.util.Set; +import javax.sql.DataSource; +import io.zonky.test.db.postgres.embedded.EmbeddedPostgres; +import org.apache.ignite.IgniteException; +import org.apache.ignite.cache.query.FieldsQueryCursor; +import org.apache.ignite.cache.query.SqlFieldsQuery; +import org.apache.ignite.cdc.CdcConfiguration; +import org.apache.ignite.cdc.postgresql.IgniteToPostgreSqlCdcConsumer; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.IgniteInternalFuture; +import org.apache.ignite.internal.cdc.CdcMain; +import org.apache.ignite.internal.util.lang.GridAbsPredicate; +import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; + +import static org.apache.ignite.testframework.GridTestUtils.runAsync; + +/** */ +public class CdcPostgreSqlReplicationAbstractTest extends GridCommonAbstractTest { + /** */ + protected static final int MAX_BATCH_SIZE = 128; + + /** */ + protected static final int KEYS_CNT = 1024; + + /** */ + protected static void executeOnIgnite(IgniteEx src, String sqlText, Object... args) { Review Comment: Why are all methods static? ########## modules/cdc-ext/src/test/java/org/apache/ignite/cdc/postgres/CdcPostgreSqlReplicationTest.java: ########## @@ -0,0 +1,679 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.cdc.postgres; + +import java.sql.ResultSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.function.IntConsumer; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import io.zonky.test.db.postgres.embedded.EmbeddedPostgres; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.IgniteCheckedException; +import org.apache.ignite.IgniteException; +import org.apache.ignite.cache.CacheAtomicityMode; +import org.apache.ignite.cache.QueryEntity; +import org.apache.ignite.cdc.postgresql.IgniteToPostgreSqlCdcConsumer; +import org.apache.ignite.cluster.ClusterState; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.DataRegionConfiguration; +import org.apache.ignite.configuration.DataStorageConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.IgniteInternalFuture; +import org.apache.ignite.internal.IgniteInterruptedCheckedException; +import org.apache.ignite.internal.util.typedef.internal.A; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static 
org.apache.ignite.cache.CacheAtomicityMode.ATOMIC; +import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; +import static org.apache.ignite.testframework.GridTestUtils.waitForCondition; + +/** */ +@RunWith(Parameterized.class) +public class CdcPostgreSqlReplicationTest extends CdcPostgreSqlReplicationAbstractTest { + /** */ + private static final int BACKUP = 0; + + /** */ + private static final String CACHE_MODE = "PARTITIONED"; + + /** */ + @Parameterized.Parameter() + public CacheAtomicityMode atomicity; + + /** */ + @Parameterized.Parameter(1) + public boolean createTables; + + /** @return Test parameters. */ + @Parameterized.Parameters(name = "atomicity={0}, createTables={1}") + public static Collection<?> parameters() { + List<Object[]> params = new ArrayList<>(); + + for (CacheAtomicityMode atomicity : EnumSet.of(ATOMIC, TRANSACTIONAL)) { + for (boolean createTables : new boolean[] {true, false}) + params.add(new Object[] {atomicity, createTables}); + } + + return params; + } + + /** */ + protected static IgniteEx src; Review Comment: Is there a reason to keep src/postgres static? ########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcConsumer.java: ########## @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.cdc.postgresql; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import java.util.stream.Collectors; +import javax.sql.DataSource; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryType; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcConsumer; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.cdc.TypeMapping; +import org.apache.ignite.internal.processors.metric.MetricRegistryImpl; +import org.apache.ignite.internal.processors.metric.impl.AtomicLongMetric; +import org.apache.ignite.internal.util.typedef.F; +import org.apache.ignite.internal.util.typedef.internal.A; +import org.apache.ignite.internal.util.typedef.internal.CU; +import org.apache.ignite.metric.MetricRegistry; +import org.apache.ignite.resources.LoggerResource; + +/** + * This class represents a consumer component that replicates cache changes from Apache Ignite to PostgreSQL using + * Change Data Capture (CDC) mechanism. It applies events to PostgreSQL via batch-prepared SQL statements, ensuring + * efficient handling of large volumes of updates. 
+ * + * <p>Additionally, it provides methods for initializing connections, managing transactions, and performing atomic batches + * of writes.</p> + */ +public class IgniteToPostgreSqlCdcConsumer implements CdcConsumer { + /** */ + public static final String EVTS_SENT_CNT = "EventsCount"; + + /** */ + public static final String EVTS_SENT_CNT_DESC = "Count of events applied to PostgreSQL"; + + /** */ + public static final String LAST_EVT_SENT_TIME = "LastEventTime"; + + /** */ + public static final String LAST_EVT_SENT_TIME_DESC = "Timestamp of last applied event to PostgreSQL"; + + /** */ + private static final boolean DFLT_IS_ONLY_PRIMARY = true; + + /** */ + private static final long DFLT_BATCH_SIZE = 1024; + + /** */ + private static final boolean DFLT_CREATE_TABLES = false; + + /** */ + private static final boolean DFLT_AUTO_COMMIT = false; + + /** */ + private DataSource dataSrc; + + /** Collection of cache names which will be replicated to PostgreSQL. */ + private Collection<String> caches; + + /** */ + private boolean onlyPrimary = DFLT_IS_ONLY_PRIMARY; + + /** */ + private long maxBatchSize = DFLT_BATCH_SIZE; + + /** */ + private boolean createTables = DFLT_CREATE_TABLES; + + /** */ + private boolean autoCommit = DFLT_AUTO_COMMIT; + + /** Log. */ + @LoggerResource + private IgniteLogger log; + + /** Cache IDs. */ + private Set<Integer> cachesIds; + + /** Applier instance responsible for applying individual CDC events to PostgreSQL. */ + private IgniteToPostgreSqlCdcApplier applier; + + /** Count of events applied to PostgreSQL. */ + private AtomicLongMetric evtsCnt; + + /** Timestamp of last applied batch to PostgreSQL. 
*/ + private AtomicLongMetric lastEvtTs; + + /** {@inheritDoc} */ + @Override public void start(MetricRegistry reg) { + A.notNull(dataSrc, "dataSource"); + A.notEmpty(caches, "caches"); + + cachesIds = caches.stream() + .map(CU::cacheId) + .collect(Collectors.toSet()); + + applier = new IgniteToPostgreSqlCdcApplier(dataSrc, autoCommit, maxBatchSize, log); + + MetricRegistryImpl mreg = (MetricRegistryImpl)reg; + + this.evtsCnt = mreg.longMetric(EVTS_SENT_CNT, EVTS_SENT_CNT_DESC); + this.lastEvtTs = mreg.longMetric(LAST_EVT_SENT_TIME, LAST_EVT_SENT_TIME_DESC); + + if (log.isInfoEnabled()) + log.info("CDC Ignite to PostgreSQL start-up [cacheIds=" + cachesIds + ']'); + } + + /** {@inheritDoc} */ + @Override public boolean onEvents(Iterator<CdcEvent> events) { + Iterator<CdcEvent> filtered = F.iterator( + events, + F.identity(), + true, + evt -> !onlyPrimary || evt.primary(), + evt -> cachesIds.contains(evt.cacheId())); + + long evtsSent = applier.applyEvents(filtered); + + if (evtsSent > 0) { + evtsCnt.add(evtsSent); + lastEvtTs.value(System.currentTimeMillis()); + + if (log.isInfoEnabled()) + log.info("Events applied [evtsApplied=" + evtsCnt.value() + ']'); + } + + return true; + } + + /** {@inheritDoc} */ + @Override public void onTypes(Iterator<BinaryType> types) { + types.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void onMappings(Iterator<TypeMapping> mappings) { + mappings.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. 
+ }); + } + + /** {@inheritDoc} */ + @Override public void onCacheChange(Iterator<CdcCacheEvent> cacheEvents) { + Iterator<CdcCacheEvent> filtered = F.iterator( + cacheEvents, + F.identity(), + true, + evt -> cachesIds.contains(evt.cacheId())); + + long tablesCreated = applier.applyCacheEvents(filtered, createTables); + + if (tablesCreated > 0 && log.isInfoEnabled()) + log.info("Cache changes applied [tablesCreatedCnt=" + tablesCreated + ']'); + } + + /** {@inheritDoc} */ + @Override public void onCacheDestroy(Iterator<Integer> caches) { + caches.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void stop() { + + } + + /** + * Sets the datasource configuration for connecting to the PostgreSQL database. + * + * @param dataSrc Configured data source. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setDataSource(DataSource dataSrc) { + this.dataSrc = dataSrc; + + return this; + } + + /** + * Sets cache names to replicate. + * + * @param caches Cache names. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setCaches(Set<String> caches) { + this.caches = caches; + + return this; + } + + /** + * Enables/disables filtering to accept only primary-node originated events. + * + * @param onlyPrimary True to restrict replication to primary events only. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setOnlyPrimary(boolean onlyPrimary) { + this.onlyPrimary = onlyPrimary; + + return this; + } + + /** + * Sets the maximum batch size that will be submitted to PostgreSQL. + * <p> + * This setting controls how many statements are sent in a single {@link java.sql.PreparedStatement#executeBatch()} call. 
+ * <p> + * Commit behavior depends on the {@code autoCommit} setting: + * <ul> + * <li>If {@code autoCommit} is {@code true}, each batch will be committed immediately after submission.</li> Review Comment: It commits every single query, doesn't it? ########## modules/cdc-ext/src/main/java/org/apache/ignite/cdc/postgresql/IgniteToPostgreSqlCdcConsumer.java: ########## @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.ignite.cdc.postgresql; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; +import java.util.stream.Collectors; +import javax.sql.DataSource; +import org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryType; +import org.apache.ignite.cdc.CdcCacheEvent; +import org.apache.ignite.cdc.CdcConsumer; +import org.apache.ignite.cdc.CdcEvent; +import org.apache.ignite.cdc.TypeMapping; +import org.apache.ignite.internal.processors.metric.MetricRegistryImpl; +import org.apache.ignite.internal.processors.metric.impl.AtomicLongMetric; +import org.apache.ignite.internal.util.typedef.F; +import org.apache.ignite.internal.util.typedef.internal.A; +import org.apache.ignite.internal.util.typedef.internal.CU; +import org.apache.ignite.metric.MetricRegistry; +import org.apache.ignite.resources.LoggerResource; + +/** + * This class represents a consumer component that replicates cache changes from Apache Ignite to PostgreSQL using + * Change Data Capture (CDC) mechanism. It applies events to PostgreSQL via batch-prepared SQL statements, ensuring + * efficient handling of large volumes of updates. 
+ * + * <p>Additionally, it provides methods for initializing connections, managing transactions, and performing atomic batches + * of writes.</p> + */ +public class IgniteToPostgreSqlCdcConsumer implements CdcConsumer { + /** */ + public static final String EVTS_SENT_CNT = "EventsCount"; + + /** */ + public static final String EVTS_SENT_CNT_DESC = "Count of events applied to PostgreSQL"; + + /** */ + public static final String LAST_EVT_SENT_TIME = "LastEventTime"; + + /** */ + public static final String LAST_EVT_SENT_TIME_DESC = "Timestamp of last applied event to PostgreSQL"; + + /** */ + private static final boolean DFLT_IS_ONLY_PRIMARY = true; + + /** */ + private static final long DFLT_BATCH_SIZE = 1024; + + /** */ + private static final boolean DFLT_CREATE_TABLES = false; + + /** */ + private static final boolean DFLT_AUTO_COMMIT = false; + + /** */ + private DataSource dataSrc; + + /** Collection of cache names which will be replicated to PostgreSQL. */ + private Collection<String> caches; + + /** */ + private boolean onlyPrimary = DFLT_IS_ONLY_PRIMARY; + + /** */ + private long maxBatchSize = DFLT_BATCH_SIZE; + + /** */ + private boolean createTables = DFLT_CREATE_TABLES; + + /** */ + private boolean autoCommit = DFLT_AUTO_COMMIT; + + /** Log. */ + @LoggerResource + private IgniteLogger log; + + /** Cache IDs. */ + private Set<Integer> cachesIds; + + /** Applier instance responsible for applying individual CDC events to PostgreSQL. */ + private IgniteToPostgreSqlCdcApplier applier; + + /** Count of events applied to PostgreSQL. */ + private AtomicLongMetric evtsCnt; + + /** Timestamp of last applied batch to PostgreSQL. 
*/ + private AtomicLongMetric lastEvtTs; + + /** {@inheritDoc} */ + @Override public void start(MetricRegistry reg) { + A.notNull(dataSrc, "dataSource"); + A.notEmpty(caches, "caches"); + + cachesIds = caches.stream() + .map(CU::cacheId) + .collect(Collectors.toSet()); + + applier = new IgniteToPostgreSqlCdcApplier(dataSrc, autoCommit, maxBatchSize, log); + + MetricRegistryImpl mreg = (MetricRegistryImpl)reg; + + this.evtsCnt = mreg.longMetric(EVTS_SENT_CNT, EVTS_SENT_CNT_DESC); + this.lastEvtTs = mreg.longMetric(LAST_EVT_SENT_TIME, LAST_EVT_SENT_TIME_DESC); + + if (log.isInfoEnabled()) + log.info("CDC Ignite to PostgreSQL start-up [cacheIds=" + cachesIds + ']'); + } + + /** {@inheritDoc} */ + @Override public boolean onEvents(Iterator<CdcEvent> events) { + Iterator<CdcEvent> filtered = F.iterator( + events, + F.identity(), + true, + evt -> !onlyPrimary || evt.primary(), + evt -> cachesIds.contains(evt.cacheId())); + + long evtsSent = applier.applyEvents(filtered); + + if (evtsSent > 0) { + evtsCnt.add(evtsSent); + lastEvtTs.value(System.currentTimeMillis()); + + if (log.isInfoEnabled()) + log.info("Events applied [evtsApplied=" + evtsCnt.value() + ']'); + } + + return true; + } + + /** {@inheritDoc} */ + @Override public void onTypes(Iterator<BinaryType> types) { + types.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void onMappings(Iterator<TypeMapping> mappings) { + mappings.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. 
+ }); + } + + /** {@inheritDoc} */ + @Override public void onCacheChange(Iterator<CdcCacheEvent> cacheEvents) { + Iterator<CdcCacheEvent> filtered = F.iterator( + cacheEvents, + F.identity(), + true, + evt -> cachesIds.contains(evt.cacheId())); + + long tablesCreated = applier.applyCacheEvents(filtered, createTables); + + if (tablesCreated > 0 && log.isInfoEnabled()) + log.info("Cache changes applied [tablesCreatedCnt=" + tablesCreated + ']'); + } + + /** {@inheritDoc} */ + @Override public void onCacheDestroy(Iterator<Integer> caches) { + caches.forEachRemaining(e -> { + // Just skip. Handle of cache events not supported. + }); + } + + /** {@inheritDoc} */ + @Override public void stop() { + + } + + /** + * Sets the datasource configuration for connecting to the PostgreSQL database. + * + * @param dataSrc Configured data source. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setDataSource(DataSource dataSrc) { + this.dataSrc = dataSrc; + + return this; + } + + /** + * Sets cache names to replicate. + * + * @param caches Cache names. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setCaches(Set<String> caches) { + this.caches = caches; + + return this; + } + + /** + * Enables/disables filtering to accept only primary-node originated events. + * + * @param onlyPrimary True to restrict replication to primary events only. + * @return {@code this} for chaining. + */ + public IgniteToPostgreSqlCdcConsumer setOnlyPrimary(boolean onlyPrimary) { + this.onlyPrimary = onlyPrimary; + + return this; + } + + /** + * Sets the maximum batch size that will be submitted to PostgreSQL. + * <p> + * This setting controls how many statements are sent in a single {@link java.sql.PreparedStatement#executeBatch()} call. 
+ * <p> + * Commit behavior depends on the {@code autoCommit} setting: + * <ul> + * <li>If {@code autoCommit} is {@code true}, each batch will be committed immediately after submission.</li> + * <li>If {@code autoCommit} is {@code false}, batches accumulate and are committed by the connector after + * finishing the last WAL segment.</li> Review Comment: Let's commit after applying a batch. Committing a full WAL segment can be too heavy. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@ignite.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org