github-actions[bot] commented on code in PR #64423: URL: https://github.com/apache/doris/pull/64423#discussion_r3459405396
########## regression-test/suites/job_p0/streaming_job/cdc/test_streaming_postgres_job_slot_dropped_during_incremental.groovy: ########## @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +import org.awaitility.Awaitility + +import static java.util.concurrent.TimeUnit.SECONDS + +// When the replication slot is dropped out from under a running incremental job, +// resuming from the committed position would silently skip the WAL discarded with +// the slot. On rebuild, validateStreamSource detects the missing slot and fails with +// the "Replication slot invalidated" marker; FE classifies that as CANNOT_RESUME_ERR, +// so the job settles in PAUSED and is NOT pulled back to RUNNING by auto-resume. +// +// Uses a user-provided slot so (1) the slot name is known up front and (2) Doris does +// not auto-recreate it (createSlotForGlobalStreamSplit only fires for Doris-owned +// slots), keeping the "slot not found" branch deterministic. +// +// We assert state + error marker only — NOT a hard slot-count check, which is +// TOCTOU-flaky against the cdc_client winding down its connection. 
+suite("test_streaming_postgres_job_slot_dropped_during_incremental", + "p0,external,pg,external_docker,external_docker_pg,nondatalake") { + def jobName = "test_streaming_pg_slot_dropped_job" + def currentDb = (sql "select database()")[0][0] + def table1 = "slot_dropped_pg_tbl" + def pgDB = "postgres" + def pgSchema = "cdc_test" + def pgUser = "postgres" + def pgPassword = "123456" + def userSlot = "slot_dropped_user_slot" + def userPub = "slot_dropped_user_pub" + + sql """DROP JOB IF EXISTS where jobname = '${jobName}'""" + sql """drop table if exists ${currentDb}.${table1} force""" + + String enabled = context.config.otherConfigs.get("enableJdbcTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String pg_port = context.config.otherConfigs.get("pg_14_port"); + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String s3_endpoint = getS3Endpoint() + String bucket = getS3BucketName() + String driver_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/postgresql-42.5.0.jar" + + connect("${pgUser}", "${pgPassword}", "jdbc:postgresql://${externalEnvIp}:${pg_port}/${pgDB}") { + sql """DROP TABLE IF EXISTS ${pgDB}.${pgSchema}.${table1}""" + sql """CREATE TABLE ${pgDB}.${pgSchema}.${table1} ( + "id" int PRIMARY KEY, + "name" varchar(200) + )""" + sql """DROP PUBLICATION IF EXISTS ${userPub}""" + sql """CREATE PUBLICATION ${userPub} FOR TABLE ${pgDB}.${pgSchema}.${table1}""" + def existing = sql """SELECT COUNT(1) FROM pg_replication_slots WHERE slot_name = '${userSlot}'""" + if (existing[0][0] != 0) { Review Comment: This setup path is not idempotent if a previous run left `slot_dropped_user_slot` active. PostgreSQL rejects `pg_drop_replication_slot` while a walsender still owns the slot, but this branch only checks that the slot exists and drops it directly. The final cleanup repeats the same direct drop after `DROP JOB`, so a slow cdc_client teardown can fail the test there too. 
Please apply the same terminate-and-retry pattern used in the main scenario, or wait until `active_pid IS NULL`, in both setup and cleanup, so that reruns do not fail on a leftover slot that is still active. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
