This is an automated email from the ASF dual-hosted git repository.
fanjia pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new b5c7b4ad0e [Fix][Connector-V2] Update file filter pattern compilation
to remove unnecessary quoting (#9658)
b5c7b4ad0e is described below
commit b5c7b4ad0ec19f5fa0dcb7fed1dd09c9fa49fe31
Author: corgy-w <[email protected]>
AuthorDate: Mon Aug 4 21:03:45 2025 +0800
[Fix][Connector-V2] Update file filter pattern compilation to remove
unnecessary quoting (#9658)
---
.../file/source/reader/AbstractReadStrategy.java | 3 +-
.../e2e/connector/file/local/LocalFileIT.java | 7 ++
.../excel/local_filter_regex_excel_to_assert.conf | 132 +++++++++++++++++++++
3 files changed, 140 insertions(+), 2 deletions(-)
diff --git
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java
index 8dcf788592..90db77f10b 100644
---
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java
+++
b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/source/reader/AbstractReadStrategy.java
@@ -53,7 +53,6 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@@ -178,7 +177,7 @@ public abstract class AbstractReadStrategy implements
ReadStrategy {
if
(pluginConfig.hasPath(FileBaseSourceOptions.FILE_FILTER_PATTERN.key())) {
String filterPattern =
pluginConfig.getString(FileBaseSourceOptions.FILE_FILTER_PATTERN.key());
- this.pattern =
Pattern.compile(Matcher.quoteReplacement(filterPattern));
+ this.pattern = Pattern.compile(filterPattern);
}
}
diff --git
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
index 63b94d82f6..7832c0ae1a 100644
---
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
+++
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/local/LocalFileIT.java
@@ -286,6 +286,11 @@ public class LocalFileIT extends TestSuiteBase {
"/seatunnel/read/excel_filter/name=tyrantlucifer/hobby=coding/e2e_filter.xlsx",
container);
+ ContainerUtil.copyFileIntoContainers(
+ "/excel/e2e.xlsx",
+
"/seatunnel/read/excel_filter_regex/name=tyrantlucifer/hobby=coding/e2e_filter.xlsx",
+ container);
+
ContainerUtil.copyFileIntoContainers(
"/csv/break_line.csv",
"/seatunnel/read/csv/break_line/break_line.csv",
@@ -361,6 +366,8 @@ public class LocalFileIT extends TestSuiteBase {
helper.execute("/parquet/local_file_parquet_projection_to_assert.conf");
// test read filtered local file
helper.execute("/excel/local_filter_excel_to_assert.conf");
+ // test read filtered local file with regex
+ helper.execute("/excel/local_filter_regex_excel_to_assert.conf");
// test read empty directory
helper.execute("/json/local_file_to_console.conf");
diff --git
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/local_filter_regex_excel_to_assert.conf
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/local_filter_regex_excel_to_assert.conf
new file mode 100644
index 0000000000..1bdcd998bf
--- /dev/null
+++
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-local-e2e/src/test/resources/excel/local_filter_regex_excel_to_assert.conf
@@ -0,0 +1,132 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+ # You can set spark configuration here
+ spark.app.name = "SeaTunnel"
+ spark.executor.instances = 2
+ spark.executor.cores = 1
+ spark.executor.memory = "1g"
+ spark.master = local
+}
+
+source {
+ LocalFile {
+ path = "/seatunnel/read/excel_filter_regex"
+ plugin_output = "fake"
+ file_format_type = excel
+ field_delimiter = ;
+ skip_header_row_number = 1
+ file_filter_pattern = ".*\\.xlsx"
+ schema = {
+ fields {
+ c_map = "map<string, string>"
+ c_array = "array<int>"
+ c_string = string
+ c_boolean = boolean
+ c_tinyint = tinyint
+ c_smallint = smallint
+ c_int = int
+ c_bigint = bigint
+ c_float = float
+ c_double = double
+ c_bytes = bytes
+ c_date = date
+ c_decimal = "decimal(38, 18)"
+ c_timestamp = timestamp
+ c_row = {
+ c_map = "map<string, string>"
+ c_array = "array<int>"
+ c_string = string
+ c_boolean = boolean
+ c_tinyint = tinyint
+ c_smallint = smallint
+ c_int = int
+ c_bigint = bigint
+ c_float = float
+ c_double = double
+ c_bytes = bytes
+ c_date = date
+ c_decimal = "decimal(38, 18)"
+ c_timestamp = timestamp
+ }
+ }
+ }
+ }
+}
+
+sink {
+ Assert {
+ rules {
+ row_rules = [
+ {
+ rule_type = MAX_ROW
+ rule_value = 10
+ }
+ ],
+ field_rules = [
+ {
+ field_name = c_string
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = c_boolean
+ field_type = boolean
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = c_double
+ field_type = double
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = name
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ },
+ {
+ field_name = hobby
+ field_type = string
+ field_value = [
+ {
+ rule_type = NOT_NULL
+ }
+ ]
+ }
+ ]
+ }
+ }
+}