This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 2c927224acc branch-3.0: [opt](identifier) let unicode format be a
superset of latin format #48078 (#49808)
2c927224acc is described below
commit 2c927224accf207ea03a192794049f6da310faa7
Author: morrySnow <[email protected]>
AuthorDate: Sat Apr 5 06:29:59 2025 +0800
branch-3.0: [opt](identifier) let unicode format be a superset of latin
format #48078 (#49808)
pick from master #48078
---
.../java/org/apache/doris/common/FeNameFormat.java | 6 +-
.../org/apache/doris/common/FeNameFormatTest.java | 110 ++++++++++++++++++---
2 files changed, 97 insertions(+), 19 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
index 363ec175f23..fbbd670c9df 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
@@ -37,15 +37,15 @@ public class FeNameFormat {
private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$";
private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$";
private static final String REPOSITORY_NAME_REGEX =
"^[a-zA-Z][a-zA-Z0-9-_]{0,255}$";
- private static final String COLUMN_NAME_REGEX =
"^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,256}$";
+ private static final String COLUMN_NAME_REGEX =
"^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{1,256}$";
- private static final String UNICODE_LABEL_REGEX =
"^[-_A-Za-z0-9:\\p{L}]{1,128}$";
+ private static final String UNICODE_LABEL_REGEX =
"^[-_A-Za-z0-9:\\p{L}]{1," + Config.label_regex_length + "}$";
private static final String UNICODE_COMMON_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
private static final String UNICODE_UNDERSCORE_COMMON_NAME_REGEX =
"^[_a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
private static final String UNICODE_TABLE_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$";
private static final String UNICODE_USER_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$";
private static final String UNICODE_COLUMN_NAME_REGEX
- = "^[.a-zA-Z0-9_+-/?@#$%^&*\\s,:\\p{L}]{0,256}$";
+ = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:\\p{L}]{1,256}$";
private static final String UNICODE_REPOSITORY_NAME_REGEX =
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,255}$";
public static final String FORBIDDEN_PARTITION_NAME = "placeholder_";
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
index b6e5e68ee83..32e2a553b94 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
@@ -17,27 +17,24 @@
package org.apache.doris.common;
-import org.junit.Test;
+import org.apache.doris.qe.VariableMgr;
+
+import com.google.common.collect.Lists;
+import org.apache.ivy.util.StringUtils;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
public class FeNameFormatTest {
@Test
- public void testCheckColumnName() {
+ void testLabelName() {
// check label use correct regex, begin with '-' is different from
others
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkLabel("-lable"));
+ }
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("_id"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("__id"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("___id"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("___id_"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("@timestamp"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("@timestamp#"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("timestamp*"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("timestamp.1"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("timestamp.#"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("?id_"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("#id_"));
- ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName("$id_"));
+ @Test
+ void testTableName() {
// length 64
String tblName =
"test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq";
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkTableName(tblName));
@@ -46,19 +43,100 @@ public class FeNameFormatTest {
ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkTableName(largeTblName));
// check table name use correct regex, not begin with '-'
ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkTableName("-" + tblName));
+ }
+
+ @Test
+ void testCheckColumnName() {
+ List<String> alwaysValid = Lists.newArrayList(
+ "_id",
+ "_id",
+ "_ id",
+ " _id",
+ "__id",
+ "___id",
+ "___id_",
+ "@timestamp",
+ "@timestamp#",
+ "timestamp*",
+ "timestamp.1",
+ "timestamp.#",
+ "?id_",
+ "#id_",
+ "$id_",
+ "a-zA-Z0-9.+-/?@#$%^&*\" ,:"
+ );
+
+ List<String> alwaysInvalid = Lists.newArrayList(
+ // inner column prefix
+ "mv_",
+ "mva_",
+ "__doris_shadow_",
+
+ // invalid
+ "",
+ "\\",
+ "column\\",
+ StringUtils.repeat("a", 257)
+ );
+
+ List<String> unicodeValid = Lists.newArrayList(
+ "中文",
+ "語言",
+ "язык",
+ "언어",
+ "لغة",
+ "ภาษา",
+ "שפה",
+ "γλώσσα",
+ "ენა",
+ "げんご"
+ );
+ boolean defaultUnicode =
VariableMgr.getDefaultSessionVariable().enableUnicodeNameSupport;
+ List<Boolean> enableUnicode = Lists.newArrayList(false, true);
+ try {
+ for (Boolean unicode : enableUnicode) {
+
VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(unicode);
+ for (String s : alwaysValid) {
+ ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName(s));
+ }
+ for (String s : alwaysInvalid) {
+ ExceptionChecker.expectThrows(AnalysisException.class, ()
-> FeNameFormat.checkColumnName(s));
+ }
+ for (String s : unicodeValid) {
+ if (unicode) {
+ ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkColumnName(s));
+ } else {
+ ExceptionChecker.expectThrows(AnalysisException.class,
() -> FeNameFormat.checkColumnName(s));
+ }
+ }
+ }
+ } finally {
+
VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(defaultUnicode);
+ }
+ }
+
+ @Test
+ void testUserName() {
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkUserName("a.b"));
// check user name use correct regex, not begin with '.'
ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkUserName(".a.b"));
+ }
+
+ @Test
+ void testCommonName() {
+ String commonName =
"test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq";
// check common name use correct regex, length 65
- ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkCommonName("fakeType", tblName + "t"));
+ ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkCommonName("fakeType", commonName + "t"));
ExceptionChecker.expectThrows(AnalysisException.class, () ->
FeNameFormat.checkCommonName("fakeType", "_commonName"));
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkCommonName("fakeType", "common-Name"));
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkCommonName("fakeType", "commonName-"));
+ }
+ @Test
+ void testOutfileName() {
// check success file name prefix
ExceptionChecker.expectThrowsNoException(() ->
FeNameFormat.checkOutfileSuccessFileName("fakeType", "_success"));
}
-
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]