This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 79e1481d43 [core] Fix FieldListaggAgg distinct mode incorrectly 
skipping values (#7652)
79e1481d43 is described below

commit 79e1481d43ecdd2f6a573f0971a1df62495b8751
Author: lszskye <[email protected]>
AuthorDate: Thu Apr 16 09:00:22 2026 +0800

    [core] Fix FieldListaggAgg distinct mode incorrectly skipping values (#7652)
    
    Fix a bug in `FieldListaggAgg.agg()` where the distinct deduplication
    logic
    incorrectly uses substring matching (`BinaryString.contains()`) instead
    of
    exact token matching, causing valid values to be silently dropped.
    
    For example, with delimiter `,`:
    - accumulator = `"abc,def,asd"`
    - inputField = `"ab,xy"`
    - Token `"ab"` is incorrectly skipped because
    `"abc,def,asd".contains("ab")`
      returns `true`
    - Result: `"abc,def,asd,xy"` (missing `"ab"`)
    - Expected: `"abc,def,asd,ab,xy"`
---
 .../compact/aggregate/FieldListaggAgg.java         | 18 +++++++---
 .../mergetree/compact/MergeFunctionTestUtils.java  | 10 +++---
 .../compact/aggregate/FieldAggregatorTest.java     | 40 ++++++++++++++++++++++
 3 files changed, 60 insertions(+), 8 deletions(-)

diff --git 
a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldListaggAgg.java
 
b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldListaggAgg.java
index cdd3901f24..6ca31bbead 100644
--- 
a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldListaggAgg.java
+++ 
b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldListaggAgg.java
@@ -24,7 +24,9 @@ import org.apache.paimon.types.VarCharType;
 import org.apache.paimon.utils.BinaryStringUtils;
 
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import static 
org.apache.paimon.utils.BinaryStringUtils.splitByWholeSeparatorPreserveAllTokens;
 
@@ -35,11 +37,14 @@ public class FieldListaggAgg extends FieldAggregator {
 
     private final String delimiter;
 
+    private final BinaryString delimiterBinaryString;
+
     private final boolean distinct;
 
     public FieldListaggAgg(String name, VarCharType dataType, CoreOptions 
options, String field) {
         super(name, dataType);
         this.delimiter = options.fieldListAggDelimiter(field);
+        this.delimiterBinaryString = BinaryString.fromString(this.delimiter);
         this.distinct = options.fieldCollectAggDistinct(field);
     }
 
@@ -62,16 +67,22 @@ public class FieldListaggAgg extends FieldAggregator {
         }
 
         if (distinct) {
-            BinaryString delimiterBinaryString = 
BinaryString.fromString(delimiter);
+            BinaryString[] accumulatorTokens =
+                    splitByWholeSeparatorPreserveAllTokens(mergeFieldSD, 
delimiterBinaryString);
+            Set<BinaryString> existingTokens = new 
HashSet<>(accumulatorTokens.length);
+            for (BinaryString token : accumulatorTokens) {
+                existingTokens.add(token);
+            }
 
             List<BinaryString> result = new ArrayList<>();
             result.add(mergeFieldSD);
             for (BinaryString str :
                     splitByWholeSeparatorPreserveAllTokens(inFieldSD, 
delimiterBinaryString)) {
-                if (str.getSizeInBytes() == 0 || mergeFieldSD.contains(str)) {
+                if (str.getSizeInBytes() == 0 || existingTokens.contains(str)) 
{
                     continue;
                 }
 
+                existingTokens.add(str);
                 result.add(delimiterBinaryString);
                 result.add(str);
             }
@@ -83,7 +94,6 @@ public class FieldListaggAgg extends FieldAggregator {
             return BinaryStringUtils.concat(result);
         }
 
-        return BinaryStringUtils.concat(
-                mergeFieldSD, BinaryString.fromString(delimiter), inFieldSD);
+        return BinaryStringUtils.concat(mergeFieldSD, delimiterBinaryString, 
inFieldSD);
     }
 }
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/MergeFunctionTestUtils.java
 
b/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/MergeFunctionTestUtils.java
index 8035985969..27ae5cce5d 100644
--- 
a/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/MergeFunctionTestUtils.java
+++ 
b/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/MergeFunctionTestUtils.java
@@ -66,10 +66,12 @@ public class MergeFunctionTestUtils {
                 expected.add(group.get(group.size() - 1));
             } else {
                 if (group.stream().noneMatch(data -> data.valueKind == 
RowKind.INSERT)) {
-                    // No insert: fill the pk and left nullable fields to 
null; sequenceNumber = 0
-                    // because it's not updated
-                    ReusingTestData last = group.get(group.size() - 1);
-                    expected.add(new ReusingTestData(last.key, 0, 
RowKind.DELETE, null));
+                    // No insert with ignoreDelete: initRow sets all fields 
(including
+                    // nullable ones) from the first DELETE record, then all 
DELETEs are
+                    // ignored. sequenceNumber stays 0 because 
latestSequenceNumber is
+                    // updated after the ignoreDelete check.
+                    ReusingTestData first = group.get(0);
+                    expected.add(new ReusingTestData(first.key, 0, 
RowKind.DELETE, first.value));
                 } else {
                     // get the last INSERT data because later DELETE data are 
ignored
                     group.stream()
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/aggregate/FieldAggregatorTest.java
 
b/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/aggregate/FieldAggregatorTest.java
index 7e9a3aaf25..f040a5eb73 100644
--- 
a/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/aggregate/FieldAggregatorTest.java
+++ 
b/paimon-core/src/test/java/org/apache/paimon/mergetree/compact/aggregate/FieldAggregatorTest.java
@@ -356,6 +356,46 @@ public class FieldAggregatorTest {
                 .isEqualTo("user1-user2");
     }
 
+    @Test
+    public void testFieldListAggDistinctShouldNotMatchSubstring() {
+        FieldListaggAgg fieldListaggAgg =
+                new FieldListaggAggFactory()
+                        .create(
+                                new VarCharType(VarCharType.MAX_LENGTH),
+                                CoreOptions.fromMap(
+                                        
ImmutableMap.of("fields.fieldName.distinct", "true")),
+                                "fieldName");
+
+        BinaryString accumulator = BinaryString.fromString("abc,def,asd");
+        BinaryString inputField = BinaryString.fromString("ab,xy");
+        Object result = fieldListaggAgg.agg(accumulator, inputField);
+
+        assertNotNull(result);
+        assertEquals("abc,def,asd,ab,xy", result.toString());
+    }
+
+    @Test
+    public void testFieldListAggDistinctSubstringWithCustomDelimiter() {
+        FieldListaggAgg fieldListaggAgg =
+                new FieldListaggAggFactory()
+                        .create(
+                                new VarCharType(VarCharType.MAX_LENGTH),
+                                CoreOptions.fromMap(
+                                        ImmutableMap.of(
+                                                "fields.fieldName.distinct",
+                                                "true",
+                                                
"fields.fieldName.list-agg-delimiter",
+                                                ";")),
+                                "fieldName");
+
+        BinaryString accumulator = BinaryString.fromString("abc;def;asd");
+        BinaryString inputField = BinaryString.fromString("ab;xy;def");
+        Object result = fieldListaggAgg.agg(accumulator, inputField);
+
+        assertNotNull(result);
+        assertEquals("abc;def;asd;ab;xy", result.toString());
+    }
+
     @Test
     public void testFieldListAggWithBoundedVarcharShouldFail() {
         FieldListaggAggFactory factory = new FieldListaggAggFactory();

Reply via email to