This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 3ebb9e47bef branch-3.1: [fix](hudi) Fix querying hudi jni table where
only partition columns (and no data fields) are required #55466 (#55662)
3ebb9e47bef is described below
commit 3ebb9e47beff1cfffbb532d006805a75f8b19f24
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Sep 8 10:54:33 2025 +0800
branch-3.1: [fix](hudi) Fix querying hudi jni table where only partition
columns (and no data fields) are required #55466 (#55662)
Cherry-picked from #55466
Co-authored-by: Socrates <[email protected]>
---
.../apache/doris/hudi/HadoopHudiJniScanner.java | 27 +++++++++++++++------
.../apache/doris/common/jni/vec/VectorTable.java | 16 ++++++++++++
.../external_table_p2/hudi/test_hudi_catalog.out | Bin 0 -> 142 bytes
.../hudi/test_hudi_catalog.groovy | 10 ++++++++
4 files changed, 45 insertions(+), 8 deletions(-)
diff --git a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java
index be7e31a115e..5fa4c72951b 100644
--- a/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java
+++ b/fe/be-java-extensions/hadoop-hudi-scanner/src/main/java/org/apache/doris/hudi/HadoopHudiJniScanner.java
@@ -111,8 +111,13 @@ public class HadoopHudiJniScanner extends JniScanner {
this.hudiColumnNames = params.get("hudi_column_names");
this.hudiColumnTypes = params.get("hudi_column_types").split("#");
- this.requiredFields = params.get("required_fields").split(",");
-
+ // Required fields will be empty when only partition fields are selected
+ // This is because partition fields are not stored in the data files
+ if (!params.get("required_fields").equals("")) {
+ this.requiredFields = params.get("required_fields").split(",");
+ } else {
+ this.requiredFields = new String[0];
+ }
this.fieldInspectors = new ObjectInspector[requiredFields.length];
this.structFields = new StructField[requiredFields.length];
this.fsOptionsProps = Maps.newHashMap();
@@ -166,14 +171,20 @@ public class HadoopHudiJniScanner extends JniScanner {
if (!reader.next(key, value)) {
break;
}
- Object rowData = deserializer.deserialize(value);
- for (int i = 0; i < fields.length; i++) {
- Object fieldData = rowInspector.getStructFieldData(rowData, structFields[i]);
- columnValue.setRow(fieldData);
- columnValue.setField(types[i], fieldInspectors[i]);
- appendData(i, columnValue);
+ if (fields.length > 0) {
+ Object rowData = deserializer.deserialize(value);
+ for (int i = 0; i < fields.length; i++) {
+ Object fieldData = rowInspector.getStructFieldData(rowData, structFields[i]);
+ columnValue.setRow(fieldData);
+ columnValue.setField(types[i], fieldInspectors[i]);
+ appendData(i, columnValue);
+ }
}
}
+ // vectorTable is virtual
+ if (fields.length == 0) {
+ vectorTable.appendVirtualData(numRows);
+ }
return numRows;
});
} catch (Exception e) {
diff --git a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java
index a8849d1fa20..08fb2bc8b00 100644
--- a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java
+++ b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorTable.java
@@ -34,6 +34,10 @@ public class VectorTable {
private final VectorColumn meta;
private final boolean onlyReadable;
private final int numRowsOfReadable;
+ // this will be true if fields is empty
+ private final boolean isVirtual;
+ // count rows when isVirtual is true
+ private int numVirtualRows = 0;
// Create writable vector table
private VectorTable(ColumnType[] types, String[] fields, int capacity) {
@@ -48,6 +52,7 @@ public class VectorTable {
this.meta = VectorColumn.createWritableColumn(new ColumnType("#meta", Type.BIGINT), metaSize);
this.onlyReadable = false;
numRowsOfReadable = -1;
+ this.isVirtual = columns.length == 0;
}
// Create readable vector table
@@ -69,6 +74,7 @@ public class VectorTable {
this.meta = VectorColumn.createReadableColumn(metaAddress, metaSize, new ColumnType("#meta", Type.BIGINT));
this.onlyReadable = true;
numRowsOfReadable = numRows;
+ this.isVirtual = columns.length == 0;
}
public static VectorTable createWritableTable(ColumnType[] types, String[] fields, int capacity) {
@@ -111,16 +117,24 @@ public class VectorTable {
public void appendNativeData(int fieldId, NativeColumnValue o) {
assert (!onlyReadable);
+ assert (!isVirtual);
columns[fieldId].appendNativeValue(o);
}
public void appendData(int fieldId, ColumnValue o) {
assert (!onlyReadable);
+ assert (!isVirtual);
columns[fieldId].appendValue(o);
}
+ public void appendVirtualData(int numRows) {
+ assert (isVirtual);
+ numVirtualRows += numRows;
+ }
+
public void appendData(int fieldId, Object[] batch, ColumnValueConverter converter, boolean isNullable) {
assert (!onlyReadable);
+ assert (!isVirtual);
if (converter != null) {
columns[fieldId].appendObjectColumn(converter.convert(batch), isNullable);
} else {
@@ -190,6 +204,8 @@ public class VectorTable {
public int getNumRows() {
if (onlyReadable) {
return numRowsOfReadable;
+ } else if (isVirtual) {
+ return numVirtualRows;
} else {
return columns[0].numRows();
}
diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_catalog.out b/regression-test/data/external_table_p2/hudi/test_hudi_catalog.out
new file mode 100644
index 00000000000..3f97939043f
Binary files /dev/null and b/regression-test/data/external_table_p2/hudi/test_hudi_catalog.out differ
diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy
index ad0f8d25d63..858c064c48a 100644
--- a/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy
+++ b/regression-test/suites/external_table_p2/hudi/test_hudi_catalog.groovy
@@ -36,5 +36,15 @@ suite("test_hudi_catalog", "p2,external,hudi,external_remote,external_remote_hud
sql """ set enable_fallback_to_original_planner=false """
def tables = sql """ show tables; """
assertTrue(tables.size() > 0)
+ try {
+ sql """ set force_jni_scanner = true; """
+ qt_test_only_partition_columns """
+ select signup_date from user_activity_log_mor_partition order by signup_date limit 1;
+ """
+ } catch (Exception e) {
+ logger.error("Error occurred while executing query", e)
+ } finally {
+ sql """ set force_jni_scanner = false; """
+ }
sql """drop catalog if exists ${catalog_name};"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]