This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new c9757b54315 [improvement](paimon)Using table serialization on the jni side (#43167) c9757b54315 is described below commit c9757b543157d95b275caa4545a5ebf20241a523 Author: wuwenchi <wuwenchi...@hotmail.com> AuthorDate: Fri Nov 8 08:04:13 2024 +0800 [improvement](paimon)Using table serialization on the jni side (#43167) advantage: 1. Reduce the access to HMS and HDFS on the JNI side. 2. There will be no inconsistency between the fe and be tables. --------- Co-authored-by: wuwenchi <wuwen...@selectdb.com> --- be/src/vec/exec/format/table/paimon_jni_reader.cpp | 3 ++ .../org/apache/doris/paimon/PaimonJniScanner.java | 32 ++++++++++++++++------ .../datasource/paimon/source/PaimonScanNode.java | 1 + gensrc/thrift/PlanNodes.thrift | 17 ++++++------ 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/be/src/vec/exec/format/table/paimon_jni_reader.cpp b/be/src/vec/exec/format/table/paimon_jni_reader.cpp index a9ec243cf46..30358eace1a 100644 --- a/be/src/vec/exec/format/table/paimon_jni_reader.cpp +++ b/be/src/vec/exec/format/table/paimon_jni_reader.cpp @@ -61,6 +61,9 @@ PaimonJniReader::PaimonJniReader(const std::vector<SlotDescriptor*>& file_slot_d std::to_string(range.table_format_params.paimon_params.last_update_time); params["required_fields"] = join(column_names, ","); params["columns_types"] = join(column_types, "#"); + if (range.table_format_params.paimon_params.__isset.paimon_table) { + params["paimon_table"] = range.table_format_params.paimon_params.paimon_table; + } // Used to create paimon option for (auto& kv : range.table_format_params.paimon_params.paimon_options) { diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java index 7bd9fa631c8..e85d465f663 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java @@ -42,13 +42,19 @@ import java.util.stream.Collectors; public class PaimonJniScanner extends JniScanner { private static final Logger LOG = LoggerFactory.getLogger(PaimonJniScanner.class); + @Deprecated private static final String PAIMON_OPTION_PREFIX = "paimon."; + @Deprecated private static final String HADOOP_OPTION_PREFIX = "hadoop."; private final Map<String, String> params; + @Deprecated private final Map<String, String> paimonOptionParams; + @Deprecated private final Map<String, String> hadoopOptionParams; + @Deprecated private final String dbName; + @Deprecated private final String tblName; private final String paimonSplit; private final String paimonPredicate; @@ -58,9 +64,13 @@ public class PaimonJniScanner extends JniScanner { private List<String> paimonAllFieldNames; private List<DataType> paimonDataTypeList; + @Deprecated private long ctlId; + @Deprecated private long dbId; + @Deprecated private long tblId; + @Deprecated private long lastUpdateTime; private RecordReader.RecordIterator<InternalRow> recordIterator = null; private final ClassLoader classLoader; @@ -214,16 +224,20 @@ public class PaimonJniScanner extends JniScanner { } private void initTable() { - PaimonTableCacheKey key = new PaimonTableCacheKey(ctlId, dbId, tblId, - paimonOptionParams, hadoopOptionParams, dbName, tblName); - TableExt tableExt = PaimonTableCache.getTable(key); - if (tableExt.getCreateTime() < lastUpdateTime) { - LOG.warn("invalidate cache table:{}, localTime:{}, remoteTime:{}", key, tableExt.getCreateTime(), - lastUpdateTime); - PaimonTableCache.invalidateTableCache(key); - tableExt = PaimonTableCache.getTable(key); + if (params.containsKey("paimon_table")) { + table = PaimonUtils.deserialize(params.get("paimon_table")); + } else { + PaimonTableCacheKey key = new PaimonTableCacheKey(ctlId, dbId, tblId, + paimonOptionParams, hadoopOptionParams, dbName, tblName); + TableExt tableExt = PaimonTableCache.getTable(key); + if (tableExt.getCreateTime() < lastUpdateTime) { + LOG.warn("invalidate cache table:{}, localTime:{}, remoteTime:{}", key, tableExt.getCreateTime(), + lastUpdateTime); + PaimonTableCache.invalidateTableCache(key); + tableExt = PaimonTableCache.getTable(key); + } + this.table = tableExt.getTable(); } - this.table = tableExt.getTable(); paimonAllFieldNames = PaimonUtils.getFieldNames(this.table.rowType()); if (LOG.isDebugEnabled()) { LOG.debug("paimonAllFieldNames:{}", paimonAllFieldNames); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index cd477cc9b29..29e3d0529f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -164,6 +164,7 @@ public class PaimonScanNode extends FileQueryScanNode { fileDesc.setDbId(((PaimonExternalTable) source.getTargetTable()).getDbId()); fileDesc.setTblId(source.getTargetTable().getId()); fileDesc.setLastUpdateTime(source.getTargetTable().getUpdateTime()); + fileDesc.setPaimonTable(encodeObjectToString(source.getPaimonTable())); // The hadoop conf should be same with PaimonExternalCatalog.createCatalog()#getConfiguration() fileDesc.setHadoopConf(source.getCatalog().getCatalogProperty().getHadoopProperties()); Optional<DeletionFile> optDeletionFile = paimonSplit.getDeletionFile(); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index eb5266942c0..ec4497b267b 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -321,17 +321,18 @@ struct TPaimonDeletionFileDesc { struct TPaimonFileDesc { 1: optional string paimon_split 2: optional string paimon_column_names - 3: optional string db_name - 4: optional string table_name + 3: optional string db_name // deprecated + 4: optional string table_name // deprecated 5: optional string paimon_predicate - 6: optional map<string, string> paimon_options - 7: optional i64 ctl_id - 8: optional i64 db_id - 9: optional i64 tbl_id - 10: optional i64 last_update_time + 6: optional map<string, string> paimon_options // deprecated + 7: optional i64 ctl_id // deprecated + 8: optional i64 db_id // deprecated + 9: optional i64 tbl_id // deprecated + 10: optional i64 last_update_time // deprecated 11: optional string file_format 12: optional TPaimonDeletionFileDesc deletion_file; - 13: optional map<string, string> hadoop_conf + 13: optional map<string, string> hadoop_conf // deprecated + 14: optional string paimon_table } struct TTrinoConnectorFileDesc { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org