eldenmoon commented on code in PR #26749: URL: https://github.com/apache/doris/pull/26749#discussion_r1391912285
########## fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java: ########## @@ -1004,11 +1023,49 @@ public SlotDescriptor registerColumnRef(TableName tblName, String colName) throw newTblName == null ? d.getTable().getName() : newTblName.toString()); } + LOG.debug("register column ref table {}, colName {}, col {}", tblName, colName, col.toSql()); + if (col.getType().isVariantType() || (subColNames != null && !subColNames.isEmpty())) { + if (!col.getType().isVariantType()) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_ILLEGAL_COLUMN_REFERENCE_ERROR, + Joiner.on(".").join(tblName.getTbl(), colName)); + } + if (subColNames == null) { + // Root + subColNames = new ArrayList<String>(); + } + String key = d.getAlias() + "." + col.getName(); + if (subColumnSlotRefMap.get(key) == null) { + subColumnSlotRefMap.put(key, Maps.newTreeMap( + new Comparator<List<String>>() { + public int compare(List<String> lst1, List<String> lst2) { + String str1 = String.join(".", lst1); + String str2 = String.join(".", lst2); + return str1.compareTo(str2); + } + })); + } + SlotDescriptor result = subColumnSlotRefMap.get(key).get(subColNames); + if (result != null) { + // avoid duplicate slots + return result; + } + result = globalState.descTbl.addSlotDescriptor(d); Review Comment: No, the query engine will handle such conflict, and generate schema for this situation ########## fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java: ########## @@ -759,6 +765,18 @@ public TupleDescriptor registerOlapTable(Table table, TableName tableName, List< slot.setIsNullable(col.isAllowNull()); String key = tableRef.aliases[0] + "." + col.getName(); slotRefMap.put(key, slot); + + if (col.getType().isVariantType()) { + LOG.debug("add subColumnSlotRefMap, key {}, column {}", key, col.toThrift()); + subColumnSlotRefMap.put(key, Maps.newTreeMap( Review Comment: yes, i will refactor it ########## fe/fe-core/src/main/cup/sql_parser.cup: ########## @@ -440,6 +440,7 @@ terminal String KW_JOIN, KW_JSON, KW_JSONB, + KW_VARIANT, Review Comment: lost in load pr ########## be/src/common/config.cpp: ########## @@ -1108,6 +1109,11 @@ DEFINE_Int32(ingest_binlog_work_pool_size, "-1"); // Download binlog rate limit, unit is KB/s, 0 means no limit DEFINE_Int32(download_binlog_rate_limit_kbs, "0"); +// Buffer block size for flushing a single block +// This config used for merging multiple blocks into a single block +// and flush them once total, default 400M +DEFINE_mInt32(flushing_block_buffer_size_bytes, "419430400"); Review Comment: when doing heavy schema change, variant subcolumns type is not certain, it's needs to be merged into a single block.400M is a estimate block size for a single segment ########## fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java: ########## @@ -1004,11 +1023,49 @@ public SlotDescriptor registerColumnRef(TableName tblName, String colName) throw newTblName == null ? d.getTable().getName() : newTblName.toString()); } + LOG.debug("register column ref table {}, colName {}, col {}", tblName, colName, col.toSql()); + if (col.getType().isVariantType() || (subColNames != null && !subColNames.isEmpty())) { + if (!col.getType().isVariantType()) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_ILLEGAL_COLUMN_REFERENCE_ERROR, + Joiner.on(".").join(tblName.getTbl(), colName)); + } + if (subColNames == null) { + // Root + subColNames = new ArrayList<String>(); + } + String key = d.getAlias() + "." + col.getName(); + if (subColumnSlotRefMap.get(key) == null) { + subColumnSlotRefMap.put(key, Maps.newTreeMap( + new Comparator<List<String>>() { + public int compare(List<String> lst1, List<String> lst2) { + String str1 = String.join(".", lst1); + String str2 = String.join(".", lst2); + return str1.compareTo(str2); + } + })); + } + SlotDescriptor result = subColumnSlotRefMap.get(key).get(subColNames); + if (result != null) { + // avoid duplicate slots + return result; + } + result = globalState.descTbl.addSlotDescriptor(d); + LOG.debug("register slot descriptor {}", result); + result.setSubColLables(subColNames); + result.setColumn(col); + if (!subColNames.isEmpty()) { + result.setMaterializedColumnName(col.getName() + "." + String.join(".", subColNames)); + } + result.setIsMaterialized(true); + result.setIsNullable(col.isAllowNull()); + subColumnSlotRefMap.get(key).put(subColNames, result); + return result; + } + // Make column name case insensitive String key = d.getAlias() + "." + col.getName(); SlotDescriptor result = slotRefMap.get(key); if (result != null) { - result.setMultiRef(true); return result; } result = globalState.descTbl.addSlotDescriptor(d); Review Comment: the top variant column slot desc could be used when quering it like `select v from tbl`, otherwise slot for it's subcolumn should be enough ########## gensrc/proto/internal_service.proto: ########## @@ -589,6 +589,7 @@ message PTabletWriteSlaveRequest { message IndexSize { required int64 indexId = 1; required int64 size = 2; + required string suffix_path = 3; Review Comment: IndexId is not sufficient for indexing variant's sub columns, so use a suffix to identify them ########## fe/fe-core/src/main/cup/sql_parser.cup: ########## @@ -7049,6 +7066,16 @@ column_ref ::= {: RESULT = new SlotRef(new TableName(null, db, tbl), col); :} | ident:ctl DOT ident:db DOT ident:tbl DOT ident:col {: RESULT = new SlotRef(new TableName(ctl, db, tbl), col); :} + + | ident:pcol COLON sub_column_lables:lables Review Comment: DOT could not be implemeneted in original planner ########## be/src/vec/exec/scan/scanner_scheduler.cpp: ########## @@ -415,7 +415,10 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, ScannerContext ctx->return_free_block(std::move(block)); } else { if (!blocks.empty() && blocks.back()->rows() + block->rows() <= state->batch_size()) { + VLOG_DEBUG << "dump block " << block->dump_data(0, block->rows()); Review Comment: remove debug code -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org