morningman commented on code in PR #41067: URL: https://github.com/apache/doris/pull/41067#discussion_r1768544824
########## fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java: ########## @@ -258,6 +258,15 @@ public void analyze(Analyzer analyzer, List<Expr> resultExprs, List<String> colL headerType = FileFormatConstants.FORMAT_CSV_WITH_NAMES_AND_TYPES; fileFormatType = TFileFormatType.FORMAT_CSV_PLAIN; break; + case "rcbinary": Review Comment: Do you support write sequence/rcfile too? ########## fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java: ########## @@ -377,6 +410,22 @@ public List<Column> getTableColumns() throws AnalysisException { return columns; } + @Override Review Comment: do not remove code ########## fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java: ########## @@ -144,6 +143,11 @@ public BrokerDesc getBrokerDesc() { return new BrokerDesc("LocalTvfBroker", StorageType.LOCAL, locationProperties); } + @Override Review Comment: Do not move code that unrelated to your feature ########## fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java: ########## @@ -259,8 +279,26 @@ protected Map<String, String> parseCommonProperties(Map<String, String> properti } } + this.useMetastore = getOrDefaultAndRemove(copiedProps, FileFormatConstants.PROP_USE_METASTORE, + "").toLowerCase(); + + if (!useMetastore.isEmpty() && !useMetastore.equals("true") && !useMetastore.equals("false")) { + throw new AnalysisException("useMetastore field cannot be recognized, should be \"true\" or \"false\". "); + } + + if (!useMetastore.isEmpty() && HIVE_FILE_FORMATS.contains(this.fileFormatType)) { + if (useMetastore.equals("false")) { + columnNamesStr = getOrDefaultAndRemove(copiedProps, FileFormatConstants.PROP_HIVE_COLUMN_NAMES, ""); + columnTypesStr = getOrDefaultAndRemove(copiedProps, FileFormatConstants.PROP_HIVE_COLUMN_TYPES, ""); + if (columnNamesStr.isEmpty() || columnTypesStr.isEmpty()) { + throw new AnalysisException("use metastore is disable, should set column names and column types."); + } + } + // TODO: set metastore address in else branch Review Comment: I think we don't need to support `use_hivemetastore` in tvf. ########## fe/fe-core/src/main/java/org/apache/doris/tablefunction/LocalTableValuedFunction.java: ########## @@ -46,12 +46,11 @@ * local("file_path" = "path/to/file.txt", "backend_id" = "be_id"). */ public class LocalTableValuedFunction extends ExternalFileTableValuedFunction { - private static final Logger LOG = LogManager.getLogger(LocalTableValuedFunction.class); public static final String NAME = "local"; public static final String PROP_FILE_PATH = "file_path"; public static final String PROP_BACKEND_ID = "backend_id"; public static final String PROP_SHARED_STORAGE = "shared_storage"; - + private static final Logger LOG = LogManager.getLogger(LocalTableValuedFunction.class); Review Comment: No need to move this code ########## fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java: ########## @@ -99,20 +100,15 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio // Columns got from file and path(if has) protected List<Column> columns = null; - // User specified csv columns, it will override columns got from file - private final List<Column> csvSchema = Lists.newArrayList(); - - // Partition columns from path, e.g. /path/to/columnName=columnValue. - private List<String> pathPartitionKeys; - protected List<TBrokerFileStatus> fileStatuses = Lists.newArrayList(); protected Map<String, String> locationProperties = Maps.newHashMap(); protected String filePath; - protected TFileFormatType fileFormatType; - protected Optional<String> resourceName = Optional.empty(); - + // User specified csv columns, it will override columns got from file + private final List<Column> csvSchema = Lists.newArrayList(); Review Comment: Do not move code ########## fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java: ########## @@ -128,6 +124,21 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio private boolean trimDoubleQuotes; private int skipLines; private long tableId; + private static final ImmutableSet<TFileFormatType> HIVE_FILE_FORMATS = new ImmutableSet.Builder<TFileFormatType>() + .add(TFileFormatType.FORMAT_RCBINARY) + .add(TFileFormatType.FORMAT_RCTEXT) + .add(TFileFormatType.FORMAT_SEQUENCE) + .build(); + + // When useMetastore is "false", users can specify column names and column types + // for file formats such as RCFile and SequenceFile that rely on metadata stored in Hive Metastore + private String useMetastore; + // Comma-separated list of column names. + // Only applicable when useMetastore is false and for formats like RCFile and SequenceFile. + private String columnNamesStr; Review Comment: Could you reuse `FileFormatUtils.parseCsvSchema()`? Although it is called "csv schema", but actually it can be used for any file format which need specified schema -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org