This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f1a64ea09f [fix](new-scan)Fix new scanner load job bugs (#12903)
f1a64ea09f is described below

commit f1a64ea09fbd271b03b9a4cd86e1b87bb11447f2
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Sat Sep 24 17:21:19 2022 +0800

    [fix](new-scan)Fix new scanner load job bugs (#12903)
    
    Fix bugs:
    1. FE needs to send the file format (e.g. parquet, orc ...) to BE while 
processing load jobs using the new scanner.
    2. Try to get the parquet file column type from SchemaElement.type before 
falling back to the logical type and the converted type.
---
 be/src/vec/exec/format/parquet/schema_desc.cpp     | 30 ++++++++++++++++++++--
 be/src/vec/exec/format/parquet/schema_desc.h       |  2 ++
 .../doris/planner/external/LoadScanProvider.java   |  1 +
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp 
b/be/src/vec/exec/format/parquet/schema_desc.cpp
index b0d449f604..04ed26af7c 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.cpp
+++ b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -152,11 +152,37 @@ void FieldDescriptor::parse_physical_field(const 
tparquet::SchemaElement& physic
     physical_field->physical_type = physical_schema.type;
     _physical_fields.push_back(physical_field);
     physical_field->physical_column_index = _physical_fields.size() - 1;
+    physical_field->type = get_doris_type(physical_schema);
+}
+
+TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& 
physical_schema) {
+    TypeDescriptor type;
+    switch (physical_schema.type) {
+    case tparquet::Type::BOOLEAN:
+        type.type = TYPE_BOOLEAN;
+        return type;
+    case tparquet::Type::INT32:
+        type.type = TYPE_INT;
+        return type;
+    case tparquet::Type::INT64:
+    case tparquet::Type::INT96:
+        type.type = TYPE_BIGINT;
+        return type;
+    case tparquet::Type::FLOAT:
+        type.type = TYPE_FLOAT;
+        return type;
+    case tparquet::Type::DOUBLE:
+        type.type = TYPE_DOUBLE;
+        return type;
+    default:
+        break;
+    }
     if (physical_schema.__isset.logicalType) {
-        physical_field->type = 
convert_to_doris_type(physical_schema.logicalType);
+        type = convert_to_doris_type(physical_schema.logicalType);
     } else if (physical_schema.__isset.converted_type) {
-        physical_field->type = 
convert_to_doris_type(physical_schema.converted_type);
+        type = convert_to_doris_type(physical_schema.converted_type);
     }
+    return type;
 }
 
 TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType 
logicalType) {
diff --git a/be/src/vec/exec/format/parquet/schema_desc.h 
b/be/src/vec/exec/format/parquet/schema_desc.h
index 12db2b7011..7f69cc6559 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.h
+++ b/be/src/vec/exec/format/parquet/schema_desc.h
@@ -82,6 +82,8 @@ private:
 
     TypeDescriptor convert_to_doris_type(tparquet::ConvertedType::type 
convertedType);
 
+    TypeDescriptor get_doris_type(const tparquet::SchemaElement& 
physical_schema);
+
 public:
     FieldDescriptor() = default;
     ~FieldDescriptor() = default;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/LoadScanProvider.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/LoadScanProvider.java
index 5f791186a2..33b0db2de7 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/LoadScanProvider.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/LoadScanProvider.java
@@ -93,6 +93,7 @@ public class LoadScanProvider implements FileScanProviderIf {
         ctx.timezone = analyzer.getTimezone();
 
         TFileScanRangeParams params = new TFileScanRangeParams();
+        params.format_type = 
formatType(fileGroupInfo.getFileGroup().getFileFormat(), "");
         params.setStrictMode(fileGroupInfo.isStrictMode());
         params.setProperties(fileGroupInfo.getBrokerDesc().getProperties());
         if (fileGroupInfo.getBrokerDesc().getFileType() == 
TFileType.FILE_HDFS) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to