[GitHub] morningman commented on a change in pull request #456: Add routine load statement

GitBox Wed, 02 Jan 2019 18:56:30 -0800

morningman commented on a change in pull request #456: Add routine load statement
URL: https://github.com/apache/incubator-doris/pull/456#discussion_r244901682


 ##########
 File path: fe/src/main/java/org/apache/doris/analysis/CreateRoutineLoadStmt.java
 ##########
 @@ -0,0 +1,323 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.analysis;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableSet;
+import com.sun.istack.internal.Nullable;
+import org.apache.doris.catalog.StreamDataSourceType;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.FeNameFormat;
+import org.apache.doris.common.UserException;
+import org.apache.doris.load.RoutineLoadDesc;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.regex.Pattern;
+
+/*
+ Create routine Load statement,  continually load data from a streaming app
+
+ syntax:
+      CREATE ROUTINE LOAD name ON database.table
+      [load properties]
+      [PROPERTIES
+      (
+          desired_concurrent_number = xxx,
+          max_error_number = xxx,
+          k1 = v1,
+          ...
+          kn = vn
+      )]
+      FROM type of routine load
+      [(
+          k1 = v1,
+          ...
+          kn = vn
+      )]
+
+      load properties:
+          load property [[,] load property] ...
+
+      load property:
+          column separator | columns | partitions | where
+
+      column separator:
+          COLUMNS TERMINATED BY xxx
+      columns:
+          COLUMNS (c1, c2, c3) set (c1, c2, c3=c1+c2)
+      partitions:
+          PARTITIONS (p1, p2, p3)
+      where:
+          WHERE xxx
+
+      type of routine load:
+          KAFKA
+*/
+public class CreateRoutineLoadStmt extends DdlStmt {
+    // routine load properties
+    public static final String DESIRED_CONCURRENT_NUMBER_PROPERTY = 
"desired_concurrent_number";
+    // max error number in ten thousand records
+    public static final String MAX_ERROR_NUMBER_PROPERTY = "max_error_number";
+
+    // kafka type properties
+    public static final String KAFKA_ENDPOINT_PROPERTY = "kafka_endpoint";
+    public static final String KAFKA_TOPIC_PROPERTY = "kafka_topic";
+    // optional
+    public static final String KAFKA_PARTITIONS_PROPERTY = "kafka_partitions";
+
+    private static final String NAME_TYPE = "ROUTINE LOAD NAME";
+    private static final String ENDPOINT_REGEX = "([a-z]+\\.*)+:[0-9]+";
+    private static final String EMPTY_STRING = "";
+
+    private static final ImmutableSet<String> PROPERTIES_SET = new 
ImmutableSet.Builder<String>()
+            .add(DESIRED_CONCURRENT_NUMBER_PROPERTY)
+            .add(MAX_ERROR_NUMBER_PROPERTY)
+            .build();
+
+    private static final ImmutableSet<String> KAFKA_PROPERTIES_SET = new 
ImmutableSet.Builder<String>()
+            .add(KAFKA_ENDPOINT_PROPERTY)
+            .add(KAFKA_TOPIC_PROPERTY)
+            .add(KAFKA_PARTITIONS_PROPERTY)
+            .build();
+
+    private final String name;
+    private final TableName dbTableName;
+    private final List<ParseNode> loadPropertyList;
+    private final Map<String, String> properties;
+    private final String typeName;
+    private final Map<String, String> typeProperties;
+
+
+    // those load properties will be initialized after analyze
+    private RoutineLoadDesc routineLoadDesc;
+    private int desiredConcurrentNum;
+    private int maxErrorNum;
+    private String kafkaEndpoint;
+    private String kafkaTopic;
+    private List<Integer> kafkaPartitions;
+
+    public CreateRoutineLoadStmt(String name, TableName dbTableName, 
List<ParseNode> loadPropertyList,
+                                 Map<String, String> properties,
+                                 String typeName, Map<String, String> 
typeProperties) {
+        this.name = name;
+        this.dbTableName = dbTableName;
+        this.loadPropertyList = loadPropertyList;
+        this.properties = properties;
+        this.typeName = typeName;
+        this.typeProperties = typeProperties;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public TableName getDBTableName() {
+        return dbTableName;
+    }
+
+    public Map<String, String> getProperties() {
+        return properties;
+    }
+
+    public String getTypeName() {
+        return typeName;
+    }
+
+    public Map<String, String> getTypeProperties() {
+        return typeProperties;
+    }
+
+    @Nullable
+    public RoutineLoadDesc getRoutineLoadDesc() {
+        return routineLoadDesc;
+    }
+
+    public int getDesiredConcurrentNum() {
+        return desiredConcurrentNum;
+    }
+
+    public int getMaxErrorNum() {
+        return maxErrorNum;
+    }
+
+    public String getKafkaEndpoint() {
+        return kafkaEndpoint;
+    }
+
+    public String getKafkaTopic() {
+        return kafkaTopic;
+    }
+
+    public List<Integer> getKafkaPartitions() {
+        return kafkaPartitions;
+    }
+
+    @Override
+    public void analyze(Analyzer analyzer) throws AnalysisException, 
UserException {
+        super.analyze(analyzer);
+        // check name
+        FeNameFormat.checkCommonName(NAME_TYPE, name);
+        // check dbName and tableName
+        if (Strings.isNullOrEmpty(dbTableName.getDb()) || 
Strings.isNullOrEmpty(dbTableName.getTbl())) {
+            throw new AnalysisException("empty db name or table name in create 
routine load statement");
+        }
+        // check load properties include column separator etc.
+        checkLoadProperties(analyzer);
+        // check routine load properties include desired concurrent number etc.
+        checkRoutineLoadProperties();
+        // check type
+        try {
+            StreamDataSourceType.valueOf(typeName);
+        } catch (IllegalArgumentException e) {
+            throw new AnalysisException("routine load job does not support 
this type " + typeName);
+        }
+        // check type properties
+        checkTypeProperties();
+    }
+
+    private void checkLoadProperties(Analyzer analyzer) throws 
AnalysisException {
+        if (loadPropertyList != null) {
+            ColumnSeparator columnSeparator = null;
+            LoadColumnsInfo columnsInfo = null;
+            Expr wherePredicate = null;
+            PartitionNames partitionNames = null;
+            for (ParseNode parseNode : loadPropertyList) {
+                if (parseNode instanceof ColumnSeparator) {
+                    // check column separator
+                    if (columnSeparator != null) {
+                        throw new AnalysisException("repeat setting of column 
separator");
+                    }
+                    columnSeparator = (ColumnSeparator) parseNode;
+                    columnSeparator.analyze(analyzer);
+                } else if (parseNode instanceof LoadColumnsInfo) {
+                    // check columns info
+                    if (columnsInfo != null) {
+                        throw new AnalysisException("repeat setting of columns 
info");
+                    }
+                    columnsInfo = (LoadColumnsInfo) parseNode;
+                    columnsInfo.analyze(analyzer);
+                } else if (parseNode instanceof Expr) {
+                    // check where expr
+                    if (wherePredicate != null) {
+                        throw new AnalysisException("repeat setting of where 
predicate");
+                    }
+                    wherePredicate = (Expr) parseNode;
+                    wherePredicate.analyze(analyzer);
+                } else if (parseNode instanceof PartitionNames) {
+                    // check partition names
+                    if (partitionNames != null) {
+                        throw new AnalysisException("repeat setting of 
partition names");
+                    }
+                    partitionNames = (PartitionNames) parseNode;
+                    partitionNames.analyze(analyzer);
+                }
+            }
+            routineLoadDesc = new RoutineLoadDesc(columnSeparator, 
columnsInfo, wherePredicate,
+                                                  
partitionNames.getPartitionNames());
+        }
+    }
+
+    private void checkRoutineLoadProperties() throws AnalysisException {
+        Optional<String> optional = properties.keySet().parallelStream()
+                .filter(entity -> 
!PROPERTIES_SET.contains(entity)).findFirst();
+        if (!optional.isPresent()) {
 
 Review comment:
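   Should this be `optional.isPresent()` rather than `!optional.isPresent()`?
   The stream keeps only keys that are NOT in PROPERTIES_SET, so a present
   value means an unsupported property was found; presumably that is the
   case this branch is meant to handle.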

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org
For additional commands, e-mail: dev-h...@doris.apache.org

[GitHub] morningman commented on a change in pull request #456: Add routine load statement

Reply via email to