[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r24274 ## File path: fe/src/main/java/org/apache/doris/clone/TabletSchedulerStat.java ## @@ -0,0 +1,180 @@ +package org.apache.doris.clone; Review comment: added This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r24545 ## File path: fe/src/main/java/org/apache/doris/clone/TabletScheduler.java ## @@ -0,0 +1,1186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.clone; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.clone.SchedException.Status; +import org.apache.doris.clone.TabletInfo.Priority; +import org.apache.doris.clone.TabletInfo.Type; +import org.apache.doris.common.Config; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.Daemon; +import org.apache.doris.persist.ReplicaPersistInfo; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.CloneTask; +import org.apache.doris.thrift.TFinishTaskRequest; + +import com.google.common.base.Preconditions; +import com.google.common.collect.EvictingQueue; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Collectors; + +/* + * TabletScheduler saved the tablets produced by TabletChecker and try to schedule them. + * It also try to balance the cluster load. 
+ * + * We are expecting an efficient way to recover the entire cluster and make it balanced. + * Case 1: + * A Backend is down. All tablets which have a replica on this BE should be repaired as soon as possible. + * + * Case 1.1: + * As Backend is down, some tables should be repaired with high priority. So the clone task should be able + * to be preempted. + * + * Case 2: + * A new Backend is added to the cluster. Replicas should be transferred to that host to balance the cluster load. + */ +public class TabletScheduler extends Daemon { +private static final Logger LOG = LogManager.getLogger(TabletScheduler.class); + +// handle at most BATCH_NUM tablets in one loop +private static final int MIN_BATCH_NUM = 10; + +// the minimum interval of updating cluster statistics and priority of tablet info +private static final long STAT_UPDATE_INTERVAL_MS = 60 * 1000; // 1min + +private static final long SCHEDULE_INTERVAL_MS = 5000; // 5s + +public static final int BALANCE_SLOT_NUM_FOR_PATH = 2; + +/* + * Tablet is added to pendingTablets as well as its id in allTabletIds. + * TabletScheduler will take tablet from pendingTablets but will not remove its id from allTabletIds when + * handling a tablet. + * A tablet's id can only be removed after the clone task is done (timeout, cancelled or finished). + * So if a tablet's id is still in allTabletIds, TabletChecker can not add tablet to TabletScheduler. + * + * pendingTablets + runningTablets = allTabletIds + * + * pendingTablets, allTabletIds, runningTablets and schedHistory are protected by 'synchronized' + */ +private PriorityQueue pendingTablets = new PriorityQueue<>(); +private Set allTabletIds = Sets.newHashSet(); +// contains all tabletInfos whose state is RUNNING +private Map runningTablets = Maps.newHashMap(); +//
[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r24730 ## File path: fe/src/main/java/org/apache/doris/clone/TabletScheduler.java ## @@ -0,0 +1,1186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.clone; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.clone.SchedException.Status; +import org.apache.doris.clone.TabletInfo.Priority; +import org.apache.doris.clone.TabletInfo.Type; +import org.apache.doris.common.Config; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.Daemon; +import org.apache.doris.persist.ReplicaPersistInfo; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.CloneTask; +import org.apache.doris.thrift.TFinishTaskRequest; + +import com.google.common.base.Preconditions; +import com.google.common.collect.EvictingQueue; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Collectors; + +/* + * TabletScheduler saved the tablets produced by TabletChecker and try to schedule them. + * It also try to balance the cluster load. 
+ * + * We are expecting an efficient way to recover the entire cluster and make it balanced. + * Case 1: + * A Backend is down. All tablets which have a replica on this BE should be repaired as soon as possible. + * + * Case 1.1: + * As Backend is down, some tables should be repaired with high priority. So the clone task should be able + * to be preempted. + * + * Case 2: + * A new Backend is added to the cluster. Replicas should be transferred to that host to balance the cluster load. + */ +public class TabletScheduler extends Daemon { +private static final Logger LOG = LogManager.getLogger(TabletScheduler.class); + +// handle at most BATCH_NUM tablets in one loop +private static final int MIN_BATCH_NUM = 10; + +// the minimum interval of updating cluster statistics and priority of tablet info +private static final long STAT_UPDATE_INTERVAL_MS = 60 * 1000; // 1min + +private static final long SCHEDULE_INTERVAL_MS = 5000; // 5s + +public static final int BALANCE_SLOT_NUM_FOR_PATH = 2; + +/* + * Tablet is added to pendingTablets as well as its id in allTabletIds. + * TabletScheduler will take tablet from pendingTablets but will not remove its id from allTabletIds when + * handling a tablet. + * A tablet's id can only be removed after the clone task is done (timeout, cancelled or finished). + * So if a tablet's id is still in allTabletIds, TabletChecker can not add tablet to TabletScheduler. + * + * pendingTablets + runningTablets = allTabletIds + * + * pendingTablets, allTabletIds, runningTablets and schedHistory are protected by 'synchronized' + */ +private PriorityQueue pendingTablets = new PriorityQueue<>(); +private Set allTabletIds = Sets.newHashSet(); +// contains all tabletInfos whose state is RUNNING +private Map runningTablets = Maps.newHashMap(); +//
[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r246667657 ## File path: fe/src/main/java/org/apache/doris/clone/TabletScheduler.java ## @@ -0,0 +1,1186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.clone; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.clone.SchedException.Status; +import org.apache.doris.clone.TabletInfo.Priority; +import org.apache.doris.clone.TabletInfo.Type; +import org.apache.doris.common.Config; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.Daemon; +import org.apache.doris.persist.ReplicaPersistInfo; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.CloneTask; +import org.apache.doris.thrift.TFinishTaskRequest; + +import com.google.common.base.Preconditions; +import com.google.common.collect.EvictingQueue; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Collectors; + +/* + * TabletScheduler saved the tablets produced by TabletChecker and try to schedule them. + * It also try to balance the cluster load. 
+ * + * We are expecting an efficient way to recover the entire cluster and make it balanced. + * Case 1: + * A Backend is down. All tablets which have a replica on this BE should be repaired as soon as possible. + * + * Case 1.1: + * As Backend is down, some tables should be repaired with high priority. So the clone task should be able + * to be preempted. + * + * Case 2: + * A new Backend is added to the cluster. Replicas should be transferred to that host to balance the cluster load. + */ +public class TabletScheduler extends Daemon { +private static final Logger LOG = LogManager.getLogger(TabletScheduler.class); + +// handle at most BATCH_NUM tablets in one loop +private static final int MIN_BATCH_NUM = 10; + +// the minimum interval of updating cluster statistics and priority of tablet info +private static final long STAT_UPDATE_INTERVAL_MS = 60 * 1000; // 1min + +private static final long SCHEDULE_INTERVAL_MS = 5000; // 5s + +public static final int BALANCE_SLOT_NUM_FOR_PATH = 2; + +/* + * Tablet is added to pendingTablets as well as its id in allTabletIds. + * TabletScheduler will take tablet from pendingTablets but will not remove its id from allTabletIds when + * handling a tablet. + * A tablet's id can only be removed after the clone task is done (timeout, cancelled or finished). + * So if a tablet's id is still in allTabletIds, TabletChecker can not add tablet to TabletScheduler. + * + * pendingTablets + runningTablets = allTabletIds + * + * pendingTablets, allTabletIds, runningTablets and schedHistory are protected by 'synchronized' + */ +private PriorityQueue pendingTablets = new PriorityQueue<>(); +private Set allTabletIds = Sets.newHashSet(); +// contains all tabletInfos whose state is RUNNING +private Map runningTablets = Maps.newHashMap(); +//
[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r246669339 ## File path: fe/src/main/java/org/apache/doris/clone/TabletInfo.java ## @@ -0,0 +1,922 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.clone; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.clone.SchedException.Status; +import org.apache.doris.clone.TabletScheduler.PathSlot; +import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.persist.ReplicaPersistInfo; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.CloneTask; +import org.apache.doris.thrift.TBackend; +import org.apache.doris.thrift.TFinishTaskRequest; +import org.apache.doris.thrift.TStatusCode; +import org.apache.doris.thrift.TStorageMedium; +import org.apache.doris.thrift.TTabletInfo; +import org.apache.doris.thrift.TTaskType; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.List; +import java.util.Map; + +/* + * TabletInfo contains all information which is created during tablet scheduler processing. + */ +public class TabletInfo implements Comparable { Review comment: Changed to TabletSchedCtx This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r246669370 ## File path: fe/src/main/java/org/apache/doris/clone/TabletScheduler.java ## @@ -0,0 +1,1186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.clone; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.clone.SchedException.Status; +import org.apache.doris.clone.TabletInfo.Priority; +import org.apache.doris.clone.TabletInfo.Type; +import org.apache.doris.common.Config; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.Daemon; +import org.apache.doris.persist.ReplicaPersistInfo; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.CloneTask; +import org.apache.doris.thrift.TFinishTaskRequest; + +import com.google.common.base.Preconditions; +import com.google.common.collect.EvictingQueue; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Collectors; + +/* + * TabletScheduler saved the tablets produced by TabletChecker and try to schedule them. + * It also try to balance the cluster load. 
+ * + * We are expecting an efficient way to recover the entire cluster and make it balanced. + * Case 1: + * A Backend is down. All tablets which have a replica on this BE should be repaired as soon as possible. + * + * Case 1.1: + * As Backend is down, some tables should be repaired with high priority. So the clone task should be able + * to be preempted. + * + * Case 2: + * A new Backend is added to the cluster. Replicas should be transferred to that host to balance the cluster load. + */ +public class TabletScheduler extends Daemon { +private static final Logger LOG = LogManager.getLogger(TabletScheduler.class); + +// handle at most BATCH_NUM tablets in one loop +private static final int MIN_BATCH_NUM = 10; + +// the minimum interval of updating cluster statistics and priority of tablet info +private static final long STAT_UPDATE_INTERVAL_MS = 60 * 1000; // 1min + +private static final long SCHEDULE_INTERVAL_MS = 5000; // 5s + +public static final int BALANCE_SLOT_NUM_FOR_PATH = 2; + +/* + * Tablet is added to pendingTablets as well as its id in allTabletIds. + * TabletScheduler will take tablet from pendingTablets but will not remove its id from allTabletIds when + * handling a tablet. + * A tablet's id can only be removed after the clone task is done (timeout, cancelled or finished). + * So if a tablet's id is still in allTabletIds, TabletChecker can not add tablet to TabletScheduler. + * + * pendingTablets + runningTablets = allTabletIds + * + * pendingTablets, allTabletIds, runningTablets and schedHistory are protected by 'synchronized' + */ +private PriorityQueue pendingTablets = new PriorityQueue<>(); +private Set allTabletIds = Sets.newHashSet(); +// contains all tabletInfos whose state is RUNNING +private Map runningTablets = Maps.newHashMap(); +//
[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r246669392 ## File path: fe/src/main/java/org/apache/doris/clone/TabletScheduler.java ## @@ -0,0 +1,1186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.clone; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.clone.SchedException.Status; +import org.apache.doris.clone.TabletInfo.Priority; +import org.apache.doris.clone.TabletInfo.Type; +import org.apache.doris.common.Config; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.Daemon; +import org.apache.doris.persist.ReplicaPersistInfo; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.CloneTask; +import org.apache.doris.thrift.TFinishTaskRequest; + +import com.google.common.base.Preconditions; +import com.google.common.collect.EvictingQueue; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Collectors; + +/* + * TabletScheduler saved the tablets produced by TabletChecker and try to schedule them. + * It also try to balance the cluster load. 
+ * + * We are expecting an efficient way to recover the entire cluster and make it balanced. + * Case 1: + * A Backend is down. All tablets which have a replica on this BE should be repaired as soon as possible. + * + * Case 1.1: + * As Backend is down, some tables should be repaired with high priority. So the clone task should be able + * to be preempted. + * + * Case 2: + * A new Backend is added to the cluster. Replicas should be transferred to that host to balance the cluster load. + */ +public class TabletScheduler extends Daemon { +private static final Logger LOG = LogManager.getLogger(TabletScheduler.class); + +// handle at most BATCH_NUM tablets in one loop +private static final int MIN_BATCH_NUM = 10; + +// the minimum interval of updating cluster statistics and priority of tablet info +private static final long STAT_UPDATE_INTERVAL_MS = 60 * 1000; // 1min + +private static final long SCHEDULE_INTERVAL_MS = 5000; // 5s + +public static final int BALANCE_SLOT_NUM_FOR_PATH = 2; + +/* + * Tablet is added to pendingTablets as well as its id in allTabletIds. + * TabletScheduler will take tablet from pendingTablets but will not remove its id from allTabletIds when + * handling a tablet. + * A tablet's id can only be removed after the clone task is done (timeout, cancelled or finished). + * So if a tablet's id is still in allTabletIds, TabletChecker can not add tablet to TabletScheduler. + * + * pendingTablets + runningTablets = allTabletIds + * + * pendingTablets, allTabletIds, runningTablets and schedHistory are protected by 'synchronized' + */ +private PriorityQueue pendingTablets = new PriorityQueue<>(); +private Set allTabletIds = Sets.newHashSet(); +// contains all tabletInfos whose state is RUNNING +private Map runningTablets = Maps.newHashMap(); +//
[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r246669410 ## File path: fe/src/main/java/org/apache/doris/clone/TabletScheduler.java ## @@ -0,0 +1,1186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.clone; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.clone.SchedException.Status; +import org.apache.doris.clone.TabletInfo.Priority; +import org.apache.doris.clone.TabletInfo.Type; +import org.apache.doris.common.Config; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.Daemon; +import org.apache.doris.persist.ReplicaPersistInfo; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.CloneTask; +import org.apache.doris.thrift.TFinishTaskRequest; + +import com.google.common.base.Preconditions; +import com.google.common.collect.EvictingQueue; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Collectors; + +/* + * TabletScheduler saved the tablets produced by TabletChecker and try to schedule them. + * It also try to balance the cluster load. 
+ * + * We are expecting an efficient way to recovery the entire cluster and make it balanced. + * Case 1: + * A Backend is down. All tablets which has replica on this BE should be repaired as soon as possible. + * + * Case 1.1: + * As Backend is down, some tables should be repaired in high priority. So the clone task should be able + * to preempted. + * + * Case 2: + * A new Backend is added to the cluster. Replicas should be transfer to that host to balance the cluster load. + */ +public class TabletScheduler extends Daemon { +private static final Logger LOG = LogManager.getLogger(TabletScheduler.class); + +// handle at most BATCH_NUM tablets in one loop +private static final int MIN_BATCH_NUM = 10; + +// the minimum interval of updating cluster statistics and priority of tablet info +private static final long STAT_UPDATE_INTERVAL_MS = 60 * 1000; // 1min + +private static final long SCHEDULE_INTERVAL_MS = 5000; // 5s + +public static final int BALANCE_SLOT_NUM_FOR_PATH = 2; + +/* + * Tablet is added to pendingTablets as well it's id in allTabletIds. + * TabletScheduler will take tablet from pendingTablets but will not remove it's id from allTabletIds when + * handling a tablet. + * Tablet' id can only be removed after the clone task is done(timeout, cancelled or finished). + * So if a tablet's id is still in allTabletIds, TabletChecker can not add tablet to TabletScheduler. + * + * pendingTablets + runningTablets = allTabletIds + * + * pendingTablets, allTabletIds, runningTablets and schedHistory are protected by 'synchronized' + */ +private PriorityQueue pendingTablets = new PriorityQueue<>(); +private Set allTabletIds = Sets.newHashSet(); +// contains all tabletInfos which state are RUNNING +private Map runningTablets = Maps.newHashMap(); +//
[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r246669732 ## File path: fe/src/main/java/org/apache/doris/clone/TabletScheduler.java ## @@ -0,0 +1,1186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.clone; + +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.OlapTable.OlapTableState; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.clone.SchedException.Status; +import org.apache.doris.clone.TabletInfo.Priority; +import org.apache.doris.clone.TabletInfo.Type; +import org.apache.doris.common.Config; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.Daemon; +import org.apache.doris.persist.ReplicaPersistInfo; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.task.AgentTask; +import org.apache.doris.task.AgentTaskExecutor; +import org.apache.doris.task.AgentTaskQueue; +import org.apache.doris.task.CloneTask; +import org.apache.doris.thrift.TFinishTaskRequest; + +import com.google.common.base.Preconditions; +import com.google.common.collect.EvictingQueue; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Collectors; + +/* + * TabletScheduler saved the tablets produced by TabletChecker and try to schedule them. + * It also try to balance the cluster load. 
+ * + * We are expecting an efficient way to recovery the entire cluster and make it balanced. + * Case 1: + * A Backend is down. All tablets which has replica on this BE should be repaired as soon as possible. + * + * Case 1.1: + * As Backend is down, some tables should be repaired in high priority. So the clone task should be able + * to preempted. + * + * Case 2: + * A new Backend is added to the cluster. Replicas should be transfer to that host to balance the cluster load. + */ +public class TabletScheduler extends Daemon { +private static final Logger LOG = LogManager.getLogger(TabletScheduler.class); + +// handle at most BATCH_NUM tablets in one loop +private static final int MIN_BATCH_NUM = 10; + +// the minimum interval of updating cluster statistics and priority of tablet info +private static final long STAT_UPDATE_INTERVAL_MS = 60 * 1000; // 1min + +private static final long SCHEDULE_INTERVAL_MS = 5000; // 5s + +public static final int BALANCE_SLOT_NUM_FOR_PATH = 2; + +/* + * Tablet is added to pendingTablets as well it's id in allTabletIds. + * TabletScheduler will take tablet from pendingTablets but will not remove it's id from allTabletIds when + * handling a tablet. + * Tablet' id can only be removed after the clone task is done(timeout, cancelled or finished). + * So if a tablet's id is still in allTabletIds, TabletChecker can not add tablet to TabletScheduler. + * + * pendingTablets + runningTablets = allTabletIds + * + * pendingTablets, allTabletIds, runningTablets and schedHistory are protected by 'synchronized' + */ +private PriorityQueue pendingTablets = new PriorityQueue<>(); +private Set allTabletIds = Sets.newHashSet(); +// contains all tabletInfos which state are RUNNING +private Map runningTablets = Maps.newHashMap(); +//
[GitHub] morningman opened a new issue #522: Index id in Partition instance is different from index id in OlapTable instance
morningman opened a new issue #522: Index id in Partition instance is different from index id in OlapTable instance URL: https://github.com/apache/incubator-doris/issues/522 I don't know why, but it happened. It can only be recovered from with a very tricky code modification. Adding an issue here in case it happens again. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] imay commented on a change in pull request #450: Add EsScanNode
imay commented on a change in pull request #450: Add EsScanNode URL: https://github.com/apache/incubator-doris/pull/450#discussion_r246680983 ## File path: be/src/exec/es_scan_node.cpp ## @@ -0,0 +1,668 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "es_scan_node.h" + +#include +#include +#include + +#include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Exprs_types.h" +#include "runtime/runtime_state.h" +#include "runtime/row_batch.h" +#include "runtime/string_value.h" +#include "runtime/tuple_row.h" +#include "runtime/client_cache.h" +#include "util/runtime_profile.h" +#include "util/debug_util.h" +#include "service/backend_options.h" +#include "olap/olap_common.h" +#include "olap/utils.h" +#include "exprs/expr_context.h" +#include "exprs/expr.h" +#include "exprs/slot_ref.h" + +namespace doris { + +// $0 = column type (e.g. INT) +const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. " +"Expected value of type $0 based on column metadata. 
This likely indicates a " +"problem with the data source library."; +const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " +"$1 bytes for $2."; + +EsScanNode::EsScanNode( +ObjectPool* pool, +const TPlanNode& tnode, +const DescriptorTbl& descs) : +ScanNode(pool, tnode, descs), +_tuple_id(tnode.es_scan_node.tuple_id), +_scan_range_idx(0) { +if (tnode.es_scan_node.__isset.properties) { +_properties = tnode.es_scan_node.properties; +} +} + +EsScanNode::~EsScanNode() { +} + +Status EsScanNode::prepare(RuntimeState* state) { +VLOG(1) << "EsScanNode::Prepare"; + +RETURN_IF_ERROR(ScanNode::prepare(state)); +_tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); +if (_tuple_desc == nullptr) { +std::stringstream ss; +ss << "es tuple descriptor is null, _tuple_id=" << _tuple_id; +LOG(WARNING) << ss.str(); +return Status(ss.str()); +} +_env = state->exec_env(); + +return Status::OK; +} + +Status EsScanNode::open(RuntimeState* state) { +VLOG(1) << "EsScanNode::Open"; + +RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN)); +RETURN_IF_CANCELLED(state); +SCOPED_TIMER(_runtime_profile->total_time_counter()); +RETURN_IF_ERROR(ExecNode::open(state)); + +// TExtOpenParams.row_schema +vector cols; +for (const SlotDescriptor* slot : _tuple_desc->slots()) { +TExtColumnDesc col; +col.__set_name(slot->col_name()); +col.__set_type(slot->type().to_thrift()); +cols.emplace_back(std::move(col)); +} +TExtTableSchema row_schema; +row_schema.cols = std::move(cols); +row_schema.__isset.cols = true; + +// TExtOpenParams.predicates +vector > predicates; +vector predicate_to_conjunct; +for (int i = 0; i < _conjunct_ctxs.size(); ++i) { +VLOG(1) << "conjunct: " << _conjunct_ctxs[i]->root()->debug_string(); +vector disjuncts; +if (get_disjuncts(_conjunct_ctxs[i], _conjunct_ctxs[i]->root(), disjuncts)) { +predicates.emplace_back(std::move(disjuncts)); +predicate_to_conjunct.push_back(i); +} +} + +// open every scan range +vector 
conjunct_accepted_times(_conjunct_ctxs.size(), 0); +for (int i = 0; i < _scan_ranges.size(); ++i) { +TEsScanRange& es_scan_range = _scan_ranges[i]; + +if (es_scan_range.es_hosts.empty()) { +std::stringstream ss; +ss << "es fail to open: hosts empty"; +LOG(WARNING) << ss.str(); +return Status(ss.str()); +} + + +// TExtOpenParams +TExtOpenParams params; +params.__set_query_id(state->query_id()); +_properties["index"] = es_scan_range.index; +if (es_scan_range.__isset.type) { +_properties["type"] = es_scan_range.type; +} +_properties["shard_id"] = std::to_string(es_scan_range.shard_id); +params.__set_properties(_properties); +params.__set_row_schema(row_schema); +params.__set_batch_size(state->batch_size()); +params.__set_predica
[GitHub] imay commented on a change in pull request #450: Add EsScanNode
imay commented on a change in pull request #450: Add EsScanNode URL: https://github.com/apache/incubator-doris/pull/450#discussion_r246684072 ## File path: be/src/exec/es_scan_node.cpp ## @@ -0,0 +1,668 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "es_scan_node.h" + +#include +#include +#include + +#include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Exprs_types.h" +#include "runtime/runtime_state.h" +#include "runtime/row_batch.h" +#include "runtime/string_value.h" +#include "runtime/tuple_row.h" +#include "runtime/client_cache.h" +#include "util/runtime_profile.h" +#include "util/debug_util.h" +#include "service/backend_options.h" +#include "olap/olap_common.h" +#include "olap/utils.h" +#include "exprs/expr_context.h" +#include "exprs/expr.h" +#include "exprs/slot_ref.h" + +namespace doris { + +// $0 = column type (e.g. INT) +const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. " +"Expected value of type $0 based on column metadata. 
This likely indicates a " +"problem with the data source library."; +const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " +"$1 bytes for $2."; + +EsScanNode::EsScanNode( +ObjectPool* pool, +const TPlanNode& tnode, +const DescriptorTbl& descs) : +ScanNode(pool, tnode, descs), +_tuple_id(tnode.es_scan_node.tuple_id), +_scan_range_idx(0) { +if (tnode.es_scan_node.__isset.properties) { +_properties = tnode.es_scan_node.properties; +} +} + +EsScanNode::~EsScanNode() { +} + +Status EsScanNode::prepare(RuntimeState* state) { +VLOG(1) << "EsScanNode::Prepare"; + +RETURN_IF_ERROR(ScanNode::prepare(state)); +_tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); +if (_tuple_desc == nullptr) { +std::stringstream ss; +ss << "es tuple descriptor is null, _tuple_id=" << _tuple_id; +LOG(WARNING) << ss.str(); +return Status(ss.str()); +} +_env = state->exec_env(); + +return Status::OK; +} + +Status EsScanNode::open(RuntimeState* state) { +VLOG(1) << "EsScanNode::Open"; + +RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN)); +RETURN_IF_CANCELLED(state); +SCOPED_TIMER(_runtime_profile->total_time_counter()); +RETURN_IF_ERROR(ExecNode::open(state)); + +// TExtOpenParams.row_schema +vector cols; +for (const SlotDescriptor* slot : _tuple_desc->slots()) { +TExtColumnDesc col; +col.__set_name(slot->col_name()); +col.__set_type(slot->type().to_thrift()); +cols.emplace_back(std::move(col)); +} +TExtTableSchema row_schema; +row_schema.cols = std::move(cols); +row_schema.__isset.cols = true; + +// TExtOpenParams.predicates +vector > predicates; +vector predicate_to_conjunct; +for (int i = 0; i < _conjunct_ctxs.size(); ++i) { +VLOG(1) << "conjunct: " << _conjunct_ctxs[i]->root()->debug_string(); +vector disjuncts; +if (get_disjuncts(_conjunct_ctxs[i], _conjunct_ctxs[i]->root(), disjuncts)) { +predicates.emplace_back(std::move(disjuncts)); +predicate_to_conjunct.push_back(i); +} +} + +// open every scan range +vector 
conjunct_accepted_times(_conjunct_ctxs.size(), 0); +for (int i = 0; i < _scan_ranges.size(); ++i) { +TEsScanRange& es_scan_range = _scan_ranges[i]; + +if (es_scan_range.es_hosts.empty()) { +std::stringstream ss; +ss << "es fail to open: hosts empty"; +LOG(WARNING) << ss.str(); +return Status(ss.str()); +} + + +// TExtOpenParams +TExtOpenParams params; +params.__set_query_id(state->query_id()); +_properties["index"] = es_scan_range.index; +if (es_scan_range.__isset.type) { +_properties["type"] = es_scan_range.type; +} +_properties["shard_id"] = std::to_string(es_scan_range.shard_id); +params.__set_properties(_properties); +params.__set_row_schema(row_schema); +params.__set_batch_size(state->batch_size()); +params.__set_predica
[GitHub] imay commented on a change in pull request #450: Add EsScanNode
imay commented on a change in pull request #450: Add EsScanNode URL: https://github.com/apache/incubator-doris/pull/450#discussion_r246687168 ## File path: be/src/exec/es_scan_node.cpp ## @@ -0,0 +1,668 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "es_scan_node.h" + +#include +#include +#include + +#include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Exprs_types.h" +#include "runtime/runtime_state.h" +#include "runtime/row_batch.h" +#include "runtime/string_value.h" +#include "runtime/tuple_row.h" +#include "runtime/client_cache.h" +#include "util/runtime_profile.h" +#include "util/debug_util.h" +#include "service/backend_options.h" +#include "olap/olap_common.h" +#include "olap/utils.h" +#include "exprs/expr_context.h" +#include "exprs/expr.h" +#include "exprs/slot_ref.h" + +namespace doris { + +// $0 = column type (e.g. INT) +const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. " +"Expected value of type $0 based on column metadata. 
This likely indicates a " +"problem with the data source library."; +const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " +"$1 bytes for $2."; + +EsScanNode::EsScanNode( +ObjectPool* pool, +const TPlanNode& tnode, +const DescriptorTbl& descs) : +ScanNode(pool, tnode, descs), +_tuple_id(tnode.es_scan_node.tuple_id), +_scan_range_idx(0) { +if (tnode.es_scan_node.__isset.properties) { +_properties = tnode.es_scan_node.properties; +} +} + +EsScanNode::~EsScanNode() { +} + +Status EsScanNode::prepare(RuntimeState* state) { +VLOG(1) << "EsScanNode::Prepare"; + +RETURN_IF_ERROR(ScanNode::prepare(state)); +_tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); +if (_tuple_desc == nullptr) { +std::stringstream ss; +ss << "es tuple descriptor is null, _tuple_id=" << _tuple_id; +LOG(WARNING) << ss.str(); +return Status(ss.str()); +} +_env = state->exec_env(); + +return Status::OK; +} + +Status EsScanNode::open(RuntimeState* state) { +VLOG(1) << "EsScanNode::Open"; + +RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN)); +RETURN_IF_CANCELLED(state); +SCOPED_TIMER(_runtime_profile->total_time_counter()); +RETURN_IF_ERROR(ExecNode::open(state)); + +// TExtOpenParams.row_schema +vector cols; +for (const SlotDescriptor* slot : _tuple_desc->slots()) { +TExtColumnDesc col; +col.__set_name(slot->col_name()); +col.__set_type(slot->type().to_thrift()); +cols.emplace_back(std::move(col)); +} +TExtTableSchema row_schema; +row_schema.cols = std::move(cols); +row_schema.__isset.cols = true; + +// TExtOpenParams.predicates +vector > predicates; +vector predicate_to_conjunct; +for (int i = 0; i < _conjunct_ctxs.size(); ++i) { +VLOG(1) << "conjunct: " << _conjunct_ctxs[i]->root()->debug_string(); +vector disjuncts; +if (get_disjuncts(_conjunct_ctxs[i], _conjunct_ctxs[i]->root(), disjuncts)) { +predicates.emplace_back(std::move(disjuncts)); +predicate_to_conjunct.push_back(i); +} +} + +// open every scan range +vector 
conjunct_accepted_times(_conjunct_ctxs.size(), 0); +for (int i = 0; i < _scan_ranges.size(); ++i) { +TEsScanRange& es_scan_range = _scan_ranges[i]; + +if (es_scan_range.es_hosts.empty()) { +std::stringstream ss; +ss << "es fail to open: hosts empty"; +LOG(WARNING) << ss.str(); +return Status(ss.str()); +} + + +// TExtOpenParams +TExtOpenParams params; +params.__set_query_id(state->query_id()); +_properties["index"] = es_scan_range.index; +if (es_scan_range.__isset.type) { +_properties["type"] = es_scan_range.type; +} +_properties["shard_id"] = std::to_string(es_scan_range.shard_id); +params.__set_properties(_properties); +params.__set_row_schema(row_schema); +params.__set_batch_size(state->batch_size()); +params.__set_predica
[GitHub] imay commented on a change in pull request #450: Add EsScanNode
imay commented on a change in pull request #450: Add EsScanNode URL: https://github.com/apache/incubator-doris/pull/450#discussion_r246681927 ## File path: be/src/exec/es_scan_node.cpp ## @@ -0,0 +1,668 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "es_scan_node.h" + +#include +#include +#include + +#include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Exprs_types.h" +#include "runtime/runtime_state.h" +#include "runtime/row_batch.h" +#include "runtime/string_value.h" +#include "runtime/tuple_row.h" +#include "runtime/client_cache.h" +#include "util/runtime_profile.h" +#include "util/debug_util.h" +#include "service/backend_options.h" +#include "olap/olap_common.h" +#include "olap/utils.h" +#include "exprs/expr_context.h" +#include "exprs/expr.h" +#include "exprs/slot_ref.h" + +namespace doris { + +// $0 = column type (e.g. INT) +const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. " +"Expected value of type $0 based on column metadata. 
This likely indicates a " +"problem with the data source library."; +const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " +"$1 bytes for $2."; + +EsScanNode::EsScanNode( +ObjectPool* pool, +const TPlanNode& tnode, +const DescriptorTbl& descs) : +ScanNode(pool, tnode, descs), +_tuple_id(tnode.es_scan_node.tuple_id), +_scan_range_idx(0) { +if (tnode.es_scan_node.__isset.properties) { +_properties = tnode.es_scan_node.properties; +} +} + +EsScanNode::~EsScanNode() { +} + +Status EsScanNode::prepare(RuntimeState* state) { +VLOG(1) << "EsScanNode::Prepare"; + +RETURN_IF_ERROR(ScanNode::prepare(state)); +_tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); +if (_tuple_desc == nullptr) { +std::stringstream ss; +ss << "es tuple descriptor is null, _tuple_id=" << _tuple_id; +LOG(WARNING) << ss.str(); +return Status(ss.str()); +} +_env = state->exec_env(); + +return Status::OK; +} + +Status EsScanNode::open(RuntimeState* state) { +VLOG(1) << "EsScanNode::Open"; + +RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN)); +RETURN_IF_CANCELLED(state); +SCOPED_TIMER(_runtime_profile->total_time_counter()); +RETURN_IF_ERROR(ExecNode::open(state)); + +// TExtOpenParams.row_schema +vector cols; +for (const SlotDescriptor* slot : _tuple_desc->slots()) { +TExtColumnDesc col; +col.__set_name(slot->col_name()); +col.__set_type(slot->type().to_thrift()); +cols.emplace_back(std::move(col)); +} +TExtTableSchema row_schema; +row_schema.cols = std::move(cols); +row_schema.__isset.cols = true; + +// TExtOpenParams.predicates +vector > predicates; +vector predicate_to_conjunct; +for (int i = 0; i < _conjunct_ctxs.size(); ++i) { +VLOG(1) << "conjunct: " << _conjunct_ctxs[i]->root()->debug_string(); +vector disjuncts; +if (get_disjuncts(_conjunct_ctxs[i], _conjunct_ctxs[i]->root(), disjuncts)) { +predicates.emplace_back(std::move(disjuncts)); +predicate_to_conjunct.push_back(i); +} +} + +// open every scan range +vector 
conjunct_accepted_times(_conjunct_ctxs.size(), 0); +for (int i = 0; i < _scan_ranges.size(); ++i) { +TEsScanRange& es_scan_range = _scan_ranges[i]; + +if (es_scan_range.es_hosts.empty()) { +std::stringstream ss; +ss << "es fail to open: hosts empty"; +LOG(WARNING) << ss.str(); +return Status(ss.str()); +} + + +// TExtOpenParams +TExtOpenParams params; +params.__set_query_id(state->query_id()); +_properties["index"] = es_scan_range.index; +if (es_scan_range.__isset.type) { +_properties["type"] = es_scan_range.type; +} +_properties["shard_id"] = std::to_string(es_scan_range.shard_id); +params.__set_properties(_properties); +params.__set_row_schema(row_schema); +params.__set_batch_size(state->batch_size()); +params.__set_predica
[GitHub] chaoyli opened a new pull request #523: Fix inconsistency of three replicas belongs one tablet
chaoyli opened a new pull request #523: Fix inconsistency of three replicas belongs one tablet URL: https://github.com/apache/incubator-doris/pull/523 There are three replicas, A, B, and C, of one tablet. A has versions 0 - 10. B has versions 0 - 5, 6, 7, 9, 10. 1. B is missing versions, so it clones 0 - 10 from A and removes the overlapping versions from its header. 2. Coincidentally, 6 is the version of a delete predicate (delete where day = 20181221). When the overlapping versions are removed, version 6 is removed but its delete predicate is not. 3. The user inserts data generated on 20181221 into replica B again. 4. B performs compaction, and the data generated on 20181221 is wrongly removed. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] morningman opened a new pull request #524: Fix bug that schema change does not set null value correctly
morningman opened a new pull request #524: Fix bug that schema change does not set null value correctly URL: https://github.com/apache/incubator-doris/pull/524 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] chaoyli closed pull request #524: Fix bug that schema change does not set null value correctly
chaoyli closed pull request #524: Fix bug that schema change does not set null value correctly URL: https://github.com/apache/incubator-doris/pull/524 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/be/src/olap/column_reader.cpp b/be/src/olap/column_reader.cpp index 38e9cd35..13f7c6ae 100644 --- a/be/src/olap/column_reader.cpp +++ b/be/src/olap/column_reader.cpp @@ -641,7 +641,6 @@ ColumnReader* ColumnReader::create(uint32_t column_id, field_info.default_value, field_info.type, field_info.length); } } else if (field_info.is_allow_null) { -LOG(WARNING) << "create NullValueReader: " << field_info.name; return new(std::nothrow) NullValueReader(column_id, column_unique_id); } else { OLAP_LOG_WARNING("not null field has no default value"); diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index a5c2a2c1..2e9c6151 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -2358,7 +2358,7 @@ OLAPStatus SchemaChangeHandler::_init_column_mapping(ColumnMapping* column_mappi return OLAP_ERR_MALLOC_ERROR; } -if (true == column_schema.is_allow_null && value.length() == 0) { +if (column_schema.is_allow_null && !column_schema.has_default_value) { column_mapping->default_value->set_null(); } else { column_mapping->default_value->from_string(value); This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] imay commented on a change in pull request #523: Fix inconsistency of three replicas belongs to one tablet
imay commented on a change in pull request #523: Fix inconsistency of three replicas belongs to one tablet URL: https://github.com/apache/incubator-doris/pull/523#discussion_r246710398 ## File path: be/src/olap/olap_header.cpp ## @@ -460,6 +460,48 @@ void OLAPHeader::add_delete_condition(const DeleteConditionMessage& delete_condi LOG(INFO) << "add delete condition. version=" << version; } +void OLAPHeader::delete_cond_by_version(const Version& version) { +DCHECK(version.first == version.second); +google::protobuf::RepeatedPtrField* delete_conditions += mutable_delete_data_conditions(); +int index = 0; +for (; index < delete_conditions->size(); ++index) { +DeleteConditionMessage temp = delete_conditions->Get(index); Review comment: reference This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] chaoyli closed pull request #523: Fix inconsistency of three replicas belongs to one tablet
chaoyli closed pull request #523: Fix inconsistency of three replicas belongs to one tablet URL: https://github.com/apache/incubator-doris/pull/523 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/be/src/olap/olap_header.cpp b/be/src/olap/olap_header.cpp index da79c039..f8d6d439 100644 --- a/be/src/olap/olap_header.cpp +++ b/be/src/olap/olap_header.cpp @@ -460,6 +460,48 @@ void OLAPHeader::add_delete_condition(const DeleteConditionMessage& delete_condi LOG(INFO) << "add delete condition. version=" << version; } +void OLAPHeader::delete_cond_by_version(const Version& version) { +DCHECK(version.first == version.second); +google::protobuf::RepeatedPtrField* delete_conditions += mutable_delete_data_conditions(); +int index = 0; +for (; index < delete_conditions->size(); ++index) { +const DeleteConditionMessage& temp = delete_conditions->Get(index); +if (temp.version() == version.first) { +// log delete condtion +string del_cond_str; +const RepeatedPtrField& sub_conditions = temp.sub_conditions(); + +for (int i = 0; i != sub_conditions.size(); ++i) { +del_cond_str += sub_conditions.Get(i) + ";"; +} + +LOG(INFO) << "delete one condition. 
version=" << temp.version() + << ", condition=" << del_cond_str; + +// remove delete condition from PB +delete_conditions->SwapElements(index, delete_conditions->size() - 1); +delete_conditions->RemoveLast(); +} +} +} + +bool OLAPHeader::is_delete_data_version(Version version) { +if (version.first != version.second) { +return false; +} + +google::protobuf::RepeatedPtrField::const_iterator it; +it = delete_data_conditions().begin(); +for (; it != delete_data_conditions().end(); ++it) { +if (it->version() == version.first) { +return true; +} +} + +return false; +} + const PPendingDelta* OLAPHeader::get_pending_delta(int64_t transaction_id) const { for (int i = 0; i < pending_delta_size(); i++) { if (pending_delta(i).transaction_id() == transaction_id) { diff --git a/be/src/olap/olap_header.h b/be/src/olap/olap_header.h index f29bffc1..23f97efa 100644 --- a/be/src/olap/olap_header.h +++ b/be/src/olap/olap_header.h @@ -79,6 +79,8 @@ class OLAPHeader : public OLAPHeaderMessage { bool empty, const std::vector* column_statistics); void add_delete_condition(const DeleteConditionMessage& delete_condition, int64_t version); +void delete_cond_by_version(const Version& version); +bool is_delete_data_version(Version version); const PPendingDelta* get_pending_delta(int64_t transaction_id) const; const PPendingSegmentGroup* get_pending_segment_group(int64_t transaction_id, int32_t pending_segment_group_id) const; diff --git a/be/src/olap/olap_table.cpp b/be/src/olap/olap_table.cpp index 0ff94875..ecdd9448 100644 --- a/be/src/olap/olap_table.cpp +++ b/be/src/olap/olap_table.cpp @@ -1307,6 +1307,9 @@ OLAPStatus OLAPTable::clone_data(const OLAPHeader& clone_header, << " version=" << version.first << "-" << version.second << "]"; break; } +if (new_local_header.is_delete_data_version(version)) { +new_local_header.delete_cond_by_version(version); +} LOG(INFO) << "delete version from new local header when clone. 
[table='" << full_name() << "', version=" << version.first << "-" << version.second << "]"; } diff --git a/be/src/olap/olap_table.h b/be/src/olap/olap_table.h index ec2f4f52..b1621957 100644 --- a/be/src/olap/olap_table.h +++ b/be/src/olap/olap_table.h @@ -528,19 +528,7 @@ class OLAPTable : public std::enable_shared_from_this { } bool is_delete_data_version(Version version) { -if (version.first != version.second) { -return false; -} - - google::protobuf::RepeatedPtrField::const_iterator it; -it = _header->delete_data_conditions().begin(); -for (; it != _header->delete_data_conditions().end(); ++it) { -if (it->version() == version.first) { -return true; -} -} - -return false; +return _header->is_delete_data_version(version); } bool is_load_delete_version(Version version); diff --git a/be/test/runtime/test_data/user_function_cache/download/1/1.1234.so.tmp b/be/test/runtime/test_data/user_function_cache/download/1/1.1234.so.tmp new file mode 100644 index ..64cb35c2 Binary files /dev/null and b/be/test/runt
[GitHub] chaoyli opened a new pull request #525: Fix inconsistency of three replicas belongs to one tablet (#523)
chaoyli opened a new pull request #525: Fix inconsistency of three replicas belongs to one tablet (#523) URL: https://github.com/apache/incubator-doris/pull/525 There are three replicas, A, B and C, of one tablet. A has versions 0 - 10. B has versions 0 - 5, 6, 7, 9, 10. 1. B has missing versions, so it clones 0 - 10 from A and removes the overlapped versions from its header. 2. Coincidentally, 6 is a version for a delete predicate (delete where day = 20181221). When removing overlapped versions, version 6 is removed but its delete predicate is not removed. 3. Unfortunately, the 0-10 version cloned from A contains data for day 20181221. 4. B performs compaction, and the data for day 20181221 is wrongly removed. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] chaoyli closed pull request #525: Fix inconsistency of three replicas belongs to one tablet (#523)
chaoyli closed pull request #525: Fix inconsistency of three replicas belongs to one tablet (#523) URL: https://github.com/apache/incubator-doris/pull/525 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/be/src/olap/olap_header.cpp b/be/src/olap/olap_header.cpp index da79c039..f8d6d439 100644 --- a/be/src/olap/olap_header.cpp +++ b/be/src/olap/olap_header.cpp @@ -460,6 +460,48 @@ void OLAPHeader::add_delete_condition(const DeleteConditionMessage& delete_condi LOG(INFO) << "add delete condition. version=" << version; } +void OLAPHeader::delete_cond_by_version(const Version& version) { +DCHECK(version.first == version.second); +google::protobuf::RepeatedPtrField* delete_conditions += mutable_delete_data_conditions(); +int index = 0; +for (; index < delete_conditions->size(); ++index) { +const DeleteConditionMessage& temp = delete_conditions->Get(index); +if (temp.version() == version.first) { +// log delete condtion +string del_cond_str; +const RepeatedPtrField& sub_conditions = temp.sub_conditions(); + +for (int i = 0; i != sub_conditions.size(); ++i) { +del_cond_str += sub_conditions.Get(i) + ";"; +} + +LOG(INFO) << "delete one condition. 
version=" << temp.version() + << ", condition=" << del_cond_str; + +// remove delete condition from PB +delete_conditions->SwapElements(index, delete_conditions->size() - 1); +delete_conditions->RemoveLast(); +} +} +} + +bool OLAPHeader::is_delete_data_version(Version version) { +if (version.first != version.second) { +return false; +} + +google::protobuf::RepeatedPtrField::const_iterator it; +it = delete_data_conditions().begin(); +for (; it != delete_data_conditions().end(); ++it) { +if (it->version() == version.first) { +return true; +} +} + +return false; +} + const PPendingDelta* OLAPHeader::get_pending_delta(int64_t transaction_id) const { for (int i = 0; i < pending_delta_size(); i++) { if (pending_delta(i).transaction_id() == transaction_id) { diff --git a/be/src/olap/olap_header.h b/be/src/olap/olap_header.h index f29bffc1..23f97efa 100644 --- a/be/src/olap/olap_header.h +++ b/be/src/olap/olap_header.h @@ -79,6 +79,8 @@ class OLAPHeader : public OLAPHeaderMessage { bool empty, const std::vector* column_statistics); void add_delete_condition(const DeleteConditionMessage& delete_condition, int64_t version); +void delete_cond_by_version(const Version& version); +bool is_delete_data_version(Version version); const PPendingDelta* get_pending_delta(int64_t transaction_id) const; const PPendingSegmentGroup* get_pending_segment_group(int64_t transaction_id, int32_t pending_segment_group_id) const; diff --git a/be/src/olap/olap_table.cpp b/be/src/olap/olap_table.cpp index 0ff94875..ecdd9448 100644 --- a/be/src/olap/olap_table.cpp +++ b/be/src/olap/olap_table.cpp @@ -1307,6 +1307,9 @@ OLAPStatus OLAPTable::clone_data(const OLAPHeader& clone_header, << " version=" << version.first << "-" << version.second << "]"; break; } +if (new_local_header.is_delete_data_version(version)) { +new_local_header.delete_cond_by_version(version); +} LOG(INFO) << "delete version from new local header when clone. 
[table='" << full_name() << "', version=" << version.first << "-" << version.second << "]"; } diff --git a/be/src/olap/olap_table.h b/be/src/olap/olap_table.h index ec2f4f52..b1621957 100644 --- a/be/src/olap/olap_table.h +++ b/be/src/olap/olap_table.h @@ -528,19 +528,7 @@ class OLAPTable : public std::enable_shared_from_this { } bool is_delete_data_version(Version version) { -if (version.first != version.second) { -return false; -} - - google::protobuf::RepeatedPtrField::const_iterator it; -it = _header->delete_data_conditions().begin(); -for (; it != _header->delete_data_conditions().end(); ++it) { -if (it->version() == version.first) { -return true; -} -} - -return false; +return _header->is_delete_data_version(version); } bool is_load_delete_version(Version version); This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queri
[GitHub] imay closed pull request #519: Change the default bdbje sync policy to SYNC
imay closed pull request #519: Change the default bdbje sync policy to SYNC URL: https://github.com/apache/incubator-doris/pull/519 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/fe/src/main/java/org/apache/doris/common/Config.java b/fe/src/main/java/org/apache/doris/common/Config.java index 3bddd82e..43135e69 100644 --- a/fe/src/main/java/org/apache/doris/common/Config.java +++ b/fe/src/main/java/org/apache/doris/common/Config.java @@ -118,18 +118,25 @@ @ConfField public static int meta_delay_toleration_second = 300;// 5 min /* * Master FE sync policy of bdbje. + * If you only deploy one Follower FE, set this to 'SYNC'. If you deploy more than 3 Follower FE, + * you can set this and the following 'replica_sync_policy' to WRITE_NO_SYNC. * more info, see: http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.SyncPolicy.html */ -@ConfField public static String master_sync_policy = "WRITE_NO_SYNC"; // SYNC, NO_SYNC, WRITE_NO_SYNC +@ConfField public static String master_sync_policy = "SYNC"; // SYNC, NO_SYNC, WRITE_NO_SYNC /* * Follower FE sync policy of bdbje. */ -@ConfField public static String replica_sync_policy = "WRITE_NO_SYNC"; // SYNC, NO_SYNC, WRITE_NO_SYNC +@ConfField public static String replica_sync_policy = "SYNC"; // SYNC, NO_SYNC, WRITE_NO_SYNC /* * Replica ack policy of bdbje. 
* more info, see: http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.ReplicaAckPolicy.html */ @ConfField public static String replica_ack_policy = "SIMPLE_MAJORITY"; // ALL, NONE, SIMPLE_MAJORITY + +/* + * the max txn number which bdbje can rollback when trying to rejoin the group + */ +@ConfField public static int txn_rollback_limit = 100; /* * Specified an IP for frontend, instead of the ip get by *InetAddress.getByName*. diff --git a/fe/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java b/fe/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java index a6f863a4..f3ddc1d5 100644 --- a/fe/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java +++ b/fe/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java @@ -105,6 +105,8 @@ public void setup(File envHome, String selfNodeName, String selfNodeHostPort, replicationConfig.setGroupName(PALO_JOURNAL_GROUP); replicationConfig.setConfigParam(ReplicationConfig.ENV_UNKNOWN_STATE_TIMEOUT, "10"); replicationConfig.setMaxClockDelta(Config.max_bdbje_clock_delta_ms, TimeUnit.MILLISECONDS); +replicationConfig.setConfigParam(ReplicationConfig.TXN_ROLLBACK_LIMIT, +String.valueOf(Config.txn_rollback_limit)); if (isElectable) { replicationConfig.setReplicaAckTimeout(2, TimeUnit.SECONDS); This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] morningman commented on a change in pull request #336: Implement new tablet repair and balance framework
morningman commented on a change in pull request #336: Implement new tablet repair and balance framework URL: https://github.com/apache/incubator-doris/pull/336#discussion_r246720010 ## File path: fe/src/main/java/org/apache/doris/clone/TabletChecker.java ## @@ -0,0 +1,449 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.clone; + +import org.apache.doris.analysis.AdminCancelRepairTableStmt; +import org.apache.doris.analysis.AdminRepairTableStmt; +import org.apache.doris.catalog.Catalog; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.Table.TableType; +import org.apache.doris.catalog.Tablet; +import org.apache.doris.catalog.Tablet.TabletStatus; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.Daemon; +import org.apache.doris.system.SystemInfoService; + +import com.google.common.base.Preconditions; +import com.google.common.collect.HashBasedTable; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import com.google.common.collect.Table.Cell; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/* + * This checker is responsible for checking all unhealthy tablets. + * It does not responsible for any scheduler of tablet repairing or balance + */ +public class TabletChecker extends Daemon { +private static final Logger LOG = LogManager.getLogger(TabletChecker.class); + +private static final long CHECK_INTERVAL_MS = 20 * 1000L; // 20 second + +// if the number of scheduled tablets in TabletScheduler exceed this threshold +// skip checking. 
+private static final int MAX_SCHEDULING_TABLETS = 5000; + +private Catalog catalog; +private SystemInfoService infoService; +private TabletScheduler tabletScheduler; +private TabletSchedulerStat stat; + +// db id -> (tbl id -> PrioPart) +// priority of replicas of partitions in this table will be set to VERY_HIGH if not healthy +private com.google.common.collect.Table> prios = HashBasedTable.create(); + +// represent a partition which need to be repaired preferentially +public static class PrioPart { +public long partId; +public long addTime; +public long timeoutMs; + +public PrioPart(long partId, long addTime, long timeoutMs) { +this.partId = partId; +this.addTime = addTime; +this.timeoutMs = timeoutMs; +} + +public boolean isTimeout() { +return System.currentTimeMillis() - addTime > timeoutMs; +} + +@Override +public boolean equals(Object obj) { +if (!(obj instanceof PrioPart)) { +return false; +} +return partId == ((PrioPart) obj).partId; +} +} + +public TabletChecker(Catalog catalog, SystemInfoService infoService, TabletScheduler tabletScheduler, +TabletSchedulerStat stat) { +super("tablet checker", CHECK_INTERVAL_MS); +this.catalog = catalog; +this.infoService = infoService; +this.tabletScheduler = tabletScheduler; +this.stat = stat; +} + +public void addPrios(long dbId, long tblId, List partitionIds, long timeoutMs) { +Preconditions.checkArgument(!partitionIds.isEmpty()); +long currentTime = System.currentTimeMillis(); +synchronized (prios) { +Set parts = prios.get(dbId, tblId); +if (parts == null) { +parts = Sets.newHashSet(); +prios.put(dbId, tblId, parts); +} + +for (long partId : partitionIds) { +PrioPart prioPart = new PrioPart(partId, currentTime, timeoutMs); +parts.add(prioPart); +} +} + +// we also need to
[GitHub] chaoyli commented on a change in pull request #514: Add rowset reader context builder
chaoyli commented on a change in pull request #514: Add rowset reader context builder URL: https://github.com/apache/incubator-doris/pull/514#discussion_r246723895 ## File path: be/src/olap/rowset/rowset_reader_context_builder.h ## @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_CONTEXT_BUILDER_H +#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_READER_CONTEXT_BUILDER_H + +#include "olap/schema.h" +#include "olap/column_predicate.h" +#include "olap/row_cursor.h" +#include "olap/row_block.h" +#include "olap/lru_cache.h" +#include "olap/olap_cond.h" +#include "olap/delete_handler.h" +#include "runtime/runtime_state.h" + +namespace doris { + +struct ReaderContext { + RowFields* tablet_schema; Review comment: not aligned. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] chaoyli closed pull request #514: Add rowset reader context builder
chaoyli closed pull request #514: Add rowset reader context builder URL: https://github.com/apache/incubator-doris/pull/514 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/be/src/olap/rowset/alpha_rowset_reader.cpp b/be/src/olap/rowset/alpha_rowset_reader.cpp index e44ad066..b7b29462 100644 --- a/be/src/olap/rowset/alpha_rowset_reader.cpp +++ b/be/src/olap/rowset/alpha_rowset_reader.cpp @@ -39,12 +39,11 @@ AlphaRowsetReader::AlphaRowsetReader(int num_key_fields, int num_short_key_field } OLAPStatus AlphaRowsetReader::init(ReaderContext* read_context) { -_current_read_context = read_context; -OLAPStatus status = _init_segment_groups(read_context); -if (status != OLAP_SUCCESS) { -return status; +if (read_context == nullptr) { +return OLAP_ERR_INIT_FAILED; } -status = _init_column_datas(read_context); +_current_read_context = read_context; +OLAPStatus status = _init_column_datas(read_context); return status; } @@ -140,26 +139,20 @@ OLAPStatus AlphaRowsetReader::_init_column_datas(ReaderContext* read_context) { if (status != OLAP_SUCCESS) { return OLAP_ERR_READER_READING_ERROR; } -new_column_data->set_delete_handler(read_context->delete_handler); +new_column_data->set_delete_handler(*read_context->delete_handler); new_column_data->set_stats(read_context->stats); new_column_data->set_lru_cache(read_context->lru_cache); std::vector col_predicates; -for (auto& column_predicate : read_context->predicates) { -col_predicates.push_back(&column_predicate.second); +for (auto& column_predicate : *read_context->predicates) { +col_predicates.push_back(column_predicate.second); } -if (read_context->lower_bound_keys.size() != read_context->is_lower_keys_included.size() -|| read_context->lower_bound_keys.size() != read_context->upper_bound_keys.size() -|| 
read_context->upper_bound_keys.size() != read_context->is_upper_keys_included.size()) { -std::string error_msg = "invalid key range arguments"; -LOG(WARNING) << error_msg; -return OLAP_ERR_INPUT_PARAMETER_ERROR; -} -new_column_data->set_read_params(read_context->return_columns, -read_context->load_bf_columns, -read_context->conditions, + +new_column_data->set_read_params(*read_context->return_columns, +*read_context->load_bf_columns, +*read_context->conditions, col_predicates, -read_context->lower_bound_keys, -read_context->upper_bound_keys, +*read_context->lower_bound_keys, +*read_context->upper_bound_keys, read_context->is_using_cache, read_context->runtime_state); // filter column data @@ -183,13 +176,31 @@ OLAPStatus AlphaRowsetReader::_init_column_datas(ReaderContext* read_context) { new_column_data->set_delete_status(DEL_NOT_SATISFIED); } _column_datas.emplace_back(std::move(new_column_data)); -_key_range_size = read_context->lower_bound_keys.size(); +if (read_context->lower_bound_keys == nullptr) { +if (read_context->is_lower_keys_included != nullptr +|| read_context->upper_bound_keys != nullptr +|| read_context->is_upper_keys_included != nullptr) { +LOG(WARNING) << "invalid key range arguments"; +return OLAP_ERR_INPUT_PARAMETER_ERROR; +} +_key_range_size = 0; +} else { +if (read_context->lower_bound_keys->size() != read_context->is_lower_keys_included->size() +|| read_context->lower_bound_keys->size() != read_context->upper_bound_keys->size() +|| read_context->upper_bound_keys->size() != read_context->is_upper_keys_included->size()) { +std::string error_msg = "invalid key range arguments"; +LOG(WARNING) << error_msg; +return OLAP_ERR_INPUT_PARAMETER_ERROR; +} +_key_range_size = read_context->lower_bound_keys->size(); +} + RowBlock* row_block = nullptr; if (_key_range_size > 0) { -status = new_column_data->prepare_block_read(read_context->lower_bound_keys[_key_range_index], -read_context->is_lower_keys_included[_key_range_index], -
Re: [VOTE] Release Apache Doris 0.9.0-incubating-rc01
Moving this to the Doris dev list - general@incubator is BCC’d. Regards, Dave > On Jan 10, 2019, at 10:23 AM, Makoto Yui wrote: > > Reed, > > Why not cancel rc1 and create rc2 fixing license header issue that Willem > pointed? > It seems it's better to be fixed. > > It also seems that no one from IPMC succeeded to build distribution from > src (no sure for Luke though). > > I got the following build error (which might be gcc-5 version issue): > > [ 57%] Building CXX object > projects/compiler-rt/lib/msan/CMakeFiles/clang_rt.msan-x86_64.dir/msan_interceptors.cc.o > > /tmp/doris/apache-doris-0.9.0.rc01-incubating-src/thirdparty/src/llvm-3.4.2.src/projects/compiler-rt/lib/msan/msan_interceptors.cc: > In function 'void __msan::InitializeInterceptors()': > > /tmp/doris/apache-doris-0.9.0.rc01-incubating-src/thirdparty/src/llvm-3.4.2.src/projects/compiler-rt/lib/msan/msan_interceptors.cc:1573:1: > internal > compiler error: Segmentation fault > > } > > ^ > > Please submit a full bug report, > > with preprocessed source if appropriate. > > See for instructions. > > projects/compiler-rt/lib/msan/CMakeFiles/clang_rt.msan-x86_64.dir/build.make:134: > recipe for target > 'projects/compiler-rt/lib/msan/CMakeFiles/clang_rt.msan-x86_64.dir/msan_interceptors.cc.o' > failed > > I used ubuntu xenial on docker on OSX. 
> Software versions meets requirements (except Maven version) as seen in > > docker run ubuntu:xenial -it > > > $ apt-get install wget openjdk-8-jdk maven gcc-5 bzip2 python cmake zip > xz-utils patch byacc flex automake libtool g++ > > > root@d9e5b7017e7b:/tmp/doris/apache-doris-0.9.0.rc01-incubating-src# gcc > --version | head -1 > > gcc (Ubuntu 5.4.0-6ubuntu1~16.04.11) 5.4.0 20160609 > > > root@d9e5b7017e7b:/tmp/doris/apache-doris-0.9.0.rc01-incubating-src# java > -version > > openjdk version "1.8.0_191" > > OpenJDK Runtime Environment (build 1.8.0_191-8u191-b12-0ubuntu0.16.04.1-b12) > > OpenJDK 64-Bit Server VM (build 25.191-b12, mixed mode) > > > root@d9e5b7017e7b:/tmp/doris/apache-doris-0.9.0.rc01-incubating-src# python > --version > > Python 2.7.12 > > > root@d9e5b7017e7b:/tmp/doris/apache-doris-0.9.0.rc01-incubating-src# mvn > --version > > Apache Maven 3.3.9 > > > root@d9e5b7017e7b:/tmp/doris/apache-doris-0.9.0.rc01-incubating-src# cmake > --version > > cmake version 3.5.1 > > > $ ./build.sh > > > > Providing working Dockerfile would help to verify release by IPMC members. > > Thanks, > Makoto > > 2019年1月10日(木) 15:51 Li,De(BDG) : >> >> Hi, >> >> Does anyone help us to check and vote? >> Now we got two +1(binding). >> If needed we will cancel and call for next VOTE after fixed all issues >> which Willem and Justin mentioned. >> >> >> Best Regards, >> Reed >> >> On 2018/12/29 上午10:36, "Li,De(BDG)" wrote: >> >>> Hi Dave, >>> >>> I got it, thank you very much. Have a good trip. >>> >>> Best Regards, >>> Reed >>> >>> On 2018/12/28 下午10:49, "Dave Meikle" wrote: >>> Hi Reed, I am traveling just now so away from the desktop that I tested the build on. I setup a clean virtual machine with Ubuntu 18.04 and tried from scratch, resulting in a perfect build. So must be something funky with my configuration back at base - it's been upgraded a few times, so sorry > for the hassle. 
(NOTE: I tried to build on a remote Ubuntu 18.10 at first getting a different error but it made sense due to the 3rd-party LLVM fails due to ustat depreciation in glibc). So +1 (binding) to the release. Cheers, Dave On Thu, 27 Dec 2018 at 12:13, Li,De(BDG) wrote: > Hi Dave, > > Thanks for your reply. > > The files of flat_map.h and json_to_pb.h are come from brpc, which > should > be installed in thirdparty/installed/include. > > > It seems brpc is not been compiled and installed successfully. > > Could you remove brpc and retry build as following command ? It will > help > us to know what happed. > > > $ cd > >> /home/dmeikle/Development/Apache/incubator/release-review/apache-doris-0 > . > 9. > 0.rc01-incubating-src > $ rm -f thirdparty/installed/lib/librdkafka.a > $ rm -fr thirdparty/src/brpc-0.9.0* > $ sh build.sh > > Regards, > Reed > > On 2018/12/27 下午6:33, "David Meikle" wrote: > >> Hi Reed, >> >>> On 27 Dec 2018, at 03:00, Li,De(BDG) wrote: >>> >>> Thanks to David, could you provide the detail for failure, > actually, > we >>> expect building is OK if following tools installed on Ubuntu. >>> >>> GCC 5.3.1+, Oracle JDK 1.8+, Python 2.7+, Apache Maven 3.5+, CMake >>> 3.4.3+ >>> >>> >>> Best Regards, >>> Reed >> >> >> >> I get the following errors: >> >> -- CLANG_COMPATIBLE_FLAGS: -I/usr/include/c++/7 >> -I/usr/include/x86_64-linux-gnu/c++/7 -I/
[GitHub] kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out
kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out URL: https://github.com/apache/incubator-doris/issues/458#issuecomment-453394706 > hi @kangkaisen , Is there any failure log in be? I remember there was not any failure log in be. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out
kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out URL: https://github.com/apache/incubator-doris/issues/458#issuecomment-453395944 When I restarted a cluster on the night of 1.9, this issue happened again, so I dug into this issue yesterday. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] imay opened a new pull request #526: Remove useless check, not need lsb_release any more
imay opened a new pull request #526: Remove useless check, not need lsb_release any more URL: https://github.com/apache/incubator-doris/pull/526 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out
kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out URL: https://github.com/apache/incubator-doris/issues/458#issuecomment-453398715 The FE error log: ` 2019-01-10 14:00:22,198 WARN 2142 [AgentBatchTask.run():133] task exec error. backend[10001] org.apache.thrift.transport.TTransportException at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:132) ~[libthrift-0.9.3.jar:0.9.3] at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) ~[libthrift-0.9.3.jar:0.9.3] at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429) ~[libthrift-0.9.3.jar:0.9.3] at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318) ~[libthrift-0.9.3.jar:0.9.3] at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219) ~[libthrift-0.9.3.jar:0.9.3] at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77) ~[libthrift-0.9.3.jar:0.9.3] at org.apache.doris.thrift.BackendService$Client.recv_submit_tasks(BackendService.java:256) ~[palo-fe.jar:?] at org.apache.doris.thrift.BackendService$Client.submit_tasks(BackendService.java:243) ~[palo-fe.jar:?] at org.apache.doris.task.AgentBatchTask.run(AgentBatchTask.java:124) [palo-fe.jar:?] at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_112] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_112] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_112] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_112] at java.lang.Thread.run(Thread.java:745) [?:1.8.0_112] ` And there is no create table log in BE. The exception at TIOStreamTransport.java:132 is ` if (bytesRead < 0) { throw new TTransportException(TTransportException.END_OF_FILE); } ` I tried reproducing this issue in our dev cluster: 1. kill -9 all BE processes 2. 
when all BE thrift servers have started, create the table with the mysql client. The TTransportException will occur and the create table will time out. I used ` netstat -antp | grep 9060` to check the TCP connection status, and I found that the BE TCP connection status is FIN_WAIT_2 and the FE TCP connection status is CLOSE_WAIT. So when we use a BackendService.Client whose TCP connection is in CLOSE_WAIT, the TTransportException will occur. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out
kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out URL: https://github.com/apache/incubator-doris/issues/458#issuecomment-453399131 So I think that when the TCP connection is in the CLOSE_WAIT state, we should close the old TCP connection and create a new TCP connection. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] lide-reed closed pull request #526: Remove useless check, not need lsb_release any more
lide-reed closed pull request #526: Remove useless check, not need lsb_release any more URL: https://github.com/apache/incubator-doris/pull/526 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/.gitignore b/.gitignore index b9bce7bf..5c8ed763 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ docs/build gensrc/build fe/target thirdparty/src +*.so.tmp diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index a829efe1..fe4337ad 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -383,31 +383,6 @@ message(STATUS "LLVM compile flags: ${LLVM_CFLAGS}") # these reasons we need to manually add the system c++ headers to the path when we # compile the IR code with clang. -# Check the release version of the system to set the correct flags. -# You may have to modify the ${CLANG_BASE_FLAGS} by you own. 
-execute_process(COMMAND lsb_release -si OUTPUT_VARIABLE LINUX_VERSION) -string(TOLOWER ${LINUX_VERSION} LINUX_VERSION_LOWER) -message(STATUS "${LINUX_VERSION_LOWER}") - -# if(DEFINED ENV{CLANG_BASE_FLAGS}) -# set(CLANG_BASE_FLAGS -# $ENV{CLANG_BASE_FLAGS}) -# elseif(${LINUX_VERSION_LOWER} MATCHES "ubuntu") -# set(CLANG_BASE_FLAGS -# "-I/usr/include/c++/5/" -# "-I/usr/include/x86_64-linux-gnu/c++/5/") -# elseif(${LINUX_VERSION_LOWER} MATCHES "centos") -# set(CLANG_BASE_FLAGS -# "-I/usr/include/c++/4.8.5/" -# "-I/usr/include/c++/4.8.5/x86_64-redhat-linux/") -# elseif(${LINUX_VERSION_LOWER} MATCHES "fedora") -# set(CLANG_BASE_FLAGS -# "-I/usr/include/c++/7/" -# "-I/usr/include/c++/7/x86_64-redhat-linux/") -# else() -# message(FATAL_ERROR "Currently not support system ${LINUX_VERSION}") -# endif() - if(DEFINED ENV{CLANG_COMPATIBLE_FLAGS}) set(CLANG_COMPATIBLE_FLAGS $ENV{CLANG_COMPATIBLE_FLAGS}) endif() diff --git a/be/test/runtime/test_data/user_function_cache/download/1/1.1234.so.tmp b/be/test/runtime/test_data/user_function_cache/download/1/1.1234.so.tmp deleted file mode 100644 index 64cb35c2.. Binary files a/be/test/runtime/test_data/user_function_cache/download/1/1.1234.so.tmp and /dev/null differ This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] kangkaisen opened a new pull request #527: Add retry for TCP CLOSE_WAIT in AgentBatchTask
kangkaisen opened a new pull request #527: Add retry for TCP CLOSE_WAIT in AgentBatchTask URL: https://github.com/apache/incubator-doris/pull/527 Partially fix [458](https://github.com/apache/incubator-doris/issues/458) This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] morningman commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out
morningman commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out URL: https://github.com/apache/incubator-doris/issues/458#issuecomment-453400999 You can check these 2 commits: https://github.com/apache/incubator-doris/commit/b5737ee59a4fbced6ca61b748216fb15140cbe99 https://github.com/apache/incubator-doris/commit/ac01da49847a5ad7e584fe583f467d2e91ef7bf9 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out
kangkaisen commented on issue #458: Create table error: java.net.SocketTimeoutException: Read timed out URL: https://github.com/apache/incubator-doris/issues/458#issuecomment-453403362 > You can check these 2 commits: > [b5737ee](https://github.com/apache/incubator-doris/commit/b5737ee59a4fbced6ca61b748216fb15140cbe99) > [ac01da4](https://github.com/apache/incubator-doris/commit/ac01da49847a5ad7e584fe583f467d2e91ef7bf9) Thank you. I think ac01da49847a5a is enough. https://github.com/apache/incubator-doris/pull/408 is better than my PR. I will test it. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] kangkaisen closed pull request #527: Add retry for TCP CLOSE_WAIT in AgentBatchTask
kangkaisen closed pull request #527: Add retry for TCP CLOSE_WAIT in AgentBatchTask URL: https://github.com/apache/incubator-doris/pull/527 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java b/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java index 4d429f9c..a67800db 100644 --- a/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java +++ b/fe/src/main/java/org/apache/doris/task/AgentBatchTask.java @@ -46,6 +46,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.thrift.transport.TTransportException; import java.util.HashMap; import java.util.LinkedList; @@ -115,13 +116,39 @@ public void run() { List tasks = this.backendIdToTasks.get(backendId); // create AgentClient address = new TNetworkAddress(backend.getHost(), backend.getBePort()); + client = ClientPool.backendPool.borrowObject(address); List agentTaskRequests = new LinkedList(); for (AgentTask task : tasks) { agentTaskRequests.add(toAgentTaskRequest(task)); } -client.submit_tasks(agentTaskRequests); + +int count = 0; +int retryCount = 1; +boolean needRetry = true; +while (needRetry && count++ <= retryCount) { +needRetry = false; +try { +client.submit_tasks(agentTaskRequests); +} catch (TTransportException e) { +//handle the TCP connection is CLOSE_WAIT status +if (e.getMessage() == null && (e.getType() == TTransportException.END_OF_FILE)) { +LOG.warn("Maybe the BE {} : {} restarted just now, will retry", backendId, backend.getHost()); +needRetry = true; + +//close the TCP connection +ClientPool.backendPool.invalidateObject(address, client); + +//create a new TCP connection +address = new TNetworkAddress(backend.getHost(), backend.getBePort()); +client = 
ClientPool.backendPool.borrowObject(address); +} else { +throw e; +} +} +} + if (LOG.isDebugEnabled()) { for (AgentTask task : tasks) { LOG.debug("send task: type[{}], backend[{}], signature[{}]", This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] kangkaisen commented on issue #527: Add retry for TCP CLOSE_WAIT in AgentBatchTask
kangkaisen commented on issue #527: Add retry for TCP CLOSE_WAIT in AgentBatchTask URL: https://github.com/apache/incubator-doris/pull/527#issuecomment-453411410 This PR duplicates https://github.com/apache/incubator-doris/pull/408. https://github.com/apache/incubator-doris/pull/408 is more reasonable because we already have a heartbeat between FE and BE, so we can rely on the heartbeat to detect that the TCP connection is broken. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org
[GitHub] kangkaisen closed issue #245: Colocate Join
kangkaisen closed issue #245: Colocate Join URL: https://github.com/apache/incubator-doris/issues/245 This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services - To unsubscribe, e-mail: dev-unsubscr...@doris.apache.org For additional commands, e-mail: dev-h...@doris.apache.org