This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 6751e5b23c [fix](alter)(tablet-scheduler) fix unexpected exception with compaction_too_slow message when add rollup for olap table (#10827) 6751e5b23c is described below commit 6751e5b23c8bee302a6802638bd78739128b4355 Author: caiconghui <55968745+caicong...@users.noreply.github.com> AuthorDate: Fri Jul 15 19:59:00 2022 +0800 [fix](alter)(tablet-scheduler) fix unexpected exception with compaction_too_slow message when add rollup for olap table (#10827) --- .../org/apache/doris/alter/MaterializedViewHandler.java | 10 ++++++---- .../java/org/apache/doris/clone/TabletSchedCtx.java | 17 ++++++++--------- .../java/org/apache/doris/clone/TabletScheduler.java | 14 ++++---------- .../src/main/java/org/apache/doris/common/Config.java | 2 +- .../apache/doris/clone/TabletReplicaTooSlowTest.java | 3 --- 5 files changed, 19 insertions(+), 27 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java index 9612fbb83d..1d2b9a7cf6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java @@ -37,6 +37,7 @@ import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.Replica.ReplicaState; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.TabletInvertedIndex; @@ -384,18 +385,19 @@ public class MaterializedViewHandler extends AlterHandler { for (Replica baseReplica : baseReplicas) { long mvReplicaId = catalog.getNextId(); long backendId = baseReplica.getBackendId(); - if (baseReplica.getState() == Replica.ReplicaState.CLONE - || baseReplica.getState() == Replica.ReplicaState.DECOMMISSION + if (baseReplica.getState() == ReplicaState.CLONE + || baseReplica.getState() == ReplicaState.DECOMMISSION + || baseReplica.getState() == ReplicaState.COMPACTION_TOO_SLOW || baseReplica.getLastFailedVersion() > 0) { LOG.info("base replica {} of tablet {} state is {}, and last failed version is {}," + " skip creating rollup replica", baseReplica.getId(), baseTabletId, baseReplica.getState(), baseReplica.getLastFailedVersion()); continue; } - Preconditions.checkState(baseReplica.getState() == Replica.ReplicaState.NORMAL, + Preconditions.checkState(baseReplica.getState() == ReplicaState.NORMAL, baseReplica.getState()); // replica's init state is ALTER, so that tablet report process will ignore its report - Replica mvReplica = new Replica(mvReplicaId, backendId, Replica.ReplicaState.ALTER, + Replica mvReplica = new Replica(mvReplicaId, backendId, ReplicaState.ALTER, Partition.PARTITION_INIT_VERSION, mvSchemaHash); newTablet.addReplica(mvReplica); healthyReplicaNum++; diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java index 57a77bb33f..87ed910fd7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java @@ -527,28 +527,27 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> { public boolean compactionRecovered() { Replica chosenReplica = null; - long maxVersionCount = -1; - long minVersionCount = Integer.MAX_VALUE; + long maxVersionCount = Integer.MIN_VALUE; for (Replica replica : tablet.getReplicas()) { if (replica.getVersionCount() > maxVersionCount) { maxVersionCount = replica.getVersionCount(); chosenReplica = replica; } - if (replica.getVersionCount() < minVersionCount) { - minVersionCount = replica.getVersionCount(); - } } boolean recovered = false; for (Replica replica : tablet.getReplicas()) { - if (replica.isAlive() && replica.tooSlow() && !chosenReplica.equals(replica)) { - chosenReplica.setState(ReplicaState.NORMAL); - recovered = true; + if (replica.isAlive() && replica.tooSlow() && (!replica.equals(chosenReplica) + || replica.getVersionCount() < Config.min_version_count_indicate_replica_compaction_too_slow)) { + if (chosenReplica != null) { + chosenReplica.setState(ReplicaState.NORMAL); + recovered = true; + } } } return recovered; } - // database lock should be held. + // table lock should be held. // If exceptBeId != -1, should not choose src replica with same BE id as exceptBeId public void chooseSrcReplica(Map<Long, PathSlot> backendsWorkingSlots, long exceptBeId) throws SchedException { /* diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index cd731bb0ff..7fb1b75481 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -1071,9 +1071,7 @@ public class TabletScheduler extends MasterDaemon { */ private void handleReplicaTooSlow(TabletSchedCtx tabletCtx) throws SchedException { Replica chosenReplica = null; - Replica minReplica = null; long maxVersionCount = -1; - long minVersionCount = Integer.MAX_VALUE; int normalReplicaCount = 0; for (Replica replica : tabletCtx.getReplicas()) { if (replica.isAlive() && !replica.tooSlow()) { @@ -1083,20 +1081,16 @@ public class TabletScheduler extends MasterDaemon { maxVersionCount = replica.getVersionCount(); chosenReplica = replica; } - if (replica.getVersionCount() < minVersionCount) { - minVersionCount = replica.getVersionCount(); - minReplica = replica; - } } - - if (chosenReplica != null && !chosenReplica.equals(minReplica) && minReplica.isAlive() && !minReplica.tooSlow() - && normalReplicaCount >= 1) { + if (chosenReplica != null && chosenReplica.isAlive() && !chosenReplica.tooSlow() + && chosenReplica.getVersionCount() > Config.min_version_count_indicate_replica_compaction_too_slow + && normalReplicaCount - 1 >= tabletCtx.getReplicas().size() / 2 + 1) { chosenReplica.setState(ReplicaState.COMPACTION_TOO_SLOW); LOG.info("set replica id :{} tablet id: {}, backend id: {} to COMPACTION_TOO_SLOW", chosenReplica.getId(), tabletCtx.getTablet().getId(), chosenReplica.getBackendId()); throw new SchedException(Status.FINISHED, "set replica to COMPACTION_TOO_SLOW"); } - throw new SchedException(Status.FINISHED, "No replica too slow"); + throw new SchedException(Status.FINISHED, "No replica set to COMPACTION_TOO_SLOW"); } private void deleteReplicaInternal(TabletSchedCtx tabletCtx, diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java index 5aaac04a42..749322e48a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java @@ -1587,7 +1587,7 @@ public class Config extends ConfigBase { public static int min_version_count_indicate_replica_compaction_too_slow = 200; /** - * The valid ratio threshold of the difference between the version count of the slowest replicaand the fastest + * The valid ratio threshold of the difference between the version count of the slowest replica and the fastest * replica. If repair_slow_replica is set to true, it is used to determine whether to repair the slowest replica */ @ConfField(mutable = true, masterOnly = true) diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java index b298723ed8..1fa209f8f2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletReplicaTooSlowTest.java @@ -139,9 +139,6 @@ public class TabletReplicaTooSlowTest { Backend be = Catalog.getCurrentSystemInfo().getBackend(beId); List<Long> pathHashes = be.getDisks().values().stream() .map(DiskInfo::getPathHash).collect(Collectors.toList()); - if (be == null) { - continue; - } Replica replica = cell.getValue(); replica.setVersionCount(versionCount); versionCount = versionCount + 200; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org