Jiabao-Sun commented on code in PR #1: URL: https://github.com/apache/flink-connector-mongodb/pull/1#discussion_r1025484017
########## flink-connector-mongodb/src/main/java/org/apache/flink/connector/mongodb/common/utils/MongoUtils.java: ########## @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.connector.mongodb.common.utils; + +import org.apache.flink.annotation.Internal; + +import com.mongodb.MongoNamespace; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoCollection; +import org.bson.BsonBoolean; +import org.bson.BsonDocument; +import org.bson.BsonInt32; +import org.bson.BsonString; +import org.bson.conversions.Bson; + +import javax.annotation.Nullable; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +import static com.mongodb.client.model.Filters.eq; +import static com.mongodb.client.model.Filters.or; +import static com.mongodb.client.model.Projections.excludeId; +import static com.mongodb.client.model.Projections.fields; +import static com.mongodb.client.model.Projections.include; +import static com.mongodb.client.model.Sorts.ascending; +import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.DROPPED_FIELD; +import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.ID_FIELD; +import 
static org.apache.flink.connector.mongodb.common.utils.MongoConstants.KEY_FIELD; +import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.MAX_FIELD; +import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.MIN_FIELD; +import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.NAMESPACE_FIELD; +import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.OK_FIELD; +import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.SHARD_FIELD; +import static org.apache.flink.connector.mongodb.common.utils.MongoConstants.UUID_FIELD; + +/** A util class with some helper method for MongoDB commands. */ +@Internal +public class MongoUtils { + + public static final int UNAUTHORIZED_ERROR = 13; + + public static final String COLL_STATS_COMMAND = "collStats"; + public static final String SPLIT_VECTOR_COMMAND = "splitVector"; + public static final String KEY_PATTERN_OPTION = "keyPattern"; + public static final String MAX_CHUNK_SIZE_OPTION = "maxChunkSize"; + + public static final String CONFIG_DATABASE = "config"; + public static final String COLLECTIONS_COLLECTION = "collections"; + public static final String CHUNKS_COLLECTION = "chunks"; + + private MongoUtils() {} + + public static BsonDocument collStats(MongoClient mongoClient, MongoNamespace namespace) { + BsonDocument collStatsCommand = + new BsonDocument(COLL_STATS_COMMAND, new BsonString(namespace.getCollectionName())); + return mongoClient + .getDatabase(namespace.getDatabaseName()) + .runCommand(collStatsCommand, BsonDocument.class); + } + + public static BsonDocument splitVector( + MongoClient mongoClient, + MongoNamespace namespace, + BsonDocument keyPattern, + int maxChunkSizeMB) { + return splitVector(mongoClient, namespace, keyPattern, maxChunkSizeMB, null, null); + } + + public static BsonDocument splitVector( + MongoClient mongoClient, + MongoNamespace namespace, + BsonDocument keyPattern, + int maxChunkSizeMB, + @Nullable 
BsonDocument min, + @Nullable BsonDocument max) { + BsonDocument splitVectorCommand = + new BsonDocument(SPLIT_VECTOR_COMMAND, new BsonString(namespace.getFullName())) + .append(KEY_PATTERN_OPTION, keyPattern) + .append(MAX_CHUNK_SIZE_OPTION, new BsonInt32(maxChunkSizeMB)); + Optional.ofNullable(min).ifPresent(v -> splitVectorCommand.append(MIN_FIELD, v)); + Optional.ofNullable(max).ifPresent(v -> splitVectorCommand.append(MAX_FIELD, v)); + return mongoClient + .getDatabase(namespace.getDatabaseName()) + .runCommand(splitVectorCommand, BsonDocument.class); + } + + @Nullable + public static BsonDocument readCollectionMetadata( + MongoClient mongoClient, MongoNamespace namespace) { + MongoCollection<BsonDocument> collection = + mongoClient + .getDatabase(CONFIG_DATABASE) + .getCollection(COLLECTIONS_COLLECTION) + .withDocumentClass(BsonDocument.class); + + return collection + .find(eq(ID_FIELD, namespace.getFullName())) + .projection(include(ID_FIELD, UUID_FIELD, DROPPED_FIELD, KEY_FIELD)) Review Comment: > What is this `DROPPED_FIELD` about? Is this about whether the `collection` was dropped? Yes. This field only exists in older versions of MongoDB (up to and including 3.6), where the config server flags a dropped sharded collection as "dropped" instead of deleting its entry. See https://jira.mongodb.org/browse/SERVER-27474 >In releases up to and including 3.6, when a sharded collection was dropped, the config server, instead of deleting the dropped collection from its on-disk list of sharded collections, would instead tag it as "dropped". This was necessary because a <= 3.4 mongos, when a client tried to use the dropped collection, would get an error from the shard, and then would retrieve the whole collections list from the config server to update its collections cache. The mongos needs to see the collections with a "dropped" flag to know to discard its corresponding cached records of those collections and their chunks. 
As a consequence, other activities that read the list of collections have needed to filter out the "dropped" ones, and the records of dropped collections accumulate indefinitely. > >The 3.6 mongos no longer reads in the whole list of collections when it gets a "collection does not exist" error from a shard, but only the entry for the one collection, so does not need to see dropped collections identified. "Dropped" entries left over from <= 3.6 should be ignored/skipped when reading the collections list, and no new ones should be written by the 3.8 config server. Collection entries in 3.8 no longer need the "dropped" flag, and code that checks for it may be deleted. > >The upgrade process for 3.6 -> 3.8 config servers should scrub the remaining "dropped" entries from config.collections. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org