minihippo commented on a change in pull request #3173: URL: https://github.com/apache/hudi/pull/3173#discussion_r760226513
########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java ########## @@ -200,6 +209,48 @@ .defaultValue("true") .withDocumentation("Similar to " + BLOOM_INDEX_UPDATE_PARTITION_PATH_ENABLE + ", but for simple index."); + /** + * ***** Bucket Index Configs ***** + * Bucket Index is targeted to locate the record fast by hash in big data scenarios. + * The current implementation is a basic version, so there are some constraints: + * 1. Unsupported operation: bulk insert, cluster and so on. + * 2. Bucket num change requires rewriting the partition. + * 3. Predict the table size and future data growth well to set a reasonable bucket num. + * 4. A bucket size is recommended to be less than 3GB; avoid being too small. + */ + // Bucket num equals file groups num in each partition. + // Bucket num can be set according to partition size and file group size. + public static final ConfigProperty<Integer> BUCKET_INDEX_NUM_BUCKETS = ConfigProperty + .key("hoodie.bucket.index.num.buckets") + .defaultValue(256) + .withDocumentation("Only applies if index type is BUCKET_INDEX. Determine the bucket num of the hudi table, " + "and each partition is divided to N buckets."); + + public static final ConfigProperty<String> BUCKET_INDEX_HASH_FIELD = ConfigProperty + .key("hoodie.bucket.index.hash.field") + .noDefaultValue() + .withDocumentation("Index key. It is used to index the record and find its file group. " + "If not set, use record key field as default."); + + public static final ConfigProperty<String> BUCKET_INDEX_HASH_FUNCTION = ConfigProperty + .key("hoodie.bucket.index.hash.function") + .defaultValue("JVMHash") + .withDocumentation("Hash function. It is used to compute the index key hash value. " + "Possible options are [JVMHash | HiveHash]."); + public static final Set<WriteOperationType> BUCKET_INDEX_SUPPORTED_OPERATIONS = new HashSet<WriteOperationType>() {{ + add(WriteOperationType.INSERT); Review comment: It is caused by the insert operation not tagging the location before creating the work profile. Will fix. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org