clintropolis commented on code in PR #19374: URL: https://github.com/apache/druid/pull/19374#discussion_r3163675245
########## server/src/main/java/org/apache/druid/server/coordinator/rules/ProjectionPartialLoadMatcher.java: ########## @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.rules; + +import com.google.common.hash.Hasher; +import com.google.common.hash.Hashing; +import com.google.common.io.BaseEncoding; +import org.apache.druid.timeline.DataSegment; + +import javax.annotation.Nullable; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +/** + * Base for {@link PartialLoadMatcher} implementations that decide which of a segment's V10 projections to load. + * Subclasses supply the resolution policy via {@link #resolveProjectionNames(DataSegment)}; this base handles + * fingerprint computation and wraps the result into the {@code partialProjection} load-spec wire form consumed + * by the historical-side {@code PartialProjectionLoadSpec} (which does not exist yet, supplied in future work). 
+ * <p> + * The fingerprint is a hash of what projections are partially loaded on a segment by this rule; the data node will + * include this value in the segment announcement so that it can be used as a lightweight value to compare against + * to handle things like rule change so that we can ensure that the 'right' partial load is in place from run to run. + */ +public abstract class ProjectionPartialLoadMatcher implements PartialLoadMatcher +{ + static final String WIRE_TYPE = "partialProjection"; + static final String FINGERPRINT_VERSION = "v1"; + + /** + * Returns the sorted, deduped list of projection names from {@link DataSegment#getProjections()} that this matcher + * selects. Returns an empty list when nothing matches (the segment exposes no projections, or no configured pattern + * intersects what the segment has). + */ + protected abstract List<String> resolveProjectionNames(DataSegment segment); + + @Override + @Nullable + public MatchResult match(DataSegment segment, Map<String, Object> baseLoadSpec) + { + final List<String> resolved = resolveProjectionNames(segment); + if (resolved.isEmpty()) { + return null; + } + final String fingerprint = computeFingerprint(resolved); + final Map<String, Object> wrapped = Map.of( + "type", WIRE_TYPE, + "delegate", baseLoadSpec, + "projections", resolved, + "ruleFingerprint", fingerprint + ); + return new MatchResult(wrapped, fingerprint); + } + + static String computeFingerprint(List<String> sortedDedupedNames) + { + final Hasher hasher = Hashing.sha256().newHasher(); + for (String name : sortedDedupedNames) { + hasher.putString(name, StandardCharsets.UTF_8); Review Comment: Oh, I looked into this but didn't change it yet because i sort of forgot about it. 
FWIW we are using putString in a lot more places in the code than putUnencodedChars; the javadocs do say that it is fine so long as Java is the only language involved, so it is probably fine to change, especially since a version is included in the fingerprint, so I guess we could just bump the version if we ever want to support non-Java processes being involved in this stuff in any manner... -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
