clintropolis commented on code in PR #19535: URL: https://github.com/apache/druid/pull/19535#discussion_r3415379015
########## processing/src/main/java/org/apache/druid/segment/PartialQueryableIndexSegment.java: ########## @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment; + +import org.apache.druid.timeline.SegmentId; +import org.apache.druid.utils.CloseableUtils; +import org.joda.time.Interval; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.Closeable; + +/** + * {@link Segment} wrapper around a {@link PartialQueryableIndex}. Mirrors {@link QueryableIndexSegment} but wires up + * the V10-specific {@link V10TimeBoundaryInspector} (which answers from + * {@link org.apache.druid.segment.projections.ProjectionMetadata} min/max fields without downloading the + * {@code __time} column) and the partial-aware {@link PartialQueryableIndexCursorFactory} (which downloads + * required files on the supplied download executor before handing back a cursor). + * <p> + * Lifecycle: this segment is intended to exist as a transient reference-hold scope over an externally-owned + * {@link PartialQueryableIndex}, it never closes the underlying index. The {@code onClose} hook is what + * {@link #close()} fires when the segment is closed (e.g. to 'release' reference tracking stuff in the cache layer). + */ +public class PartialQueryableIndexSegment implements ReferenceCountedSegmentProvider.LeafReference +{ + private final PartialQueryableIndex index; + private final PartialQueryableIndexCursorFactory cursorFactory; + private final TimeBoundaryInspector timeBoundaryInspector; + private final SegmentId segmentId; + private final Closeable onClose; + + public PartialQueryableIndexSegment( + PartialQueryableIndex index, + SegmentId segmentId, + Closeable onClose, + PartialBundleAcquirer bundleAcquirer + ) + { + this.index = index; + this.timeBoundaryInspector = V10TimeBoundaryInspector.forBaseProjection( + index.getBaseProjectionMetadata(), + index.getDataInterval() + ); + this.cursorFactory = new PartialQueryableIndexCursorFactory( + index, + timeBoundaryInspector, + bundleAcquirer + ); + this.segmentId = segmentId; + this.onClose = onClose; + } + + @Override + public SegmentId getId() + { + return segmentId; + } + + @Override + public Interval getDataInterval() + { + return index.getDataInterval(); + } + + @Override + public void close() + { + CloseableUtils.closeAndWrapExceptions(onClose); + } + + @SuppressWarnings("unchecked") + @Nullable + @Override + public <T> T as(@Nonnull Class<T> clazz) Review Comment: #19577 split out `RowCountInspector` which is now wired up in this PR so we can answer the row count stuff which is much more widely used than the other parts of this interface (and answerable purely by metadata with v10 segments) ########## server/src/main/java/org/apache/druid/server/SegmentManager.java: ########## @@ -225,6 +224,32 @@ public AcquireSegmentAction acquireSegment(DataSegment dataSegment) return cacheManager.acquireSegment(dataSegment); } + /** + * Partial-load variant of {@link #acquireCachedSegment(SegmentId)}, returns a {@link Segment} when the cache holds + * an entry for the id; empty otherwise. The returned segment may not be fully loaded, callers must use async methods + * like {@link org.apache.druid.segment.CursorFactory#makeCursorHolderAsync} to download data on-demand. If the + * returned segment is only partially loaded, the synchronous methods like {@code makeCursorHolder} will fail if + * anything is still missing. If the segment is fully loaded, or not capable of partial loading, this method will + * still return a segment if it is present in cache and any async methods will function properly and return + * immediately. + */ + public Optional<Segment> acquireCachedPartialSegment(SegmentId segmentId) + { + return cacheManager.acquireCachedPartialSegment(segmentId); + } + + /** + * Partial-load variant of {@link #acquireSegment(DataSegment)}, returns an {@link AcquireSegmentAction} that Review Comment: the methods are now combined and accept an enum argument, i think it is a bit clearer what happens ########## server/src/main/java/org/apache/druid/server/SegmentManager.java: ########## @@ -225,6 +224,32 @@ public AcquireSegmentAction acquireSegment(DataSegment dataSegment) return cacheManager.acquireSegment(dataSegment); } + /** + * Partial-load variant of {@link #acquireCachedSegment(SegmentId)}, returns a {@link Segment} when the cache holds + * an entry for the id; empty otherwise. The returned segment may not be fully loaded, callers must use async methods + * like {@link org.apache.druid.segment.CursorFactory#makeCursorHolderAsync} to download data on-demand. If the + * returned segment is only partially loaded, the synchronous methods like {@code makeCursorHolder} will fail if + * anything is still missing. If the segment is fully loaded, or not capable of partial loading, this method will + * still return a segment if it is present in cache and any async methods will function properly and return + * immediately. + */ + public Optional<Segment> acquireCachedPartialSegment(SegmentId segmentId) + { + return cacheManager.acquireCachedPartialSegment(segmentId); + } + + /** + * Partial-load variant of {@link #acquireSegment(DataSegment)}, returns an {@link AcquireSegmentAction} that Review Comment: the methods are now combined and accept an `AcquireMode` enum argument, i think it is a bit clearer what happens ########## server/src/main/java/org/apache/druid/server/SegmentManager.java: ########## @@ -225,6 +224,32 @@ public AcquireSegmentAction acquireSegment(DataSegment dataSegment) return cacheManager.acquireSegment(dataSegment); } + /** + * Partial-load variant of {@link #acquireCachedSegment(SegmentId)}, returns a {@link Segment} when the cache holds Review Comment: same thing with the enum `AcquireMode` argument, i think it should be more obvious now -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
