magibney commented on a change in pull request #2: URL: https://github.com/apache/solr/pull/2#discussion_r595447714
########## File path: solr/core/src/java/org/apache/solr/search/SortedIntDocSet.java ########## @@ -671,107 +673,141 @@ public DocSet union(DocSet other) { return new BitDocSet(newbits); } - @Override - public Filter getTopFilter() { - return new Filter() { + private int[] cachedOrdIdxMap; + private int[] getOrdIdxMap(LeafReaderContext ctx) { + final int[] ret; + if (ctx.isTopLevel) { + // don't bother caching this? + ret = new int[] {0, docs.length - 1}; + } else if (cachedOrdIdxMap != null) { + ret = cachedOrdIdxMap; + } else { + List<LeafReaderContext> leaves = ReaderUtil.getTopLevelContext(ctx).leaves(); + ret = new int[leaves.size() << 1]; int lastEndIdx = 0; - - @Override - public DocIdSet getDocIdSet(final LeafReaderContext context, final Bits acceptDocs) { - LeafReader reader = context.reader(); - // all Solr DocSets that are used as filters only include live docs - final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs); - - final int base = context.docBase; - final int maxDoc = reader.maxDoc(); + for (LeafReaderContext lrc : leaves) { + final int base = lrc.docBase; + final int maxDoc = lrc.reader().maxDoc(); final int max = base + maxDoc; // one past the max doc in this segment. int sidx = Math.max(0,lastEndIdx); - if (sidx > 0 && docs[sidx-1] >= base) { - // oops, the lastEndIdx isn't correct... we must have been used - // in a multi-threaded context, or the indexreaders are being - // used out-of-order. start at 0. - sidx = 0; - } if (sidx < docs.length && docs[sidx] < base) { // if docs[sidx] is < base, we need to seek to find the real start. sidx = findIndex(docs, base, sidx, docs.length-1); } - final int startIdx = sidx; - // Largest possible end index is limited to the start index // plus the number of docs contained in the segment. Subtract 1 since // the end index is inclusive. - int eidx = Math.min(docs.length, startIdx + maxDoc) - 1; + int eidx = Math.min(docs.length, sidx + maxDoc) - 1; // find the real end - eidx = findIndex(docs, max, startIdx, eidx) - 1; + eidx = findIndex(docs, max, sidx, eidx) - 1; + + final int mapOrdIdx = lrc.ord << 1; + ret[mapOrdIdx] = sidx; + ret[mapOrdIdx + 1] = eidx; + lastEndIdx = eidx; + } + cachedOrdIdxMap = ret; // replace atomically after building + } + return ret; + } + + @Override + public DocIdSetIterator iterator(LeafReaderContext context) { + + if (docs.length == 0 || context.reader().maxDoc() < 1) { + // empty docset or entirely empty segment (verified that the latter actually happens) + // NOTE: wrt the "empty docset" case, this is not just an optimization; this shortcircuits also + // to prevent the static DocSet.EmptyLazyHolder.INSTANCE from having cachedOrdIdxMap initiated + // across different IndexReaders. + return null; + } + + int[] ordIdxMap = getOrdIdxMap(context); + final int base = context.docBase; + + final int mapOrdIdx = context.ord << 1; + final int startIdx = ordIdxMap[mapOrdIdx]; + final int endIdx = ordIdxMap[mapOrdIdx + 1]; + + if (startIdx > endIdx) { + return null; // verified this does happen Review comment: This ended up being a little more involved than I'd anticipated (f8ba35129b) -- and also addressed the "left-shift" comment above. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org