jpountz commented on PR #13359:
URL: https://github.com/apache/lucene/pull/13359#issuecomment-2132902792
<details>
<summary>
Now that #13408 has been merged, I could update the benchmark to simply call
IndexSearcher#search.
</summary>
```java
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Stand-alone latency benchmark for small disjunctive term queries.
 *
 * <p>On first run the index at the given path is created: documents holding random numeric string
 * terms in field {@code "f"} are indexed concurrently and the index is force-merged to a single
 * segment. Every run then executes {@link #NUM_QUERIES} boolean queries of {@link #NUM_TERMS}
 * random {@code SHOULD} term clauses and prints the P50/P90/P99 latencies in microseconds.
 *
 * <p>Usage: {@code java TermsEnumPrefetchBench <index-directory>}
 */
public class TermsEnumPrefetchBench {

  /** Number of {@code SHOULD} term clauses per benchmark query. */
  private static final int NUM_TERMS = 3;

  /**
   * Exclusive upper bound of the random term values. Indexing and searching must draw from the
   * same range, hence a single shared constant.
   */
  private static final long TERM_SPACE = 20_000_000_000L;

  /** Number of indexing tasks handed to the thread pool. */
  private static final int INDEXING_TASKS = 1000;

  /** Number of documents added by each indexing task. */
  private static final int DOCS_PER_TASK = 1_000;

  /** Number of random terms added to each document. */
  private static final int TERMS_PER_DOC = 10_000;

  /** Number of queries executed (and latencies recorded) per run. */
  private static final int NUM_QUERIES = 10_000;

  /** Sink for hit counts so that the search results are observably used. */
  public static int DUMMY;

  /**
   * Entry point: builds the index if it does not exist yet, runs the queries and prints latency
   * percentiles.
   *
   * @param args {@code args[0]} is the path of the index directory
   * @throws IllegalArgumentException if no index directory is given
   * @throws Exception if indexing or searching fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      throw new IllegalArgumentException("Usage: TermsEnumPrefetchBench <index-directory>");
    }
    Path dirPath = Paths.get(args[0]);
    // try-with-resources so the directory is released even when indexing or searching fails.
    try (Directory dir = FSDirectory.open(dirPath)) {
      if (DirectoryReader.indexExists(dir) == false) {
        buildIndex(dir);
      }
      List<Long> latencies = runQueries(dir);
      latencies.sort(null);
      System.out.println("P50: " + latencies.get(latencies.size() / 2));
      System.out.println("P90: " + latencies.get(latencies.size() * 9 / 10));
      System.out.println("P99: " + latencies.get(latencies.size() * 99 / 100));
    }
  }

  /** Creates the benchmark index in {@code dir} using all available processors, then force-merges it. */
  private static void buildIndex(Directory dir) throws Exception {
    TieredMergePolicy mp = new TieredMergePolicy();
    mp.setSegmentsPerTier(100);
    mp.setMaxMergeAtOnce(100);
    mp.setMaxMergedSegmentMB(1024);
    IndexWriterConfig config =
        new IndexWriterConfig().setMergePolicy(mp).setRAMBufferSizeMB(1024);
    try (IndexWriter w = new IndexWriter(dir, config)) {
      ExecutorService executor =
          Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
      try {
        AtomicLong indexed = new AtomicLong(0);
        List<Future<?>> pending = new ArrayList<>(INDEXING_TASKS);
        for (int task = 0; task < INDEXING_TASKS; ++task) {
          // Submitted as a Callable so that an IOException surfaces through Future#get instead of
          // silently killing a worker thread and leaving a partially built index behind.
          pending.add(
              executor.submit(
                  () -> {
                    indexDocuments(w, indexed);
                    return null;
                  }));
        }
        executor.shutdown();
        for (Future<?> future : pending) {
          future.get(); // rethrows the first indexing failure as ExecutionException
        }
      } finally {
        // No-op after a clean run; on failure, stop the workers before the writer gets closed.
        executor.shutdownNow();
        executor.awaitTermination(1, TimeUnit.DAYS);
      }
      w.commit();
      System.out.println("Start force merging");
      w.forceMerge(1);
      System.out.println("Done force merging");
      w.commit();
    }
  }

  /** Adds {@link #DOCS_PER_TASK} documents of random terms to {@code w}, reporting progress. */
  private static void indexDocuments(IndexWriter w, AtomicLong indexed) throws IOException {
    ThreadLocalRandom r = ThreadLocalRandom.current();
    for (int i = 0; i < DOCS_PER_TASK; ++i) {
      Document doc = new Document();
      for (int j = 0; j < TERMS_PER_DOC; ++j) {
        doc.add(new StringField("f", Long.toString(r.nextLong(TERM_SPACE)), Store.NO));
      }
      w.addDocument(doc);
      final long actualIndexed = indexed.incrementAndGet();
      if (actualIndexed % 10_000 == 0) {
        System.out.println("Indexed: " + actualIndexed);
      }
    }
  }

  /**
   * Runs {@link #NUM_QUERIES} random disjunctions against the index in {@code dir}.
   *
   * @return the per-query latencies in microseconds, in execution order
   */
  private static List<Long> runQueries(Directory dir) throws IOException {
    List<Long> latencies = new ArrayList<>(NUM_QUERIES);
    try (IndexReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      ThreadLocalRandom r = ThreadLocalRandom.current();
      for (int i = 0; i < NUM_QUERIES; ++i) {
        // The timed region intentionally includes query construction, so that numbers stay
        // comparable with earlier runs of this benchmark.
        long start = System.nanoTime();
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        for (int t = 0; t < NUM_TERMS; ++t) {
          query.add(
              new TermQuery(new Term("f", Long.toString(r.nextLong(TERM_SPACE)))), Occur.SHOULD);
        }
        DUMMY += searcher.search(query.build(), 1, Sort.INDEXORDER).totalHits.value;
        long end = System.nanoTime();
        latencies.add((end - start) / 1000); // nanoseconds -> microseconds
      }
    }
    return latencies;
  }
}
```
</details>
Results still look good (latencies in microseconds).
Before the change:
P50: 282
P90: 387
P99: 537
After the change:
P50: 161
P90: 253
P99: 379
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]