dsmiley commented on code in PR #254: URL: https://github.com/apache/solr/pull/254#discussion_r1917586188
########## solr/benchmark/src/java/org/apache/solr/bench/generators/StringsDSL.java: ########## @@ -0,0 +1,481 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.bench.generators; + +import static org.apache.solr.bench.generators.SourceDSL.checkArguments; +import static org.apache.solr.bench.generators.SourceDSL.integers; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.Random; +import java.util.Scanner; +import java.util.SplittableRandom; +import org.apache.solr.bench.BaseBenchState; +import org.apache.solr.bench.SolrGenerate; +import org.apache.solr.bench.SolrRandomnessSource; +import org.quicktheories.core.Gen; +import org.quicktheories.core.RandomnessSource; +import org.quicktheories.impl.SplittableRandomSource; + +/** The type Strings dsl. 
*/ +public class StringsDSL { + + private static final int BASIC_LATIN_LAST_CODEPOINT = 0x007E; + private static final int BASIC_LATIN_FIRST_CODEPOINT = 0x0020; + private static final int ASCII_LAST_CODEPOINT = 0x007F; + private static final int LARGEST_DEFINED_BMP_CODEPOINT = 65533; + + private static final List<String> words; + + private static final int WORD_SIZE; + + static { + // english word list via https://github.com/dwyl/english-words + + words = new ArrayList<>(1000); + InputStream inputStream = StringsDSL.class.getClassLoader().getResourceAsStream("words.txt"); + try (Scanner scanner = + new Scanner(Objects.requireNonNull(inputStream), StandardCharsets.UTF_8.name())) { + while (scanner.hasNextLine()) { + words.add(scanner.nextLine()); + } + } + Collections.shuffle(words, new Random(BaseBenchState.getRandomSeed())); + WORD_SIZE = words.size(); + } + + /** + * Word list word list generator builder. + * + * @return the word list generator builder + */ + public WordListGeneratorBuilder wordList() { + return new WordListGeneratorBuilder( + new SolrGen<>(new WordListStringSolrGen(), String.class).describedAs("WordList Word")); + } + + /** + * Generates integers as Strings, and shrinks towards "0". + * + * @return a Source of type String + */ + public SolrGen<String> numeric() { + return new SolrGen<>(numericBetween(Integer.MIN_VALUE, Integer.MAX_VALUE), String.class); + } + + /** + * Generates integers within the interval as Strings. 
+ * + * @param startInclusive - lower inclusive bound of integer domain + * @param endInclusive - upper inclusive bound of integer domain + * @return a Source of type String + */ + public SolrGen<String> numericBetween(int startInclusive, int endInclusive) { + checkArguments( + startInclusive <= endInclusive, + "There are no Integer values to be generated between startInclusive (%s) and endInclusive (%s)", + startInclusive, + endInclusive); + return new SolrGen<>(Strings.boundedNumericStrings(startInclusive, endInclusive), String.class); + } + + /** + * Constructs a StringGeneratorBuilder which will build Strings composed of all defined code + * points + * + * @return a StringGeneratorBuilder + */ + public StringGeneratorBuilder allPossible() { + return betweenCodePoints(Character.MIN_CODE_POINT, Character.MAX_CODE_POINT); + } + + /** + * Realistic unicode realistic unicode generator builder. + * + * @param minLength the min length + * @param maxLength the max length + * @return the realistic unicode generator builder + */ + public RealisticUnicodeGeneratorBuilder realisticUnicode(int minLength, int maxLength) { + return new RealisticUnicodeGeneratorBuilder( + new SolrGen<>() { + @Override + public String generate(SolrRandomnessSource in) { + + int block = + integers() + .between(0, blockStarts.length - 1) + .describedAs("Realistic Unicode BLock Index") + .generate(in); + + return Strings.ofBoundedLengthStrings( + blockStarts[block], blockEnds[block], minLength, maxLength) + .describedAs("Realistic Unicode") + .generate(in); + } + }); + } + + /** + * Constructs a StringGeneratorBuilder which will build Strings composed of all defined code + * points in the Basic Multilingual Plane + * + * @return a StringGeneratorBuilder + */ + public StringGeneratorBuilder basicMultilingualPlaneAlphabet() { + return betweenCodePoints(Character.MIN_CODE_POINT, LARGEST_DEFINED_BMP_CODEPOINT); + } + + /** + * Constructs a StringGeneratorBuilder which will build Strings composed of 
Unicode Basic Latin + * Alphabet + * + * @return a StringGeneratorBuilder + */ + public StringGeneratorBuilder basicLatinAlphabet() { + return betweenCodePoints(BASIC_LATIN_FIRST_CODEPOINT, BASIC_LATIN_LAST_CODEPOINT); + } + + /** + * Alpha string generator builder. + * + * @return the string generator builder + */ + public StringGeneratorBuilder alpha() { + return betweenCodePoints('a', 'z' + 1); + } + + /** + * Alpha numeric string generator builder. + * + * @return the string generator builder + */ + public StringGeneratorBuilder alphaNumeric() { + return betweenCodePoints(' ', 'z' + 1); + } + + /** + * Constructs a StringGeneratorBuilder which will build Strings composed of Unicode Ascii Alphabet + * + * @return a StringGeneratorBuilder + */ + public StringGeneratorBuilder ascii() { + return betweenCodePoints(Character.MIN_CODE_POINT, ASCII_LAST_CODEPOINT); + } + + /** + * Strings with characters between two (inclusive) code points + * + * @param minInclusive minimum code point + * @param maxInclusive max code point + * @return Builder for strings + */ + public StringGeneratorBuilder betweenCodePoints(int minInclusive, int maxInclusive) { + return new StringGeneratorBuilder(minInclusive, maxInclusive); + } + + /** The type Word list generator builder. */ + public static class WordListGeneratorBuilder { + private final SolrGen<String> strings; + + /** + * Instantiates a new Word list generator builder. + * + * @param strings the strings + */ + WordListGeneratorBuilder(SolrGen<String> strings) { + this.strings = strings; + } + + /** + * Of one solr gen. + * + * @return the solr gen + */ + public SolrGen<String> ofOne() { + return strings; + } + + /** + * Multi solr gen. + * + * @param count the count + * @return the solr gen + */ + public SolrGen<String> multi(int count) { + return multiStringGen(strings, count); + } + + /** + * With distribution word list generator builder. 
+ * + * @param distribution the distribution + * @return the word list generator builder + */ + public WordListGeneratorBuilder withDistribution(Distribution distribution) { + this.strings.withDistribution(distribution); + return this; + } + } + + /** The type Realistic unicode generator builder. */ + public static class RealisticUnicodeGeneratorBuilder { + private final SolrGen<String> strings; + + /** + * Instantiates a new Realistic unicode generator builder. + * + * @param strings the strings + */ + RealisticUnicodeGeneratorBuilder(SolrGen<String> strings) { + this.strings = strings; + } + + /** + * Of one solr gen. + * + * @return the solr gen + */ + public SolrGen<String> ofOne() { + return strings; + } + + /** + * Multi solr gen. + * + * @param count the count + * @return the solr gen + */ + public SolrGen<String> multi(int count) { + return multiStringGen(strings, count); + } + } + + /** The type String generator builder. */ + public static class StringGeneratorBuilder { + + private final int minCodePoint; + private final int maxCodePoint; + private Integer cardinalityStart; + private Gen<Integer> maxCardinality; + private int multi; + + private StringGeneratorBuilder(int minCodePoint, int maxCodePoint) { + this.minCodePoint = minCodePoint; + this.maxCodePoint = maxCodePoint; + } + + /** + * Generates Strings of a fixed number of code points. + * + * @param codePoints - the fixed number of code points for the String + * @return a a Source of type String + */ + public SolrGen<String> ofFixedNumberOfCodePoints(int codePoints) { + checkArguments( + codePoints >= 0, + "The number of codepoints cannot be negative; %s is not an accepted argument", + codePoints); + return new SolrGen<>( + Strings.withCodePoints(minCodePoint, maxCodePoint, SolrGenerate.constant(codePoints)), + String.class); + } + + /** + * Generates Strings of a fixed length. 
+ * + * @param fixedLength - the fixed length for the Strings + * @return a Source of type String + */ + public SolrGen<String> ofLength(int fixedLength) { + return ofLengthBetween(fixedLength, fixedLength); + } + + /** + * Max cardinality string generator builder. + * + * @param max the max + * @return the string generator builder + */ + public StringGeneratorBuilder maxCardinality(int max) { + maxCardinality = SolrGenerate.constant(max); + return this; + } + + /** + * Max cardinality string generator builder. + * + * @param max the max + * @return the string generator builder + */ + public StringGeneratorBuilder maxCardinality(Gen<Integer> max) { + maxCardinality = max; + return this; + } + + /** + * Multi string generator builder. + * + * @param count the count + * @return the string generator builder + */ + public StringGeneratorBuilder multi(int count) { + this.multi = count; + return this; + } + + /** + * Generates Strings of length bounded between minLength and maxLength inclusively. 
+ * + * @param minLength - minimum inclusive length of String + * @param maxLength - maximum inclusive length of String + * @return a Source of type String + */ + public SolrGen<String> ofLengthBetween(int minLength, int maxLength) { + checkArguments( + minLength <= maxLength, + "The minLength (%s) is longer than the maxLength(%s)", + minLength, + maxLength); + checkArguments( + minLength >= 0, + "The length of a String cannot be negative; %s is not an accepted argument", + minLength); + SolrGen<String> strings = + Strings.ofBoundedLengthStrings(minCodePoint, maxCodePoint, minLength, maxLength); + + if (maxCardinality != null) { + SolrGen<String> gen = + new SolrGen<>( + new SolrGen<>() { + @Override + public String generate(SolrRandomnessSource in) { + Integer maxCard = maxCardinality.generate(in); + + if (cardinalityStart == null) { + cardinalityStart = + SolrGenerate.range(0, Integer.MAX_VALUE - maxCard - 1).generate(in); + } + + long seed = + SolrGenerate.range(cardinalityStart, cardinalityStart + maxCard - 1) + .generate(in); Review Comment: But the cardinality isn't necessarily a constant; it can also be a `Gen`, and thus vary between calls, which makes no sense to me. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org