gf2121 commented on issue #12659:
URL: https://github.com/apache/lucene/issues/12659#issuecomment-1759244758
I tried another way of reading, like this:
```
// First byte: the low 7 bits are payload, the top bit (0x80) is the continuation flag.
byte b = msbBytes[pos++];
long i = b & 0x7FL;
// A Java byte is signed, so "b < 0" is true exactly when the continuation bit is set.
while (b < 0) {
// Shift what has been decoded so far and append the next 7 payload bits
// (the most significant group comes first in this encoding).
b = msbBytes[pos++];
i = (i << 7) | (b & 0x7FL);
}
```
The following are the benchmark results for this approach (`readVLongMSBNew`):
* Comparing `readVLongMSB` and `readVLong`, some `valueBit`s are better but
others worse.
* Comparing `readVLongMSBNew` and `readVLong`, `readVLongMSBNew` wins for every
`valueBit`.
I'm considering trying the new way of reading to see what happens in the nightly
benchmark.
```
Benchmark                           (size)  (valueBit)   Mode  Cnt   Score    Error   Units
ReadVLongBenchmark.readVLong           128           7  thrpt    5  29.333 ±  0.626  ops/us
ReadVLongBenchmark.readVLong           128          14  thrpt    5   3.643 ±  0.026  ops/us
ReadVLongBenchmark.readVLong           128          21  thrpt    5   2.411 ±  0.047  ops/us
ReadVLongBenchmark.readVLong           128          28  thrpt    5   2.075 ±  0.004  ops/us
ReadVLongBenchmark.readVLong           128          35  thrpt    5   1.836 ±  0.002  ops/us
ReadVLongBenchmark.readVLong           128          42  thrpt    5   1.943 ±  0.006  ops/us
ReadVLongBenchmark.readVLong           128          49  thrpt    5   1.514 ±  0.006  ops/us
ReadVLongBenchmark.readVLong           128          56  thrpt    5   1.463 ±  0.002  ops/us
ReadVLongBenchmark.readVLong           128          63  thrpt    5   1.338 ±  0.002  ops/us
ReadVLongBenchmark.readVLongMSB        128           7  thrpt    5  29.350 ±  0.140  ops/us
ReadVLongBenchmark.readVLongMSB        128          14  thrpt    5   4.474 ±  0.025  ops/us
ReadVLongBenchmark.readVLongMSB        128          21  thrpt    5   3.640 ±  0.033  ops/us
ReadVLongBenchmark.readVLongMSB        128          28  thrpt    5   2.439 ±  0.324  ops/us
ReadVLongBenchmark.readVLongMSB        128          35  thrpt    5   1.879 ±  0.009  ops/us
ReadVLongBenchmark.readVLongMSB        128          42  thrpt    5   1.508 ±  0.037  ops/us
ReadVLongBenchmark.readVLongMSB        128          49  thrpt    5   1.268 ±  0.012  ops/us
ReadVLongBenchmark.readVLongMSB        128          56  thrpt    5   1.091 ±  0.024  ops/us
ReadVLongBenchmark.readVLongMSB        128          63  thrpt    5   0.960 ±  0.005  ops/us
ReadVLongBenchmark.readVLongMSBNew     128           7  thrpt    5  31.110 ±  0.254  ops/us
ReadVLongBenchmark.readVLongMSBNew     128          14  thrpt    5   4.075 ±  0.010  ops/us
ReadVLongBenchmark.readVLongMSBNew     128          21  thrpt    5   2.600 ±  0.008  ops/us
ReadVLongBenchmark.readVLongMSBNew     128          28  thrpt    5   2.244 ±  0.006  ops/us
ReadVLongBenchmark.readVLongMSBNew     128          35  thrpt    5   1.946 ±  0.005  ops/us
ReadVLongBenchmark.readVLongMSBNew     128          42  thrpt    5   2.102 ±  0.016  ops/us
ReadVLongBenchmark.readVLongMSBNew     128          49  thrpt    5   1.615 ±  0.209  ops/us
ReadVLongBenchmark.readVLongMSBNew     128          56  thrpt    5   1.614 ±  0.032  ops/us
ReadVLongBenchmark.readVLongMSBNew     128          63  thrpt    5   1.465 ±  0.008  ops/us
```
<details>
<summary>CODE</summary>
```
package testing;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.packed.PackedInts;
import org.openjdk.jmh.annotations.*;
import java.io.IOException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 3)
@Measurement(iterations = 5, time = 3)
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public class ReadVLongBenchmark {
private byte[] bytes;
private byte[] msbBytes;
@Param({"128"})
int size;
@Param({"7", "14", "21", "28", "35", "42", "49", "56", "63"})
int valueBit;
@Setup(Level.Trial)
public void init() throws Exception {
bytes = new byte[size * (valueBit / 7)];
msbBytes = new byte[size * (valueBit / 7)];
ByteArrayDataOutput out = new ByteArrayDataOutput(bytes);
ByteArrayDataOutput msbOut = new ByteArrayDataOutput(msbBytes);
final long min = 1L << (valueBit - 1);
final long max = valueBit == 63 ? Long.MAX_VALUE : (1L << valueBit) - 1;
for (int i = 0; i < size; i++) {
long l = ThreadLocalRandom.current().nextLong(min, max);
if (PackedInts.bitsRequired(l) != valueBit) {
throw new IllegalStateException();
}
out.writeVLong(l);
writeMSBVLong(l, msbOut);
}
if (readVLong() != readVLongMSB()) {
System.out.println("vlong msb wrong");
throw new IllegalStateException();
}
if (readVLong() != readVLongMSBNew()) {
System.out.println("vlong new msb wrong");
throw new IllegalStateException();
}
}
@Benchmark
public long readVLong() {
int pos = 0;
long res = 0;
for (int iter = 0; iter < size; iter++) {
byte b = bytes[pos++];
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
b = bytes[pos++];
i |= (b & 0x7FL) << shift;
}
res ^= i;
}
return res;
}
@Benchmark
public long readVLongMSB() {
int pos = 0;
long res = 0;
for (int iter = 0; iter < size; iter++) {
long i = 0L;
while (true) {
byte b = msbBytes[pos++];
i = (i << 7) | (b & 0x7FL);
if ((b & 0x80) == 0) {
break;
}
}
res ^= i;
}
return res;
}
@Benchmark
public long readVLongMSBNew() {
int pos = 0;
long res = 0;
for (int iter = 0; iter < size; iter++) {
byte b = msbBytes[pos++];
long i = b & 0x7FL;
while (b < 0) {
b = msbBytes[pos++];
i = (i << 7) | (b & 0x7FL);
}
res ^= i;
}
return res;
}
static void writeMSBVLong(long l, DataOutput scratchBytes) throws
IOException {
assert l >= 0;
// Keep zero bits on most significant byte to have more chance to get
prefix bytes shared.
// e.g. we expect 0x7FFF stored as [0x81, 0xFF, 0x7F] but not [0xFF,
0xFF, 0x40]
final int bytesNeeded = (Long.SIZE - Long.numberOfLeadingZeros(l) - 1) /
7 + 1;
l <<= Long.SIZE - bytesNeeded * 7;
for (int i = 1; i < bytesNeeded; i++) {
scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL) | 0x80));
l = l << 7;
}
scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL)));
}
}
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]