This is an automated email from the ASF dual-hosted git repository.
kfaraz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new b89c276508b Optimize interval deserialization for DataSegment (#18477)
b89c276508b is described below
commit b89c276508be8754272841bff47680cace926032
Author: Virushade <[email protected]>
AuthorDate: Sun Sep 7 21:15:14 2025 +0800
Optimize interval deserialization for DataSegment (#18477)
Changes:
- Add method `Intervals.fromString()` to deserialize interval strings
optimally
- Update `@JsonCreator` constructor of `DataSegment` to accept interval as
String and deserialize with new method
---
.../JodaIntervalDeserializationBenchmark.java | 153 +++++++++++++++++++++
.../apache/druid/java/util/common/Intervals.java | 66 +++++++++
.../org/apache/druid/timeline/DataSegment.java | 37 ++++-
.../druid/java/util/common/IntervalsTest.java | 38 +++++
4 files changed, 292 insertions(+), 2 deletions(-)
diff --git
a/benchmarks/src/test/java/org/apache/druid/benchmark/JodaIntervalDeserializationBenchmark.java
b/benchmarks/src/test/java/org/apache/druid/benchmark/JodaIntervalDeserializationBenchmark.java
new file mode 100644
index 00000000000..2cc236dbbc8
--- /dev/null
+++
b/benchmarks/src/test/java/org/apache/druid/benchmark/JodaIntervalDeserializationBenchmark.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.benchmark;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
+import com.fasterxml.jackson.databind.module.SimpleModule;
+import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.java.util.common.Intervals;
+import org.joda.time.Interval;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+
+@State(Scope.Benchmark)
+@Fork(value = 1)
+@Warmup(iterations = 3)
+@Measurement(iterations = 5)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+public class JodaIntervalDeserializationBenchmark // JMH benchmark: Interval JSON deserialization via Intervals.fromString() vs. the stock DefaultObjectMapper
+{
+ @Param({"20000"})
+ public int numValues; // number of JSON strings deserialized per benchmark invocation
+ /** DefaultObjectMapper whose Interval deserializer delegates to
{@link Intervals#fromString(String)}. */
+ private ObjectMapper formatStrictObjectMapper;
+ private ObjectMapper defaultMapper; // unmodified DefaultObjectMapper, used as the baseline
+
+ private List<String> intervalJsonValues; // JSON strings with millisecond precision (see setUp)
+ private List<String> fallbackIntervalJsonValues; // JSON strings without milliseconds (see setUp)
+
+ public static void main(String[] args) throws RunnerException // runs only this benchmark class through the JMH Runner
+ {
+ Options opt = new OptionsBuilder()
+ .include(JodaIntervalDeserializationBenchmark.class.getSimpleName())
+ .forks(1)
+ .build();
+ new Runner(opt).run();
+ }
+
+ @Setup
+ public void setUp() // builds both mappers and fills the two input lists with numValues entries each
+ {
+ SimpleModule strictIntervalFormatModule = new SimpleModule();
+ strictIntervalFormatModule.addDeserializer(
+ Interval.class,
+ new StdDeserializer<>(Interval.class)
+ {
+ @Override
+ public Interval deserialize(JsonParser jsonParser,
DeserializationContext ctx) throws IOException
+ {
+ return Intervals.fromString(jsonParser.getText()); // route every Interval through the method under test
+ }
+ }
+ );
+
+ defaultMapper = new DefaultObjectMapper();
+ formatStrictObjectMapper = new
DefaultObjectMapper().registerModule(strictIntervalFormatModule);
+
+ intervalJsonValues = new ArrayList<>(numValues);
+ fallbackIntervalJsonValues = new ArrayList<>(numValues);
+
+ // Use a small set of valid ISO UTC interval strings that hit the
optimized fast path.
+ final String[] samples = new String[]{
+ "\"2022-09-16T00:00:00.000Z/2022-09-17T00:00:00.000Z\"",
+ "\"2021-01-01T12:34:56.789Z/2021-01-02T12:34:56.789Z\"",
+ "\"2010-06-30T23:59:59.000Z/2010-07-01T23:59:59.000Z\"",
+ "\"1999-12-31T00:00:00.123Z/2000-01-01T00:00:00.123Z\""
+ };
+
+ final String[] fallbackSamples = new String[]{ // no milliseconds: presumably these miss the fast path and exercise the fallback parser
+ "\"2022-01-01T00:00:00Z/2022-01-02T00:00:00Z\"",
+ "\"2022-01-01T12:34:56Z/2022-01-02T12:34:56Z\"",
+ "\"2010-06-30T23:59:59Z/2010-07-01T23:59:59Z\"",
+ "\"1999-12-31T00:00:00Z/2000-01-01T00:00:00Z\""
+ };
+
+ for (int i = 0; i < numValues; i++) { // cycle through the samples until each list holds numValues entries
+ intervalJsonValues.add(samples[i % samples.length]);
+ fallbackIntervalJsonValues.add(fallbackSamples[i %
fallbackSamples.length]);
+ }
+ }
+
+ @Benchmark
+ public void deserializeOptimized(Blackhole blackhole) throws Exception // millisecond-precision inputs, fromString-backed mapper
+ {
+ for (String json : intervalJsonValues) {
+ blackhole.consume(formatStrictObjectMapper.readValue(json,
Interval.class));
+ }
+ }
+
+ @Benchmark
+ public void deserializeLegacy(Blackhole blackhole) throws Exception // millisecond-precision inputs, baseline mapper
+ {
+ for (String json : intervalJsonValues) {
+ blackhole.consume(defaultMapper.readValue(json, Interval.class));
+ }
+ }
+
+ @Benchmark
+ public void deserializeOptimizedFallback(Blackhole blackhole) throws
Exception // inputs without milliseconds, fromString-backed mapper
+ {
+ for (String json : fallbackIntervalJsonValues) {
+ blackhole.consume(formatStrictObjectMapper.readValue(json,
Interval.class));
+ }
+ }
+
+ @Benchmark
+ public void deserializeLegacyFallback(Blackhole blackhole) throws Exception // inputs without milliseconds, baseline mapper
+ {
+ for (String json : fallbackIntervalJsonValues) {
+ blackhole.consume(defaultMapper.readValue(json, Interval.class));
+ }
+ }
+}
diff --git
a/processing/src/main/java/org/apache/druid/java/util/common/Intervals.java
b/processing/src/main/java/org/apache/druid/java/util/common/Intervals.java
index 623f546349f..80be5a738d4 100644
--- a/processing/src/main/java/org/apache/druid/java/util/common/Intervals.java
+++ b/processing/src/main/java/org/apache/druid/java/util/common/Intervals.java
@@ -25,6 +25,8 @@ import org.apache.druid.java.util.common.guava.Comparators;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.joda.time.chrono.ISOChronology;
+import org.joda.time.format.DateTimeFormatter;
+import org.joda.time.format.ISODateTimeFormat;
import javax.annotation.Nullable;
@@ -32,6 +34,8 @@ public final class Intervals
{
public static final Interval ETERNITY = utc(JodaUtils.MIN_INSTANT,
JodaUtils.MAX_INSTANT);
public static final ImmutableList<Interval> ONLY_ETERNITY =
ImmutableList.of(ETERNITY);
+ private static final DateTimeFormatter FAST_ISO_UTC_FORMATTER =
+
ISODateTimeFormat.dateTime().withChronology(ISOChronology.getInstanceUTC());
public static Interval utc(long startInstant, long endInstant)
{
@@ -53,6 +57,68 @@ public final class Intervals
return of(StringUtils.format(format, formatArgs));
}
+ /**
+ * Parses a Joda-Time {@link Interval} from a string, trying a fast path
before falling back to {@link Intervals#of(String)}.
+ * The fast path is meant for inputs whose two endpoints are full ISO
date-times with milliseconds and a zone offset,
+ * for example:
+ * <ol>
+ * <li>"2022-01-01T00:00:00.000Z/2022-01-02T00:00:00.000Z"</li>
+ * <li>"2022-01-01T00:00:00.000+05:30/2022-01-01T01:00:00.000+05:30"</li>
+ * <li>"2022-01-01T00:00:00.000+0530/2022-01-01T01:00:00.000+0530"</li>
+ * </ol>
+ * <p>
+ * Any input that the fast path cannot handle is passed to the more flexible
but
+ * slower {@link Intervals#of(String)} parser, so it still parses but gains
no speed-up. For formats like the examples below,
+ * consider using {@link Intervals#of(String)} instead:
+ * <ol>
+ * <li>"2022-01-01T00:00:00Z/2022-01-02T00:00:00Z" (without millis)</li>
+ * <li>"2022-01-01/2022-01-02" (Date only)</li>
+ * <li>"2022-01-01T12:00:00.000Z/PT6H" (Periods in start / end)</li>
+ * </ol>
+ *
+ * Currently, this method is used by the {@code @JsonCreator} constructor of
{@link org.apache.druid.timeline.DataSegment}.
+ */
+ public static Interval fromString(String string)
+ {
+ Interval interval = null; // stays null unless the fast path is attempted and succeeds
+ if (canDeserializeIntervalOptimallyFromString(string)) {
+ interval = tryOptimizedIntervalDeserialization(string);
+ }
+
+ return interval == null ? Intervals.of(string) : interval; // fall back to the general parser when the fast path produced nothing
+ }
+
+ private static boolean canDeserializeIntervalOptimallyFromString(String
intervalText) // cheap pre-check deciding whether the fast path is worth attempting
+ {
+ // Any text containing 'P' is treated as possibly holding a period (e.g. "P1D", "PT6H"), which the optimized version does not handle.
+ if (intervalText.contains("P")) {
+ return false;
+ }
+
+ final int slashIndex = intervalText.indexOf('/'); // -1 when there is no separator at all
+ return (slashIndex > 0 && slashIndex < intervalText.length() - 1); // the first '/' must have at least one character on each side
+ }
+
+ /**
+ * @return the Interval built by parsing both sides of the first '/' with
{@code FAST_ISO_UTC_FORMATTER}, or null if parsing or interval construction throws IllegalArgumentException.
+ */
+ @Nullable
+ private static Interval tryOptimizedIntervalDeserialization(final String
intervalText)
+ {
+ final int slashIndex = intervalText.indexOf('/'); // split point: first '/' in the text
+ final String startStr = intervalText.substring(0, slashIndex);
+ final String endStr = intervalText.substring(slashIndex + 1);
+
+ try {
+ final long startMillis = FAST_ISO_UTC_FORMATTER.parseMillis(startStr);
+ final long endMillis = FAST_ISO_UTC_FORMATTER.parseMillis(endStr);
+ return Intervals.utc(startMillis, endMillis);
+ }
+ catch (IllegalArgumentException e) { // deliberately swallowed: null tells the caller the fast path did not apply
+ return null;
+ }
+ }
+
/**
* Returns true if the provided interval has endpoints that can be compared
against other DateTimes using their
* string representations.
diff --git
a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
index 3d2dd045998..ef13b15f773 100644
--- a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
+++ b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
@@ -37,6 +37,7 @@ import it.unimi.dsi.fastutil.objects.Object2ObjectArrayMap;
import org.apache.druid.guice.annotations.PublicApi;
import org.apache.druid.jackson.CommaListJoinDeserializer;
import org.apache.druid.jackson.CommaListJoinSerializer;
+import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.SegmentDescriptor;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.ShardSpec;
@@ -179,9 +180,10 @@ public class DataSegment implements
Comparable<DataSegment>, Overshadowable<Data
}
@JsonCreator
- public DataSegment(
+ private DataSegment(
@JsonProperty("dataSource") String dataSource,
- @JsonProperty("interval") Interval interval,
+ // We take interval input as a String so we can deserialize it optimally
via Intervals.fromString(interval).
+ @JsonProperty("interval") String interval,
@JsonProperty("version") String version,
// use `Map` *NOT* `LoadSpec` because we want to do lazy materialization
to prevent dependency pollution
@JsonProperty("loadSpec") @Nullable Map<String, Object> loadSpec,
@@ -196,6 +198,37 @@ public class DataSegment implements
Comparable<DataSegment>, Overshadowable<Data
@JsonProperty("size") long size,
@JacksonInject PruneSpecsHolder pruneSpecsHolder
)
+ {
+ this(
+ dataSource,
+ Intervals.fromString(interval),
+ version,
+ loadSpec,
+ dimensions,
+ metrics,
+ projections,
+ shardSpec,
+ lastCompactionState,
+ binaryVersion,
+ size,
+ pruneSpecsHolder
+ );
+ }
+
+ public DataSegment(
+ String dataSource,
+ Interval interval,
+ String version,
+ @Nullable Map<String, Object> loadSpec,
+ @Nullable List<String> dimensions,
+ @Nullable List<String> metrics,
+ @Nullable List<String> projections,
+ @Nullable ShardSpec shardSpec,
+ @Nullable CompactionState lastCompactionState,
+ Integer binaryVersion,
+ long size,
+ PruneSpecsHolder pruneSpecsHolder
+ )
{
this.id = SegmentId.of(dataSource, interval, version, shardSpec);
// prune loadspec if needed
diff --git
a/processing/src/test/java/org/apache/druid/java/util/common/IntervalsTest.java
b/processing/src/test/java/org/apache/druid/java/util/common/IntervalsTest.java
index a8703b0ec70..3d591a319f7 100644
---
a/processing/src/test/java/org/apache/druid/java/util/common/IntervalsTest.java
+++
b/processing/src/test/java/org/apache/druid/java/util/common/IntervalsTest.java
@@ -79,6 +79,44 @@ public class IntervalsTest
);
}
+ @Test
+ public void testValidIntervalStrings()
+ {
+ final String[] intervalStringRepresentations = new String[]{
+ // Inputs expected to be parsed without falling back to Intervals.of()
+ // Zulu with millis
+ "2022-01-01T00:00:00.000Z/2022-01-02T00:00:00.000Z",
+ "2021-03-14T12:34:56.789Z/2021-03-15T12:34:56.789Z",
+
+ // Offset with colon
+ "2022-01-01T00:00:00.000+05:30/2022-01-01T01:00:00.000+05:30",
+ "2022-01-01T07:00:00.000-07:00/2022-01-01T08:00:00.000-07:00",
+
+ // Basic offset without colon
+ "2022-01-01T00:00:00.000+0530/2022-01-01T01:00:00.000+0530",
+
+ // Inputs expected to fall back to Intervals.of()
+ // Zulu without millis
+ "2022-01-01T00:00:00Z/2022-01-02T00:00:00Z",
+ // Date-only
+ "2022-01-01/2022-01-02",
+ // start/period
+ "2022-01-01T00:00:00.000Z/P1D",
+ "2022-01-01T12:00:00Z/PT6H",
+ "2022-01-01T00:00:00Z/P2DT3H4M5S",
+ // period/end
+ "P1D/2022-01-02T00:00:00.000Z",
+ "PT6H/2022-01-01T18:00:00Z",
+ "P2DT3H4M5S/2022-01-03T03:04:05Z"
+ };
+
+ for (String s : intervalStringRepresentations) {
+ Interval expected = Intervals.of(s); // reference result from the general parser
+ Interval actual = Intervals.fromString(s); // must be equal regardless of which path handled the input
+ Assert.assertEquals("Mismatch for: " + s, expected, actual);
+ }
+ }
+
@Test
public void testInvalidInterval()
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]