This is an automated email from the ASF dual-hosted git repository.

kfaraz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new b89c276508b Optimize interval deserialization for DataSegment (#18477)
b89c276508b is described below

commit b89c276508be8754272841bff47680cace926032
Author: Virushade <[email protected]>
AuthorDate: Sun Sep 7 21:15:14 2025 +0800

    Optimize interval deserialization for DataSegment (#18477)
    
    Changes:
    - Add method `Intervals.fromString()` to deserialize interval strings 
optimally
    - Update `@JsonCreator` constructor of `DataSegment` to accept interval as 
String and deserialize with new method
---
 .../JodaIntervalDeserializationBenchmark.java      | 153 +++++++++++++++++++++
 .../apache/druid/java/util/common/Intervals.java   |  66 +++++++++
 .../org/apache/druid/timeline/DataSegment.java     |  37 ++++-
 .../druid/java/util/common/IntervalsTest.java      |  38 +++++
 4 files changed, 292 insertions(+), 2 deletions(-)

diff --git 
a/benchmarks/src/test/java/org/apache/druid/benchmark/JodaIntervalDeserializationBenchmark.java
 
b/benchmarks/src/test/java/org/apache/druid/benchmark/JodaIntervalDeserializationBenchmark.java
new file mode 100644
index 00000000000..2cc236dbbc8
--- /dev/null
+++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/JodaIntervalDeserializationBenchmark.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.benchmark;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
+import com.fasterxml.jackson.databind.module.SimpleModule;
+import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.java.util.common.Intervals;
+import org.joda.time.Interval;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+
+/**
+ * JMH benchmark that deserializes JSON interval strings into {@link Interval} with two mappers:
+ * a {@link DefaultObjectMapper} as-is, and a {@link DefaultObjectMapper} with an extra module whose
+ * {@link Interval} deserializer delegates to {@link Intervals#fromString(String)}.
+ * Each mapper is measured on two input sets: strings with millis and strings without millis.
+ */
+@State(Scope.Benchmark)
+@Fork(value = 1)
+@Warmup(iterations = 3)
+@Measurement(iterations = 5)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+public class JodaIntervalDeserializationBenchmark
+{
+  /** Number of JSON strings deserialized per benchmark invocation. */
+  @Param({"20000"})
+  public int numValues;
+  /** Object Mapper that uses {@link Intervals#fromString(String)} under the hood. */
+  private ObjectMapper formatStrictObjectMapper;
+  /** Plain {@link DefaultObjectMapper} without the extra Interval deserializer module. */
+  private ObjectMapper defaultMapper;
+
+  /** JSON strings with millis, see the samples in {@link #setUp()}. */
+  private List<String> intervalJsonValues;
+  /** JSON strings without millis, see the fallback samples in {@link #setUp()}. */
+  private List<String> fallbackIntervalJsonValues;
+
+  /** Runs only this benchmark class through the JMH {@link Runner}. */
+  public static void main(String[] args) throws RunnerException
+  {
+    Options opt = new OptionsBuilder()
+        .include(JodaIntervalDeserializationBenchmark.class.getSimpleName())
+        .forks(1)
+        .build();
+    new Runner(opt).run();
+  }
+
+  /** Builds both mappers and fills the two input lists with {@link #numValues} entries each. */
+  @Setup
+  public void setUp()
+  {
+    // Module that routes Interval deserialization through Intervals.fromString().
+    SimpleModule strictIntervalFormatModule = new SimpleModule();
+    strictIntervalFormatModule.addDeserializer(
+        Interval.class,
+        new StdDeserializer<>(Interval.class)
+        {
+          @Override
+          public Interval deserialize(JsonParser jsonParser, 
DeserializationContext ctx) throws IOException
+          {
+            return Intervals.fromString(jsonParser.getText());
+          }
+        }
+    );
+
+    defaultMapper = new DefaultObjectMapper();
+    formatStrictObjectMapper = new 
DefaultObjectMapper().registerModule(strictIntervalFormatModule);
+
+    intervalJsonValues = new ArrayList<>(numValues);
+    fallbackIntervalJsonValues = new ArrayList<>(numValues);
+
+    // Use a small set of valid ISO UTC interval strings that hit the 
optimized fast path.
+    final String[] samples = new String[]{
+        "\"2022-09-16T00:00:00.000Z/2022-09-17T00:00:00.000Z\"",
+        "\"2021-01-01T12:34:56.789Z/2021-01-02T12:34:56.789Z\"",
+        "\"2010-06-30T23:59:59.000Z/2010-07-01T23:59:59.000Z\"",
+        "\"1999-12-31T00:00:00.123Z/2000-01-01T00:00:00.123Z\""
+    };
+
+    // Interval strings without millis; Intervals.fromString() documents this shape as one that
+    // falls back to Intervals.of().
+    final String[] fallbackSamples = new String[]{
+        "\"2022-01-01T00:00:00Z/2022-01-02T00:00:00Z\"",
+        "\"2022-01-01T12:34:56Z/2022-01-02T12:34:56Z\"",
+        "\"2010-06-30T23:59:59Z/2010-07-01T23:59:59Z\"",
+        "\"1999-12-31T00:00:00Z/2000-01-01T00:00:00Z\""
+    };
+
+    // Cycle through the sample arrays until each list holds numValues entries.
+    for (int i = 0; i < numValues; i++) {
+      intervalJsonValues.add(samples[i % samples.length]);
+      fallbackIntervalJsonValues.add(fallbackSamples[i % 
fallbackSamples.length]);
+    }
+  }
+
+  /** Strings with millis, read with the mapper backed by {@link Intervals#fromString(String)}. */
+  @Benchmark
+  public void deserializeOptimized(Blackhole blackhole) throws Exception
+  {
+    for (String json : intervalJsonValues) {
+      blackhole.consume(formatStrictObjectMapper.readValue(json, 
Interval.class));
+    }
+  }
+
+  /** Strings with millis, read with the plain default mapper. */
+  @Benchmark
+  public void deserializeLegacy(Blackhole blackhole) throws Exception
+  {
+    for (String json : intervalJsonValues) {
+      blackhole.consume(defaultMapper.readValue(json, Interval.class));
+    }
+  }
+
+  /** Strings without millis, read with the mapper backed by {@link Intervals#fromString(String)}. */
+  @Benchmark
+  public void deserializeOptimizedFallback(Blackhole blackhole) throws 
Exception
+  {
+    for (String json : fallbackIntervalJsonValues) {
+      blackhole.consume(formatStrictObjectMapper.readValue(json, 
Interval.class));
+    }
+  }
+
+  /** Strings without millis, read with the plain default mapper. */
+  @Benchmark
+  public void deserializeLegacyFallback(Blackhole blackhole) throws Exception
+  {
+    for (String json : fallbackIntervalJsonValues) {
+      blackhole.consume(defaultMapper.readValue(json, Interval.class));
+    }
+  }
+}
diff --git 
a/processing/src/main/java/org/apache/druid/java/util/common/Intervals.java 
b/processing/src/main/java/org/apache/druid/java/util/common/Intervals.java
index 623f546349f..80be5a738d4 100644
--- a/processing/src/main/java/org/apache/druid/java/util/common/Intervals.java
+++ b/processing/src/main/java/org/apache/druid/java/util/common/Intervals.java
@@ -25,6 +25,8 @@ import org.apache.druid.java.util.common.guava.Comparators;
 import org.joda.time.DateTime;
 import org.joda.time.Interval;
 import org.joda.time.chrono.ISOChronology;
+import org.joda.time.format.DateTimeFormatter;
+import org.joda.time.format.ISODateTimeFormat;
 
 import javax.annotation.Nullable;
 
@@ -32,6 +34,8 @@ public final class Intervals
 {
   public static final Interval ETERNITY = utc(JodaUtils.MIN_INSTANT, 
JodaUtils.MAX_INSTANT);
   public static final ImmutableList<Interval> ONLY_ETERNITY = 
ImmutableList.of(ETERNITY);
+  private static final DateTimeFormatter FAST_ISO_UTC_FORMATTER =
+      
ISODateTimeFormat.dateTime().withChronology(ISOChronology.getInstanceUTC());
 
   public static Interval utc(long startInstant, long endInstant)
   {
@@ -53,6 +57,68 @@ public final class Intervals
     return of(StringUtils.format(format, formatArgs));
   }
 
+  /**
+   * A performance-optimized method for parsing a Joda-Time {@link Interval} from a string.
+   * <p>
+   * The input is first checked for the shape {@code start/end} with no {@code 'P'} character, and both
+   * halves are then parsed with a fixed ISO date-time formatter. This is significantly faster than the
+   * standard {@link Intervals#of(String)} for the following formats:
+   * <ol>
+   *   <li>"2022-01-01T00:00:00.000Z/2022-01-02T00:00:00.000Z"</li>
+   *   <li>"2022-01-01T00:00:00.000+05:30/2022-01-01T01:00:00.000+05:30"</li>
+   *   <li>"2022-01-01T00:00:00.000+0530/2022-01-01T01:00:00.000+0530"</li>
+   * </ol>
+   * <p>
+   * If the input string does not match the format, it will fall back to the more flexible but
+   * slower {@link Intervals#of(String)} parser. If you are dealing with any of the interval formats
+   * below, consider using {@link Intervals#of(String)} directly instead:
+   * <ol>
+   *   <li>"2022-01-01T00:00:00Z/2022-01-02T00:00:00Z" (without millis)</li>
+   *   <li>"2022-01-01/2022-01-02" (Date only)</li>
+   *   <li>"2022-01-01T12:00:00.000Z/PT6H" (Periods in start / end)</li>
+   * </ol>
+   *
+   * Currently, this method is used by the {@code @JsonCreator} constructor of
+   * {@link org.apache.druid.timeline.DataSegment}.
+   *
+   * @param string interval text; must not be null because it is inspected before any parsing happens
+   * @return the interval from the optimized path when it succeeds, otherwise the result of
+   *         {@link Intervals#of(String)}
+   */
+  public static Interval fromString(String string)
+  {
+    Interval interval = null;
+    if (canDeserializeIntervalOptimallyFromString(string)) {
+      interval = tryOptimizedIntervalDeserialization(string);
+    }
+
+    // null here means the fast path was skipped or could not parse the text, so use the general parser.
+    return interval == null ? Intervals.of(string) : interval;
+  }
+
+  /**
+   * Cheap pre-check deciding whether the optimized parser should be attempted at all.
+   *
+   * @return true if the text contains no {@code 'P'} and its first {@code '/'} is neither the first
+   *         nor the last character
+   */
+  private static boolean canDeserializeIntervalOptimallyFromString(String 
intervalText)
+  {
+    // Optimized version does not deal well with Periods.
+    if (intervalText.contains("P")) {
+      return false;
+    }
+
+    // Require at least one character on each side of the first '/'.
+    final int slashIndex = intervalText.indexOf('/');
+    return (slashIndex > 0 && slashIndex < intervalText.length() - 1);
+  }
+
+  /**
+   * Splits the text at its first {@code '/'} and parses both halves with {@link #FAST_ISO_UTC_FORMATTER}.
+   * The caller is expected to have checked that a {@code '/'} is present, see
+   * {@link #canDeserializeIntervalOptimallyFromString(String)}.
+   *
+   * @return null if the input format cannot be parsed with optimized strategy, else return the Interval.
+   */
+  @Nullable
+  private static Interval tryOptimizedIntervalDeserialization(final String 
intervalText)
+  {
+    final int slashIndex = intervalText.indexOf('/');
+    final String startStr = intervalText.substring(0, slashIndex);
+    final String endStr = intervalText.substring(slashIndex + 1);
+
+    try {
+      final long startMillis = FAST_ISO_UTC_FORMATTER.parseMillis(startStr);
+      final long endMillis = FAST_ISO_UTC_FORMATTER.parseMillis(endStr);
+      return Intervals.utc(startMillis, endMillis);
+    }
+    catch (IllegalArgumentException e) {
+      // Deliberately not rethrown: an IllegalArgumentException from parsing either half or from
+      // building the interval is reported as null so that the caller can fall back.
+      return null;
+    }
+  }
+
   /**
    * Returns true if the provided interval has endpoints that can be compared 
against other DateTimes using their
    * string representations.
diff --git 
a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java 
b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
index 3d2dd045998..ef13b15f773 100644
--- a/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
+++ b/processing/src/main/java/org/apache/druid/timeline/DataSegment.java
@@ -37,6 +37,7 @@ import it.unimi.dsi.fastutil.objects.Object2ObjectArrayMap;
 import org.apache.druid.guice.annotations.PublicApi;
 import org.apache.druid.jackson.CommaListJoinDeserializer;
 import org.apache.druid.jackson.CommaListJoinSerializer;
+import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.query.SegmentDescriptor;
 import org.apache.druid.timeline.partition.NumberedShardSpec;
 import org.apache.druid.timeline.partition.ShardSpec;
@@ -179,9 +180,10 @@ public class DataSegment implements 
Comparable<DataSegment>, Overshadowable<Data
   }
 
   @JsonCreator
-  public DataSegment(
+  private DataSegment(
       @JsonProperty("dataSource") String dataSource,
-      @JsonProperty("interval") Interval interval,
+      // We take interval input as a String so we can deserialize it optimally 
via Intervals.fromString(interval).
+      @JsonProperty("interval") String interval,
       @JsonProperty("version") String version,
       // use `Map` *NOT* `LoadSpec` because we want to do lazy materialization 
to prevent dependency pollution
       @JsonProperty("loadSpec") @Nullable Map<String, Object> loadSpec,
@@ -196,6 +198,37 @@ public class DataSegment implements 
Comparable<DataSegment>, Overshadowable<Data
       @JsonProperty("size") long size,
       @JacksonInject PruneSpecsHolder pruneSpecsHolder
   )
+  {
+    this(
+        dataSource,
+        Intervals.fromString(interval),
+        version,
+        loadSpec,
+        dimensions,
+        metrics,
+        projections,
+        shardSpec,
+        lastCompactionState,
+        binaryVersion,
+        size,
+        pruneSpecsHolder
+    );
+  }
+
+  public DataSegment(
+      String dataSource,
+      Interval interval,
+      String version,
+      @Nullable Map<String, Object> loadSpec,
+      @Nullable List<String> dimensions,
+      @Nullable List<String> metrics,
+      @Nullable List<String> projections,
+      @Nullable ShardSpec shardSpec,
+      @Nullable CompactionState lastCompactionState,
+      Integer binaryVersion,
+      long size,
+      PruneSpecsHolder pruneSpecsHolder
+  )
   {
     this.id = SegmentId.of(dataSource, interval, version, shardSpec);
     // prune loadspec if needed
diff --git 
a/processing/src/test/java/org/apache/druid/java/util/common/IntervalsTest.java 
b/processing/src/test/java/org/apache/druid/java/util/common/IntervalsTest.java
index a8703b0ec70..3d591a319f7 100644
--- 
a/processing/src/test/java/org/apache/druid/java/util/common/IntervalsTest.java
+++ 
b/processing/src/test/java/org/apache/druid/java/util/common/IntervalsTest.java
@@ -79,6 +79,44 @@ public class IntervalsTest
     );
   }
 
+  /**
+   * Verifies that {@link Intervals#fromString(String)} returns the same interval as
+   * {@link Intervals#of(String)} for a set of valid interval strings.
+   */
+  @Test
+  public void testValidIntervalStrings()
+  {
+    final String[] intervalStringRepresentations = new String[]{
+        // Inputs that are expected not to fall back to Intervals.of()
+        // Zulu with millis
+        "2022-01-01T00:00:00.000Z/2022-01-02T00:00:00.000Z",
+        "2021-03-14T12:34:56.789Z/2021-03-15T12:34:56.789Z",
+
+        // Offset with colon
+        "2022-01-01T00:00:00.000+05:30/2022-01-01T01:00:00.000+05:30",
+        "2022-01-01T07:00:00.000-07:00/2022-01-01T08:00:00.000-07:00",
+
+        // Basic offset without colon
+        "2022-01-01T00:00:00.000+0530/2022-01-01T01:00:00.000+0530",
+
+        // Inputs that are expected to fall back to Intervals.of()
+        // Zulu without millis
+        "2022-01-01T00:00:00Z/2022-01-02T00:00:00Z",
+        // Date-only
+        "2022-01-01/2022-01-02",
+        // start/period
+        "2022-01-01T00:00:00.000Z/P1D",
+        "2022-01-01T12:00:00Z/PT6H",
+        "2022-01-01T00:00:00Z/P2DT3H4M5S",
+        // period/end
+        "P1D/2022-01-02T00:00:00.000Z",
+        "PT6H/2022-01-01T18:00:00Z",
+        "P2DT3H4M5S/2022-01-03T03:04:05Z"
+    };
+
+    // Both entry points must agree on every input, whichever parsing path fromString() takes.
+    for (String s : intervalStringRepresentations) {
+      Interval expected = Intervals.of(s);
+      Interval actual = Intervals.fromString(s);
+      Assert.assertEquals("Mismatch for: " + s, expected, actual);
+    }
+  }
+
   @Test
   public void testInvalidInterval()
   {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to