This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
commit acb890565293b0c1e480dbea44411c52cb6444f8 Author: Pranav Toggi <[email protected]> AuthorDate: Fri Aug 22 16:11:37 2025 -0700 [EWT-3210] Make temporal columns to timestamps (#9) * use Arrow Date64 * use Arrow Timestamp instead * fmt fix * fix method name * fix arrow test --- spatialbench-arrow/src/conversions.rs | 14 ++++++++++---- spatialbench-arrow/src/lib.rs | 28 ++++++++++++++-------------- spatialbench-arrow/src/trip.rs | 28 ++++++++++++++++++---------- spatialbench/src/dates.rs | 14 ++++++++++++++ 4 files changed, 56 insertions(+), 28 deletions(-) diff --git a/spatialbench-arrow/src/conversions.rs b/spatialbench-arrow/src/conversions.rs index 2e51719..63aad6e 100644 --- a/spatialbench-arrow/src/conversions.rs +++ b/spatialbench-arrow/src/conversions.rs @@ -33,6 +33,12 @@ pub fn to_arrow_date32(value: TPCHDate) -> i32 { value.to_unix_epoch() } +/// Convert a TPCH date to an Arrow Timestamp (milliseconds since Unix epoch) +#[inline(always)] +pub fn to_arrow_timestamp_millis(value: TPCHDate) -> i64 { + value.to_unix_epoch_seconds() * 1000 +} + /// Converts an iterator of TPCH decimals to an Arrow Decimal128Array pub fn decimal128_array_from_iter<I>(values: I) -> arrow::array::Decimal128Array where @@ -77,14 +83,14 @@ mod tests { } #[test] - fn test_to_arrow_date32() { + fn test_to_arrow_timestamp_millis() { let value = TPCHDate::new(MIN_GENERATE_DATE, 0, 0, 0); - assert_eq!(to_arrow_date32(value), 8035); + assert_eq!(to_arrow_timestamp_millis(value), 694224000000); let value = TPCHDate::new(MIN_GENERATE_DATE + 100, 0, 0, 0); - assert_eq!(to_arrow_date32(value), 8135); + assert_eq!(to_arrow_timestamp_millis(value), 702864000000); let value = TPCHDate::new(MIN_GENERATE_DATE + 1234, 0, 0, 0); - assert_eq!(to_arrow_date32(value), 9269); + assert_eq!(to_arrow_timestamp_millis(value), 800841600000); } } diff --git a/spatialbench-arrow/src/lib.rs b/spatialbench-arrow/src/lib.rs index 0ccceb9..e1b73ae 100644 --- a/spatialbench-arrow/src/lib.rs +++ b/spatialbench-arrow/src/lib.rs @@ -18,20 +18,20 @@ //! // compare the output by pretty printing it //! let formatted_batches = pretty_format_batches(&[batch]).unwrap().to_string(); //! assert_eq!(formatted_batches.lines().collect::<Vec<_>>(), vec![ -//! "+-----------+-----------+-------------+--------------+--------------+---------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+", -//! "| t_tripkey | t_custkey | t_driverkey | t_vehiclekey | t_pickuptime | t_dropofftime | t_fare | t_tip | t_totalamount | t_distance | t_pickuploc | t_dropoffloc |", -//! "+-----------+-----------+-------------+--------------+--------------+---------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+", -//! "| 1 | 215 | 1 | 1 | 1997-07-24 | 1997-07-24 | 0.00034 | 0.00002 | 0.00037 | 0.00014 | 010100000000000000009f65c000000000008056c0 | 0101000000ea6f323f719f65c0a190cff1f28856c0 |", -//! "| 2 | 172 | 1 | 1 | 1997-12-24 | 1997-12-24 | 0.00003 | 0.00000 | 0.00004 | 0.00001 | 010100000000000000800165c000000000001835c0 | 01010000007707047c0f0165c0e360c2aa721735c0 |", -//! "| 3 | 46 | 1 | 1 | 1993-06-27 | 1993-06-27 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 010100000000000000007265c000000000809953c0 | 0101000000123a01b00e7265c0fc9862509e9953c0 |", -//! "| 4 | 40 | 1 | 1 | 1996-08-02 | 1996-08-02 | 0.00005 | 0.00000 | 0.00005 | 0.00002 | 010100000000000000800f56c00000000000c63bc0 | 01010000005c186d7e111056c0435fb4a6fdcb3bc0 |", -//! "| 5 | 232 | 1 | 1 | 1996-08-23 | 1996-08-23 | 0.00002 | 0.00000 | 0.00003 | 0.00001 | 010100000000000000406460c00000000000da4640 | 01010000003da9a3a1ae6460c00036836c17db4640 |", -//! "| 6 | 46 | 1 | 1 | 1994-11-16 | 1994-11-16 | 0.00003 | 0.00000 | 0.00003 | 0.00001 | 010100000000000000002666c000000000806f40c0 | 01010000009fbda7303e2666c0cdb6cb65c06d40c0 |", -//! "| 7 | 284 | 1 | 1 | 1996-01-20 | 1996-01-20 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 010100000000000000002963c00000000000e040c0 | 010100000000000000002963c00000000000e040c0 |", -//! "| 8 | 233 | 1 | 1 | 1995-01-09 | 1995-01-10 | 0.00003 | 0.00000 | 0.00003 | 0.00001 | 010100000000000000008056c000000000c03955c0 | 0101000000c0e91ba00d8156c06e03b14bd83955c0 |", -//! "| 9 | 178 | 1 | 1 | 1993-10-13 | 1993-10-13 | 0.00005 | 0.00001 | 0.00007 | 0.00003 | 010100000000000000005366c00000000000e050c0 | 0101000000a6ef3504e75266c0448c538406e250c0 |", -//! "| 10 | 118 | 1 | 1 | 1994-11-08 | 1994-11-08 | 0.00001 | 0.00000 | 0.00001 | 0.00000 | 010100000000000000008066c000000000c07456c0 | 01010000001459106fe27f66c08d065341837456c0 |", -//! "+-----------+-----------+-------------+--------------+--------------+---------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+" +//! "+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+", +//! "| t_tripkey | t_custkey | t_driverkey | t_vehiclekey | t_pickuptime | t_dropofftime | t_fare | t_tip | t_totalamount | t_distance | t_pickuploc | t_dropoffloc |", +//! "+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+", +//! "| 1 | 215 | 1 | 1 | 1997-07-24T06:58:22 | 1997-07-24T13:59:54 | 0.00034 | 0.00002 | 0.00037 | 0.00014 | 010100000000000000009f65c000000000008056c0 | 0101000000ea6f323f719f65c0a190cff1f28856c0 |", +//! "| 2 | 172 | 1 | 1 | 1997-12-24T08:47:14 | 1997-12-24T09:28:57 | 0.00003 | 0.00000 | 0.00004 | 0.00001 | 010100000000000000800165c000000000001835c0 | 01010000007707047c0f0165c0e360c2aa721735c0 |", +//! "| 3 | 46 | 1 | 1 | 1993-06-27T13:27:07 | 1993-06-27T13:34:51 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 010100000000000000007265c000000000809953c0 | 0101000000123a01b00e7265c0fc9862509e9953c0 |", +//! "| 4 | 40 | 1 | 1 | 1996-08-02T04:14:27 | 1996-08-02T05:29:32 | 0.00005 | 0.00000 | 0.00005 | 0.00002 | 010100000000000000800f56c00000000000c63bc0 | 01010000005c186d7e111056c0435fb4a6fdcb3bc0 |", +//! "| 5 | 232 | 1 | 1 | 1996-08-23T12:48:20 | 1996-08-23T13:36:15 | 0.00002 | 0.00000 | 0.00003 | 0.00001 | 010100000000000000406460c00000000000da4640 | 01010000003da9a3a1ae6460c00036836c17db4640 |", +//! "| 6 | 46 | 1 | 1 | 1994-11-16T16:39:14 | 1994-11-16T17:26:07 | 0.00003 | 0.00000 | 0.00003 | 0.00001 | 010100000000000000002666c000000000806f40c0 | 01010000009fbda7303e2666c0cdb6cb65c06d40c0 |", +//! "| 7 | 284 | 1 | 1 | 1996-01-20T06:18:56 | 1996-01-20T06:18:56 | 0.00000 | 0.00000 | 0.00000 | 0.00000 | 010100000000000000002963c00000000000e040c0 | 010100000000000000002963c00000000000e040c0 |", +//! "| 8 | 233 | 1 | 1 | 1995-01-09T23:26:54 | 1995-01-10T00:16:28 | 0.00003 | 0.00000 | 0.00003 | 0.00001 | 010100000000000000008056c000000000c03955c0 | 0101000000c0e91ba00d8156c06e03b14bd83955c0 |", +//! "| 9 | 178 | 1 | 1 | 1993-10-13T11:07:04 | 1993-10-13T12:42:27 | 0.00005 | 0.00001 | 0.00007 | 0.00003 | 010100000000000000005366c00000000000e050c0 | 0101000000a6ef3504e75266c0448c538406e250c0 |", +//! "| 10 | 118 | 1 | 1 | 1994-11-08T21:05:58 | 1994-11-08T21:21:29 | 0.00001 | 0.00000 | 0.00001 | 0.00000 | 010100000000000000008066c000000000c07456c0 | 01010000001459106fe27f66c08d065341837456c0 |", +//! "+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+" //! ]); //! ``` diff --git a/spatialbench-arrow/src/trip.rs b/spatialbench-arrow/src/trip.rs index 8305dcb..bf866de 100644 --- a/spatialbench-arrow/src/trip.rs +++ b/spatialbench-arrow/src/trip.rs @@ -1,7 +1,7 @@ -use crate::conversions::{decimal128_array_from_iter, to_arrow_date32}; +use crate::conversions::{decimal128_array_from_iter, to_arrow_timestamp_millis}; use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator}; -use arrow::array::{BinaryArray, Date32Array, Int64Array, RecordBatch}; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow::array::{BinaryArray, Int64Array, RecordBatch, TimestampMillisecondArray}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use geo::Geometry; use geozero::{CoordDimensions, ToWkb}; use spatialbench::generators::{Trip, TripGenerator, TripGeneratorIterator}; @@ -71,13 +71,13 @@ impl Iterator for TripArrow { let t_custkey = Int64Array::from_iter_values(rows.iter().map(|row| row.t_custkey)); let t_driverkey = Int64Array::from_iter_values(rows.iter().map(|row| row.t_driverkey)); let t_vehiclekey = Int64Array::from_iter_values(rows.iter().map(|row| row.t_vehiclekey)); - let t_pickuptime = Date32Array::from_iter_values( - rows.iter().map(|row| row.t_pickuptime).map(to_arrow_date32), + let t_pickuptime = TimestampMillisecondArray::from_iter_values( + rows.iter() + .map(|row| to_arrow_timestamp_millis(row.t_pickuptime)), ); - let t_dropofftime = Date32Array::from_iter_values( + let t_dropofftime = TimestampMillisecondArray::from_iter_values( rows.iter() - .map(|row| row.t_dropofftime) - .map(to_arrow_date32), + .map(|row| to_arrow_timestamp_millis(row.t_dropofftime)), ); let t_fare = decimal128_array_from_iter(rows.iter().map(|row| row.t_fare)); let t_tip = decimal128_array_from_iter(rows.iter().map(|row| row.t_tip)); @@ -126,8 +126,16 @@ fn make_trip_schema() -> SchemaRef { Field::new("t_custkey", DataType::Int64, false), Field::new("t_driverkey", DataType::Int64, false), Field::new("t_vehiclekey", DataType::Int64, false), - Field::new("t_pickuptime", DataType::Date32, false), - Field::new("t_dropofftime", DataType::Date32, false), + Field::new( + "t_pickuptime", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + ), + Field::new( + "t_dropofftime", + DataType::Timestamp(TimeUnit::Millisecond, None), + false, + ), Field::new("t_fare", DataType::Decimal128(15, 5), false), Field::new("t_tip", DataType::Decimal128(15, 5), false), Field::new("t_totalamount", DataType::Decimal128(15, 5), false), diff --git a/spatialbench/src/dates.rs b/spatialbench/src/dates.rs index 2bc59e5..d8ccaeb 100644 --- a/spatialbench/src/dates.rs +++ b/spatialbench/src/dates.rs @@ -224,6 +224,20 @@ impl TPCHDate { pub fn to_unix_epoch(&self) -> i32 { self.date_index + Self::UNIX_EPOCH_OFFSET } + + /// Returns the number of seconds since the Unix epoch this date represents, + /// including the time components (hour, minute, second) + #[inline(always)] + pub fn to_unix_epoch_seconds(&self) -> i64 { + // Start with days since Unix epoch converted to seconds + let base_seconds = (self.date_index + Self::UNIX_EPOCH_OFFSET) as i64 * 24 * 60 * 60; + + // Add the time components in seconds + let time_seconds = + (self.hour as i64) * 3600 + (self.minute as i64) * 60 + (self.second as i64); + + base_seconds + time_seconds + } } /// Creates a index table of formatted strings
