This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-swift.git


The following commit(s) were added to refs/heads/main by this push:
     new 79521cd  fix: move timezone string creation before startTimestamp to avoid nesting assertion (#148)
79521cd is described below

commit 79521cdd1a06cad944dab4ae2c7a788d8244971f
Author: Junjia Ding <[email protected]>
AuthorDate: Wed Mar 25 02:41:03 2026 -0700

    fix: move timezone string creation before startTimestamp to avoid nesting assertion (#148)
    
    ## Problem
    
    `ArrowWriter.writeStreaming()` crashes with a FlatBuffers assertion when
    writing any schema containing a `Timestamp` field with a timezone (e.g.
    `timestamp[us, UTC]`):
    
    ```
    FlatBuffers/FlatBufferBuilder.swift:317: Assertion failed: Object serialization must not be nested
    ```
    
    In `toFBType()` (ArrowWriterHelper.swift), the `.timestamp` case calls
    `fbb.create(string: timezone)` inside the
    `startTimestamp`/`endTimestamp` table context. FlatBuffers'
    `create(string:)` calls `notNested()`, which asserts `!isNested`, but
    `startTimestamp` (which calls `startTable`) has already set
    `isNested = true`.
    
    ## Fix
    
    Move the `fbb.create(string:)` call before `startTimestamp()`. This is
    the standard FlatBuffers pattern: all child objects (strings, vectors,
    tables) must be created before starting their parent table.
    
    ```swift
    // Before (buggy):
    let startOffset = org_apache_arrow_flatbuf_Timestamp.startTimestamp(&fbb)
    // ...
    if let timezone = timestampType.timezone {
        let timezoneOffset = fbb.create(string: timezone) // ASSERTS: table already started
        org_apache_arrow_flatbuf_Timestamp.add(timezone: timezoneOffset, &fbb)
    }
    
    // After (fixed):
    let timezoneOffset = timestampType.timezone.map { fbb.create(string: $0) }
    let startOffset = org_apache_arrow_flatbuf_Timestamp.startTimestamp(&fbb)
    // ...
    if let offset = timezoneOffset {
        org_apache_arrow_flatbuf_Timestamp.add(timezone: offset, &fbb)
    }
    ```
    
    ## Testing
    
    Added `testTimestampWithTimezoneInMemoryToFromStream` which writes a
    `timestamp[us, UTC]` column through `writeStreaming` and reads it back,
    verifying the schema (unit, timezone) and data survive the roundtrip.
    This test would crash with the nesting assertion before the fix.
    
    ## AI disclosure
    The code change and test were written with the assistance of Cursor
    (AI). I identified the bug by tracing the FlatBuffers assertion through
    the startTable/notNested call chain, reviewed all generated code, and
    verified correctness by running the test suite locally.
    
    Generated-by: Cursor
---
 Sources/Arrow/ArrowWriterHelper.swift |  6 ++--
 Tests/ArrowTests/IPCTests.swift       | 56 +++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/Sources/Arrow/ArrowWriterHelper.swift 
b/Sources/Arrow/ArrowWriterHelper.swift
index 7ecb3ab..4a95499 100644
--- a/Sources/Arrow/ArrowWriterHelper.swift
+++ b/Sources/Arrow/ArrowWriterHelper.swift
@@ -107,6 +107,7 @@ func toFBType( // swiftlint:disable:this 
cyclomatic_complexity function_body_len
         return .failure(.invalid("Unable to case to Time64"))
     case .timestamp:
         if let timestampType = arrowType as? ArrowTypeTimestamp {
+            let timezoneOffset = timestampType.timezone.map { 
fbb.create(string: $0) }
             let startOffset = 
org_apache_arrow_flatbuf_Timestamp.startTimestamp(&fbb)
 
             let fbUnit: org_apache_arrow_flatbuf_TimeUnit
@@ -122,9 +123,8 @@ func toFBType( // swiftlint:disable:this 
cyclomatic_complexity function_body_len
             }
             org_apache_arrow_flatbuf_Timestamp.add(unit: fbUnit, &fbb)
 
-            if let timezone = timestampType.timezone {
-                let timezoneOffset = fbb.create(string: timezone)
-                org_apache_arrow_flatbuf_Timestamp.add(timezone: 
timezoneOffset, &fbb)
+            if let offset = timezoneOffset {
+                org_apache_arrow_flatbuf_Timestamp.add(timezone: offset, &fbb)
             }
 
             return 
.success(org_apache_arrow_flatbuf_Timestamp.endTimestamp(&fbb, start: 
startOffset))
diff --git a/Tests/ArrowTests/IPCTests.swift b/Tests/ArrowTests/IPCTests.swift
index 8496d50..58f973e 100644
--- a/Tests/ArrowTests/IPCTests.swift
+++ b/Tests/ArrowTests/IPCTests.swift
@@ -615,5 +615,61 @@ final class IPCFileReaderTests: XCTestCase { // 
swiftlint:disable:this type_body
             throw error
         }
     }
+
+    func makeTimestampWithTimezoneDataset() throws -> (ArrowSchema, 
RecordBatch) {
+        let schema = ArrowSchema.Builder()
+            .addField("ts_utc", type: ArrowTypeTimestamp(.microseconds, 
timezone: "UTC"), isNullable: true)
+            .finish()
+
+        let tsBuilder = try 
ArrowArrayBuilders.loadTimestampArrayBuilder(.microseconds, timezone: "UTC")
+        tsBuilder.append(1609459200000000) // 2021-01-01 00:00:00.000000 UTC
+        tsBuilder.append(nil)
+        tsBuilder.append(1609545600000000) // 2021-01-02 00:00:00.000000 UTC
+
+        let tsHolder = ArrowArrayHolderImpl(try tsBuilder.finish())
+        let result = RecordBatch.Builder()
+            .addColumn("ts_utc", arrowArray: tsHolder)
+            .finish()
+        switch result {
+        case .success(let recordBatch):
+            return (schema, recordBatch)
+        case .failure(let error):
+            throw error
+        }
+    }
+
+    func testTimestampWithTimezoneInMemoryToFromStream() throws {
+        let dataset = try makeTimestampWithTimezoneDataset()
+        let writerInfo = ArrowWriter.Info(.recordbatch, schema: dataset.0, 
batches: [dataset.1])
+        let arrowWriter = ArrowWriter()
+        switch arrowWriter.writeStreaming(writerInfo) {
+        case .success(let writeData):
+            let arrowReader = ArrowReader()
+            switch arrowReader.readStreaming(writeData) {
+            case .success(let result):
+                XCTAssertNotNil(result.schema)
+                let schema = result.schema!
+                XCTAssertEqual(schema.fields.count, 1)
+                XCTAssertEqual(schema.fields[0].name, "ts_utc")
+                XCTAssertEqual(schema.fields[0].type.info, 
ArrowType.ArrowTimestamp)
+                let tsType = schema.fields[0].type as? ArrowTypeTimestamp
+                XCTAssertNotNil(tsType)
+                XCTAssertEqual(tsType!.unit, .microseconds)
+                XCTAssertEqual(tsType!.timezone, "UTC")
+                XCTAssertEqual(result.batches.count, 1)
+                let recordBatch = result.batches[0]
+                XCTAssertEqual(recordBatch.length, 3)
+                let columns = recordBatch.columns
+                XCTAssertEqual(columns[0].nullCount, 1)
+                let tsVal =
+                    "\((columns[0].array as! AsString).asString(0))" // 
swiftlint:disable:this force_cast
+                XCTAssertEqual(tsVal, "2021-01-01 00:00:00.000")
+            case .failure(let error):
+                throw error
+            }
+        case .failure(let error):
+            throw error
+        }
+    }
 }
 // swiftlint:disable:this file_length

Reply via email to