kriti-sc commented on code in PR #2889: URL: https://github.com/apache/iggy/pull/2889#discussion_r3149690999
########## core/integration/tests/connectors/fixtures/delta/fixture.rs: ########## @@ -0,0 +1,391 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use async_trait::async_trait; +use deltalake::kernel::{DataType, PrimitiveType, StructField}; +use deltalake::operations::create::CreateBuilder; +use integration::harness::{TestBinaryError, TestFixture}; +use std::collections::HashMap; +use std::path::PathBuf; +use tempfile::TempDir; +use testcontainers_modules::testcontainers::core::{IntoContainerPort, WaitFor}; +use testcontainers_modules::testcontainers::runners::AsyncRunner; +use testcontainers_modules::testcontainers::{ContainerAsync, GenericImage, ImageExt}; +use tracing::info; +use uuid::Uuid; + +const ENV_SINK_TABLE_URI: &str = "IGGY_CONNECTORS_SINK_DELTA_PLUGIN_CONFIG_TABLE_URI"; +const ENV_SINK_PATH: &str = "IGGY_CONNECTORS_SINK_DELTA_PATH"; +const ENV_SINK_STORAGE_BACKEND_TYPE: &str = + "IGGY_CONNECTORS_SINK_DELTA_PLUGIN_CONFIG_STORAGE_BACKEND_TYPE"; +const ENV_SINK_AWS_S3_ACCESS_KEY: &str = + "IGGY_CONNECTORS_SINK_DELTA_PLUGIN_CONFIG_AWS_S3_ACCESS_KEY"; +const ENV_SINK_AWS_S3_SECRET_KEY: &str = + "IGGY_CONNECTORS_SINK_DELTA_PLUGIN_CONFIG_AWS_S3_SECRET_KEY"; +const ENV_SINK_AWS_S3_REGION: &str = 
"IGGY_CONNECTORS_SINK_DELTA_PLUGIN_CONFIG_AWS_S3_REGION"; +const ENV_SINK_AWS_S3_ENDPOINT_URL: &str = + "IGGY_CONNECTORS_SINK_DELTA_PLUGIN_CONFIG_AWS_S3_ENDPOINT_URL"; +const ENV_SINK_AWS_S3_ALLOW_HTTP: &str = + "IGGY_CONNECTORS_SINK_DELTA_PLUGIN_CONFIG_AWS_S3_ALLOW_HTTP"; + +const MINIO_IMAGE: &str = "minio/minio"; +const MINIO_TAG: &str = "RELEASE.2025-09-07T16-13-09Z"; +const MINIO_PORT: u16 = 9000; +const MINIO_CONSOLE_PORT: u16 = 9001; +const MINIO_ACCESS_KEY: &str = "admin"; +const MINIO_SECRET_KEY: &str = "password"; +const MINIO_BUCKET: &str = "delta-warehouse"; + +pub struct DeltaFixture { + _temp_dir: TempDir, + table_path: PathBuf, +} + +async fn count_rows( + table_uri: url::Url, + storage_options: HashMap<String, String>, +) -> Result<usize, TestBinaryError> { + use deltalake::arrow::array::Int64Array; + + let table = deltalake::open_table_with_storage_options(table_uri, storage_options) + .await + .map_err(|e| TestBinaryError::InvalidState { + message: format!("Failed to open delta table: {e}"), + })?; + + let batch = table + .snapshot() + .map_err(|e| TestBinaryError::InvalidState { + message: format!("Failed to get table snapshot: {e}"), + })? 
+ .add_actions_table(false) + .map_err(|e| TestBinaryError::InvalidState { + message: format!("Failed to get add actions table: {e}"), + })?; + + let total = batch + .column_by_name("num_records") + .and_then(|col| col.as_any().downcast_ref::<Int64Array>()) + .map(|arr| arr.iter().flatten().sum::<i64>() as usize) + .unwrap_or(0); + + Ok(total) +} + +async fn wait_for_row_count( + table_uri: url::Url, + storage_options: HashMap<String, String>, + expected_rows: usize, + max_attempts: usize, + interval_ms: u64, +) -> Result<usize, TestBinaryError> { + for _ in 0..max_attempts { + let count = count_rows(table_uri.clone(), storage_options.clone()) + .await + .unwrap_or(0); + if count >= expected_rows { + info!("Found {count} rows in delta table (required: {expected_rows})"); + return Ok(count); + } + tokio::time::sleep(std::time::Duration::from_millis(interval_ms)).await; + } + + let final_count = count_rows(table_uri, storage_options).await.unwrap_or(0); + Err(TestBinaryError::InvalidState { + message: format!( + "Expected at least {expected_rows} rows, found {final_count} after {max_attempts} attempts" + ), + }) +} + +fn table_columns() -> Vec<StructField> { + vec![ + StructField::new("id", DataType::Primitive(PrimitiveType::Long), true), + StructField::new("name", DataType::Primitive(PrimitiveType::String), true), + StructField::new("count", DataType::Primitive(PrimitiveType::Integer), true), + StructField::new("amount", DataType::Primitive(PrimitiveType::Double), true), + StructField::new("active", DataType::Primitive(PrimitiveType::Boolean), true), + StructField::new( + "timestamp", + DataType::Primitive(PrimitiveType::TimestampNtz), Review Comment: Your point is valid, but testing the `string_to_timestamp` branch is not an integration test concern. It is a unit test concern (and the unit test exists). IMO, the integration tests' responsibility is to ensure end-to-end wiring from the Iggy topic into the delta lake.
Let me know, @hubcio. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
