ryerraguntla commented on code in PR #2933: URL: https://github.com/apache/iggy/pull/2933#discussion_r3002341448
########## core/connectors/sinks/influxdb_sink/src/lib.rs: ########## @@ -0,0 +1,1123 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use async_trait::async_trait; +use base64::{Engine as _, engine::general_purpose}; +use bytes::Bytes; +use iggy_common::serde_secret::serialize_secret; +use iggy_connector_sdk::retry::{ + CircuitBreaker, build_retry_client, exponential_backoff, jitter, parse_duration, +}; +use iggy_connector_sdk::{ + ConsumedMessage, Error, MessagesMetadata, Sink, TopicMetadata, sink_connector, +}; +use reqwest::Url; +use reqwest_middleware::ClientWithMiddleware; +use secrecy::{ExposeSecret, SecretString}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Duration; +use std::time::SystemTime; +use std::time::UNIX_EPOCH; +use tracing::{debug, error, info, warn}; +sink_connector!(InfluxDbSink); + +const DEFAULT_MAX_RETRIES: u32 = 3; +const DEFAULT_RETRY_DELAY: &str = "1s"; +const DEFAULT_TIMEOUT: &str = "30s"; +const DEFAULT_PRECISION: &str = "us"; +// Maximum attempts for open() connectivity retries +const DEFAULT_MAX_OPEN_RETRIES: u32 = 10; +// Cap for exponential backoff in open() — never wait longer than this +const DEFAULT_OPEN_RETRY_MAX_DELAY: &str = "60s"; +// Cap for exponential backoff on per-write retries — kept short so a +// transient InfluxDB blip does not stall message delivery for too long +const DEFAULT_RETRY_MAX_DELAY: &str = "5s"; +// How many consecutive batch failures open the circuit breaker +const DEFAULT_CIRCUIT_BREAKER_THRESHOLD: u32 = 5; +// How long the circuit stays open before allowing a probe attempt +const DEFAULT_CIRCUIT_COOL_DOWN: &str = "30s"; + +// --------------------------------------------------------------------------- +// Main connector structs +// --------------------------------------------------------------------------- + +#[derive(Debug)] +pub struct InfluxDbSink { + pub id: u32, + config: InfluxDbSinkConfig, + /// `None` until `open()` is called. Wraps `reqwest::Client` with + /// [`HttpRetryMiddleware`] so retry/back-off/jitter is handled + /// transparently by the middleware stack instead of a hand-rolled loop. + client: Option<ClientWithMiddleware>, + /// Cached once in `open()` — config fields never change at runtime. + write_url: Option<Url>, + messages_attempted: AtomicU64, + write_success: AtomicU64, + write_errors: AtomicU64, + verbose: bool, + retry_delay: Duration, + circuit_breaker: Arc<CircuitBreaker>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InfluxDbSinkConfig { + pub url: String, + pub org: String, + pub bucket: String, + #[serde(serialize_with = "serialize_secret")] + pub token: SecretString, + pub measurement: Option<String>, + pub precision: Option<String>, + pub batch_size: Option<u32>, + pub include_metadata: Option<bool>, + pub include_checksum: Option<bool>, + pub include_origin_timestamp: Option<bool>, + pub include_stream_tag: Option<bool>, + pub include_topic_tag: Option<bool>, + pub include_partition_tag: Option<bool>, + pub payload_format: Option<String>, + pub verbose_logging: Option<bool>, + pub max_retries: Option<u32>, + pub retry_delay: Option<String>, + pub timeout: Option<String>, + // How many times open() will retry before giving up + pub max_open_retries: Option<u32>, + // Upper cap on open() backoff delay — can be set high (e.g. "60s") for + // patient startup without affecting per-write retry behaviour + pub open_retry_max_delay: Option<String>, + // Upper cap on per-write retry backoff — kept short so a transient blip + // does not stall message delivery; independent of open_retry_max_delay + pub retry_max_delay: Option<String>, + // Circuit breaker configuration + pub circuit_breaker_threshold: Option<u32>, + pub circuit_breaker_cool_down: Option<String>, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +enum PayloadFormat { + #[default] + Json, + Text, + Base64, +} + +impl PayloadFormat { + fn from_config(value: Option<&str>) -> Self { + match value.map(|v| v.to_ascii_lowercase()).as_deref() { + Some("text") | Some("utf8") => PayloadFormat::Text, + Some("base64") | Some("raw") => PayloadFormat::Base64, + Some("json") => PayloadFormat::Json, + other => { + warn!( + "Unrecognized payload_format value {:?}, falling back to JSON. \ + Valid values are: \"json\", \"text\", \"utf8\", \"base64\", \"raw\".", + other + ); + PayloadFormat::Json + } + } + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Write an escaped measurement name into `buf`. +/// Escapes: `\` → `\\`, `,` → `\,`, ` ` → `\ `, `\n` → `\\n`, `\r` → `\\r` +/// +/// Newline (`\n`) and carriage-return (`\r`) are the InfluxDB line-protocol +/// record delimiters; a literal newline inside a measurement name would split +/// the line and corrupt the batch. +fn write_measurement(buf: &mut String, value: &str) { + for ch in value.chars() { + match ch { + '\\' => buf.push_str("\\\\"), + ',' => buf.push_str("\\,"), + ' ' => buf.push_str("\\ "), + '\n' => buf.push_str("\\n"), + '\r' => buf.push_str("\\r"), + _ => buf.push(ch), + } + } +} + +/// Write an escaped tag key/value into `buf`. +/// Escapes: `\` → `\\`, `,` → `\,`, `=` → `\=`, ` ` → `\ `, `\n` → `\\n`, `\r` → `\\r` +/// +/// Newline and carriage-return are escaped for the same reason as in +/// [`write_measurement`]: they are InfluxDB line-protocol record delimiters. +fn write_tag_value(buf: &mut String, value: &str) { + for ch in value.chars() { + match ch { + '\\' => buf.push_str("\\\\"), + ',' => buf.push_str("\\,"), + '=' => buf.push_str("\\="), + ' ' => buf.push_str("\\ "), + '\n' => buf.push_str("\\n"), + '\r' => buf.push_str("\\r"), + _ => buf.push(ch), + } + } +} + +/// Write an escaped string field value (without surrounding quotes) into `buf`. +/// Escapes: `\` → `\\`, `"` → `\"`, `\n` → `\\n`, `\r` → `\\r` +/// +/// Newline and carriage-return are the InfluxDB line-protocol record +/// delimiters; a literal newline inside a string field value (e.g. from a +/// multi-line text payload) would split the line and corrupt the batch. +fn write_field_string(buf: &mut String, value: &str) { + for ch in value.chars() { + match ch { + '\\' => buf.push_str("\\\\"), + '"' => buf.push_str("\\\""), + '\n' => buf.push_str("\\n"), + '\r' => buf.push_str("\\r"), + _ => buf.push(ch), + } + } +} + +// --------------------------------------------------------------------------- +// InfluxDbSink implementation +// --------------------------------------------------------------------------- + +impl InfluxDbSink { + pub fn new(id: u32, config: InfluxDbSinkConfig) -> Self { + let verbose = config.verbose_logging.unwrap_or(false); + let retry_delay = parse_duration(config.retry_delay.as_deref(), DEFAULT_RETRY_DELAY); + + // Build circuit breaker from config + let cb_threshold = config + .circuit_breaker_threshold + .unwrap_or(DEFAULT_CIRCUIT_BREAKER_THRESHOLD); + let cb_cool_down = parse_duration( + config.circuit_breaker_cool_down.as_deref(), + DEFAULT_CIRCUIT_COOL_DOWN, + ); + + InfluxDbSink { + id, + config, + client: None, + write_url: None, + messages_attempted: AtomicU64::new(0), + write_success: AtomicU64::new(0), + write_errors: AtomicU64::new(0), + verbose, + retry_delay, + circuit_breaker: Arc::new(CircuitBreaker::new(cb_threshold, cb_cool_down)), + } + } + + fn build_raw_client(&self) -> Result<reqwest::Client, Error> { + let timeout = parse_duration(self.config.timeout.as_deref(), DEFAULT_TIMEOUT); + reqwest::Client::builder() + .timeout(timeout) + .build() + .map_err(|e| Error::InitError(format!("Failed to create HTTP client: {e}"))) + } + + fn build_write_url(&self) -> Result<Url, Error> { + let base = self.config.url.trim_end_matches('/'); + let mut url = Url::parse(&format!("{base}/api/v2/write")) + .map_err(|e| Error::InvalidConfigValue(format!("Invalid InfluxDB URL: {e}")))?; + + let precision = self + .config + .precision + .as_deref() + .unwrap_or(DEFAULT_PRECISION); + url.query_pairs_mut() + .append_pair("org", &self.config.org) + .append_pair("bucket", &self.config.bucket) + .append_pair("precision", precision); + + Ok(url) + } + + fn build_health_url(&self) -> Result<Url, Error> { + let base = self.config.url.trim_end_matches('/'); + Url::parse(&format!("{base}/health")) + .map_err(|e| Error::InvalidConfigValue(format!("Invalid InfluxDB URL: {e}"))) + } + + /// Single connectivity probe using the provided raw client (no retry). + /// The caller (`check_connectivity_with_retry`) is responsible for the + /// outer retry loop, which uses different delay bounds than per-write retries. + async fn check_connectivity(&self, client: &reqwest::Client) -> Result<(), Error> { + let url = self.build_health_url()?; + + let response = client + .get(url) + .send() + .await + .map_err(|e| Error::Connection(format!("InfluxDB health check failed: {e}")))?; + + if !response.status().is_success() { + let status = response.status(); + let body = response + .text() + .await + .unwrap_or_else(|_| "failed to read response body".to_string()); + return Err(Error::Connection(format!( + "InfluxDB health check returned status {status}: {body}" + ))); + } + + Ok(()) + } + + /// Retry connectivity check with exponential backoff + jitter instead of + /// failing hard on the first attempt. + /// + /// Uses a separate `max_open_retries` / `open_retry_max_delay` so startup + /// can wait patiently for InfluxDB without affecting the per-write retry + /// parameters used by the middleware during normal operation. + async fn check_connectivity_with_retry(&self, client: &reqwest::Client) -> Result<(), Error> { + let max_open_retries = self + .config + .max_open_retries + .unwrap_or(DEFAULT_MAX_OPEN_RETRIES) + .max(1); + + let max_delay = parse_duration( + self.config.open_retry_max_delay.as_deref(), + DEFAULT_OPEN_RETRY_MAX_DELAY, + ); + + let mut attempt = 0u32; + loop { + match self.check_connectivity(client).await { + Ok(()) => { + if attempt > 0 { + info!( + "InfluxDB connectivity established after {attempt} retries \ + for sink connector ID: {}", + self.id + ); + } + return Ok(()); + } + Err(e) => { + attempt += 1; + if attempt >= max_open_retries { + error!( + "InfluxDB connectivity check failed after {attempt} attempts \ + for sink connector ID: {}. Giving up: {e}", + self.id + ); + return Err(e); + } + // Exponential backoff, with jitter + let backoff = jitter(exponential_backoff(self.retry_delay, attempt, max_delay)); + warn!( + "InfluxDB health check failed (attempt {attempt}/{max_open_retries}) \ + for sink connector ID: {}. Retrying in {backoff:?}: {e}", + self.id + ); + tokio::time::sleep(backoff).await; + } + } + } + } + + fn get_client(&self) -> Result<&ClientWithMiddleware, Error> { + self.client + .as_ref() + .ok_or_else(|| Error::Connection("InfluxDB client is not initialized".to_string())) + } + + fn measurement(&self) -> &str { + self.config + .measurement + .as_deref() + .unwrap_or("iggy_messages") + } + + fn payload_format(&self) -> PayloadFormat { Review Comment: Good one. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
