This is an automated email from the ASF dual-hosted git repository. kriskras99 pushed a commit to branch feat/enums in repository https://gitbox.apache.org/repos/asf/avro-rs.git
commit 5e69691a604d8397a5ced93bdfad445cc7949fe6 Author: Kriskras99 <[email protected]> AuthorDate: Tue Mar 3 22:32:41 2026 +0100 feat: Replace `to_datum*` functions with `GenericDatumWriter` --- avro/benches/single.rs | 67 +++++++++- avro/src/lib.rs | 6 +- avro/src/schema/mod.rs | 12 +- avro/src/serde/de.rs | 14 +- avro/src/writer/datum.rs | 208 +++++++++++++++++++----------- avro/src/writer/single_object.rs | 40 +++++- avro/tests/avro-3786.rs | 28 +++- avro/tests/avro-3787.rs | 12 +- avro/tests/io.rs | 48 +++++-- avro/tests/schema.rs | 22 ++-- avro/tests/to_from_avro_datum_schemata.rs | 17 ++- 11 files changed, 339 insertions(+), 135 deletions(-) diff --git a/avro/benches/single.rs b/avro/benches/single.rs index 05d8453..862a15f 100644 --- a/avro/benches/single.rs +++ b/avro/benches/single.rs @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. +use apache_avro::writer::datum::GenericDatumWriter; use apache_avro::{ schema::Schema, - to_avro_datum, types::{Record, Value}, }; use criterion::{Criterion, criterion_group, criterion_main}; +use std::time::Duration; const RAW_SMALL_SCHEMA: &str = r#" { @@ -174,20 +175,74 @@ fn make_big_record() -> Result<(Schema, Value), Box<dyn std::error::Error>> { fn bench_small_schema_write_record(c: &mut Criterion) { let (schema, record) = make_small_record().unwrap(); c.bench_function("small record", |b| { - b.iter(|| to_avro_datum(&schema, record.clone())) + b.iter(|| { + GenericDatumWriter::builder(&schema) + .build() + .unwrap() + .write_value_to_vec(record.clone()) + }) }); } fn bench_big_schema_write_record(c: &mut Criterion) { let (schema, record) = make_big_record().unwrap(); c.bench_function("big record", |b| { - b.iter(|| to_avro_datum(&schema, record.clone())) + b.iter(|| { + GenericDatumWriter::builder(&schema) + .build() + .unwrap() + .write_value_to_vec(record.clone()) + }) + }); +} + +fn bench_small_schema_write_record_reuse_datum_writer(c: &mut Criterion) { + let (schema, record) = make_small_record().unwrap(); + let writer = GenericDatumWriter::builder(&schema).build().unwrap(); + c.bench_function("small record (reused writer)", |b| { + b.iter(|| writer.write_value_ref(&mut Vec::new(), &record)) + }); +} + +fn bench_big_schema_write_record_reuse_datum_writer(c: &mut Criterion) { + let (schema, record) = make_big_record().unwrap(); + let writer = GenericDatumWriter::builder(&schema).build().unwrap(); + c.bench_function("big record (reused writer)", |b| { + b.iter(|| writer.write_value_ref(&mut Vec::new(), &record)) + }); +} + +fn bench_small_schema_write_record_no_validation(c: &mut Criterion) { + let (schema, record) = make_small_record().unwrap(); + let writer = GenericDatumWriter::builder(&schema) + .validate(false) + .build() + .unwrap(); + c.bench_function("small record (no validation)", |b| { + b.iter(|| writer.write_value_ref(&mut Vec::new(), &record)) + }); +} + +fn bench_big_schema_write_record_no_validation(c: &mut Criterion) { + let (schema, record) = make_big_record().unwrap(); + let writer = GenericDatumWriter::builder(&schema) + .validate(false) + .build() + .unwrap(); + c.bench_function("big record (no validation)", |b| { + b.iter(|| writer.write_value_ref(&mut Vec::new(), &record)) }); } criterion_group!( - benches, - bench_small_schema_write_record, - bench_big_schema_write_record + name = benches; + config = Criterion::default().sample_size(200).measurement_time(Duration::from_secs(10)); + targets = + bench_small_schema_write_record, + bench_big_schema_write_record, + bench_small_schema_write_record_reuse_datum_writer, + bench_big_schema_write_record_reuse_datum_writer, + bench_small_schema_write_record_no_validation, + bench_big_schema_write_record_no_validation, ); criterion_main!(benches); diff --git a/avro/src/lib.rs b/avro/src/lib.rs index 6edded5..314d596 100644 --- a/avro/src/lib.rs +++ b/avro/src/lib.rs @@ -56,7 +56,6 @@ mod decimal; mod decode; mod duration; mod encode; -mod writer; #[cfg(doc)] pub mod documentation; @@ -71,6 +70,7 @@ pub mod serde; pub mod types; pub mod util; pub mod validator; +pub mod writer; #[expect(deprecated)] pub use crate::{ @@ -103,6 +103,10 @@ pub use reader::{ pub use schema::Schema; pub use serde::{AvroSchema, AvroSchemaComponent, from_value, to_value}; pub use uuid::Uuid; +#[expect( + deprecated, + reason = "Still need to export it until we remove it completely" +)] pub use writer::{ Clearable, Writer, WriterBuilder, datum::{to_avro_datum, to_avro_datum_schemata, write_avro_datum_ref}, diff --git a/avro/src/schema/mod.rs b/avro/src/schema/mod.rs index 98600fa..97a5824 100644 --- a/avro/src/schema/mod.rs +++ b/avro/src/schema/mod.rs @@ -1168,7 +1168,9 @@ fn field_ordering_position(field: &str) -> Option<usize> { #[cfg(test)] mod tests { use super::*; - use crate::{error::Details, rabin::Rabin, reader::datum::GenericDatumReader}; + use crate::reader::datum::GenericDatumReader; + use crate::writer::datum::GenericDatumWriter; + use crate::{error::Details, rabin::Rabin}; use apache_avro_test_helper::{ TestResult, logger::{assert_logged, assert_not_logged}, @@ -2992,7 +2994,9 @@ mod tests { avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) @@ -3576,7 +3580,9 @@ mod tests { avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; // Now, attempt to deserialize using the reader schema. let reader_schema = Schema::parse(&reader_schema)?; diff --git a/avro/src/serde/de.rs b/avro/src/serde/de.rs index 3144fac..b9900d5 100644 --- a/avro/src/serde/de.rs +++ b/avro/src/serde/de.rs @@ -953,7 +953,9 @@ mod tests { use apache_avro_test_helper::TestResult; use super::*; - use crate::{Decimal, reader::datum::GenericDatumReader}; + use crate::Decimal; + use crate::reader::datum::GenericDatumReader; + use crate::writer::datum::GenericDatumWriter; #[derive(PartialEq, Eq, Serialize, Deserialize, Debug)] pub struct StringEnum { @@ -989,7 +991,11 @@ mod tests { // encode into avro let value = crate::to_value(&data)?; - let mut buf = std::io::Cursor::new(crate::to_avro_datum(&schema, value)?); + let mut buf = std::io::Cursor::new( + GenericDatumWriter::builder(&schema) + .build()? + .write_value_to_vec(value)?, + ); // decode from avro let value = GenericDatumReader::builder(&schema) @@ -1033,7 +1039,9 @@ mod tests { let value = crate::to_value(data)?; // The following sentence have to fail has the data is wrong. - let encoded_data = crate::to_avro_datum(&schema, value); + let encoded_data = GenericDatumWriter::builder(&schema) + .build()? + .write_value_to_vec(value); assert!(encoded_data.is_err()); diff --git a/avro/src/writer/datum.rs b/avro/src/writer/datum.rs index 5708eed..a45f704 100644 --- a/avro/src/writer/datum.rs +++ b/avro/src/writer/datum.rs @@ -15,28 +15,123 @@ // specific language governing permissions and limitations // under the License. -use std::io::Write; - +use bon::bon; use serde::Serialize; +use std::io::Write; use crate::{ AvroResult, Schema, - encode::{encode, encode_internal}, + encode::encode_internal, error::Details, - schema::{NamesRef, ResolvedOwnedSchema, ResolvedSchema}, + schema::{NamesRef, ResolvedSchema}, serde::ser_schema::SchemaAwareWriteSerializer, types::Value, }; +pub struct GenericDatumWriter<'s> { + schema: &'s Schema, + resolved: ResolvedSchema<'s>, + validate: bool, +} + +#[bon] +impl<'s> GenericDatumWriter<'s> { + #[builder] + pub fn new( + #[builder(start_fn)] schema: &'s Schema, + resolved_schemata: Option<ResolvedSchema<'s>>, + #[builder(default = true)] validate: bool, + ) -> AvroResult<Self> { + let resolved = if let Some(resolved) = resolved_schemata { + resolved + } else { + ResolvedSchema::try_from(schema)? + }; + Ok(Self { + schema, + resolved, + validate, + }) + } +} + +impl<'s, S: generic_datum_writer_builder::State> GenericDatumWriterBuilder<'s, S> { + /// Set the schemata that will be used to resolve any references in the schema. + /// + /// This is equivalent to `.resolved_schemata(ResolvedSchema::new_with_schemata(schemata)?)`. + /// If you already have a [`ResolvedSchema`], use that function instead. + pub fn schemata( + self, + schemata: Vec<&'s Schema>, + ) -> AvroResult< + GenericDatumWriterBuilder<'s, generic_datum_writer_builder::SetResolvedSchemata<S>>, + > + where + S::ResolvedSchemata: generic_datum_writer_builder::IsUnset, + { + let resolved = ResolvedSchema::new_with_schemata(schemata)?; + Ok(self.resolved_schemata(resolved)) + } +} + +impl GenericDatumWriter<'_> { + /// Write a value to the writer. + pub fn write_value<W: Write, V: Into<Value>>( + &self, + writer: &mut W, + value: V, + ) -> AvroResult<usize> { + let value = value.into(); + self.write_value_ref(writer, &value) + } + + pub fn write_value_ref<W: Write>(&self, writer: &mut W, value: &Value) -> AvroResult<usize> { + if self.validate + && self.resolved.get_schemata().iter().all(|s| { + value + .validate_internal(s, self.resolved.get_names(), None) + .is_some() + }) + { + return Err(Details::Validation.into()); + } + encode_internal(value, self.schema, self.resolved.get_names(), None, writer) + } + + /// Write a value to a [`Vec`]. + pub fn write_value_to_vec<V: Into<Value>>(&self, value: V) -> AvroResult<Vec<u8>> { + let mut vec = Vec::new(); + self.write_value(&mut vec, value)?; + Ok(vec) + } + + pub fn write_ser<W: Write, T: Serialize>( + &self, + writer: &mut W, + value: &T, + ) -> AvroResult<usize> { + let mut serializer = + SchemaAwareWriteSerializer::new(writer, self.schema, self.resolved.get_names(), None); + value.serialize(&mut serializer) + } + + pub fn write_ser_to_vec<T: Serialize>(&self, value: &T) -> AvroResult<Vec<u8>> { + let mut vec = Vec::new(); + self.write_ser(&mut vec, value)?; + Ok(vec) + } +} + /// Encode a value into raw Avro data, also performs schema validation. /// /// **NOTE**: This function has a quite small niche of usage and does NOT generate headers and sync /// markers; use [`Writer`] to be fully Avro-compatible if you don't know what /// you are doing, instead. +#[deprecated(since = "0.22.0", note = "Use GenericDatumWriter instead")] pub fn to_avro_datum<T: Into<Value>>(schema: &Schema, value: T) -> AvroResult<Vec<u8>> { - let mut buffer = Vec::new(); - write_avro_datum(schema, value, &mut buffer)?; - Ok(buffer) + GenericDatumWriter::builder(schema) + .build()? + .write_value_to_vec(value) } /// Write the referenced [Serialize]able object to the provided [Write] object. @@ -46,6 +141,7 @@ pub fn to_avro_datum<T: Into<Value>>(schema: &Schema, value: T) -> AvroResult<Ve /// **NOTE**: This function has a quite small niche of usage and does **NOT** generate headers and sync /// markers; use [`append_ser`](Writer::append_ser) to be fully Avro-compatible /// if you don't know what you are doing, instead. +#[deprecated(since = "0.22.0", note = "Use GenericDatumWriter instead")] pub fn write_avro_datum_ref<T: Serialize, W: Write>( schema: &Schema, names: &NamesRef, @@ -60,80 +156,20 @@ pub fn write_avro_datum_ref<T: Serialize, W: Write>( /// /// If the provided `schema` is incomplete then its dependencies must be /// provided in `schemata` +#[deprecated(since = "0.22.0", note = "Use GenericDatumWriter instead")] pub fn to_avro_datum_schemata<T: Into<Value>>( schema: &Schema, schemata: Vec<&Schema>, value: T, ) -> AvroResult<Vec<u8>> { - let mut buffer = Vec::new(); - write_avro_datum_schemata(schema, schemata, value, &mut buffer)?; - Ok(buffer) -} - -/// Encode a value into raw Avro data, also performs schema validation. -/// -/// This is an internal function which gets the bytes buffer where to write as parameter instead of -/// creating a new one like `to_avro_datum`. -pub(super) fn write_avro_datum<T: Into<Value>, W: Write>( - schema: &Schema, - value: T, - writer: &mut W, -) -> AvroResult<()> { - let avro = value.into(); - if !avro.validate(schema) { - return Err(Details::Validation.into()); - } - encode(&avro, schema, writer)?; - Ok(()) -} - -pub(super) fn write_avro_datum_schemata<T: Into<Value>>( - schema: &Schema, - schemata: Vec<&Schema>, - value: T, - buffer: &mut Vec<u8>, -) -> AvroResult<usize> { - let avro = value.into(); - let rs = ResolvedSchema::try_from(schemata)?; - let names = rs.get_names(); - let enclosing_namespace = schema.namespace(); - if let Some(_err) = avro.validate_internal(schema, names, enclosing_namespace) { - return Err(Details::Validation.into()); - } - encode_internal(&avro, schema, names, enclosing_namespace, buffer) -} - -pub(super) fn write_value_ref_owned_resolved<W: Write>( - resolved_schema: &ResolvedOwnedSchema, - value: &Value, - writer: &mut W, -) -> AvroResult<usize> { - let root_schema = resolved_schema.get_root_schema(); - if let Some(reason) = value.validate_internal( - root_schema, - resolved_schema.get_names(), - root_schema.namespace(), - ) { - return Err(Details::ValidationWithReason { - value: value.clone(), - schema: root_schema.clone(), - reason, - } - .into()); - } - encode_internal( - value, - root_schema, - resolved_schema.get_names(), - root_schema.namespace(), - writer, - ) + GenericDatumWriter::builder(schema) + .schemata(schemata)? + .build()? + .write_value_to_vec(value) } #[cfg(test)] mod tests { - use std::collections::HashMap; - use apache_avro_test_helper::TestResult; use crate::{ @@ -178,7 +214,11 @@ mod tests { zig_i64(3, &mut expected)?; expected.extend([b'f', b'o', b'o']); - assert_eq!(to_avro_datum(&schema, record)?, expected); + let written = GenericDatumWriter::builder(&schema) + .build()? + .write_value_to_vec(record)?; + + assert_eq!(written, expected); Ok(()) } @@ -203,7 +243,9 @@ mod tests { zig_i64(3, &mut expected)?; expected.extend([b'f', b'o', b'o']); - let bytes = write_avro_datum_ref(&schema, &HashMap::new(), &data, &mut writer)?; + let bytes = GenericDatumWriter::builder(&schema) + .build()? + .write_ser(&mut writer, &data)?; assert_eq!(bytes, expected.len()); assert_eq!(writer, expected); @@ -220,7 +262,10 @@ mod tests { zig_i64(1, &mut expected)?; zig_i64(3, &mut expected)?; - assert_eq!(to_avro_datum(&schema, union)?, expected); + let written = GenericDatumWriter::builder(&schema) + .build()? + .write_value_to_vec(union)?; + assert_eq!(written, expected); Ok(()) } @@ -233,7 +278,10 @@ mod tests { let mut expected = Vec::new(); zig_i64(0, &mut expected)?; - assert_eq!(to_avro_datum(&schema, union)?, expected); + let written = GenericDatumWriter::builder(&schema) + .build()? + .write_value_to_vec(union)?; + assert_eq!(written, expected); Ok(()) } @@ -250,8 +298,12 @@ mod tests { let schema = Schema::parse_str(schema_str)?; assert_eq!(&schema, expected_schema); // The serialized format should be the same as the schema. - let ser = to_avro_datum(&schema, value.clone())?; - let raw_ser = to_avro_datum(raw_schema, raw_value)?; + let ser = GenericDatumWriter::builder(&schema) + .build()? + .write_value_to_vec(value.clone())?; + let raw_ser = GenericDatumWriter::builder(raw_schema) + .build()? + .write_value_to_vec(raw_value)?; assert_eq!(ser, raw_ser); // Should deserialize from the schema into the logical type. diff --git a/avro/src/writer/single_object.rs b/avro/src/writer/single_object.rs index 136986a..d5c2c8c 100644 --- a/avro/src/writer/single_object.rs +++ b/avro/src/writer/single_object.rs @@ -19,14 +19,14 @@ use std::{io::Write, marker::PhantomData, ops::RangeInclusive}; use serde::Serialize; +use crate::encode::encode_internal; +use crate::serde::ser_schema::SchemaAwareWriteSerializer; use crate::{ AvroResult, AvroSchema, Schema, error::Details, headers::{HeaderBuilder, RabinFingerprintHeader}, schema::ResolvedOwnedSchema, types::Value, - write_avro_datum_ref, - writer::datum::write_value_ref_owned_resolved, }; /// Writer that encodes messages according to the single object encoding v1 spec @@ -163,12 +163,13 @@ where .write_all(&self.header) .map_err(Details::WriteBytes)?; - let bytes = write_avro_datum_ref( + let mut serializer = SchemaAwareWriteSerializer::new( + writer, self.resolved.get_root_schema(), self.resolved.get_names(), - data, - writer, - )?; + None, + ); + let bytes = data.serialize(&mut serializer)?; Ok(bytes + self.header.len()) } @@ -184,6 +185,33 @@ where } } +fn write_value_ref_owned_resolved<W: Write>( + resolved_schema: &ResolvedOwnedSchema, + value: &Value, + writer: &mut W, +) -> AvroResult<usize> { + let root_schema = resolved_schema.get_root_schema(); + if let Some(reason) = value.validate_internal( + root_schema, + resolved_schema.get_names(), + root_schema.namespace(), + ) { + return Err(Details::ValidationWithReason { + value: value.clone(), + schema: root_schema.clone(), + reason, + } + .into()); + } + encode_internal( + value, + root_schema, + resolved_schema.get_names(), + root_schema.namespace(), + writer, + ) +} + #[cfg(test)] mod tests { use apache_avro_test_helper::TestResult; diff --git a/avro/tests/avro-3786.rs b/avro/tests/avro-3786.rs index 42c5464..f9f95b9 100644 --- a/avro/tests/avro-3786.rs +++ b/avro/tests/avro-3786.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{Schema, reader::datum::GenericDatumReader, to_avro_datum, to_value, types}; +use apache_avro::reader::datum::GenericDatumReader; +use apache_avro::writer::datum::GenericDatumWriter; +use apache_avro::{Schema, to_value, types}; use apache_avro_test_helper::TestResult; #[test] @@ -132,7 +134,9 @@ fn avro_3786_deserialize_union_with_different_enum_order() -> TestResult { avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) @@ -259,7 +263,9 @@ fn avro_3786_deserialize_union_with_different_enum_order_defined_in_record() -> avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) @@ -375,7 +381,9 @@ fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_ avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) @@ -491,7 +499,9 @@ fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_ avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) @@ -607,7 +617,9 @@ fn deserialize_union_with_different_enum_order_defined_in_record() -> TestResult avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) @@ -884,7 +896,9 @@ fn deserialize_union_with_record_with_enum_defined_inline_reader_has_different_i avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) diff --git a/avro/tests/avro-3787.rs b/avro/tests/avro-3787.rs index 4fc2f3d..f50ae96 100644 --- a/avro/tests/avro-3787.rs +++ b/avro/tests/avro-3787.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{Schema, reader::datum::GenericDatumReader, to_avro_datum, to_value, types}; +use apache_avro::reader::datum::GenericDatumReader; +use apache_avro::writer::datum::GenericDatumWriter; +use apache_avro::{Schema, to_value, types}; use apache_avro_test_helper::TestResult; #[test] @@ -133,7 +135,9 @@ fn avro_3787_deserialize_union_with_unknown_symbol() -> TestResult { avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) @@ -267,7 +271,9 @@ fn avro_3787_deserialize_union_with_unknown_symbol_no_ref() -> TestResult { avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) diff --git a/avro/tests/io.rs b/avro/tests/io.rs index 0d95850..9947587 100644 --- a/avro/tests/io.rs +++ b/avro/tests/io.rs @@ -16,9 +16,9 @@ // under the License. //! Port of <https://github.com/apache/avro/blob/release-1.9.1/lang/py/test/test_io.py> -use apache_avro::{ - Error, Schema, error::Details, reader::datum::GenericDatumReader, to_avro_datum, types::Value, -}; +use apache_avro::reader::datum::GenericDatumReader; +use apache_avro::writer::datum::GenericDatumWriter; +use apache_avro::{Error, Schema, error::Details, types::Value}; use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use std::{io::Cursor, sync::OnceLock}; @@ -233,7 +233,9 @@ fn test_validate() -> TestResult { fn test_round_trip() -> TestResult { for (raw_schema, value) in schemas_to_validate().iter() { let schema = Schema::parse_str(raw_schema)?; - let encoded = to_avro_datum(&schema, value.clone()).unwrap(); + let encoded = GenericDatumWriter::builder(&schema) + .build()? + .write_value_to_vec(value.clone())?; let decoded = GenericDatumReader::builder(&schema) .build()? .read_value(&mut Cursor::new(encoded))?; @@ -246,7 +248,9 @@ fn test_round_trip() -> TestResult { #[test] fn test_binary_int_encoding() -> TestResult { for (number, hex_encoding) in binary_encodings().iter() { - let encoded = to_avro_datum(&Schema::Int, Value::Int(*number as i32))?; + let encoded = GenericDatumWriter::builder(&Schema::Int) + .build()? + .write_value_to_vec(Value::Int(*number as i32))?; assert_eq!(&encoded, hex_encoding); } @@ -256,7 +260,9 @@ fn test_binary_int_encoding() -> TestResult { #[test] fn test_binary_long_encoding() -> TestResult { for (number, hex_encoding) in binary_encodings().iter() { - let encoded = to_avro_datum(&Schema::Long, Value::Long(*number))?; + let encoded = GenericDatumWriter::builder(&Schema::Long) + .build()? + .write_value_to_vec(Value::Long(*number))?; assert_eq!(&encoded, hex_encoding); } @@ -279,7 +285,9 @@ fn test_schema_promotion() -> TestResult { let original_value = &promotable_values[i]; for (j, reader_raw_schema) in promotable_schemas.iter().enumerate().skip(i + 1) { let reader_schema = Schema::parse_str(reader_raw_schema)?; - let encoded = to_avro_datum(&writer_schema, original_value.clone())?; + let encoded = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(original_value.clone())?; let decoded = GenericDatumReader::builder(&writer_schema) .reader_schema(&reader_schema) .build()? @@ -298,7 +306,9 @@ fn test_unknown_symbol() -> TestResult { let reader_schema = Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["BAR", "BAZ"]}"#)?; let original_value = Value::Enum(0, "FOO".to_string()); - let encoded = to_avro_datum(&writer_schema, original_value)?; + let encoded = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(original_value.clone())?; let decoded = GenericDatumReader::builder(&writer_schema) .reader_schema(&reader_schema) .build()? @@ -321,7 +331,9 @@ fn test_default_value() -> TestResult { }}"# ))?; let datum_to_read = Value::Record(vec![("H".to_string(), default_datum.clone())]); - let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; + let encoded = GenericDatumWriter::builder(long_record_schema()) + .build()? + .write_value_to_vec(long_record_datum().clone())?; let datum_read = GenericDatumReader::builder(long_record_schema()) .reader_schema(&reader_schema) .build()? @@ -377,7 +389,9 @@ fn test_no_default_value() -> TestResult { ] }"#, )?; - let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; + let encoded = GenericDatumWriter::builder(long_record_schema()) + .build()? + .write_value_to_vec(long_record_datum().clone())?; let result = GenericDatumReader::builder(long_record_schema()) .reader_schema(&reader_schema) .build()? @@ -405,7 +419,9 @@ fn test_projection() -> TestResult { ("E".to_string(), Value::Int(5)), ("F".to_string(), Value::Int(6)), ]); - let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; + let encoded = GenericDatumWriter::builder(long_record_schema()) + .build()? + .write_value_to_vec(long_record_datum().clone())?; let datum_read = GenericDatumReader::builder(long_record_schema()) .reader_schema(&reader_schema) .build()? @@ -433,7 +449,9 @@ fn test_field_order() -> TestResult { ("F".to_string(), Value::Int(6)), ("E".to_string(), Value::Int(5)), ]); - let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; + let encoded = GenericDatumWriter::builder(long_record_schema()) + .build()? + .write_value_to_vec(long_record_datum().clone())?; let datum_read = GenericDatumReader::builder(long_record_schema()) .reader_schema(&reader_schema) .build()? @@ -462,7 +480,11 @@ fn test_type_exception() -> Result<(), String> { ("E".to_string(), Value::Int(5)), ("F".to_string(), Value::String(String::from("Bad"))), ]); - let encoded = to_avro_datum(&writer_schema, datum_to_write).map_err(Error::into_details); + let encoded = GenericDatumWriter::builder(&writer_schema) + .build() + .unwrap() + .write_value_to_vec(datum_to_write) + .map_err(Error::into_details); match encoded { Ok(_) => Err(String::from("Expected ValidationError, got Ok")), Err(Details::Validation) => Ok(()), diff --git a/avro/tests/schema.rs b/avro/tests/schema.rs index 9ec8684..89e328a 100644 --- a/avro/tests/schema.rs +++ b/avro/tests/schema.rs @@ -15,18 +15,14 @@ // specific language governing permissions and limitations // under the License. -use std::{ - collections::HashMap, - io::{Cursor, Read}, -}; - +use apache_avro::writer::datum::GenericDatumWriter; use apache_avro::{ Codec, Error, Reader, Schema, Writer, error::Details, from_value, reader::datum::GenericDatumReader, schema::{EnumSchema, FixedSchema, Name, RecordField, RecordSchema}, - to_avro_datum, to_value, + to_value, types::{Record, Value}, }; use apache_avro_test_helper::{ @@ -35,6 +31,10 @@ use apache_avro_test_helper::{ init, }; use serde::{Deserialize, Serialize}; +use std::{ + collections::HashMap, + io::{Cursor, Read}, +}; #[test] fn test_correct_recursive_extraction() -> TestResult { @@ -859,7 +859,9 @@ fn avro_old_issue_47() -> TestResult { }; let ser_value = to_value(record.clone())?; - let serialized_bytes = to_avro_datum(&schema, ser_value)?; + let serialized_bytes = GenericDatumWriter::builder(&schema) + .build()? + .write_value_to_vec(ser_value)?; let de_value = GenericDatumReader::builder(&schema) .build()? @@ -979,12 +981,14 @@ fn test_avro_3785_deserialize_namespace_with_nullable_type_containing_reference_ bar_init: Bar::Bar0, bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar1 }), }; - let avro_value = crate::to_value(foo1)?; + let avro_value = to_value(foo1)?; assert!( avro_value.validate(&writer_schema), "value is valid for schema", ); - let datum = to_avro_datum(&writer_schema, avro_value)?; + let datum = GenericDatumWriter::builder(&writer_schema) + .build()? + .write_value_to_vec(avro_value)?; let mut x = &datum[..]; let reader_schema = Schema::parse_str(reader_schema)?; let deser_value = GenericDatumReader::builder(&writer_schema) diff --git a/avro/tests/to_from_avro_datum_schemata.rs b/avro/tests/to_from_avro_datum_schemata.rs index 14feacd..0849367 100644 --- a/avro/tests/to_from_avro_datum_schemata.rs +++ b/avro/tests/to_from_avro_datum_schemata.rs @@ -15,10 +15,9 @@ // specific language governing permissions and limitations // under the License. -use apache_avro::{ - Codec, Reader, Schema, Writer, reader::datum::GenericDatumReader, to_avro_datum_schemata, - types::Value, -}; +use apache_avro::reader::datum::GenericDatumReader; +use apache_avro::writer::datum::GenericDatumWriter; +use apache_avro::{Codec, Reader, Schema, Writer, types::Value}; use apache_avro_test_helper::{TestResult, init}; static SCHEMA_A_STR: &str = r#"{ @@ -52,7 +51,10 @@ fn test_avro_3683_multiple_schemata_to_from_avro_datum() -> TestResult { // this is the Schema we want to use for write/read let schema_b = schemata[1]; let expected: Vec<u8> = vec![0, 0, 128, 63]; - let actual = to_avro_datum_schemata(schema_b, schemata.clone(), record.clone())?; + let actual = GenericDatumWriter::builder(schema_b) + .schemata(schemata.clone())? + .build()? + .write_value_to_vec(record.clone())?; assert_eq!(actual, expected); let value = GenericDatumReader::builder(schema_b) @@ -79,7 +81,10 @@ fn avro_rs_106_test_multiple_schemata_to_from_avro_datum_with_resolution() -> Te // this is the Schema we want to use for write/read let schema_b = schemata[1]; let expected: Vec<u8> = vec![0, 0, 128, 63]; - let actual = to_avro_datum_schemata(schema_b, schemata.clone(), record.clone())?; + let actual = GenericDatumWriter::builder(schema_b) + .schemata(schemata.clone())? + .build()? + .write_value_to_vec(record.clone())?; assert_eq!(actual, expected); let value = GenericDatumReader::builder(schema_b)
