This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-cookbook.git
The following commit(s) were added to refs/heads/main by this push:
new 09a0f5e [Java] Added section for converting Avro to Arrow (#235)
09a0f5e is described below
commit 09a0f5eeb8c27db19e5699df9bc5771c337f25cc
Author: Ash <[email protected]>
AuthorDate: Thu Aug 4 05:58:14 2022 -0700
[Java] Added section for converting Avro to Arrow (#235)
* added avro recipe
* updated recipe
* Update java/source/avro.rst
Co-authored-by: david dali susanibar arce <[email protected]>
* Update java/source/avro.rst
Co-authored-by: David Li <[email protected]>
* updated per review comments in PR
* fixed extra space issue
Co-authored-by: david dali susanibar arce <[email protected]>
Co-authored-by: David Li <[email protected]>
---
java/source/avro.rst | 72 ++++++++++++++++++++++++++++++++++++
java/source/demo/pom.xml | 5 +++
java/source/index.rst | 1 +
java/thirdpartydeps/avro/user.avsc | 27 ++++++++++++++
java/thirdpartydeps/avro/users.avro | Bin 0 -> 22 bytes
5 files changed, 105 insertions(+)
diff --git a/java/source/avro.rst b/java/source/avro.rst
new file mode 100644
index 0000000..f1b9f2d
--- /dev/null
+++ b/java/source/avro.rst
@@ -0,0 +1,72 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _arrow-avro:
+
+======
+Avro
+======
+
+Avro encoded data can be converted into Arrow format.
+
+.. contents::
+
+Avro to Arrow
+=============
+
+The example assumes that the Avro schema is stored separately from the Avro data itself.
+
+.. testcode::
+
+ import org.apache.arrow.AvroToArrow;
+ import org.apache.arrow.AvroToArrowConfig;
+ import org.apache.arrow.AvroToArrowConfigBuilder;
+ import org.apache.arrow.AvroToArrowVectorIterator;
+ import org.apache.arrow.memory.BufferAllocator;
+ import org.apache.arrow.memory.RootAllocator;
+ import org.apache.arrow.vector.VectorSchemaRoot;
+ import org.apache.avro.Schema;
+ import org.apache.avro.io.BinaryDecoder;
+ import org.apache.avro.io.DecoderFactory;
+
+ import java.io.File;
+ import java.io.FileInputStream;
+ import java.io.FileNotFoundException;
+ import java.io.IOException;
+
+ // Decode the Avro file into Arrow batches and print each one as TSV; the input stream and allocator are closed automatically.
+ try (FileInputStream avroInput = new FileInputStream("./thirdpartydeps/avro/users.avro");
+      BufferAllocator allocator = new RootAllocator()) {
+     BinaryDecoder decoder = new DecoderFactory().binaryDecoder(avroInput, null); // null: no existing decoder to reuse
+     Schema schema = new Schema.Parser().parse(new File("./thirdpartydeps/avro/user.avsc")); // schema is read from its own .avsc file
+     AvroToArrowConfig config = new AvroToArrowConfigBuilder(allocator).build();
+     try (AvroToArrowVectorIterator batches = AvroToArrow.avroToArrowIterator(schema, decoder, config)) {
+         while (batches.hasNext()) {
+             try (VectorSchemaRoot root = batches.next()) { // close each batch once it has been printed
+                 System.out.print(root.contentToTSVString());
+             }
+         }
+     }
+ } catch (Exception e) {
+     // Cookbook example: report any I/O or conversion failure instead of propagating it.
+     e.printStackTrace();
+ }
+
+.. testoutput::
+
+ name favorite_number favorite_color
+ Alyssa 256 null
+ Ben 7 red
diff --git a/java/source/demo/pom.xml b/java/source/demo/pom.xml
index 1f6fb1e..30f7f16 100644
--- a/java/source/demo/pom.xml
+++ b/java/source/demo/pom.xml
@@ -70,6 +70,11 @@
<artifactId>arrow-dataset</artifactId>
<version>${arrow.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-avro</artifactId>
+ <version>${arrow.version}</version>
+ </dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
diff --git a/java/source/index.rst b/java/source/index.rst
index 9e2a9f3..cf2d0f1 100644
--- a/java/source/index.rst
+++ b/java/source/index.rst
@@ -38,6 +38,7 @@ To get started with Apache Arrow in Java, see the
flight
dataset
data
+ avro
Indices and tables
==================
diff --git a/java/thirdpartydeps/avro/user.avsc b/java/thirdpartydeps/avro/user.avsc
new file mode 100644
index 0000000..f908f9b
--- /dev/null
+++ b/java/thirdpartydeps/avro/user.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+ "namespace": "org.apache.arrow.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+ {"name": "name", "type": "string"},
+ {"name": "favorite_number", "type": ["int", "null"]},
+ {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
\ No newline at end of file
diff --git a/java/thirdpartydeps/avro/users.avro b/java/thirdpartydeps/avro/users.avro
new file mode 100644
index 0000000..6ee5237
Binary files /dev/null and b/java/thirdpartydeps/avro/users.avro differ