This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-cookbook.git


The following commit(s) were added to refs/heads/main by this push:
     new 09a0f5e  [Java] Added section for converting Avro to Arrow (#235)
09a0f5e is described below

commit 09a0f5eeb8c27db19e5699df9bc5771c337f25cc
Author: Ash <[email protected]>
AuthorDate: Thu Aug 4 05:58:14 2022 -0700

    [Java] Added section for converting Avro to Arrow (#235)
    
    * added avro recipe
    
    * updated recipe
    
    * Update java/source/avro.rst
    
    Co-authored-by: david dali susanibar arce <[email protected]>
    
    * Update java/source/avro.rst
    
    Co-authored-by: David Li <[email protected]>
    
    * updated per review comments in PR
    
    * fixed extra space issue
    
    Co-authored-by: david dali susanibar arce <[email protected]>
    Co-authored-by: David Li <[email protected]>
---
 java/source/avro.rst                |  72 ++++++++++++++++++++++++++++++++++++
 java/source/demo/pom.xml            |   5 +++
 java/source/index.rst               |   1 +
 java/thirdpartydeps/avro/user.avsc  |  27 ++++++++++++++
 java/thirdpartydeps/avro/users.avro | Bin 0 -> 22 bytes
 5 files changed, 105 insertions(+)

diff --git a/java/source/avro.rst b/java/source/avro.rst
new file mode 100644
index 0000000..f1b9f2d
--- /dev/null
+++ b/java/source/avro.rst
@@ -0,0 +1,72 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _arrow-avro:
+
+======
+Avro 
+======
+
+Avro encoded data can be converted into Arrow format.
+
+.. contents::
+
+Avro to Arrow
+=============
+
+The example assumes that the Avro schema is stored separately from the Avro data itself.
+
+.. testcode::
+
+   import org.apache.arrow.AvroToArrow;
+   import org.apache.arrow.AvroToArrowConfig;
+   import org.apache.arrow.AvroToArrowConfigBuilder;
+   import org.apache.arrow.AvroToArrowVectorIterator;
+   import org.apache.arrow.memory.BufferAllocator;
+   import org.apache.arrow.memory.RootAllocator;
+   import org.apache.arrow.vector.VectorSchemaRoot;
+   import org.apache.avro.Schema;
+   import org.apache.avro.io.BinaryDecoder;
+   import org.apache.avro.io.DecoderFactory;
+
+   import java.io.File;
+   import java.io.FileInputStream;
+   import java.io.FileNotFoundException;
+   import java.io.IOException;
+
+   // The Avro input stream and the allocator are both try-with-resources, so each is closed even on failure.
+   try (FileInputStream avroData = new FileInputStream("./thirdpartydeps/avro/users.avro");
+        BufferAllocator allocator = new RootAllocator()) {
+       Schema schema = new Schema.Parser().parse(new File("./thirdpartydeps/avro/user.avsc"));
+       BinaryDecoder decoder = new DecoderFactory().binaryDecoder(avroData, null);
+       AvroToArrowConfig config = new AvroToArrowConfigBuilder(allocator).build();
+       try (AvroToArrowVectorIterator avroToArrowVectorIterator = AvroToArrow.avroToArrowIterator(schema, decoder, config)) {
+           while (avroToArrowVectorIterator.hasNext()) {
+               try (VectorSchemaRoot root = avroToArrowVectorIterator.next()) {
+                   System.out.print(root.contentToTSVString()); // emit the converted root as tab-separated text
+               }
+           }
+       }
+   } catch (Exception e) {
+       e.printStackTrace();
+   }
+
+.. testoutput::
+
+   name    favorite_number    favorite_color
+   Alyssa    256    null
+   Ben    7    red
diff --git a/java/source/demo/pom.xml b/java/source/demo/pom.xml
index 1f6fb1e..30f7f16 100644
--- a/java/source/demo/pom.xml
+++ b/java/source/demo/pom.xml
@@ -70,6 +70,11 @@
             <artifactId>arrow-dataset</artifactId>
             <version>${arrow.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.arrow</groupId>
+            <artifactId>arrow-avro</artifactId>
+            <version>${arrow.version}</version>
+        </dependency>
         <dependency>
             <groupId>com.google.guava</groupId>
             <artifactId>guava</artifactId>
diff --git a/java/source/index.rst b/java/source/index.rst
index 9e2a9f3..cf2d0f1 100644
--- a/java/source/index.rst
+++ b/java/source/index.rst
@@ -38,6 +38,7 @@ To get started with Apache Arrow in Java, see the
    flight
    dataset
    data
+   avro
 
 Indices and tables
 ==================
diff --git a/java/thirdpartydeps/avro/user.avsc b/java/thirdpartydeps/avro/user.avsc
new file mode 100644
index 0000000..f908f9b
--- /dev/null
+++ b/java/thirdpartydeps/avro/user.avsc
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+{
+  "namespace": "org.apache.arrow.avro",
+  "type": "record",
+  "name": "User",
+  "fields": [
+    {"name": "name", "type": "string"},
+    {"name": "favorite_number",  "type": ["int", "null"]},
+    {"name": "favorite_color", "type": ["string", "null"]}
+  ]
+}
\ No newline at end of file
diff --git a/java/thirdpartydeps/avro/users.avro b/java/thirdpartydeps/avro/users.avro
new file mode 100644
index 0000000..6ee5237
Binary files /dev/null and b/java/thirdpartydeps/avro/users.avro differ

Reply via email to