This is an automated email from the ASF dual-hosted git repository.
Fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new b8f4dda5d Remove Parquet-Pig (#3377)
b8f4dda5d is described below
commit b8f4dda5d8492904bff33e3774a2de57f172e967
Author: Aaron Niskode-Dossett <[email protected]>
AuthorDate: Tue Apr 21 07:33:10 2026 -0500
Remove Parquet-Pig (#3377)
* [parquet-thrift] remove pig usage
* final pig cleanups
* Put mention of Pig back in README
* MINOR: Exclude removed parquet-thrift Pig APIs from japicmp
---
.github/ISSUE_TEMPLATE/feature_request.yaml | 1 -
parquet-thrift/pom.xml | 26 +--
.../hadoop/thrift/AbstractThriftWriteSupport.java | 19 ---
.../parquet/thrift/pig/ParquetThriftStorer.java | 91 -----------
.../thrift/pig/TupleToThriftWriteSupport.java | 86 ----------
.../apache/parquet/thrift/struct/ThriftType.java | 1 +
.../parquet/thrift/TestParquetWriteProtocol.java | 93 -----------
.../thrift/TestThriftToPigCompatibility.java | 178 ---------------------
.../thrift/pig/TestParquetThriftStorer.java | 82 ----------
pom.xml | 6 +-
10 files changed, 6 insertions(+), 577 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml
b/.github/ISSUE_TEMPLATE/feature_request.yaml
index e1f580ed3..084c3afe1 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yaml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yaml
@@ -40,7 +40,6 @@ body:
- Build
- Arrow
- Avro
- - Pig
- Protobuf
- Thrift
- CLI
diff --git a/parquet-thrift/pom.xml b/parquet-thrift/pom.xml
index 9a273769a..193090921 100644
--- a/parquet-thrift/pom.xml
+++ b/parquet-thrift/pom.xml
@@ -70,7 +70,7 @@
<scope>provided</scope>
</dependency>
<!-- Guava is a dependency of hadoop-common, but scoped to compile. We need to
- explicity declare it as a test dependency. -->
+ explicitly declare it as a test dependency. -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
@@ -95,18 +95,6 @@
</exclusion>
</exclusions>
</dependency>
- <dependency>
- <groupId>com.twitter.elephantbird</groupId>
- <artifactId>elephant-bird-pig</artifactId>
- <version>${elephant-bird.version}</version>
- <exclusions>
- <!-- hadoop-lzo is not required for parquet build/tests and there are
issues downloading it -->
- <exclusion>
- <groupId>com.hadoop.gplcompression</groupId>
- <artifactId>hadoop-lzo</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-jackson</artifactId>
@@ -130,18 +118,6 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.apache.parquet</groupId>
- <artifactId>parquet-pig</artifactId>
- <version>1.15.2</version>
- </dependency>
- <dependency>
- <groupId>org.apache.pig</groupId>
- <artifactId>pig</artifactId>
- <version>${pig.version}</version>
- <classifier>${pig.classifier}</classifier>
- <scope>provided</scope>
- </dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
diff --git
a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java
b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java
index dda9aadfb..cb8a4c36b 100644
---
a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java
+++
b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java
@@ -15,7 +15,6 @@
*/
package org.apache.parquet.hadoop.thrift;
-import com.twitter.elephantbird.pig.util.ThriftToPig;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.conf.HadoopParquetConfiguration;
@@ -25,13 +24,11 @@ import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.io.ColumnIOFactory;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.api.RecordConsumer;
-import org.apache.parquet.pig.PigMetaData;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.thrift.ParquetWriteProtocol;
import org.apache.parquet.thrift.ThriftMetaData;
import org.apache.parquet.thrift.ThriftSchemaConverter;
import org.apache.parquet.thrift.struct.ThriftType.StructType;
-import org.apache.thrift.TBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -99,26 +96,10 @@ public abstract class AbstractThriftWriteSupport<T> extends
WriteSupport<T> {
final Map<String, String> extraMetaData =
new ThriftMetaData(thriftClass.getName(),
thriftStruct).toExtraMetaData();
- // adding the Pig schema as it would have been mapped from thrift
- // TODO: make this work for non-tbase types
- if (isPigLoaded() && TBase.class.isAssignableFrom(thriftClass)) {
- new PigMetaData(new ThriftToPig((Class<? extends TBase<?, ?>>)
thriftClass).toSchema())
- .addToMetaData(extraMetaData);
- }
this.writeContext = new WriteContext(schema, extraMetaData);
}
- protected boolean isPigLoaded() {
- try {
- Class.forName("org.apache.pig.impl.logicalLayer.schema.Schema");
- return true;
- } catch (ClassNotFoundException e) {
- LOG.info("Pig is not loaded, pig metadata will not be written");
- return false;
- }
- }
-
@Override
public WriteContext init(Configuration configuration) {
return init(new HadoopParquetConfiguration(configuration));
diff --git
a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java
b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java
deleted file mode 100644
index aec21eff4..000000000
---
a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.thrift.pig;
-
-import java.io.IOException;
-import java.util.Arrays;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.parquet.hadoop.ParquetOutputFormat;
-import org.apache.parquet.io.ParquetEncodingException;
-import org.apache.pig.StoreFunc;
-import org.apache.pig.data.Tuple;
-
-/**
- * To store in Pig using a thrift class
- * usage:
- * STORE 'foo' USING parquet.thrift.pig.ParquetThriftStorer('my.thrift.Class');
- *
- * @deprecated will be removed in 1.17.0 or 2.0.0
- */
-@Deprecated
-public class ParquetThriftStorer extends StoreFunc {
-
- private RecordWriter<Void, Tuple> recordWriter;
-
- private String className;
-
- public ParquetThriftStorer(String[] params) {
- if (params == null || params.length != 1) {
- throw new IllegalArgumentException(
- "required the thrift class name in parameter. Got " +
Arrays.toString(params) + " instead");
- }
- className = params[0];
- }
-
- /**
- * {@inheritDoc}
- */
- @Override
- public OutputFormat<Void, Tuple> getOutputFormat() throws IOException {
- return new ParquetOutputFormat<Tuple>(new
TupleToThriftWriteSupport(className));
- }
-
- /**
- * {@inheritDoc}
- */
- @SuppressWarnings({"rawtypes", "unchecked"}) // that's how the base class is
defined
- @Override
- public void prepareToWrite(RecordWriter recordWriter) throws IOException {
- this.recordWriter = recordWriter;
- }
-
- /**
- * {@inheritDoc}
- */
- @Override
- public void putNext(Tuple tuple) throws IOException {
- try {
- this.recordWriter.write(null, tuple);
- } catch (InterruptedException e) {
- throw new ParquetEncodingException("Interrupted while writing", e);
- }
- }
-
- /**
- * {@inheritDoc}
- */
- @Override
- public void setStoreLocation(String location, Job job) throws IOException {
- FileOutputFormat.setOutputPath(job, new Path(location));
- }
-}
diff --git
a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
deleted file mode 100644
index 395cf70f7..000000000
---
a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.thrift.pig;
-
-import com.twitter.elephantbird.pig.util.PigToThrift;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.parquet.conf.HadoopParquetConfiguration;
-import org.apache.parquet.conf.ParquetConfiguration;
-import org.apache.parquet.hadoop.BadConfigurationException;
-import org.apache.parquet.hadoop.api.WriteSupport;
-import org.apache.parquet.hadoop.thrift.ThriftWriteSupport;
-import org.apache.parquet.io.api.RecordConsumer;
-import org.apache.pig.data.Tuple;
-import org.apache.thrift.TBase;
-
-/**
- * Stores Pig tuples as Thrift objects
- *
- * @deprecated will be removed in 1.17.0 or 2.0.0
- */
-@Deprecated
-public class TupleToThriftWriteSupport extends WriteSupport<Tuple> {
-
- private final String className;
- private ThriftWriteSupport<TBase<?, ?>> thriftWriteSupport;
- private PigToThrift<TBase<?, ?>> pigToThrift;
-
- /**
- * @param className the thrift class name
- */
- public TupleToThriftWriteSupport(String className) {
- super();
- this.className = className;
- }
-
- @Override
- public String getName() {
- return "thrift";
- }
-
- @Override
- public WriteContext init(Configuration configuration) {
- return init(new HadoopParquetConfiguration(configuration));
- }
-
- @SuppressWarnings({"rawtypes", "unchecked"})
- @Override
- public WriteContext init(ParquetConfiguration configuration) {
- try {
- Class<?> clazz =
configuration.getClassByName(className).asSubclass(TBase.class);
- thriftWriteSupport = new ThriftWriteSupport(clazz);
- pigToThrift = new PigToThrift(clazz);
- return thriftWriteSupport.init(configuration);
- } catch (ClassNotFoundException e) {
- throw new BadConfigurationException("The thrift class name was not
found: " + className, e);
- } catch (ClassCastException e) {
- throw new BadConfigurationException("The thrift class name should extend
TBase: " + className, e);
- }
- }
-
- @Override
- public void prepareForWrite(RecordConsumer recordConsumer) {
- thriftWriteSupport.prepareForWrite(recordConsumer);
- }
-
- @Override
- public void write(Tuple t) {
- thriftWriteSupport.write(pigToThrift.getThriftObject(t));
- }
-}
diff --git
a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
index 264790333..c7aa12b3a 100644
---
a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
+++
b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
@@ -142,6 +142,7 @@ public abstract class ThriftType {
/**
* @deprecated will be removed in 2.0.0; use StateVisitor instead.
*/
+ @Deprecated
public interface TypeVisitor {
void visit(MapType mapType);
diff --git
a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java
b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java
index 9e562bf73..d11be4af3 100644
---
a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java
+++
b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java
@@ -23,7 +23,6 @@ import com.twitter.data.proto.tutorial.thrift.Name;
import com.twitter.data.proto.tutorial.thrift.Person;
import com.twitter.data.proto.tutorial.thrift.PhoneNumber;
import com.twitter.data.proto.tutorial.thrift.PhoneType;
-import com.twitter.elephantbird.pig.util.ThriftToPig;
import com.twitter.elephantbird.thrift.test.TestMap;
import com.twitter.elephantbird.thrift.test.TestMapInList;
import com.twitter.elephantbird.thrift.test.TestMapInSet;
@@ -45,20 +44,14 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
-import org.apache.parquet.conf.ParquetConfiguration;
import org.apache.parquet.io.ColumnIOFactory;
import org.apache.parquet.io.ExpectationValidatingRecordConsumer;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.RecordConsumerLoggingWrapper;
-import org.apache.parquet.pig.PigSchemaConverter;
-import org.apache.parquet.pig.TupleWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.thrift.struct.ThriftType.StructType;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.thrift.TBase;
import org.apache.thrift.TException;
-import org.junit.ComparisonFailure;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -133,59 +126,20 @@ public class TestParquetWriteProtocol {
map.put("foo", "bar");
map.put("foo2", "bar2");
TestMap testMap = new TestMap("map_name", map);
- try {
- validatePig(expectations, testMap);
- } catch (ComparisonFailure e) {
- // This can happen despite using a stable TreeMap, since
ThriftToPig#toPigMap
- // in com.twitter.elephantbird.pig.util creates a HashMap.
- // So we test with the map elements in reverse order
- validatePig(expectationsAlt, testMap);
- }
validateThrift(expectations, testMap);
}
/**
* @throws Exception
- * @see TestThriftToPigCompatibility
*/
@Test
public void testMapInSet() throws Exception {
- String[] pigExpectations = {
- "startMessage()",
- "startField(name, 0)",
- "addBinary(top)",
- "endField(name, 0)",
- "startField(names, 1)", // set: optional field
- "startGroup()",
- "startField(t, 0)", // repeated field
- "startGroup()",
- "startField(names_tuple, 0)", // map: optional field
- "startGroup()",
- "startField(key_value, 0)", // repeated field
- "startGroup()",
- "startField(key, 0)", // key
- "addBinary(foo)",
- "endField(key, 0)",
- "startField(value, 1)", // value
- "addBinary(bar)",
- "endField(value, 1)",
- "endGroup()",
- "endField(key_value, 0)",
- "endGroup()",
- "endField(names_tuple, 0)",
- "endGroup()",
- "endField(t, 0)",
- "endGroup()",
- "endField(names, 1)",
- "endMessage()"
- };
final Set<Map<String, String>> set = new HashSet<Map<String, String>>();
final Map<String, String> map = new HashMap<String, String>();
map.put("foo", "bar");
set.add(map);
TestMapInSet o = new TestMapInSet("top", set);
- validatePig(pigExpectations, o);
String[] expectationsThrift = {
"startMessage()",
@@ -217,7 +171,6 @@ public class TestParquetWriteProtocol {
/**
* @throws TException
- * @see TestThriftToPigCompatibility
*/
@Test
public void testNameList() throws TException {
@@ -226,31 +179,6 @@ public class TestParquetWriteProtocol {
names.add("Jack");
final TestNameList o = new TestNameList("name", names);
- String[] pigExpectations = {
- "startMessage()",
- "startField(name, 0)",
- "addBinary(name)",
- "endField(name, 0)",
- "startField(names, 1)",
- "startGroup()",
- "startField(t, 0)",
- "startGroup()",
- "startField(names_tuple, 0)",
- "addBinary(John)",
- "endField(names_tuple, 0)",
- "endGroup()",
- "startGroup()",
- "startField(names_tuple, 0)",
- "addBinary(Jack)",
- "endField(names_tuple, 0)",
- "endGroup()",
- "endField(t, 0)",
- "endGroup()",
- "endField(names, 1)",
- "endMessage()"
- };
- validatePig(pigExpectations, o);
-
String[] expectations = {
"startMessage()",
"startField(name, 0)",
@@ -326,7 +254,6 @@ public class TestParquetWriteProtocol {
map.put("foo", new TestPerson(new TestName("john", "johnson"), new
HashMap<TestPhoneType, String>()));
final Map<String, Integer> stringToIntMap =
Collections.singletonMap("bar", 10);
TestStructInMap testMap = new TestStructInMap("map_name", map,
stringToIntMap);
- validatePig(expectations, testMap);
validateThrift(expectations, testMap);
}
@@ -341,7 +268,6 @@ public class TestParquetWriteProtocol {
"endMessage()"
};
AddressBook a = new AddressBook(new ArrayList<Person>());
- validatePig(expectations, a);
validateThrift(expectations, a);
}
@@ -442,7 +368,6 @@ public class TestParquetWriteProtocol {
"[email protected]",
Arrays.asList(new PhoneNumber("555 999 9997"), new PhoneNumber("555
999 9996"))));
AddressBook a = new AddressBook(persons);
- validatePig(expectations, a);
// naming conventions are slightly different for the bag inner tuple. The
reader should ignore this.
String[] expectationsThrift = Arrays.copyOf(expectations,
expectations.length, String[].class);
expectationsThrift[3] = "startField(persons_tuple, 0)";
@@ -520,7 +445,6 @@ public class TestParquetWriteProtocol {
new ArrayList<Byte>(),
new ArrayList<Short>(),
new ArrayList<Long>());
- validatePig(expectations, a);
String[] thriftExpectations = Arrays.copyOf(expectations,
expectations.length, String[].class);
thriftExpectations[2] = "addBoolean(true)"; // Elephant bird maps booleans
to int
thriftExpectations[5] = "addBoolean(false)";
@@ -709,21 +633,4 @@ public class TestParquetWriteProtocol {
configuration, new RecordConsumerLoggingWrapper(recordConsumer),
columnIO, structType);
a.write(p);
}
-
- private MessageType validatePig(String[] expectations, TBase<?, ?> a) {
- ThriftToPig<TBase<?, ?>> thriftToPig = new ThriftToPig(a.getClass());
- ExpectationValidatingRecordConsumer recordConsumer =
- new ExpectationValidatingRecordConsumer(new
ArrayDeque<String>(Arrays.asList(expectations)));
- Schema pigSchema = thriftToPig.toSchema();
- LOG.info("{}", pigSchema);
- MessageType schema = new PigSchemaConverter().convert(pigSchema);
- LOG.info("{}", schema);
- TupleWriteSupport tupleWriteSupport = new TupleWriteSupport(pigSchema);
- tupleWriteSupport.init((ParquetConfiguration) null);
- tupleWriteSupport.prepareForWrite(recordConsumer);
- final Tuple pigTuple = thriftToPig.getPigTuple(a);
- LOG.info("{}", pigTuple);
- tupleWriteSupport.write(pigTuple);
- return schema;
- }
}
diff --git
a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java
b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java
deleted file mode 100644
index 43cc52673..000000000
---
a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.thrift;
-
-import static org.junit.Assert.assertEquals;
-
-import com.twitter.data.proto.tutorial.thrift.AddressBook;
-import com.twitter.data.proto.tutorial.thrift.Name;
-import com.twitter.data.proto.tutorial.thrift.Person;
-import com.twitter.data.proto.tutorial.thrift.PhoneNumber;
-import com.twitter.data.proto.tutorial.thrift.PhoneType;
-import com.twitter.elephantbird.pig.util.ThriftToPig;
-import com.twitter.elephantbird.thrift.test.TestMap;
-import com.twitter.elephantbird.thrift.test.TestMapInSet;
-import com.twitter.elephantbird.thrift.test.TestName;
-import com.twitter.elephantbird.thrift.test.TestNameList;
-import com.twitter.elephantbird.thrift.test.TestPerson;
-import com.twitter.elephantbird.thrift.test.TestPhoneType;
-import com.twitter.elephantbird.thrift.test.TestStructInMap;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import org.apache.parquet.io.ColumnIOFactory;
-import org.apache.parquet.io.ConverterConsumer;
-import org.apache.parquet.io.MessageColumnIO;
-import org.apache.parquet.io.RecordConsumerLoggingWrapper;
-import org.apache.parquet.io.api.RecordConsumer;
-import org.apache.parquet.pig.PigSchemaConverter;
-import org.apache.parquet.pig.convert.TupleRecordMaterializer;
-import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.thrift.struct.ThriftType.StructType;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-import org.apache.thrift.TBase;
-import org.apache.thrift.TException;
-import org.junit.Test;
-import thrift.test.OneOfEach;
-
-public class TestThriftToPigCompatibility {
-
- public void testMap() throws Exception {
- Map<String, String> map = new TreeMap<String, String>();
- map.put("foo", "bar");
- map.put("foo2", "bar2");
- TestMap testMap = new TestMap("map_name", map);
- validateSameTupleAsEB(testMap);
- }
-
- @Test
- public void testMapInSet() throws Exception {
- final Set<Map<String, String>> set = new HashSet<Map<String, String>>();
- final Map<String, String> map = new HashMap<String, String>();
- map.put("foo", "bar");
- set.add(map);
- TestMapInSet o = new TestMapInSet("top", set);
- validateSameTupleAsEB(o);
- }
-
- @Test
- public void testStructInMap() throws Exception {
-
- final Map<String, TestPerson> map = new HashMap<String, TestPerson>();
- map.put("foo", new TestPerson(new TestName("john", "johnson"), new
HashMap<TestPhoneType, String>()));
- final Map<String, Integer> stringToIntMap =
Collections.singletonMap("bar", 10);
- TestStructInMap testMap = new TestStructInMap("map_name", map,
stringToIntMap);
- validateSameTupleAsEB(testMap);
- }
-
- @Test
- public void testProtocolEmptyAdressBook() throws Exception {
-
- AddressBook a = new AddressBook(new ArrayList<Person>());
- validateSameTupleAsEB(a);
- }
-
- @Test
- public void testProtocolAddressBook() throws Exception {
- ArrayList<Person> persons = new ArrayList<Person>();
- final PhoneNumber phoneNumber = new PhoneNumber("555 999 9998");
- phoneNumber.type = PhoneType.HOME;
- persons.add(new Person(
- new Name("Bob", "Roberts"),
- 1,
- "[email protected]",
- Arrays.asList(new PhoneNumber("555 999 9999"), phoneNumber)));
- persons.add(new Person(
- new Name("Dick", "Richardson"),
- 2,
- "[email protected]",
- Arrays.asList(new PhoneNumber("555 999 9997"), new PhoneNumber("555
999 9996"))));
- AddressBook a = new AddressBook(persons);
- validateSameTupleAsEB(a);
- }
-
- @Test
- public void testOneOfEach() throws Exception {
- OneOfEach a = new OneOfEach(
- true,
- false,
- (byte) 8,
- (short) 16,
- (int) 32,
- (long) 64,
- (double) 1234,
- "string",
- "å",
- false,
- ByteBuffer.wrap("a".getBytes()),
- new ArrayList<Byte>(),
- new ArrayList<Short>(),
- new ArrayList<Long>());
- validateSameTupleAsEB(a);
- }
-
- @Test
- public void testStringList() throws Exception {
- final List<String> names = new ArrayList<String>();
- names.add("John");
- names.add("Jack");
- TestNameList o = new TestNameList("name", names);
- validateSameTupleAsEB(o);
- }
-
- /**
- * <ul> steps:
- * <li>Writes using the thrift mapping
- * <li>Reads using the pig mapping
- * <li>Use Elephant bird to convert from thrift to pig
- * <li>Check that both transformations give the same result
- *
- * @param o the object to convert
- * @throws TException
- */
- public static <T extends TBase<?, ?>> void validateSameTupleAsEB(T o) throws
TException {
- final ThriftSchemaConverter thriftSchemaConverter = new
ThriftSchemaConverter();
- @SuppressWarnings("unchecked")
- final Class<T> class1 = (Class<T>) o.getClass();
- final MessageType schema = thriftSchemaConverter.convert(class1);
-
- final StructType structType = ThriftSchemaConverter.toStructType(class1);
- final ThriftToPig<T> thriftToPig = new ThriftToPig<T>(class1);
- final Schema pigSchema = thriftToPig.toSchema();
- final TupleRecordMaterializer tupleRecordConverter = new
TupleRecordMaterializer(schema, pigSchema, true);
- RecordConsumer recordConsumer = new
ConverterConsumer(tupleRecordConverter.getRootConverter(), schema);
- final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
- ParquetWriteProtocol p =
- new ParquetWriteProtocol(new
RecordConsumerLoggingWrapper(recordConsumer), columnIO, structType);
- o.write(p);
- final Tuple t = tupleRecordConverter.getCurrentRecord();
- final Tuple expected = thriftToPig.getPigTuple(o);
- assertEquals(expected.toString(), t.toString());
- final MessageType filtered = new PigSchemaConverter().filter(schema,
pigSchema);
- assertEquals(schema.toString(), filtered.toString());
- }
-}
diff --git
a/parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java
b/parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java
deleted file mode 100644
index c021cd2d2..000000000
---
a/parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.thrift.pig;
-
-import static org.apache.pig.builtin.mock.Storage.tuple;
-import static org.junit.Assert.assertEquals;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Properties;
-import org.apache.parquet.pig.ParquetLoader;
-import org.apache.parquet.thrift.test.Name;
-import org.apache.pig.ExecType;
-import org.apache.pig.PigServer;
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.backend.executionengine.ExecJob.JOB_STATUS;
-import org.apache.pig.builtin.mock.Storage;
-import org.apache.pig.builtin.mock.Storage.Data;
-import org.apache.pig.data.Tuple;
-import org.junit.Test;
-
-public class TestParquetThriftStorer {
- @Test
- public void testStorer() throws ExecException, Exception {
- String out = "target/out";
- int rows = 1000;
- Properties props = new Properties();
- props.setProperty("parquet.compression", "uncompressed");
- props.setProperty("parquet.page.size", "1000");
- PigServer pigServer = new PigServer(ExecType.LOCAL, props);
- Data data = Storage.resetData(pigServer);
- Collection<Tuple> list = new ArrayList<Tuple>();
- for (int i = 0; i < rows; i++) {
- list.add(tuple("bob", "roberts" + i));
- }
- data.set("in", "fn:chararray, ln:chararray", list);
- pigServer.deleteFile(out);
- pigServer.setBatchOn();
- pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
- pigServer.registerQuery("Store A into '" + out + "' using " +
ParquetThriftStorer.class.getName() + "('"
- + Name.class.getName() + "');");
- execBatch(pigServer);
-
- pigServer.registerQuery("B = LOAD '" + out + "' USING " +
ParquetLoader.class.getName() + "();");
- pigServer.registerQuery("Store B into 'out' using mock.Storage();");
- execBatch(pigServer);
-
- List<Tuple> result = data.get("out");
-
- assertEquals(rows, result.size());
- int i = 0;
- for (Tuple tuple : result) {
- assertEquals(tuple("bob", "roberts" + i), tuple);
- ++i;
- }
- }
-
- private void execBatch(PigServer pigServer) throws IOException {
- if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) {
- throw new RuntimeException(
- "Job failed", pigServer.executeBatch().get(0).getException());
- }
- }
-}
diff --git a/pom.xml b/pom.xml
index 445d34168..59e1026d2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -88,8 +88,6 @@
<previous.version>1.17.0</previous.version>
<thrift.executable>thrift</thrift.executable>
<format.thrift.executable>${thrift.executable}</format.thrift.executable>
- <pig.version>0.16.0</pig.version>
- <pig.classifier>h2</pig.classifier>
<thrift-maven-plugin.version>0.10.0</thrift-maven-plugin.version>
<thrift.version>0.22.0</thrift.version>
<format.thrift.version>${thrift.version}</format.thrift.version>
@@ -571,6 +569,10 @@
<exclude>org.apache.parquet.column.values.bytestreamsplit.ByteStreamSplitValuesReader#gatherElementDataFromStreams(byte[])</exclude>
<!-- Due to the removal of deprecated methods -->
<exclude>org.apache.parquet.arrow.schema.SchemaMapping$TypeMappingVisitor#visit(org.apache.parquet.arrow.schema.SchemaMapping$MapTypeMapping)</exclude>
+ <!-- Intentional removal of deprecated Pig support from parquet-thrift -->
+ <exclude>org.apache.parquet.thrift.pig.ParquetThriftStorer</exclude>
+ <exclude>org.apache.parquet.thrift.pig.TupleToThriftWriteSupport</exclude>
+ <exclude>org.apache.parquet.hadoop.thrift.AbstractThriftWriteSupport#isPigLoaded()</exclude>
<!-- Make static variables final -->
<exclude>org.apache.parquet.avro.AvroReadSupport#AVRO_REQUESTED_PROJECTION</exclude>
<exclude>org.apache.parquet.avro.AvroReadSupport#AVRO_DATA_SUPPLIER</exclude>