Try changing the code in the evaluate method to something like the following, so that the field value is read through its object inspector instead of being cast to LazyString: for (int i = 0; i < numElements; i++) { Object element = listOI.getListElement(arguments[0].get(), i); Object product = structOI.getStructFieldData(element, structOI.getStructFieldRef("productCategory")); ret.add(((PrimitiveObjectInspector)prodCatOI).getPrimitiveWritableObject(product)); }
2013/3/29 Peter Chu <pete....@outlook.com>: > Sorry, the test should be following (changed extract_shas to > extract_product_category): > > import org.apache.hadoop.hive.ql.metadata.HiveException; > import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; > import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; > import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; > import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; > import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; > import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; > import > org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; > import org.testng.annotations.Test; > > import java.util.ArrayList; > import java.util.List; > > public class TestGenericUDFExtractProductCategory > { > ArrayList<String> fieldNames = new ArrayList<String>(); > ArrayList<ObjectInspector> fieldObjectInspectors = new > ArrayList<ObjectInspector>(); > > @Test > public void simpleTest() > throws Exception > { > ListObjectInspector firstInspector = new MyListObjectInspector(); > > ArrayList test = new ArrayList(); > test.add("test"); > > ArrayList test2 = new ArrayList(); > test2.add(test); > > StructObjectInspector soi = > ObjectInspectorFactory.getStandardStructObjectInspector(test, test2); > > fieldNames.add("productCategory"); > > fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); > > GenericUDF.DeferredObject firstDeferredObject = new > MyDeferredObject(test2); > > GenericUDF extract_product_category = new > GenericUDFExtractProductCategory(); > > extract_product_category.initialize(new > ObjectInspector[]{firstInspector}); > > extract_product_category.evaluate(new > DeferredObject[]{firstDeferredObject}); > } > > public class MyDeferredObject implements DeferredObject > { > private Object value; > > public MyDeferredObject(Object value) { > this.value = 
value; > } > > @Override > public Object get() throws HiveException > { > return value; > } > } > > private class MyListObjectInspector implements ListObjectInspector > { > @Override > public ObjectInspector getListElementObjectInspector() > { > return > ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, > fieldObjectInspectors); > } > > @Override > public Object getListElement(Object data, int index) > { > List myList = (List) data; > if (myList == null || index > myList.size()) { > return null; > } > return myList.get(index); > } > > @Override > public int getListLength(Object data) > { > if (data == null) { > return -1; > } > return ((List) data).size(); > } > > @Override > public List<?> getList(Object data) > { > return (List) data; > } > > @Override > public String getTypeName() > { > return null; //To change body of implemented methods use File | > Settings | File Templates. > } > > @Override > public Category getCategory() > { > return Category.LIST; > } > } > } > > ________________________________ > From: pete....@outlook.com > To: user@hive.apache.org > Subject: A GenericUDF Function to Extract a Field From an Array of Structs > Date: Thu, 28 Mar 2013 14:16:33 -0700 > > I am trying to write a GenericUDF function to collect all of a specific > struct field(s) within an array for each record, and return them in an array > as well. > > I wrote the UDF (as below), and it seems to work, but: > > 1) It does not work when I run it on an external table, although it works > fine on a managed table. Any idea why? > > 2) I am having a tough time writing a test for this. I have attached the > test I have so far, and it does not work; > I always get 'java.util.ArrayList cannot be cast to > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector' or 'cannot > cast String to LazyString'. > My question is: how do I supply a list of structs to the evaluate method? > > Any help will be greatly appreciated. 
> > Thanks, > Peter > > The table: > > CREATE EXTERNAL TABLE FOO ( > TS string, > customerId string, > products array< struct<productCategory:string> > > ) > PARTITIONED BY (ds string) > ROW FORMAT SERDE 'some.serde' > WITH SERDEPROPERTIES ('error.ignore'='true') > LOCATION 'some_locations' > ; > > A row of record holds: > 1340321132000, 'some_company', > [{"productCategory":"footwear"},{"productCategory":"eyewear"}] > > This is my code: > > import org.apache.hadoop.hive.ql.exec.Description; > import org.apache.hadoop.hive.ql.exec.UDFArgumentException; > import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; > import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; > import org.apache.hadoop.hive.ql.metadata.HiveException; > import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; > import org.apache.hadoop.hive.serde2.lazy.LazyString; > import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; > import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; > import > org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; > import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; > import > org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; > import org.apache.hadoop.hive.serde2.objectinspector.StructField; > import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; > import > org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; > import > org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; > import org.apache.hadoop.io.Text; > > import java.util.ArrayList; > > @Description(name = "extract_product_category", > value = "_FUNC_( array< struct<productCategory:string> > ) - Collect > all product category field values inside an array of struct(s), and return > the results in an array<string>", > extended = "Example:\n SELECT > _FUNC_(array_of_structs_with_product_category_field)") > public 
class GenericUDFExtractProductCategory > extends GenericUDF > { > private ArrayList ret; > > private ListObjectInspector listOI; > private StructObjectInspector structOI; > private ObjectInspector prodCatOI; > > @Override > public ObjectInspector initialize(ObjectInspector[] args) > throws UDFArgumentException > { > if (args.length != 1) { > throw new UDFArgumentLengthException("The function > extract_product_category() requires exactly one argument."); > } > > if (args[0].getCategory() != Category.LIST) { > throw new UDFArgumentTypeException(0, "Type array<struct> is > expected to be the argument for extract_product_category but " + > args[0].getTypeName() + " is found instead"); > } > > listOI = ((ListObjectInspector) args[0]); > structOI = ((StructObjectInspector) > listOI.getListElementObjectInspector()); > > if (structOI.getAllStructFieldRefs().size() != 1) { > throw new UDFArgumentTypeException(0, "Incorrect number of > fields in the struct, should be one"); > } > > StructField productCategoryField = > structOI.getStructFieldRef("productCategory"); > //If not, throw exception > if (productCategoryField == null) { > throw new UDFArgumentTypeException(0, "NO \"productCategory\" > field in input structure"); > } > > //Are they of the correct types? > //We store these object inspectors for use in the evaluate() method > prodCatOI = productCategoryField.getFieldObjectInspector(); > > //First are they primitives > if (prodCatOI.getCategory() != Category.PRIMITIVE) { > throw new UDFArgumentTypeException(0, "productCategory field > must be of string type"); > } > > //Are they of the correct primitives? 
> if (((PrimitiveObjectInspector)prodCatOI).getPrimitiveCategory() != > PrimitiveObjectInspector.PrimitiveCategory.STRING) { > throw new UDFArgumentTypeException(0, "productCategory field > must be of string type"); > } > > ret = new ArrayList(); > > return > ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector); > } > > @Override > public ArrayList evaluate(DeferredObject[] arguments) > throws HiveException > { > ret.clear(); > > if (arguments.length != 1) { > return null; > } > > if (arguments[0].get() == null) { > return null; > } > > int numElements = listOI.getListLength(arguments[0].get()); > > for (int i = 0; i < numElements; i++) { > LazyString prodCatDataObject = (LazyString) > (structOI.getStructFieldData(listOI.getListElement(arguments[0].get(), i), > structOI.getStructFieldRef("productCategory"))); > Text productCategoryValue = ((StringObjectInspector) > prodCatOI).getPrimitiveWritableObject(prodCatDataObject); > ret.add(productCategoryValue); > } > return ret; > } > > @Override > public String getDisplayString(String[] strings) > { > assert (strings.length > 0); > StringBuilder sb = new StringBuilder(); > sb.append("extract_product_category("); > sb.append(strings[0]); > sb.append(")"); > return sb.toString(); > } > } > > > My Test: > > import org.apache.hadoop.hive.ql.metadata.HiveException; > import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; > import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; > import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; > import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; > import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; > import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; > import > org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; > import org.testng.annotations.Test; > > import java.util.ArrayList; > 
import java.util.List; > > public class TestGenericUDFExtractShas > { > ArrayList<String> fieldNames = new ArrayList<String>(); > ArrayList<ObjectInspector> fieldObjectInspectors = new > ArrayList<ObjectInspector>(); > > @Test > public void simpleTest() > throws Exception > { > ListObjectInspector firstInspector = new MyListObjectInspector(); > > ArrayList test = new ArrayList(); > test.add("test"); > > ArrayList test2 = new ArrayList(); > test2.add(test); > > StructObjectInspector soi = > ObjectInspectorFactory.getStandardStructObjectInspector(test, test2); > > fieldNames.add("productCategory"); > > fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); > > GenericUDF.DeferredObject firstDeferredObject = new > MyDeferredObject(test2); > > GenericUDF extract_shas = new GenericUDFExtractShas(); > > extract_shas.initialize(new ObjectInspector[]{firstInspector}); > > extract_shas.evaluate(new DeferredObject[]{firstDeferredObject}); > } > > public class MyDeferredObject implements DeferredObject > { > private Object value; > > public MyDeferredObject(Object value) { > this.value = value; > } > > @Override > public Object get() throws HiveException > { > return value; > } > } > > private class MyListObjectInspector implements ListObjectInspector > { > @Override > public ObjectInspector getListElementObjectInspector() > { > return > ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, > fieldObjectInspectors); > } > > @Override > public Object getListElement(Object data, int index) > { > List myList = (List) data; > if (myList == null || index > myList.size()) { > return null; > } > return myList.get(index); > } > > @Override > public int getListLength(Object data) > { > if (data == null) { > return -1; > } > return ((List) data).size(); > } > > @Override > public List<?> getList(Object data) > { > return (List) data; > } > > @Override > public String getTypeName() > { > return null; //To change body of implemented 
methods use File | > Settings | File Templates. > } > > @Override > public Category getCategory() > { > return Category.LIST; > } > } > }