[ https://issues.apache.org/jira/browse/HIVE-26754?focusedWorklogId=830621&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-830621 ]
ASF GitHub Bot logged work on HIVE-26754: ----------------------------------------- Author: ASF GitHub Bot Created on: 02/Dec/22 11:37 Start Date: 02/Dec/22 11:37 Worklog Time Spent: 10m Work Description: tarak271 commented on code in PR #3806: URL: https://github.com/apache/hive/pull/3806#discussion_r1038057429 ########## ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayDistinct.java: ########## @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** + * Generic UDF for distinct array + * <code>ARRAY_DISTINCT(array(obj1, obj2, obj3...))</code>. 
+ * + * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF + */ +@Description(name = "array_distinct", + value = "_FUNC_(array(obj1, obj2,...)) - " + + "The function returns an array of the same type as the input array with distinct values.", + extended = "Example:\n" + + " > SELECT _FUNC_(array('b', 'd', 'd', 'a')) FROM src LIMIT 1;\n" + + " ['b', 'd', 'a']") +public class GenericUDFArrayDistinct extends AbstractGenericUDFArrayBase { + + public GenericUDFArrayDistinct() { + super("ARRAY_DISTINCT", 1, 1, ObjectInspector.Category.LIST); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + + Object array = arguments[ARRAY_IDX].get(); + + if (arrayOI.getListLength(array) <= 0) { + return new ArrayList<Object>(); + } Review Comment: I also added another test class, TestGenericUDFArray, to simulate a null object; it can be reused across the array functions. Issue Time Tracking ------------------- Worklog Id: (was: 830621) Time Spent: 3.5h (was: 3h 20m) > Implement array_distinct UDF to return an array after removing duplicates in > it > ------------------------------------------------------------------------------- > > Key: HIVE-26754 > URL: https://issues.apache.org/jira/browse/HIVE-26754 > Project: Hive > Issue Type: Sub-task > Components: Hive > Reporter: Taraka Rama Rao Lethavadla > Assignee: Taraka Rama Rao Lethavadla > Priority: Major > Labels: pull-request-available > Time Spent: 3.5h > Remaining Estimate: 0h > > *array_distinct(array(obj1, obj2,...))* - The function returns an array of > the same type as the input argument where all duplicate values have been > removed. > Example: > > SELECT array_distinct(array('b', 'd', 'd', 'a')) FROM src LIMIT 1; > ['b', 'd', 'a'] -- This message was sent by Atlassian Jira (v8.20.10#820010)