This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new b0e0efcc5f9 [HUDI-7955] Account for WritableTimestampObjectInspector#getPrimitiveJavaObject discrepancies in Hive3 and Hive2 (#11576)
b0e0efcc5f9 is described below
commit b0e0efcc5f9da5449a5b6310a7de7996b01318cb
Author: voonhous <[email protected]>
AuthorDate: Mon Jul 8 17:08:38 2024 +0800
[HUDI-7955] Account for WritableTimestampObjectInspector#getPrimitiveJavaObject discrepancies in Hive3 and Hive2 (#11576)
---
.../org/apache/hudi/hadoop/utils/HiveAvroSerializer.java | 3 +--
.../org/apache/hudi/hadoop/utils/HoodieHiveUtils.java | 4 ++++
.../org/apache/hudi/hadoop/utils/shims/Hive2Shim.java | 5 +++++
.../org/apache/hudi/hadoop/utils/shims/Hive3Shim.java | 15 +++++++++++++++
.../java/org/apache/hudi/hadoop/utils/shims/HiveShim.java | 2 ++
5 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java
index 278fd57d71b..47d984c89c3 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java
@@ -42,7 +42,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
@@ -305,7 +304,7 @@ public class HiveAvroSerializer {
case DATE:
return HoodieHiveUtils.getDays(structFieldData);
case TIMESTAMP:
- Object timestamp = ((WritableTimestampObjectInspector) fieldOI).getPrimitiveJavaObject(structFieldData);
+ Object timestamp = HoodieHiveUtils.getTimestamp(structFieldData);
return HoodieHiveUtils.getMills(timestamp);
case INT:
if (schema.getLogicalType() != null && schema.getLogicalType().getName().equals("date")) {
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java
index 63ba2f74f6b..ced39ccf379 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java
@@ -182,6 +182,10 @@ public class HoodieHiveUtils {
return HIVE_SHIM.getDateWriteable(value);
}
+ public static Object getTimestamp(Object fieldData) {
+ return HIVE_SHIM.unwrapTimestampAsPrimitive(fieldData);
+ }
+
public static int getDays(Object dateWritable) {
return HIVE_SHIM.getDays(dateWritable);
}
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/Hive2Shim.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/Hive2Shim.java
index b8583799d6c..e2a4f36cb7f 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/Hive2Shim.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/Hive2Shim.java
@@ -42,6 +42,11 @@ public class Hive2Shim implements HiveShim {
return new TimestampWritable(timestamp);
}
+ @Override
+ public Object unwrapTimestampAsPrimitive(Object o) {
+ return o == null ? null : ((TimestampWritable) o).getTimestamp();
+ }
+
public Writable getDateWriteable(int value) {
return new DateWritable(value);
}
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/Hive3Shim.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/Hive3Shim.java
index 0329c464e54..9d6dca4f2b3 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/Hive3Shim.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/Hive3Shim.java
@@ -43,6 +43,7 @@ public class Hive3Shim implements HiveShim {
private static Class<?> TIMESTAMP_CLASS = null;
private static Method SET_TIME_IN_MILLIS = null;
private static Method TO_SQL_TIMESTAMP = null;
+ private static Method GET_TIMESTAMP = null;
private static Constructor<?> TIMESTAMP_WRITEABLE_V2_CONSTRUCTOR = null;
private static Class<?> DATE_WRITEABLE_CLASS = null;
@@ -54,6 +55,7 @@ public class Hive3Shim implements HiveShim {
try {
TIMESTAMP_CLASS = Class.forName(HIVE_TIMESTAMP_TYPE_CLASS);
SET_TIME_IN_MILLIS = TIMESTAMP_CLASS.getDeclaredMethod("setTimeInMillis", long.class);
+ GET_TIMESTAMP = TIMESTAMP_CLASS.getDeclaredMethod("getTimestamp");
TO_SQL_TIMESTAMP = TIMESTAMP_CLASS.getDeclaredMethod("toSqlTimestamp");
TIMESTAMP_WRITEABLE_V2_CONSTRUCTOR = Class.forName(TIMESTAMP_WRITEABLE_V2_CLASS).getConstructor(TIMESTAMP_CLASS);
} catch (ClassNotFoundException | NoSuchMethodException e) {
@@ -94,6 +96,19 @@ public class Hive3Shim implements HiveShim {
}
}
+ @Override
+ public Object unwrapTimestampAsPrimitive(Object o) {
+ if (o == null) {
+ return null;
+ }
+
+ try {
+ return GET_TIMESTAMP.invoke(o);
+ } catch (IllegalAccessException | InvocationTargetException e) {
+ throw new HoodieException("unable to get timestamp from writable using v2 class!", e);
+ }
+ }
+
/**
* Get date writeable object from int value.
* Hive3 use DateWritableV2 to build date objects and Hive2 use DateWritable.
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/HiveShim.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/HiveShim.java
index 4b67514d243..45a8206ce57 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/HiveShim.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/shims/HiveShim.java
@@ -27,6 +27,8 @@ public interface HiveShim {
Writable getTimestampWriteable(long value, boolean timestampMillis);
+ Object unwrapTimestampAsPrimitive(Object o);
+
Writable getDateWriteable(int value);
int getDays(Object dateWritable);