My team and I have been trying, with limited success, to use the COMMENT
feature of Hive columns to maintain documentation for the tables and
columns in our data warehouse built on Hive. However, we use a number of
custom and non-native SerDes, and for tables that use them the column
comments always get overwritten with the string "from deserializer".
I've possibly found a way to work around this from within Hive, but I
want to get some insight from the hive-dev community to figure out
whether this is a patently bad idea and we are just setting
ourselves up for pain later on.
I won't go into all the details, but it seems to work in our (so far)
limited testing. However, we are using Hive 0.7.1 and the patch I am
sending is against master/HEAD.
Please let me know whether this is an acceptable approach to preserving
column comments with non-native SerDes!
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 7524484..7ea77f1 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -1947,19 +1947,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
} catch (NoSuchObjectException e) {
throw new UnknownTableException(e.getMessage());
}
- boolean getColsFromSerDe = SerDeUtils.shouldGetColsFromSerDe(
- tbl.getSd().getSerdeInfo().getSerializationLib());
- if (!getColsFromSerDe) {
- ret = tbl.getSd().getCols();
- } else {
- try {
- Deserializer s = MetaStoreUtils.getDeserializer(hiveConf, tbl);
- ret = MetaStoreUtils.getFieldsFromDeserializer(tableName, s);
- } catch (SerDeException e) {
- StringUtils.stringifyException(e);
- throw new MetaException(e.getMessage());
- }
- }
+ ret = tbl.getSd().getCols();
} finally {
endFunction("get_fields", ret != null);
}
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 86c7205..e872cdc 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -45,6 +45,8 @@ import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -795,6 +797,11 @@ public class MetaStoreUtils {
*/
public static List<FieldSchema> getFieldsFromDeserializer(String tableName,
Deserializer deserializer) throws SerDeException, MetaException {
+ try {
+ return Hive.get().getTable(tableName).getTTable().getSd().getCols();
+ } catch (HiveException e) {
+ // can't get the schema that way? do things the old way, then.
+ }
ObjectInspector oi = deserializer.getObjectInspector();
String[] names = tableName.split("\\.");
String last_name = names[names.length - 1];
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 6b432ac..9e543d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -481,18 +481,7 @@ public class Table implements Serializable {
}
public List<FieldSchema> getCols() {
- boolean getColsFromSerDe = SerDeUtils.shouldGetColsFromSerDe(
- getSerializationLib());
- if (!getColsFromSerDe) {
- return tTable.getSd().getCols();
- } else {
- try {
- return Hive.getFieldsFromDeserializer(getTableName(), getDeserializer());
- } catch (HiveException e) {
- LOG.error("Unable to get field from serde: " + getSerializationLib(), e);
- }
- return new ArrayList<FieldSchema>();
- }
+ return tTable.getSd().getCols();
}
/**