This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0c0a3fff39839b400370433568f37a317b7d4800
Author: Riza Suminto <[email protected]>
AuthorDate: Fri Feb 23 10:56:37 2024 -0800

    IMPALA-12840: Exclude THdfsFileDesc in getJsonCatalogObject
    
    TestReusePartitions::test_reuse_partitions_transactional calls
    /catalog_object path of CatalogD's WebUI and decodes the JSON response.
    The "json_string" field from the response text often contains Unicode
    control characters that come from serialized binary data from
    THdfsFileDesc objects. That causes JSON decoding to fail with an error
    like this:
    
    ValueError: Invalid control character at: line 1 column 1850 (char 1849)
    
    This patch attempts to deflake the test by tuning the return value of
    getJsonCatalogObject() that excludes THdfsFileDesc, by lowering the
    detail level from ThriftObjectType.FULL to
    ThriftObjectType.DESCRIPTOR_ONLY.
    
    test_reuse_partitions_transactional is tweaked a bit to print the
    response / JSON object if an assertion fails.
    
    Testing:
    - Loop and pass the test for hundred times.
    
    Change-Id: I5f6840bf1267d1d99d321c0a6b4a0cab49543182
    Reviewed-on: http://gerrit.cloudera.org:8080/21064
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../main/java/org/apache/impala/catalog/Catalog.java | 20 +++++++++++++++++++-
 .../java/org/apache/impala/catalog/HdfsTable.java    | 10 ++++++++++
 .../java/org/apache/impala/catalog/IcebergTable.java |  9 +++++++++
 .../main/java/org/apache/impala/catalog/Table.java   |  6 ++++++
 .../java/org/apache/impala/service/JniCatalog.java   |  6 ++++--
 tests/metadata/test_reuse_partitions.py              | 10 ++++++----
 6 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/Catalog.java 
b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
index a0b22593a..540590c5f 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Catalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
@@ -537,6 +537,23 @@ public abstract class Catalog implements AutoCloseable {
    */
   public TCatalogObject getTCatalogObject(TCatalogObject objectDesc)
       throws CatalogException {
+    return getTCatalogObject(objectDesc, false);
+  }
+
+  /**
+   * Gets the thrift representation of a catalog object, given the "object
+   * description". The object description is just a TCatalogObject with only 
the
+   * catalog object type and object name set.
+   * If the object is not found, a CatalogException is thrown.
+   * @param objectDesc the object description.
+   * @param isHumanReadable whether the request is for human readable purpose 
or not.
+   *                        If False, return full object. Otherwise, return 
smaller
+   *                        catalog object that omit some field.
+   * @return the requested catalog object.
+   * @throws CatalogException
+   */
+  public TCatalogObject getTCatalogObject(
+      TCatalogObject objectDesc, boolean isHumanReadable) throws 
CatalogException {
     TCatalogObject result = new TCatalogObject();
     switch (objectDesc.getType()) {
       case DATABASE: {
@@ -562,7 +579,8 @@ public abstract class Catalog implements AutoCloseable {
         try {
           result.setType(table.getCatalogObjectType());
           result.setCatalog_version(table.getCatalogVersion());
-          result.setTable(table.toThrift());
+          result.setTable(
+              isHumanReadable ? table.toHumanReadableThrift() : 
table.toThrift());
         } finally {
           table.releaseReadLock();
         }
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java 
b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 36324ceaa..05f62814b 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -2092,6 +2092,16 @@ public class HdfsTable extends Table implements 
FeFsTable {
     return table;
   }
 
+  @Override
+  public TTable toHumanReadableThrift() {
+    // Same as toThrift, but call getTHdfsTable with lower
+    // ThriftObjectType.DESCRIPTOR_ONLY.
+    TTable table = super.toThrift();
+    table.setTable_type(TTableType.HDFS_TABLE);
+    table.setHdfs_table(getTHdfsTable(ThriftObjectType.DESCRIPTOR_ONLY, null));
+    return table;
+  }
+
   /**
    * Just like toThrift but unset the full partition metadata in the partition 
map.
    * So only the partition ids are contained. Used in catalogd to send 
partition updates
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java 
b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
index 4e927c06c..6aedc0bd1 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java
@@ -363,6 +363,15 @@ public class IcebergTable extends Table implements 
FeIcebergTable {
     return table;
   }
 
+  @Override
+  public TTable toHumanReadableThrift() {
+    TTable table = super.toThrift();
+    table.setTable_type(TTableType.ICEBERG_TABLE);
+    table.setIceberg_table(Utils.getTIcebergTable(this));
+    table.setHdfs_table(transformToTHdfsTable(true, 
ThriftObjectType.DESCRIPTOR_ONLY));
+    return table;
+  }
+
   /**
    * Loads the metadata of an Iceberg table.
    * <p>
diff --git a/fe/src/main/java/org/apache/impala/catalog/Table.java 
b/fe/src/main/java/org/apache/impala/catalog/Table.java
index dfe4bc87f..15ec5784b 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Table.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Table.java
@@ -705,6 +705,12 @@ public abstract class Table extends CatalogObjectImpl 
implements FeTable {
     return table;
   }
 
+  /**
+   * Variant of toThrift() with lower detail. Intended to get a human readable 
output
+   * that omit any binary fields.
+   */
+  public TTable toHumanReadableThrift() { return toThrift(); }
+
   private boolean isLockedByCurrentThread() {
     return isReadLockedByCurrentThread() || 
tableLock_.isWriteLockedByCurrentThread();
   }
diff --git a/fe/src/main/java/org/apache/impala/service/JniCatalog.java 
b/fe/src/main/java/org/apache/impala/service/JniCatalog.java
index 21fc2a63a..7eb0463f9 100644
--- a/fe/src/main/java/org/apache/impala/service/JniCatalog.java
+++ b/fe/src/main/java/org/apache/impala/service/JniCatalog.java
@@ -400,7 +400,9 @@ public class JniCatalog {
 
   /**
    * Gets the json string of a catalog object. It can only be used in showing 
debug
-   * messages and can't be deserialized to a thrift object.
+   * messages and can't be deserialized to a thrift object. The returned 
object is also
+   * slimmer than the one obtained from getCatalogObject() because binary data 
fields
+   * are excluded.
    */
   public String getJsonCatalogObject(byte[] thriftParams)
       throws ImpalaException, TException {
@@ -411,7 +413,7 @@ public class JniCatalog {
         "Getting JSON catalog object of " + 
Catalog.toCatalogObjectKey(objectDesc);
 
     return execOp("getJsonCatalogObject", shortDesc, () -> {
-      String res = 
jsonSerializer.toString(catalog_.getTCatalogObject(objectDesc));
+      String res = 
jsonSerializer.toString(catalog_.getTCatalogObject(objectDesc, true));
       return Pair.create(res, (long) res.length());
     }, objectDesc);
   }
diff --git a/tests/metadata/test_reuse_partitions.py 
b/tests/metadata/test_reuse_partitions.py
index 24a3e36a4..e732f8423 100644
--- a/tests/metadata/test_reuse_partitions.py
+++ b/tests/metadata/test_reuse_partitions.py
@@ -44,11 +44,13 @@ class TestReusePartitions(ImpalaTestSuite):
     obj_url = self.JSON_TABLE_OBJECT_URL.format(db_name, tbl_name)
     response = requests.get(obj_url)
     assert response.status_code == requests.codes.ok
-    catalog_obj = json.loads(json.loads(response.text)["json_string"])
-    assert "table" in catalog_obj
-    assert "hdfs_table" in catalog_obj["table"]
+    json_response = json.loads(response.text)
+    assert "json_string" in json_response, json_response
+    catalog_obj = json.loads(json_response["json_string"])
+    assert "table" in catalog_obj, catalog_obj
+    assert "hdfs_table" in catalog_obj["table"], catalog_obj["table"]
     tbl_obj = catalog_obj["table"]["hdfs_table"]
-    assert "partitions" in tbl_obj
+    assert "partitions" in tbl_obj, tbl_obj
     return set(tbl_obj["partitions"].keys())
 
   def test_reuse_partitions_nontransactional(self, unique_database):

Reply via email to