This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 0c0a3fff39839b400370433568f37a317b7d4800 Author: Riza Suminto <[email protected]> AuthorDate: Fri Feb 23 10:56:37 2024 -0800 IMPALA-12840: Exclude THdfsFileDesc in getJsonCatalogObject TestReusePartitions::test_reuse_partitions_transactional calls /catalog_object path of CatalogD's WebUI and decodes the JSON response. The "json_string" field from the response text often contains Unicode control characters that come from serialized binary data from THdfsFileDesc objects. That causes JSON decoding to fail with an error like this: ValueError: Invalid control character at: line 1 column 1850 (char 1849) This patch attempts to deflake the test by tuning the return value of getJsonCatalogObject() that excludes THdfsFileDesc, by lowering the detail level from ThriftObjectType.FULL to ThriftObjectType.DESCRIPTOR_ONLY. test_reuse_partitions_transactional is tweaked a bit to print the response / JSON object if an assertion fails. Testing: - Loop and pass the test for hundred times. Change-Id: I5f6840bf1267d1d99d321c0a6b4a0cab49543182 Reviewed-on: http://gerrit.cloudera.org:8080/21064 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../main/java/org/apache/impala/catalog/Catalog.java | 20 +++++++++++++++++++- .../java/org/apache/impala/catalog/HdfsTable.java | 10 ++++++++++ .../java/org/apache/impala/catalog/IcebergTable.java | 9 +++++++++ .../main/java/org/apache/impala/catalog/Table.java | 6 ++++++ .../java/org/apache/impala/service/JniCatalog.java | 6 ++++-- tests/metadata/test_reuse_partitions.py | 10 ++++++---- 6 files changed, 54 insertions(+), 7 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/catalog/Catalog.java b/fe/src/main/java/org/apache/impala/catalog/Catalog.java index a0b22593a..540590c5f 100644 --- a/fe/src/main/java/org/apache/impala/catalog/Catalog.java +++ b/fe/src/main/java/org/apache/impala/catalog/Catalog.java @@ -537,6 +537,23 @@ public abstract class Catalog implements AutoCloseable { */ public TCatalogObject getTCatalogObject(TCatalogObject objectDesc) throws CatalogException { + return getTCatalogObject(objectDesc, false); + } + + /** + * Gets the thrift representation of a catalog object, given the "object + * description". The object description is just a TCatalogObject with only the + * catalog object type and object name set. + * If the object is not found, a CatalogException is thrown. + * @param objectDesc the object description. + * @param isHumanReadable whether the request is for human readable purpose or not. + * If False, return full object. Otherwise, return smaller + * catalog object that omit some field. + * @return the requested catalog object. + * @throws CatalogException + */ + public TCatalogObject getTCatalogObject( + TCatalogObject objectDesc, boolean isHumanReadable) throws CatalogException { TCatalogObject result = new TCatalogObject(); switch (objectDesc.getType()) { case DATABASE: { @@ -562,7 +579,8 @@ public abstract class Catalog implements AutoCloseable { try { result.setType(table.getCatalogObjectType()); result.setCatalog_version(table.getCatalogVersion()); - result.setTable(table.toThrift()); + result.setTable( + isHumanReadable ? table.toHumanReadableThrift() : table.toThrift()); } finally { table.releaseReadLock(); } diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 36324ceaa..05f62814b 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -2092,6 +2092,16 @@ public class HdfsTable extends Table implements FeFsTable { return table; } + @Override + public TTable toHumanReadableThrift() { + // Same as toThrift, but call getTHdfsTable with lower + // ThriftObjectType.DESCRIPTOR_ONLY. + TTable table = super.toThrift(); + table.setTable_type(TTableType.HDFS_TABLE); + table.setHdfs_table(getTHdfsTable(ThriftObjectType.DESCRIPTOR_ONLY, null)); + return table; + } + /** * Just like toThrift but unset the full partition metadata in the partition map. * So only the partition ids are contained. Used in catalogd to send partition updates diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java index 4e927c06c..6aedc0bd1 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergTable.java @@ -363,6 +363,15 @@ public class IcebergTable extends Table implements FeIcebergTable { return table; } + @Override + public TTable toHumanReadableThrift() { + TTable table = super.toThrift(); + table.setTable_type(TTableType.ICEBERG_TABLE); + table.setIceberg_table(Utils.getTIcebergTable(this)); + table.setHdfs_table(transformToTHdfsTable(true, ThriftObjectType.DESCRIPTOR_ONLY)); + return table; + } + /** * Loads the metadata of an Iceberg table. * <p> diff --git a/fe/src/main/java/org/apache/impala/catalog/Table.java b/fe/src/main/java/org/apache/impala/catalog/Table.java index dfe4bc87f..15ec5784b 100644 --- a/fe/src/main/java/org/apache/impala/catalog/Table.java +++ b/fe/src/main/java/org/apache/impala/catalog/Table.java @@ -705,6 +705,12 @@ public abstract class Table extends CatalogObjectImpl implements FeTable { return table; } + /** + * Variant of toThrift() with lower detail. Intended to get a human readable output + * that omit any binary fields. + */ + public TTable toHumanReadableThrift() { return toThrift(); } + private boolean isLockedByCurrentThread() { return isReadLockedByCurrentThread() || tableLock_.isWriteLockedByCurrentThread(); } diff --git a/fe/src/main/java/org/apache/impala/service/JniCatalog.java b/fe/src/main/java/org/apache/impala/service/JniCatalog.java index 21fc2a63a..7eb0463f9 100644 --- a/fe/src/main/java/org/apache/impala/service/JniCatalog.java +++ b/fe/src/main/java/org/apache/impala/service/JniCatalog.java @@ -400,7 +400,9 @@ public class JniCatalog { /** * Gets the json string of a catalog object. It can only be used in showing debug - * messages and can't be deserialized to a thrift object. + * messages and can't be deserialized to a thrift object. The returned object is also + * slimmer than the one obtained from getCatalogObject() because binary data fields + * are excluded. */ public String getJsonCatalogObject(byte[] thriftParams) throws ImpalaException, TException { @@ -411,7 +413,7 @@ public class JniCatalog { "Getting JSON catalog object of " + Catalog.toCatalogObjectKey(objectDesc); return execOp("getJsonCatalogObject", shortDesc, () -> { - String res = jsonSerializer.toString(catalog_.getTCatalogObject(objectDesc)); + String res = jsonSerializer.toString(catalog_.getTCatalogObject(objectDesc, true)); return Pair.create(res, (long) res.length()); }, objectDesc); } diff --git a/tests/metadata/test_reuse_partitions.py b/tests/metadata/test_reuse_partitions.py index 24a3e36a4..e732f8423 100644 --- a/tests/metadata/test_reuse_partitions.py +++ b/tests/metadata/test_reuse_partitions.py @@ -44,11 +44,13 @@ class TestReusePartitions(ImpalaTestSuite): obj_url = self.JSON_TABLE_OBJECT_URL.format(db_name, tbl_name) response = requests.get(obj_url) assert response.status_code == requests.codes.ok - catalog_obj = json.loads(json.loads(response.text)["json_string"]) - assert "table" in catalog_obj - assert "hdfs_table" in catalog_obj["table"] + json_response = json.loads(response.text) + assert "json_string" in json_response, json_response + catalog_obj = json.loads(json_response["json_string"]) + assert "table" in catalog_obj, catalog_obj + assert "hdfs_table" in catalog_obj["table"], catalog_obj["table"] tbl_obj = catalog_obj["table"]["hdfs_table"] - assert "partitions" in tbl_obj + assert "partitions" in tbl_obj, tbl_obj return set(tbl_obj["partitions"].keys()) def test_reuse_partitions_nontransactional(self, unique_database):
