This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f68986d4522aa2e581c4bbf464a0454f881d7553
Author: stiga-huang <[email protected]>
AuthorDate: Sun Apr 23 12:17:45 2023 +0800

    IMPALA-12082: Fix db not found error of INVALIDATE METADATA under unloaded db
    
    INVALIDATE METADATA can be executed on tables under an unloaded db. As a
    side effect, it also brings up the metadata of the db. However, this
    feature has been broken since IMPALA-11808, in which we try to get the
    table from the catalog cache assuming it's loaded. This causes the above
    use case to fail with DatabaseNotFoundException.
    
    This patch fixes the regression by not getting the table from catalog
    cache for INVALIDATE METADATA commands. We only do so for REFRESH
    commands. After the INVALIDATE METADATA command succeeds, if we need to
    fire reload events, we get the table from catalog cache.
    
    Tests:
     - Add e2e tests for both cases: event-processor disabled and enabled.
    
    Change-Id: Ifd0a9e87f06c38f569c32bd10cc2668403681fd4
    Reviewed-on: http://gerrit.cloudera.org:8080/19786
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../org/apache/impala/service/CatalogOpExecutor.java   | 18 +++++++++++++-----
 .../test_metadata_no_events_processing.py              | 14 ++++++++++++++
 tests/metadata/test_hms_integration.py                 | 10 ++++++++++
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index c62646e71..56f08e28c 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -6404,10 +6404,16 @@ public class CatalogOpExecutor {
       // Thrift representation of the result of the invalidate/refresh 
operation.
       TCatalogObject updatedThriftTable = null;
       TableName tblName = TableName.fromThrift(req.getTable_name());
-      Table tbl = catalog_.getTable(tblName.getDb(), tblName.getTbl());
-      if (req.isIs_refresh()) {
+      Table tbl = null;
+      if (!req.isIs_refresh()) {
+        // For INVALIDATE METADATA <db>.<table>, the db might be unloaded.
+        // So we can't update 'tbl' here.
+        updatedThriftTable = catalog_.invalidateTable(
+            req.getTable_name(), tblWasRemoved, dbWasAdded);
+      } else {
         // Quick check to see if the table exists in the catalog without 
triggering
         // a table load.
+        tbl = catalog_.getTable(tblName.getDb(), tblName.getTbl());
         if (tbl != null) {
           // If the table is not loaded, no need to perform refresh after the 
initial
           // metadata load.
@@ -6454,9 +6460,6 @@ public class CatalogOpExecutor {
             }
           }
         }
-      } else {
-        updatedThriftTable = catalog_.invalidateTable(
-            req.getTable_name(), tblWasRemoved, dbWasAdded);
       }
 
       if (updatedThriftTable == null) {
@@ -6467,6 +6470,11 @@ public class CatalogOpExecutor {
       }
 
       if (BackendConfig.INSTANCE.enableReloadEvents()) {
+        // For INVALIDATE METADATA <table>, 'tbl' can only be got after it succeeds.
+        if (!req.isIs_refresh()) {
+          tbl = catalog_.getTable(tblName.getDb(), tblName.getTbl());
+        }
+        Preconditions.checkNotNull(tbl, "tbl is null in " + cmdString);
         // fire event for refresh event and update the last refresh event id
         fireReloadEventAndUpdateRefreshEventId(req, updatedThriftTable, 
tblName, tbl);
       }
diff --git a/tests/custom_cluster/test_metadata_no_events_processing.py b/tests/custom_cluster/test_metadata_no_events_processing.py
index ee6af0821..fd6bc9b80 100644
--- a/tests/custom_cluster/test_metadata_no_events_processing.py
+++ b/tests/custom_cluster/test_metadata_no_events_processing.py
@@ -296,3 +296,17 @@ class TestMetadataNoEventsProcessing(CustomClusterTestSuite):
     result = self.client.execute("show partitions %s" % tbl)
     assert result.get_data().startswith("1\t1\t2"),\
         "Incorrect partition stats %s" % result.get_data()
+
+  @CustomClusterTestSuite.with_args(catalogd_args="--hms_event_polling_interval_s=0")
+  def test_invalidate_metadata(self, unique_name):
+    """Verify invalidate metadata on tables under unloaded db won't fail"""
+    db = unique_name + "_db"
+    tbl = db + "." + unique_name + "_tbl"
+    try:
+      self.run_stmt_in_hive("create database " + db)
+      self.run_stmt_in_hive("create table %s (i int)" % tbl)
+      self.client.execute("invalidate metadata %s" % tbl)
+      res = self.client.execute("describe %s" % tbl)
+      assert res.data == ["i\tint\t"]
+    finally:
+      self.run_stmt_in_hive("drop database %s cascade" % db)
diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py
index b67d0cac9..d1cbd4cc7 100644
--- a/tests/metadata/test_hms_integration.py
+++ b/tests/metadata/test_hms_integration.py
@@ -144,6 +144,16 @@ class TestHmsIntegrationSanity(ImpalaTestSuite):
     else:
       assert False
 
+  def test_invalidate_metadata(self, unique_name):
+    """Verify invalidate metadata on tables under unloaded db won't fail"""
+    db = unique_name + "_db"
+    tbl = db + "." + unique_name + "_tbl"
+    try:
+      self.run_stmt_in_hive("create database " + db)
+      self.run_stmt_in_hive("create table %s (i int)" % tbl)
+      self.client.execute("invalidate metadata %s" % tbl)
+    finally:
+      self.run_stmt_in_hive("drop database %s cascade" % db)
 
 @SkipIfFS.hive
 class TestHmsIntegration(ImpalaTestSuite):

Reply via email to