This is an automated email from the ASF dual-hosted git repository. wzhou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 0a077fe9921af7c5bac2f9decba371c5eca55d6a Author: wzhou-code <[email protected]> AuthorDate: Thu Mar 21 21:58:51 2024 -0700 IMPALA-12928: Mask JDBC table property dbcp.password for DESC FORMATTED and SHOW CREATE TABLE 'desc formatted' and 'show create table' commands show all table properties in clear text. For an external JDBC table, the dbcp.password table property value should be masked in the output of these two commands. This patch masks the dbcp.password property value in the output of the 'desc formatted' and 'show create table' commands. The dbcp.password table property could be written into Impala and HMS log files with JDBC table creation statements. There is a generic tool in production environments with which users can set up regular expressions to detect and redact sensitive information within SQL statement text in log files. Testing: - Added end-to-end test cases. - Passed core tests. Change-Id: I83dc32c8d0fec1cdfdfe06e720561b2ae1adf5df Reviewed-on: http://gerrit.cloudera.org:8080/21187 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../java/org/apache/impala/analysis/ToSqlUtils.java | 11 +++++++++++ .../java/org/apache/impala/catalog/DataSourceTable.java | 12 ++++++++++++ .../org/apache/impala/util/HiveMetadataFormatUtils.java | 16 +++++++++++++--- tests/query_test/test_ext_data_sources.py | 17 ++++++++++++++++- 4 files changed, 52 insertions(+), 4 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java index 66434a184..305905ef4 100755 --- a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java +++ b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java @@ -23,6 +23,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import org.antlr.runtime.ANTLRStringStream; import 
org.antlr.runtime.RecognitionException; @@ -34,6 +35,8 @@ import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.ql.parse.HiveLexer; import org.apache.impala.catalog.CatalogException; import org.apache.impala.catalog.Column; +import org.apache.impala.catalog.DataSourceTable; +import org.apache.impala.catalog.FeDataSourceTable; import org.apache.impala.catalog.FeFsTable; import org.apache.impala.catalog.FeHBaseTable; import org.apache.impala.catalog.FeIcebergTable; @@ -468,6 +471,14 @@ public class ToSqlUtils { } catch (Exception e) { throw new CatalogException("Could not get primary key/foreign keys sql.", e); } + } else if (table instanceof FeDataSourceTable) { + // Mask sensitive table properties for external JDBC table. + Set<String> keysToBeMasked = DataSourceTable.getJdbcTblPropertyMaskKeys(); + for (String key : properties.keySet()) { + if (keysToBeMasked.contains(key.toLowerCase())) { + properties.put(key, "******"); + } + } } HdfsUri tableLocation = location == null ? null : new HdfsUri(location); diff --git a/fe/src/main/java/org/apache/impala/catalog/DataSourceTable.java b/fe/src/main/java/org/apache/impala/catalog/DataSourceTable.java index 2bddfacc1..4a5f4a05d 100644 --- a/fe/src/main/java/org/apache/impala/catalog/DataSourceTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/DataSourceTable.java @@ -17,7 +17,9 @@ package org.apache.impala.catalog; +import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -370,4 +372,14 @@ public class DataSourceTable extends Table implements FeDataSourceTable { org.apache.hadoop.hive.metastore.api.Table msTbl) { return msTbl.getParameters().containsKey(TBL_PROP_DATA_SRC_NAME); } + + /** + * Returns a list of keys of external JDBC table properties for which the property + * values should be masked in the output of "desc formatted" and "show create table" + * commands. 
+ */ + public static Set<String> getJdbcTblPropertyMaskKeys() { + return new HashSet<String>(Arrays.asList( + JdbcStorageConfig.DBCP_PASSWORD.getPropertyName())); + } } diff --git a/fe/src/main/java/org/apache/impala/util/HiveMetadataFormatUtils.java b/fe/src/main/java/org/apache/impala/util/HiveMetadataFormatUtils.java index 61f33a2b2..4a9e2cea4 100644 --- a/fe/src/main/java/org/apache/impala/util/HiveMetadataFormatUtils.java +++ b/fe/src/main/java/org/apache/impala/util/HiveMetadataFormatUtils.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo; import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.impala.catalog.DataSourceTable; import org.apache.impala.catalog.IcebergTable; import org.apache.impala.common.FileSystemUtil; import org.apache.impala.compat.MetastoreShim; @@ -557,7 +558,13 @@ public class HiveMetadataFormatUtils { if (tbl.getParameters().size() > 0) { tableInfo.append("Table Parameters:").append(LINE_DELIM); - displayAllParameters(tbl.getParameters(), tableInfo, false, isOutputPadded); + // Mask sensitive table properties for external JDBC table. + Set<String> keysToBeMasked = null; + if (DataSourceTable.isDataSourceTable(tbl)) { + keysToBeMasked = DataSourceTable.getJdbcTblPropertyMaskKeys(); + } + displayAllParameters( + tbl.getParameters(), tableInfo, false, isOutputPadded, keysToBeMasked); } } @@ -573,7 +580,8 @@ public class HiveMetadataFormatUtils { * escaped. 
*/ private static void displayAllParameters(Map<String, String> params, - StringBuilder tableInfo, boolean escapeUnicode, boolean isOutputPadded) { + StringBuilder tableInfo, boolean escapeUnicode, boolean isOutputPadded, + Set<String> keysToBeMasked) { List<String> keys = new ArrayList<String>(params.keySet()); Collections.sort(keys); for (String key : keys) { @@ -583,6 +591,8 @@ public class HiveMetadataFormatUtils { if ("0".equals(value)) { continue; } + } else if (keysToBeMasked != null && keysToBeMasked.contains(key.toLowerCase())) { + value = "******"; } tableInfo.append(FIELD_DELIM); // Ensures all params are indented. formatOutput(key, escapeUnicode ? StringEscapeUtils.escapeJava(value) @@ -692,7 +702,7 @@ public class HiveMetadataFormatUtils { if (storageDesc.getSerdeInfo().getParametersSize() > 0) { tableInfo.append("Storage Desc Params:").append(LINE_DELIM); displayAllParameters(storageDesc.getSerdeInfo().getParameters(), tableInfo, true, - false); + false, /* keysToBeMasked */ null); } } diff --git a/tests/query_test/test_ext_data_sources.py b/tests/query_test/test_ext_data_sources.py index ca16578a9..554bb96fa 100644 --- a/tests/query_test/test_ext_data_sources.py +++ b/tests/query_test/test_ext_data_sources.py @@ -65,8 +65,23 @@ class TestExtDataSources(ImpalaTestSuite): def test_verify_jdbc_table_properties(self, vector): jdbc_tbl_name = "functional.alltypes_jdbc_datasource" properties = self._get_tbl_properties(jdbc_tbl_name) - # Verify data source related table properties + # Verify table properties specific for external JDBC table assert properties['__IMPALA_DATA_SOURCE_NAME'] == 'impalajdbcdatasource' + assert properties['database.type'] == 'POSTGRES' + assert properties['jdbc.driver'] == 'org.postgresql.Driver' + assert properties['dbcp.username'] == 'hiveuser' + assert properties['table'] == 'alltypes' + # Verify dbcp.password is masked in the output of DESCRIBE FORMATTED command + assert properties['dbcp.password'] == '******' + + # Verify 
dbcp.password is masked in the output of SHOW CREATE TABLE command + result = self.client.execute("SHOW CREATE TABLE {0}".format(jdbc_tbl_name)) + match = False + for row in result.data: + if "'dbcp.password'='******'" in row: + match = True + break + assert match, result.data def test_data_source_tables(self, vector, unique_database): self.run_test_case('QueryTest/data-source-tables', vector, use_db=unique_database)
