[ https://issues.apache.org/jira/browse/HIVE-26504?focusedWorklogId=809562&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-809562 ]
ASF GitHub Bot logged work on HIVE-26504: ----------------------------------------- Author: ASF GitHub Bot Created on: 16/Sep/22 15:14 Start Date: 16/Sep/22 15:14 Worklog Time Spent: 10m Work Description: deniskuzZ commented on code in PR #3557: URL: https://github.com/apache/hive/pull/3557#discussion_r973135097 ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java: ########## @@ -1011,96 +1015,61 @@ private Path constructRenamedPath(Path defaultNewPath, Path currentPath) { defaultNewPath.toUri().getPath()); } - @VisibleForTesting - public static List<ColumnStatistics> alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTable, - EnvironmentContext ec, String validWriteIds, Configuration conf, List<String> deletedCols) - throws MetaException, InvalidObjectException { - String catName = normalizeIdentifier(oldTable.isSetCatName() ? oldTable.getCatName() : - getDefaultCatalog(conf)); + public static List<ColumnStatistics> getColumnStats(RawStore msdb, Table oldTable) + throws NoSuchObjectException, MetaException { + String catName = normalizeIdentifier(oldTable.isSetCatName() + ? 
oldTable.getCatName() + : getDefaultCatalog(msdb.getConf())); String dbName = oldTable.getDbName().toLowerCase(); String tableName = normalizeIdentifier(oldTable.getTableName()); - String newDbName = newTable.getDbName().toLowerCase(); - String newTableName = normalizeIdentifier(newTable.getTableName()); - //if its not called from cahced store then update the table - boolean doAlterTable = deletedCols == null; - List<ColumnStatistics> newMultiColStats = new ArrayList<>(); + return msdb.getTableColumnStatistics(catName, dbName, tableName, + oldTable.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList())); + } + @VisibleForTesting + public static List<ColumnStatistics> deleteTableColumnStats(RawStore msdb, Table oldTable, Table newTable, List<ColumnStatistics> multiColStats) + throws InvalidObjectException, MetaException { + List<ColumnStatistics> newMultiColStats = new ArrayList<>(); try { - List<FieldSchema> oldCols = oldTable.getSd().getCols(); - List<FieldSchema> newCols = newTable.getSd().getCols(); - List<ColumnStatistics> multiColStats = null; - boolean updateColumnStats = !newDbName.equals(dbName) || !newTableName.equals(tableName) - || !MetaStoreServerUtils.columnsIncludedByNameType(oldCols, newCols); - // Don't bother in the case of ACID conversion. - updateColumnStats = updateColumnStats - && (TxnUtils.isAcidTable(oldTable) == TxnUtils.isAcidTable(newTable)); - if (updateColumnStats) { - List<String> oldColNames = new ArrayList<>(oldCols.size()); - for (FieldSchema oldCol : oldCols) { - oldColNames.add(oldCol.getName()); - } + String catName = normalizeIdentifier(oldTable.isSetCatName() + ? 
oldTable.getCatName() + : getDefaultCatalog(msdb.getConf())); + String dbName = oldTable.getDbName().toLowerCase(); + String tableName = normalizeIdentifier(oldTable.getTableName()); + String newDbName = newTable.getDbName().toLowerCase(); + String newTableName = normalizeIdentifier(newTable.getTableName()); + List<FieldSchema> oldTableCols = oldTable.getSd().getCols(); + List<FieldSchema> newTableCols = newTable.getSd().getCols(); + + boolean nameChanged = !newDbName.equals(dbName) || !newTableName.equals(tableName); + + if ((nameChanged || !MetaStoreServerUtils.columnsIncludedByNameType(oldTableCols, newTableCols)) && + // Don't bother in the case of ACID conversion. + TxnUtils.isAcidTable(oldTable) == TxnUtils.isAcidTable(newTable)) { + for (ColumnStatistics colStats : multiColStats) { + List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj(); + List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>(); - // NOTE: this doesn't check stats being compliant, but the alterTable call below does. - // The worst we can do is delete the stats. - // Collect column stats which need to be rewritten and remove old stats. - multiColStats = msdb.getTableColumnStatistics(catName, dbName, tableName, oldColNames); - if (multiColStats.isEmpty()) { - updateColumnStats = false; - } else { - for (ColumnStatistics colStats : multiColStats) { - List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj(); - if (statsObjs != null) { - // for out para, this value is initialized by caller. - if (deletedCols == null) { - deletedCols = new ArrayList<>(); + if (statsObjs != null) { + for (ColumnStatisticsObj statsObj : statsObjs) { + boolean found = newTableCols.stream().anyMatch(c -> statsObj.getColName().equalsIgnoreCase(c.getName()) && Review Comment: can't we reuse filterColumnStatsForTableColumns by supplying negative condition? 
Issue Time Tracking ------------------- Worklog Id: (was: 809562) Time Spent: 1h 50m (was: 1h 40m) > User is not able to drop table > ------------------------------ > > Key: HIVE-26504 > URL: https://issues.apache.org/jira/browse/HIVE-26504 > Project: Hive > Issue Type: Bug > Components: Hive > Reporter: László Végh > Assignee: László Végh > Priority: Major > Labels: pull-request-available > Time Spent: 1h 50m > Remaining Estimate: 0h > > Hive does not store anything in *TAB_COL_STATS* for partitioned tables, whereas > Impala stores complete column stats in TAB_COL_STATS for partitioned tables. > Entries in TAB_COL_STATS are deleted by (DB_NAME, TABLE_NAME), not by > TBL_ID. Renamed tables kept their old names in TAB_COL_STATS, so their entries > are not deleted when the table is dropped. > To reproduce: > {code:java} > beeline: > set hive.create.as.insert.only=false; > set hive.create.as.acid=false; > create table testes.table_name_with_partition (id tinyint, name string) > partitioned by (col_to_partition bigint) stored as parquet; > insert into testes.table_name_with_partition (id, name, col_to_partition) > values (1, "a", 2020), (2, "b", 2021), (3, "c", 2022); > impala: > compute stats testes.table_name_with_partition; -- backend shows new entries > in TAB_COL_STATS > beeline: > alter table testes.table_name_with_partition rename to > testes2.table_that_cant_be_droped; > drop table testes2.table_that_cant_be_droped; -- This fails with > TAB_COL_STATS_fkey constraint violation. > {code} > Exception trace for the drop table failure: > {code:java} > Caused by: org.postgresql.util.PSQLException: ERROR: update or delete on > table "TBLS" violates foreign key constraint "TAB_COL_STATS_fkey" on table > "TAB_COL_STATS" > Detail: Key (TBL_ID)=(19816) is still referenced from table "TAB_COL_STATS". > at > org.postgresql.core.v3.QueryExecutorImpl.receiveErrorResponse(QueryExecutorImpl.java:2532) > at > org.postgresql.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:2267) > ... 
50 more > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)