[ https://issues.apache.org/jira/browse/HIVE-25204?focusedWorklogId=611172&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-611172 ]
ASF GitHub Bot logged work on HIVE-25204: ----------------------------------------- Author: ASF GitHub Bot Created on: 15/Jun/21 07:35 Start Date: 15/Jun/21 07:35 Worklog Time Spent: 10m Work Description: maheshk114 commented on a change in pull request #2365: URL: https://github.com/apache/hive/pull/2365#discussion_r651526249 ########## File path: hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java ########## @@ -1194,108 +1205,82 @@ static String quoteString(String input) { private void addNotificationLog(NotificationEvent event, ListenerEvent listenerEvent, Connection dbConn, SQLGenerator sqlGenerator) throws MetaException, SQLException { LOG.debug("DbNotificationListener: adding notification log for : {}", event.getMessage()); + addNotificationLogBatch(Collections.singletonList(event), Collections.singletonList(listenerEvent), + dbConn, sqlGenerator); + } + + private void addNotificationLogBatch(List<NotificationEvent> eventList, List<ListenerEvent> listenerEventList, + Connection dbConn, SQLGenerator sqlGenerator) throws MetaException, SQLException { if ((dbConn == null) || (sqlGenerator == null)) { LOG.info("connection or sql generator is not set so executing sql via DN"); - process(event, listenerEvent); + for (int idx = 0; idx < eventList.size(); idx++) { + LOG.debug("DbNotificationListener: adding notification log for : {}", eventList.get(idx).getMessage()); + process(eventList.get(idx), listenerEventList.get(idx)); + } return; } - Statement stmt = null; - PreparedStatement pst = null; - ResultSet rs = null; - try { - stmt = dbConn.createStatement(); - event.setMessageFormat(msgEncoder.getMessageFormat()); + try (Statement stmt = dbConn.createStatement()) { if (sqlGenerator.getDbProduct().isMYSQL()) { stmt.execute("SET @@session.sql_mode=ANSI_QUOTES"); } + } - long nextEventId = getNextEventId(dbConn, sqlGenerator); - - long nextNLId = getNextNLId(dbConn, sqlGenerator, - "org.apache.hadoop.hive.metastore.model.MNotificationLog"); - - String insertVal; - String columns; - List<String> params = new ArrayList<String>(); - - // Construct the values string, parameters and column string step by step simultaneously so - // that the positions of columns and of their corresponding values do not go out of sync. - - // Notification log id - columns = "\"NL_ID\""; - insertVal = "" + nextNLId; - - // Event id - columns = columns + ", \"EVENT_ID\""; - insertVal = insertVal + "," + nextEventId; - - // Event time - columns = columns + ", \"EVENT_TIME\""; - insertVal = insertVal + "," + event.getEventTime(); + long nextEventId = getNextEventId(dbConn, sqlGenerator, eventList.size()); + long nextNLId = getNextNLId(dbConn, sqlGenerator, + "org.apache.hadoop.hive.metastore.model.MNotificationLog", eventList.size()); - // Event type - columns = columns + ", \"EVENT_TYPE\""; - insertVal = insertVal + ", ?"; - params.add(event.getEventType()); + String columns = "\"NL_ID\"" + ", \"EVENT_ID\"" + ", \"EVENT_TIME\"" + ", \"EVENT_TYPE\"" + ", \"MESSAGE\"" + + ", \"MESSAGE_FORMAT\"" + ", \"DB_NAME\"" + ", \"TBL_NAME\"" + ", \"CAT_NAME\""; + String insertVal = "insert into \"NOTIFICATION_LOG\" (" + columns + ") VALUES (" + + "?,?,?,?,?,?,?,?,?" + + ")"; - // Message - columns = columns + ", \"MESSAGE\""; - insertVal = insertVal + ", ?"; - params.add(event.getMessage()); + try (PreparedStatement pst = dbConn.prepareStatement(insertVal)) { + int numRows = 0; - // Message format - columns = columns + ", \"MESSAGE_FORMAT\""; - insertVal = insertVal + ", ?"; - params.add(event.getMessageFormat()); + for (int idx = 0; idx < eventList.size(); idx++) { + NotificationEvent event = eventList.get(idx); + ListenerEvent listenerEvent = listenerEventList.get(idx); - // Database name, optional - String dbName = event.getDbName(); - if (dbName != null) { - assert dbName.equals(dbName.toLowerCase()); - columns = columns + ", \"DB_NAME\""; - insertVal = insertVal + ", ?"; - params.add(dbName); - } + LOG.debug("DbNotificationListener: adding notification log for : {}", event.getMessage()); + event.setMessageFormat(msgEncoder.getMessageFormat()); - // Table name, optional - String tableName = event.getTableName(); - if (tableName != null) { - assert tableName.equals(tableName.toLowerCase()); - columns = columns + ", \"TBL_NAME\""; - insertVal = insertVal + ", ?"; - params.add(tableName); - } + pst.setLong(1, nextNLId); + pst.setLong(2, nextEventId); + pst.setLong(3, now()); + pst.setString(4, event.getEventType()); + pst.setString(5, event.getMessage()); + pst.setString(6, event.getMessageFormat()); + pst.setString(7, event.getDbName()); + pst.setString(8, event.getTableName()); + pst.setString(9, event.getCatName()); + + LOG.debug("Going to execute insert <" + insertVal + ">"); + pst.addBatch(); + numRows++; + + if (numRows == maxBatchSize) { + pst.executeBatch(); + numRows = 0; + } - // Catalog name, optional - String catName = event.getCatName(); - if (catName != null) { - assert catName.equals(catName.toLowerCase()); - columns = columns + ", \"CAT_NAME\""; - insertVal = insertVal + ", ?"; - params.add(catName); + event.setEventId(nextEventId); + // Set the DB_NOTIFICATION_EVENT_ID for future reference by other listeners. + if (event.isSetEventId()) { + listenerEvent.putParameter( + MetaStoreEventListenerConstants.DB_NOTIFICATION_EVENT_ID_KEY_NAME, + Long.toString(event.getEventId())); + } Review comment: The batch operation and the batch writing on notification logs are done within a single transaction to avoid inconsistency. ########## File path: hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java ########## @@ -905,25 +909,32 @@ public void onUpdatePartitionColumnStat(UpdatePartitionColumnStatEvent updatePar } @Override - public void onUpdatePartitionColumnStatDirectSql(UpdatePartitionColumnStatEvent updatePartColStatEvent, - Connection dbConn, SQLGenerator sqlGenerator) + public void onUpdatePartitionColumnStatInBatch(UpdatePartitionColumnStatEventBatch updatePartColStatEventBatch, Review comment: updatePartitionColStatsInBatch internally uses updatePartitionColStatsForOneBatch method.updatePartitionColStatsForOneBatch calls direct sql method for one batch and that internally writes the notification log. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 611172) Time Spent: 40m (was: 0.5h) > Reduce overhead of adding notification log for update partition column > statistics > --------------------------------------------------------------------------------- > > Key: HIVE-25204 > URL: https://issues.apache.org/jira/browse/HIVE-25204 > Project: Hive > Issue Type: Sub-task > Components: Hive, HiveServer2 > Reporter: mahesh kumar behera > Assignee: mahesh kumar behera > Priority: Major > Labels: perfomance, pull-request-available > Time Spent: 40m > Remaining Estimate: 0h > > The notification logs for partition column statistics can be optimised by > adding them in batch. In the current implementation its done one by one > causing multiple sql execution in the backend RDBMS. These SQL executions can > be batched to reduce the execution time. -- This message was sent by Atlassian Jira (v8.3.4#803005)