[ https://issues.apache.org/jira/browse/HIVE-24432?focusedWorklogId=523271&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-523271 ]
ASF GitHub Bot logged work on HIVE-24432: ----------------------------------------- Author: ASF GitHub Bot Created on: 11/Dec/20 18:38 Start Date: 11/Dec/20 18:38 Worklog Time Spent: 10m Work Description: belugabehr commented on a change in pull request #1710: URL: https://github.com/apache/hive/pull/1710#discussion_r541149698 ########## File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java ########## @@ -10800,53 +10801,89 @@ public void addNotificationEvent(NotificationEvent entry) throws MetaException { @Override public void cleanNotificationEvents(int olderThan) { - boolean commited = false; - Query query = null; + final int eventBatchSize = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.EVENT_CLEAN_MAX_EVENTS); + + final long ageSec = olderThan; + final Instant now = Instant.now(); + + final int tooOld = Math.toIntExact(now.getEpochSecond() - ageSec); + + final Optional<Integer> batchSize = (eventBatchSize > 0) ? Optional.of(eventBatchSize) : Optional.empty(); + + final long start = System.nanoTime(); + int deleteCount = doCleanNotificationEvents(tooOld, batchSize); + + if (deleteCount == 0) { + LOG.info("No Notification events found to be cleaned with eventTime < {}", tooOld); + } else { + int batchCount = 0; + do { + batchCount = doCleanNotificationEvents(tooOld, batchSize); + deleteCount += batchCount; + } while (batchCount > 0); + } + + final long finish = System.nanoTime(); + + LOG.info("Deleted {} notification events older than epoch:{} in {}ms", deleteCount, tooOld, + TimeUnit.NANOSECONDS.toMillis(finish - start)); + } + + private int doCleanNotificationEvents(final int ageSec, final Optional<Integer> batchSize) { + final Transaction tx = pm.currentTransaction(); + int eventsCount = 0; + try { - openTransaction(); - long tmp = System.currentTimeMillis() / 1000 - olderThan; - int tooOld = (tmp > Integer.MAX_VALUE) ? 
0 : (int) tmp; - query = pm.newQuery(MNotificationLog.class, "eventTime < tooOld"); - query.declareParameters("java.lang.Integer tooOld"); + tx.begin(); - int max_events = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.EVENT_CLEAN_MAX_EVENTS); - max_events = max_events > 0 ? max_events : Integer.MAX_VALUE; - query.setRange(0, max_events); - query.setOrdering("eventId ascending"); + try (Query query = pm.newQuery(MNotificationLog.class, "eventTime < tooOld")) { + query.declareParameters("java.lang.Integer tooOld"); + query.setOrdering("eventId ascending"); + if (batchSize.isPresent()) { + query.setRange(0, batchSize.get()); + } - List<MNotificationLog> toBeRemoved = (List) query.execute(tooOld); - int iteration = 0; - int eventCount = 0; - long minEventId = 0; - long minEventTime = 0; - long maxEventId = 0; - long maxEventTime = 0; - while (CollectionUtils.isNotEmpty(toBeRemoved)) { - int listSize = toBeRemoved.size(); - if (iteration == 0) { - MNotificationLog firstNotification = toBeRemoved.get(0); - minEventId = firstNotification.getEventId(); - minEventTime = firstNotification.getEventTime(); + List<MNotificationLog> events = (List) query.execute(ageSec); + if (CollectionUtils.isNotEmpty(events)) { + eventsCount = events.size(); + + if (LOG.isDebugEnabled()) { + int minEventTime, maxEventTime; + long minEventId, maxEventId; + Iterator<MNotificationLog> iter = events.iterator(); + MNotificationLog firstNotification = iter.next(); + + minEventTime = maxEventTime = firstNotification.getEventTime(); + minEventId = maxEventId = firstNotification.getEventId(); + + while (iter.hasNext()) { + MNotificationLog notification = iter.next(); Review comment: @aasha I updated existing unit tests to enforce small batch sizes (size = 1) so that when it deletes records, it does so in batches of 1. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 523271) Time Spent: 3h 20m (was: 3h 10m) > Delete Notification Events in Batches > ------------------------------------- > > Key: HIVE-24432 > URL: https://issues.apache.org/jira/browse/HIVE-24432 > Project: Hive > Issue Type: Improvement > Affects Versions: 3.2.0 > Reporter: David Mollitor > Assignee: David Mollitor > Priority: Major > Labels: pull-request-available > Time Spent: 3h 20m > Remaining Estimate: 0h > > Notification events are loaded in batches (reduces memory pressure on the > HMS), but all of the deletes happen under a single transaction and, when > deleting many records, can put a lot of pressure on the backend database. > Instead, delete events in batches (in different transactions) as well. -- This message was sent by Atlassian Jira (v8.3.4#803005)