[ https://issues.apache.org/jira/browse/HIVE-24914?focusedWorklogId=582039&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-582039 ]
ASF GitHub Bot logged work on HIVE-24914: ----------------------------------------- Author: ASF GitHub Bot Created on: 13/Apr/21 20:12 Start Date: 13/Apr/21 20:12 Worklog Time Spent: 10m Work Description: pgaref commented on a change in pull request #2108: URL: https://github.com/apache/hive/pull/2108#discussion_r612744867 ########## File path: llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java ########## @@ -1816,23 +1776,90 @@ private static boolean removeFromRunningTaskMap(TreeMap<Integer, TreeSet<TaskInf INADEQUATE_TOTAL_RESOURCES, } + private Pair<Resource, Map<String, List<NodeInfo>>> getResourceAvailability() { + int memory = 0; + int vcores = 0; + int numInstancesFound = 0; + Map<String, List<NodeInfo>> availableHostMap; + readLock.lock(); + try { + // maintain insertion order (needed for Next slot in locality miss) + availableHostMap = new LinkedHashMap<>(instanceToNodeMap.size()); + Collection<LlapServiceInstance> instances = consistentSplits ? 
+ // might also include Inactive instances + activeInstances.getAllInstancesOrdered(true): + // if consistent splits are NOT used we don't need the ordering as there will be no cache benefit anyways + activeInstances.getAll(); + boolean foundSlot = false; + for (LlapServiceInstance inst : instances) { + NodeInfo nodeInfo = instanceToNodeMap.get(inst.getWorkerIdentity()); + if (nodeInfo != null) { + List<NodeInfo> hostList = availableHostMap.get(nodeInfo.getHost()); + if (hostList == null) { + hostList = new ArrayList<>(); + availableHostMap.put(nodeInfo.getHost(), hostList); + } + if (!(inst instanceof InactiveServiceInstance)) { + Resource r = inst.getResource(); + memory += r.getMemory(); + vcores += r.getVirtualCores(); + numInstancesFound++; + // Only add to List Nodes with available resources + // Hosts, however, exist even for nodes that do not currently have resources + if (nodeInfo.canAcceptTask()) { + foundSlot = true; + hostList.add(nodeInfo); + } + } + } + } + // isClusterCapacityFull will be set to false on every trySchedulingPendingTasks call + // set it false here to bail out early when we know there are no resources available. + if (!foundSlot) { + isClusterCapacityFull.set(true); + } + } finally { + readLock.unlock(); + } + if (LOG.isDebugEnabled()) { + LOG.debug("ResourceAvail: numInstancesFound={}, totalMem={}, totalVcores={} availableHosts: {}", Review comment: sure fixed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 582039) Time Spent: 50m (was: 40m) > Improve LLAP scheduling by only traversing hosts with capacity > -------------------------------------------------------------- > > Key: HIVE-24914 > URL: https://issues.apache.org/jira/browse/HIVE-24914 > Project: Hive > Issue Type: Sub-task > Reporter: Panagiotis Garefalakis > Assignee: Panagiotis Garefalakis > Priority: Major > Labels: pull-request-available > Time Spent: 50m > Remaining Estimate: 0h > > *schedulePendingTasks* on the LlapTaskScheduler currently goes through all > the pending tasks and tries to allocate them based on their Priority -- if a > priority cannot be scheduled completely, we bail out as lower priorities > would not be able to get allocations either. > An optimization here could be to only walk through the nodes with capacity > (if any), and not all available hosts, for scheduling these tasks based on > their priority and locality preferences. -- This message was sent by Atlassian Jira (v8.3.4#803005)