This is an automated email from the ASF dual-hosted git repository.
xiaozhenliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new a2a4298249 feat(amber): add memoization of region costs in
CostBasedScheduleGenerator (#3660)
a2a4298249 is described below
commit a2a42982491741d85c0ec68448ed119f6f5bf155
Author: Xiaozhen Liu <[email protected]>
AuthorDate: Tue Aug 26 18:06:00 2025 -0700
feat(amber): add memoization of region costs in CostBasedScheduleGenerator
(#3660)
As `CostEstimator.allocateResourcesAndEstimateCost` can be expensive,
this PR adds memoization of region costs (and also resource configs)
during the search process of `CostBasedScheduleGenerator`.
---
.../scheduling/CostBasedScheduleGenerator.scala | 25 ++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala
index 2d26c3fed5..324de1c7f0 100644
--- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala
+++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala
@@ -42,6 +42,7 @@ import org.jgrapht.graph.{DirectedAcyclicGraph, DirectedPseudograph}
import java.net.URI
import java.util.concurrent.TimeoutException
+import scala.collection.immutable.HashMap
import scala.collection.mutable
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.DurationInt
@@ -75,6 +76,17 @@ class CostBasedScheduleGenerator(
actorId = actorId
)
+ private case class CostEstimatorMemoKey(
+ physicalOpIds: Set[PhysicalOpIdentity],
+ physicalLinkIds: Set[PhysicalLink],
+ portIds: Set[GlobalPortIdentity],
+ resourceConfig: Option[ResourceConfig]
+ )
+
+ private val costEstimatorMemoization
+ : mutable.Map[CostEstimatorMemoKey, (ResourceConfig, Double)] =
+ new mutable.HashMap()
+
def generate(): (Schedule, PhysicalPlan) = {
val startTime = System.nanoTime()
val regionDAG = createRegionDAG()
@@ -606,8 +618,17 @@ class CostBasedScheduleGenerator(
.map(level =>
level
.map(region => {
- val (resourceConfig, regionCost) =
- costEstimator.allocateResourcesAndEstimateCost(region, 1)
+ val costEstimatorMemoKey = CostEstimatorMemoKey(
+ physicalOpIds = region.physicalOps.map(_.id),
+ physicalLinkIds = region.physicalLinks,
+ portIds = region.ports,
+ resourceConfig = region.resourceConfig
+ )
+ val (resourceConfig, regionCost) = costEstimatorMemoization
+ .getOrElseUpdate(
+ costEstimatorMemoKey,
+ costEstimator.allocateResourcesAndEstimateCost(region, 1)
+ )
// Update the region in the regionDAG to be the new region with resources allocated.
val regionWithResourceConfig = region.copy(resourceConfig = Some(resourceConfig))
replaceVertex(regionDAG, region, regionWithResourceConfig)