This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 25313519a9 [VL] Validate runtime compatibility using reflection before
registering Gluten components (#10942)
25313519a9 is described below
commit 25313519a971ddd7dc6d12b17079a556a4b5b19b
Author: PHILO-HE <[email protected]>
AuthorDate: Tue Oct 28 22:47:52 2025 +0800
[VL] Validate runtime compatibility using reflection before registering
Gluten components (#10942)
---
.../org/apache/gluten/component/VeloxDeltaComponent.scala | 12 ++++++++++++
.../org/apache/gluten/component/VeloxHudiComponent.scala | 12 ++++++++++++
.../apache/gluten/component/VeloxIcebergComponent.scala | 14 ++++++++++++++
.../org/apache/gluten/component/VeloxPaimonComponent.scala | 13 +++++++++++++
.../main/scala/org/apache/gluten/component/Component.scala | 9 +++++++++
.../main/scala/org/apache/gluten/component/package.scala | 10 ++++++----
6 files changed, 66 insertions(+), 4 deletions(-)
diff --git a/backends-velox/src-delta/main/scala/org/apache/gluten/component/VeloxDeltaComponent.scala b/backends-velox/src-delta/main/scala/org/apache/gluten/component/VeloxDeltaComponent.scala
index 90c138aa54..18f92919e7 100644
--- a/backends-velox/src-delta/main/scala/org/apache/gluten/component/VeloxDeltaComponent.scala
+++ b/backends-velox/src-delta/main/scala/org/apache/gluten/component/VeloxDeltaComponent.scala
@@ -25,12 +25,24 @@ import org.apache.gluten.extension.columnar.validator.Validators
import org.apache.gluten.extension.injector.Injector
import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec}
+import org.apache.spark.util.SparkReflectionUtil
class VeloxDeltaComponent extends Component {
override def name(): String = "velox-delta"
override def buildInfo(): Component.BuildInfo =
Component.BuildInfo("VeloxDelta", "N/A", "N/A", "N/A")
override def dependencies(): Seq[Class[_ <: Component]] =
classOf[VeloxBackend] :: Nil
+
+ override def isRuntimeCompatible: Boolean = {
+ try {
+ SparkReflectionUtil.classForName("io.delta.sql.DeltaSparkSessionExtension")
+ true
+ } catch {
+ case _: ClassNotFoundException =>
+ false
+ }
+ }
+
override def injectRules(injector: Injector): Unit = {
val legacy = injector.gluten.legacy
val ras = injector.gluten.ras
diff --git a/backends-velox/src-hudi/main/scala/org/apache/gluten/component/VeloxHudiComponent.scala b/backends-velox/src-hudi/main/scala/org/apache/gluten/component/VeloxHudiComponent.scala
index 6685066efa..e31c7bb35b 100644
--- a/backends-velox/src-hudi/main/scala/org/apache/gluten/component/VeloxHudiComponent.scala
+++ b/backends-velox/src-hudi/main/scala/org/apache/gluten/component/VeloxHudiComponent.scala
@@ -25,12 +25,24 @@ import org.apache.gluten.extension.columnar.validator.Validators
import org.apache.gluten.extension.injector.Injector
import org.apache.spark.sql.execution.FileSourceScanExec
+import org.apache.spark.util.SparkReflectionUtil
class VeloxHudiComponent extends Component {
override def name(): String = "velox-hudi"
override def buildInfo(): Component.BuildInfo =
Component.BuildInfo("VeloxHudi", "N/A", "N/A", "N/A")
override def dependencies(): Seq[Class[_ <: Component]] =
classOf[VeloxBackend] :: Nil
+
+ override def isRuntimeCompatible: Boolean = {
+ try {
+ SparkReflectionUtil.classForName("org.apache.spark.sql.hudi.HoodieSparkSessionExtension")
+ true
+ } catch {
+ case _: ClassNotFoundException =>
+ false
+ }
+ }
+
override def injectRules(injector: Injector): Unit = {
val legacy = injector.gluten.legacy
val ras = injector.gluten.ras
diff --git a/backends-velox/src-iceberg/main/scala/org/apache/gluten/component/VeloxIcebergComponent.scala b/backends-velox/src-iceberg/main/scala/org/apache/gluten/component/VeloxIcebergComponent.scala
index c977e17ab4..4b49f0e3c8 100644
--- a/backends-velox/src-iceberg/main/scala/org/apache/gluten/component/VeloxIcebergComponent.scala
+++ b/backends-velox/src-iceberg/main/scala/org/apache/gluten/component/VeloxIcebergComponent.scala
@@ -20,11 +20,25 @@ import org.apache.gluten.backendsapi.velox.VeloxBackend
import org.apache.gluten.extension.{OffloadIcebergScan, OffloadIcebergWrite}
import org.apache.gluten.extension.injector.Injector
+import org.apache.spark.util.SparkReflectionUtil
+
class VeloxIcebergComponent extends Component {
override def name(): String = "velox-iceberg"
override def buildInfo(): Component.BuildInfo =
Component.BuildInfo("VeloxIceberg", "N/A", "N/A", "N/A")
override def dependencies(): Seq[Class[_ <: Component]] =
classOf[VeloxBackend] :: Nil
+
+ override def isRuntimeCompatible: Boolean = {
+ try {
+ SparkReflectionUtil.classForName(
+ "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
+ true
+ } catch {
+ case _: ClassNotFoundException =>
+ false
+ }
+ }
+
override def injectRules(injector: Injector): Unit = {
OffloadIcebergScan.inject(injector)
OffloadIcebergWrite.inject(injector)
diff --git a/backends-velox/src-paimon/main/scala/org/apache/gluten/component/VeloxPaimonComponent.scala b/backends-velox/src-paimon/main/scala/org/apache/gluten/component/VeloxPaimonComponent.scala
index 7dd4f805f3..625480c6b4 100644
--- a/backends-velox/src-paimon/main/scala/org/apache/gluten/component/VeloxPaimonComponent.scala
+++ b/backends-velox/src-paimon/main/scala/org/apache/gluten/component/VeloxPaimonComponent.scala
@@ -25,12 +25,25 @@ import org.apache.gluten.extension.columnar.validator.Validators
import org.apache.gluten.extension.injector.Injector
import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
+import org.apache.spark.util.SparkReflectionUtil
class VeloxPaimonComponent extends Component {
override def name(): String = "velox-paimon"
override def buildInfo(): Component.BuildInfo =
Component.BuildInfo("VeloxPaimon", "N/A", "N/A", "N/A")
override def dependencies(): Seq[Class[_ <: Component]] =
classOf[VeloxBackend] :: Nil
+
+ override def isRuntimeCompatible: Boolean = {
+ try {
+ SparkReflectionUtil.classForName(
+ "org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions")
+ true
+ } catch {
+ case _: ClassNotFoundException =>
+ false
+ }
+ }
+
override def injectRules(injector: Injector): Unit = {
injector.gluten.legacy.injectTransform {
c =>
diff --git a/gluten-core/src/main/scala/org/apache/gluten/component/Component.scala b/gluten-core/src/main/scala/org/apache/gluten/component/Component.scala
index fdd76870f7..c5093692d4 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/component/Component.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/component/Component.scala
@@ -42,6 +42,15 @@ trait Component {
private val uid = nextUid.getAndIncrement()
private val isRegistered = new AtomicBoolean(false)
+ /**
+ * Determines whether a component should be registered based on runtime conditions. For instance,
+ * if a component depends on a Spark extension's JAR, this method should be overridden to check
+ * whether its core class (e.g., the extension class) is available in the runtime environment.
+ */
+ def isRuntimeCompatible: Boolean = {
+ true
+ }
+
def ensureRegistered(): Unit = {
if (!isRegistered.compareAndSet(false, true)) {
return
diff --git a/gluten-core/src/main/scala/org/apache/gluten/component/package.scala b/gluten-core/src/main/scala/org/apache/gluten/component/package.scala
index cf0181c39c..cb74a0b3c8 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/component/package.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/component/package.scala
@@ -28,11 +28,13 @@ package object component extends Logging {
return
}
- // Load all components in classpath.
+ // Discover all components available in the classpath.
val all = Discovery.discoverAll()
-
- // Register all components.
- all.foreach(_.ensureRegistered())
+ val (compatibleComponents, incompatibleComponents) = all.partition(_.isRuntimeCompatible)
+ incompatibleComponents.foreach(
+ c => logWarning(s"Excluding runtime-incompatible component: ${c.name}"))
+ // Register all runtime-compatible components.
+ compatibleComponents.foreach(_.ensureRegistered())
// Output log so user could view the component loading order.
// Call #sortedUnsafe than on #sorted to avoid unnecessary recursion.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]