This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push:
new 77526b2 KYLIN-5076 Fix spark tracking url missing on spark standalone mode
77526b2 is described below
commit 77526b253ad12e1c6f93520244a187f161e1638a
Author: yaqian.zhang <[email protected]>
AuthorDate: Fri Sep 10 18:19:18 2021 +0800
KYLIN-5076 Fix spark tracking url missing on spark standalone mode
---
.../engine/spark/application/SparkApplication.java | 8 ++++++--
.../org/apache/spark/deploy/StandaloneAppClient.scala | 18 +++++++++++++++++-
.../scala/org/apache/spark/sql/SparderContext.scala | 6 ++++++
3 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java
index 0d4352b..362ef07 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java
+++ b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java
@@ -55,6 +55,7 @@ import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.JsonUtil;
import org.apache.kylin.metadata.MetadataConstants;
import org.apache.spark.SparkConf;
+import org.apache.spark.deploy.StandaloneAppClient;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.hive.utils.ResourceDetectUtils;
import org.apache.spark.util.Utils;
@@ -197,11 +198,14 @@ public abstract class SparkApplication {
}
}
- private Map<String, String> getTrackingInfo(boolean ipAddressPreferred) {
+ private Map<String, String> getTrackingInfo(boolean ipAddressPreferred,
String sparkMaster) {
String applicationId = ss.sparkContext().applicationId();
Map<String, String> extraInfo = new HashMap<>();
try {
String trackingUrl = getTrackingUrl(applicationId);
+ if (sparkMaster.startsWith("spark")) {
+ trackingUrl = StandaloneAppClient.getAppUrl(applicationId,
sparkMaster);
+ }
if (StringUtils.isBlank(trackingUrl)) {
logger.warn("Get tracking url of application {}, but empty url
found.", applicationId);
return extraInfo;
@@ -290,7 +294,7 @@ public abstract class SparkApplication {
if (isJobOnCluster(sparkConf)) {
updateSparkJobExtraInfo("/kylin/api/jobs/spark", project,
jobId,
-
getTrackingInfo(config.isTrackingUrlIpAddressEnabled()));
+
getTrackingInfo(config.isTrackingUrlIpAddressEnabled(),
sparkConf.get("spark.master")));
}
// for spark metrics
//JobMetricsUtils.registerListener(ss);
diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala
index 15f456a..e42a76b 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala
@@ -38,7 +38,6 @@ object StandaloneAppClient extends Logging {
private val cacheTtl = 3600 * 1000 * 24 * 5
private val cacheMaxSize = 30000
- // private val masterUrlHtml: String =
KylinConfig.getInstanceFromEnv.getSparkStandaloneMasterWebUI + "/app/?appId="
private val masterUrlJson: String =
KylinConfig.getInstanceFromEnv.getSparkStandaloneMasterWebUI + "/json"
private val restService: RestService = new RestService(10000, 10000)
@@ -81,6 +80,23 @@ object StandaloneAppClient extends Logging {
}
}
+ def getAppUrl(appId: String, standaloneMaster: String): String = {
+ var sparkUI = KylinConfig.getInstanceFromEnv.getSparkStandaloneMasterWebUI
+ if (sparkUI.isEmpty) {
+ sparkUI = "http://" + getMasterHost(standaloneMaster) + ":8080/"
+ logWarning("Parameter 'kylin.engine.spark.standalone.master.httpUrl' is
not configured. Use " +
+ sparkUI + " as the spark standalone Web UI address.")
+ }
+ if (!sparkUI.endsWith("/")) {
+ sparkUI = sparkUI + "/"
+ }
+ val sparkApp = sparkUI + "app/?appId="
+ sparkApp + appId
+ }
+
+ def getMasterHost(master: String): String = {
+ master.split("(://|:)").tail.head
+ }
def parseApplicationState(responseStr: String): Unit = {
val curr = System.currentTimeMillis()
diff --git a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
index 5ae1961..47d0c9b 100644
--- a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
+++ b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
@@ -36,6 +36,8 @@ import org.apache.kylin.common.KylinConfig
import org.apache.kylin.common.util.ToolUtil
import org.apache.kylin.query.monitor.SparderContextCanary
import org.apache.kylin.spark.classloader.ClassLoaderUtils
+import org.apache.spark.deploy.StandaloneAppClient
+import org.apache.spark.sql.SparderContext.master_app_url
import org.apache.spark.{SparkConf, SparkContext, SparkEnv}
import org.apache.spark.sql.execution.datasource.{KylinSourceStrategy,
ShardFileStatusCache}
import org.apache.spark.sql.metrics.SparderMetricsListener
@@ -183,6 +185,10 @@ object SparderContext extends Logging {
}
master_app_url = "http://" + hostName + ":" +
sparkSession.sparkContext.getConf
.get("spark.ui.port", "4040")
+ case mode: String if mode.startsWith("spark") =>
+ val hostName =
StandaloneAppClient.getMasterHost(kylinConf.getSparderConfigOverrideWithSpecificName("spark.master"))
+ master_app_url = "http://" + hostName + ":" +
sparkSession.sparkContext.getConf
+ .get("spark.ui.port", "4040")
case _ =>
master_app_url = YarnInfoFetcherUtils.getTrackingUrl(appid)
}