Re: [PR] [SPARK-50783][CORE] Canonicalize JVM profiler results file name and layout on DFS [spark]

via GitHub Fri, 10 Jan 2025 16:52:47 -0800


parthchandra commented on code in PR #49440:
URL: https://github.com/apache/spark/pull/49440#discussion_r1911779031



##########
connector/profiler/README.md:
##########
@@ -54,7 +54,7 @@ Then enable the profiling in the configuration.
   <td><code>spark.executor.profiling.dfsDir</code></td>
   <td>(none)</td>
   <td>
-      An HDFS compatible path to which the profiler's output files are copied. 
The output files will be written as 
<i>dfsDir/application_id/profile-appname-exec-executor_id.jfr</i> <br/>
+      An HDFS compatible path to which the profiler's output files are copied. 
The output files will be written as 
<i>dfsDir/{{APP_ID}}/profile-{{APP_ID}}-exec-{{EXECUTOR_ID}}.jfr</i> <br/>

Review Comment:
   I think this is fine. 



##########
connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala:
##########
@@ -38,15 +38,26 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, 
executorId: String) ex
   private var running = false
   private val enableProfiler = conf.get(EXECUTOR_PROFILING_ENABLED)
   private val profilerOptions = conf.get(EXECUTOR_PROFILING_OPTIONS)
-  private val profilerDfsDir = conf.get(EXECUTOR_PROFILING_DFS_DIR)
+  private val profilerDfsDirOpt = conf.get(EXECUTOR_PROFILING_DFS_DIR)
   private val profilerLocalDir = conf.get(EXECUTOR_PROFILING_LOCAL_DIR)
   private val writeInterval = conf.get(EXECUTOR_PROFILING_WRITE_INTERVAL)
 
-  private val startcmd = 
s"start,$profilerOptions,file=$profilerLocalDir/profile.jfr"
-  private val stopcmd = 
s"stop,$profilerOptions,file=$profilerLocalDir/profile.jfr"
-  private val dumpcmd = 
s"dump,$profilerOptions,file=$profilerLocalDir/profile.jfr"
-  private val resumecmd = 
s"resume,$profilerOptions,file=$profilerLocalDir/profile.jfr"
+  private val appId = try {
+    conf.getAppId
+  } catch {
+    case _: NoSuchElementException => "local-" + System.currentTimeMillis

Review Comment:
   I remember getting this error when developing this feature: the app id had 
not been generated yet when the profiler was being initialized. I don't know if 
we might still be getting this, but it is safer this way.



##########
connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala:
##########
@@ -38,15 +38,26 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, 
executorId: String) ex
   private var running = false
   private val enableProfiler = conf.get(EXECUTOR_PROFILING_ENABLED)
   private val profilerOptions = conf.get(EXECUTOR_PROFILING_OPTIONS)
-  private val profilerDfsDir = conf.get(EXECUTOR_PROFILING_DFS_DIR)
+  private val profilerDfsDirOpt = conf.get(EXECUTOR_PROFILING_DFS_DIR)
   private val profilerLocalDir = conf.get(EXECUTOR_PROFILING_LOCAL_DIR)
   private val writeInterval = conf.get(EXECUTOR_PROFILING_WRITE_INTERVAL)
 
-  private val startcmd = 
s"start,$profilerOptions,file=$profilerLocalDir/profile.jfr"
-  private val stopcmd = 
s"stop,$profilerOptions,file=$profilerLocalDir/profile.jfr"
-  private val dumpcmd = 
s"dump,$profilerOptions,file=$profilerLocalDir/profile.jfr"
-  private val resumecmd = 
s"resume,$profilerOptions,file=$profilerLocalDir/profile.jfr"
+  private val appId = try {
+    conf.getAppId
+  } catch {
+    case _: NoSuchElementException => "local-" + System.currentTimeMillis
+  }
+  private val appAttemptId = conf.getOption("spark.app.attempt.id")
+  private val baseName = Utils.nameForAppAndAttempt(appId, appAttemptId)
+  private val profileFile = s"profile-$baseName-exec-$executorId.jfr"
+
+  private val startcmd = 
s"start,$profilerOptions,file=$profilerLocalDir/$profileFile"
+  private val stopcmd = 
s"stop,$profilerOptions,file=$profilerLocalDir/$profileFile"
+  private val dumpcmd = 
s"dump,$profilerOptions,file=$profilerLocalDir/$profileFile"
+  private val resumecmd = 
s"resume,$profilerOptions,file=$profilerLocalDir/$profileFile"
 
+  private val PROFILER_FOLDER_PERMISSIONS = new 
FsPermission(Integer.parseInt("770", 8).toShort)
+  private val PROFILER_FILE_PERMISSIONS = new 
FsPermission(Integer.parseInt("660", 8).toShort)

Review Comment:
   +1, thank you.
   It would be really nice if you could take up the integration with the History UI. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Re: [PR] [SPARK-50783][CORE] Canonicalize JVM profiler results file name and layout on DFS [spark]

Reply via email to