parthchandra commented on code in PR #49440: URL: https://github.com/apache/spark/pull/49440#discussion_r1911779031
########## connector/profiler/README.md: ########## @@ -54,7 +54,7 @@ Then enable the profiling in the configuration. <td><code>spark.executor.profiling.dfsDir</code></td> <td>(none)</td> <td> - An HDFS compatible path to which the profiler's output files are copied. The output files will be written as <i>dfsDir/application_id/profile-appname-exec-executor_id.jfr</i> <br/> + An HDFS compatible path to which the profiler's output files are copied. The output files will be written as <i>dfsDir/{{APP_ID}}/profile-{{APP_ID}}-exec-{{EXECUTOR_ID}}.jfr</i> <br/> Review Comment: I think this is fine. ########## connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala: ########## @@ -38,15 +38,26 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex private var running = false private val enableProfiler = conf.get(EXECUTOR_PROFILING_ENABLED) private val profilerOptions = conf.get(EXECUTOR_PROFILING_OPTIONS) - private val profilerDfsDir = conf.get(EXECUTOR_PROFILING_DFS_DIR) + private val profilerDfsDirOpt = conf.get(EXECUTOR_PROFILING_DFS_DIR) private val profilerLocalDir = conf.get(EXECUTOR_PROFILING_LOCAL_DIR) private val writeInterval = conf.get(EXECUTOR_PROFILING_WRITE_INTERVAL) - private val startcmd = s"start,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val stopcmd = s"stop,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val dumpcmd = s"dump,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val resumecmd = s"resume,$profilerOptions,file=$profilerLocalDir/profile.jfr" + private val appId = try { + conf.getAppId + } catch { + case _: NoSuchElementException => "local-" + System.currentTimeMillis Review Comment: I remember getting this error when developing this feature. The app id had not been generated when the profiler was being initialized. I don't know if we might still be getting this, but safer this way. ########## connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala: ########## @@ -38,15 +38,26 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex private var running = false private val enableProfiler = conf.get(EXECUTOR_PROFILING_ENABLED) private val profilerOptions = conf.get(EXECUTOR_PROFILING_OPTIONS) - private val profilerDfsDir = conf.get(EXECUTOR_PROFILING_DFS_DIR) + private val profilerDfsDirOpt = conf.get(EXECUTOR_PROFILING_DFS_DIR) private val profilerLocalDir = conf.get(EXECUTOR_PROFILING_LOCAL_DIR) private val writeInterval = conf.get(EXECUTOR_PROFILING_WRITE_INTERVAL) - private val startcmd = s"start,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val stopcmd = s"stop,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val dumpcmd = s"dump,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val resumecmd = s"resume,$profilerOptions,file=$profilerLocalDir/profile.jfr" + private val appId = try { + conf.getAppId + } catch { + case _: NoSuchElementException => "local-" + System.currentTimeMillis + } + private val appAttemptId = conf.getOption("spark.app.attempt.id") + private val baseName = Utils.nameForAppAndAttempt(appId, appAttemptId) + private val profileFile = s"profile-$baseName-exec-$executorId.jfr" + + private val startcmd = s"start,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val stopcmd = s"stop,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val dumpcmd = s"dump,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val resumecmd = s"resume,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val PROFILER_FOLDER_PERMISSIONS = new FsPermission(Integer.parseInt("770", 8).toShort) + private val PROFILER_FILE_PERMISSIONS = new FsPermission(Integer.parseInt("660", 8).toShort) Review Comment: +1, thank you. It would be really nice if you take up the integration with the History UI. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org