[GitHub] spark pull request: [SPARK-2261] Make event logger use a single fi...

andrewor14 Mon, 06 Oct 2014 22:31:32 -0700

Github user andrewor14 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/1222#discussion_r18501147
  
    --- Diff: 
core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala ---
    @@ -17,74 +17,43 @@
     
     package org.apache.spark.scheduler
     
    -import java.io.{BufferedInputStream, InputStream}
    +import java.io.{InputStream, IOException}
     
     import scala.io.Source
     
    -import org.apache.hadoop.fs.{Path, FileSystem}
     import org.json4s.jackson.JsonMethods._
     
     import org.apache.spark.Logging
    -import org.apache.spark.io.CompressionCodec
     import org.apache.spark.util.JsonProtocol
     
     /**
    - * A SparkListenerBus that replays logged events from persisted storage.
    - *
    - * This assumes the given paths are valid log files, where each line can 
be deserialized into
    - * exactly one SparkListenerEvent.
    + * A SparkListenerBus that can be used to replay events from serialized 
event data.
      */
    -private[spark] class ReplayListenerBus(
    -    logPaths: Seq[Path],
    -    fileSystem: FileSystem,
    -    compressionCodec: Option[CompressionCodec])
    -  extends SparkListenerBus with Logging {
    -
    -  private var replayed = false
    -
    -  if (logPaths.length == 0) {
    -    logWarning("Log path provided contains no log files.")
    -  }
    +private[spark] class ReplayListenerBus extends SparkListenerBus with 
Logging {
     
       /**
    -   * Replay each event in the order maintained in the given logs.
    -   * This should only be called exactly once.
    +   * Replay each event in the order maintained in the given stream.
    +   * This method can be called multiple times, but the listener behavior 
is undefined after any
    +   * error is thrown by this method.
    +   *
    +   * @param logData Stream containing event log data.
    +   * @param version Spark version that generated the events.
        */
    -  def replay() {
    -    assert(!replayed, "ReplayListenerBus cannot replay events more than 
once")
    -    logPaths.foreach { path =>
    -      // Keep track of input streams at all levels to close them later
    -      // This is necessary because an exception can occur in between 
stream initializations
    -      var fileStream: Option[InputStream] = None
    -      var bufferedStream: Option[InputStream] = None
    -      var compressStream: Option[InputStream] = None
    -      var currentLine = "<not started>"
    -      try {
    -        fileStream = Some(fileSystem.open(path))
    -        bufferedStream = Some(new BufferedInputStream(fileStream.get))
    -        compressStream = Some(wrapForCompression(bufferedStream.get))
    -
    -        // Parse each line as an event and post the event to all attached 
listeners
    -        val lines = Source.fromInputStream(compressStream.get).getLines()
    -        lines.foreach { line =>
    -          currentLine = line
    -          postToAll(JsonProtocol.sparkEventFromJson(parse(line)))
    -        }
    -      } catch {
    -        case e: Exception =>
    -          logError("Exception in parsing Spark event log %s".format(path), 
e)
    -          logError("Malformed line: %s\n".format(currentLine))
    -      } finally {
    -        fileStream.foreach(_.close())
    -        bufferedStream.foreach(_.close())
    -        compressStream.foreach(_.close())
    +  def replay(logData: InputStream, version: String) {
    --- End diff --
    
    is version used?



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request: [SPARK-2261] Make event logger use a single fi...

Reply via email to