If you are learning about Spark Streaming, as I am, you've probably used
netcat ("nc"), as mentioned in the Spark Streaming programming guide.  I
wanted something a little more useful, so I modified the
ClickStreamGenerator code to make a very simple script that simply reads a
file off disk and passes it to a socket, character by character.  You
specify the port, filename, and bytesPerSecond that you want it to send.

Thought someone else might find this helpful, so here it is.

import java.net.ServerSocket
import java.io.PrintWriter
import scala.io.Source

/**
 * Tiny throttled socket server for feeding test data to a streaming consumer.
 *
 * It listens on a TCP port and, for every client that connects, replays the
 * given file character by character, sleeping between characters so that the
 * output rate is roughly `bytesPerSecond`. Each character is also echoed to
 * stdout for debugging.
 *
 * Usage: `StreamingDataGenerator <port> <file> <bytesPerSecond>`
 */
object StreamingDataGenerator {

  private val Usage = "Usage: StreamingDataGenerator <port> <file> <bytesPerSecond>"

  /**
   * Program entry point.
   *
   * @param args exactly three values: the port to listen on, the path of the
   *             file to replay, and the target rate in characters per second
   *             (must be greater than zero)
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 3) {
      System.err.println(Usage)
      System.exit(1)
    }
    val port = args(0).toInt
    val path = args(1)
    val bytesPerSecond = args(2).toFloat

    // Written as !(x > 0) so that NaN is rejected as well as zero/negatives;
    // otherwise the delay below becomes Int.MaxValue or a negative sleep.
    if (!(bytesPerSecond > 0)) {
      System.err.println("bytesPerSecond must be greater than 0")
      System.err.println(Usage)
      System.exit(1)
    }

    // Truncated to whole milliseconds, so rates above 1000/s are not throttled.
    val sleepDelayMs = (1000.0 / bytesPerSecond).toInt

    // Open the file once up front purely to fail fast on a bad path and to
    // obtain its description; each client gets its own Source later.
    val probe = Source.fromFile(path)
    val description = try probe.descr finally probe.close()

    val listener = new ServerSocket(port)

    println("Reading from file: " + description)

    while (true) {
      println("Listening on port: " + port)
      val socket = listener.accept()
      // One thread per client so that accept() is never blocked by a slow replay.
      new Thread() {
        override def run(): Unit = serveClient(socket, path, sleepDelayMs)
      }.start()
    }
  }

  /**
   * Replays the file at `path` to a single connected client, then closes the
   * connection.
   *
   * A fresh Source is opened per client: a Source is a one-shot iterator, so
   * sharing one between connections would leave every client after the first
   * with no data (and concurrent clients racing on the same iterator).
   *
   * @param socket       the accepted client connection; always closed on return
   * @param path         path of the file to replay
   * @param sleepDelayMs pause, in milliseconds, before each character is sent
   */
  private def serveClient(socket: java.net.Socket, path: String, sleepDelayMs: Int): Unit = {
    println("Got client connect from: " + socket.getInetAddress)
    try {
      val source = Source.fromFile(path)
      try {
        val out = new PrintWriter(socket.getOutputStream(), true)

        // PrintWriter never throws on write failures; checkError() is the only
        // way to notice that the client has gone away, so stop replaying then.
        while (source.hasNext && !out.checkError()) {
          val c = source.next()
          Thread.sleep(sleepDelayMs.toLong)
          // write the character to the socket
          out.write(c)
          out.flush()
          // also print the character to stdout, for debugging ease
          print(c)
        }
      } finally {
        source.close()
      }
    } finally {
      socket.close()
    }
  }
}

Reply via email to