If you are learning about Spark Streaming, as I am, you've probably used netcat ("nc") as mentioned in the Spark Streaming Programming Guide. I wanted something a little more useful, so I modified the ClickStreamGenerator code to make a very simple script that reads a file off disk and passes it to a socket, character by character. You specify the port, the filename, and the bytesPerSecond rate at which you want it to send.
// Thought someone else might find this helpful, so here it is.

import java.net.ServerSocket
import java.io.PrintWriter
import scala.io.Source

/**
 * Tiny TCP server that replays a text file to every client that connects,
 * one character at a time, throttled to a requested rate.
 *
 * Usage: `StreamingDataGenerator <port> <file> <bytesPerSecond>`
 *
 * Each accepted connection is served on its own thread and receives the file
 * from the beginning. The file is read with the platform default codec and
 * written with the platform default charset, so the rate is really
 * "characters per second"; for single-byte encodings that equals bytes.
 *
 * The inter-character delay is a whole number of milliseconds
 * (`1000 / bytesPerSecond`, truncated), so any rate above 1000 results in a
 * delay of 0 ms, i.e. no throttling.
 */
object StreamingDataGenerator {

  private val Usage = "Usage: StreamingDataGenerator <port> <file> <bytesPerSecond>"

  /** Prints the usage line to stderr and terminates the JVM with exit code 1. */
  private def exitWithUsage(): Nothing = {
    System.err.println(Usage)
    sys.exit(1)
  }

  /**
   * Program entry point: validates the arguments, then accepts clients forever.
   *
   * @param args `port`, `file`, `bytesPerSecond` (exactly three entries)
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 3) exitWithUsage()

    val path = args(1)
    val (port, bytesPerSecond) =
      try {
        (args(0).toInt, args(2).toFloat)
      } catch {
        // Non-numeric port/rate is a usage error, not a crash.
        case _: NumberFormatException => exitWithUsage()
      }

    // `!(x > 0)` also rejects NaN. A non-positive rate would otherwise yield a
    // negative or meaningless sleep delay and fail later inside a client thread.
    if (!(bytesPerSecond > 0)) {
      System.err.println("bytesPerSecond must be a positive number")
      exitWithUsage()
    }

    val sleepDelayMs = (1000.0 / bytesPerSecond).toInt

    // Open the file once up front purely to fail fast on a bad path and to log
    // its description; each client opens its own Source later.
    val probe = Source.fromFile(path)
    try println("Reading from file: " + probe.descr)
    finally probe.close()

    val listener = new ServerSocket(port)
    try {
      while (true) {
        println("Listening on port: " + port)
        val socket = listener.accept()
        new Thread() {
          override def run(): Unit = serveClient(socket, path, sleepDelayMs)
        }.start()
      }
    } finally {
      listener.close()
    }
  }

  /**
   * Streams the file at `path` to one connected client, then closes the socket.
   *
   * A fresh `Source` is opened per client because a `Source` is a single-pass
   * iterator: sharing one across connections would give the whole file to the
   * first client only and would be raced on by concurrent clients.
   *
   * @param socket       the accepted client connection; always closed on return
   * @param path         file to replay
   * @param sleepDelayMs pause before each character, in milliseconds
   */
  private def serveClient(socket: java.net.Socket, path: String, sleepDelayMs: Int): Unit = {
    try {
      println("Got client connect from: " + socket.getInetAddress)
      val out = new PrintWriter(socket.getOutputStream(), true)
      val source = Source.fromFile(path)
      try {
        // PrintWriter never throws on I/O failure; checkError() is the only way
        // to notice that the client has gone away, so stop streaming once it
        // reports trouble instead of sleeping through the rest of the file.
        source.takeWhile(_ => !out.checkError()).foreach { c =>
          Thread.sleep(sleepDelayMs)
          // write the character to the socket
          out.write(c.toInt)
          out.flush()
          // also print the character to stdout, for debugging ease
          print(c)
        }
      } finally {
        source.close()
      }
    } finally {
      socket.close()
    }
  }
}