Hi,
Thanks for the response. Could you please look into my repo? The class in
question is the Utils class. I cannot paste the entire code, which is why I
am linking to the repo instead.
I have another class from which I call the Utils class for object
creation.
package main.scala
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import com.codahale.jerkson.Json._
import scala.collection.JavaConversions._
import scala.collection.immutable._
import scala.io.Source
import rtree._
object Utils {
//Main project directory
val work_directory = "/Users/Meghana/Documents/workspace/assignment"
//Location of spark home
val spark_home = "/Users/Meghana/Downloads/spark-0.9.0-incubating"
//Location of Twitter data
val data_home = "/Users/Meghana/Desktop/Twitter/Europe/2012/3/1"
//CSV file that has the city data (presumably city bounding boxes, going by the name bbox_file -- TODO confirm)
val bbox_file = work_directory + "/cities_eu.csv"
//Locations to store the intermediate data
val intermediate_ucg_data = work_directory + "/ucg_int"
val intermediate_rt_data = work_directory + "/rtc_int"
val intermediate_wc_data = work_directory + "/wc_int"
val intermediate_ucc_data = work_directory + "/ucc_int"
//Create spark context
val sc = new SparkContext("local", "Simple App", Utils.spark_home,
List("target/scala-2.10/simple-project_2.10-1.0.jar"))
//RTree structure with key as the city name.
//First initialize with an empty tree.
var rtree:RTree[String] = RTree.empty
//default window size
var jumping_window_size:Integer = 1;
var sliding_window_size:Integer = 1;
//We start from hour 1
val initialHour:Integer = 1;
//We calculate with this frequency (eg; every one hour)
val calcFreq:Integer = 1;
//Object representation of Tweet
//text -> Tweet text
//retweets -> number of re-tweets of the current tweet.
//country -> country where the tweet appeared
//city -> from which city the tweet has appeared
//hour -> hour of the tweet.
class Tweet(val user: String,
            val text: String,
            val retweets: Integer,
            val country: String,
            val city: String,
            val hour: Int) {

  /** Renders the tweet as one "Label: value" line per field.
    *
    * Fields appear in constructor order and every line, including the
    * last one, is terminated by a newline.
    */
  override def toString: String =
    List(
      s"User: $user",
      s"Text: $text",
      s"Retweets: $retweets",
      s"Country: $country",
      s"City: $city",
      s"hour: $hour"
    ).mkString("", "\n", "\n")
}
//Function to parse a line of string to a json object, and then create a
//Tweet instance
def parseTweet(s: String): Tweet = {
//Parse the given json line from the twitter dataset to the
//key:value map.
val tweet_details_map = parse[Map[String, Any]](s)
//Extract tweet string from the given line
val text:String =
tweet_details_map.get("text").get.asInstanceOf[String]
//Extract the user data from the tweet line.
val user_details =
tweet_details_map.get("user").get.asInstanceOf[java.util.LinkedHashMap[String,Any]]
//Extract the retweet count from the given tweet line.
val retweets:Integer =
tweet_details_map.get("retweet_count").get.asInstanceOf[Integer]
https://bitbucket.org/smartmetersproject/twitterdatasets1/src/c379405f1437a9eb4fc7fa0f3f9a2834e766ad2d/src/main/scala/Utils.scala?at=master
--
View this message in context:
http://apache-spark-user-list.1001560.n3.nabble.com/Need-suggestions-tp3650p3652.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.