Apologies for the HTML. It shouldn't have happened. I would like to use the dummies as independent variables in a regression. I did manage to count the observations in a given range using the following code:
for (i in filelist) { # i <- filelist[1] tmp1 <- as.data.table(read.csv(i, sep=",")) year<-tmp1$year[1] mykey=c("City","year","week") output <- as.data.frame(tmp1[,sum(avsft< -0),by=mykey])[,1:length(mykey)] output$avsft_1<- as.data.frame(tmp1[,sum(avsft>= -17.78 & avsft< -12.22, na.rm=T), by=mykey])[,length(mykey)+1] Where "i" is filenames (each file has data for 1 year). But instead of count I would like to generate dummy variables for ranges [(-17.78 & -12.22), (-12.22 & -6.67), ... (37.78 & 43.33)], so if a temperature observation falls within a given range - the dummy variable for that range will have a value of 1 for that week. Thanks again! tmp2<- dput(head(tmp1,10)) structure(list(yearday = c(1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L), City = structure(1:10, .Label = c("AKRON", "ALBANY", "ALBUQUERQUE", "ALLENTOWN", "ATLANTA", "AUSTIN", "BALTIMORE", "BATON ROUGE", "BERKELEY", "BIRMINGHAM", "BOISE", "BOSTON", "BRIDGEPORT", "BUFFALO", "CAMBRIDGE", "CAMDEN", "CANTON", "CHARLOTTE", "CHATTANOOGA", "CHICAGO", "CINCINNATI", "CLEVELAND", "COLORADO SPRINGS", "COLUMBUS", "CORPUS CHRISTI", "DALLAS", "DAYTON", "DENVER", "DES MOINES", "DETROIT", "DULUTH", "EL PASO", "ELIZABETH", "ERIE", "EVANSVILLE", "FALL RIVER", "FLINT", "FORT WAYNE", "FRESNO", "FT WORTH", "GARY", "GLENDALE", "GRAND RAPIDS", "HARTFORD", "HONOLULU", "HOUSTON", "INDIANAPOLIS", "JACKSONVILLE", "JERSEY CITY", "KANSAS CITY", "KANSAS ITY", "KNOXVILLE", "Lansing ", "LAS VEGAS", "LEXINGTON", "LINCOLN", "LITTLE ROCK", "LONG BEACH", "LOS ANGELES", "LOUISVILLE", "LOWELL", "LYNN", "MADISON", "MEMPHIS", "MIAMI", "MILWAUKEE", "MINNEAPOLIS", "MOBILE", "MONTGOMERY", "NASHVILLE", "NEW BEDFORD", "NEW HAVEN", "NEW ORLEANS", "NEW YORK CITY", "NEWARK", "NORFOLK", "OAKLAND", "OGDEN", "OKLAHOMA CITY", "OMAHA", "PASADENA", "PATERSON", "PEORIA", "PHILADELPHIA", "PHOENIX", "PITTSBURG", "PORTLAND", "PROVIDENCE", "PUEBLO", "READING", "RICHMOND", "ROCHESTER", "ROCKFORD", 
"SACRAMENTO", "SALT LAKE CITY", "SAN ANTONIO", "SAN CRUZ", "SAN DIEGO", "SAN FRANCISCO", "SAN JOSE", "SAVANNAH", "SCHENECTADY", "SCRANTON", "SEATTLE", "SHREVEPORT", "SOMERVILLE", "SOUTH BEND", "SPOKANE", "SPRINGFIELD", "ST LOUIS", "ST PAUL", "ST PETERSBURG", "SYRACUSE", "TACOMA", "TAMPA", "TOLEDO", "TRENTON", "TUCSON", "TULSA", "UTICA", "WASHINGTON", "WATERBURY", "WICHITA", "WILMINGTON", "WORCESTER", "YONKERS", "YOUNGSTOWN" ), class = "factor"), cell_number = c(17379L, 17027L, 19514L, 17745L, 20256L, 21323L, 18104L, 21329L, 18779L, 20254L), longitude = c(-81.519005, -73.756232, -106.609991, -75.490183, -84.387982, -97.743061, -76.612189, -91.14032, -121.635963, -86.80249), latitude = c(41.081445, 42.652579, 35.110703, 40.608431, 33.748995, 30.267153, 39.290385, 30.458283, 37.871744, 33.520661), State = structure(c(29L, 28L, 27L, 32L, 10L, 35L, 19L, 17L, 4L, 1L), .Label = c(" ALA", " ARIZ", " ARK", " CAL", " COLO", " CONN", " DC", " DEL", " FLA", " GA", " HAWAII", " ILL", " IND", " IOWA", " KANS", " KY", " LA", " MASS", " MD", " MICH", " MINN", " MO", " NC", " NEBR", " NEV", " NJ", " NM", " NY", " OHIO", " OKLA", " ORE", " PA", " RI", " TENN", " TEX", " UTAH", " VA", " WASH", " WIS", "CAL", "CONN", "IDAH", "KY", "MASS"), class = "factor"), avsft = c(-7.81, -16.06, -7.71999999999997, -1.88999999999999, 2.90000000000003, 5.12, -5.02999999999997, 9.33000000000004, 15.08, 2.89000000000004 ), year = c(1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L), day = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), hour = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), yearweek = c(197001L, 197001L, 197001L, 197001L, 197001L, 197001L, 197001L, 197001L, 197001L, 197001L), week = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("yearday", "City", "cell_number", "longitude", "latitude", "State", "avsft", "year", "day", "hour", "yearweek", "week"), row.names = c(NA, 10L), class = "data.frame") Sincerely, Shouro On Fri, Sep 11, 2015 at 12:33 AM, Bert Gunter 
<bgunter.4...@gmail.com> wrote: > 1. Posting in HTML largely negated your ability to provide data > through dput(). Follow the posting guide and post in PLAIN TEXT only, > please. > > 2. See ?cut . I think this will at least get you started. > > Cheers, > Bert > Bert Gunter > > "Data is not information. Information is not knowledge. And knowledge > is certainly not wisdom." > -- Clifford Stoll > > > On Thu, Sep 10, 2015 at 3:28 PM, Shouro Dasgupta <sho...@gmail.com> wrote: > > Dear all, > > > > I have 3-hourly temperature data from 1970-2010 for 122 cities in the > US. I > > would like to bin this data by city-year-week. My idea is if the > > temperature for a particular city in a given week falls within a given > > range (-17.78 & -12.22), (-12.22 & -6.67), ... (37.78 & 43.33), then the > > corresponding bin would have a value of 1 and 0 otherwise. > > > > The data looks like this. Basically, I need to generate a dummy variable > > for each temperature range. Any help will be greatly appreciated. 
> > > > tmp2<- dput(head(tmp1,10)) > >> structure(list(yearday = c(1970001L, 1970001L, 1970001L, 1970001L, > >> 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L), > >> City = structure(1:10, .Label = c("AKRON", "ALBANY", "ALBUQUERQUE", > >> "ALLENTOWN", "ATLANTA", "AUSTIN", "BALTIMORE", "BATON ROUGE", > >> "BERKELEY", "BIRMINGHAM", "BOISE", "BOSTON", "BRIDGEPORT", > >> "BUFFALO", "CAMBRIDGE", "CAMDEN", "CANTON", "CHARLOTTE", > >> "CHATTANOOGA", "CHICAGO", "CINCINNATI", "CLEVELAND", "COLORADO > >> SPRINGS", > >> "COLUMBUS", "CORPUS CHRISTI", "DALLAS", "DAYTON", "DENVER", > >> "DES MOINES", "DETROIT", "DULUTH", "EL PASO", "ELIZABETH", > >> "ERIE", "EVANSVILLE", "FALL RIVER", "FLINT", "FORT WAYNE", > >> "FRESNO", "FT WORTH", "GARY", "GLENDALE", "GRAND RAPIDS", > >> "HARTFORD", "HONOLULU", "HOUSTON", "INDIANAPOLIS", "JACKSONVILLE", > >> "JERSEY CITY", "KANSAS CITY", "KANSAS ITY", "KNOXVILLE", > >> "Lansing ", "LAS VEGAS", "LEXINGTON", "LINCOLN", "LITTLE ROCK", > >> "LONG BEACH", "LOS ANGELES", "LOUISVILLE", "LOWELL", "LYNN", > >> "MADISON", "MEMPHIS", "MIAMI", "MILWAUKEE", "MINNEAPOLIS", > >> "MOBILE", "MONTGOMERY", "NASHVILLE", "NEW BEDFORD", "NEW HAVEN", > >> "NEW ORLEANS", "NEW YORK CITY", "NEWARK", "NORFOLK", "OAKLAND", > >> "OGDEN", "OKLAHOMA CITY", "OMAHA", "PASADENA", "PATERSON", > >> "PEORIA", "PHILADELPHIA", "PHOENIX", "PITTSBURG", "PORTLAND", > >> "PROVIDENCE", "PUEBLO", "READING", "RICHMOND", "ROCHESTER", > >> "ROCKFORD", "SACRAMENTO", "SALT LAKE CITY", "SAN ANTONIO", > >> "SAN CRUZ", "SAN DIEGO", "SAN FRANCISCO", "SAN JOSE", "SAVANNAH", > >> "SCHENECTADY", "SCRANTON", "SEATTLE", "SHREVEPORT", "SOMERVILLE", > >> "SOUTH BEND", "SPOKANE", "SPRINGFIELD", "ST LOUIS", "ST PAUL", > >> "ST PETERSBURG", "SYRACUSE", "TACOMA", "TAMPA", "TOLEDO", > >> "TRENTON", "TUCSON", "TULSA", "UTICA", "WASHINGTON", "WATERBURY", > >> "WICHITA", "WILMINGTON", "WORCESTER", "YONKERS", "YOUNGSTOWN" > >> ), class = "factor"), cell_number = c(17379L, 17027L, 19514L, > >> 
17745L, 20256L, 21323L, 18104L, 21329L, 18779L, 20254L), > >> longitude = c(-81.519005, -73.756232, -106.609991, -75.490183, > >> -84.387982, -97.743061, -76.612189, -91.14032, -121.635963, > >> -86.80249), latitude = c(41.081445, 42.652579, 35.110703, > >> 40.608431, 33.748995, 30.267153, 39.290385, 30.458283, 37.871744, > >> 33.520661), State = structure(c(29L, 28L, 27L, 32L, 10L, > >> 35L, 19L, 17L, 4L, 1L), .Label = c(" ALA", " ARIZ", " ARK", > >> " CAL", " COLO", " CONN", " DC", " DEL", " FLA", " GA", " HAWAII", > >> " ILL", " IND", " IOWA", " KANS", " KY", " LA", " MASS", > >> " MD", " MICH", " MINN", " MO", " NC", " NEBR", " NEV", " NJ", > >> " NM", " NY", " OHIO", " OKLA", " ORE", " PA", " RI", " TENN", > >> " TEX", " UTAH", " VA", " WASH", " WIS", "CAL", "CONN", "IDAH", > >> "KY", "MASS"), class = "factor"), avsft = c(-7.81, -16.06, > >> -7.71999999999997, -1.88999999999999, 2.90000000000003, 5.12, > >> -5.02999999999997, 9.33000000000004, 15.08, 2.89000000000004 > >> ), year = c(1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, > >> 1970L, 1970L, 1970L), day = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, > >> 1L, 1L, 1L), hour = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, > >> 0L), yearweek = c(197001L, 197001L, 197001L, 197001L, 197001L, > >> 197001L, 197001L, 197001L, 197001L, 197001L), week = c(1L, > >> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("yearday", > >> "City", "cell_number", "longitude", "latitude", "State", "avsft", > >> "year", "day", "hour", "yearweek", "week"), row.names = c(NA, > >> 10L), class = "data.frame") > > > > > > Sincerely, > > > > Shouro > > > > [[alternative HTML version deleted]] > > > > ______________________________________________ > > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > > https://stat.ethz.ch/mailman/listinfo/r-help > > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > > and provide commented, minimal, self-contained, reproducible code. 
> [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.