Hi David, Thank you, and everyone else, so much for your help. Here is the code.
balok <- read.csv("G:/A_backup 11 mei 2015/DATA (D)/1 Universiti Malaysia Pahang/ISM-3 2016 UM/Data/Hourly Rainfall/balok2.csv",header=TRUE)
head(balok, 10); tail(balok, 10)
str(balok)

## Introduce NAs for non-numeric entries (e.g. "0.0?")
balok$Rain.mm2 <- as.numeric( as.character(balok$Rain.mm) )
head(balok$Rain.mm2); tail(balok$Rain.mm2)
head(balok, 10); tail(balok, 10)

## Change date format from DD/MM/YYYY to Day, Month, Year separately
realdate <- as.Date(balok$Date,format="%d/%m/%Y")
dfdate <- data.frame(date=realdate)
year=as.numeric (format(realdate,"%Y"))
month=as.numeric (format(realdate,"%m"))
day=as.numeric (format(realdate,"%d"))

balok2 <-cbind(dfdate,day,month,year,balok)
colnames(balok2)
head(balok2)

## New data format
balok2_new <- balok2[,c(-1,-5,-7)]
colnames(balok2_new)
head(balok2_new); tail(balok2_new)

## Aggregate data
## Sum rainfall amount from HOURLY to DAILY
dt <- balok2_new
str(dt)
aggbalok_day <- aggregate(dt[,5], by=dt[,c(1,2,3)],FUN=sum, na.rm=TRUE)
head(aggbalok_day)

## Sum rainfall amount from HOURLY to MONTHLY
dt <- balok2_new
str(dt)
aggbalok_mth <- aggregate(dt[,5], by=dt[,c(2,3)],FUN=sum, na.rm=TRUE)
head(aggbalok_mth)

Now I would like to compute basic summary statistics for the data by month.

Best regards

On Wed, Jul 13, 2016 at 10:37 PM, David Winsemius <dwinsem...@comcast.net> wrote:

>
> > On Jul 13, 2016, at 3:21 AM, roslinazairimah zakaria <
> roslina...@gmail.com> wrote:
> >
> > Dear David,
> >
> > I got your point. How do I remove the data that contain "0.0?".
> >
> > I tried : balok <- cbind(balok3[,-5],
> balok3$Rain.mm[balok3$Rain.mm==0.0?] <- NA)
>
> If you had done as I suggested, the items with factor levels of "0.0?"
> would have automatically become NA and you would have gotten a warning
> message:
>
> > testfac <- factor( c(rep("0.0",7), "0.07", "0.0?", '0.01', '0.17'))
> > testfac
> [1] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.07 0.0? 0.01 0.17
> Levels: 0.0 0.0?
0.01 0.07 0.17 > > as.numeric(as.character( testfac)) > [1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.07 NA 0.01 0.17 > Warning message: > NAs introduced by coercion > > > > > > > However all the Rain.mm column all become NA. > > > > day month year Time balok3$Rain.mm[balok3$Rain.mm == "0.0?"] <- NA > > 1 30 7 2008 9:00:00 NA > > 2 30 7 2008 10:00:00 NA > > 3 30 7 2008 11:00:00 NA > > 4 30 7 2008 12:00:00 NA > > 5 30 7 2008 13:00:00 NA > > 6 30 7 2008 14:00:00 NA > > 7 30 7 2008 15:00:00 NA > > 8 30 7 2008 16:00:00 NA > > 9 30 7 2008 17:00:00 NA > > 10 30 7 2008 18:00:00 NA > > > > Thank you so much. > > > > > > On Wed, Jul 13, 2016 at 9:42 AM, David Winsemius <dwinsem...@comcast.net> > wrote: > > > > > On Jul 12, 2016, at 3:45 PM, roslinazairimah zakaria < > roslina...@gmail.com> wrote: > > > > > > Dear R-users, > > > > > > I have these data: > > > > > > head(balok, 10); tail(balok, 10) > > > Date Time Rain.mm > > > 1 30/7/2008 9:00:00 0 > > > 2 30/7/2008 10:00:00 0 > > > 3 30/7/2008 11:00:00 0 > > > 4 30/7/2008 12:00:00 0 > > > 5 30/7/2008 13:00:00 0 > > > 6 30/7/2008 14:00:00 0 > > > 7 30/7/2008 15:00:00 0 > > > 8 30/7/2008 16:00:00 0 > > > 9 30/7/2008 17:00:00 0 > > > 10 30/7/2008 18:00:00 0 > > > Date Time Rain.mm > > > 63667 4/11/2015 3:00:00 0 > > > 63668 4/11/2015 4:00:00 0 > > > 63669 4/11/2015 5:00:00 0 > > > 63670 4/11/2015 6:00:00 0 > > > 63671 4/11/2015 7:00:00 0 > > > 63672 4/11/2015 8:00:00 0 > > > 63673 4/11/2015 9:00:00 0.1 > > > 63674 4/11/2015 10:00:00 0.1 > > > 63675 4/11/2015 11:00:00 0.1 > > > 63676 4/11/2015 12:00:00 0.1? > > > > > >> str(balok) > > > 'data.frame': 63676 obs. of 3 variables: > > > $ Date : Factor w/ 2654 levels "1/1/2009","1/1/2010",..: 2056 2056 > 2056 > > > 2056 2056 2056 2056 2056 2056 2056 ... > > > $ Time : Factor w/ 24 levels "1:00:00","10:00:00",..: 24 2 3 4 5 6 7 > 8 9 > > > 10 ... 
> > > $ Rain.mm: Factor w/ 352 levels "0","0.0?","0.1",..: 1 1 1 1 1 1 1 1 1 > 1 > > > > Thar's your problem: > > > > Rain.mm: Factor w/ 352 levels "0","0.0?","0.1" > > > > Need to use the standard fix for the screwed-up-factor-on-input-problem > > > > balok$Rain.mm2 <- as.numeric( as.character(balok$Rain.mm) ) > > > > Cannot just do as.numeric because factors are actually already numeric. > > > > -- > > David. > > > > > > > ... > > > > > > and I have change the data as follows: > > > > > > realdate <- as.Date(balok$Date,format="%d/%m/%Y") > > > dfdate <- data.frame(date=realdate) > > > year=as.numeric (format(realdate,"%Y")) > > > month=as.numeric (format(realdate,"%m")) > > > day=as.numeric (format(realdate,"%d")) > > > > > > balok2 <-cbind(dfdate,day,month,year,balok[,2:3]) > > > colnames(balok2) > > > head(balok2) > > > date day month year Time Rain.mm > > > 1 2008-07-30 30 7 2008 9:00:00 0 > > > 2 2008-07-30 30 7 2008 10:00:00 0 > > > 3 2008-07-30 30 7 2008 11:00:00 0 > > > 4 2008-07-30 30 7 2008 12:00:00 0 > > > 5 2008-07-30 30 7 2008 13:00:00 0 > > > 6 2008-07-30 30 7 2008 14:00:00 0 > > > ... 
> > > > > >> balok3 <- balok2[,-1]; head(balok3, n=100) > > > day month year Time Rain.mm > > > 1 30 7 2008 9:00:00 0 > > > 2 30 7 2008 10:00:00 0 > > > 3 30 7 2008 11:00:00 0 > > > 4 30 7 2008 12:00:00 0 > > > 5 30 7 2008 13:00:00 0 > > > 6 30 7 2008 14:00:00 0 > > > 7 30 7 2008 15:00:00 0 > > > 8 30 7 2008 16:00:00 0 > > > 9 30 7 2008 17:00:00 0 > > > 10 30 7 2008 18:00:00 0 > > > 11 30 7 2008 19:00:00 0 > > > 12 30 7 2008 20:00:00 0 > > > 13 30 7 2008 21:00:00 0 > > > 14 30 7 2008 22:00:00 0 > > > 15 30 7 2008 23:00:00 0 > > > 16 30 7 2008 24:00:00 0 > > > 17 31 7 2008 1:00:00 0 > > > 18 31 7 2008 2:00:00 0 > > > 19 31 7 2008 3:00:00 0 > > > 20 31 7 2008 4:00:00 0 > > > 21 31 7 2008 5:00:00 0 > > > 22 31 7 2008 6:00:00 0 > > > 23 31 7 2008 7:00:00 0 > > > 24 31 7 2008 8:00:00 0 > > > 25 31 7 2008 9:00:00 0 > > > 26 31 7 2008 10:00:00 0 > > > 27 31 7 2008 11:00:00 0 > > > 28 31 7 2008 12:00:00 0 > > > 29 31 7 2008 13:00:00 0 > > > 30 31 7 2008 14:00:00 0 > > > 31 31 7 2008 15:00:00 0 > > > 32 31 7 2008 16:00:00 0 > > > 33 31 7 2008 17:00:00 0 > > > 34 31 7 2008 18:00:00 0 > > > 35 31 7 2008 19:00:00 0 > > > 36 31 7 2008 20:00:00 0 > > > 37 31 7 2008 21:00:00 0 > > > 38 31 7 2008 22:00:00 0 > > > 39 31 7 2008 23:00:00 0 > > > 40 31 7 2008 24:00:00 0 > > > 41 1 8 2008 1:00:00 0 > > > 42 1 8 2008 2:00:00 0 > > > 43 1 8 2008 3:00:00 0 > > > 44 1 8 2008 4:00:00 0 > > > 45 1 8 2008 5:00:00 0 > > > 46 1 8 2008 6:00:00 0 > > > 47 1 8 2008 7:00:00 0 > > > 48 1 8 2008 8:00:00 0 > > > 49 1 8 2008 9:00:00 0 > > > 50 1 8 2008 10:00:00 0 > > > 51 1 8 2008 11:00:00 0 > > > 52 1 8 2008 12:00:00 0 > > > 53 1 8 2008 13:00:00 0 > > > 54 1 8 2008 14:00:00 0 > > > 55 1 8 2008 15:00:00 0 > > > 56 1 8 2008 16:00:00 0 > > > 57 1 8 2008 17:00:00 0 > > > 58 1 8 2008 18:00:00 0 > > > 59 1 8 2008 19:00:00 0 > > > 60 1 8 2008 20:00:00 0 > > > 61 1 8 2008 21:00:00 0 > > > 62 1 8 2008 22:00:00 0 > > > 63 1 8 2008 23:00:00 0 > > > 64 1 8 2008 24:00:00 0 > > > 65 2 8 2008 1:00:00 0 > > > 66 2 8 
2008 2:00:00 0 > > > 67 2 8 2008 3:00:00 0 > > > 68 2 8 2008 4:00:00 0 > > > 69 2 8 2008 5:00:00 0 > > > 70 2 8 2008 6:00:00 0 > > > 71 2 8 2008 7:00:00 0 > > > 72 2 8 2008 8:00:00 0 > > > 73 2 8 2008 9:00:00 0 > > > 74 2 8 2008 10:00:00 0 > > > 75 2 8 2008 11:00:00 0 > > > 76 2 8 2008 12:00:00 0 > > > 77 2 8 2008 13:00:00 0 > > > 78 2 8 2008 14:00:00 0 > > > 79 2 8 2008 15:00:00 0 > > > 80 2 8 2008 16:00:00 0 > > > 81 2 8 2008 17:00:00 0 > > > 82 2 8 2008 18:00:00 0 > > > 83 2 8 2008 19:00:00 0 > > > 84 2 8 2008 20:00:00 0 > > > 85 2 8 2008 21:00:00 0 > > > 86 2 8 2008 22:00:00 0 > > > 87 2 8 2008 23:00:00 0 > > > 88 2 8 2008 24:00:00 11.1 > > > 89 3 8 2008 1:00:00 0.4 > > > 90 3 8 2008 2:00:00 0 > > > 91 3 8 2008 3:00:00 0 > > > 92 3 8 2008 4:00:00 0 > > > 93 3 8 2008 5:00:00 0 > > > 94 3 8 2008 6:00:00 0 > > > 95 3 8 2008 7:00:00 0 > > > 96 3 8 2008 8:00:00 0 > > > 97 3 8 2008 9:00:00 0 > > > 98 3 8 2008 10:00:00 0 > > > 99 3 8 2008 11:00:00 0 > > > 100 3 8 2008 12:00:00 0 > > > > > > The rainfall data is in hourly unit, and I would like to sum the > Rain.mm > > > according to month. I tried to use aggregate(), but I got this > message: > > > > > > dt <- balok4 > > > str(dt) > > > aggbalok <- aggregate(dt[,5], by=dt[,c(1,4)],FUN=sum, na.rm=TRUE) > > > aggbalok > > > > > > Error in Summary.factor(1L, na.rm = TRUE) : > > > sum not meaningful for factors > > > > > > > > > Thank you so much for any help given. > > > > > > Roslina > > > > > > [[alternative HTML version deleted]] > > > > > > ______________________________________________ > > > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > > > https://stat.ethz.ch/mailman/listinfo/r-help > > > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > > > and provide commented, minimal, self-contained, reproducible code. > > > > David Winsemius > > Alameda, CA, USA > > > > > > > > > > -- > > Dr. Roslinazairimah Binti Zakaria > > Tel: +609-5492370; Fax. 
No.+609-5492766 > > Email: roslinazairi...@ump.edu.my; roslina...@gmail.com > > Deputy Dean (Academic & Student Affairs) > > Faculty of Industrial Sciences & Technology > > University Malaysia Pahang > > Lebuhraya Tun Razak, 26300 Gambang, Pahang, Malaysia > > David Winsemius > Alameda, CA, USA > > -- *Dr. Roslinazairimah Binti Zakaria* *Tel: +609-5492370; Fax. No.+609-5492766* *Email: roslinazairi...@ump.edu.my <roslinazairi...@ump.edu.my>; roslina...@gmail.com <roslina...@gmail.com>* Deputy Dean (Academic & Student Affairs) Faculty of Industrial Sciences & Technology University Malaysia Pahang Lebuhraya Tun Razak, 26300 Gambang, Pahang, Malaysia [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.