# Hi, maybe this helps:

# Read the tab-separated file; text columns stay character, not factor.
dat1 <- read.csv(
  "dat7.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  sep = "\t"
)

# Keep only the rows whose evnmt_brutal value is not NA.
dat.bru <- dat1[!is.na(dat1$evnmt_brutal), ]
#' Extract every 0, 0, 1 run of `evnmt_brutal` for each patient
#'
#' Rows are grouped by `patient_id` and kept in their input order. Within a
#' patient, every row with `evnmt_brutal == 1` whose two preceding rows both
#' have `evnmt_brutal == 0` is kept together with those two rows. A triple
#' whose three `number` values are not all distinct is discarded.
#'
#' @param dat A data frame with the columns `patient_id`, `evnmt_brutal`,
#'   `number` and `basdai_d`. The caller is expected to have removed rows
#'   with a missing `evnmt_brutal` beforehand.
#' @return An unnamed list of length two:
#'   1. a list, named by patient id, of data frames holding the selected
#'      triples (patients without any triple are left out);
#'   2. a one-column matrix, with one row per patient in (1), holding the
#'      mean of `basdai_d` over the middle row of each triple; `NULL` when
#'      no patient has a triple.
fun2 <- function(dat) {
  by_patient <- split(dat, dat$patient_id)

  triples <- lapply(by_patient, function(x) {
    flag <- x$evnmt_brutal
    # A triple needs two rows before the event, so it cannot end before
    # row 3 of the patient.
    ends <- which(flag == 1)
    ends <- ends[ends >= 3L]
    ends <- ends[which(flag[ends - 1L] == 0 & flag[ends - 2L] == 0)]
    # Drop triples in which a `number` value repeats.
    distinct <- vapply(
      ends,
      function(e) anyDuplicated(x$number[(e - 2L):e]) == 0L,
      logical(1)
    )
    ends <- ends[distinct]
    # Expand each end position e into the row indices e - 2, e - 1, e.
    x[rep(ends, each = 3L) - 2:0, ]
  })
  triples <- triples[vapply(triples, nrow, integer(1)) > 0L]

  # Rows 2, 5, 8, ... are the middle rows of the stacked triples.
  mean_mid <- do.call(rbind, lapply(triples, function(x) {
    mean(x[seq(2L, nrow(x), by = 3L), "basdai_d"])
  }))

  list(triples, mean_mid)
}

# Example call from the original post. It is skipped when dat.bru has not
# been created, so that this definition can be loaded on its own.
# To look at the first rows only: lapply(fun2(dat.bru), head, 2)
if (exists("dat.bru")) {
  fun2(dat.bru)
}

## output from first 2 patients
# [[1]]
# [[1]]$`2`
#     X patient_id number responsed_at  t basdai_d evnmt_brutal
# 13 13          2     12   2011-07-05 12     -1.0            0
# 14 14          2     13   2011-08-07 13      0.9            0
# 15 15          2     14   2011-09-11 14     -0.8            1
#
# [[1]]$`5`
#     X patient_id number responsed_at  t basdai_d evnmt_brutal
# 52 52          5      8   2011-01-11  7     -2.8            0
# 53 53          5      9   2011-02-13  8      0.0            0
# 54 54          5     10   2011-03-19  9     -1.2            1
#
# [[2]]
#   [,1]
# 2  0.9
# 5  0.0

# A.K.
#
# ________________________________
# From: GUANGUAN LUO <guanguan...@gmail.com>
# To: arun <smartpink...@yahoo.com>
# Sent: Tuesday, June 4, 2013 3:54 AM
# Subject: choose the lines2
#
# Hello, Arun, now it is nearly the same problem. I want to know if I want
# to choose three period : one line with evnmt_brutal ==0 , one line with
# evnmt_brutal==0 and one line with evnmt_brutal==1, then I want to choose
# the second and third period for each patient, so that i can calculate the
# average of scores of basdai when evnmt_brutal==0 (in condition that the
# precedent line with evnmt_brutal==0) and evnmt_brutal==1. In writing with
# the phrase "if", if i have two conditions, i don't know how can i write
# that. This is when i want to choose the period with evnmt_brutal ==0 et
# evnmt_brutal==1 for each patient, you have written this code.
# If i want to add one condition that before the line evnmt_brutal==0,
# evnmt_brutal of that line equal to 0 too. The result i want to get is just
# the two last lines. Do you know how can i realize that?

# Example data from the original post. The load is skipped, with a message,
# when dat7.csv is not in the working directory, so that fun1 below can
# still be defined.
if (file.exists("dat7.csv")) {
  dat1 <- read.csv(
    "dat7.csv",
    header = TRUE,
    stringsAsFactors = FALSE,
    sep = "\t"
  )
  dat.bru <- dat1[!is.na(dat1$evnmt_brutal), ]
} else {
  message("dat7.csv not found; skipping the example data load.")
}

#' Extract every 0 -> 1 transition of `evnmt_brutal` for each patient
#'
#' Rows are grouped by `patient_id` and kept in their input order. Within a
#' patient, every row with `evnmt_brutal == 1` that directly follows a row
#' with `evnmt_brutal == 0` is kept together with that preceding row.
#'
#' @param dat A data frame with the columns `patient_id` and `evnmt_brutal`.
#'   The caller is expected to have removed rows with a missing
#'   `evnmt_brutal` beforehand.
#' @return A data frame with the same columns as `dat`, holding the selected
#'   row pairs of all patients stacked in patient order, with row names
#'   renumbered from 1. A zero-row data frame is returned when nothing
#'   qualifies.
fun1 <- function(dat) {
  by_patient <- split(dat, dat$patient_id)

  pairs <- lapply(by_patient, function(x) {
    flag <- x$evnmt_brutal
    # An event in the first row has no preceding row to pair with.
    ones <- which(flag == 1)
    ones <- ones[ones >= 2L]
    hits <- ones[which(flag[ones - 1L] == 0)]
    x[sort(c(hits - 1L, hits)), ]
  })

  # do.call(rbind, list()) is NULL, so an empty input is handled up front.
  if (length(pairs) == 0L) {
    return(dat[0L, , drop = FALSE])
  }

  res <- do.call(rbind, pairs)
  # NULL resets to automatic row names and, unlike 1:nrow(res), is also
  # valid for a zero-row result.
  row.names(res) <- NULL
  res
}

# Example calls from the original post; skipped when dat.bru is not loaded.
if (exists("dat.bru")) {
  fun1(dat.bru)
}
if (exists("dat.bru")) {
  head(fun1(dat.bru), 10)
}
#     X patient_id number responsed_at  t basdai_d evnmt_brutal
# 1  14          2     13   2011-08-07 13    0.900            0
# 2  15          2     14   2011-09-11 14   -0.800            1
# 3  22          3      2   2010-06-29  1   -0.800            0
# 4  23          3      3   2010-08-05  2    0.000            1
# 5  24          3      4   2010-09-05  3    1.200            0
# 6  25          3      5   2010-10-13  4    1.925            1
# 7  26          3      6   2010-11-15  5   -2.525            0
# 8  27          3      7   2010-12-18  6   -0.200            1
# 9  53          5      9   2011-02-13  8    0.000            0
# 10 54          5     10   2011-03-19  9   -1.200            1

# ______________________________________________
# R-help@r-project.org mailing list
# https://stat.ethz.ch/mailman/listinfo/r-help
# PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
# and provide commented, minimal, self-contained, reproducible code.