Hi, I changed fun1() (it is now fun1New()). It should now be possible to get all the possible combinations within each group.
# Load the two input data sets (tab-separated text and CSV).
final3New <- read.table(file = "real_data_cecilia.txt", sep = "\t", header = TRUE)
final3New1 <- read.csv("real_data_cecilia_new.csv")

# Build every matching pair of rows within each year/industry group.
#
# Within each group defined by `dat$year` and `dat$industry`, all 2-row
# combinations are formed. A pair is kept when
#   * exactly one of its rows counts as dummy == 1 (sum(dummy) == 1),
#   * the absolute difference of the two `dimension` values is < `number`, and
#   * the first row's `dimension` lies strictly within +/- `percent` of the
#     second row's `dimension`.
#
# Args:
#   dat:     data frame with columns `year`, `industry`, `dummy`, `dimension`.
#   percent: relative tolerance (e.g. 0.05 for 5%), relative to the second
#            row of each pair.
#   number:  absolute tolerance on the difference in `dimension`.
#
# Returns:
#   A data frame with the kept pairs stacked as consecutive rows (rows 1-2 are
#   the first pair, rows 3-4 the second, ...), row names reset to 1..n.
#   When no pair qualifies, a zero-row data frame with the columns of `dat`
#   is returned (the previous version raised an error in that case).
fun1New <- function(dat, percent, number) {
  groups <- split(dat, list(dat$year, dat$industry))
  # Only groups with at least two rows can form a pair.
  groups <- groups[vapply(groups, nrow, integer(1)) > 1]

  matched <- lapply(groups, function(grp) {
    pair_idx <- combn(seq_len(nrow(grp)), 2, simplify = FALSE)
    pairs <- lapply(pair_idx, function(idx) {
      pair <- grp[idx, ]
      # isTRUE() drops pairs whose test is NA instead of emitting NA rows.
      if (!isTRUE(sum(pair$dummy) == 1)) {
        return(NULL)
      }
      close_abs <- abs(diff(pair$dimension)) < number
      # Same arithmetic form as before so boundary cases round identically.
      upper <- pair$dimension[2] + (pair$dimension[2] * percent)
      lower <- pair$dimension[2] - (pair$dimension[2] * percent)
      close_rel <- (pair$dimension[1] < upper) & (pair$dimension[1] > lower)
      if (isTRUE(close_abs) && isTRUE(close_rel)) pair else NULL
    })
    do.call(rbind, Filter(Negate(is.null), pairs))
  })

  res <- do.call(rbind, Filter(Negate(is.null), unname(matched)))
  if (is.null(res)) {
    # No pair qualified anywhere: return an empty frame with the same columns.
    return(dat[0, , drop = FALSE])
  }
  row.names(res) <- seq_len(nrow(res))
  res
}

## Applying fun1New
res5Percent <- fun1New(final3New, 0.05, 50)
dim(res5Percent)
#[1] 718 5
res5PercentHigh <- fun1New(final3New, 0.05, 500000)
dim(res5PercentHigh)
#[1] 2788 5
res5Percent1 <- fun1New(final3New1, 0.05, 50)
dim(res5Percent1)
#[1] 870 5
res5Percent1High <- fun1New(final3New1, 0.05, 500000)
dim(res5Percent1High)
#[1] 2902 5
res10Percent <- fun1New(final3New, 0.10, 200)
dim(res10Percent)
#[1] 2928 5
res10Percent1 <- fun1New(final3New1, 0.10, 200)
dim(res10Percent1)
#[1] 3092 5

# Remove every pair that involves a row already seen earlier in `dat`.
#
# A row flagged by duplicated() is dropped together with its neighbour:
# the following row when the duplicate has dummy == 1, the preceding row
# when it has dummy == 0.
# NOTE(review): this relies on each pair being stored as a dummy == 1 row
# immediately followed by its dummy == 0 partner -- confirm that the input
# is ordered that way.
#
# Args:
#   dat: data frame with a `dummy` column, e.g. the result of fun1New().
#
# Returns:
#   `dat` without the dropped rows; `dat` unchanged when nothing is duplicated.
fun3 <- function(dat) {
  is_dup <- duplicated(dat)
  # Work with row positions rather than row names, so the result does not
  # depend on the row names being exactly 1..n.
  dup_one <- which(is_dup & dat$dummy == 1)
  dup_zero <- which(is_dup & dat$dummy == 0)
  drop_rows <- unique(c(dup_one, dup_one + 1, dup_zero, dup_zero - 1))
  drop_rows <- drop_rows[drop_rows >= 1 & drop_rows <= nrow(dat)]
  # dat[-integer(0), ] would select zero rows, so handle "nothing to drop"
  # explicitly.
  if (length(drop_rows) == 0) {
    return(dat)
  }
  dat[-drop_rows, ]
}

# Applying fun3()
res5F3 <- fun3(res5Percent)
dim(res5F3)
#[1] 278 5
res5F3High <- fun3(res5PercentHigh)
dim(res5F3High)
#[1] 546 5
res5F3_1 <- fun3(res5Percent1)
dim(res5F3_1)
#[1] 302 5
res5F3High_1 <- fun3(res5Percent1High)
dim(res5F3High_1)
#[1] 570 5
res10F3 <- fun3(res10Percent)
dim(res10F3)
#[1] 462 5
res10F3_1<- fun3(res10Percent1) #[1] 474 5 nrow(subset(res5F3,dummy==0)) #[1] 139 nrow(subset(res5F3,dummy==1)) #[1] 139 nrow(subset(res5F3High,dummy==1)) #[1] 273 nrow(subset(res5F3High,dummy==0)) #[1] 273 nrow(subset(res10F3,dummy==0)) #[1] 231 nrow(subset(res10F3,dummy==1)) #[1] 231 nrow(subset(res10F3_1,dummy==1)) #[1] 237 nrow(subset(res10F3_1,dummy==0)) #[1] 237 dim(unique(res5F3)) #[1] 278 5 dim(unique(res5F3High)) #[1] 546 5 dim(unique(res10F3_1)) #[1] 474 5 dim(unique(res10F3)) #[1] 462 5 A.K. ________________________________ From: Cecilia Carmo <cecilia.ca...@ua.pt> To: arun <smartpink...@yahoo.com> Sent: Friday, June 14, 2013 10:44 AM Subject: me again There are some matches that are missing. That is, it is possible to have more matches. I'm sending you a script and the data. Thank you. Cecília ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.