Dear Sir, I start using (R) 3 months ago, and I am still learning, I have a project and I am using R in this project, my friend helped me to build a code for this project and it's working perfect, but I need to make a small change in, it looks very simple but for me it's very complicated. I insert the code and I hope if you can help me this problem. I highlighted what exactly I need to change. This project is calculating the market and industry weighted returns for each based on the date levels.
sync = read.csv("country-14.csv",header=T) id.country = 14 sync = sync[sync$country!="country" & sync$country==id.country,-c(2,5)] sync$price=as.numeric(as.character(sync$price)) sync$mv=as.numeric(as.character(sync$mv)) attach(sync) #### Calculate returns and add to the dataset n.comp = nlevels(as.factor(as.character(sync$company_name))) comp.names = levels(as.factor(as.character(sync$company_name))) data = vector("list",n.comp) for(i in 1:n.comp){ temp = sync[sync$company_name==comp.names[i],] data[[i]] = cbind(temp,c(NA,diff(temp$price)/temp$price[1:(length(temp$price)-1)])) } sync = do.call(rbind,data) names(sync)[7] = "returns" detach(sync) attach(sync) #### Fill industry_code column industry_code=rep(NA,dim(sync)[1]) for(i in 1:dim(sync)[1]){ if(nchar(as.character(company_code[i])) == 3){ industry_code[i] = as.numeric(substr(as.character(company_code[i]),1,1)) } else { industry_code[i] = as.numeric(substr(as.character(company_code[i]),1,2)) } print((i/dim(sync)[1])*100) } sync = cbind(sync,as.factor(industry_code)) names(sync)[8] = "industry_code" detach(sync) attach(sync) #### Calculate market weighted returns and add to the dataset market_returns = rep(NA,dim(sync)[1]) industry_returns = rep(NA,dim(sync)[1]) for(i in 1:nlevels(date)){ data = sync[date==levels(date)[i],] data$company_name = as.factor(as.character(data$company_name)) for(m in 1:nlevels(data$company_name)){ index1 = data$company_name == levels(data$company_name)[m] index2 = date==levels(date)[i] & company_name==levels(data$company_name)[m] market_returns[index2] = (sum(data$returns*(data$mv/sum(data$mv,na.rm=TRUE)),na.rm=TRUE) - (data$returns[index1]*(data$mv[index1]/sum(data$mv,na.rm=TRUE))))/(nlevels(data$company_name)-1) ## this what I need to change, instead of using the number of levels companies in the dataset (nlevels(data$company_name) , I need to put the number of returns values(data$returns) without NA (by the way this code is calculating returns at the date level as you can see from above) } print(i/nlevels(date)) } sync = cbind(sync,market_returns) names(sync)[9] = "market_returns" detach(sync) attach(sync) #### Calculate industry weighted returns and add to the dataset for(i in 1:nlevels(date)){ for(k in 1:nlevels(as.factor(as.character(industry_code)))){ data1 = sync[date==levels(date)[i] & industry_code==levels(as.factor(as.character(industry_code)))[k],] data1$company_name = as.factor(as.character(data1$company_name)) for(l in 1:nlevels(data1$company_name)){ index3 = data1$company_name == levels(data1$company_name)[l] index4 = date==levels(date)[i] & company_name==levels(data1$company_name)[l] industry_returns[index4] = (sum(data1$returns*(data1$mv/sum(data1$mv,na.rm=TRUE)),na.rm=TRUE) - (data1$returns[index3]*(data1$mv[index3]/sum(data1$mv,na.rm=TRUE))))/(nlevels(data1$company_name)-1) ## also here I need to change, instead of using the number of levels companies in the dataset (nlevels(data1$company_name) , I need to put the number of returns values(data1$returns) without NA (by the way this code is calculating returns at the date level and industry level as you can see from above) } } print(i/nlevels(date)) } sync = cbind(sync,industry_returns) names(sync)[10] = "industry_returns" detach(sync) attach(sync) year = apply(as.matrix(sync$date),1,function(x) as.factor(substr(as.character(x),7,10))) sync = cbind(sync,as.factor(year)) names(sync)[11] = "year" sync = sync[sync$year!="1999",] sync$year = as.factor(as.character(sync$year)) detach(sync) attach(sync) year = as.factor(as.character(year)) industry_code = as.factor(as.character(industry_code)) comp.per.ind = rep(NA, dim(sync)[1]) for(i in 1:nlevels(year)){ for(j in 1:nlevels(industry_code)){ index = year==levels(year)[i] & industry_code==levels(industry_code)[j] data = sync[index,] comp.per.ind[index] = nlevels(as.factor(as.character(data$company_name))) } } sync = cbind(sync,as.factor(comp.per.ind)) names(sync)[12] = "comp.per.ind" detach(sync) attach(sync) write.csv(sync,paste("Returns_data",id.country,".csv",sep="")) Thank you for your help Rami Alzebdieh [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.