#Here is I think an easier way of coding all the components you need. #The within-group variances, you get with this function:
apply(iris[,1:4],2,function(x) tapply(x,iris$Species,var)) #You can get what you computed by taking the column means. apply(apply(iris[,1:4],2,function(x) tapply(x,iris$Species,var)),2,mean) #HOWEVER, note that this takes unweighted column means. If your groups contain an UNequal number of observations, you do not want to take the UNweighted means. #Here is how you get the group and grand means: apply(iris[,1:4],2,function(x) tapply(x,iris$Species,mean)) #group means apply(iris[,1:4],2,mean) #grand means #And you also need the number of observations in each bin, I guess: apply(iris[,1:4],2,function(x) tapply(x,iris$Species,length)) #I don't want to ruin all the fun for you. HTH, Daniel Coghlan, Avril wrote: > > Hello, > > I have been looking for functions for calculating the within-groups > variance and between-groups variance, for the case where you have > several numerical variables describing samples from a number of groups. > > I didn't find such functions in R, so wrote my own versions myself (see > below). I can calculate the within- and between-groups variance for the > Sepal.length variable (iris[1]) in the "iris" data set, by typing: >> calcWithinGroupsVariance(iris[1],iris[5]) > [1] 0.2650082 >> calcBetweenGroupsVariance(iris[1],iris[5]) > [1] 0.4300145 > > I am wondering however if there are functions for doing this already in > R? > I would prefer to use a standard R function if one exists. > > Kind Regards, > Avril > > > Within-Groups Variance: > ======================= > > calcWithinGroupsVariance <- function(variable,groupvariable) > { > # find out how many values the group variable can take > groupvariable2 <- as.factor(groupvariable[[1]]) > levels <- levels(groupvariable2) > numlevels <- length(levels) > # get the mean and standard deviation for each group: > numtotal <- 0 > denomtotal <- 0 > for (i in 1:numlevels) > { > leveli <- levels[i] > levelidata <- variable[groupvariable==leveli,] > levelilength <- length(levelidata) > # get the mean and standard deviation for group i: > meani <- mean(levelidata) > sdi <- sd(levelidata) > numi <- (levelilength - 1)*(sdi * sdi) > denomi <- levelilength > numtotal <- numtotal + numi > denomtotal <- denomtotal + denomi > } > # calculate the within-groups variance > Vw <- numtotal / (denomtotal - numlevels) > return(Vw) > } > > Between-Groups-Variance: > ======================== > > calcBetweenGroupsVariance <- function(variable,groupvariable) > { > # find out how many values the group variable can take > groupvariable2 <- as.factor(groupvariable[[1]]) > levels <- levels(groupvariable2) > numlevels <- length(levels) > # calculate the overall grand mean: > grandmean <- mean(variable) > # get the mean and standard deviation for each group: > numtotal <- 0 > denomtotal <- 0 > for (i in 1:numlevels) > { > leveli <- levels[i] > levelidata <- variable[groupvariable==leveli,] > levelilength <- length(levelidata) > # get the mean and standard deviation for group i: > meani <- mean(levelidata) > sdi <- sd(levelidata) > numi <- levelilength * ((meani - grandmean)^2) > denomi <- levelilength > numtotal <- numtotal + numi > denomtotal <- denomtotal + denomi > } > # calculate the between-groups variance > Vb <- numtotal / (denomtotal - numlevels) > Vb <- Vb[[1]] > return(Vb) > } > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. > -- View this message in context: http://r.789695.n4.nabble.com/within-groups-variance-and-between-groups-variance-tp3769027p3769248.html Sent from the R help mailing list archive at Nabble.com. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.