#subject: type III sum of squares - anova() Anova() AnovaM()
#R-version: 2.12.2

#Hello everyone,

#I am currently evaluating experimental data of a  two factor 
experiment. to illustrate de my problem I will use following #dummy 
dataset: Factor "T1" has 3 levels ("A","B","C") and factor "T2" has 2 
levels "E" and "F". The design is #completly balanced, each factor 
combinations has 4 replicates.

#the dataset looks like this:

T1<-(c(rep(c("A","B","C"),each=8)))
T2<-(c(rep(rep(c("E","F"),each=4),3)))
RESPONSE<-c(1,2,3,2,2,1,3,2,9,8,8,9,6,5,5,6,5,5,5,6,1,2,3,3)
  DF<-as.data.frame(cbind(T1,T2,RESPONSE))
DF$RESPONSE<-as.numeric(DF$RESPONSE)

 > DF
    T1 T2 RESPONSE
1   A  E        1
2   A  E        2
3   A  E        3
4   A  E        2
5   A  F        2
6   A  F        1
7   A  F        3
8   A  F        2
9   B  E        7
10  B  E        6
11  B  E        6
12  B  E        7
13  B  F        5
14  B  F        4
15  B  F        4
16  B  F        5
17  C  E        4
18  C  E        4
19  C  E        4
20  C  E        5
21  C  F        1
22  C  F        2
23  C  F        3
24  C  F        3

library(biology)
replications(RESPONSE ~ T1*T2,data=DF)
    T1    T2 T1:T2
     8    12     4
  is.balanced(RESPONSE ~ T1*T2,data=DF)
[1] TRUE


#Now I would like to know whether T1, T2 or T1*T2 have a significant 
effect on RESPONSE. As far as I know, the #theory says that I should use 
a type III sum of squares, but the theory also says that if the design 
is completely #balanced, there is no difference between type I,II or III 
sum of squares.

#so I first fit a linear model:

my.anov<-lm(RESPONSE~T1+T2+T1:T2)

#then I do a normal Anova

 > anova(my.anov)

Analysis of Variance Table

Response: RESPONSE
           Df Sum Sq Mean Sq F value    Pr(>F)
T1         2  103.0  51.500  97.579 2.183e-10 ***
T2         1   24.0  24.000  45.474 2.550e-06 ***
T1:T2      2   12.0   6.000  11.368  0.000642 ***
Residuals 18    9.5   0.528

#When I do the same with the Anova() function from the "car" package I 
get the same result

Anova(my.anov)

Anova Table (Type II tests)

Response: RESPONSE
           Sum Sq Df F value    Pr(>F)
T1         103.0  2  97.579 2.183e-10 ***
T2          24.0  1  45.474 2.550e-06 ***
T1:T2       12.0  2  11.368  0.000642 ***
Residuals    9.5 18

#(type two sees to be the default and type="I" produces an error (why?))

#yet, when I specify type="III" it gives me something completely different:

Anova(my.anov,type="III")
Anova Table (Type III tests)

Response: RESPONSE
             Sum Sq Df F value    Pr(>F)
(Intercept)   16.0  1  30.316 3.148e-05 ***
T1            84.5  2  80.053 1.100e-09 ***
T2             0.0  1   0.000  1.000000
T1:T2         12.0  2  11.368  0.000642 ***
Residuals      9.5 18

#an the AnovaM() function from the "biology" package does the same for 
type I and II and produces the following #result:

library(biology)
  AnovaM(my.anov,type="III")
             Df Sum Sq Mean Sq F value   Pr(>F)
T1           2   84.5  42.250  80.053 1.10e-09 ***
T2           1   24.0  24.000  45.474 2.55e-06 ***
T1:T2        2   12.0   6.000  11.368 0.000642 ***
Residuals   18    9.5   0.528

#Is type 3 the Type I should use and why do the results differ if the 
design is balanced? I am really confused, it would #be great if someone 
could help me out!

#Thanks a lot for your help!

#/Fabian
#University of Gothenburg



















        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to