This is an OpenPGP/MIME signed message (RFC 2440 and 3156) --------------enig5A700CC15DEABD01445A1B70 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable
Hi, I have modified the code originally posted to include capability for arbitrary weightings for the rows and columns (which apply in the nominal-ordinal and the ordinal-ordinal cases). The code is pasted below (as well as an example based on job satisfaction data). However, I cannot figure out how to make the output of class "htest" make nicely-formatted output. When I use the class htest, with a list of df's, test statistics, etc, the printed output looks ugly. (See the last few commented lines at the end of the function for my aborted attempt to do this. To see what happens when that code is used, comment out the last four uncommented lines (the ones that mention variable "result"), and uncomment all the commented lines at the end.) I am an R newbie by any standard, so if anyone could clue me in on how to use htest in the case of multiple statistics and dfs, that would be great. Once we have that, does that mean that this can serve as a good replacement for mantelhaen.test() in R? Any comments appreciated! code follows: ------ # takes a 3-d array x, scores for rows, and scores for columns # runs nominal-nominal, ordinal-nominal, and ordinal-ordinal CMH tests # this function is based on one posted at # http://bugs.r-project.org/cgi-bin/R/wishlist?id=3D7779;user=3Dguest mh.test <- function(x, row_scores=3DNULL, col_scores=3DNULL) { if (length(dim(x)) !=3D 3){ stop("x must be a 3 dimensional array") } if (any(apply(x, 3, sum) < 2)){ stop("sample size in each stratum must be > 1") } I <- dim(x)[1]; J <- dim(x)[2]; K <- dim(x)[3] if ( (length(row_scores) !=3D I & !is.null(row_scores) ) | (length(col_scores) !=3D J & !is.null(col_scores)) ){ stop("scores dimensions must equal to data dimensions") } if (is.null(row_scores)){ row_scores =3D 1:I } if (is.null(col_scores)) { col_scores =3D 1:J } df_GMH <- (I - 1) * (J - 1); df_SMH <- I - 1; df_CSMH <- 1 A_GMH <- cbind(diag(I-1), 0) %x% cbind(diag(J-1), 0) #A_SMH <- cbind(diag(I-1), 0) %x% t(1:J) A_SMH <- cbind(diag(I-1), 0) %x% t(col_scores) #A_CSMH <- t(1:I) %x% t(1:J) A_CSMH <- t(row_scores) %x% t(col_scores) Y_GMH <- matrix(0, nc =3D 1, nr =3D df_GMH) Y_SMH <- matrix(0, nc =3D 1, nr =3D df_SMH) Y_CSMH <- matrix(0, nc =3D 1, nr =3D df_CSMH) S_GMH <- matrix(0, nc =3D df_GMH, nr =3D df_GMH) S_SMH <- matrix(0, nc =3D df_SMH, nr =3D df_SMH) S_CSMH <- matrix(0, nc =3D df_CSMH, nr =3D df_CSMH) for(k in 1:K) { V <- NULL f <- x[, , k] ntot <- sum(f) p_ip <- apply(f, 1, sum) / ntot p_pj <- apply(f, 2, sum) / ntot m <- p_ip %x% p_pj * ntot V <- ntot^2 * ((diag(p_ip) - p_ip %*% t(p_ip)) %x% (diag(p_pj) - p_pj %*% t(p_pj))) / (ntot-1) Y_GMH <- Y_GMH + A_GMH %*% (c(t(f)) - m) Y_SMH <- Y_SMH + A_SMH %*% (c(t(f)) - m) Y_CSMH <- Y_CSMH + A_CSMH %*% (c(t(f)) - m) S_GMH <- S_GMH + A_GMH %*% V %*% t(A_GMH) S_SMH <- S_SMH + A_SMH %*% V %*% t(A_SMH) S_CSMH <- S_CSMH + A_CSMH %*% V %*% t(A_CSMH) } Q_GMH <- t(Y_GMH) %*% solve(S_GMH) %*% Y_GMH Q_SMH <- t(Y_SMH) %*% solve(S_SMH) %*% Y_SMH Q_CSMH <- t(Y_CSMH) %*% solve(S_CSMH) %*% Y_CSMH STATISTIC <- c(Q_CSMH, Q_SMH, Q_GMH) PARAMETER <- c(df_CSMH, df_SMH, df_GMH) PVAL <- pchisq(STATISTIC, PARAMETER, lower =3D FALSE) result <- cbind(STATISTIC, PARAMETER, PVAL) rownames(result) <- list("Nonzero Correlation", "Row Mean Scores Differ", "General Association") colnames(result) <- list("Statistic", "df", "p-value") return (result) #DNAME <- deparse(substitute(x)) #METHOD =3D "Cochran-Mantel-Haenszel Statistics (Based on Table Score= s)" #names(STATISTIC) =3D list("Nonzero Correlation", "Row Mean Scores Differ", "General Association") #names(PARAMETER) =3D list("df", "df", "df") #structure(list(statistic =3D STATISTIC, parameter =3D PARAMETER, p.value =3D PVAL, method =3D METHOD, data.name=3DDNAME), class =3D "power= =2Ehtest") #data.name =3D DNAME, observed =3D x, expected =3D E, residuals =3D= (x - E)/sqrt(E)), class =3D "htest") } --------test results:-------- > Satisfaction <- + as.table(array(c(1, 2, 0, 0, 3, 3, 1, 2, + 11, 17, 8, 4, 2, 3, 5, 2, + 1, 0, 0, 0, 1, 3, 0, 1, + 2, 5, 7, 9, 1, 1, 3, 6), + dim =3D c(4, 4, 2), + dimnames =3D + list(Income =3D + c("<5000", "5000-15000", + "15000-25000", ">25000"), + "JobSatisfaction" =3D + c("V_D", "L_S", "M_S", "V_S"), + Gender =3D c("Female", "Male")))) > Satisfaction , , Gender =3D Female JobSatisfaction Income V_D L_S M_S V_S <5000 1 3 11 2 5000-15000 2 3 17 3 15000-25000 0 1 8 5 >25000 0 2 4 2 , , Gender =3D Male JobSatisfaction Income V_D L_S M_S V_S <5000 1 1 2 1 5000-15000 0 3 5 1 15000-25000 0 0 7 3 >25000 0 1 9 6 > mh.test(Satisfaction, c(3,10,20,35), c(1,3,4,5)) Statistic df p-value Nonzero Correlation 6.156301 1 0.01309447 Row Mean Scores Differ 9.034222 3 0.02883933 General Association 10.200089 9 0.33453118 --------------enig5A700CC15DEABD01445A1B70 Content-Type: application/pgp-signature; name="signature.asc" Content-Description: OpenPGP digital signature Content-Disposition: attachment; filename="signature.asc" -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.2.2 (GNU/Linux) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org iD8DBQFFRqs35/k4vslVlLIRAg/5AJ93AGEdIakk7T27coDij6jAdo7n6gCeNP/0 YoIcLAawlVvg4yILUXHJZM0= =+GEH -----END PGP SIGNATURE----- --------------enig5A700CC15DEABD01445A1B70-- ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel