# try this:
#
# Rearrange the allele table so that, for every sample, the first letters of
# A1..A4 are combined into one column, the second letters into the next
# column, and so on (one output column per sequence position).

# Read the example data. `text =` is used instead of textConnection() so that
# no connection is left open after the call.
x <- read.table(
  text = "SampleID A1 A2 A3 A4
GM920222 GATTGCC GATTGCC GATAGAC GATAGAC
GM930040 GTCATCA GAGTGCA ACTATAA GATTGCC
GM930040 GTCATCA GAGTGCA ACTATAA GATTGCC
GM960023 GATTGCC GTCATCA GATTGCC GATTGCC
GM920224 ACTAGAA GTCATCA GTCATCA ACTAGAA
GM920224 ACTAGAA GTCATCA GTCATCA ACTAGAA
GM920034 GATTGCC GTCATCA GATTGCA GATTGCA
GM920096 GATTGCC GATTGCC GATTGCA GATTGCC
GM930029 GTCATCA GATTGCC GTCATCA GATTGCC
GM940031 GATTGCC GAGTGCA GATTGCA ACTAGAA
GM960028 GATTGCC GAGTGCA GATTGCA ACTAGAA
GM980007 GTCATCA GATTGCC ACTTGAA GTCATCA
GM970009 ACTAGAA GTCAGAA GTCAGCA ACTAGCA
GM930026 ACTAGAA GAGTGCA GAGTGCA ACTAGAA
GM920031 GATTGCC GTCATCA GATTGCC GATTGCC
GM990105 GATTGCC GATTGCC GTCAGCA GTCAGCA
GM920202 GATTGCC GATTGCC GATTGCC GATTGCC
GM920089 GAGTGCA GTCAGAA ACTATCA GATTGCC
GM980051 ACTAGAA ACTAGAA GATAGCC GATAGCC
GM930109 GTCATCA GAGTGCA GTTTTAA ACTAGAA
GM940039 GTCATCA GAGTGCA GTTTGCC ACTTTCA
GM050099 GAGTGCA GTCAGAA GTTATCC ACTTTCA
GM050099 GAGTGCA GTCAGAA GTTATCC ACTTTCA
GM030005 ACTAGAA GAGTGCA ACTAGAA ACTAGAA
GM050009 ACTAGAA GATTGCC GATTGCC ACTAGAA
GM990027 GATTGCC GAGTGCA GATTGCA GATTGCC",
  header = TRUE, as.is = TRUE
)
x <- as.matrix(x)

# rbind() on split sequences of unequal length would silently recycle letters,
# so insist that every sequence has the same number of characters.
stopifnot(length(unique(nchar(as.vector(x[, -1, drop = FALSE])))) <= 1L)

result <- t(apply(x, 1, function(sample_row) {
  # separate characters: one matrix row per allele, one column per position
  letters_by_allele <- do.call(rbind, strsplit(sample_row[-1], ""))
  # combine each column (position) across the alleles
  by_position <- apply(letters_by_allele, 2, paste, collapse = "")
  # add the ID; `[[` drops the "SampleID" name so the result stays unnamed
  c(sample_row[[1]], by_position)
}))
print(result)

# Output:
#       [,1]       [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]
#  [1,] "GM920222" "GGGG" "AAAA" "TTTT" "TTAA" "GGGG" "CCAA" "CCCC"
#  [2,] "GM930040" "GGAG" "TACA" "CGTT" "ATAT" "TGTG" "CCAC" "AAAC"
#  [3,] "GM930040" "GGAG" "TACA" "CGTT" "ATAT" "TGTG" "CCAC" "AAAC"
#  [4,] "GM960023" "GGGG" "ATAA" "TCTT" "TATT" "GTGG" "CCCC" "CACC"
#  [5,] "GM920224" "AGGA" "CTTC" "TCCT" "AAAA" "GTTG" "ACCA" "AAAA"
#  [6,] "GM920224" "AGGA" "CTTC" "TCCT" "AAAA" "GTTG" "ACCA" "AAAA"
#  [7,] "GM920034" "GGGG" "ATAA" "TCTT" "TATT" "GTGG" "CCCC" "CAAA"
#  [8,] "GM920096" "GGGG" "AAAA" "TTTT" "TTTT" "GGGG" "CCCC" "CCAC"
#  [9,] "GM930029" "GGGG" "TATA" "CTCT" "ATAT" "TGTG" "CCCC" "ACAC"
# [10,] "GM940031" "GGGA" "AAAC" "TGTT" "TTTA" "GGGG" "CCCA" "CAAA"
# [11,] "GM960028" "GGGA" "AAAC" "TGTT" "TTTA" "GGGG" "CCCA" "CAAA"
# [12,] "GM980007" "GGAG" "TACT" "CTTC" "ATTA" "TGGT" "CCAC" "ACAA"
# [13,] "GM970009" "AGGA" "CTTC" "TCCT" "AAAA" "GGGG" "AACC" "AAAA"
# [14,] "GM930026" "AGGA" "CAAC" "TGGT" "ATTA" "GGGG" "ACCA" "AAAA"
# [15,] "GM920031" "GGGG" "ATAA" "TCTT" "TATT" "GTGG" "CCCC" "CACC"
# [16,] "GM990105" "GGGG" "AATT" "TTCC" "TTAA" "GGGG" "CCCC" "CCAA"
# [17,] "GM920202" "GGGG" "AAAA" "TTTT" "TTTT" "GGGG" "CCCC" "CCCC"
# [18,] "GM920089" "GGAG" "ATCA" "GCTT" "TAAT" "GGTG" "CACC" "AAAC"
# [19,] "GM980051" "AAGG" "CCAA" "TTTT" "AAAA" "GGGG" "AACC" "AACC"
# [20,] "GM930109" "GGGA" "TATC" "CGTT" "ATTA" "TGTG" "CCAA" "AAAA"
# [21,] "GM940039" "GGGA" "TATC" "CGTT" "ATTT" "TGGT" "CCCC" "AACA"
# [22,] "GM050099" "GGGA" "ATTC" "GCTT" "TAAT" "GGTT" "CACC" "AACA"
# [23,] "GM050099" "GGGA" "ATTC" "GCTT" "TAAT" "GGTT" "CACC" "AACA"
# [24,] "GM030005" "AGAA" "CACC" "TGTT" "ATAA" "GGGG" "ACAA" "AAAA"
# [25,] "GM050009" "AGGA" "CAAC" "TTTT" "ATTA" "GGGG" "ACCA" "ACCA"
# [26,] "GM990027" "GGGG" "AAAA" "TGTT" "TTTT" "GGGG" "CCCC" "CAAC"
On Wed, May 19, 2010 at 8:29 AM, Laetitia Schmid <laeti...@gmt.su.se> wrote: > Dear Wu Gong and Peter Ehlers, > thank you very much for your help debugging my script. > > Now I have a general following up question: > Is there a straightforward way to rearrange the following dataset so that > all first letters of each column will be combined in one column, all the > second letters in a second column, all the third ones in a third column and > so on, resulting in 7 columns, > i.e. for the first individual (GM920222) GGGG AAAA TTTT TTAA GGGG CCAA CCCC > ? > > Thank you very much, > Laetitia > > SampleID A1 A2 A3 A4 > GM920222 GATTGCC GATTGCC GATAGAC GATAGAC > GM930040 GTCATCA GAGTGCA ACTATAA GATTGCC > GM930040 GTCATCA GAGTGCA ACTATAA GATTGCC > GM960023 GATTGCC GTCATCA GATTGCC GATTGCC > GM920224 ACTAGAA GTCATCA GTCATCA ACTAGAA > GM920224 ACTAGAA GTCATCA GTCATCA ACTAGAA > GM920034 GATTGCC GTCATCA GATTGCA GATTGCA > GM920096 GATTGCC GATTGCC GATTGCA GATTGCC > GM930029 GTCATCA GATTGCC GTCATCA GATTGCC > GM940031 GATTGCC GAGTGCA GATTGCA ACTAGAA > GM960028 GATTGCC GAGTGCA GATTGCA ACTAGAA > GM980007 GTCATCA GATTGCC ACTTGAA GTCATCA > GM970009 ACTAGAA GTCAGAA GTCAGCA ACTAGCA > GM930026 ACTAGAA GAGTGCA GAGTGCA ACTAGAA > GM920031 GATTGCC GTCATCA GATTGCC GATTGCC > GM990105 GATTGCC GATTGCC GTCAGCA GTCAGCA > GM920202 GATTGCC GATTGCC GATTGCC GATTGCC > GM920089 GAGTGCA GTCAGAA ACTATCA GATTGCC > GM980051 ACTAGAA ACTAGAA GATAGCC GATAGCC > GM930109 GTCATCA GAGTGCA GTTTTAA ACTAGAA > GM940039 GTCATCA GAGTGCA GTTTGCC ACTTTCA > GM050099 GAGTGCA GTCAGAA GTTATCC ACTTTCA > GM050099 GAGTGCA GTCAGAA GTTATCC ACTTTCA > GM030005 ACTAGAA GAGTGCA ACTAGAA ACTAGAA > GM050009 ACTAGAA GATTGCC GATTGCC ACTAGAA > GM990027 GATTGCC GAGTGCA GATTGCA GATTGCC > GM990066 GATTGCC GTCATCA GTCATCA GATTGCC > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and 
provide commented, minimal, self-contained, reproducible code. > -- Jim Holtman Cincinnati, OH +1 513 646 9390 What is the problem that you are trying to solve? ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.