Here's a solution using the plyr library:

####################
library(plyr)
# NOTE: the "... <lines deleted here>" line inside the text below is a
# placeholder for the rows omitted from this message; paste the complete
# data set from the original post in its place before running.
dat <- read.table(header = TRUE, sep = ",",
                  as.is = TRUE,  ###### <---- as.is=TRUE
                  text = "site,tax_name,count,countTotal,countPercentage
CID_1,Cyanobacteria,46295,123509,37.483098398
CID_1,Proteobacteria,36120,123509,29.244832360
CID_1,Bacteroidetes,19546,123509,15.825567368
CID_1,Verrucomicrobia,7886,123509,6.384959801
CID_1,Firmicutes,2843,123509,2.301856545
... <lines deleted here>
CID_9,Armatimonadetes,27,77120,0.035010373
CID_9,Fusobacteria,25,77120,0.032417012
CID_9,Aquificae,13,77120,0.016856846
CID_9,Synergistetes,12,77120,0.015560166
CID_9,Deferribacteres,8,77120,0.010373444
CID_9,Thermotogae,7,77120,0.009076763
CID_9,Chrysiogenetes,3,77120,0.003890041
")

# Condense the rows of a single site to its top taxa plus one "Other" row.
#
# Arguments:
#   frm    data frame holding the rows of one site; must contain the columns
#          tax_name, count, countTotal and countPercentage.
#   n_top  number of rows with the highest countPercentage to keep
#          (default 6, giving at most 7 rows per site).
#
# Value:
#   A data frame with the n_top rows of highest countPercentage, followed --
#   only when the site has further rows -- by one row whose tax_name is
#   "Other", whose count and countTotal are NA, and whose countPercentage is
#   the sum over all rows NOT among the top n_top.
summ_cid <- function(frm, n_top = 6) {
  # Rank explicitly instead of relying on the input already being sorted.
  frm <- frm[order(frm$countPercentage, decreasing = TRUE), , drop = FALSE]

  is_top <- seq_len(nrow(frm)) <= n_top
  toprows <- frm[is_top, , drop = FALSE]
  rest <- frm[!is_top, , drop = FALSE]

  # Nothing left to aggregate: the site already fits in n_top rows.
  if (nrow(rest) == 0L) {
    return(toprows)
  }

  # Start from a real row so every column (including the site id) keeps its
  # type; assigning a c(...) vector as a row would coerce the numeric
  # columns to character.
  other <- toprows[1, , drop = FALSE]
  other$tax_name <- "Other"
  other$count <- NA
  other$countTotal <- NA
  other$countPercentage <- sum(rest$countPercentage)

  rbind(toprows, other)
}

result <- ddply(dat, "site", summ_cid)
####################

# Notes:
# 1. I needed to add the as.is=TRUE option to read.table()
# 2. The invocation of ddply() does *not* use summarize()
# 3. Because there is no use of summarize(), I have not figured out how to
#    use the dplyr package in this context.
-John > -----Original Message----- > From: R-help [mailto:r-help-boun...@r-project.org] On Behalf Of Chel Hee > Lee > Sent: Sunday, December 07, 2014 11:43 AM > To: Morway, Eric; R mailing list > Subject: Re: [R] Condensing data.frame > > > datBySite <- split(dat, dat$site) > > output <- lapply(datBySite, function(x){ > + x$idx <- seq_len(nrow(x)) > + x$grp <- ifelse(x$idx < 7, x$idx, 7) > + rval <- tapply(x$countPercentage, x$grp, sum) x$grp <- x$count <- > + x$countTotal <- NULL x <- x[seq_len(7), ] x$tax_name <- > + as.character(x$tax_name) x$tax_name[7] <- "Others" > + x$new <- rval > + return(x) > + }) > > > > head(do.call(rbind, output), 14) > site tax_name countPercentage idx new > CID_1.1 CID_1 Cyanobacteria 37.483098 1 37.483098 > CID_1.2 CID_1 Proteobacteria 29.244832 2 29.244832 > CID_1.3 CID_1 Bacteroidetes 15.825567 3 15.825567 > CID_1.4 CID_1 Verrucomicrobia 6.384960 4 6.384960 > CID_1.5 CID_1 Firmicutes 2.301857 5 2.301857 > CID_1.6 CID_1 Acidobacteria 2.075152 6 2.075152 > CID_1.7 CID_1 Others 1.675182 7 6.684533 > CID_10.27 CID_10 Proteobacteria 35.366606 1 35.366606 > CID_10.28 CID_10 Bacteroidetes 25.188484 2 25.188484 > CID_10.29 CID_10 Cyanobacteria 23.294828 3 23.294828 > CID_10.30 CID_10 Verrucomicrobia 6.970592 4 6.970592 > CID_10.31 CID_10 Acidobacteria 1.988448 5 1.988448 > CID_10.32 CID_10 Actinobacteria 1.644548 6 1.644548 > CID_10.33 CID_10 Others 1.582823 7 5.546493 > > > > I hope this helps. > > Chel Hee Lee > > On 12/07/2014 08:21 AM, Morway, Eric wrote: > > Using the dataset "dat" (found below), I'm seeking a way to condense > > down the data.frame such that each "site" (i.e., "CID_1"..."CID_13") > > has a maximum of 7 rows of post-processed data, where the first 6 have > > the highest "countPercentage" and the 7th row is the sum of > "countPercentage" > > from all other rows within that "site", and it is assigned the name > > "Other". 
So, for the first two sites in the provided data.frame, > > CID_1 & CID_10, they would reduce to: > > > > CID_1 Cyanobacteria 37.48 > > CID_1 Proteobacteria 29.24 > > CID_1 Bacteroidetes 15.83 > > CID_1 Verrucomicrobia 6.38 > > CID_1 Firmicutes 2.30 > > CID_1 Acidobacteria 2.08 > > CID_1 Other 6.68 > > CID_10 Proteobacteria 35.37 > > CID_10 Bacteroidetes 25.19 > > CID_10 Cyanobacteria 23.29 > > CID_10 Verrucomicrobia 6.97 > > CID_10 Acidobacteria 1.99 > > CID_10 Actinobacteria 1.64 > > CID_10 Other 5.55 > > > > > > dat <- read.table(header=TRUE, sep=",", > > text="site,tax_name,count,countTotal,countPercentage > > CID_1,Cyanobacteria,46295,123509,37.483098398 > > CID_1,Proteobacteria,36120,123509,29.244832360 > > CID_1,Bacteroidetes,19546,123509,15.825567368 > > CID_1,Verrucomicrobia,7886,123509,6.384959801 > > CID_1,Firmicutes,2843,123509,2.301856545 > > CID_1,Acidobacteria,2563,123509,2.075152418 > > CID_1,Actinobacteria,2069,123509,1.675181566 > > CID_1,Planctomycetes,1481,123509,1.199102899 > > CID_1,Chloroflexi,1181,123509,0.956205621 > > CID_1,Gemmatimonadetes,956,123509,0.774032662 > > CID_1,Spirochaetes,688,123509,0.557044426 > > CID_1,Lentisphaerae,526,123509,0.425879895 > > CID_1,Ignavibacteriae,324,123509,0.262329061 > > CID_1,Chlorobi,238,123509,0.192698508 > > CID_1,Nitrospirae,230,123509,0.186221247 > > CID_1,Nitrospinae,169,123509,0.136832134 > > CID_1,Elusimicrobia,131,123509,0.106065145 > > CID_1,Tenericutes,114,123509,0.092300966 > > CID_1,Fibrobacteres,72,123509,0.058295347 > > CID_1,Thermotogae,21,123509,0.017002810 > > CID_1,Fusobacteria,21,123509,0.017002810 > > CID_1,Armatimonadetes,15,123509,0.012144864 > > CID_1,Synergistetes,10,123509,0.008096576 > > CID_1,Deinococcus-Thermus,6,123509,0.004857946 > > CID_1,Deferribacteres,2,123509,0.001619315 > > CID_1,Caldiserica,2,123509,0.001619315 > > CID_10,Proteobacteria,16043,45362,35.366606411 > > CID_10,Bacteroidetes,11426,45362,25.188483753 > > CID_10,Cyanobacteria,10567,45362,23.294828270 > > 
CID_10,Verrucomicrobia,3162,45362,6.970592126 > > CID_10,Acidobacteria,902,45362,1.988448481 > > CID_10,Actinobacteria,746,45362,1.644548300 > > CID_10,Firmicutes,718,45362,1.582822627 > > CID_10,Gemmatimonadetes,358,45362,0.789206825 > > CID_10,Planctomycetes,357,45362,0.787002337 > > CID_10,Chloroflexi,265,45362,0.584189410 > > CID_10,Spirochaetes,235,45362,0.518054759 > > CID_10,Ignavibacteriae,177,45362,0.390194436 > > CID_10,Lentisphaerae,108,45362,0.238084741 > > CID_10,Nitrospinae,75,45362,0.165336625 > > CID_10,Nitrospirae,58,45362,0.127860324 > > CID_10,Chlorobi,44,45362,0.096997487 > > CID_10,Elusimicrobia,28,45362,0.061725673 > > CID_10,Fibrobacteres,26,45362,0.057316697 > > CID_10,Armatimonadetes,15,45362,0.033067325 > > CID_10,Deinococcus-Thermus,13,45362,0.028658348 > > CID_10,Tenericutes,10,45362,0.022044883 > > CID_10,Synergistetes,9,45362,0.019840395 > > CID_10,Fusobacteria,9,45362,0.019840395 > > CID_10,Deferribacteres,6,45362,0.013226930 > > CID_10,Thermotogae,3,45362,0.006613465 > > CID_10,Caldiserica,2,45362,0.004408977 > > CID_11,Proteobacteria,10019,31387,31.920858954 > > CID_11,Cyanobacteria,8811,31387,28.072131774 > > CID_11,Bacteroidetes,7930,31387,25.265237200 > > CID_11,Verrucomicrobia,1750,31387,5.575556759 > > CID_11,Firmicutes,806,31387,2.567942142 > > CID_11,Acidobacteria,548,31387,1.745945774 > > CID_11,Actinobacteria,434,31387,1.382738076 > > CID_11,Chloroflexi,203,31387,0.646764584 > > CID_11,Planctomycetes,197,31387,0.627648389 > > CID_11,Gemmatimonadetes,192,31387,0.611718227 > > CID_11,Ignavibacteriae,87,31387,0.277184822 > > CID_11,Spirochaetes,80,31387,0.254882595 > > CID_11,Tenericutes,71,31387,0.226208303 > > CID_11,Fusobacteria,67,31387,0.213464173 > > CID_11,Lentisphaerae,54,31387,0.172045751 > > CID_11,Chlorobi,40,31387,0.127441297 > > CID_11,Nitrospinae,33,31387,0.105139070 > > CID_11,Armatimonadetes,22,31387,0.070092714 > > CID_11,Fibrobacteres,15,31387,0.047790487 > > CID_11,Nitrospirae,13,31387,0.041418422 > > 
CID_11,Elusimicrobia,13,31387,0.041418422 > > CID_11,Deinococcus-Thermus,2,31387,0.006372065 > > CID_12,Cyanobacteria,241,644,37.422360248 > > CID_12,Bacteroidetes,210,644,32.608695652 > > CID_12,Proteobacteria,118,644,18.322981366 > > CID_12,Verrucomicrobia,38,644,5.900621118 > > CID_12,Acidobacteria,11,644,1.708074534 > > CID_12,Ignavibacteriae,6,644,0.931677019 > > CID_12,Lentisphaerae,5,644,0.776397516 > > CID_12,Firmicutes,5,644,0.776397516 > > CID_12,Planctomycetes,3,644,0.465838509 > > CID_12,Fusobacteria,3,644,0.465838509 > > CID_12,Tenericutes,2,644,0.310559006 > > CID_12,Actinobacteria,2,644,0.310559006 > > CID_13,Cyanobacteria,8581,25530,33.611437524 > > CID_13,Bacteroidetes,6878,25530,26.940853897 > > CID_13,Proteobacteria,5341,25530,20.920485703 > > CID_13,Verrucomicrobia,1244,25530,4.872698786 > > CID_13,Firmicutes,1148,25530,4.496670584 > > CID_13,Acidobacteria,548,25530,2.146494320 > > CID_13,Spirochaetes,477,25530,1.868390129 > > CID_13,Ignavibacteriae,298,25530,1.167254211 > > CID_13,Actinobacteria,227,25530,0.889150020 > > CID_13,Planctomycetes,184,25530,0.720720721 > > CID_13,Chloroflexi,181,25530,0.708969839 > > CID_13,Gemmatimonadetes,121,25530,0.473952213 > > CID_13,Lentisphaerae,93,25530,0.364277321 > > CID_13,Tenericutes,61,25530,0.238934587 > > CID_13,Fibrobacteres,47,25530,0.184097141 > > CID_13,Nitrospinae,28,25530,0.109674892 > > CID_13,Nitrospirae,26,25530,0.101840971 > > CID_13,Chlorobi,18,25530,0.070505288 > > CID_13,Elusimicrobia,13,25530,0.050920486 > > CID_13,Synergistetes,8,25530,0.031335684 > > CID_13,Fusobacteria,4,25530,0.015667842 > > CID_13,Deinococcus-Thermus,2,25530,0.007833921 > > CID_13,Thermotogae,2,25530,0.007833921 > > CID_2,Cyanobacteria,43812,94826,46.202518297 > > CID_2,Proteobacteria,22180,94826,23.390209436 > > CID_2,Bacteroidetes,16993,94826,17.920190665 > > CID_2,Verrucomicrobia,4779,94826,5.039757029 > > CID_2,Acidobacteria,1728,94826,1.822285027 > > CID_2,Firmicutes,1385,94826,1.460569886 > > 
CID_2,Planctomycetes,815,94826,0.859468922 > > CID_2,Actinobacteria,677,94826,0.713939215 > > CID_2,Gemmatimonadetes,625,94826,0.659101934 > > CID_2,Chloroflexi,416,94826,0.438698247 > > CID_2,Spirochaetes,415,94826,0.437643684 > > CID_2,Lentisphaerae,221,94826,0.233058444 > > CID_2,Ignavibacteriae,180,94826,0.189821357 > > CID_2,Fibrobacteres,155,94826,0.163457280 > > CID_2,Chlorobi,112,94826,0.118111067 > > CID_2,Elusimicrobia,111,94826,0.117056503 > > CID_2,Tenericutes,75,94826,0.079092232 > > CID_2,Nitrospinae,40,94826,0.042182524 > > CID_2,Nitrospirae,31,94826,0.032691456 > > CID_2,Deinococcus-Thermus,17,94826,0.017927573 > > CID_2,Armatimonadetes,17,94826,0.017927573 > > CID_2,Synergistetes,16,94826,0.016873010 > > CID_2,Fusobacteria,15,94826,0.015818446 > > CID_2,Deferribacteres,7,94826,0.007381942 > > CID_2,Caldiserica,2,94826,0.002109126 > > CID_2,Thermotogae,2,94826,0.002109126 > > CID_3,Cyanobacteria,18888,46181,40.899937204 > > CID_3,Proteobacteria,12532,46181,27.136701241 > > CID_3,Bacteroidetes,9070,46181,19.640111734 > > CID_3,Verrucomicrobia,2291,46181,4.960914662 > > CID_3,Acidobacteria,689,46181,1.491955566 > > CID_3,Firmicutes,631,46181,1.366362790 > > CID_3,Actinobacteria,470,46181,1.017734566 > > CID_3,Spirochaetes,366,46181,0.792533726 > > CID_3,Planctomycetes,326,46181,0.705918018 > > CID_3,Chloroflexi,282,46181,0.610640740 > > CID_3,Gemmatimonadetes,194,46181,0.420086183 > > CID_3,Fibrobacteres,116,46181,0.251185552 > > CID_3,Ignavibacteriae,109,46181,0.236027804 > > CID_3,Nitrospinae,46,46181,0.099608064 > > CID_3,Nitrospirae,44,46181,0.095277279 > > CID_3,Tenericutes,40,46181,0.086615708 > > CID_3,Lentisphaerae,38,46181,0.082284922 > > CID_3,Chlorobi,16,46181,0.034646283 > > CID_3,Elusimicrobia,14,46181,0.030315498 > > CID_3,Fusobacteria,10,46181,0.021653927 > > CID_3,Armatimonadetes,7,46181,0.015157749 > > CID_3,Synergistetes,2,46181,0.004330785 > > CID_4,Proteobacteria,433,1005,43.084577114 > > CID_4,Bacteroidetes,301,1005,29.950248756 > 
> CID_4,Actinobacteria,111,1005,11.044776119 > > CID_4,Cyanobacteria,44,1005,4.378109453 > > CID_4,Acidobacteria,28,1005,2.786069652 > > CID_4,Chloroflexi,24,1005,2.388059701 > > CID_4,Nitrospirae,21,1005,2.089552239 > > CID_4,Verrucomicrobia,12,1005,1.194029851 > > CID_4,Gemmatimonadetes,12,1005,1.194029851 > > CID_4,Firmicutes,7,1005,0.696517413 > > CID_4,Spirochaetes,5,1005,0.497512438 > > CID_4,Ignavibacteriae,5,1005,0.497512438 > > CID_4,Elusimicrobia,2,1005,0.199004975 > > CID_5,Proteobacteria,5002,11914,41.984220245 > > CID_5,Bacteroidetes,1512,11914,12.690951821 > > CID_5,Verrucomicrobia,1361,11914,11.423535337 > > CID_5,Acidobacteria,1207,11914,10.130938392 > > CID_5,Cyanobacteria,721,11914,6.051703878 > > CID_5,Planctomycetes,635,11914,5.329864026 > > CID_5,Actinobacteria,398,11914,3.340607688 > > CID_5,Nitrospirae,314,11914,2.635554809 > > CID_5,Chloroflexi,313,11914,2.627161323 > > CID_5,Firmicutes,195,11914,1.636729898 > > CID_5,Gemmatimonadetes,129,11914,1.082759778 > > CID_5,Chlorobi,31,11914,0.260198086 > > CID_5,Armatimonadetes,22,11914,0.184656706 > > CID_5,Ignavibacteriae,21,11914,0.176263220 > > CID_5,Fusobacteria,14,11914,0.117508813 > > CID_5,Deinococcus-Thermus,10,11914,0.083934867 > > CID_5,Lentisphaerae,9,11914,0.075541380 > > CID_5,Elusimicrobia,7,11914,0.058754407 > > CID_5,Nitrospinae,7,11914,0.058754407 > > CID_5,Synergistetes,4,11914,0.033573947 > > CID_5,Spirochaetes,2,11914,0.016786973 > > CID_6,Cyanobacteria,6462,17852,36.197624916 > > CID_6,Proteobacteria,5036,17852,28.209724401 > > CID_6,Bacteroidetes,3906,17852,21.879901412 > > CID_6,Verrucomicrobia,1016,17852,5.691239077 > > CID_6,Acidobacteria,317,17852,1.775711405 > > CID_6,Actinobacteria,286,17852,1.602061394 > > CID_6,Firmicutes,234,17852,1.310777504 > > CID_6,Planctomycetes,134,17852,0.750616177 > > CID_6,Gemmatimonadetes,112,17852,0.627380686 > > CID_6,Spirochaetes,97,17852,0.543356487 > > CID_6,Chloroflexi,77,17852,0.431324221 > > CID_6,Lentisphaerae,56,17852,0.313690343 
> > CID_6,Ignavibacteriae,35,17852,0.196056464 > > CID_6,Nitrospirae,23,17852,0.128837105 > > CID_6,Nitrospinae,19,17852,0.106430652 > > CID_6,Tenericutes,12,17852,0.067219359 > > CID_6,Chlorobi,8,17852,0.044812906 > > CID_6,Armatimonadetes,7,17852,0.039211293 > > CID_6,Fibrobacteres,7,17852,0.039211293 > > CID_6,Elusimicrobia,4,17852,0.022406453 > > CID_6,Fusobacteria,2,17852,0.011203227 > > CID_6,Deferribacteres,2,17852,0.011203227 > > CID_7,Cyanobacteria,11046,30425,36.305669680 > > CID_7,Proteobacteria,8418,30425,27.668036154 > > CID_7,Bacteroidetes,6197,30425,20.368118324 > > CID_7,Verrucomicrobia,1745,30425,5.735414955 > > CID_7,Firmicutes,732,30425,2.405916187 > > CID_7,Acidobacteria,582,30425,1.912900575 > > CID_7,Actinobacteria,365,30425,1.199671323 > > CID_7,Fusobacteria,344,30425,1.130649137 > > CID_7,Planctomycetes,253,30425,0.831552999 > > CID_7,Chloroflexi,221,30425,0.726376335 > > CID_7,Gemmatimonadetes,131,30425,0.430566968 > > CID_7,Spirochaetes,127,30425,0.417419885 > > CID_7,Lentisphaerae,88,30425,0.289235826 > > CID_7,Ignavibacteriae,69,30425,0.226787182 > > CID_7,Nitrospinae,37,30425,0.121610518 > > CID_7,Nitrospirae,21,30425,0.069022186 > > CID_7,Chlorobi,17,30425,0.055875103 > > CID_7,Elusimicrobia,15,30425,0.049301561 > > CID_7,Fibrobacteres,9,30425,0.029580937 > > CID_7,Armatimonadetes,4,30425,0.013147083 > > CID_7,Deferribacteres,4,30425,0.013147083 > > CID_8,Cyanobacteria,14446,43589,33.141388883 > > CID_8,Proteobacteria,13270,43589,30.443460506 > > CID_8,Bacteroidetes,8834,43589,20.266581018 > > CID_8,Verrucomicrobia,2529,43589,5.801922503 > > CID_8,Firmicutes,1176,43589,2.697928376 > > CID_8,Acidobacteria,780,43589,1.789442290 > > CID_8,Actinobacteria,542,43589,1.243432976 > > CID_8,Spirochaetes,406,43589,0.931427654 > > CID_8,Planctomycetes,295,43589,0.676776251 > > CID_8,Chloroflexi,277,43589,0.635481429 > > CID_8,Ignavibacteriae,243,43589,0.557480098 > > CID_8,Lentisphaerae,230,43589,0.527656060 > > 
CID_8,Gemmatimonadetes,162,43589,0.371653399 > > CID_8,Fusobacteria,106,43589,0.243180619 > > CID_8,Tenericutes,57,43589,0.130766937 > > CID_8,Nitrospirae,51,43589,0.117001996 > > CID_8,Chlorobi,50,43589,0.114707839 > > CID_8,Nitrospinae,36,43589,0.082589644 > > CID_8,Fibrobacteres,34,43589,0.078001331 > > CID_8,Elusimicrobia,29,43589,0.066530547 > > CID_8,Armatimonadetes,19,43589,0.043588979 > > CID_8,Aquificae,8,43589,0.018353254 > > CID_8,Deferribacteres,7,43589,0.016059097 > > CID_8,Dictyoglomi,2,43589,0.004588314 > > CID_9,Proteobacteria,26463,77120,34.314056017 > > CID_9,Cyanobacteria,20329,77120,26.360217842 > > CID_9,Bacteroidetes,15956,77120,20.689834025 > > CID_9,Verrucomicrobia,3323,77120,4.308869295 > > CID_9,Firmicutes,2726,77120,3.534751037 > > CID_9,Spirochaetes,1644,77120,2.131742739 > > CID_9,Acidobacteria,1634,77120,2.118775934 > > CID_9,Actinobacteria,1200,77120,1.556016598 > > CID_9,Chloroflexi,1128,77120,1.462655602 > > CID_9,Planctomycetes,872,77120,1.130705394 > > CID_9,Ignavibacteriae,578,77120,0.749481328 > > CID_9,Lentisphaerae,264,77120,0.342323651 > > CID_9,Gemmatimonadetes,263,77120,0.341026971 > > CID_9,Fibrobacteres,170,77120,0.220435685 > > CID_9,Nitrospirae,148,77120,0.191908714 > > CID_9,Nitrospinae,136,77120,0.176348548 > > CID_9,Chlorobi,74,77120,0.095954357 > > CID_9,Tenericutes,71,77120,0.092064315 > > CID_9,Elusimicrobia,46,77120,0.059647303 > > CID_9,Armatimonadetes,27,77120,0.035010373 > > CID_9,Fusobacteria,25,77120,0.032417012 > > CID_9,Aquificae,13,77120,0.016856846 > > CID_9,Synergistetes,12,77120,0.015560166 > > CID_9,Deferribacteres,8,77120,0.010373444 > > CID_9,Thermotogae,7,77120,0.009076763 > > CID_9,Chrysiogenetes,3,77120,0.003890041 > > ") > > > > [[alternative HTML version deleted]] > > > > ______________________________________________ > > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > > https://stat.ethz.ch/mailman/listinfo/r-help > > PLEASE do read the posting guide > > 
http://www.R-project.org/posting-guide.html > > and provide commented, minimal, self-contained, reproducible code. > > > > ______________________________________________ > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting- > guide.html > and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.