Hi, In your 'resultData", some observations seems to be omitted. with(rawData,tapply(codes, PersonID,FUN=function(x) x))$Person3 #[1] 56177 61704 70879 69033 87224 68670 65602 25476 81209 62086 35492 39771 #[13] 14380 43858 53679 78023 43785 69884 12840 54021
resultData[4,] # PersonId Code1 Code2 Code3 Code4 Code5 Code6 Code7 Code8 Code9 Code10 Code11 #4 Person3 56177 61704 70879 69033 87224 68670 65602 25476 81209 62086 35492 # Code12 Code13 Code14 Code15 Code16 Code17 Code18 Code19 Code20 Code21 Code22 #4 39771 14380 43858 53679 # Code23 Code24 Code25 One way would be: rawData$Seq<-with(rawData,ave(codes,PersonID,FUN=function(x) rep(1:25,length.out=length(x)))) rawData$Seq1<- with(rawData,ave(codes,PersonID,FUN=function(x) rep(seq(length(x) %/%25 +1),each=25,length.out=length(x)))) res <- reshape(rawData,v.names="codes",idvar=c("PersonID","Seq1"),timevar="Seq",direction="wide",sep="")[,-2] res[is.na(res)] <- "" colnames(res) <- colnames(resultData) rownames(res) <- rownames(resultData) A.K. On Monday, March 24, 2014 10:15 AM, Dark <i...@software-solutions.nl> wrote: Hi R-experts, I have a data.frame that I want to reshape to a certain format so I can use it in a tool for further analysis. Basicly I have a very long list with IDs of persons and their codes. I create a row for every person with 25 of their codes. I a person has more then 25 codes, I want to add another row for that person. If a row contains less then 25 codes I want to fill with empty string values. I have manually created a sample rawData and resultData and used dput so you can see my starting DF and the wanted result DF. The sample is of very limited size, the real data would contain a few million(!) records. rawData <- structure(list(PersonID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Person1", "Person2", "Person3", "Person4", "Person5"), class = "factor"), codes = c(34396L, 81878L, 67829L, 13428L, 12992L, 63724L, 85930L, 78497L, 59578L, 50733L, 26154L, 47205L, 74578L, 12204L, 42435L, 96643L, 35242L, 29836L, 73031L, 11326L, 96686L, 55849L, 56415L, 11064L, 78509L, 55715L, 75851L, 60682L, 16277L, 52763L, 23429L, 39723L, 95809L, 60081L, 19618L, 46012L, 79188L, 54664L, 64420L, 72875L, 97428L, 74897L, 75615L, 12023L, 21572L, 56177L, 61704L, 70879L, 69033L, 87224L, 68670L, 65602L, 25476L, 81209L, 62086L, 35492L, 39771L, 14380L, 43858L, 53679L, 78023L, 43785L, 69884L, 12840L, 54021L, 68002L, 79249L, 61784L, 22227L, 28935L, 91406L, 42045L, 97716L, 65690L, 57310L, 57627L, 32227L, 43121L, 22251L, 31255L, 90660L, 89118L, 14558L, 99824L, 25005L, 62186L, 10527L, 99438L, 85656L, 79465L, 35357L, 41697L, 83084L, 83590L, 16234L, 32480L, 50991L, 79524L, 93888L, 32637L, 13253L, 76576L, 48632L, 68014L, 24281L, 74320L, 44601L, 36251L, 27825L, 85569L, 21634L, 50364L, 74436L, 73216L, 89342L, 63562L, 88485L, 40552L, 49359L, 29636L, 26285L, 13263L, 18106L, 78589L, 43479L, 12491L, 50840L, 77453L, 80578L, 43693L, 89857L, 12837L, 55950L, 63049L, 84508L, 29736L, 88194L, 86849L, 54274L, 38713L)), .Names = c("PersonID", "codes"), row.names = c(NA, -140L), class = "data.frame") resultData = structure(list(PersonId = c("Person1", "Person1", "Person2", "Person3", "Person4", "Person5", "Person5", "Person5"), Code1 = c(34396, 55715, 97428, 56177, 68002, 90660, 74320, 89857), Code2 = c(81878, 75851, 74897, 61704, 79249, 89118, 44601, 12837), Code3 = c(67829, 60682, 75615, 70879, 61784, 14558, 36251, 55950), Code4 = c(13428, 16277, 12023, 69033, 22227, 99824, 27825, 63049), Code5 = c(12992, 52763, 21572, 87224, 28935, 25005, 85569, 84508), Code6 = c("63724", "23429", "", "68670", "91406", "62186", "21634", "29736"), Code7 = c("85930", "39723", "", "65602", "42045", "10527", "50364", "88194"), Code8 = c("78497", "95809", "", "25476", "97716", "99438", "74436", "86849"), Code9 = c("59578", "60081", "", "81209", "65690", "85656", "73216", "54274"), Code10 = c("50733", "19618", "", "62086", "57310", "79465", "89342", "38713"), Code11 = c("26154", "46012", "", "35492", "57627", "35357", "63562", ""), Code12 = c("47205", "79188", "", "39771", "32227", "41697", "88485", ""), Code13 = c("74578", "54664", "", "14380", "43121", "83084", "40552", ""), Code14 = c("12204", "64420", "", "43858", "22251", "83590", "49359", ""), Code15 = c("42435", "72875", "", "53679", "31255", "16234", "29636", ""), Code16 = c("96643", "", "", "", "78023", "32480", "26285", ""), Code17 = c("35242", "", "", "", "43785", "50991", "13263", ""), Code18 = c("29836", "", "", "", "69884", "79524", "18106", ""), Code19 = c("73031", "", "", "", "12840", "93888", "78589", ""), Code20 = c("11326", "", "", "", "54021", "32637", "43479", ""), Code21 = c("96686", "", "", "", "", "13253", "12491", ""), Code22 = c("55849", "", "", "", "", "76576", "50840", ""), Code23 = c("56415", "", "", "", "", "48632", "77453", ""), Code24 = c("11064", "", "", "", "", "68014", "80578", ""), Code25 = c("78509", "", "", "", "", "24281", "43693", "")), .Names = c("PersonId", "Code1", "Code2", "Code3", "Code4", "Code5", "Code6", "Code7", "Code8", "Code9", "Code10", "Code11", "Code12", "Code13", "Code14", "Code15", "Code16", "Code17", "Code18", "Code19", "Code20", "Code21", "Code22", "Code23", "Code24", "Code25"), row.names = c(NA, -8L), class = "data.frame") This sample data explains very well what I'm trying to achieve. As you can see there are 2 rows for Person1 and 3 rows for Person 5 because they have respectively 40 and 60 codes. I'm a big fan of the data.table package so maybe someone has an solution using that package? But of course any solution is welcome:-) Thanks for any help in advance, Regards Dark -- View this message in context: http://r.789695.n4.nabble.com/Reshape-large-Data-Frame-to-new-format-tp4687431.html Sent from the R help mailing list archive at Nabble.com. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.