Hi experts,

I have a dataframe with 100k+ records. it has a key/id column and 25 code
columns. I would like to restructure it having a row for each code column.

I have a structure like this (used dput):
structure(list(DSYSRTKY = structure(c(1L, 2L, 3L, 3L, 4L, 4L), .Names =
c("1", 
"2", "3", "4", "5", "6"), .Label = c("100000005", "100000203", 
"100000315", "100000327"), class = "factor"), C1 = structure(c(6L, 
3L, 2L, 5L, 1L, 4L), .Names = c("1", "2", "3", "4", "5", "6"), .Label =
c("41401", 
"42831", "45341", "486", "5990", "71535"), class = "factor"), 
    C2 = structure(c(5L, 1L, 3L, 6L, 4L, 2L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("4019", "51881", "5990", 
    "6826", "78900", "V4986"), class = "factor"), C3 = structure(c(6L, 
    3L, 5L, 2L, 4L, 1L), .Names = c("1", "2", "3", "4", "5", 
    "6"), .Label = c("5119", "5939", "72400", "7850", "8052", 
    "V1251"), class = "factor"), C4 = structure(c(6L, 5L, 3L, 
    1L, 2L, 4L), .Names = c("1", "2", "3", "4", "5", "6"), .Label =
c("3109", 
    "4019", "4241", "42789", "V1011", "V454"), class = "factor"), 
    C5 = structure(c(1L, 1L, 3L, 1L, 2L, 4L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "2720", "4019", 
    "7823"), class = "factor"), C6 = structure(c(1L, 1L, 2L, 
    1L, 4L, 3L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("", 
    "311", "41400", "49390"), class = "factor"), C7 = structure(c(1L, 
    1L, 2L, 1L, 3L, 4L), .Names = c("1", "2", "3", "4", "5", 
    "6"), .Label = c("", "2724", "2859", "V4581"), class = "factor"), 
    C8 = structure(c(1L, 1L, 3L, 1L, 4L, 2L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "40390", "71680", 
    "79029"), class = "factor"), C9 = structure(c(1L, 1L, 2L, 
    1L, 4L, 3L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("", 
    "4168", "5859", "V1582"), class = "factor"), C10 = structure(c(1L, 
    1L, 3L, 1L, 1L, 2L), .Names = c("1", "2", "3", "4", "5", 
    "6"), .Label = c("", "49390", "7804"), class = "factor"), 
    C11 = structure(c(1L, 1L, 3L, 1L, 1L, 2L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "2724", "V066"), class =
"factor"), 
    C12 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "6930"), class = "factor"), 
    C13 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "41400"), class = "factor"), 
    C14 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "V4581"), class = "factor"), 
    C15 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "40291"), class = "factor"), 
    C16 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "4280"), class = "factor"), 
    C17 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C18 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C19 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C20 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C21 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C22 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C23 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C24 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C25 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor")), .Names =
c("DSYSRTKY", 
"C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "C10", 
"C11", "C12", "C13", "C14", "C15", "C16", "C17", "C18", "C19", 
"C20", "C21", "C22", "C23", "C24", "C25"), row.names = c("1", 
"2", "3", "4", "5", "6"), class = "data.frame")

Now I want to restructure this dataframe not having 25 code fields but a row
for each code but only if the code has a value!

The new structure should look something like:
NewDataFrame <- data.frame(ID=integer(), DSYSRTKY=integer(),
CODE=character(),  PRIMAIRY=logical())

The ID column should just be an increment. PRIMAIRY is a boolean which
should be true if orriginally was the first code (C1).

It has to be efficient since my real data has many more rows than my example
structure of only 6 rows.
I tried some looping mechanism and it was working but it was not performing
at all.

Hopefully I provided enough information using dput.

Regards Derk




--
View this message in context: 
http://r.789695.n4.nabble.com/Create-rows-for-columns-in-dataframe-tp4673607.html
Sent from the R help mailing list archive at Nabble.com.

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to