On 8/13/2010 11:08 AM, Hosack, Michael wrote:
R Experts,

I would like to create a series of variables without having to write a
separate line of code for each new variable. My data frame (DF) contains two
groups of linked variables, ESP1:ESP9 and ECRL1:ECRL9. ESP1:ESP9 hold
abbreviated species codes (the full data frame contains 26 codes). ECRL1 is
the number harvested of the species named in ESP1, and so on through ESP9 and
ECRL9. What I want to do is create 26 new variables (one for each unique
species code), each containing the number harvested (ECRL) for the
corresponding species code in each row of the data set. For example, in row 14
the new variable YP Harvest would equal 90 (ECRL2) and WP Harvest would equal
0 (ECRL1); all other species-code variables would contain NA.

I hope I made this clear enough.

Thank you,

Mike

Current method: one line per species

# Harvest count for species 'YP', computed row by row: ESP1..ESP9 are checked
# in order and the ECRL value paired with the first ESP column equal to 'YP'
# is returned; rows where no ESP column is 'YP' get 0.
# The comparison uses %in%, so an NA species code tests FALSE instead of
# propagating NA through ifelse().
# With this approach the whole statement is repeated, with the species code
# changed, once per species.
EBTCH1.h$YP.H<- with(EBTCH1.h,ifelse(ESP1 %in% 'YP',ECRL1,ifelse(ESP2 %in% 
'YP',ECRL2,
ifelse(ESP3 %in% 'YP',ECRL3,ifelse(ESP4 %in% 'YP',ECRL4,ifelse(ESP5 %in% 
'YP',ECRL5,
ifelse(ESP6 %in% 'YP',ECRL6,ifelse(ESP7 %in% 'YP',ECRL7,ifelse(ESP8 %in% 
'YP',ECRL8,
ifelse(ESP9 %in% 'YP',ECRL9,0))))))))))


# Sample data frame with 15 rows (appears to be dput() output).
# Columns, in order:
#   MM, DD, DTYPE, TOD, SITENUM, CURTIM, GRPFSH, EEFF  - per-observation fields
#   ESP1..ESP9   - species codes (character, NA where no species is recorded)
#   ECRL1..ECRL9 - integer counts paired by position with ESP1..ESP9
# ESP4 and ESP5 are entirely NA_character_; ESP6..ESP9 are written as bare NA,
# so those four columns are logical rather than character in this sample.
DF<-
structure(list(MM = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L), DD = c(3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L,
8L, 8L, 8L, 8L, 8L, 8L), DTYPE = c(2, 2, 2, 2, 2, 2, 1, 1, 1,
1, 1, 1, 1, 1, 1), TOD = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
1, 1, 1), SITENUM = c("102", "104", "104", "104", "101", "101",
"102", "103", "101", "101", "101", "101", "103", "103", "103"
), CURTIM = c(1450L, 1736L, 1755L, 1804L, 1950L, 2007L, 1150L,
1450L, 2000L, 1003L, 1030L, 1036L, 1300L, 1310L, 1320L), GRPFSH = c(2L,
2L, 2L, 2L, 2L, 3L, 1L, 2L, 3L, 2L, 4L, 1L, 1L, 3L, 1L), EEFF = c(11.5,
19, 5, 20, 0, 0, 5, 8, 0, 0, 0, 0, 3, 12, 6), ESP1 = c("SMB",
"SMB", "SMB", "SMB", NA, NA, "YP", "YP", NA, NA, NA, NA, "RG",
"WP", "WP"), ESP2 = c(NA, "RB", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, "SMB", "YP", "YP"), ESP3 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, "RB", "RBS", NA), ESP4 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
     ESP5 = c(NA_character_, NA_character_, NA_character_, NA_character_,
     NA_character_, NA_character_, NA_character_, NA_character_,
     NA_character_, NA_character_, NA_character_, NA_character_,
     NA_character_, NA_character_, NA_character_), ESP6 = c(NA,
     NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
     ESP7 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
     NA, NA, NA), ESP8 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
     NA, NA, NA, NA, NA, NA), ESP9 = c(NA, NA, NA, NA, NA, NA,
     NA, NA, NA, NA, NA, NA, NA, NA, NA), ECRL1 = c(0L, 0L, 0L,
     0L, 0L, 0L, 4L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), ECRL2 = c(0L,
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 90L, 30L),
     ECRL3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
     0L, 0L, 0L), ECRL4 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
     0L, 0L, 0L, 0L, 0L, 0L), ECRL5 = c(0L, 0L, 0L, 0L, 0L, 0L,
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), ECRL6 = c(0L, 0L, 0L,
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), ECRL7 = c(0L,
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
     ECRL8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
     0L, 0L, 0L), ECRL9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
     0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("MM", "DD", "DTYPE",
"TOD", "SITENUM", "CURTIM", "GRPFSH", "EEFF", "ESP1", "ESP2",
"ESP3", "ESP4", "ESP5", "ESP6", "ESP7", "ESP8", "ESP9", "ECRL1",
"ECRL2", "ECRL3", "ECRL4", "ECRL5", "ECRL6", "ECRL7", "ECRL8",
"ECRL9"), row.names = c(NA, 15L), class = "data.frame")


Michael,

An easier approach is to reshape this into a long format (one row for each ESP/ECRL combination, carrying the rest of the identifying variables), and then reshape it back to wide format using the ESP value to define the new column:

# Build one harvest column per species code by reshaping wide -> long -> wide,
# instead of writing one nested ifelse() statement per species.

# For convenience: the variables that identify an observation and do not vary
# across the ESP/ECRL pairs.
# NOTE(review): this assumes these eight columns together are unique per row.
# If the full data set has duplicates, add an explicit row id column and use
# it as idvar, otherwise reshape() will collapse the duplicated observations.
idvar <- names(DF)[1:8]

# Long format: one row per (observation, ESP/ECRL slot), carrying the id
# variables along. The two vectors in `varying` are paired by position
# (ESP1 with ECRL1, ..., ESP9 with ECRL9).
# timevar = NULL is kept from the original call; presumably it is there so
# that no slot-index column is carried into the wide reshape below.
DFr <- reshape(DF,
               varying = list(paste0("ESP", 1:9), paste0("ECRL", 1:9)),
               timevar = NULL, idvar = idvar, direction = "long",
               v.names = c("ESP", "ECRL"))

# Drop the empty slots (no species code recorded).
DFr <- DFr[!is.na(DFr$ESP), ]

# Back to wide format, keyed by species code: yields one ECRL.<code> column
# per species present in the data.
DFr <- reshape(DFr, timevar = "ESP", idvar = idvar, direction = "wide")

# Attach the per-species columns to the original data.
# all.x = TRUE keeps observations that recorded no species at all (they were
# removed from DFr by the NA filter above); without it merge() performs an
# inner join and silently drops those rows. Their ECRL.<code> values are NA.
# Note that merge() sorts the result by the key columns by default.
merge(DF, DFr, by = idvar, all.x = TRUE)


If you want to rename columns, or convert NA's to 0, you can do that to DFr or after the merge.

--
Brian Diggs
Senior Research Associate, Department of Surgery, Oregon Health & Science University

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to