Thank you John, I have put a copy of the dput file below my signature block. I hope that is what you need as I am unfamiliar with that function. Note that empty cells need to become "NA". The "=" character is part of the "PARVALUE" column. Yours, Guy gj...@itsi.com
structure(list(X = c(2268L, 2269L, 2270L, 2272L, 2273L, 2274L, 2275L, 2276L, 2289L, 2290L, 2291L, 2292L, 2293L, 2294L, 2295L, 2296L, 2326L, 2327L, 2328L, 2329L, 2330L, 2331L, 2332L, 2346L, 2349L, NA, NA, NA, NA, NA), fldsampid = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 2L, 3L, 4L), .Label = c("", "fldsampid", "LHR020GW-01E2", "LHR020SD-00E2"), class = "factor"), CLP_ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 3L), .Label = c("", "CLP_ID", "MY77J8"), class = "factor"), sacode = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 3L, 2L, 2L), .Label = c("", "N", "sacode"), class = "factor"), matrix = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 4L, 3L), .Label = c("", "matrix", "SE", "WG"), class = "factor"), etc. = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L), .Label = c("", "etc."), class = "factor"), prccode = structure(c(4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 8L, 5L, 6L, 7L, 5L, 5L, 5L, 5L, 5L, 6L, 1L, 1L, 3L, 2L, NA), .Label = c("", "<value>", "CL", "INO", "MET", "MI", "ORG", "SN"), class = "factor"), Lab = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 5L, 2L, NA), .Label = c("", "<value>", "A4SW", "BRLS", "PO4"), class = "factor"), EXMCODE = structure(c(5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 5L, 4L, 4L, 4L, 4L, 7L, 4L, 1L, 1L, 6L, 2L, NA), .Label = c("", "<value>", "FLDFLT", "METHOD", "NONE", "SO4", "SW3050B"), class = "factor"), Analysis = structure(c(13L, 13L, 13L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 3L, 3L, 3L, 5L, 13L, 9L, 8L, 15L, 7L, 12L, 12L, 12L, 14L, 4L, 1L, 1L, 6L, 2L, 2L), .Label = c("", "<value>", "A2320", "A2540G", "A5310B", "AG", "C245.5", "E160.3", "E1630", "E1631", "E1638", "E200.8", "E300", "SW6010B", "SW9060" ), class = "factor"), PARLABEL = structure(c(10L, 16L, 17L, 12L, 3L, 4L, 8L, 9L, 20L, 21L, 15L, 5L, 6L, 7L, 11L, 14L, 13L, 18L, 19L, 12L, 3L, 8L, 9L, 20L, 18L, 1L, 1L, 4L, 2L, 2L), .Label = c("", "<value>", "AG", "AL", "ALK", "ALKB", "ALKC", "AS", "B", "CL", "DOC", "HG", "MEHG", "NO3", "PB", "PO4", "SO4", "SOLID", "TOC", "V", "Zn"), class = "factor"), PARVQ = structure(c(3L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 3L, 3L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 3L, 1L, 1L, 4L, 2L, 2L), .Label = c("", "<value>", "=", "AS", "ND"), class = "factor"), Result = structure(c(12L, 20L, 11L, 3L, 10L, 8L, 14L, 26L, 9L, 5L, 4L, 25L, 25L, 13L, 19L, 24L, 15L, 18L, 16L, 21L, 6L, 22L, 7L, 23L, 17L, 1L, 1L, 27L, 2L, 2L), .Label = c("", "<value>", "0.00171", "0.008", "1.76", "1050", "11400", "122", "131", "2.57", "22460", "23590.9", "2500", "317", "4.28", "4.823", "47.7", "48.45", "49330", "50", "5100", "5500", "56900", "792", "807000", "9970", "B" ), class = "factor"), X.1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, 2L), .Label = c("", "<value>", "V"), class = "factor"), X.2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, NA), .Label = c("", "<value>", "Zn"), class = "factor"), X.3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, 2L), .Label = c("", "<value>", "etc."), class = "factor"), X.4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, NA), .Label = c("", "<value>", "ALK"), class = "factor"), X.5 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, NA), .Label = c("", "<value>", "ALKB"), class = "factor"), X.6 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, NA), .Label = c("", "<value>", "ALKC"), class = "factor"), X.7 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, 2L), .Label = c("", "<value>", "SOLID"), class = "factor"), X.8 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, NA), .Label = c("", "<value>", "DOC"), class = "factor"), X.9 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, NA), .Label = c("", "<value>", "TOC"), class = "factor"), X.10 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 2L, NA), .Label = c("", "<value>", "NO3"), class = "factor")), .Names = c("X", "fldsampid", "CLP_ID", "sacode", "matrix", "etc.", "prccode", "Lab", "EXMCODE", "Analysis", "PARLABEL", "PARVQ", "Result", "X.1", "X.2", "X.3", "X.4", "X.5", "X.6", "X.7", "X.8", "X.9", "X.10"), class = "data.frame", row.names = c(NA, -30L)) -----Original Message----- From: John Kane [mailto:jrkrid...@yahoo.ca] Sent: Monday, January 10, 2011 1:43 PM To: r-help@r-project.org; Guy Jett Subject: Re: [R] Help with Data Transformation That sample data set is really hard to read. Could you resent it after having used dput on it? A file output with dput is easily read into R and makes seeing what you need much easier. BTW what are the = doing? Thanks --- On Mon, 1/10/11, Guy Jett <gj...@itsi.com> wrote: > From: Guy Jett <gj...@itsi.com> > Subject: [R] Help with Data Transformation > To: "r-help@r-project.org" <r-help@r-project.org> > Received: Monday, January 10, 2011, 3:59 PM Greetings, I am new to R > and am having trouble with parsing a file with the following > characteristics: > > * Individual results > for a single sample are written to multiple lines. > > * First 16 columns > are constant from sample to sample. > > * Remaining 10 need > to be matched up (cross-tabbed?) > > o (the exact contents for the remaining 10 vary from sample to > sample, as indicated in the extract > below) > > * Ultimate goal is to > run various comparisons between the variable columns, compare samples > from separate populations, and graph samples from the separate > populations. > > * (An extract is > provided below) > > The data is initially extracted from an SQL database into Excel, then > saved as a tab-delimited text file for use in R. > I have been successful in using subset() to extract specific sample > types, but have not yet been able to transform the data so that all > the data needed is on a single line. I have looked at several R > manuals, read through 'R in a Nutshell', prowled the help resources (R > Site Search and the Google link), tried stack(), subset(), reshape(), > and several other functions, to no avail. > > Thank you very much for your help. This seems like a wonderful > community, Guy Jett, R.G. > Project Geologist > gj...@itsi.com<mailto:gj...@itsi.com> > > Example Data Input (subset): > > fldsampid > CLP_ID sacode > matrix etc... > prccode > Lab > EXMCODE > Analysis > PARLABEL > PARVQ Result > 2268 LHR020GW-01E2 > > N > WG > > INO > BRLS NONE E300 > CL > = > 23590.9 > 2269 LHR020GW-01E2 > > N > WG > > INO > BRLS NONE E300 > PO4 ND > 50 > 2270 LHR020GW-01E2 > > N > WG > > INO > BRLS NONE E300 > SO4 = > 22460 > 2272 LHR020GW-01E2 > > N > WG > > MET > BRLS FLDFLT > E1631 HG > = > 0.00171 > 2273 LHR020GW-01E2 > > N > WG > > MET > BRLS FLDFLT > E1638 AG > = 2.57 > 2274 LHR020GW-01E2 > > N > WG > > MET > BRLS FLDFLT > E1638 AL > = > 122 > 2275 LHR020GW-01E2 > > N > WG > > MET > BRLS FLDFLT > E1638 AS > = > 317 > 2276 LHR020GW-01E2 > > N > WG > > MET > BRLS FLDFLT > E1638 B > = > 9970 > 2289 LHR020GW-01E2 > > N > WG > > MET > BRLS FLDFLT > E1638 V > = > 131 > 2290 LHR020GW-01E2 > > N > WG > > MET > BRLS FLDFLT > E1638 Zn > = > 1.76 > 2291 LHR020GW-01E2 > > N > WG > > MET > BRLS METHOD > E1638 > PB ND > 0.008 > 2292 LHR020GW-01E2 > > N > WG > > MI > BRLS NONE A2320 > ALK = > 807000 > 2293 LHR020GW-01E2 > > N > WG > > MI > BRLS NONE A2320 > ALKB = > 807000 > 2294 LHR020GW-01E2 > > N > WG > > MI > BRLS NONE A2320 > ALKC ND > 2500 > 2295 LHR020GW-01E2 > > N > WG > > ORG > BRLS NONE > A5310B DOC = > 49330 > 2296 LHR020GW-01E2 > > N > WG > > SN > BRLS NONE E300 > NO3 = > 792 > 2326 LHR020SD-00E2 > > N > SE > > MET > BRLS METHOD > E1630 > MEHG = > 4.28 > 2327 LHR020SD-00E2 > > N > SE > > MI > BRLS METHOD > E160.3 SOLID > = > 48.45 > 2328 LHR020SD-00E2 > > N > SE > > ORG > BRLS NONE > SW9060 > TOC = > 4.823 > 2329 LHR020SD-00E2 MY77J8 > N > SE > > MET > A4SW METHOD > > C245.5 HG > = 5100 > 2330 LHR020SD-00E2 MY77J8 > N > SE > > MET > A4SW METHOD > > E200.8 AG > ND 1050 > 2331 LHR020SD-00E2 MY77J8 > N > SE > > MET > A4SW METHOD > > E200.8 AS > = > 5500 > 2332 LHR020SD-00E2 MY77J8 > N > SE > > MET > A4SW METHOD > > E200.8 B > = > 11400 > 2346 LHR020SD-00E2 MY77J8 > N > SE > > MET > A4SW SW3050B > SW6010B > V > = > 56900 > 2349 LHR020SD-00E2 MY77J8 > N > SE > > MI > A4SW METHOD > A2540G > SOLID = > 47.7 > > Desired output: > > fldsampid > CLP_ID sacode > matrix etc... CL > PO4 > SO4 AG > AL > AS > B > V > Zn > etc... ALK > ALKB ALKC > SOLID DOC > TOC NO3 > > LHR020GW-01E2 > > N > WG > > <value> > <value> > <value> > > <value> > <value> > <value> > > <value> > <value> > <value> > > <value> > <value> > <value> > > <value> > <value> > <value> > > <value> > <value> > > LHR020SD-00E2 MY77J8 N > SE > > NA > NA NA > <value> > <value> > > <value> > <value> > <value> > NA > <value> > NA > NA NA > <value> > NA > NA > NA > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help@r-project.org > mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, > reproducible code. > ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.