tcc.new2 <- droplevels(tcc.new) -----Original Message----- From: r-help-boun...@r-project.org [mailto:r-help-boun...@r-project.org] On Behalf Of Ben Harrison Sent: 19 January 2011 12:24 To: r-help@r-project.org Subject: [R] Using subset to filter data table
I am having difficulty understanding how I would constrain a data set by filtering out 'records' based on certain criteria. Using SQL I could query using 'select * from my.data where LithClass in ('sand', 'clay')' or some such. Using subset, there seem to be ghosts left behind (that is, all of the LithClass *.Labels* remain after subset) > dput(tcc) structure(list(Density = c(1.84, 1.91, 2, NA, 1.95, 2.04, 2.11, 1.85, 1.97, 2, 2.27, NA, NA, 2.18, NA, NA, 1.9, 2.68, 2.69, NA, -- snip -- 1.45, 1.49, 2.11), LithClass = structure(c(6L, 6L, 5L, 6L, 7L, 7L, 6L, 6L, 5L, 5L, 5L, 2L, 3L, 2L, 3L, 7L, 7L, 4L, 4L, 2L, 7L, 3L, 2L, 2L, 3L, 7L, 2L, 2L, 3L, 2L, 6L, 5L, 5L, 5L, 6L, 6L, 7L, 2L, 3L, 7L, 7L, 7L, 7L, 2L, 7L, 7L, 2L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 7L, 3L, 2L, 2L, 3L, 2L, 7L, 3L, 5L, 6L, 6L, 6L, 3L, 6L, 3L, 7L, 2L, 3L, 3L, 3L, 7L, 3L, 2L, 2L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 2L, 3L, 3L, 3L, 7L, 7L, 2L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 2L, 2L, 7L, 7L, 2L, 7L, 7L, 7L, 3L, 3L, 2L, 5L, 5L), .Label = c("basalt", "clay", "coal", "dolomite", "limestone", "marl", "sand"), class = "factor")), .Names = c("Density", "Depth", "Porosity", "Conductivity", "LithClass"), row.names = c(NA, 114L), class = "data.frame") > tcc.new = subset(tcc, LithClass %in% c('clay','sand','marl')) >dput(tcc.new) structure(list(Density = c(1.84, 1.91, NA, 1.95, 2.04, 2.11, -- snip-- 1.63, 2.84, 1.93, 1.67, 2.22, 2.42, 1.84, 2.24, 2.38, 2.33, 1.45), LithClass = structure(c(6L, 6L, 6L, 7L, 7L, 6L, 6L, 2L, 2L, 7L, 7L, 2L, 7L, 2L, 2L, 7L, 2L, 2L, 2L, 6L, 6L, 6L, 7L, 2L, 7L, 7L, 7L, 7L, 2L, 7L, 7L, 2L, 7L, 6L, 7L, 2L, 2L, 2L, 7L, 6L, 6L, 6L, 6L, 7L, 2L, 7L, 2L, 2L, 7L, 2L, 7L, 7L, 2L, 6L, 6L, 6L, 7L, 7L, 7L, 2L, 2L, 7L, 7L, 2L, 7L, 7L, 7L, 2L), .Label = c("basalt", "clay", "coal", "dolomite", "limestone", "marl", "sand"), class = "factor")), .Names = c("Density", "Depth", "Porosity", "Conductivity", "LithClass"), row.names = c(1L, 2L, 4L, 5L, 6L, 7L, 8L, 12L, 14L, 16L, 17L, 20L, 21L, 23L, 24L, 26L, 27L, 28L, 30L, 31L, 35L, 36L, 37L, 38L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 55L, 56L, 58L, 59L, 61L, 62L, 65L, 66L, 67L, 69L, 71L, 72L, 76L, 78L, 79L, 80L, 88L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 112L), class = "data.frame") The data (in the "structure") is removed, but the labels for all LithClass remain. If I then produce a boxplot or some other graphic where LithClass is involved, blank plots are produced for the deleted LithClass labels, which is not what I want. i.e.: > with(tcc.new, boxplot(Conductivity~LithClass)) Here is the full data: 1> dput(tcc) structure(list(Density = c(1.84, 1.91, 2, NA, 1.95, 2.04, 2.11, 1.85, 1.97, 2, 2.27, NA, NA, 2.18, NA, NA, 1.9, 2.68, 2.69, NA, 2.41, 1.16, 1.92, 1.96, 0.87, NA, NA, NA, 1.4, NA, 1.83, 1.93, 2.14, 2.18, NA, 2.12, 2.05, NA, 1.53, 2.23, 2.2, NA, 2.42, 2.54, 2.48, 2.58, 1.91, 2.09, 1.85, 2.48, 2.34, 2.19, 2.23, 2.09, 2.57, 2.12, 1.88, 2.21, 2.06, 1.21, 2.08, 1.84, 1.71, 2.11, 1.9, 1.84, 1.97, 1.57, NA, NA, 1.92, 2.29, NA, 1.17, NA, NA, 1.77, 2.03, 2.27, 2.22, 1.84, 2.09, 1.94, 2.3, 2.59, NA, 1.52, 2.15, 1.19, NA, 1.14, 1.99, 1.94, 2.01, 2.02, 1.91, 2.45, 2.09, NA, NA, 2.14, 2.4, 2.47, 2.48, 2.49, 2.51, 2.49, 2.51, 2.5, 1.18, 1.23, 1.94, 2.04, 2.39), Depth = c(151, 198, 284, 480, 33.4, 75, 142, 186, 258, 420.6, 679.2, 884.1, 916, 943, 972, 998, 1064.4, 1076.7, 1125.6, 1176.5, 1225.6, 255.8, 272, 205.6, 319.3, 352.6, 666.1, 667.6, 918, 105, 156.5, 213, 305.1, 498, 591.75, 726, 765.4, 793.4, 829.1, 861, 896, 929, 957.9, 989.7, 1017, 1018, 96, 229, 355, 376, 456, 496, 515, 537, 557, 627, 512, 551, 615, 381.8, 585.3, 587.2, 591.25, 114, 302.2, 496.4, 597, 596, 616.7, 834.3, 980.3, 1064.6, 251.9, 253.75, 284.8, 318.3, 526.5, 627.5, 768, 770.3, 229.58, 323.8, 376.3, 470.85, 611.7, 797.38, 963.6, 1047.25, 334, 404.7, 517, 459, 338, 545.5, 189, 378, 515, 593, 670, 736, 931, 1003, 1325, 1391, 1457, 1523, 1590, 1664, 1769, 124.3, 176.5, 201.5, 502.1, 602.6), Porosity = c(50.22, 44.26, 39.31, NA, 40.38, 39.65, 35.3, 40.48, 40.13, 34.8, 18.52, NA, NA, 32.1, NA, NA, 39.79, 1.62, 1.71, NA, 14.55, 36.87, 39.08, 37.67, 16.9, NA, NA, NA, 30.5, NA, 46.52, 43.18, 33.33, 31.35, NA, 26.74, 26.4, NA, 36.19, 26.88, 27.2, NA, 11.9, 7.65, 9.25, 5.74, 40.71, 27.98, 47.6, 14.03, 20.62, 30.27, 27.51, 33.83, 9.16, 32.2, 26.71, 24.99, 20.1, 47.76, 28.54, 33.54, 23.74, 29.04, 45.38, 37.09, 43.14, 30.46, NA, NA, 30.7, 22.51, NA, 42.94, NA, NA, 29.39, 24.72, 22.36, 25.39, 45.26, 36.23, 41.38, 22, 7.6, NA, 24.09, 30.53, 36.18, NA, 25.19, 37.93, 42.95, 37.84, 47.21, 41.69, 13.64, 34.81, NA, NA, 38.39, 12.31, 9.46, 12.09, 10.3, 11.02, 10.36, 9.27, 10.17, 29.38, 35.67, 42.38, 37.54, 15.44), Conductivity = c(1.28, 1.38, 1.47, 1.13, 0.98, 1.83, 2.2, 1.92, 2.04, 1.44, 2.08, 1.06, 1.19, 1.6, 0.36, 2.81, 2.14, 4.2, 4.41, 1.64, 1.93, 0.56, 1.71, 1.52, 0.22, 2.38, 2.06, 1.95, 0.52, 1.94, 2.42, 2.29, 1.62, 1.58, 1.01, 1.43, 2.41, 2.14, 0.59, 2.94, 2.6, 1.58, 1.52, 1.94, 1.64, 1.77, 1.49, 2.5, 1.16, 1.87, 1.81, 1.58, 1.62, 1.74, 2.14, 2.55, 1.08, 2.27, 1.28, 0.43, 1.79, 1.26, 0.8, 2.44, 1.39, 1.16, 1.17, 0.52, 2.05, 0.4, 2.7, 2.06, 0.64, 0.39, 0.4, 2.61, 1.54, 1.71, 2.3, 2.84, 1.32, 1.5, 1.43, 1.84, 2.3, 1.69, 0.45, 1.47, 0.36, 0.65, 0.91, 2.59, 1.64, 1.73, 1.97, 1.4, 2, 2.11, 1.63, 2.84, 1.56, 1.93, 1.67, 2.22, 2.42, 1.84, 2.24, 2.38, 2.33, 0.29, 0.41, 1.45, 1.49, 2.11), LithClass = structure(c(6L, 6L, 5L, 6L, 7L, 7L, 6L, 6L, 5L, 5L, 5L, 2L, 3L, 2L, 3L, 7L, 7L, 4L, 4L, 2L, 7L, 3L, 2L, 2L, 3L, 7L, 2L, 2L, 3L, 2L, 6L, 5L, 5L, 5L, 6L, 6L, 7L, 2L, 3L, 7L, 7L, 7L, 7L, 2L, 7L, 7L, 2L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 7L, 3L, 2L, 2L, 3L, 2L, 7L, 3L, 5L, 6L, 6L, 6L, 3L, 6L, 3L, 7L, 2L, 3L, 3L, 3L, 7L, 3L, 2L, 2L, 7L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 2L, 3L, 3L, 3L, 7L, 7L, 2L, 6L, 6L, 6L, 7L, 7L, 7L, 1L, 2L, 2L, 7L, 7L, 2L, 7L, 7L, 7L, 3L, 3L, 2L, 5L, 5L), .Label = c("basalt", "clay", "coal", "dolomite", "limestone", "marl", "sand"), class = "factor")), .Names = c("Density", "Depth", "Porosity", "Conductivity", "LithClass"), row.names = c(NA, 114L), class = "data.frame") [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.