Perhaps the following, which keeps only the rows whose Debitor value occurs more than once: ds_example <- ds_example[ with( ds_example, 1 < ave( Debitor, Debitor, FUN=length ) ), ]
-- Sent from my phone. Please excuse my brevity. On May 23, 2016 6:57:04 AM PDT, g.maub...@weinwolf.de wrote: >Hi All, > >the solution for my question is as follows > >## Filter duplicates and corresponding non-duplicates >### To filter duplicates and their corresponding non-duplicates use the >### following code snippet: >Debitor <- c("968691", "968691", "968691", > "A04046", "A04046", > "L0006", "L0006", "L0006", > "L0023", "L0023", > "L0056", "L0056", > "L0094", "L0094", "L0094", > "L0124", "L0124", > "L0143", > "L0170", > "13459", > "473908", > "394704", > "4711", > "4712", > "4713") >Debitor <- as.character(Debitor) >var1 <- c(11, 12, 13, > 14, 14, > 12, 13, 14, > 10, 11, > 12, 12, > 12, 12, 12, > 15, 17, > 11, > 14, > 12, > 17, > 13, > 15, > 16, > 11) >ds_example <- data.frame(Debitor, var1) >ds_example$case_id <- 1:nrow(ds_example) >ds_example <- ds_example[, sort(colnames(ds_example))] >ds_example > ># This task is to generate a data frame that contains the duplicates >AND >the ># corresponding non-duplicates to the duplicates. ># For example, finding the duplicates will deliver case 2 and 3 but the > >list ># should also contain case 1 because case 1 is the corresponding case >to >the ># duplicate cases 2 and 3. ># For the whole example dataset that would be: >needed <- c(1, 1, 1, > 1, 1, > 1, 1, 1, > 1, 1, > 1, 1, > 1, 1, 1, > 1, 1, > 0, 0, 0, 0, 0, 0, 0, 0) >needed <- as.logical(needed) >ds_example <- data.frame(ds_example, needed) >ds_example > ># To find the duplicates and the corresponding non-duplicates >duplicates <- duplicated(ds_example$Debitor) > >list_of_duplicated_debitors <- as.character(ds_example[duplicates, >"Debitor"]) > >filter_variable <- unique(list_of_duplicated_debitors) > >### Wrong code. Do not run. 
>### ds_duplicates <- ds_example["Debitor" == filter_variable] # >Result: >dataset with 0 columns >### duplicates_and_correponding_non_duplicates <- ds_example["Debitor"] > >%in% filter_variable # Result: FALSE > >duplicates_and_correponding_non_duplicates <- ds_example$Debitor %in% >filter_variable # Result: OK >duplicates_and_correponding_non_duplicates <- ds_example[, "Debitor"] >%in% >filter_variable # Result: OK > >### Create the dataset with duplicates and corresponding non-duplicates >ds_example <- ds_example[duplicates_and_correponding_non_duplicates, ] >ds_example > >It was a simple mistake when subscripting. > >Kind regards > >Georg Maubach > > >----- Weitergeleitet von Georg Maubach/WWBO/WW/HAW am 23.05.2016 15:54 >----- > >Von: Georg Maubach/WWBO/WW/HAW >An: r-help@r-project.org, >Datum: 23.05.2016 15:28 >Betreff: Filtering String Variables > > ># Hi All, ># ># I have the following data frame (example): > >Debitor <- c("968691", "968691", "968691", > "A04046", "A04046", > "L0006", "L0006", "L0006", > "L0023", "L0023", > "L0056", "L0056", > "L0094", "L0094", "L0094", > "L0124", "L0124", > "L0143", > "L0170", > "13459", > "473908", > "394704", > "4711", > "4712", > "4713") >Debitor <- as.character(Debitor) >var1 <- c(11, 12, 13, > 14, 14, > 12, 13, 14, > 10, 11, > 12, 12, > 12, 12, 12, > 15, 17, > 11, > 14, > 12, > 17, > 13, > 15, > 16, > 11) >ds_example <- data.frame(Debitor, var1) >ds_example$case_id <- 1:nrow(ds_example) >ds_example <- ds_example[, sort(colnames(ds_example))] >ds_example > ># I would like to generate a data frame that contains the duplicates >AND >the ># corresponding non-duplicates to the duplicates. ># For example, finding the duplicates will deliver case 2 and 3 but the > >list ># should also contain case 1 because case 1 is the corresponding case >to >the ># duplicate cases 2 and 3. 
># For the whole example dataset that would be: >needed <- c(1, 1, 1, > 1, 1, > 1, 1, 1, > 1, 1, > 1, 1, > 1, 1, 1, > 1, 1, > 0, 0, 0, 0, 0, 0, 0, 0) >needed <- as.logical(needed) >ds_example <- data.frame(ds_example, needed) >ds_example > ># To find the duplicates and the corresponding non-duplicates >duplicates <- duplicated(ds_example$Debitor) > >list_of_duplicated_debitors <- as.character(ds_example[duplicates, >"Debitor"]) > >filter_variable <- unique(list_of_duplicated_debitors) > >ds_duplicates <- ds_example["Debitor" == filter_variable] # Result: >dataset with 0 columns > >ds_duplicates <- ds_example["Debitor"] %in% filter_variable # Result: >FALSE > ># How can I create a dataset like this > >ds_example <- ds_example[needed, ] >ds_example > ># using the Debitor IDs? > >Kind regards > >Georg Maubach > >______________________________________________ >R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see >https://stat.ethz.ch/mailman/listinfo/r-help >PLEASE do read the posting guide >http://www.R-project.org/posting-guide.html >and provide commented, minimal, self-contained, reproducible code. [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.