#or
dat1$V2[is.na(match(dat1$V2,dat2$V2))]
#[1] "ALKBH1" "ALKBH2" "ANKRD17" "ASF1A" "ASTE1" "ATRX" "ATXN3"
#[8] "BCCIP"
a[is.na(match(a,b))]
#[1] 2 2 4
A.K.
----- Original Message ----- From: William Dunlap <wdun...@tibco.com> To: arun <smartpink...@yahoo.com>; R help <r-help@r-project.org> Cc: Sent: Thursday, May 23, 2013 3:18 PM Subject: RE: [R] strings You recommended > library(sqldf) > sqldf('SELECT * FROM dat1 EXCEPT SELECT * FROM dat2') Using nothing but the core R packages setdiff() returns the difference between two sets. > setdiff(dat1$V2, dat2$V2) [1] "ALKBH1" "ALKBH2" "ANKRD17" "ASF1A" "ASTE1" "ATRX" "ATXN3" "BCCIP" If there are possibly duplicates in dat1$V2, so it is not a "set", and you want the duplicates in the result, use > dat1$V2[ !is.element(dat1$V2, dat2$V2) ] [1] "ALKBH1" "ALKBH2" "ANKRD17" "ASF1A" "ASTE1" "ATRX" "ATXN3" "BCCIP" > a <- c(1, 2, 3, 2, 1, 4) > b <- c(1, 3) > setdiff(a, b) [1] 2 4 > a[ !is.element(a, b) ] [1] 2 2 4 Bill Dunlap Spotfire, TIBCO Software wdunlap tibco.com > -----Original Message----- > From: r-help-boun...@r-project.org [mailto:r-help-boun...@r-project.org] On > Behalf > Of arun > Sent: Thursday, May 23, 2013 12:05 PM > To: R help > Subject: Re: [R] strings > > Hi, > Try: > > dat1<- structure(list(V2 = c("ALKBH1", "ALKBH2", "ALKBH3", "ANKRD17", > "APEX1", "APEX2", "APTX", "ASF1A", "ASTE1", "ATM", "ATR", "ATRIP", > "ATRX", "ATXN3", "BCCIP", "BLM", "BRCA1", "BRCA2")), .Names = "V2", class = > "data.frame", row.names = c(NA, > 18L)) > > > dat2<- structure(list(V2 = c("ALKBH3", "APEX1", "APEX2", "APLF", "APTX", > "ATM", "ATR", "ATRIP", "BLM", "BRCA1", "BRCA2", "BRIP1", "BTBD12", > "CCNH")), .Names = "V2", class = "data.frame", row.names = c(NA, > 14L)) > > > library(sqldf) > sqldf('SELECT * FROM dat1 EXCEPT SELECT * FROM dat2') > # V2 > #1 ALKBH1 > #2 ALKBH2 > #3 ANKRD17 > #4 ASF1A > #5 ASTE1 > #6 ATRX > #7 ATXN3 > #8 BCCIP > > > > #or > dat2$id<- 1 > res<-merge(dat1,dat2,all=TRUE) > subset(res,is.na(res$id))[1] > # V2 > #1 ALKBH1 > #2 ALKBH2 > #4 ANKRD17 > #9 ASF1A > #10 ASTE1 > #14 ATRX > #15 ATXN3 > #16 BCCIP > A.K. > > > > > I have two files containing words. 
I want to print the words that are in file 1 but > NOT in file 2. > How do I go about it? > > file 1: > ABL1 > 1 ALKBH1 > 2 ALKBH2 > 3 ALKBH3 > 4 ANKRD17 > 5 APEX1 > 6 APEX2 > 7 APTX > 8 ASF1A > 9 ASTE1 > 10 ATM > 11 ATR > 12 ATRIP > 13 ATRX > 14 ATXN3 > 15 BCCIP > 16 BLM > 17 BRCA1 > 18 BRCA2 > > > file2: > ALKBH2 > 1 ALKBH3 > 2 APEX1 > 3 APEX2 > 4 APLF > 5 APTX > 6 ATM > 7 ATR > 8 ATRIP > 9 BLM > 10 BRCA1 > 11 BRCA2 > 12 BRIP1 > 13 BTBD12 > 14 CCNH > > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.