Please try this (it groups ids that are linked, directly or indirectly, by the pairs in id1/id2):

## Import data
## Position i pairs id1[i] with id2[i]; each pair marks two ids as linked.
id1 <- c(4, 17, 9, 1, 1, 1, 3, 3, 6, 15, 1, 1, 1, 1, 3, 3, 3, 3, 4, 4, 4, 5, 5,
         12, 9, 9, 10, 10)
id2 <- c(8, 18, 10, 3, 6, 7, 6, 7, 7, 16, 4, 5, 12, 18, 4, 5, 12, 18, 5, 12, 18,
         12, 18, 18, 15, 16, 15, 16)
id <- data.frame(id1 = id1, id2 = id2)

## Create same structure table
## id0 keeps the original pairs; id is relabelled in place until both columns
## of every row carry the same label (the smallest id of the linked group).
id <- id0 <- unique(id)
leng <- nrow(id)

repeat {
  # Visit rows from the largest labels downwards, as the original script did.
  id <- id[order(-id$id1, -id$id2), ]

  # Stop once every row is resolved (both columns already agree).
  if (!any(id$id1 != id$id2)) {
    break
  }

  for (i in seq_len(leng)) {
    if (id$id1[i] == id$id2[i]) {
      next
    }
    smal <- min(id$id1[i], id$id2[i])
    larg <- max(id$id1[i], id$id2[i])
    # Merge the two labels: every occurrence of the larger label, in either
    # column, is rewritten to the smaller one.
    id$id2[id$id2 == larg] <- smal
    id$id1[id$id1 == larg] <- smal
  }
}

## Create results
## Cross-tabulate every original id (rows) against its final label (columns);
## id is first put back into the row order of id0 via its row names.
tab <- table(
  as.matrix(id0),
  as.matrix(id[order(as.numeric(rownames(id))), ])
)

# One character vector of member ids per final label. Indexing rownames(tab)
# directly (instead of taking rownames() of a subsetted table) still works
# when the table has a single column or only one row matches, where `[` would
# drop the result to a vector and rownames() would return NULL.
res <- lapply(seq_len(ncol(tab)), function(j) rownames(tab)[tab[, j] != 0])
res

-----
An R learner.
-- 
View this message in context: 
http://r.789695.n4.nabble.com/deduplication-tp2241637p2242921.html
Sent from the R help mailing list archive at Nabble.com.

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to