To whom it may concerns,

We encountered stack overflow issues when we implemented DFS(depth first 
search) algorithm on a directed graph having 800,000+ vertices and millions of 
edges.  The purpose of running DFS is to use Kosaraju Algorithm to calculate 
the size of SCC(strongly connected componment) in the graph. This is an 
assignment from Coursea.org <http://coursea.org/>. We know the maximum size of 
SCC in the graph is 434,821, which means the maximum recursion depth during 
code running is 434,821. We know it is a large calculation behind the scene, 
therefore, we’ve done the below pre-setup hopefully to solve the stack overflow 
issue. However, we have not got the luck yet. We’ve tried running on both R and 
RStudio.

We would really appreciate if someone in your team can help to investigate. We 
can’t wait to see if our code is working in R. 

System Information:
Model Name:     MacBook Pro
  Model Identifier:     MacBookPro11,1
  Processor Name:       Intel Core i5
  Processor Speed:      2.4 GHz
  Number of Processors: 1
  Total Number of Cores:        2
  L2 Cache (per Core):  256 KB
  L3 Cache:     3 MB
  Memory:       8 GB

System settings we have tried:
1. ulimit- s 65000 (to increase stack size before starting up R)
2. R --max-ppsize =500000 (start R with max ppsize)
3. options(expression=500000)


The data we used can be found on 
https://d18ky98rnyall9.cloudfront.net/_410e934e6553ac56409b2cb7096a44aa_SCC.txt?Expires=1478649600&Signature=YC0OjTn4hmzWpzAMw3WkQjXgxkAc0qhZrCd07JAPtud3dGQqywpQgkAASf-bWX6qgGafVvObciJ3ww-7wbNqY0YhWxwcg-IxmCnz1xJu1SORdDobiVKmhhfSaqgTyullX1hFmcxAA4y6Ud33hY1dhIP~UTAlW~IV8Y-zSliAts8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A
 
<https://d18ky98rnyall9.cloudfront.net/_410e934e6553ac56409b2cb7096a44aa_SCC.txt?Expires=1478649600&Signature=YC0OjTn4hmzWpzAMw3WkQjXgxkAc0qhZrCd07JAPtud3dGQqywpQgkAASf-bWX6qgGafVvObciJ3ww-7wbNqY0YhWxwcg-IxmCnz1xJu1SORdDobiVKmhhfSaqgTyullX1hFmcxAA4y6Ud33hY1dhIP~UTAlW~IV8Y-zSliAts8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A>

Data discription provided as follow:
https://www.coursera.org/learn/algorithm-design-analysis/exam/rOtFq/programming-assignment-4
 
<https://www.coursera.org/learn/algorithm-design-analysis/exam/rOtFq/programming-assignment-4>
 

Below is our code:

options(expressions=500000)
#Prepare input test file  
g=read.table("Downloads/scc.txt")
x<<-as.matrix(g)
remove(g)
vector.is.empty<<-function(x) {return(length(x)==0)}
'%!in%' <- function(x,y)!('%in%'(x,y))
#g<-c(2,1,3,1,3,4,4,2,5,4,5,6,6,7,7,8,8,9,2,3)
#x<<-matrix(g,nrow=10,ncol=2, byrow=TRUE)
#g<-c(1,4,2,8,3,6,4,7,5,2,6,9,7,1,8,5,8,6,9,3,9,7,10,2)
#x<<-matrix(g,nrow=12,ncol=2, byrow=TRUE)
#g<-c(1,2,2,3,2,4,3,4,3,5,4,1,4,13,5,6,6,7,7,8,8,9,9,6,10,9,10,11,11,8,11,12,12,13,12,14,13,10,14,15)
#x<<-matrix(g,20,2,byrow=TRUE)

u1<-unique(x[,1])
u2<-unique(x[,2])
u<-c(u1,u2)
n<<-length(unique(u))
remove(u1,u2,u)

G <<- vector("list", length=n)
G_REV <<- vector("list", length=n)
P = numeric(n)
FT = numeric(n)

for (i in 1:nrow(x)) {
  a = x[i,1]
  b = x[i,2]
  #for G
  if (is.null(G[[a]])) {
    G[[a]] = c(b)
  } else {
    G[[a]] = c(G[[a]], b)
  }
  if (is.null(G[[b]])) {
    G[[b]] = numeric()
  }
  #for G_VEV
  if (is.null(G_REV[[b]])) {
    G_REV[[b]] = c(a)
  } else {
    G_REV[[b]] = c(G_REV[[b]], a)
  }
  if (is.null(G_REV[[a]])) {
    G_REV[[a]] = numeric()
  }
}

G_TEMP <<- G_REV

#G_REV<<-x[,c(2,1)]

#P<<-matrix(c(1:n,rep(0,2*n)),nrow=n,ncol=3) #tracking whether vertex is 
explored or not

#colnames(P)<-c("node","f","parent_vertex")
explore<<-numeric()
assign("time",0,envir=globalenv())
#Algorithm -DFS

DFS_LOOP<<-function(G){
  counter = n 
  for(i in n : 1){
    if (i < counter) {
      counter = counter - 1000;
      print(i);
    }
    if (i %in% explore){next}
    else {
      DFS(i)
    }
  }
}

DFS<<-function(i){
  #if(time>=n){return(p)}
  #else{
  #print(c("i=",i))
  explore<<-c(explore,i)
  #print(c("explore",explore))
  
  #P[i,2] <<- 1 # gray
  v=G_TEMP[[i]]
  
  #print(c("v=",v))
  if (vector.is.empty(v) ==TRUE){
    len=1
    v=i
  }
  
  if(vector.is.empty(v)==FALSE){
    len=length(v)
  }
  
  for(j in 1: len){
    if(v[j] %!in% explore){
      DFS(v[j])
      #P[v[j],3] <<-i
      P[v[j]] <<- i
      # print(c("child",j,"parent",i))
    }
  }
  
  time<<-time + 1
  FT[i] <<- time
  #P[i,2] <<- time
  #P[i,2] <<- 2 #black
  # } <<-else
}
print('Starting DFS_loop on G_REV')
DFS_LOOP(G_REV)
###################################################
#temp0<-matrix(1:n,n,1)
# temp1<-P[,c(1,2)]
# colnames(temp1)<-c("before","after")
# temp1<-as.data.frame(temp1)
# colnames(x)<-c("vertex","edge")
# X<-as.data.frame(x)
# X_NEW<-merge(x=X,y=temp1,by.x="vertex",by.y="before")
# remove(X)
# names(X_NEW)[names(X_NEW)=="after"]<-"vertex_new"
# X_NEW2<-merge(x=X_NEW,y=temp1,by.x="edge",by.y="before")
# remove(X_NEW,temp1)
# names(X_NEW2)[names(X_NEW2)=="after"]<-"edge_new"
# G2<-as.matrix(X_NEW2)
# remove(X_NEW2)
# G2<-G2[,c(3,4)]
# u1<-unique(G2[,1])
# u2<-unique(G2[,2])
# u<-c(u1,u2)
# n<<-length(unique(u))
# remove(u1,u2,u)

FT_SORTED = numeric(n)
for (i in length(FT):1) {
  finish_time = FT[i]
  FT_SORTED[finish_time] = i
}

P = numeric(n)
FT = numeric(n)

#P<<-matrix(c(1:n,rep(0,2*n)),nrow=n,ncol=3) #tracking whether vertex is 
explored or not
#colnames(P)<-c("vertex","f","parent_vertex")
explore<<-numeric()
assign("time",0,envir=globalenv())

print('Starting DFS_loop on G')
#DFS_LOOP(G2)#2nd DFS
explore<<-numeric()

G_TEMP <<- G

for (i in length(FT_SORTED):1) { 
  k = FT_SORTED[i]
  if (k %!in% explore) {
    DFS(k)
  }
}

mscc_temp = which(P==0)
scc_temp=FT[mscc_temp]
#scc_temp<-P[P[,3]==0,2]
scc_temp=sort(scc_temp,decreasing=TRUE)
m=length(scc_temp)
scc=numeric()
for (i in 1:(m-1)){
  scc[i]=scc_temp[i]-scc_temp[i+1]
}
scc[m]<-scc_temp[m]
scc_top_5<-tail(sort(scc),5)


Thanks,
Jing
        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to