To whom it may concerns, We encountered stack overflow issues when we implemented DFS(depth first search) algorithm on a directed graph having 800,000+ vertices and millions of edges. The purpose of running DFS is to use Kosaraju Algorithm to calculate the size of SCC(strongly connected componment) in the graph. This is an assignment from Coursea.org <http://coursea.org/>. We know the maximum size of SCC in the graph is 434,821, which means the maximum recursion depth during code running is 434,821. We know it is a large calculation behind the scene, therefore, we’ve done the below pre-setup hopefully to solve the stack overflow issue. However, we have not got the luck yet. We’ve tried running on both R and RStudio.
We would really appreciate if someone in your team can help to investigate. We can’t wait to see if our code is working in R. System Information: Model Name: MacBook Pro Model Identifier: MacBookPro11,1 Processor Name: Intel Core i5 Processor Speed: 2.4 GHz Number of Processors: 1 Total Number of Cores: 2 L2 Cache (per Core): 256 KB L3 Cache: 3 MB Memory: 8 GB System settings we have tried: 1. ulimit- s 65000 (to increase stack size before starting up R) 2. R --max-ppsize =500000 (start R with max ppsize) 3. options(expression=500000) The data we used can be found on https://d18ky98rnyall9.cloudfront.net/_410e934e6553ac56409b2cb7096a44aa_SCC.txt?Expires=1478649600&Signature=YC0OjTn4hmzWpzAMw3WkQjXgxkAc0qhZrCd07JAPtud3dGQqywpQgkAASf-bWX6qgGafVvObciJ3ww-7wbNqY0YhWxwcg-IxmCnz1xJu1SORdDobiVKmhhfSaqgTyullX1hFmcxAA4y6Ud33hY1dhIP~UTAlW~IV8Y-zSliAts8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A <https://d18ky98rnyall9.cloudfront.net/_410e934e6553ac56409b2cb7096a44aa_SCC.txt?Expires=1478649600&Signature=YC0OjTn4hmzWpzAMw3WkQjXgxkAc0qhZrCd07JAPtud3dGQqywpQgkAASf-bWX6qgGafVvObciJ3ww-7wbNqY0YhWxwcg-IxmCnz1xJu1SORdDobiVKmhhfSaqgTyullX1hFmcxAA4y6Ud33hY1dhIP~UTAlW~IV8Y-zSliAts8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A> Data discription provided as follow: https://www.coursera.org/learn/algorithm-design-analysis/exam/rOtFq/programming-assignment-4 <https://www.coursera.org/learn/algorithm-design-analysis/exam/rOtFq/programming-assignment-4> Below is our code: options(expressions=500000) #Prepare input test file g=read.table("Downloads/scc.txt") x<<-as.matrix(g) remove(g) vector.is.empty<<-function(x) {return(length(x)==0)} '%!in%' <- function(x,y)!('%in%'(x,y)) #g<-c(2,1,3,1,3,4,4,2,5,4,5,6,6,7,7,8,8,9,2,3) #x<<-matrix(g,nrow=10,ncol=2, byrow=TRUE) #g<-c(1,4,2,8,3,6,4,7,5,2,6,9,7,1,8,5,8,6,9,3,9,7,10,2) #x<<-matrix(g,nrow=12,ncol=2, byrow=TRUE) #g<-c(1,2,2,3,2,4,3,4,3,5,4,1,4,13,5,6,6,7,7,8,8,9,9,6,10,9,10,11,11,8,11,12,12,13,12,14,13,10,14,15) #x<<-matrix(g,20,2,byrow=TRUE) u1<-unique(x[,1]) u2<-unique(x[,2]) u<-c(u1,u2) n<<-length(unique(u)) remove(u1,u2,u) G <<- vector("list", length=n) G_REV <<- vector("list", length=n) P = numeric(n) FT = numeric(n) for (i in 1:nrow(x)) { a = x[i,1] b = x[i,2] #for G if (is.null(G[[a]])) { G[[a]] = c(b) } else { G[[a]] = c(G[[a]], b) } if (is.null(G[[b]])) { G[[b]] = numeric() } #for G_VEV if (is.null(G_REV[[b]])) { G_REV[[b]] = c(a) } else { G_REV[[b]] = c(G_REV[[b]], a) } if (is.null(G_REV[[a]])) { G_REV[[a]] = numeric() } } G_TEMP <<- G_REV #G_REV<<-x[,c(2,1)] #P<<-matrix(c(1:n,rep(0,2*n)),nrow=n,ncol=3) #tracking whether vertex is explored or not #colnames(P)<-c("node","f","parent_vertex") explore<<-numeric() assign("time",0,envir=globalenv()) #Algorithm -DFS DFS_LOOP<<-function(G){ counter = n for(i in n : 1){ if (i < counter) { counter = counter - 1000; print(i); } if (i %in% explore){next} else { DFS(i) } } } DFS<<-function(i){ #if(time>=n){return(p)} #else{ #print(c("i=",i)) explore<<-c(explore,i) #print(c("explore",explore)) #P[i,2] <<- 1 # gray v=G_TEMP[[i]] #print(c("v=",v)) if (vector.is.empty(v) ==TRUE){ len=1 v=i } if(vector.is.empty(v)==FALSE){ len=length(v) } for(j in 1: len){ if(v[j] %!in% explore){ DFS(v[j]) #P[v[j],3] <<-i P[v[j]] <<- i # print(c("child",j,"parent",i)) } } time<<-time + 1 FT[i] <<- time #P[i,2] <<- time #P[i,2] <<- 2 #black # } <<-else } print('Starting DFS_loop on G_REV') DFS_LOOP(G_REV) ################################################### #temp0<-matrix(1:n,n,1) # temp1<-P[,c(1,2)] # colnames(temp1)<-c("before","after") # temp1<-as.data.frame(temp1) # colnames(x)<-c("vertex","edge") # X<-as.data.frame(x) # X_NEW<-merge(x=X,y=temp1,by.x="vertex",by.y="before") # remove(X) # names(X_NEW)[names(X_NEW)=="after"]<-"vertex_new" # X_NEW2<-merge(x=X_NEW,y=temp1,by.x="edge",by.y="before") # remove(X_NEW,temp1) # names(X_NEW2)[names(X_NEW2)=="after"]<-"edge_new" # G2<-as.matrix(X_NEW2) # remove(X_NEW2) # G2<-G2[,c(3,4)] # u1<-unique(G2[,1]) # u2<-unique(G2[,2]) # u<-c(u1,u2) # n<<-length(unique(u)) # remove(u1,u2,u) FT_SORTED = numeric(n) for (i in length(FT):1) { finish_time = FT[i] FT_SORTED[finish_time] = i } P = numeric(n) FT = numeric(n) #P<<-matrix(c(1:n,rep(0,2*n)),nrow=n,ncol=3) #tracking whether vertex is explored or not #colnames(P)<-c("vertex","f","parent_vertex") explore<<-numeric() assign("time",0,envir=globalenv()) print('Starting DFS_loop on G') #DFS_LOOP(G2)#2nd DFS explore<<-numeric() G_TEMP <<- G for (i in length(FT_SORTED):1) { k = FT_SORTED[i] if (k %!in% explore) { DFS(k) } } mscc_temp = which(P==0) scc_temp=FT[mscc_temp] #scc_temp<-P[P[,3]==0,2] scc_temp=sort(scc_temp,decreasing=TRUE) m=length(scc_temp) scc=numeric() for (i in 1:(m-1)){ scc[i]=scc_temp[i]-scc_temp[i+1] } scc[m]<-scc_temp[m] scc_top_5<-tail(sort(scc),5) Thanks, Jing [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.