I am reading several hundred files, anywhere from 50k to 400k in size. It
appears that when I read these files with R 2.15.1, the process hangs or
seg faults on the scan() call. This does not happen with R 2.14.1.
This is happening on Ubuntu 12.04 (Precise).
I have included everything, but the issue appears to occur when performing
the scan() call in the function parseTickData.
The code is below. Hopefully this is the right place to post.
# Read every tick file in a directory, resample each one onto a common
# per-second trading-day template, bucket it with to.period(), and merge the
# per-file results into a single object.
#
# Arguments:
#   tickerDir  directory whose files are each read with parseTickData()
#   per        period name forwarded to to.period() as `period`
#   subper     bucket multiplier forwarded to to.period() as `k`
#   fun        function applied to each file's bucketed data before merging
#
# Returns the merge() of fun(<bucketed data>) over all files, with column
# names set from the file names.
# NOTE(review): the final colnames() assignment presumably expects fun() to
# return exactly one column per file -- confirm against callers.
parseTickDataFromDir <- function(tickerDir, per, subper, fun) {
  tickerAbsFilenames <- list.files(tickerDir, full.names = TRUE)
  tickerNames <- list.files(tickerDir, full.names = FALSE)
  # NOTE(review): this pattern only strips a suffix made of "_", a single
  # alphanumeric character, any one character, then "csv" -- verify that it
  # matches the actual file naming scheme.
  tickerNames <- gsub("_[a-zA-Z0-9].csv", "", tickerNames)

  # Fail early with a clear message; 1:length(x) on an empty vector would
  # otherwise iterate over c(1, 0) and index past the end of the file list.
  if (length(tickerAbsFilenames) == 0) {
    stop("no files found in directory: ", tickerDir, call. = FALSE)
  }

  pb <- txtProgressBar(min = 0, max = length(tickerAbsFilenames), style = 3)
  # Close the progress bar even when one of the files fails to process.
  on.exit(close(pb), add = TRUE)

  for (i in seq_along(tickerAbsFilenames)) {
    # Grab raw tick data
    dat.i <- parseTickData(tickerAbsFilenames[i])

    # Create template: one timestamp per second from 09:30:00 through
    # 16:00:00 (23400 seconds later) for every date present in the data.
    dates <- unique(substr(as.character(index(dat.i)), 1, 10))
    times <- rep("09:30:00", length(dates))
    openDateTimes <- strptime(paste(dates, times), "%F %H:%M:%S")
    # Build each day's sequence separately and concatenate once instead of
    # growing the vector with c() on every iteration.
    templateTimes <- do.call(
      c,
      lapply(seq_along(openDateTimes), function(j) openDateTimes[j] + 0:23400)
    )

    # Convert templateTimes to xts, merge with data and convert NA's
    templateTimes <- as.xts(templateTimes)
    dat.i <- merge(dat.i, templateTimes, all = TRUE)

    # If there is no data in the first print, we will have leading NA's.
    # Set them to -1, since we do not want these values removed by to.period.
    if (is.na(dat.i[1])) {
      dat.i[1] <- -1
    }

    # Fix remaining NA's by carrying the last observation forward
    dat.i <- na.locf(dat.i)

    # Convert to desired bucket size
    dat.i <- to.period(dat.i, period = per, k = subper, name = NULL)

    # Always use templated index, otherwise merge fails with other symbols
    index(dat.i) <- index(to.period(templateTimes, period = per, k = subper))

    # If there was missing data at open (the -1 sentinel), set close to NA
    valsToChange <- which(dat.i[, "Open"] == -1)
    if (length(valsToChange) != 0) {
      dat.i[valsToChange, "Close"] <- NA
    }

    if (i == 1) {
      DAT <- fun(dat.i)
    } else {
      DAT <- merge(DAT, fun(dat.i))
    }
    setTxtProgressBar(pb, i)
  }

  colnames(DAT) <- tickerNames
  DAT
}
# Read one tick-data file and return its Close column as an xts series.
#
# Arguments:
#   inputFile  path of a comma-separated file; the first line is skipped and
#              each remaining record is read as Date, Time, Close, Volume
#
# Returns an xts object holding the Close values, indexed by the parsed
# Date + Time stamps, with duplicate timestamps made unique by
# make.index.unique(). The Volume field is read but not returned.
parseTickData <- function(inputFile) {
  DAT.list <- scan(
    file = inputFile,
    sep = ",",
    skip = 1,
    what = list(Date = "", Time = "", Close = 0, Volume = 0),
    quiet = TRUE
  )
  # The format is kept on a single line: a line break inside the string
  # literal would become part of the format passed to as.POSIXct().
  # Named `timestamps` rather than `index` so the index() accessor is not
  # shadowed by a local variable.
  timestamps <- as.POSIXct(
    paste(DAT.list$Date, DAT.list$Time),
    format = "%m/%d/%Y %H:%M:%S"
  )
  DAT.xts <- xts(DAT.list$Close, timestamps)
  make.index.unique(DAT.xts)
}
[[alternative HTML version deleted]]
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel