The value of 'url.zill' is a vector of 407 character strings, so xmlTreeParse is being handed all 407 URLs in a single call:

Browse[1]> str(url.zill)
 chr [1:407] "http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=X1-ZWz1bup03e49vv_5kvb6&address=10+PACER+LN&citystatezip=East+"| __truncated__ ...

Isn't it supposed to be just a single file name? On Mon, Jul 4, 2011 at 8:42 PM, eric <ericst...@aol.com> wrote: > Can't seem to get the code below working. It gets stuck on line 24 inside the > function hm; comments show the line in question. The function hm is called > by sapply and is at the bottom of the code. Other stuff above line 24 works > correctly including the first couple of lines of the function hm. Should I > be using a different apply function or am I doing something wrong with > xmlTreeParse ? > > > library(XML) > url.montco <- > "http://webapp.montcopa.org/sherreal/salelist.asp?saledate=07/27/2011" > tbl <-data.frame(readHTMLTable(url.montco))[, c(3,5,6,8,9)] > tbl <-tbl[2: length(tbl[,1]),] > names(tbl) <- c("Address", "Township", "Parcel", "SaleDate", "Costs"); > rownames(tbl) <- NULL > v <- gregexpr("( aka )|( AKA )",tbl$Address) > s <-sapply(v, function(x) max(unlist(x))) > tbl$Address <- substring(tbl$Address, ifelse(s== -1, 0, s+4), 10000) > tbl$Cost <- gsub(',', '', tbl$Costs) > temp <- strsplit(tbl$Cost, "\\$") > temp <- do.call(rbind, temp) # create a matrix > mode(temp) <- 'numeric' > tbl$Debt <- round(temp[, 2]/1000,2) > tbl$Court <- round(temp[, 3]/1000,2) > z <- data.frame(substr(tbl$SaleDate,regexpr("[A-Za-z]", tbl$SaleDate), > regexpr("[0-9]", tbl$SaleDate,)-1)) ; names(z) <- "Action" > y <- data.frame(substr(tbl$SaleDate,regexpr("[0-9]", tbl$SaleDate),2011)) ; > names(y) <- "ActionDate" > tbl <-cbind(tbl[, c(1,2,3,7,8)],z,y) > new.add <- paste(tbl$Address,"&citystatezip=",tbl$Township,"%2C+PA", sep='') > new.add <- sub("^( )+","", new.add) > new.add <-data.frame(gsub("( )+",'+', new.add)); names(new.add) <- > "ParseAddress" > hm <- function(x) { > url.zill > <-paste("http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=X1-ZWz1bup03e49vv_5kvb6&address=",x, > sep="") > ############## problem line is next ################################# > zdoc <-xmlTreeParse(url.zill, useInternalNode=TRUE, isURL=TRUE) > ############# 
problem line above ################################## > f$zpid <- sapply(getNodeSet(zdoc, "//result/zpid"), xmlValue) > f$zest.low <-sapply(getNodeSet(zdoc, "//valuationRange/low"), xmlValue) > f$zest <- sapply(getNodeSet(zdoc, "//zestimate/amount"), xmlValue) > rm(zdoc) > return(f) > } > j <-sapply(new.add, FUN=hm) > print(zest) > > -- > View this message in context: > http://r.789695.n4.nabble.com/Stuck-can-t-get-sapply-and-xmlTreeParse-working-tp3644894p3644894.html > Sent from the R help mailing list archive at Nabble.com. > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. > -- Jim Holtman Data Munger Guru What is the problem that you are trying to solve? ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.