See below.

Am 30.11.2017 um 15:27 schrieb Martin Møller Skarbiniks Pedersen:
Hi,
   I think and hope this is a good place to ask for a code review for an R
beginner?

   I have made an R script which generates a dataset based on the 2009 Danish
referendum and it does work.

   But I think the code could be better and I would appreciate any comments
on how the code can be improved.
   At least I would like to know how I can avoid converting several of the
columns to factors at the end of the code?

Description of the code:

   It reads a lot of xml-files from ../raw/ and saves a data.frame with
information
from these xml-files.

   In the ../raw/ directory I have placed the xml-files which I got from
"Statistics Denmark"
   I have also put these xml-files on my website and they can be downloaded
freely from http://20dage.dk/R/referendum-2009/raw.tar.gz

   The code is below but I have also put the code at this place:
http://20dage.dk/R/referendum-2009/convert_from_xml.R

Best Regards
Martin M. S. Pedersen

-------
library(xml2)

# Parse one XML result file into a data frame.
#
# Args:
#   url: Path or URL of the XML document; passed unchanged to read_xml().
#
# Returns:
#   A data.frame with one row per <Parti> node found in the document and the
#   columns StedType, StedTekst, PartiId, PartiBogstav, PartiNavn,
#   StemmerAntal, Stemmeberettigede, DeltagelsePct, IAltGyldigeStemmer,
#   BlankeStemmer and AndreUgyldigeStemmer. The values read from the first
#   <Sted> node and the document-level totals are length-one and are recycled
#   onto every row. Text columns are character; counts are integer;
#   DeltagelsePct is double.
convert_one_file <- function(url) {
  x <- read_xml(url)

  Sted <- xml_find_first(x, ".//Sted")
  Parti <- xml_find_all(x, ".//Parti")

  # Build the data frame directly from the vectors. Wrapping them in cbind()
  # first would create a character matrix and thereby turn every column --
  # including the ones parsed by xml_integer()/xml_double() -- into text,
  # forcing the caller to convert all of them back afterwards.
  data.frame(
    StedType = xml_attr(Sted, "Type"),
    StedTekst = xml_text(Sted),
    PartiId = xml_attr(Parti, "Id"),
    PartiBogstav = xml_attr(Parti, "Bogstav"),
    PartiNavn = xml_attr(Parti, "Navn"),
    # xml_attr() always returns character, so convert the vote count here.
    StemmerAntal = as.integer(xml_attr(Parti, "StemmerAntal")),
    Stemmeberettigede = xml_integer(
      xml_find_first(x, ".//Stemmeberettigede")
    ),
    DeltagelsePct = xml_double(xml_find_first(x, ".//DeltagelsePct")),
    IAltGyldigeStemmer = xml_integer(
      xml_find_first(x, ".//IAltGyldigeStemmer")
    ),
    BlankeStemmer = xml_integer(xml_find_first(x, ".//BlankeStemmer")),
    AndreUgyldigeStemmer = xml_integer(
      xml_find_first(x, ".//AndreUgyldigeStemmer")
    ),
    stringsAsFactors = FALSE
  )
}

# Convert every "fintal_*" file below raw_path with convert_one_file(), stack
# the per-file tables into `result`, fix the column types and save the result.
raw_path <- "../raw"
filenames <- dir(path = raw_path, pattern = "fintal_.*", full.names = TRUE)

# Zero-row template describing the expected columns. rbind() drops zero-row
# data frames when other rows are present, so the template only determines
# the outcome when no file could be converted at all.
template <- data.frame(StedType = factor(),
                       StedTekst = character(),
                       PartiId = factor(),
                       PartiBogstav = factor(),
                       PartiNavn = factor(),
                       StemmerAntal = integer(),
                       Stemmeberettigede = integer(),
                       DeltagelsePct = numeric(),
                       IAltGyldigeStemmer = integer(),
                       BlankeStemmer = integer(),
                       AndreUgyldigeStemmer = integer(),
                       stringsAsFactors = FALSE)

# Convert each file on its own so that one broken file is reported and
# skipped instead of aborting the whole run. Collecting the pieces in a list
# and binding once avoids re-copying the growing result on every iteration.
pieces <- lapply(filenames, function(filename) {
  tryCatch({
    piece <- convert_one_file(filename)
    # Reject tables with unexpected columns here, so that the failure is
    # attributed to this file rather than breaking the final rbind().
    if (!identical(names(piece), names(template))) {
      stop("unexpected columns: ", paste(names(piece), collapse = ", "))
    }
    piece
  }, error = function(e) {
    cat(paste0(filename, " failed:\n", conditionMessage(e), "\n"))
    NULL
  })
})
pieces <- Filter(Negate(is.null), pieces)

result <- do.call(rbind, c(list(template), pieces))

# Enforce the final column types. These conversions are no-ops for columns
# that already arrive with the right type.
factor_cols <- c("StedType", "PartiId", "PartiBogstav", "PartiNavn")
integer_cols <- c("StemmerAntal", "Stemmeberettigede", "IAltGyldigeStemmer",
                  "BlankeStemmer", "AndreUgyldigeStemmer")
result[factor_cols] <- lapply(result[factor_cols], as.factor)
result[integer_cols] <- lapply(result[integer_cols], as.integer)
result$DeltagelsePct <- as.numeric(result$DeltagelsePct)

str(result)
save(result, file = "folkeafstemning2009.Rdata")

Maybe two loops simplify this a little bit for you (not tested):

for(v in c("StedType", <etc.>))
 result[[v]] <- factor(result[[v]])

for(v in c("StemmerAntal", <etc.>))
 result[[v]] <- as.integer(result[[v]])

 Hth  --  Gerrit

---------------------------------------------------------------------
Dr. Gerrit Eichner                   Mathematical Institute, Room 212
gerrit.eich...@math.uni-giessen.de   Justus-Liebig-University Giessen
Tel: +49-(0)641-99-32104          Arndtstr. 2, 35392 Giessen, Germany
Fax: +49-(0)641-99-32109            http://www.uni-giessen.de/eichner
---------------------------------------------------------------------


        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to