Here is a quick hack (largely untested, apart from the toy example below) that
should read and write UTF-8 encoded CSV files on Windows. It is *not* the best
solution; a proper fix should use C-level code. But it could help a bit in your
case.
Best,

Philippe Grosjean


## Read a UTF-8 encoded table file into a data frame.
##
## Arguments:
##   file  passed unchanged to read.table() as its 'file' argument
##   ...   further arguments forwarded to read.table()
##
## Value: the data frame returned by read.table(). When the session is not
## running in a UTF-8 locale, its character columns and factor levels are
## additionally declared as being UTF-8 encoded.
read.tableUTF8 <- function (file, ...)
{
        if (l10n_info()$`UTF-8`) {
                ## UTF-8 locale: let read.table() decode the file itself
                read.table(file = file, fileEncoding = "UTF-8", ...)
        } else {
                ## Read in the default encoding, i.e., without re-encoding
                res <- read.table(file = file, ...)
                ## Declare the strings as UTF-8 afterwards. The columns are
                ## replaced in place (res[] <- ...) instead of rebuilding the
                ## object with as.data.frame(): a rebuild would drop the row
                ## names and re-apply name checking and string conversion.
                if (length(res) > 0L) {
                        res[] <- lapply(res, function (x) {
                                if (is.character(x)) {
                                        Encoding(x) <- "UTF-8"
                                } else if (is.factor(x)) {
                                        ## Only the levels of a factor
                                        ## hold strings
                                        Encoding(levels(x)) <- "UTF-8"
                                }
                                x
                        })
                }
                res
        }
}

## Write a table to a file with its strings saved as UTF-8.
##
## Arguments:
##   x     the object to write, passed to write.table() (typically a data
##         frame; matrices, plain lists and atomic vectors are accepted too)
##   file  passed unchanged to write.table() as its 'file' argument
##   ...   further arguments forwarded to write.table()
##
## Value: whatever write.table() returns.
write.tableUTF8 <- function (x, file = "", ...)
{
        if (l10n_info()$`UTF-8`) {
                ## UTF-8 locale: let write.table() encode the output itself
                write.table(x = x, file = file, fileEncoding = "UTF-8", ...)
        } else {
                ## Change encoding to "bytes" and save it like that
                ## NOTE(review): this relies on write.table() accepting
                ## strings marked as "bytes" -- confirm on the targeted
                ## R version.
                markBytes <- function (v) {
                        if (is.character(v)) {
                                Encoding(v) <- "bytes"
                        } else if (is.factor(v)) {
                                Encoding(levels(v)) <- "bytes"
                        }
                        v
                }
                if (is.data.frame(x)) {
                        ## Replace the columns in place: turning x into a
                        ## plain list would make write.table() rebuild a data
                        ## frame and lose the row names on the way
                        if (length(x) > 0L)
                                x[] <- lapply(x, markBytes)
                } else if (is.list(x)) {
                        ## Plain list of columns: process each component
                        x <- lapply(x, markBytes)
                } else {
                        ## Matrix or atomic vector: mark it as a whole so
                        ## that it is not split element by element
                        x <- markBytes(x)
                }
                write.table(x = x, file = file, ...)
        }
}

## Toy data: a string vector and a factor built from \u escapes
char <- c("\u2202x", "\u2202y", "\u2202z")
fact <- factor(c("\u0444", "\u220F", "\u2030"))
dfr <- data.frame(x = 1:3, f = fact, s = I(char))
dfr

## Round trip: write the data frame to disk, then read it back
write.tableUTF8(dfr, file = "testUTF8.txt")
dfr2 <- read.tableUTF8("testUTF8.txt")
## Put the string column back in AsIs form, as it is in dfr
dfr2[["s"]] <- I(as.character(dfr2[["s"]]))
dfr2

## Compare the factor and the string columns before and after the round trip
identical(dfr[["f"]], dfr2[["f"]])
identical(dfr[["s"]], dfr2[["s"]])

______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Reply via email to