### Tests of iconv, especially 'sub'

## A latin1 string containing one non-ASCII byte (0xE7, c-cedilla).
x <- "fa\xE7ile"
Encoding(x) <- "latin1"
x
xx <- iconv(x, "latin1", "UTF-8")
xx

## In UTF-8 the 0xE7 byte becomes the two-byte sequence c3 a7.
## identical() is used for raw vectors so that a length mismatch cannot
## pass via recycling in '=='.
stopifnot(identical(charToRaw(xx),
                    as.raw(c(0x66, 0x61, 0xc3, 0xa7, 0x69, 0x6c, 0x65))))

## Without 'sub' an inconvertible input gives NA.
stopifnot(is.na(iconv(x, "latin1", "ASCII")))

## 'sub' given as a literal string replaces each inconvertible byte.
stopifnot(iconv(x, "latin1", "ASCII", "?") == "fa?ile")
stopifnot(iconv(x, "latin1", "ASCII", "") == "faile")

## Special values of 'sub': "byte" gives <xx> (hex code of each byte),
## "Unicode" gives <U+xxxx>, "c99" gives \uxxxx or \Uxxxxxxxx.
## The angle brackets are literal characters in the expected output.
stopifnot(iconv(x, "latin1", "ASCII", "byte") == "fa<e7>ile")
stopifnot(iconv(xx, "UTF-8", "ASCII", "Unicode") == "fa<U+00E7>ile")
stopifnot(iconv(xx, "UTF-8", "ASCII", "c99") == "fa\\u00e7ile")

## Check the c99 result byte by byte: the backslash must be one character.
c99_chars <- c("f", "a", "\\", "u", "0", "0", "e", "7", "i", "l", "e")
stopifnot(identical(charToRaw(iconv(xx, "UTF-8", "ASCII", "c99")),
                    vapply(c99_chars, charToRaw, raw(1), USE.NAMES = FALSE)))

## A code point outside the BMP: four bytes (f0 9f 98 80) in UTF-8.
z <- "\U1f600"
charToRaw(z)
stopifnot(iconv(z, "UTF-8", "ASCII", "byte") == "<f0><9f><98><80>")
stopifnot(iconv(z, "UTF-8", "ASCII", "Unicode") == "<U+1F600>")
stopifnot(iconv(z, "UTF-8", "ASCII", "c99") == "\\U0001f600")

## write out to compare with GNU libiconv's iconv on e.g. macOS
## The reading can only work in a UTF-8 locale
if (startsWith(extSoftVersion()["iconv"], "GNU libiconv") &&
    l10n_info()[["UTF-8"]]) {
  writeLines(c(xx, z), "test.txt")
  zz <- system2("iconv", c("-f", "UTF-8", "-t", "c99", "test.txt"),
                stdout = TRUE)
  unlink("test.txt")
  stopifnot(zz == iconv(c(xx, z), "UTF-8", "ASCII", "c99"))
  message('sub = "c99" agrees with GNU libiconv')
} else {
  message('sub = "c99" agrees with GNU libiconv -- SKIPPED')
}