/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 2005-2014 The R Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ */ /* This file was contributed by Ei-ji Nakama. * It exports locale2charset for use in gram.y, and rlocale.c on macOS. * And sysutils.c, grDevices/src/devPS.c */ /* setlocale(LC_CTYPE,NULL) to encodingname cf nl_langinfo(LC_CTYPE) */ /********************************************************************* * usage : char *locale2charset(const char *locale) * * return : ASCII - default and undefine * * other - encodename * * * * cc -o localecharset -DDEBUG_TEST=1 localecharset.c * * or * * cc -o localecharset -DDEBUG_TEST=2 localecharset.c * *********************************************************************/ #ifdef HAVE_CONFIG_H # include #endif #ifdef DEBUG_TEST #define SPRINT(x) printf("%6d:" #x "=%s\n", __LINE__, x) #define DPRINT(x) printf("%6d:" #x "=%d\n", __LINE__, x) //#define HAVE_STRING_H #endif #include #include #include #include #include #include //#include /* To get the correct linkage for locale2charset */ /* name_value struct */ typedef struct { char *name; char *value; } name_value; #ifndef __APPLE__ /* * codeset name defined. 
* cat /usr/X11R6/lib/X11/locale/locale.alias | \ sed -e '/#.*$/d' -e 's/://' | \ awk '{gsub(/^[^.]+\./, "", $2); $2=toupper($2); gsub(/^EUC/, "EUC-",$2); gsub(/^BIG5HKSCS$/, "BIG5-HKSCS",$2); if (($2!="")&&(!system("iconv --list|grep " $2 ))) print $2 }' | \ sed -e '/\/$/d' | \ sort | uniq | \ awk '{NAME=$1;gsub(/-/,"_",NAME); printf("static const char ENC_%-20s\"%s\";\n", NAME "[]=" , $1)}' */

/*
 * Encoding-name strings, one array per charset name.
 *
 * The guess[] table in this file stores these arrays as the `value` of its
 * entries. They are plain (non-const) char arrays, which matches the
 * `char *value` member of name_value.
 *
 * Declarations that are commented out are names that no guess[] entry
 * refers to; they are kept as comments rather than deleted.
 */
static char ENC_ARMSCII_8[]= "ARMSCII-8";
static char ENC_BIG5[]= "BIG5";
static char ENC_BIG5_HKSCS[]= "BIG5-HKSCS";
static char ENC_C[]= "C";
static char ENC_CP1251[]= "CP1251";
static char ENC_CP1255[]= "CP1255";
static char ENC_CP1256[]= "CP1256";
static char ENC_EUC_CN[]= "EUC-CN";
static char ENC_EUC_JP[]= "EUC-JP";
static char ENC_EUC_KR[]= "EUC-KR";
static char ENC_EUC_TW[]= "EUC-TW";
static char ENC_GB2312[]= "GB2312";
static char ENC_GBK[]= "GBK";
static char ENC_GEORGIAN_ACADEMY[]= "GEORGIAN-ACADEMY";
/* static char ENC_GEORGIAN_PS[]= "GEORGIAN-PS"; */
/* static char ENC_ISIRI_3342[]= "ISIRI-3342"; */
static char ENC_ISO8859_1[]= "ISO8859-1";
static char ENC_ISO8859_10[]= "ISO8859-10";
static char ENC_ISO8859_11[]= "ISO8859-11";
static char ENC_ISO8859_13[]= "ISO8859-13";
/* static char ENC_ISO8859_14[]= "ISO8859-14"; */
static char ENC_ISO8859_15[]= "ISO8859-15";
static char ENC_ISO8859_2[]= "ISO8859-2";
static char ENC_ISO8859_3[]= "ISO8859-3";
/* static char ENC_ISO8859_4[]= "ISO8859-4"; */
static char ENC_ISO8859_5[]= "ISO8859-5";
static char ENC_ISO8859_6[]= "ISO8859-6";
static char ENC_ISO8859_7[]= "ISO8859-7";
static char ENC_ISO8859_8[]= "ISO8859-8";
static char ENC_ISO8859_9[]= "ISO8859-9";
static char ENC_KOI8_R[]= "KOI8-R";
static char ENC_KOI8_U[]= "KOI8-U";
/* static char ENC_SJIS[]= "SJIS"; */
static char ENC_TCVN[]= "TCVN";
/* static char ENC_TIS620[]= "TIS620"; */
static char ENC_UTF_8[]= "UTF-8";
/* static char ENC_VISCII[]= "VISCII"; */

/* # charset getscript. iconv list output line is backslant.
cat /usr/X11R6/lib/X11/locale/locale.alias | \ sed -e '/#.*$/d ; /^[A-z]*\./d' -e 's/://' | \ awk '{gsub(/^[^.]+\./, "", $2); $2=toupper($2); gsub(/^EUC/, "EUC-",$2); gsub(/^BIG5HKSCS$/, "BIG5-HKSCS",$2); NAME=$2; gsub(/\xe7/,"\"\"\\xe7\"\"",$1); gsub(/\xe5/,"\"\"\\xe5\"\"",$1); gsub(/-/, "_",NAME); NAME="ENC_" NAME; if (($2!="")&&(!system("iconv --list|grep " $2 ))) print $1 " " NAME }' | \ sed -e '/\/$/d' | \ sort -k 1 | uniq | \ awk '{printf (" {%-34s%s},\n", "\"" $1 "\",", $2)}' */

/*
 * guess[]: maps a locale name to one of the ENC_* encoding-name strings.
 *
 * ORDERING REQUIREMENT: the entries must be in ascending strcmp() order of
 * their name. name_value_search() in this file bisects a table with
 * strcmp(), and strcmp() compares bytes as unsigned char, so a byte such
 * as 0xE5 sorts AFTER every ASCII letter. When regenerating this table
 * with the script above, sort in the C locale (LC_ALL=C), not in a locale
 * that interleaves accented letters with their base letters.
 * NOTE(review): the call that searches this table lies outside the part of
 * the file reviewed here - presumably locale2charset(); confirm.
 *
 * "bokmal" is listed before "bokm\xe5l" for that reason. With the two in
 * the opposite order, a bisection can never reach one of them.
 */
static const name_value guess[] = {
    {"Cextend", ENC_ISO8859_1},
    {"English_United-States.437", ENC_C},
    {"ISO-8859-1", ENC_ISO8859_1},
    {"ISO8859-1", ENC_ISO8859_1},
    {"Japanese-EUC", ENC_EUC_JP},
    {"Jp_JP", ENC_EUC_JP},
    {"POSIX", ENC_C},
    {"POSIX-UTF2", ENC_C},
    {"aa_DJ", ENC_ISO8859_1},
    {"aa_ER", ENC_UTF_8},
    {"aa_ER@saaho", ENC_UTF_8},
    {"aa_ET", ENC_UTF_8},
    {"af", ENC_ISO8859_1},
    {"af_ZA", ENC_ISO8859_1},
    {"am", ENC_UTF_8},
    {"am_ET", ENC_UTF_8},
    {"an_ES", ENC_ISO8859_15},
    {"ar", ENC_ISO8859_6},
    {"ar_AA", ENC_ISO8859_6},
    {"ar_AE", ENC_ISO8859_6},
    {"ar_BH", ENC_ISO8859_6},
    {"ar_DZ", ENC_ISO8859_6},
    {"ar_EG", ENC_ISO8859_6},
    {"ar_IN", ENC_UTF_8},
    {"ar_IQ", ENC_ISO8859_6},
    {"ar_JO", ENC_ISO8859_6},
    {"ar_KW", ENC_ISO8859_6},
    {"ar_LB", ENC_ISO8859_6},
    {"ar_LY", ENC_ISO8859_6},
    {"ar_MA", ENC_ISO8859_6},
    {"ar_OM", ENC_ISO8859_6},
    {"ar_QA", ENC_ISO8859_6},
    {"ar_SA", ENC_ISO8859_6},
    {"ar_SD", ENC_ISO8859_6},
    {"ar_SY", ENC_ISO8859_6},
    {"ar_TN", ENC_ISO8859_6},
    {"ar_YE", ENC_ISO8859_6},
    {"be", ENC_CP1251},
    {"be_BY", ENC_CP1251},
    {"bg", ENC_CP1251},
    {"bg_BG", ENC_CP1251},
    {"bn_BD", ENC_UTF_8},
    {"bn_IN", ENC_UTF_8},
    /* 'a' (0x61) < 0xE5 as unsigned char: plain spelling comes first */
    {"bokmal", ENC_ISO8859_1},
    {"bokm""\xe5""l", ENC_ISO8859_1},
    {"br", ENC_ISO8859_1},
    {"br_FR", ENC_ISO8859_1},
    {"br_FR@euro", ENC_ISO8859_15},
    {"bs_BA", ENC_ISO8859_2},
    {"bulgarian", ENC_CP1251},
    {"byn_ER", ENC_UTF_8},
    {"c-french.iso88591", ENC_ISO8859_1},
    {"ca", ENC_ISO8859_1},
    {"ca_ES", ENC_ISO8859_1},
    {"ca_ES@euro", ENC_ISO8859_15},
    {"catalan", ENC_ISO8859_1},
    {"chinese-s", ENC_EUC_CN},
    {"chinese-t", ENC_EUC_TW},
    {"croatian", ENC_ISO8859_2},
    {"cs", ENC_ISO8859_2},
    {"cs_CS", ENC_ISO8859_2},
    {"cs_CZ", ENC_ISO8859_2},
    {"cy", ENC_ISO8859_1},
    {"cy_GB", ENC_ISO8859_1},
    {"cz", ENC_ISO8859_2},
    {"cz_CZ", ENC_ISO8859_2},
    {"czech", ENC_ISO8859_2},
    {"da", ENC_ISO8859_1},
    {"da_DK", ENC_ISO8859_1},
    {"danish", ENC_ISO8859_1},
    {"dansk", ENC_ISO8859_1},
    {"de", ENC_ISO8859_1},
    {"de_AT", ENC_ISO8859_1},
    {"de_AT@euro", ENC_ISO8859_15},
    {"de_BE", ENC_ISO8859_1},
    {"de_BE@euro", ENC_ISO8859_15},
    {"de_CH", ENC_ISO8859_1},
    {"de_DE", ENC_ISO8859_1},
    {"de_DE@euro", ENC_ISO8859_15},
    {"de_LI", ENC_ISO8859_1},
    {"de_LI@euro", ENC_ISO8859_15},
    {"de_LU", ENC_ISO8859_1},
    {"de_LU@euro", ENC_ISO8859_15},
    {"deutsch", ENC_ISO8859_1},
    {"dutch", ENC_ISO8859_1},
    {"eesti", ENC_ISO8859_1},
    {"el", ENC_ISO8859_7},
    {"el_GR", ENC_ISO8859_7},
    {"en", ENC_ISO8859_1},
    {"en_AU", ENC_ISO8859_1},
    {"en_BW", ENC_ISO8859_1},
    {"en_CA", ENC_ISO8859_1},
    {"en_DK", ENC_ISO8859_1},
    {"en_GB", ENC_ISO8859_1},
    {"en_HK", ENC_ISO8859_1},
    {"en_IE", ENC_ISO8859_1},
    {"en_IE@euro", ENC_ISO8859_15},
    {"en_IN", ENC_UTF_8},
    {"en_NZ", ENC_ISO8859_1},
    {"en_PH", ENC_ISO8859_1},
    {"en_SG", ENC_ISO8859_1},
    {"en_UK", ENC_ISO8859_1},
    {"en_US", ENC_ISO8859_1},
    {"en_ZA", ENC_ISO8859_1},
    {"en_ZW", ENC_ISO8859_1},
    {"es", ENC_ISO8859_1},
    {"es_AR", ENC_ISO8859_1},
    {"es_BO", ENC_ISO8859_1},
    {"es_CL", ENC_ISO8859_1},
    {"es_CO", ENC_ISO8859_1},
    {"es_CR", ENC_ISO8859_1},
    {"es_DO", ENC_ISO8859_1},
    {"es_EC", ENC_ISO8859_1},
    {"es_ES", ENC_ISO8859_1},
    {"es_ES@euro", ENC_ISO8859_15},
    {"es_GT", ENC_ISO8859_1},
    {"es_HN", ENC_ISO8859_1},
    {"es_MX", ENC_ISO8859_1},
    {"es_NI", ENC_ISO8859_1},
    {"es_PA", ENC_ISO8859_1},
    {"es_PE", ENC_ISO8859_1},
    {"es_PR", ENC_ISO8859_1},
    {"es_PY", ENC_ISO8859_1},
    {"es_SV", ENC_ISO8859_1},
    {"es_US", ENC_ISO8859_1},
    {"es_UY", ENC_ISO8859_1},
    {"es_VE", ENC_ISO8859_1},
    {"estonian", ENC_ISO8859_1},
    {"et", ENC_ISO8859_15},
    {"et_EE", ENC_ISO8859_15},
    {"eu", ENC_ISO8859_1},
    {"eu_ES", ENC_ISO8859_1},
    {"eu_ES@euro", ENC_ISO8859_15},
    {"eu_FR", ENC_ISO8859_1},
    {"eu_FR@euro", ENC_ISO8859_15},
    {"fa", ENC_UTF_8},
    {"fa_IR", ENC_UTF_8},
    {"fi", ENC_ISO8859_1},
    {"fi_FI", ENC_ISO8859_1},
    {"fi_FI@euro", ENC_ISO8859_15},
    {"finnish", ENC_ISO8859_1},
    {"fo", ENC_ISO8859_1},
    {"fo_FO", ENC_ISO8859_1},
    {"fr", ENC_ISO8859_1},
    {"fr_BE", ENC_ISO8859_1},
    {"fr_BE@euro", ENC_ISO8859_15},
    {"fr_CA", ENC_ISO8859_1},
    {"fr_CH", ENC_ISO8859_1},
    {"fr_FR", ENC_ISO8859_1},
    {"fr_FR@euro", ENC_ISO8859_15},
    {"fr_LU", ENC_ISO8859_1},
    {"fr_LU@euro", ENC_ISO8859_15},
    {"fran""\xe7""ais", ENC_ISO8859_1},
    {"french", ENC_ISO8859_1},
    {"ga", ENC_ISO8859_1},
    {"ga_IE", ENC_ISO8859_1},
    {"ga_IE@euro", ENC_ISO8859_15},
    {"galego", ENC_ISO8859_1},
    {"galician", ENC_ISO8859_1},
    {"gd", ENC_ISO8859_1},
    {"gd_GB", ENC_ISO8859_1},
    {"german", ENC_ISO8859_1},
    {"gez_ER", ENC_UTF_8},
    {"gez_ER@abegede", ENC_UTF_8},
    {"gez_ET", ENC_UTF_8},
    {"gez_ET@abegede", ENC_UTF_8},
    {"gl", ENC_ISO8859_1},
    {"gl_ES", ENC_ISO8859_1},
    {"gl_ES@euro", ENC_ISO8859_15},
    {"greek", ENC_ISO8859_7},
    {"gu_IN", ENC_UTF_8},
    {"gv", ENC_ISO8859_1},
    {"gv_GB", ENC_ISO8859_1},
    {"he", ENC_ISO8859_8},
    {"he_IL", ENC_ISO8859_8},
    {"hebrew", ENC_ISO8859_8},
    {"hr", ENC_ISO8859_2},
    {"hr_HR", ENC_ISO8859_2},
    {"hrvatski", ENC_ISO8859_2},
    {"hu", ENC_ISO8859_2},
    {"hu_HU", ENC_ISO8859_2},
    {"hungarian", ENC_ISO8859_2},
    {"hy", ENC_ARMSCII_8},
    {"hy_AM", ENC_ARMSCII_8},
    {"icelandic", ENC_ISO8859_1},
    {"id", ENC_ISO8859_1},
    {"id_ID", ENC_ISO8859_1},
    {"in", ENC_ISO8859_1},
    {"in_ID", ENC_ISO8859_1},
    {"is", ENC_ISO8859_1},
    {"is_IS", ENC_ISO8859_1},
    {"iso_8859_1", ENC_ISO8859_1},
    {"it", ENC_ISO8859_1},
    {"it_CH", ENC_ISO8859_1},
    {"it_IT", ENC_ISO8859_1},
    {"it_IT@euro", ENC_ISO8859_15},
    {"italian", ENC_ISO8859_1},
    {"iw", ENC_ISO8859_8},
    {"iw_IL", ENC_ISO8859_8},
    {"ja", ENC_EUC_JP},
    {"ja_JP", ENC_EUC_JP},
    {"japan", ENC_EUC_JP},
    {"japanese", ENC_EUC_JP},
    {"ka", ENC_GEORGIAN_ACADEMY},
    {"ka_GE", ENC_GEORGIAN_ACADEMY},
    {"kl", ENC_ISO8859_1},
    {"kl_GL", ENC_ISO8859_1},
    {"kn_IN", ENC_UTF_8},
    {"ko", ENC_EUC_KR},
    {"ko_KR", ENC_EUC_KR},
    {"korean", ENC_EUC_KR},
    {"kw", ENC_ISO8859_1},
    {"kw_GB", ENC_ISO8859_1},
    {"lg_UG", ENC_ISO8859_10},
    {"lithuanian", ENC_ISO8859_13},
    {"lt", ENC_ISO8859_13},
    {"lt_LT", ENC_ISO8859_13},
    {"lv", ENC_ISO8859_13},
    {"lv_LV", ENC_ISO8859_13},
    {"mi", ENC_ISO8859_13},
    {"mi_NZ", ENC_ISO8859_13},
    {"mk", ENC_ISO8859_5},
    {"mk_MK", ENC_ISO8859_5},
    {"ml_IN", ENC_UTF_8},
    {"mn_MN", ENC_UTF_8},
    {"mr_IN", ENC_UTF_8},
    {"ms", ENC_ISO8859_1},
    {"ms_MY", ENC_ISO8859_1},
    {"mt", ENC_ISO8859_3},
    {"mt_MT", ENC_ISO8859_3},
    {"nb", ENC_ISO8859_1},
    {"nb_NO", ENC_ISO8859_1},
    {"ne_NP", ENC_UTF_8},
    {"nl", ENC_ISO8859_1},
    {"nl_BE", ENC_ISO8859_1},
    {"nl_BE@euro", ENC_ISO8859_15},
    {"nl_NL", ENC_ISO8859_1},
    {"nl_NL@euro", ENC_ISO8859_15},
    {"nn", ENC_ISO8859_1},
    {"nn_NO", ENC_ISO8859_1},
    {"no", ENC_ISO8859_1},
    {"no@nynorsk", ENC_ISO8859_1},
    {"no_NO", ENC_ISO8859_1},
    {"norwegian", ENC_ISO8859_1},
    {"nynorsk", ENC_ISO8859_1},
    {"oc", ENC_ISO8859_1},
    {"oc_FR", ENC_ISO8859_1},
    {"oc_FR@euro", ENC_ISO8859_15},
    {"om_ET", ENC_UTF_8},
    {"om_KE", ENC_ISO8859_1},
    {"pa_IN", ENC_UTF_8},
    {"ph", ENC_ISO8859_1},
    {"ph_PH", ENC_ISO8859_1},
    {"pl", ENC_ISO8859_2},
    {"pl_PL", ENC_ISO8859_2},
    {"polish", ENC_ISO8859_2},
    {"portuguese", ENC_ISO8859_1},
    {"pp", ENC_ISO8859_1},
    {"pp_AN", ENC_ISO8859_1},
    {"pt", ENC_ISO8859_1},
    {"pt_BR", ENC_ISO8859_1},
    {"pt_PT", ENC_ISO8859_1},
    {"pt_PT@euro", ENC_ISO8859_15},
    {"ro", ENC_ISO8859_2},
    {"ro_RO", ENC_ISO8859_2},
    {"romanian", ENC_ISO8859_2},
    {"ru", ENC_KOI8_R},
    {"ru_RU", ENC_KOI8_R},
    {"ru_UA", ENC_KOI8_U},
    {"rumanian", ENC_ISO8859_2},
    {"russian", ENC_ISO8859_5},
    {"se_NO", ENC_UTF_8},
    {"serbocroatian", ENC_ISO8859_2},
    {"sh", ENC_ISO8859_2},
    {"sh_SP", ENC_ISO8859_2},
    {"sh_YU", ENC_ISO8859_2},
    {"sid_ET", ENC_UTF_8},
    {"sk", ENC_ISO8859_2},
    {"sk_SK", ENC_ISO8859_2},
    {"sl", ENC_ISO8859_2},
    {"sl_SI", ENC_ISO8859_2},
    {"slovak", ENC_ISO8859_2},
    {"slovene", ENC_ISO8859_2},
    {"slovenian", ENC_ISO8859_2},
    {"so_DJ", ENC_ISO8859_1},
    {"so_ET", ENC_UTF_8},
    {"so_KE", ENC_ISO8859_1},
    {"so_SO", ENC_ISO8859_1},
    {"sp", ENC_ISO8859_5},
    {"sp_YU", ENC_ISO8859_5},
    {"spanish", ENC_ISO8859_1},
    {"sq", ENC_ISO8859_2},
    {"sq_AL", ENC_ISO8859_2},
    {"sr", ENC_ISO8859_5},
    {"sr@cyrillic", ENC_ISO8859_5},
    {"sr_SP", ENC_ISO8859_2},
    {"sr_YU", ENC_ISO8859_5},
    {"sr_YU@cyrillic", ENC_ISO8859_5},
    {"st_ZA", ENC_ISO8859_1},
    {"sv", ENC_ISO8859_1},
    {"sv_FI", ENC_ISO8859_1},
    {"sv_FI@euro", ENC_ISO8859_15},
    {"sv_SE", ENC_ISO8859_1},
    {"sv_SE@euro", ENC_ISO8859_15},
    {"swedish", ENC_ISO8859_1},
    {"te_IN", ENC_UTF_8},
    {"th", ENC_ISO8859_11},
    {"th_TH", ENC_ISO8859_11},
    {"thai", ENC_ISO8859_11},
    {"ti_ER", ENC_UTF_8},
    {"ti_ET", ENC_UTF_8},
    {"tig_ER", ENC_UTF_8},
    {"tl", ENC_ISO8859_1},
    {"tl_PH", ENC_ISO8859_1},
    {"tr", ENC_ISO8859_9},
    {"tr_TR", ENC_ISO8859_9},
    {"turkish", ENC_ISO8859_9},
    {"uk", ENC_KOI8_U},
    {"uk_UA", ENC_KOI8_U},
    {"ur", ENC_CP1256},
    {"ur_PK", ENC_CP1256},
    {"uz_UZ", ENC_ISO8859_1},
    {"uz_UZ@cyrillic", ENC_UTF_8},
    {"vi", ENC_TCVN},
    {"vi_VN", ENC_TCVN},
    {"wa", ENC_ISO8859_1},
    {"wa_BE", ENC_ISO8859_1},
    {"wa_BE@euro", ENC_ISO8859_15},
    {"xh_ZA", ENC_ISO8859_1},
    {"yi", ENC_CP1255},
    {"yi_US", ENC_CP1255},
    {"zh_CN", ENC_GBK},
    {"zh_HK", ENC_BIG5_HKSCS},
    {"zh_SG", ENC_GB2312},
    {"zh_TW", ENC_BIG5},
    {"zu_ZA", ENC_ISO8859_1},
};
/* number of entries in guess[] */
static const int guess_count = (sizeof(guess)/sizeof(name_value));
#endif

static const name_value known[] = {
    {"iso88591", "ISO8859-1"},
    {"iso88592", "ISO8859-2"},
    {"iso88593", "ISO8859-3"},
    {"iso88596", "ISO8859-6"},
    {"iso88597", "ISO8859-7"},
    {"iso88598", "ISO8859-8"},
    {"iso88599", "ISO8859-9"},
    {"iso885910", "ISO8859-10"},
    {"iso885913", "ISO8859-13"},
    {"iso885914", "ISO8859-14"},
    {"iso885915", "ISO8859-15"},
    {"cp1251", "CP1251"},
    {"cp1255", "CP1255"},
    {"eucjp", "EUC-JP"},
    {"euckr", "EUC-KR"},
    {"euctw", "EUC-TW"},
    {"georgianps", "GEORGIAN-PS"},
    {"koi8u", "KOI8-U"},
    {"tcvn", "TCVN"},
    {"big5", "BIG5"},
    {"gb2312", "GB2312"},
    {"gb18030", "GB18030"},
    {"gbk", "GBK"},
    {"tis-620", "TIS-620"},
    {"sjis", "SHIFT_JIS"},
    {"euccn", "GB2312"},
{"big5-hkscs", "BIG5-HKSCS"}, #ifdef __APPLE__ /* known additional Apple encodings (see locale -a) up to macOS 10.5, unlike other systems they correspond directly */ {"iso8859-1", "ISO8859-1"}, {"iso8859-2", "ISO8859-2"}, {"iso8859-4", "ISO8859-4"}, {"iso8859-7", "ISO8859-7"}, {"iso8859-9", "ISO8859-9"}, {"iso8859-13", "ISO8859-13"}, {"iso8859-15", "ISO8859-15"}, {"koi8-u", "KOI8-U"}, {"koi8-r", "KOI8-R"}, {"pt154", "PT154"}, {"us-ascii", "ASCII"}, {"armscii-8", "ARMSCII-8"}, {"iscii-dev", "ISCII-DEV"}, {"big5hkscs", "BIG5-HKSCS"}, #endif }; static const int known_count = (sizeof(known)/sizeof(name_value)); #ifndef __APPLE__ static char* name_value_search(const char *name, const name_value table[], const int table_count) { int min, mid, max; #if defined(DEBUG_TEST) static last; DPRINT(last); last = 0; #endif min = 0; max = table_count - 1; if ( 0 > strcmp(name,table[min].name) || 0 < strcmp(name,table[max].name) ) { #if defined(DEBUG_TEST) && DEBUG_TEST > 1 DPRINT(strcmp(name, table[min].name)); DPRINT(strcmp(name, table[max].name)); #endif return (NULL); } while (max >= min) { #if defined(DEBUG_TEST) last++; #endif mid = (min + max) / 2; #if defined(DEBUG_TEST) && DEBUG_TEST > 1 SPRINT(table[mid].name); #endif if (0 < strcmp(name,table[mid].name)) { #if defined(DEBUG_TEST) && DEBUG_TEST > 1 DPRINT(strcmp(name, table[mid].name)); #endif min = mid + 1; } else if (0 > strcmp(name, table[mid].name)) { #if defined(DEBUG_TEST) && DEBUG_TEST > 1 DPRINT(strcmp(name, table[mid].name)); #endif max = mid - 1; } else { #if defined(DEBUG_TEST) && DEBUG_TEST > 1 DPRINT(strcmp(name, table[mid].name)); #endif return(table[mid].value); } } return (NULL); } #endif const char *locale2charset(const char *locale) { static char charset[128]; char la_loc[128]; char enc[128], *p; int i; int cp; #ifndef __APPLE__ char *value; #endif if ((locale == NULL) || (0 == strcmp(locale, "NULL"))) locale = setlocale(LC_CTYPE,NULL); /* in some rare circumstances Darwin may return NULL */ if (!locale 
|| !strcmp(locale, "C") || !strcmp(locale, "POSIX")) return ("ASCII"); memset(charset,0,sizeof(charset)); /* separate language_locale.encoding NB, under Windows 'locale' may contains dots */ memset(la_loc, 0, sizeof(la_loc)); memset(enc, 0, sizeof(enc)); p = strrchr(locale, '.'); if(p) { strncpy(enc, p+1, sizeof(enc)-1); enc[sizeof(enc) - 1] = '\0'; strncpy(la_loc, locale, sizeof(la_loc)-1); la_loc[sizeof(la_loc) - 1] = '\0'; p = strrchr(la_loc, '.'); if(p) *p = '\0'; } #ifdef Win32 /* ## PUTTY suggests mapping Windows code pages as ## 1250 -> ISO 8859-2: this is WRONG ## 1251 -> KOI8-U ## 1252 -> ISO 8859-1 ## 1253 -> ISO 8859-7 ## 1254 -> ISO 8859-9 ## 1255 -> ISO 8859-8 ## 1256 -> ISO 8859-6 ## 1257 -> ISO 8859-13 */ switch(cp = atoi(enc)) { /* case 1250: return "ISO8859-2"; */ /* case 1251: return "KOI8-U"; This is not anywhere near the same */ case 1252: return "ISO8859-1"; /* case 1253: return "ISO8859-7"; case 1254: return "ISO8859-9"; case 1255: return "ISO8859-8"; case 1256: return "ISO8859-6"; */ case 1257: return "ISO8859-13"; default: snprintf(charset, 128, "CP%u", cp); return charset; } #endif /* * Assume locales are like en_US[.utf8[@euro]] */ /* cut encoding @hoge no use. for(i=0;enc[i] && enc[i]!='@' && i