|
@@ -18,6 +18,7 @@
|
|
|
|
|
|
#include <system.h>
|
|
|
#include <quotearg.h>
|
|
|
+#include <localcharset.h>
|
|
|
#include "common.h"
|
|
|
#ifdef HAVE_ICONV_H
|
|
|
# include <iconv.h>
|
|
@@ -27,254 +28,7 @@
|
|
|
# define ICONV_CONST
|
|
|
#endif
|
|
|
|
|
|
-#ifdef HAVE_LIBICONV
|
|
|
-
|
|
|
-struct langtab
|
|
|
-{
|
|
|
- char const *lang;
|
|
|
- char const *terr;
|
|
|
- char const *charset;
|
|
|
-};
|
|
|
-
|
|
|
-
|
|
|
- default character sets.
|
|
|
-
|
|
|
- NOTES:
|
|
|
-
|
|
|
- 1) The list must be ordered by:
|
|
|
- a) lang field in ascending order
|
|
|
- b) terr field in descending order.
|
|
|
- NULL fields are considered less than non-null ones.
|
|
|
- 2) Many entries have NULL charset fields. Please help fill them!
|
|
|
- 3) The "default" character set for a given language is a matter
|
|
|
- of preference. Possibly the table should contain a *list* of
|
|
|
- possible character sets.
|
|
|
- 4) LC_ALL "modifier" field is not taken into account */
|
|
|
-
|
|
|
-static struct langtab langtab[] = {
|
|
|
- { "C", NULL, "ASCII"},
|
|
|
- { "POSIX", NULL, "ASCII" },
|
|
|
- { "aa", NULL, NULL},
|
|
|
- { "ab", NULL, NULL},
|
|
|
- { "ae", NULL, NULL},
|
|
|
- { "af", NULL, "iso-8859-1"},
|
|
|
- { "am", NULL, "UTF-8"},
|
|
|
- { "ar", NULL, "iso-8859-6"},
|
|
|
- { "as", NULL, NULL},
|
|
|
- { "ay", NULL, "iso-8859-1"},
|
|
|
- { "az", NULL, NULL},
|
|
|
- { "ba", NULL, NULL},
|
|
|
- { "be", NULL, "UTF-8"},
|
|
|
- { "bg", NULL, "iso-8859-5"},
|
|
|
- { "bh", NULL, NULL},
|
|
|
- { "bi", NULL, NULL},
|
|
|
- { "bn", NULL, NULL},
|
|
|
- { "bo", NULL, NULL},
|
|
|
- { "br", NULL, "iso-8859-1"},
|
|
|
- { "bs", NULL, NULL},
|
|
|
- { "ca", NULL, "iso-8859-1"},
|
|
|
- { "ce", NULL, NULL},
|
|
|
- { "ch", NULL, NULL},
|
|
|
- { "co", NULL, "iso-8859-1"},
|
|
|
- { "cs", NULL, "iso-8859-2"},
|
|
|
- { "cu", NULL, NULL },
|
|
|
- { "cv", NULL, NULL},
|
|
|
- { "cy", NULL, "iso-8859-1"},
|
|
|
- { "da", NULL, "iso-8859-1"},
|
|
|
- { "de", NULL, "iso-8859-1"},
|
|
|
- { "dz", NULL, NULL },
|
|
|
- { "el", NULL, "iso-8859-7"},
|
|
|
- { "en", NULL, "iso-8859-1"},
|
|
|
- { "eo", NULL, "iso-8859-3"},
|
|
|
- { "es", NULL, "iso-8859-1"},
|
|
|
- { "et", NULL, "iso-8859-15"},
|
|
|
- { "eu", NULL, "iso-8859-1"},
|
|
|
- { "fa", NULL, "UTF-8"},
|
|
|
- { "fi", NULL, "iso-8859-15"},
|
|
|
- { "fj", NULL, NULL },
|
|
|
- { "fo", NULL, "iso-8859-1"},
|
|
|
- { "fr", NULL, "iso-8859-1"},
|
|
|
- { "fy", NULL, "iso-8859-1"},
|
|
|
- { "ga", NULL, "iso-8859-14"},
|
|
|
- { "gd", NULL, "iso-8859-14" },
|
|
|
- { "gl", NULL, NULL },
|
|
|
- { "gn", NULL, NULL},
|
|
|
- { "gu", NULL, NULL},
|
|
|
- { "gv", NULL, "iso-8859-14"},
|
|
|
- { "ha", NULL, NULL },
|
|
|
- { "he", NULL, "iso-8859-8" },
|
|
|
- { "hi", NULL, NULL},
|
|
|
- { "ho", NULL, NULL},
|
|
|
- { "hr", NULL, "iso-8859-2"},
|
|
|
- { "hu", NULL, "iso-8859-2"},
|
|
|
- { "hy", NULL, NULL},
|
|
|
- { "hz", NULL, NULL},
|
|
|
- { "id", NULL, "iso-8859-1"},
|
|
|
- { "ia", NULL, NULL},
|
|
|
- { "ie", NULL, NULL},
|
|
|
- { "ik", NULL, NULL},
|
|
|
- { "io", NULL, NULL},
|
|
|
- { "is", NULL, "iso-8859-1"},
|
|
|
- { "it", NULL, "iso-8859-1"},
|
|
|
- { "iu", NULL, NULL},
|
|
|
- { "ja", NULL, "EUC-JP"},
|
|
|
- { "jv", NULL, NULL},
|
|
|
- { "ka", NULL, NULL},
|
|
|
- { "ki", NULL, NULL},
|
|
|
- { "kj", NULL, NULL},
|
|
|
- { "kk", NULL, NULL},
|
|
|
- { "kl", NULL, "iso-8859-1"},
|
|
|
- { "km", NULL, NULL},
|
|
|
- { "kn", NULL, NULL},
|
|
|
- { "ko", NULL, "EUC-KR"},
|
|
|
- { "ks", NULL, NULL},
|
|
|
- { "ku", NULL, NULL},
|
|
|
- { "kv", NULL, NULL},
|
|
|
- { "kw", NULL, "iso-8859-14"},
|
|
|
- { "ky", NULL, NULL},
|
|
|
- { "la", NULL, "iso-8859-1"},
|
|
|
- { "lb", NULL, "iso-8859-1"},
|
|
|
- { "ln", NULL, NULL},
|
|
|
- { "lo", NULL, NULL},
|
|
|
- { "lt", NULL, "iso-8859-4"},
|
|
|
- { "lv", NULL, "iso-8859-4"},
|
|
|
- { "mg", NULL, NULL},
|
|
|
- { "mh", NULL, NULL},
|
|
|
- { "mi", NULL, NULL},
|
|
|
- { "mk", NULL, NULL},
|
|
|
- { "ml", NULL, NULL},
|
|
|
- { "mn", NULL, NULL},
|
|
|
- { "mo", NULL, "iso-8859-2"},
|
|
|
- { "mr", NULL, NULL},
|
|
|
- { "ms", NULL, NULL},
|
|
|
- { "mt", NULL, "iso-8859-3"},
|
|
|
- { "my", NULL, NULL},
|
|
|
- { "na", NULL, NULL},
|
|
|
- { "nb", NULL, "iso-8859-1"},
|
|
|
- { "nd", NULL, NULL},
|
|
|
- { "ne", NULL, NULL},
|
|
|
- { "ng", NULL, NULL},
|
|
|
- { "nl", NULL, "iso-8859-1"},
|
|
|
- { "nn", NULL, "iso-8859-1"},
|
|
|
- { "no", NULL, "iso-8859-1"},
|
|
|
- { "nr", NULL, NULL},
|
|
|
- { "nv", NULL, NULL},
|
|
|
- { "ny", NULL, NULL},
|
|
|
- { "oc", NULL, NULL},
|
|
|
- { "om", NULL, NULL},
|
|
|
- { "or", NULL, NULL},
|
|
|
- { "os", NULL, NULL},
|
|
|
- { "pa", NULL, NULL},
|
|
|
- { "pi", NULL, NULL},
|
|
|
- { "pl", NULL, "iso-8859-2"},
|
|
|
- { "ps", NULL, NULL},
|
|
|
- { "pt", NULL, "iso-8859-1"},
|
|
|
- { "qu", NULL, "iso-8859-1"},
|
|
|
- { "rm", NULL, "iso-8859-1"},
|
|
|
- { "rn", NULL, NULL },
|
|
|
- { "ro", NULL, "iso-8859-2"},
|
|
|
- { "ru", NULL, "koi8-r"},
|
|
|
- { "rw", NULL, NULL},
|
|
|
- { "sa", NULL, NULL},
|
|
|
- { "sc", NULL, "iso-8859-1"},
|
|
|
- { "sd", NULL, NULL},
|
|
|
- { "se", NULL, "iso-8859-10"},
|
|
|
- { "sg", NULL, NULL},
|
|
|
- { "si", NULL, NULL},
|
|
|
- { "sk", NULL, "iso-8859-2"},
|
|
|
- { "sl", NULL, "iso-8859-1"},
|
|
|
- { "sm", NULL, NULL},
|
|
|
- { "sn", NULL, NULL},
|
|
|
- { "so", NULL, NULL},
|
|
|
- { "sq", NULL, "iso-8859-1"},
|
|
|
- { "sr", NULL, "iso-8859-2"},
|
|
|
- { "ss", NULL, NULL},
|
|
|
- { "st", NULL, NULL},
|
|
|
- { "su", NULL, NULL},
|
|
|
- { "sv", NULL, "iso-8859-1"},
|
|
|
- { "sw", NULL, NULL},
|
|
|
- { "ta", NULL, NULL},
|
|
|
- { "te", NULL, NULL},
|
|
|
- { "tg", NULL, NULL},
|
|
|
- { "th", NULL, "iso-8859-11"},
|
|
|
- { "ti", NULL, NULL},
|
|
|
- { "tk", NULL, NULL},
|
|
|
- { "tl", NULL, "iso-8859-1"},
|
|
|
- { "tn", NULL, NULL},
|
|
|
- { "to", NULL, NULL},
|
|
|
- { "tr", NULL, "iso-8859-9"},
|
|
|
- { "ts", NULL, NULL},
|
|
|
- { "tt", NULL, NULL},
|
|
|
- { "tw", NULL, NULL},
|
|
|
- { "ty", NULL, NULL},
|
|
|
- { "ug", NULL, NULL},
|
|
|
- { "uk", NULL, "koi8-u"},
|
|
|
- { "ur", NULL, NULL},
|
|
|
- { "uz", NULL, NULL},
|
|
|
- { "vi", NULL, NULL},
|
|
|
- { "vo", NULL, NULL},
|
|
|
- { "wa", NULL, "iso-8859-1"},
|
|
|
- { "wo", NULL, NULL},
|
|
|
- { "xh", NULL, NULL},
|
|
|
- { "yi", NULL, "iso-8859-8"},
|
|
|
- { "yo", NULL, NULL},
|
|
|
- { "za", NULL, NULL},
|
|
|
- { "zh", "TW", "big5"},
|
|
|
- { "zh", NULL, "gb2312"},
|
|
|
- { "zu", NULL, NULL},
|
|
|
- { NULL, NULL, NULL}
|
|
|
-};
|
|
|
-
|
|
|
-
|
|
|
- default character set for that language. See notes above. */
|
|
|
-
|
|
|
-static char const *
|
|
|
-charset_lookup (char const *lang, char const *terr)
|
|
|
-{
|
|
|
- struct langtab const *p;
|
|
|
-
|
|
|
- if (!lang)
|
|
|
- return NULL;
|
|
|
- for (p = langtab; p->lang; p++)
|
|
|
- if (strcasecmp (p->lang, lang) == 0
|
|
|
- && (terr == NULL
|
|
|
- || p->terr == NULL
|
|
|
- || !strcasecmp (p->terr, terr) == 0))
|
|
|
- return p->charset;
|
|
|
- return NULL;
|
|
|
-}
|
|
|
-
|
|
|
-static const char *
|
|
|
-get_input_charset (void)
|
|
|
-{
|
|
|
- const char *charset = NULL;
|
|
|
- char *tmp;
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
- tmp = getenv ("LC_ALL");
|
|
|
- if (!tmp)
|
|
|
- tmp = getenv ("LANG");
|
|
|
-
|
|
|
- if (tmp)
|
|
|
- {
|
|
|
- char *lang;
|
|
|
- char *terr;
|
|
|
-
|
|
|
- lang = strtok (tmp, "_");
|
|
|
- terr = strtok (NULL, ".");
|
|
|
- charset = strtok (NULL, "@");
|
|
|
-
|
|
|
- if (!charset)
|
|
|
- charset = charset_lookup (lang, terr);
|
|
|
- }
|
|
|
-
|
|
|
- if (!charset)
|
|
|
- charset = "iso-8859-1";
|
|
|
- return charset;
|
|
|
-}
|
|
|
-
|
|
|
-#else
|
|
|
+#ifndef HAVE_ICONV
|
|
|
|
|
|
# undef iconv_open
|
|
|
# define iconv_open(tocode, fromcode) ((iconv_t) -1)
|
|
@@ -285,7 +39,7 @@ get_input_charset (void)
|
|
|
# undef iconv_close
|
|
|
# define iconv_close(cd) 0
|
|
|
|
|
|
-#endif
|
|
|
+#endif
|
|
|
|
|
|
|
|
|
|
|
@@ -298,9 +52,9 @@ utf8_init (bool to_utf)
|
|
|
if (conv_desc[(int) to_utf] == (iconv_t) -1)
|
|
|
{
|
|
|
if (to_utf)
|
|
|
- conv_desc[(int) to_utf] = iconv_open ("UTF-8", get_input_charset ());
|
|
|
+ conv_desc[(int) to_utf] = iconv_open ("UTF-8", locale_charset ());
|
|
|
else
|
|
|
- conv_desc[(int) to_utf] = iconv_open (get_input_charset (), "UTF-8");
|
|
|
+ conv_desc[(int) to_utf] = iconv_open (locale_charset (), "UTF-8");
|
|
|
}
|
|
|
return conv_desc[(int) to_utf];
|
|
|
}
|