utf8.c 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. /* Charset handling for GNU tar.
  2. Copyright 2004-2023 Free Software Foundation, Inc.
  3. This file is part of GNU tar.
  4. GNU tar is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 3 of the License, or
  7. (at your option) any later version.
  8. GNU tar is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  14. #include <system.h>
  15. #include <quotearg.h>
  16. #include <localcharset.h>
  17. #include "common.h"
  18. #ifdef HAVE_ICONV_H
  19. # include <iconv.h>
  20. #endif
  21. #ifndef ICONV_CONST
  22. # define ICONV_CONST
  23. #endif
/* Fallback stubs used when iconv is not available (HAVE_ICONV is not
   defined).  Any existing definitions are dropped and replaced so that
   the rest of this file compiles unchanged:
     - iconv_open always yields the failure value (iconv_t) -1;
     - iconv sets errno to ENOSYS and yields (size_t) -1;
     - iconv_close does nothing and yields 0;
     - iconv_t is a plain int.  */
#ifndef HAVE_ICONV
# undef iconv_open
# define iconv_open(tocode, fromcode) ((iconv_t) -1)
# undef iconv
# define iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft) (errno = ENOSYS, (size_t) -1)
# undef iconv_close
# define iconv_close(cd) 0
# undef iconv_t
# define iconv_t int
#endif
/* Cached conversion descriptors, indexed by the TO_UTF flag:
   slot 0 converts from UTF-8 to the locale charset, slot 1 converts from
   the locale charset to UTF-8.  (iconv_t) -1 marks a slot that has not
   been opened successfully.  */
static iconv_t conv_desc[2] = { (iconv_t) -1, (iconv_t) -1 };
  35. static iconv_t
  36. utf8_init (bool to_utf)
  37. {
  38. if (conv_desc[(int) to_utf] == (iconv_t) -1)
  39. {
  40. if (to_utf)
  41. conv_desc[(int) to_utf] = iconv_open ("UTF-8", locale_charset ());
  42. else
  43. conv_desc[(int) to_utf] = iconv_open (locale_charset (), "UTF-8");
  44. }
  45. return conv_desc[(int) to_utf];
  46. }
  47. bool
  48. utf8_convert (bool to_utf, char const *input, char **output)
  49. {
  50. char ICONV_CONST *ib;
  51. char *ob, *ret;
  52. size_t inlen;
  53. size_t outlen;
  54. iconv_t cd = utf8_init (to_utf);
  55. if (cd == 0)
  56. {
  57. *output = xstrdup (input);
  58. return true;
  59. }
  60. else if (cd == (iconv_t)-1)
  61. return false;
  62. inlen = strlen (input) + 1;
  63. outlen = inlen * MB_LEN_MAX + 1;
  64. ob = ret = xmalloc (outlen);
  65. ib = (char ICONV_CONST *) input;
  66. /* According to POSIX, "if iconv() encounters a character in the input
  67. buffer that is valid, but for which an identical character does not
  68. exist in the target codeset, iconv() shall perform an
  69. implementation-defined conversion on this character." It will "update
  70. the variables pointed to by the arguments to reflect the extent of the
  71. conversion and return the number of non-identical conversions performed".
  72. On error, it returns -1.
  73. In other words, non-zero return always indicates failure, either because
  74. the input was not fully converted, or because it was converted in a
  75. non-reversible way.
  76. */
  77. if (iconv (cd, &ib, &inlen, &ob, &outlen) != 0)
  78. {
  79. free (ret);
  80. return false;
  81. }
  82. *ob = 0;
  83. *output = ret;
  84. return true;
  85. }
  86. bool
  87. string_ascii_p (char const *p)
  88. {
  89. for (; *p; p++)
  90. if (*p & ~0x7f)
  91. return false;
  92. return true;
  93. }