quotearg.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658
/* quotearg.c - quote arguments for output
   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* Written by Paul Eggert <eggert@twinsun.com> */
  15. #if HAVE_CONFIG_H
  16. # include <config.h>
  17. #endif
  18. #if HAVE_STDDEF_H
  19. # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
  20. #endif
  21. #include <sys/types.h>
  22. #include <quotearg.h>
  23. #include <xalloc.h>
  24. #include <ctype.h>
  25. #if ENABLE_NLS
  26. # include <libintl.h>
  27. # define _(text) gettext (text)
  28. #else
  29. # define _(text) text
  30. #endif
  31. #define N_(text) text
  32. #if HAVE_LIMITS_H
  33. # include <limits.h>
  34. #endif
  35. #ifndef CHAR_BIT
  36. # define CHAR_BIT 8
  37. #endif
  38. #ifndef SIZE_MAX
  39. # define SIZE_MAX ((size_t) -1)
  40. #endif
  41. #ifndef UCHAR_MAX
  42. # define UCHAR_MAX ((unsigned char) -1)
  43. #endif
  44. #ifndef UINT_MAX
  45. # define UINT_MAX ((unsigned int) -1)
  46. #endif
  47. #if HAVE_C_BACKSLASH_A
  48. # define ALERT_CHAR '\a'
  49. #else
  50. # define ALERT_CHAR '\7'
  51. #endif
  52. #if HAVE_STDLIB_H
  53. # include <stdlib.h>
  54. #endif
  55. #if HAVE_STRING_H
  56. # include <string.h>
  57. #endif
  58. #if HAVE_WCHAR_H
  59. /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
  60. # include <stdio.h>
  61. # include <time.h>
  62. # include <wchar.h>
  63. #endif
  64. #if !HAVE_MBRTOWC
  65. /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
  66. other macros are defined only for documentation and to satisfy C
  67. syntax. */
  68. # undef MB_CUR_MAX
  69. # define MB_CUR_MAX 1
  70. # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
  71. # define mbsinit(ps) 1
  72. # define iswprint(wc) ISPRINT ((unsigned char) (wc))
  73. #endif
  74. #ifndef iswprint
  75. # if HAVE_WCTYPE_H
  76. # include <wctype.h>
  77. # endif
  78. # if !defined iswprint && !HAVE_ISWPRINT
  79. # define iswprint(wc) 1
  80. # endif
  81. #endif
  82. #define INT_BITS (sizeof (int) * CHAR_BIT)
  83. #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
  84. # define IN_CTYPE_DOMAIN(c) 1
  85. #else
  86. # define IN_CTYPE_DOMAIN(c) isascii(c)
  87. #endif
  88. /* Undefine to protect against the definition in wctype.h of solaris2.6. */
  89. #undef ISPRINT
  90. #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
  91. struct quoting_options
  92. {
  93. /* Basic quoting style. */
  94. enum quoting_style style;
  95. /* Quote the characters indicated by this bit vector even if the
  96. quoting style would not normally require them to be quoted. */
  97. int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
  98. };
  99. /* Names of quoting styles. */
  100. char const *const quoting_style_args[] =
  101. {
  102. "literal",
  103. "shell",
  104. "shell-always",
  105. "c",
  106. "escape",
  107. "locale",
  108. "clocale",
  109. 0
  110. };
  111. /* Correspondences to quoting style names. */
  112. enum quoting_style const quoting_style_vals[] =
  113. {
  114. literal_quoting_style,
  115. shell_quoting_style,
  116. shell_always_quoting_style,
  117. c_quoting_style,
  118. escape_quoting_style,
  119. locale_quoting_style,
  120. clocale_quoting_style
  121. };
  122. /* The default quoting options. */
  123. static struct quoting_options default_quoting_options;
  124. /* Allocate a new set of quoting options, with contents initially identical
  125. to O if O is not null, or to the default if O is null.
  126. It is the caller's responsibility to free the result. */
  127. struct quoting_options *
  128. clone_quoting_options (struct quoting_options *o)
  129. {
  130. struct quoting_options *p
  131. = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
  132. *p = *(o ? o : &default_quoting_options);
  133. return p;
  134. }
  135. /* Get the value of O's quoting style. If O is null, use the default. */
  136. enum quoting_style
  137. get_quoting_style (struct quoting_options *o)
  138. {
  139. return (o ? o : &default_quoting_options)->style;
  140. }
  141. /* In O (or in the default if O is null),
  142. set the value of the quoting style to S. */
  143. void
  144. set_quoting_style (struct quoting_options *o, enum quoting_style s)
  145. {
  146. (o ? o : &default_quoting_options)->style = s;
  147. }
  148. /* In O (or in the default if O is null),
  149. set the value of the quoting options for character C to I.
  150. Return the old value. Currently, the only values defined for I are
  151. 0 (the default) and 1 (which means to quote the character even if
  152. it would not otherwise be quoted). */
  153. int
  154. set_char_quoting (struct quoting_options *o, char c, int i)
  155. {
  156. unsigned char uc = c;
  157. int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
  158. int shift = uc % INT_BITS;
  159. int r = (*p >> shift) & 1;
  160. *p ^= ((i & 1) ^ r) << shift;
  161. return r;
  162. }
  163. /* MSGID approximates a quotation mark. Return its translation if it
  164. has one; otherwise, return either it or "\"", depending on S. */
  165. static char const *
  166. gettext_quote (char const *msgid, enum quoting_style s)
  167. {
  168. char const *translation = _(msgid);
  169. if (translation == msgid && s == clocale_quoting_style)
  170. translation = "\"";
  171. return translation;
  172. }
  173. /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
  174. argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
  175. non-quoting-style part of O to control quoting.
  176. Terminate the output with a null character, and return the written
  177. size of the output, not counting the terminating null.
  178. If BUFFERSIZE is too small to store the output string, return the
  179. value that would have been returned had BUFFERSIZE been large enough.
  180. If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
  181. This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
  182. ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
  183. style specified by O, and O may not be null. */
  184. static size_t
  185. quotearg_buffer_restyled (char *buffer, size_t buffersize,
  186. char const *arg, size_t argsize,
  187. enum quoting_style quoting_style,
  188. struct quoting_options const *o)
  189. {
  190. size_t i;
  191. size_t len = 0;
  192. char const *quote_string = 0;
  193. size_t quote_string_len = 0;
  194. int backslash_escapes = 0;
  195. int unibyte_locale = MB_CUR_MAX == 1;
  196. #define STORE(c) \
  197. do \
  198. { \
  199. if (len < buffersize) \
  200. buffer[len] = (c); \
  201. len++; \
  202. } \
  203. while (0)
  204. switch (quoting_style)
  205. {
  206. case c_quoting_style:
  207. STORE ('"');
  208. backslash_escapes = 1;
  209. quote_string = "\"";
  210. quote_string_len = 1;
  211. break;
  212. case escape_quoting_style:
  213. backslash_escapes = 1;
  214. break;
  215. case locale_quoting_style:
  216. case clocale_quoting_style:
  217. {
  218. /* Get translations for open and closing quotation marks.
  219. The message catalog should translate "`" to a left
  220. quotation mark suitable for the locale, and similarly for
  221. "'". If the catalog has no translation,
  222. locale_quoting_style quotes `like this', and
  223. clocale_quoting_style quotes "like this".
  224. For example, an American English Unicode locale should
  225. translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
  226. should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
  227. MARK). A British English Unicode locale should instead
  228. translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
  229. U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
  230. char const *left = gettext_quote (N_("`"), quoting_style);
  231. char const *right = gettext_quote (N_("'"), quoting_style);
  232. for (quote_string = left; *quote_string; quote_string++)
  233. STORE (*quote_string);
  234. backslash_escapes = 1;
  235. quote_string = right;
  236. quote_string_len = strlen (quote_string);
  237. }
  238. break;
  239. case shell_always_quoting_style:
  240. STORE ('\'');
  241. quote_string = "'";
  242. quote_string_len = 1;
  243. break;
  244. default:
  245. break;
  246. }
  247. for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
  248. {
  249. unsigned char c;
  250. unsigned char esc;
  251. if (backslash_escapes
  252. && quote_string_len
  253. && i + quote_string_len <= argsize
  254. && memcmp (arg + i, quote_string, quote_string_len) == 0)
  255. STORE ('\\');
  256. c = arg[i];
  257. switch (c)
  258. {
  259. case '\0':
  260. if (backslash_escapes)
  261. {
  262. STORE ('\\');
  263. STORE ('0');
  264. STORE ('0');
  265. c = '0';
  266. }
  267. break;
  268. case '?':
  269. switch (quoting_style)
  270. {
  271. case shell_quoting_style:
  272. goto use_shell_always_quoting_style;
  273. case c_quoting_style:
  274. if (i + 2 < argsize && arg[i + 1] == '?')
  275. switch (arg[i + 2])
  276. {
  277. case '!': case '\'':
  278. case '(': case ')': case '-': case '/':
  279. case '<': case '=': case '>':
  280. /* Escape the second '?' in what would otherwise be
  281. a trigraph. */
  282. i += 2;
  283. c = arg[i + 2];
  284. STORE ('?');
  285. STORE ('\\');
  286. STORE ('?');
  287. break;
  288. }
  289. break;
  290. default:
  291. break;
  292. }
  293. break;
  294. case ALERT_CHAR: esc = 'a'; goto c_escape;
  295. case '\b': esc = 'b'; goto c_escape;
  296. case '\f': esc = 'f'; goto c_escape;
  297. case '\n': esc = 'n'; goto c_and_shell_escape;
  298. case '\r': esc = 'r'; goto c_and_shell_escape;
  299. case '\t': esc = 't'; goto c_and_shell_escape;
  300. case '\v': esc = 'v'; goto c_escape;
  301. case '\\': esc = c; goto c_and_shell_escape;
  302. c_and_shell_escape:
  303. if (quoting_style == shell_quoting_style)
  304. goto use_shell_always_quoting_style;
  305. c_escape:
  306. if (backslash_escapes)
  307. {
  308. c = esc;
  309. goto store_escape;
  310. }
  311. break;
  312. case '#': case '~':
  313. if (i != 0)
  314. break;
  315. /* Fall through. */
  316. case ' ':
  317. case '!': /* special in bash */
  318. case '"': case '$': case '&':
  319. case '(': case ')': case '*': case ';':
  320. case '<': case '>': case '[':
  321. case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
  322. case '`': case '|':
  323. /* A shell special character. In theory, '$' and '`' could
  324. be the first bytes of multibyte characters, which means
  325. we should check them with mbrtowc, but in practice this
  326. doesn't happen so it's not worth worrying about. */
  327. if (quoting_style == shell_quoting_style)
  328. goto use_shell_always_quoting_style;
  329. break;
  330. case '\'':
  331. switch (quoting_style)
  332. {
  333. case shell_quoting_style:
  334. goto use_shell_always_quoting_style;
  335. case shell_always_quoting_style:
  336. STORE ('\'');
  337. STORE ('\\');
  338. STORE ('\'');
  339. break;
  340. default:
  341. break;
  342. }
  343. break;
  344. case '%': case '+': case ',': case '-': case '.': case '/':
  345. case '0': case '1': case '2': case '3': case '4': case '5':
  346. case '6': case '7': case '8': case '9': case ':': case '=':
  347. case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
  348. case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
  349. case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
  350. case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
  351. case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
  352. case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
  353. case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
  354. case 'o': case 'p': case 'q': case 'r': case 's': case 't':
  355. case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
  356. case '{': case '}':
  357. /* These characters don't cause problems, no matter what the
  358. quoting style is. They cannot start multibyte sequences. */
  359. break;
  360. default:
  361. /* If we have a multibyte sequence, copy it until we reach
  362. its end, find an error, or come back to the initial shift
  363. state. For C-like styles, if the sequence has
  364. unprintable characters, escape the whole sequence, since
  365. we can't easily escape single characters within it. */
  366. {
  367. /* Length of multibyte sequence found so far. */
  368. size_t m;
  369. int printable;
  370. if (unibyte_locale)
  371. {
  372. m = 1;
  373. printable = ISPRINT (c);
  374. }
  375. else
  376. {
  377. mbstate_t mbstate;
  378. memset (&mbstate, 0, sizeof mbstate);
  379. m = 0;
  380. printable = 1;
  381. if (argsize == (size_t) -1)
  382. argsize = strlen (arg);
  383. do
  384. {
  385. wchar_t w;
  386. size_t bytes = mbrtowc (&w, &arg[i + m],
  387. argsize - (i + m), &mbstate);
  388. if (bytes == 0)
  389. break;
  390. else if (bytes == (size_t) -1)
  391. {
  392. printable = 0;
  393. break;
  394. }
  395. else if (bytes == (size_t) -2)
  396. {
  397. printable = 0;
  398. while (i + m < argsize && arg[i + m])
  399. m++;
  400. break;
  401. }
  402. else
  403. {
  404. if (! iswprint (w))
  405. printable = 0;
  406. m += bytes;
  407. }
  408. }
  409. while (! mbsinit (&mbstate));
  410. }
  411. if (1 < m || (backslash_escapes && ! printable))
  412. {
  413. /* Output a multibyte sequence, or an escaped
  414. unprintable unibyte character. */
  415. size_t ilim = i + m;
  416. for (;;)
  417. {
  418. if (backslash_escapes && ! printable)
  419. {
  420. STORE ('\\');
  421. STORE ('0' + (c >> 6));
  422. STORE ('0' + ((c >> 3) & 7));
  423. c = '0' + (c & 7);
  424. }
  425. if (ilim <= i + 1)
  426. break;
  427. STORE (c);
  428. c = arg[++i];
  429. }
  430. goto store_c;
  431. }
  432. }
  433. }
  434. if (! (backslash_escapes
  435. && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
  436. goto store_c;
  437. store_escape:
  438. STORE ('\\');
  439. store_c:
  440. STORE (c);
  441. }
  442. if (quote_string)
  443. for (; *quote_string; quote_string++)
  444. STORE (*quote_string);
  445. if (len < buffersize)
  446. buffer[len] = '\0';
  447. return len;
  448. use_shell_always_quoting_style:
  449. return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
  450. shell_always_quoting_style, o);
  451. }
  452. /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
  453. argument ARG (of size ARGSIZE), using O to control quoting.
  454. If O is null, use the default.
  455. Terminate the output with a null character, and return the written
  456. size of the output, not counting the terminating null.
  457. If BUFFERSIZE is too small to store the output string, return the
  458. value that would have been returned had BUFFERSIZE been large enough.
  459. If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
  460. size_t
  461. quotearg_buffer (char *buffer, size_t buffersize,
  462. char const *arg, size_t argsize,
  463. struct quoting_options const *o)
  464. {
  465. struct quoting_options const *p = o ? o : &default_quoting_options;
  466. return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
  467. p->style, p);
  468. }
  469. /* Use storage slot N to return a quoted version of argument ARG.
  470. ARG is of size ARGSIZE, but if that is -1, ARG is a null-terminated string.
  471. OPTIONS specifies the quoting options.
  472. The returned value points to static storage that can be
  473. reused by the next call to this function with the same value of N.
  474. N must be nonnegative. N is deliberately declared with type "int"
  475. to allow for future extensions (using negative values). */
  476. static char *
  477. quotearg_n_options (int n, char const *arg, size_t argsize,
  478. struct quoting_options const *options)
  479. {
  480. /* Preallocate a slot 0 buffer, so that the caller can always quote
  481. one small component of a "memory exhausted" message in slot 0. */
  482. static char slot0[256];
  483. static unsigned int nslots = 1;
  484. unsigned int n0 = n;
  485. struct slotvec
  486. {
  487. size_t size;
  488. char *val;
  489. };
  490. static struct slotvec slotvec0 = {sizeof slot0, slot0};
  491. static struct slotvec *slotvec = &slotvec0;
  492. if (n < 0)
  493. abort ();
  494. if (nslots <= n0)
  495. {
  496. unsigned int n1 = n0 + 1;
  497. size_t s = n1 * sizeof *slotvec;
  498. if (SIZE_MAX / UINT_MAX <= sizeof *slotvec
  499. && n1 != s / sizeof *slotvec)
  500. xalloc_die ();
  501. if (slotvec == &slotvec0)
  502. {
  503. slotvec = (struct slotvec *) xmalloc (sizeof *slotvec);
  504. *slotvec = slotvec0;
  505. }
  506. slotvec = (struct slotvec *) xrealloc (slotvec, s);
  507. memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
  508. nslots = n1;
  509. }
  510. {
  511. size_t size = slotvec[n].size;
  512. char *val = slotvec[n].val;
  513. size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
  514. if (size <= qsize)
  515. {
  516. slotvec[n].size = size = qsize + 1;
  517. slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
  518. quotearg_buffer (val, size, arg, argsize, options);
  519. }
  520. return val;
  521. }
  522. }
  523. char *
  524. quotearg_n (int n, char const *arg)
  525. {
  526. return quotearg_n_options (n, arg, (size_t) -1, &default_quoting_options);
  527. }
  528. char *
  529. quotearg (char const *arg)
  530. {
  531. return quotearg_n (0, arg);
  532. }
  533. /* Return quoting options for STYLE, with no extra quoting. */
  534. static struct quoting_options
  535. quoting_options_from_style (enum quoting_style style)
  536. {
  537. struct quoting_options o;
  538. o.style = style;
  539. memset (o.quote_these_too, 0, sizeof o.quote_these_too);
  540. return o;
  541. }
  542. char *
  543. quotearg_n_style (int n, enum quoting_style s, char const *arg)
  544. {
  545. struct quoting_options const o = quoting_options_from_style (s);
  546. return quotearg_n_options (n, arg, (size_t) -1, &o);
  547. }
  548. char *
  549. quotearg_n_style_mem (int n, enum quoting_style s,
  550. char const *arg, size_t argsize)
  551. {
  552. struct quoting_options const o = quoting_options_from_style (s);
  553. return quotearg_n_options (n, arg, argsize, &o);
  554. }
  555. char *
  556. quotearg_style (enum quoting_style s, char const *arg)
  557. {
  558. return quotearg_n_style (0, s, arg);
  559. }
  560. char *
  561. quotearg_char (char const *arg, char ch)
  562. {
  563. struct quoting_options options;
  564. options = default_quoting_options;
  565. set_char_quoting (&options, ch, 1);
  566. return quotearg_n_options (0, arg, (size_t) -1, &options);
  567. }
  568. char *
  569. quotearg_colon (char const *arg)
  570. {
  571. return quotearg_char (arg, ':');
  572. }