123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658 |
- /* quotearg.c - quote arguments for output
- Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation,
- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
- /* Written by Paul Eggert <[email protected]> */
- #if HAVE_CONFIG_H
- # include <config.h>
- #endif
- #if HAVE_STDDEF_H
- # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
- #endif
- #include <sys/types.h>
- #include <quotearg.h>
- #include <xalloc.h>
- #include <ctype.h>
- #if ENABLE_NLS
- # include <libintl.h>
- # define _(text) gettext (text)
- #else
- # define _(text) text
- #endif
- #define N_(text) text
- #if HAVE_LIMITS_H
- # include <limits.h>
- #endif
- #ifndef CHAR_BIT
- # define CHAR_BIT 8
- #endif
- #ifndef SIZE_MAX
- # define SIZE_MAX ((size_t) -1)
- #endif
- #ifndef UCHAR_MAX
- # define UCHAR_MAX ((unsigned char) -1)
- #endif
- #ifndef UINT_MAX
- # define UINT_MAX ((unsigned int) -1)
- #endif
- #if HAVE_C_BACKSLASH_A
- # define ALERT_CHAR '\a'
- #else
- # define ALERT_CHAR '\7'
- #endif
- #if HAVE_STDLIB_H
- # include <stdlib.h>
- #endif
- #if HAVE_STRING_H
- # include <string.h>
- #endif
- #if HAVE_WCHAR_H
- /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
- # include <stdio.h>
- # include <time.h>
- # include <wchar.h>
- #endif
- #if !HAVE_MBRTOWC
- /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
- other macros are defined only for documentation and to satisfy C
- syntax. */
- # undef MB_CUR_MAX
- # define MB_CUR_MAX 1
- # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
- # define mbsinit(ps) 1
- # define iswprint(wc) ISPRINT ((unsigned char) (wc))
- #endif
- #ifndef iswprint
- # if HAVE_WCTYPE_H
- # include <wctype.h>
- # endif
- # if !defined iswprint && !HAVE_ISWPRINT
- # define iswprint(wc) 1
- # endif
- #endif
- #define INT_BITS (sizeof (int) * CHAR_BIT)
- #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
- # define IN_CTYPE_DOMAIN(c) 1
- #else
- # define IN_CTYPE_DOMAIN(c) isascii(c)
- #endif
- /* Undefine to protect against the definition in wctype.h of solaris2.6. */
- #undef ISPRINT
- #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
- struct quoting_options
- {
- /* Basic quoting style. */
- enum quoting_style style;
- /* Quote the characters indicated by this bit vector even if the
- quoting style would not normally require them to be quoted. */
- int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
- };
/* Null-terminated table of the user-visible names of the quoting
   styles.  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  0
};
- /* Correspondences to quoting style names. */
- enum quoting_style const quoting_style_vals[] =
- {
- literal_quoting_style,
- shell_quoting_style,
- shell_always_quoting_style,
- c_quoting_style,
- escape_quoting_style,
- locale_quoting_style,
- clocale_quoting_style
- };
- /* The default quoting options. */
- static struct quoting_options default_quoting_options;
- /* Allocate a new set of quoting options, with contents initially identical
- to O if O is not null, or to the default if O is null.
- It is the caller's responsibility to free the result. */
- struct quoting_options *
- clone_quoting_options (struct quoting_options *o)
- {
- struct quoting_options *p
- = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
- *p = *(o ? o : &default_quoting_options);
- return p;
- }
- /* Get the value of O's quoting style. If O is null, use the default. */
- enum quoting_style
- get_quoting_style (struct quoting_options *o)
- {
- return (o ? o : &default_quoting_options)->style;
- }
- /* In O (or in the default if O is null),
- set the value of the quoting style to S. */
- void
- set_quoting_style (struct quoting_options *o, enum quoting_style s)
- {
- (o ? o : &default_quoting_options)->style = s;
- }
- /* In O (or in the default if O is null),
- set the value of the quoting options for character C to I.
- Return the old value. Currently, the only values defined for I are
- 0 (the default) and 1 (which means to quote the character even if
- it would not otherwise be quoted). */
- int
- set_char_quoting (struct quoting_options *o, char c, int i)
- {
- unsigned char uc = c;
- int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
- int shift = uc % INT_BITS;
- int r = (*p >> shift) & 1;
- *p ^= ((i & 1) ^ r) << shift;
- return r;
- }
- /* MSGID approximates a quotation mark. Return its translation if it
- has one; otherwise, return either it or "\"", depending on S. */
- static char const *
- gettext_quote (char const *msgid, enum quoting_style s)
- {
- char const *translation = _(msgid);
- if (translation == msgid && s == clocale_quoting_style)
- translation = "\"";
- return translation;
- }
- /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
- argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
- non-quoting-style part of O to control quoting.
- Terminate the output with a null character, and return the written
- size of the output, not counting the terminating null.
- If BUFFERSIZE is too small to store the output string, return the
- value that would have been returned had BUFFERSIZE been large enough.
- If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
- This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
- ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
- style specified by O, and O may not be null. */
- static size_t
- quotearg_buffer_restyled (char *buffer, size_t buffersize,
- char const *arg, size_t argsize,
- enum quoting_style quoting_style,
- struct quoting_options const *o)
- {
- size_t i;
- size_t len = 0;
- char const *quote_string = 0;
- size_t quote_string_len = 0;
- int backslash_escapes = 0;
- int unibyte_locale = MB_CUR_MAX == 1;
- #define STORE(c) \
- do \
- { \
- if (len < buffersize) \
- buffer[len] = (c); \
- len++; \
- } \
- while (0)
- switch (quoting_style)
- {
- case c_quoting_style:
- STORE ('"');
- backslash_escapes = 1;
- quote_string = "\"";
- quote_string_len = 1;
- break;
- case escape_quoting_style:
- backslash_escapes = 1;
- break;
- case locale_quoting_style:
- case clocale_quoting_style:
- {
- /* Get translations for open and closing quotation marks.
- The message catalog should translate "`" to a left
- quotation mark suitable for the locale, and similarly for
- "'". If the catalog has no translation,
- locale_quoting_style quotes `like this', and
- clocale_quoting_style quotes "like this".
- For example, an American English Unicode locale should
- translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
- should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
- MARK). A British English Unicode locale should instead
- translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
- U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
- char const *left = gettext_quote (N_("`"), quoting_style);
- char const *right = gettext_quote (N_("'"), quoting_style);
- for (quote_string = left; *quote_string; quote_string++)
- STORE (*quote_string);
- backslash_escapes = 1;
- quote_string = right;
- quote_string_len = strlen (quote_string);
- }
- break;
- case shell_always_quoting_style:
- STORE ('\'');
- quote_string = "'";
- quote_string_len = 1;
- break;
- default:
- break;
- }
- for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
- {
- unsigned char c;
- unsigned char esc;
- if (backslash_escapes
- && quote_string_len
- && i + quote_string_len <= argsize
- && memcmp (arg + i, quote_string, quote_string_len) == 0)
- STORE ('\\');
- c = arg[i];
- switch (c)
- {
- case '\0':
- if (backslash_escapes)
- {
- STORE ('\\');
- STORE ('0');
- STORE ('0');
- c = '0';
- }
- break;
- case '?':
- switch (quoting_style)
- {
- case shell_quoting_style:
- goto use_shell_always_quoting_style;
- case c_quoting_style:
- if (i + 2 < argsize && arg[i + 1] == '?')
- switch (arg[i + 2])
- {
- case '!': case '\'':
- case '(': case ')': case '-': case '/':
- case '<': case '=': case '>':
- /* Escape the second '?' in what would otherwise be
- a trigraph. */
- i += 2;
- c = arg[i + 2];
- STORE ('?');
- STORE ('\\');
- STORE ('?');
- break;
- }
- break;
- default:
- break;
- }
- break;
- case ALERT_CHAR: esc = 'a'; goto c_escape;
- case '\b': esc = 'b'; goto c_escape;
- case '\f': esc = 'f'; goto c_escape;
- case '\n': esc = 'n'; goto c_and_shell_escape;
- case '\r': esc = 'r'; goto c_and_shell_escape;
- case '\t': esc = 't'; goto c_and_shell_escape;
- case '\v': esc = 'v'; goto c_escape;
- case '\\': esc = c; goto c_and_shell_escape;
- c_and_shell_escape:
- if (quoting_style == shell_quoting_style)
- goto use_shell_always_quoting_style;
- c_escape:
- if (backslash_escapes)
- {
- c = esc;
- goto store_escape;
- }
- break;
- case '#': case '~':
- if (i != 0)
- break;
- /* Fall through. */
- case ' ':
- case '!': /* special in bash */
- case '"': case '$': case '&':
- case '(': case ')': case '*': case ';':
- case '<': case '>': case '[':
- case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
- case '`': case '|':
- /* A shell special character. In theory, '$' and '`' could
- be the first bytes of multibyte characters, which means
- we should check them with mbrtowc, but in practice this
- doesn't happen so it's not worth worrying about. */
- if (quoting_style == shell_quoting_style)
- goto use_shell_always_quoting_style;
- break;
- case '\'':
- switch (quoting_style)
- {
- case shell_quoting_style:
- goto use_shell_always_quoting_style;
- case shell_always_quoting_style:
- STORE ('\'');
- STORE ('\\');
- STORE ('\'');
- break;
- default:
- break;
- }
- break;
- case '%': case '+': case ',': case '-': case '.': case '/':
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9': case ':': case '=':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
- case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
- case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
- case 'o': case 'p': case 'q': case 'r': case 's': case 't':
- case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
- case '{': case '}':
- /* These characters don't cause problems, no matter what the
- quoting style is. They cannot start multibyte sequences. */
- break;
- default:
- /* If we have a multibyte sequence, copy it until we reach
- its end, find an error, or come back to the initial shift
- state. For C-like styles, if the sequence has
- unprintable characters, escape the whole sequence, since
- we can't easily escape single characters within it. */
- {
- /* Length of multibyte sequence found so far. */
- size_t m;
- int printable;
- if (unibyte_locale)
- {
- m = 1;
- printable = ISPRINT (c);
- }
- else
- {
- mbstate_t mbstate;
- memset (&mbstate, 0, sizeof mbstate);
- m = 0;
- printable = 1;
- if (argsize == (size_t) -1)
- argsize = strlen (arg);
- do
- {
- wchar_t w;
- size_t bytes = mbrtowc (&w, &arg[i + m],
- argsize - (i + m), &mbstate);
- if (bytes == 0)
- break;
- else if (bytes == (size_t) -1)
- {
- printable = 0;
- break;
- }
- else if (bytes == (size_t) -2)
- {
- printable = 0;
- while (i + m < argsize && arg[i + m])
- m++;
- break;
- }
- else
- {
- if (! iswprint (w))
- printable = 0;
- m += bytes;
- }
- }
- while (! mbsinit (&mbstate));
- }
- if (1 < m || (backslash_escapes && ! printable))
- {
- /* Output a multibyte sequence, or an escaped
- unprintable unibyte character. */
- size_t ilim = i + m;
- for (;;)
- {
- if (backslash_escapes && ! printable)
- {
- STORE ('\\');
- STORE ('0' + (c >> 6));
- STORE ('0' + ((c >> 3) & 7));
- c = '0' + (c & 7);
- }
- if (ilim <= i + 1)
- break;
- STORE (c);
- c = arg[++i];
- }
- goto store_c;
- }
- }
- }
- if (! (backslash_escapes
- && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
- goto store_c;
- store_escape:
- STORE ('\\');
- store_c:
- STORE (c);
- }
- if (quote_string)
- for (; *quote_string; quote_string++)
- STORE (*quote_string);
- if (len < buffersize)
- buffer[len] = '\0';
- return len;
- use_shell_always_quoting_style:
- return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
- shell_always_quoting_style, o);
- }
- /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
- argument ARG (of size ARGSIZE), using O to control quoting.
- If O is null, use the default.
- Terminate the output with a null character, and return the written
- size of the output, not counting the terminating null.
- If BUFFERSIZE is too small to store the output string, return the
- value that would have been returned had BUFFERSIZE been large enough.
- If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
- size_t
- quotearg_buffer (char *buffer, size_t buffersize,
- char const *arg, size_t argsize,
- struct quoting_options const *o)
- {
- struct quoting_options const *p = o ? o : &default_quoting_options;
- return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
- p->style, p);
- }
- /* Use storage slot N to return a quoted version of argument ARG.
- ARG is of size ARGSIZE, but if that is -1, ARG is a null-terminated string.
- OPTIONS specifies the quoting options.
- The returned value points to static storage that can be
- reused by the next call to this function with the same value of N.
- N must be nonnegative. N is deliberately declared with type "int"
- to allow for future extensions (using negative values). */
- static char *
- quotearg_n_options (int n, char const *arg, size_t argsize,
- struct quoting_options const *options)
- {
- /* Preallocate a slot 0 buffer, so that the caller can always quote
- one small component of a "memory exhausted" message in slot 0. */
- static char slot0[256];
- static unsigned int nslots = 1;
- unsigned int n0 = n;
- struct slotvec
- {
- size_t size;
- char *val;
- };
- static struct slotvec slotvec0 = {sizeof slot0, slot0};
- static struct slotvec *slotvec = &slotvec0;
- if (n < 0)
- abort ();
- if (nslots <= n0)
- {
- unsigned int n1 = n0 + 1;
- size_t s = n1 * sizeof *slotvec;
- if (SIZE_MAX / UINT_MAX <= sizeof *slotvec
- && n1 != s / sizeof *slotvec)
- xalloc_die ();
- if (slotvec == &slotvec0)
- {
- slotvec = (struct slotvec *) xmalloc (sizeof *slotvec);
- *slotvec = slotvec0;
- }
- slotvec = (struct slotvec *) xrealloc (slotvec, s);
- memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
- nslots = n1;
- }
- {
- size_t size = slotvec[n].size;
- char *val = slotvec[n].val;
- size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
- if (size <= qsize)
- {
- slotvec[n].size = size = qsize + 1;
- slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
- quotearg_buffer (val, size, arg, argsize, options);
- }
- return val;
- }
- }
- char *
- quotearg_n (int n, char const *arg)
- {
- return quotearg_n_options (n, arg, (size_t) -1, &default_quoting_options);
- }
- char *
- quotearg (char const *arg)
- {
- return quotearg_n (0, arg);
- }
- /* Return quoting options for STYLE, with no extra quoting. */
- static struct quoting_options
- quoting_options_from_style (enum quoting_style style)
- {
- struct quoting_options o;
- o.style = style;
- memset (o.quote_these_too, 0, sizeof o.quote_these_too);
- return o;
- }
- char *
- quotearg_n_style (int n, enum quoting_style s, char const *arg)
- {
- struct quoting_options const o = quoting_options_from_style (s);
- return quotearg_n_options (n, arg, (size_t) -1, &o);
- }
- char *
- quotearg_n_style_mem (int n, enum quoting_style s,
- char const *arg, size_t argsize)
- {
- struct quoting_options const o = quoting_options_from_style (s);
- return quotearg_n_options (n, arg, argsize, &o);
- }
- char *
- quotearg_style (enum quoting_style s, char const *arg)
- {
- return quotearg_n_style (0, s, arg);
- }
- char *
- quotearg_char (char const *arg, char ch)
- {
- struct quoting_options options;
- options = default_quoting_options;
- set_char_quoting (&options, ch, 1);
- return quotearg_n_options (0, arg, (size_t) -1, &options);
- }
/* Like quotearg_char, with ':' as the character whose quoting is
   turned on.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}
|