Browse Source

Parse in a more locale-independent way

Update submodules to latest.
* gnulib.modules: Add c-ctype.
* lib/wordsplit.c, src/buffer.c, src/exclist.c, src/incremen.c:
* src/list.c, src/misc.c, src/names.c, src/sparse.c, src/tar.c:
* src/xheader.c:
Include c-ctype.h, and use its API rather than ctype.h’s.
This is more likely to work when oddball locales are used.
* src/transform.c: Include ctype.h, since this module still uses
tolower and toupper (this is probably wrong; the case conversion
should handle multi-byte characters).
Paul Eggert 1 year ago
parent
commit
01f986b921
14 changed files with 47 additions and 43 deletions
  1. 1 1
      gnulib
  2. 1 0
      gnulib.modules
  3. 14 21
      lib/wordsplit.c
  4. 1 1
      paxutils
  5. 2 1
      src/buffer.c
  6. 5 4
      src/exclist.c
  7. 2 1
      src/incremen.c
  8. 3 2
      src/list.c
  9. 5 4
      src/misc.c
  10. 2 1
      src/names.c
  11. 2 1
      src/sparse.c
  12. 3 2
      src/tar.c
  13. 1 0
      src/transform.c
  14. 5 4
      src/xheader.c

+ 1 - 1
gnulib

@@ -1 +1 @@
-Subproject commit 78b62f8320f8dffb813222c1480563ed14251ca3
+Subproject commit 0e8fd2c0eed44dba1d39840a8733c9159220d671

+ 1 - 0
gnulib.modules

@@ -24,6 +24,7 @@ argp
 argp-version-etc
 attribute
 backupfile
+c-ctype
 closeout
 configmake
 dirname

+ 14 - 21
lib/wordsplit.c

@@ -18,8 +18,9 @@
 # include <config.h>
 #endif
 
+#include <wordsplit.h>
+
 #include <errno.h>
-#include <ctype.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>
@@ -28,6 +29,8 @@
 #include <pwd.h>
 #include <glob.h>
 
+#include <c-ctype.h>
+
 #if ENABLE_NLS
 # include <gettext.h>
 #else
@@ -36,22 +39,12 @@
 #define _(msgid) gettext (msgid)
 #define N_(msgid) msgid
 
-#include <wordsplit.h>
-
 #define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
 #define ISDELIM(ws,c) \
   (strchr ((ws)->ws_delim, (c)) != NULL)
-#define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
-#define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
-#define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
-#define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
-#define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
-#define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL)
-#define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
-#define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
-
-#define ISVARBEG(c) (ISALPHA(c) || c == '_')
-#define ISVARCHR(c) (ISALNUM(c) || c == '_')
+
+#define ISVARBEG(c) (c_isalpha (c) || c == '_')
+#define ISVARCHR(c) (c_isalnum (c) || c == '_')
 
 #define WSP_RETURN_DELIMS(wsp) \
  ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))
@@ -1891,7 +1884,7 @@ skip_sed_expr (const char *command, size_t i, size_t len)
 
       if (command[i] == ';')
 	i++;
-      if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])))
+      if (!(command[i] == 's' && i + 3 < len && c_ispunct (command[i + 1])))
 	break;
 
       delim = command[++i];
@@ -1900,7 +1893,7 @@ skip_sed_expr (const char *command, size_t i, size_t len)
 	{
 	  if (state == 3)
 	    {
-	      if (command[i] == delim || !ISALNUM (command[i]))
+	      if (command[i] == delim || !c_isalnum (command[i]))
 		break;
 	    }
 	  else if (command[i] == '\\')
@@ -1987,7 +1980,7 @@ scan_word (struct wordsplit *wsp, size_t start, int consume_all)
   start = i;
 
   if (wsp->ws_flags & WRDSF_SED_EXPR
-      && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
+      && command[i] == 's' && i + 3 < len && c_ispunct (command[i + 1]))
     {
       flags = _WSNF_SEXP;
       i = skip_sed_expr (command, i, len);
@@ -2080,7 +2073,7 @@ scan_word (struct wordsplit *wsp, size_t start, int consume_all)
 }
 
 #define to_num(c) \
-  (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
+  (c_isdigit(c) ? c - '0' : c_isxdigit (c) ? c_toupper (c) - 'A' + 10 : 255)
 
 static int
 xtonum (int *pval, const char *src, int base, int cnt)
@@ -2113,7 +2106,7 @@ wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
 	len++;
       else if (*str == '"')
 	len += 2;
-      else if (*str != '\t' && *str != '\\' && ISPRINT (*str))
+      else if (*str != '\t' && *str != '\\' && c_isprint (*str))
 	len++;
       else if (quote_hex)
 	len += 3;
@@ -2201,7 +2194,7 @@ wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
 		}
 	    }
 	  else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
-		   && (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
+		   && (unsigned char) src[i] < 128 && c_isdigit (src[i]))
 	    {
 	      if (n - i < 1)
 		{
@@ -2251,7 +2244,7 @@ wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
 	  *dst++ = '\\';
 	  *dst++ = *src;
 	}
-      else if (*src != '\t' && *src != '\\' && ISPRINT (*src))
+      else if (*src != '\t' && *src != '\\' && c_isprint (*src))
 	*dst++ = *src;
       else
 	{

+ 1 - 1
paxutils

@@ -1 +1 @@
-Subproject commit 481bae11050fcbdca67a66eb57390267b280a312
+Subproject commit d56df6838db922686bdfb3325f6368d295ae7f15

+ 2 - 1
src/buffer.c

@@ -23,6 +23,7 @@
 
 #include <signal.h>
 
+#include <c-ctype.h>
 #include <closeout.h>
 #include <fnmatch.h>
 #include <human.h>
@@ -1573,7 +1574,7 @@ drop_volume_label_suffix (const char *label)
   idx_t prefix_len = 0;
 
   for (idx_t i = 0; label[i]; i++)
-    if (!isdigit ((unsigned char) label[i]))
+    if (!c_isdigit (label[i]))
       prefix_len = i + 1;
 
   ptrdiff_t len = prefix_len - VOLUME_TEXT_LEN;

+ 5 - 4
src/exclist.c

@@ -18,6 +18,7 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #include <system.h>
+#include <c-ctype.h>
 #include <quotearg.h>
 #include <flexmember.h>
 #include <fnmatch.h>
@@ -219,7 +220,7 @@ static void
 git_addfn (struct exclude *ex, char const *pattern, int options,
 	   MAYBE_UNUSED void *data)
 {
-  while (isspace (*pattern))
+  while (c_isspace (*pattern))
     ++pattern;
   if (*pattern == 0 || *pattern == '#')
     return;
@@ -232,7 +233,7 @@ static void
 bzr_addfn (struct exclude *ex, char const *pattern, int options,
 	   MAYBE_UNUSED void *data)
 {
-  while (isspace (*pattern))
+  while (c_isspace (*pattern))
     ++pattern;
   if (*pattern == 0 || *pattern == '#')
     return;
@@ -269,13 +270,13 @@ hg_addfn (struct exclude *ex, char const *pattern, int options, void *data)
   int *hgopt = data;
   size_t len;
 
-  while (isspace (*pattern))
+  while (c_isspace (*pattern))
     ++pattern;
   if (*pattern == 0 || *pattern == '#')
     return;
   if (strncmp (pattern, "syntax:", 7) == 0)
     {
-      for (pattern += 7; isspace (*pattern); ++pattern)
+      for (pattern += 7; c_isspace (*pattern); ++pattern)
 	;
       if (strcmp (pattern, "regexp") == 0)
 	/* FIXME: Regexps must be perl-style */

+ 2 - 1
src/incremen.c

@@ -18,6 +18,7 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 #include <system.h>
+#include <c-ctype.h>
 #include <hash.h>
 #include <quotearg.h>
 #include "common.h"
@@ -1132,7 +1133,7 @@ read_num (FILE *fp, char const *fieldname,
   int c = getc (fp);
   bool negative = c == '-';
 
-  for (i = 0; (i == 0 && negative) || ISDIGIT (c); i++)
+  for (i = 0; (i == 0 && negative) || c_isdigit (c); i++)
     {
       buf[i] = c;
       if (i == sizeof buf - 1)

+ 3 - 2
src/list.c

@@ -20,6 +20,7 @@
    Written by John Gilmore, on 1985-08-26.  */
 
 #include <system.h>
+#include <c-ctype.h>
 #include <inttostr.h>
 #include <quotearg.h>
 #include <time.h>
@@ -780,7 +781,7 @@ from_header (char const *where0, size_t digs, char const *type,
 		    type));
 	  return -1;
 	}
-      if (!isspace ((unsigned char) *where))
+      if (!c_isspace (*where))
 	break;
       where++;
     }
@@ -916,7 +917,7 @@ from_header (char const *where0, size_t digs, char const *type,
 	value = -value;
     }
 
-  if (where != lim && *where && !isspace ((unsigned char) *where))
+  if (where != lim && *where && !c_isspace (*where))
     {
       if (type)
 	{

+ 5 - 4
src/misc.c

@@ -19,6 +19,7 @@
 #include <system.h>
 #include <rmt.h>
 #include "common.h"
+#include <c-ctype.h>
 #include <quotearg.h>
 #include <xgetcwd.h>
 #include <unlinkdir.h>
@@ -414,7 +415,7 @@ strtosysint (char const *arg, char **arglim, intmax_t minval, uintmax_t maxval)
   errno = 0;
   if (maxval <= INTMAX_MAX)
     {
-      if (ISDIGIT (arg[*arg == '-']))
+      if (c_isdigit (arg[*arg == '-']))
 	{
 	  intmax_t i = strtoimax (arg, arglim, 10);
 	  intmax_t imaxval = maxval;
@@ -426,7 +427,7 @@ strtosysint (char const *arg, char **arglim, intmax_t minval, uintmax_t maxval)
     }
   else
     {
-      if (ISDIGIT (*arg))
+      if (c_isdigit (*arg))
 	{
 	  uintmax_t i = strtoumax (arg, arglim, 10);
 	  if (i <= maxval)
@@ -506,7 +507,7 @@ decode_timespec (char const *arg, char **arg_lim, bool parse_fraction)
   bool negative = *arg == '-';
   struct timespec r;
 
-  if (! ISDIGIT (arg[negative]))
+  if (! c_isdigit (arg[negative]))
     errno = EINVAL;
   else
     {
@@ -537,7 +538,7 @@ decode_timespec (char const *arg, char **arg_lim, bool parse_fraction)
 	  int digits = 0;
 	  bool trailing_nonzero = false;
 
-	  while (ISDIGIT (*++p))
+	  while (c_isdigit (*++p))
 	    if (digits < LOG10_BILLION)
 	      digits++, ns = 10 * ns + (*p - '0');
 	    else

+ 2 - 1
src/names.c

@@ -17,6 +17,7 @@
 
 #include <system.h>
 
+#include <c-ctype.h>
 #include <fnmatch.h>
 #include <hash.h>
 #include <quotearg.h>
@@ -990,7 +991,7 @@ handle_option (const char *str, struct name_elt const *ent)
   int i;
   struct option_locus loc;
 
-  while (*str && isspace (*str))
+  while (*str && c_isspace (*str))
     ++str;
   if (*str != '-')
     return 1;

+ 2 - 1
src/sparse.c

@@ -16,6 +16,7 @@
    with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 #include <system.h>
+#include <c-ctype.h>
 #include <inttostr.h>
 #include <quotearg.h>
 #include "common.h"
@@ -1251,7 +1252,7 @@ decode_num (uintmax_t *num, char const *arg, uintmax_t maxval)
   uintmax_t u;
   char *arg_lim;
 
-  if (!ISDIGIT (*arg))
+  if (!c_isdigit (*arg))
     return false;
 
   errno = 0;

+ 3 - 2
src/tar.c

@@ -38,6 +38,7 @@
 #include "common.h"
 
 #include <argmatch.h>
+#include <c-ctype.h>
 #include <closeout.h>
 #include <configmake.h>
 #include <exitfail.h>
@@ -1223,7 +1224,7 @@ expand_pax_option (struct tar_args *targs, const char *arg)
 	  size_t len = p - arg + 1;
 	  obstack_grow (&stk, arg, len);
 	  len = seglen - len;
-	  for (++p; *p && isspace ((unsigned char) *p); p++)
+	  for (++p; *p && c_isspace (*p); p++)
 	    len--;
 	  if (*p == '{' && p[len-1] == '}')
 	    {
@@ -1720,7 +1721,7 @@ parse_opt (int key, char *arg, struct argp_state *state)
     case SET_MTIME_FORMAT_OPTION:
       set_mtime_format = arg;
       break;
-      
+
     case SPARSE_VERSION_OPTION:
       sparse_option = true;
       {

+ 1 - 0
src/transform.c

@@ -15,6 +15,7 @@
    with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 #include <system.h>
+#include <ctype.h>
 #include <regex.h>
 #include "common.h"
 

+ 5 - 4
src/xheader.c

@@ -19,6 +19,7 @@
 
 #include <system.h>
 
+#include <c-ctype.h>
 #include <fnmatch.h>
 #include <hash.h>
 #include <inttostr.h>
@@ -192,12 +193,12 @@ xheader_set_keyword_equal (char *kw, char *eq)
       global = false;
     }
 
-  while (p > kw && isspace ((unsigned char) *p))
+  while (p > kw && c_isspace (*p))
     p--;
 
   *p = 0;
 
-  for (p = eq + 1; *p && isspace ((unsigned char) *p); p++)
+  for (p = eq + 1; *p && c_isspace (*p); p++)
     ;
 
   if (strcmp (kw, "delete") == 0)
@@ -616,7 +617,7 @@ decode_record (struct xheader *xhdr,
   while (*p == ' ' || *p == '\t')
     p++;
 
-  if (! ISDIGIT (*p))
+  if (! c_isdigit (*p))
     {
       if (*p)
 	ERROR ((0, 0, _("Malformed extended header: missing length")));
@@ -1445,7 +1446,7 @@ sparse_map_decoder (struct tar_stat_info *st,
       intmax_t u;
       char *delim;
 
-      if (!ISDIGIT (*arg))
+      if (!c_isdigit (*arg))
 	{
 	  ERROR ((0, 0, _("Malformed extended header: invalid %s=%s"),
 		  keyword, arg));