  /* wordsplit.h */
  /* wordsplit - a word splitter
     Copyright (C) 2009-2018 Sergey Poznyakoff

     This program is free software; you can redistribute it and/or modify it
     under the terms of the GNU General Public License as published by the
     Free Software Foundation; either version 3 of the License, or (at your
     option) any later version.

     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License along
     with this program.  If not, see <http://www.gnu.org/licenses/>. */
  13. #ifndef __WORDSPLIT_H
  14. #define __WORDSPLIT_H
  15. #include <stddef.h>
  /* Expands to a GCC `format' function attribute on compilers that
     identify themselves as GCC 2.7 or later, and to nothing elsewhere.
     SPEC is the parenthesized attribute argument list, e.g.
     (__printf__, 1, 2). */
  #if defined (__GNUC__) && 2 < __GNUC__ + (7 <= __GNUC_MINOR__)
  # define __WORDSPLIT_ATTRIBUTE_FORMAT(spec) __attribute__ ((__format__ spec))
  #else
  # define __WORDSPLIT_ATTRIBUTE_FORMAT(spec) /* empty */
  #endif

  typedef struct wordsplit wordsplit_t;

  /* Structure used to direct the splitting.  Members marked with [Input]
     can be defined before calling wordsplit(), those marked with [Output]
     provide return values when the function returns.  If neither mark is
     used, the member is internal and must not be used by the caller.

     In the comments below, the identifiers in parentheses indicate bits that
     must be set (or unset, if starting with !) in ws_flags (if starting with
     WRDSF_) or ws_options (if starting with WRDSO_) to initialize or use the
     given member.

     If not redefined explicitly, most of them are set to some reasonable
     default value upon entry to wordsplit(). */
  struct wordsplit
  {
    size_t ws_wordc;          /* [Output] Number of words in ws_wordv. */
    char **ws_wordv;          /* [Output] Array of parsed out words. */
    size_t ws_offs;           /* [Input] (WRDSF_DOOFFS) Number of initial
                                 elements in ws_wordv to fill with NULLs. */
    size_t ws_wordn;          /* Number of elements ws_wordv can
                                 accommodate. */
    unsigned ws_flags;        /* [Input] Flags passed to wordsplit. */
    unsigned ws_options;      /* [Input] (WRDSF_OPTIONS)
                                 Additional options. */
    size_t ws_maxwords;       /* [Input] (WRDSO_MAXWORDS) Return at most that
                                 many words */
    size_t ws_wordi;          /* [Output] (WRDSF_INCREMENTAL) Total number of
                                 words returned so far */

    const char *ws_delim;     /* [Input] (WRDSF_DELIM) Word delimiters. */
    const char *ws_comment;   /* [Input] (WRDSF_COMMENT) Comment
                                 characters. */
    const char *ws_escape[2]; /* [Input] (WRDSF_ESCAPE) Characters to be
                                 escaped with backslash.  Indexed by
                                 WRDSX_WORD and WRDSX_QUOTE. */

    void (*ws_alloc_die) (wordsplit_t *wsp);
                              /* [Input] (WRDSF_ALLOC_DIE) Function called
                                 when out of memory.  Must not return. */

    /* The two printf-style callbacks below are annotated through
       __WORDSPLIT_ATTRIBUTE_FORMAT rather than a raw __attribute__, so
       that the declarations remain valid on compilers without GCC
       attribute support. */
    void (*ws_error) (const char *, ...)
      __WORDSPLIT_ATTRIBUTE_FORMAT ((__printf__, 1, 2));
                              /* [Input] (WRDSF_ERROR) Function used for
                                 error reporting */
    void (*ws_debug) (const char *, ...)
      __WORDSPLIT_ATTRIBUTE_FORMAT ((__printf__, 1, 2));
                              /* [Input] (WRDSF_DEBUG) Function used for
                                 debug output. */

    const char **ws_env;      /* [Input] (WRDSF_ENV, !WRDSF_NOVAR) Array of
                                 environment variables. */

    /* Internal (no [Input]/[Output] mark).
       NOTE(review): presumably the bookkeeping for an automatically
       allocated environment (cf. WORDSPLIT_ENV_INIT and
       wordsplit_free_envbuf) -- confirm against the implementation. */
    char **ws_envbuf;
    size_t ws_envidx;
    size_t ws_envsiz;

    int (*ws_getvar) (char **ret, const char *var, size_t len, void *clos);
                              /* [Input] (WRDSF_GETVAR, !WRDSF_NOVAR) Looks up
                                 the name VAR (LEN bytes long) in the table of
                                 variables and if found returns in memory
                                 location pointed to by RET the value of that
                                 variable.  Returns WRDSE_OK (0) on success,
                                 and an error code (see WRDSE_* defines below)
                                 on error.  User-specific errors can be
                                 returned by storing the error diagnostic
                                 string in RET and returning WRDSE_USERERR.
                                 Whatever is stored in RET, it must be
                                 allocated using malloc(3). */
    void *ws_closure;         /* [Input] (WRDSF_CLOSURE) Passed as the CLOS
                                 argument to ws_getvar and ws_command. */
    int (*ws_command) (char **ret, const char *cmd, size_t len, char **argv,
                       void *clos);
                              /* [Input] (!WRDSF_NOCMD) Returns in the memory
                                 location pointed to by RET the expansion of
                                 the command CMD (LEN bytes long).  If
                                 WRDSO_ARGV option is set, ARGV contains CMD
                                 split out to words.  Otherwise ARGV is NULL.
                                 See ws_getvar for a discussion of possible
                                 return values. */

    const char *ws_input;     /* Input string (the S argument to
                                 wordsplit). */
    size_t ws_len;            /* Length of ws_input. */
    size_t ws_endp;           /* Points past the last processed byte in
                                 ws_input. */
    int ws_errno;             /* [Output] Error code, if an error occurred. */
    char *ws_usererr;         /* Points to textual description of
                                 the error, if ws_errno is WRDSE_USERERR.
                                 Must be allocated with malloc(3). */
    struct wordsplit_node *ws_head, *ws_tail;
                              /* Doubly-linked list of parsed out nodes. */
    int ws_lvl;               /* Invocation nesting level. */
  };
  /* Starting size used for ws_env when it is allocated automatically. */
  #define WORDSPLIT_ENV_INIT  16
  /* Wordsplit flags: bit values for ws_flags and for the FLAGS argument
     of wordsplit().  Each flag occupies exactly one bit. */

  /* Append the words found to the array resulting from a previous call. */
  #define WRDSF_APPEND            0x00000001
  /* Insert ws_offs initial NULLs in the array ws_wordv.
     (These are not counted in the returned ws_wordc.) */
  #define WRDSF_DOOFFS            0x00000002
  /* Don't do command substitution. */
  #define WRDSF_NOCMD             0x00000004
  /* The wordsplit_t argument resulted from a previous call to
     wordsplit(), and wordsplit_free() was not called.  Reuse the
     allocated storage. */
  #define WRDSF_REUSE             0x00000008
  /* Print errors. */
  #define WRDSF_SHOWERR           0x00000010
  /* Consider it an error if an undefined variable is expanded. */
  #define WRDSF_UNDEF             0x00000020
  /* Don't do variable expansion. */
  #define WRDSF_NOVAR             0x00000040
  /* Abort on ENOMEM error. */
  #define WRDSF_ENOMEMABRT        0x00000080
  /* Trim off any leading and trailing whitespace. */
  #define WRDSF_WS                0x00000100
  /* Handle single quotes. */
  #define WRDSF_SQUOTE            0x00000200
  /* Handle double quotes. */
  #define WRDSF_DQUOTE            0x00000400
  /* Handle single and double quotes. */
  #define WRDSF_QUOTE             (WRDSF_SQUOTE|WRDSF_DQUOTE)
  /* Replace each input sequence of repeated delimiters with a single
     delimiter. */
  #define WRDSF_SQUEEZE_DELIMS    0x00000800
  /* Return delimiters. */
  #define WRDSF_RETURN_DELIMS     0x00001000
  /* Treat sed expressions as words. */
  #define WRDSF_SED_EXPR          0x00002000
  /* ws_delim field is initialized. */
  #define WRDSF_DELIM             0x00004000
  /* ws_comment field is initialized. */
  #define WRDSF_COMMENT           0x00008000
  /* ws_alloc_die field is initialized. */
  #define WRDSF_ALLOC_DIE         0x00010000
  /* ws_error field is initialized. */
  #define WRDSF_ERROR             0x00020000
  /* ws_debug field is initialized. */
  #define WRDSF_DEBUG             0x00040000
  /* ws_env field is initialized. */
  #define WRDSF_ENV               0x00080000
  /* ws_getvar field is initialized. */
  #define WRDSF_GETVAR            0x00100000
  /* Enable debugging. */
  #define WRDSF_SHOWDBG           0x00200000
  /* Don't split input into words.  Useful for side effects. */
  #define WRDSF_NOSPLIT           0x00400000
  /* Keep undefined variables in place, instead of expanding them to
     empty strings. */
  #define WRDSF_KEEPUNDEF         0x00800000
  /* Warn about undefined variables. */
  #define WRDSF_WARNUNDEF         0x01000000
  /* Handle C escapes. */
  #define WRDSF_CESCAPES          0x02000000
  /* ws_closure is set. */
  #define WRDSF_CLOSURE           0x04000000
  /* ws_env is a Key/Value environment, i.e. the value of a variable is
     stored in the element that follows its name. */
  #define WRDSF_ENV_KV            0x08000000
  /* ws_escape is set. */
  #define WRDSF_ESCAPE            0x10000000
  /* Incremental mode. */
  #define WRDSF_INCREMENTAL       0x20000000
  /* Perform pathname and tilde expansion. */
  #define WRDSF_PATHEXPAND        0x40000000
  /* ws_options is initialized. */
  #define WRDSF_OPTIONS           0x80000000

  /* Default flag set: no variable expansion, no command substitution,
     both quote styles handled, repeated delimiters squeezed, C escapes
     handled. */
  #define WRDSF_DEFFLAGS \
    (WRDSF_NOVAR | WRDSF_NOCMD | \
     WRDSF_QUOTE | WRDSF_SQUEEZE_DELIMS | WRDSF_CESCAPES)
  /* Wordsplit options: bit values for ws_options. */

  /* Remove the word that produces empty string after path expansion. */
  #define WRDSO_NULLGLOB      0x00000001
  /* Print error message if path expansion produces empty string. */
  #define WRDSO_FAILGLOB      0x00000002
  /* Allow a leading period to be matched by metacharacters. */
  #define WRDSO_DOTGLOB       0x00000004
  /* ws_command needs argv parameter. */
  #define WRDSO_ARGV          0x00000008
  /* Keep backslash in unrecognized escape sequences in words. */
  #define WRDSO_BSKEEP_WORD   0x00000010
  /* Handle octal escapes in words. */
  #define WRDSO_OESC_WORD     0x00000020
  /* Handle hex escapes in words. */
  #define WRDSO_XESC_WORD     0x00000040
  /* ws_maxwords field is initialized. */
  #define WRDSO_MAXWORDS      0x00000080
  /* Keep backslash in unrecognized escape sequences in quoted strings. */
  #define WRDSO_BSKEEP_QUOTE  0x00000100
  /* Handle octal escapes in quoted strings. */
  #define WRDSO_OESC_QUOTE    0x00000200
  /* Handle hex escapes in quoted strings. */
  #define WRDSO_XESC_QUOTE    0x00000400

  /* Short names; each one is the word-context variant of the option. */
  #define WRDSO_BSKEEP        WRDSO_BSKEEP_WORD
  #define WRDSO_OESC          WRDSO_OESC_WORD
  #define WRDSO_XESC          WRDSO_XESC_WORD
  /* Indices into ws_escape. */
  #define WRDSX_WORD   0
  #define WRDSX_QUOTE  1

  /* Escape-option helpers.  Q selects the context: 0 (WRDSX_WORD) for
     words, 1 (WRDSX_QUOTE) for quoted strings.  F is the option bit as
     defined for words; it is shifted left by 4 bits per unit of Q before
     being applied to WS->ws_options. */

  /* Set escape option F in WS for words (Q==0) or quoted strings (Q==1). */
  #define WRDSO_ESC_SET(ws,q,f)  ((ws)->ws_options |= ((f) << (4 * (q))))
  /* Test WS for escape option F for words (Q==0) or quoted strings (Q==1). */
  #define WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << (4 * (q))))
  /* Error codes: values of ws_errno, also returned by the ws_getvar and
     ws_command callbacks.
     NOTE(review): except for WRDSE_OK, WRDSE_EOF and WRDSE_USERERR, the
     one-line meanings below are inferred from the names only -- confirm
     against wordsplit_strerror(). */
  #define WRDSE_OK       0          /* Success. */
  #define WRDSE_EOF      WRDSE_OK   /* Same value as WRDSE_OK. */
  #define WRDSE_QUOTE    1          /* Presumably: quoting error. */
  #define WRDSE_NOSPACE  2          /* Presumably: out of memory. */
  #define WRDSE_USAGE    3          /* Presumably: invalid usage. */
  #define WRDSE_CBRACE   4          /* Presumably: curly brace error. */
  #define WRDSE_UNDEF    5          /* Presumably: undefined variable. */
  #define WRDSE_NOINPUT  6          /* Presumably: no (more) input. */
  #define WRDSE_PAREN    7          /* Presumably: parenthesis error. */
  #define WRDSE_GLOBERR  8          /* Presumably: path expansion error. */
  #define WRDSE_USERERR  9          /* User-defined error; its text is in
                                       ws_usererr. */
  223. int wordsplit (const char *s, wordsplit_t *ws, unsigned flags);
  224. int wordsplit_len (const char *s, size_t len, wordsplit_t *ws, unsigned flags);
  225. void wordsplit_free (wordsplit_t *ws);
  226. void wordsplit_free_words (wordsplit_t *ws);
  227. void wordsplit_free_envbuf (wordsplit_t *ws);
  228. int wordsplit_get_words (wordsplit_t *ws, size_t *wordc, char ***wordv);
  229. static inline void wordsplit_getwords (wordsplit_t *ws, size_t *wordc, char ***wordv)
  230. __attribute__ ((deprecated));
  231. static inline void
  232. wordsplit_getwords (wordsplit_t *ws, size_t *wordc, char ***wordv)
  233. {
  234. wordsplit_get_words (ws, wordc, wordv);
  235. }
  236. int wordsplit_append (wordsplit_t *wsp, int argc, char **argv);
  237. int wordsplit_c_unquote_char (int c);
  238. int wordsplit_c_quote_char (int c);
  239. size_t wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote);
  240. void wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex);
  241. void wordsplit_perror (wordsplit_t *ws);
  242. const char *wordsplit_strerror (wordsplit_t *ws);
  243. void wordsplit_clearerr (wordsplit_t *ws);
  244. #endif