4
0

wordsplit.c 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561
  1. /* wordsplit - a word splitter
  2. Copyright (C) 2009-2018 Sergey Poznyakoff
  3. This program is free software; you can redistribute it and/or modify it
  4. under the terms of the GNU General Public License as published by the
  5. Free Software Foundation; either version 3 of the License, or (at your
  6. option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License along
  12. with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. #ifdef HAVE_CONFIG_H
  14. # include <config.h>
  15. #endif
  16. #include <errno.h>
  17. #include <ctype.h>
  18. #include <unistd.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include <stdio.h>
  22. #include <stdarg.h>
  23. #include <pwd.h>
  24. #include <glob.h>
  25. #if ENABLE_NLS
  26. # include <gettext.h>
  27. #else
  28. # define gettext(msgid) msgid
  29. #endif
  30. #define _(msgid) gettext (msgid)
  31. #define N_(msgid) msgid
  32. #include <wordsplit.h>
  33. #define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
  34. #define ISDELIM(ws,c) \
  35. (strchr ((ws)->ws_delim, (c)) != NULL)
  36. #define ISPUNCT(c) (strchr("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",(c))!=NULL)
  37. #define ISUPPER(c) ('A' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'Z')
  38. #define ISLOWER(c) ('a' <= ((unsigned) (c)) && ((unsigned) (c)) <= 'z')
  39. #define ISALPHA(c) (ISUPPER(c) || ISLOWER(c))
  40. #define ISDIGIT(c) ('0' <= ((unsigned) (c)) && ((unsigned) (c)) <= '9')
  41. #define ISXDIGIT(c) (strchr("abcdefABCDEF", c)!=NULL)
  42. #define ISALNUM(c) (ISALPHA(c) || ISDIGIT(c))
  43. #define ISPRINT(c) (' ' <= ((unsigned) (c)) && ((unsigned) (c)) <= 127)
  44. #define ISVARBEG(c) (ISALPHA(c) || c == '_')
  45. #define ISVARCHR(c) (ISALNUM(c) || c == '_')
  46. #define WSP_RETURN_DELIMS(wsp) \
  47. ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))
  48. #define ALLOC_INIT 128
  49. #define ALLOC_INCR 128
  50. static void
  51. _wsplt_alloc_die (struct wordsplit *wsp)
  52. {
  53. wsp->ws_error ("%s", _("memory exhausted"));
  54. abort ();
  55. }
  56. static void ATTRIBUTE_FORMAT ((__printf__, 1, 2))
  57. _wsplt_error (const char *fmt, ...)
  58. {
  59. va_list ap;
  60. va_start (ap, fmt);
  61. vfprintf (stderr, fmt, ap);
  62. va_end (ap);
  63. fputc ('\n', stderr);
  64. }
  65. static void wordsplit_free_nodes (struct wordsplit *);
  66. static int
  67. _wsplt_seterr (struct wordsplit *wsp, int ec)
  68. {
  69. wsp->ws_errno = ec;
  70. if (wsp->ws_flags & WRDSF_SHOWERR)
  71. wordsplit_perror (wsp);
  72. return ec;
  73. }
  74. static int
  75. _wsplt_nomem (struct wordsplit *wsp)
  76. {
  77. errno = ENOMEM;
  78. wsp->ws_errno = WRDSE_NOSPACE;
  79. if (wsp->ws_flags & WRDSF_ENOMEMABRT)
  80. wsp->ws_alloc_die (wsp);
  81. if (wsp->ws_flags & WRDSF_SHOWERR)
  82. wordsplit_perror (wsp);
  83. if (!(wsp->ws_flags & WRDSF_REUSE))
  84. wordsplit_free (wsp);
  85. wordsplit_free_nodes (wsp);
  86. return wsp->ws_errno;
  87. }
  88. static int wordsplit_run (const char *command, size_t length,
  89. struct wordsplit *wsp,
  90. unsigned flags, int lvl);
  91. static int wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
  92. unsigned flags);
  93. static int wordsplit_process_list (struct wordsplit *wsp, size_t start);
  94. static int wordsplit_finish (struct wordsplit *wsp);
  95. static int
  96. _wsplt_subsplit (struct wordsplit *wsp, struct wordsplit *wss,
  97. char const *str, int len,
  98. unsigned flags, int finalize)
  99. {
  100. int rc;
  101. wss->ws_delim = wsp->ws_delim;
  102. wss->ws_debug = wsp->ws_debug;
  103. wss->ws_error = wsp->ws_error;
  104. wss->ws_alloc_die = wsp->ws_alloc_die;
  105. if (!(flags & WRDSF_NOVAR))
  106. {
  107. wss->ws_env = wsp->ws_env;
  108. wss->ws_getvar = wsp->ws_getvar;
  109. flags |= wsp->ws_flags & (WRDSF_ENV | WRDSF_ENV_KV | WRDSF_GETVAR);
  110. }
  111. if (!(flags & WRDSF_NOCMD))
  112. {
  113. wss->ws_command = wsp->ws_command;
  114. }
  115. if ((flags & (WRDSF_NOVAR|WRDSF_NOCMD)) != (WRDSF_NOVAR|WRDSF_NOCMD))
  116. {
  117. wss->ws_closure = wsp->ws_closure;
  118. flags |= wsp->ws_flags & WRDSF_CLOSURE;
  119. }
  120. wss->ws_options = wsp->ws_options;
  121. flags |= WRDSF_DELIM
  122. | WRDSF_ALLOC_DIE
  123. | WRDSF_ERROR
  124. | WRDSF_DEBUG
  125. | (wsp->ws_flags & (WRDSF_SHOWDBG | WRDSF_SHOWERR | WRDSF_OPTIONS));
  126. rc = wordsplit_init (wss, str, len, flags);
  127. if (rc)
  128. return rc;
  129. wss->ws_lvl = wsp->ws_lvl + 1;
  130. rc = wordsplit_process_list (wss, 0);
  131. if (rc)
  132. {
  133. wordsplit_free_nodes (wss);
  134. return rc;
  135. }
  136. if (finalize)
  137. {
  138. rc = wordsplit_finish (wss);
  139. wordsplit_free_nodes (wss);
  140. }
  141. return rc;
  142. }
  143. static void
  144. _wsplt_seterr_sub (struct wordsplit *wsp, struct wordsplit *wss)
  145. {
  146. if (wsp->ws_errno == WRDSE_USERERR)
  147. free (wsp->ws_usererr);
  148. wsp->ws_errno = wss->ws_errno;
  149. if (wss->ws_errno == WRDSE_USERERR)
  150. {
  151. wsp->ws_usererr = wss->ws_usererr;
  152. wss->ws_errno = WRDSE_EOF;
  153. wss->ws_usererr = NULL;
  154. }
  155. }
  156. static void
  157. wordsplit_init0 (struct wordsplit *wsp)
  158. {
  159. if (wsp->ws_flags & WRDSF_REUSE)
  160. {
  161. if (!(wsp->ws_flags & WRDSF_APPEND))
  162. wordsplit_free_words (wsp);
  163. wordsplit_clearerr (wsp);
  164. }
  165. else
  166. {
  167. wsp->ws_wordv = NULL;
  168. wsp->ws_wordc = 0;
  169. wsp->ws_wordn = 0;
  170. }
  171. wsp->ws_errno = 0;
  172. }
  173. char wordsplit_c_escape_tab[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
  174. static int
  175. wordsplit_init (struct wordsplit *wsp, const char *input, size_t len,
  176. unsigned flags)
  177. {
  178. wsp->ws_flags = flags;
  179. if (!(wsp->ws_flags & WRDSF_ALLOC_DIE))
  180. wsp->ws_alloc_die = _wsplt_alloc_die;
  181. if (!(wsp->ws_flags & WRDSF_ERROR))
  182. wsp->ws_error = _wsplt_error;
  183. if (!(wsp->ws_flags & WRDSF_NOVAR))
  184. {
  185. /* These will be initialized on first variable assignment */
  186. wsp->ws_envidx = wsp->ws_envsiz = 0;
  187. wsp->ws_envbuf = NULL;
  188. }
  189. if (!(wsp->ws_flags & WRDSF_NOCMD))
  190. {
  191. if (!wsp->ws_command)
  192. {
  193. _wsplt_seterr (wsp, WRDSE_USAGE);
  194. errno = EINVAL;
  195. return wsp->ws_errno;
  196. }
  197. }
  198. if (wsp->ws_flags & WRDSF_SHOWDBG)
  199. {
  200. if (!(wsp->ws_flags & WRDSF_DEBUG))
  201. {
  202. if (wsp->ws_flags & WRDSF_ERROR)
  203. wsp->ws_debug = wsp->ws_error;
  204. else if (wsp->ws_flags & WRDSF_SHOWERR)
  205. wsp->ws_debug = _wsplt_error;
  206. else
  207. wsp->ws_flags &= ~WRDSF_SHOWDBG;
  208. }
  209. }
  210. wsp->ws_input = input;
  211. wsp->ws_len = len;
  212. if (!(wsp->ws_flags & WRDSF_DOOFFS))
  213. wsp->ws_offs = 0;
  214. if (!(wsp->ws_flags & WRDSF_DELIM))
  215. wsp->ws_delim = " \t\n";
  216. if (!(wsp->ws_flags & WRDSF_COMMENT))
  217. wsp->ws_comment = NULL;
  218. if (!(wsp->ws_flags & WRDSF_CLOSURE))
  219. wsp->ws_closure = NULL;
  220. if (!(wsp->ws_flags & WRDSF_OPTIONS))
  221. wsp->ws_options = 0;
  222. if (wsp->ws_flags & WRDSF_ESCAPE)
  223. {
  224. if (!wsp->ws_escape[WRDSX_WORD])
  225. wsp->ws_escape[WRDSX_WORD] = "";
  226. if (!wsp->ws_escape[WRDSX_QUOTE])
  227. wsp->ws_escape[WRDSX_QUOTE] = "";
  228. }
  229. else
  230. {
  231. if (wsp->ws_flags & WRDSF_CESCAPES)
  232. {
  233. wsp->ws_escape[WRDSX_WORD] = wordsplit_c_escape_tab;
  234. wsp->ws_escape[WRDSX_QUOTE] = wordsplit_c_escape_tab;
  235. wsp->ws_options |= WRDSO_OESC_QUOTE | WRDSO_OESC_WORD
  236. | WRDSO_XESC_QUOTE | WRDSO_XESC_WORD;
  237. }
  238. else
  239. {
  240. wsp->ws_escape[WRDSX_WORD] = "";
  241. wsp->ws_escape[WRDSX_QUOTE] = "\\\\\"\"";
  242. wsp->ws_options |= WRDSO_BSKEEP_QUOTE;
  243. }
  244. }
  245. wsp->ws_endp = 0;
  246. wsp->ws_wordi = 0;
  247. if (wsp->ws_flags & WRDSF_REUSE)
  248. wordsplit_free_nodes (wsp);
  249. wsp->ws_head = wsp->ws_tail = NULL;
  250. wordsplit_init0 (wsp);
  251. return 0;
  252. }
  253. static int
  254. alloc_space (struct wordsplit *wsp, size_t count)
  255. {
  256. size_t offs = (wsp->ws_flags & WRDSF_DOOFFS) ? wsp->ws_offs : 0;
  257. char **ptr;
  258. size_t newalloc;
  259. if (wsp->ws_wordv == NULL)
  260. {
  261. newalloc = offs + count > ALLOC_INIT ? count : ALLOC_INIT;
  262. ptr = calloc (newalloc, sizeof (ptr[0]));
  263. }
  264. else if (wsp->ws_wordn < offs + wsp->ws_wordc + count)
  265. {
  266. newalloc = offs + wsp->ws_wordc +
  267. (count > ALLOC_INCR ? count : ALLOC_INCR);
  268. ptr = realloc (wsp->ws_wordv, newalloc * sizeof (ptr[0]));
  269. }
  270. else
  271. return 0;
  272. if (ptr)
  273. {
  274. wsp->ws_wordn = newalloc;
  275. wsp->ws_wordv = ptr;
  276. }
  277. else
  278. return _wsplt_nomem (wsp);
  279. return 0;
  280. }
  281. /* Node state flags */
  282. #define _WSNF_NULL 0x01 /* null node (a noop) */
  283. #define _WSNF_WORD 0x02 /* node contains word in v.word */
  284. #define _WSNF_QUOTE 0x04 /* text is quoted */
  285. #define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
  286. #define _WSNF_JOIN 0x10 /* node must be joined with the next node */
  287. #define _WSNF_SEXP 0x20 /* is a sed expression */
  288. #define _WSNF_DELIM 0x40 /* node is a delimiter */
  289. #define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
  290. wordsplit_add_segm must add the
  291. segment even if it is empty */
  292. struct wordsplit_node
  293. {
  294. struct wordsplit_node *prev; /* Previous element */
  295. struct wordsplit_node *next; /* Next element */
  296. unsigned flags; /* Node flags */
  297. union
  298. {
  299. struct
  300. {
  301. size_t beg; /* Start of word in ws_input */
  302. size_t end; /* End of word in ws_input */
  303. } segm;
  304. char *word;
  305. } v;
  306. };
  307. static const char *
  308. wsnode_flagstr (unsigned flags)
  309. {
  310. static char retbuf[7];
  311. char *p = retbuf;
  312. if (flags & _WSNF_WORD)
  313. *p++ = 'w';
  314. else if (flags & _WSNF_NULL)
  315. *p++ = 'n';
  316. else
  317. *p++ = '-';
  318. if (flags & _WSNF_QUOTE)
  319. *p++ = 'q';
  320. else
  321. *p++ = '-';
  322. if (flags & _WSNF_NOEXPAND)
  323. *p++ = 'E';
  324. else
  325. *p++ = '-';
  326. if (flags & _WSNF_JOIN)
  327. *p++ = 'j';
  328. else
  329. *p++ = '-';
  330. if (flags & _WSNF_SEXP)
  331. *p++ = 's';
  332. else
  333. *p++ = '-';
  334. if (flags & _WSNF_DELIM)
  335. *p++ = 'd';
  336. else
  337. *p++ = '-';
  338. *p = 0;
  339. return retbuf;
  340. }
  341. static const char *
  342. wsnode_ptr (struct wordsplit *wsp, struct wordsplit_node *p)
  343. {
  344. if (p->flags & _WSNF_NULL)
  345. return "";
  346. else if (p->flags & _WSNF_WORD)
  347. return p->v.word;
  348. else
  349. return wsp->ws_input + p->v.segm.beg;
  350. }
  351. static size_t
  352. wsnode_len (struct wordsplit_node *p)
  353. {
  354. if (p->flags & _WSNF_NULL)
  355. return 0;
  356. else if (p->flags & _WSNF_WORD)
  357. return strlen (p->v.word);
  358. else
  359. return p->v.segm.end - p->v.segm.beg;
  360. }
  361. static struct wordsplit_node *
  362. wsnode_new (struct wordsplit *wsp)
  363. {
  364. struct wordsplit_node *node = calloc (1, sizeof (*node));
  365. if (!node)
  366. _wsplt_nomem (wsp);
  367. return node;
  368. }
  369. static void
  370. wsnode_free (struct wordsplit_node *p)
  371. {
  372. if (p->flags & _WSNF_WORD)
  373. free (p->v.word);
  374. free (p);
  375. }
  376. static void
  377. wsnode_append (struct wordsplit *wsp, struct wordsplit_node *node)
  378. {
  379. node->next = NULL;
  380. node->prev = wsp->ws_tail;
  381. if (wsp->ws_tail)
  382. wsp->ws_tail->next = node;
  383. else
  384. wsp->ws_head = node;
  385. wsp->ws_tail = node;
  386. }
  387. static void
  388. wsnode_remove (struct wordsplit *wsp, struct wordsplit_node *node)
  389. {
  390. struct wordsplit_node *p;
  391. p = node->prev;
  392. if (p)
  393. {
  394. p->next = node->next;
  395. if (!node->next)
  396. p->flags &= ~_WSNF_JOIN;
  397. }
  398. else
  399. wsp->ws_head = node->next;
  400. p = node->next;
  401. if (p)
  402. p->prev = node->prev;
  403. else
  404. wsp->ws_tail = node->prev;
  405. node->next = node->prev = NULL;
  406. }
  407. static struct wordsplit_node *
  408. wsnode_tail (struct wordsplit_node *p)
  409. {
  410. while (p->next)
  411. p = p->next;
  412. return p;
  413. }
  414. static void
  415. wsnode_insert (struct wordsplit *wsp, struct wordsplit_node *node,
  416. struct wordsplit_node *anchor, int before)
  417. {
  418. if (!wsp->ws_head)
  419. {
  420. node->next = node->prev = NULL;
  421. wsp->ws_head = wsp->ws_tail = node;
  422. }
  423. else if (before)
  424. {
  425. if (anchor->prev)
  426. wsnode_insert (wsp, node, anchor->prev, 0);
  427. else
  428. {
  429. struct wordsplit_node *tail = wsnode_tail (node);
  430. node->prev = NULL;
  431. tail->next = anchor;
  432. anchor->prev = tail;
  433. wsp->ws_head = node;
  434. }
  435. }
  436. else
  437. {
  438. struct wordsplit_node *p;
  439. struct wordsplit_node *tail = wsnode_tail (node);
  440. p = anchor->next;
  441. if (p)
  442. p->prev = tail;
  443. else
  444. wsp->ws_tail = tail;
  445. tail->next = p;
  446. node->prev = anchor;
  447. anchor->next = node;
  448. }
  449. }
  450. static int
  451. wordsplit_add_segm (struct wordsplit *wsp, size_t beg, size_t end, int flg)
  452. {
  453. if (end == beg && !(flg & _WSNF_EMPTYOK))
  454. return 0;
  455. struct wordsplit_node *node = wsnode_new (wsp);
  456. if (!node)
  457. return 1;
  458. node->flags = flg & ~(_WSNF_WORD | _WSNF_EMPTYOK);
  459. node->v.segm.beg = beg;
  460. node->v.segm.end = end;
  461. wsnode_append (wsp, node);
  462. return 0;
  463. }
  464. static void
  465. wordsplit_free_nodes (struct wordsplit *wsp)
  466. {
  467. struct wordsplit_node *p;
  468. for (p = wsp->ws_head; p;)
  469. {
  470. struct wordsplit_node *next = p->next;
  471. wsnode_free (p);
  472. p = next;
  473. }
  474. wsp->ws_head = wsp->ws_tail = NULL;
  475. }
  476. static void
  477. wordsplit_dump_nodes (struct wordsplit *wsp)
  478. {
  479. struct wordsplit_node *p;
  480. int n = 0;
  481. for (p = wsp->ws_head, n = 0; p; p = p->next, n++)
  482. {
  483. if (p->flags & _WSNF_WORD)
  484. wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%s;",
  485. wsp->ws_lvl,
  486. n, p, p->flags, wsnode_flagstr (p->flags), p->v.word);
  487. else
  488. wsp->ws_debug ("(%02d) %4d: %p: %#04x (%s):%.*s;",
  489. wsp->ws_lvl,
  490. n, p, p->flags, wsnode_flagstr (p->flags),
  491. (int) (p->v.segm.end - p->v.segm.beg),
  492. wsp->ws_input + p->v.segm.beg);
  493. }
  494. }
  495. static int
  496. coalesce_segment (struct wordsplit *wsp, struct wordsplit_node *node)
  497. {
  498. struct wordsplit_node *p, *end;
  499. size_t len = 0;
  500. char *buf, *cur;
  501. for (p = node; p->flags & _WSNF_JOIN; )
  502. {
  503. len += wsnode_len (p);
  504. p = p->next;
  505. if (!p)
  506. break;
  507. }
  508. if (p == node)
  509. return 0;
  510. end = p;
  511. buf = malloc (len + 1);
  512. if (!buf)
  513. return _wsplt_nomem (wsp);
  514. cur = buf;
  515. p = node;
  516. for (;;)
  517. {
  518. struct wordsplit_node *next = p->next;
  519. const char *str = wsnode_ptr (wsp, p);
  520. size_t slen = wsnode_len (p);
  521. memcpy (cur, str, slen);
  522. cur += slen;
  523. if (p != node)
  524. {
  525. node->flags |= p->flags & _WSNF_QUOTE;
  526. wsnode_remove (wsp, p);
  527. if (p == end)
  528. {
  529. /* Call wsnode_free separately to work around GCC bug 106427. */
  530. wsnode_free (p);
  531. break;
  532. }
  533. wsnode_free (p);
  534. }
  535. p = next;
  536. }
  537. *cur = 0;
  538. node->flags &= ~_WSNF_JOIN;
  539. if (node->flags & _WSNF_WORD)
  540. free (node->v.word);
  541. else
  542. node->flags |= _WSNF_WORD;
  543. node->v.word = buf;
  544. return 0;
  545. }
  546. static void wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
  547. char *dst, const char *src,
  548. size_t n);
  549. static int
  550. wsnode_quoteremoval (struct wordsplit *wsp)
  551. {
  552. struct wordsplit_node *p;
  553. for (p = wsp->ws_head; p; p = p->next)
  554. {
  555. const char *str = wsnode_ptr (wsp, p);
  556. size_t slen = wsnode_len (p);
  557. int unquote;
  558. if (wsp->ws_flags & WRDSF_QUOTE)
  559. unquote = !(p->flags & _WSNF_NOEXPAND);
  560. else
  561. unquote = 0;
  562. if (unquote)
  563. {
  564. if (!(p->flags & _WSNF_WORD))
  565. {
  566. char *newstr = malloc (slen + 1);
  567. if (!newstr)
  568. return _wsplt_nomem (wsp);
  569. memcpy (newstr, str, slen);
  570. newstr[slen] = 0;
  571. p->v.word = newstr;
  572. p->flags |= _WSNF_WORD;
  573. }
  574. wordsplit_string_unquote_copy (wsp, p->flags & _WSNF_QUOTE,
  575. p->v.word, str, slen);
  576. }
  577. }
  578. return 0;
  579. }
  580. static int
  581. wsnode_coalesce (struct wordsplit *wsp)
  582. {
  583. struct wordsplit_node *p;
  584. for (p = wsp->ws_head; p; p = p->next)
  585. {
  586. if (p->flags & _WSNF_JOIN)
  587. if (coalesce_segment (wsp, p))
  588. return 1;
  589. }
  590. return 0;
  591. }
  592. static int
  593. wsnode_tail_coalesce (struct wordsplit *wsp, struct wordsplit_node *p)
  594. {
  595. if (p->next)
  596. {
  597. struct wordsplit_node *np = p;
  598. while (np && np->next)
  599. {
  600. np->flags |= _WSNF_JOIN;
  601. np = np->next;
  602. }
  603. if (coalesce_segment (wsp, p))
  604. return 1;
  605. }
  606. return 0;
  607. }
  608. static size_t skip_delim (struct wordsplit *wsp);
  609. static int
  610. wordsplit_finish (struct wordsplit *wsp)
  611. {
  612. struct wordsplit_node *p;
  613. size_t n;
  614. int delim;
  615. /* Postprocess delimiters. It would be rather simple, if it weren't for
  616. the incremental operation.
  617. Nodes of type _WSNF_DELIM get inserted to the node list if either
  618. WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set.
  619. The following cases should be distinguished:
  620. 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress
  621. any runs of similar delimiter nodes to a single node. The nodes are
  622. 'similar' if they point to the same delimiter character.
  623. If WRDSO_MAXWORDS option is set, stop compressing when
  624. ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
  625. a single last node.
  626. 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not,
  627. remove any delimiter nodes. Stop operation when
  628. ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
  629. a single last node.
  630. 3. If incremental operation is in progress, restart the loop any time
  631. a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS
  632. is set.
  633. */
  634. again:
  635. delim = 0; /* Delimiter being processed (if any) */
  636. n = 0; /* Number of words processed so far */
  637. p = wsp->ws_head; /* Current node */
  638. while (p)
  639. {
  640. struct wordsplit_node *next = p->next;
  641. if (p->flags & _WSNF_DELIM)
  642. {
  643. if (wsp->ws_flags & WRDSF_RETURN_DELIMS)
  644. {
  645. if (wsp->ws_flags & WRDSF_SQUEEZE_DELIMS)
  646. {
  647. char const *s = wsnode_ptr (wsp, p);
  648. if (delim)
  649. {
  650. if (delim == *s)
  651. {
  652. wsnode_remove (wsp, p);
  653. p = next;
  654. continue;
  655. }
  656. else
  657. {
  658. delim = 0;
  659. n++; /* Count this node; it will be returned */
  660. }
  661. }
  662. else
  663. {
  664. delim = *s;
  665. p = next;
  666. continue;
  667. }
  668. }
  669. }
  670. else if (wsp->ws_options & WRDSO_MAXWORDS)
  671. {
  672. wsnode_remove (wsp, p);
  673. p = next;
  674. continue;
  675. }
  676. }
  677. else
  678. {
  679. if (delim)
  680. {
  681. /* Last node was a delimiter or a compressed run of delimiters;
  682. Count it, and clear the delimiter marker */
  683. n++;
  684. delim = 0;
  685. }
  686. if (wsp->ws_options & WRDSO_MAXWORDS)
  687. {
  688. if (wsp->ws_wordi + n + 1 == wsp->ws_maxwords)
  689. break;
  690. }
  691. }
  692. n++;
  693. if (wsp->ws_flags & WRDSF_INCREMENTAL)
  694. p = NULL; /* Break the loop */
  695. else
  696. p = next;
  697. }
  698. if (p)
  699. {
  700. /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords
  701. words have already been collected. Reconstruct a single final
  702. node from the remaining nodes. */
  703. if (wsnode_tail_coalesce (wsp, p))
  704. return wsp->ws_errno;
  705. n++;
  706. }
  707. if (n == 0 && (wsp->ws_flags & WRDSF_INCREMENTAL))
  708. {
  709. /* The loop above have eliminated all nodes. Restart the
  710. processing, if there's any input left. */
  711. if (wsp->ws_endp < wsp->ws_len)
  712. {
  713. int rc;
  714. if (wsp->ws_flags & WRDSF_SHOWDBG)
  715. wsp->ws_debug (_("Restarting"));
  716. rc = wordsplit_process_list (wsp, skip_delim (wsp));
  717. if (rc)
  718. return rc;
  719. }
  720. else
  721. {
  722. wsp->ws_error = WRDSE_EOF;
  723. return WRDSE_EOF;
  724. }
  725. goto again;
  726. }
  727. if (alloc_space (wsp, n + 1))
  728. return wsp->ws_errno;
  729. while (wsp->ws_head)
  730. {
  731. const char *str = wsnode_ptr (wsp, wsp->ws_head);
  732. size_t slen = wsnode_len (wsp->ws_head);
  733. char *newstr = malloc (slen + 1);
  734. /* Assign newstr first, even if it is NULL. This way
  735. wordsplit_free will work even if we return
  736. nomem later. */
  737. wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = newstr;
  738. if (!newstr)
  739. return _wsplt_nomem (wsp);
  740. memcpy (newstr, str, slen);
  741. newstr[slen] = 0;
  742. wsnode_remove (wsp, wsp->ws_head);
  743. wsp->ws_wordc++;
  744. wsp->ws_wordi++;
  745. if (wsp->ws_flags & WRDSF_INCREMENTAL)
  746. break;
  747. }
  748. wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
  749. return 0;
  750. }
  751. int
  752. wordsplit_append (wordsplit_t *wsp, int argc, char **argv)
  753. {
  754. int rc;
  755. size_t i;
  756. rc = alloc_space (wsp, wsp->ws_wordc + argc + 1);
  757. if (rc)
  758. return rc;
  759. for (i = 0; i < argc; i++)
  760. {
  761. char *newstr = strdup (argv[i]);
  762. if (!newstr)
  763. {
  764. while (i > 0)
  765. {
  766. free (wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1]);
  767. wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i - 1] = NULL;
  768. i--;
  769. }
  770. return _wsplt_nomem (wsp);
  771. }
  772. wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc + i] = newstr;
  773. }
  774. wsp->ws_wordc += i;
  775. wsp->ws_wordv[wsp->ws_offs + wsp->ws_wordc] = NULL;
  776. return 0;
  777. }
  778. /* Variable expansion */
  779. static int
  780. node_split_prefix (struct wordsplit *wsp,
  781. struct wordsplit_node **ptail,
  782. struct wordsplit_node *node,
  783. size_t beg, size_t len, int flg)
  784. {
  785. if (len == 0)
  786. return 0;
  787. struct wordsplit_node *newnode = wsnode_new (wsp);
  788. if (!newnode)
  789. return 1;
  790. wsnode_insert (wsp, newnode, *ptail, 0);
  791. if (node->flags & _WSNF_WORD)
  792. {
  793. const char *str = wsnode_ptr (wsp, node);
  794. char *newstr = malloc (len + 1);
  795. if (!newstr)
  796. return _wsplt_nomem (wsp);
  797. memcpy (newstr, str + beg, len);
  798. newstr[len] = 0;
  799. newnode->flags = _WSNF_WORD;
  800. newnode->v.word = newstr;
  801. }
  802. else
  803. {
  804. newnode->v.segm.beg = node->v.segm.beg + beg;
  805. newnode->v.segm.end = newnode->v.segm.beg + len;
  806. }
  807. newnode->flags |= flg;
  808. *ptail = newnode;
  809. return 0;
  810. }
  811. static int
  812. find_closing_paren (const char *str, size_t i, size_t len, size_t *poff,
  813. char const *paren)
  814. {
  815. enum { st_init, st_squote, st_dquote } state = st_init;
  816. size_t level = 1;
  817. for (; i < len; i++)
  818. {
  819. switch (state)
  820. {
  821. case st_init:
  822. switch (str[i])
  823. {
  824. default:
  825. if (str[i] == paren[0])
  826. {
  827. level++;
  828. break;
  829. }
  830. else if (str[i] == paren[1])
  831. {
  832. if (--level == 0)
  833. {
  834. *poff = i;
  835. return 0;
  836. }
  837. break;
  838. }
  839. break;
  840. case '"':
  841. state = st_dquote;
  842. break;
  843. case '\'':
  844. state = st_squote;
  845. break;
  846. }
  847. break;
  848. case st_squote:
  849. if (str[i] == '\'')
  850. state = st_init;
  851. break;
  852. case st_dquote:
  853. if (str[i] == '\\')
  854. i++;
  855. else if (str[i] == '"')
  856. state = st_init;
  857. break;
  858. }
  859. }
  860. return 1;
  861. }
  862. static int
  863. wordsplit_find_env (struct wordsplit *wsp, const char *name, size_t len,
  864. char const **ret)
  865. {
  866. size_t i;
  867. if (!(wsp->ws_flags & WRDSF_ENV))
  868. return WRDSE_UNDEF;
  869. if (wsp->ws_flags & WRDSF_ENV_KV)
  870. {
  871. /* A key-value pair environment */
  872. for (i = 0; wsp->ws_env[i]; i++)
  873. {
  874. size_t elen = strlen (wsp->ws_env[i]);
  875. if (elen == len && memcmp (wsp->ws_env[i], name, elen) == 0)
  876. {
  877. *ret = wsp->ws_env[i + 1];
  878. return WRDSE_OK;
  879. }
  880. /* Skip the value. Break the loop if it is NULL. */
  881. i++;
  882. if (wsp->ws_env[i] == NULL)
  883. break;
  884. }
  885. }
  886. else if (wsp->ws_env)
  887. {
  888. /* Usual (A=B) environment. */
  889. for (i = 0; wsp->ws_env[i]; i++)
  890. {
  891. size_t j;
  892. const char *var = wsp->ws_env[i];
  893. for (j = 0; j < len; j++)
  894. if (name[j] != var[j])
  895. break;
  896. if (j == len && var[j] == '=')
  897. {
  898. *ret = var + j + 1;
  899. return WRDSE_OK;
  900. }
  901. }
  902. }
  903. return WRDSE_UNDEF;
  904. }
  905. static int
  906. wsplt_assign_var (struct wordsplit *wsp, const char *name, size_t namelen,
  907. char *value)
  908. {
  909. int n = (wsp->ws_flags & WRDSF_ENV_KV) ? 2 : 1;
  910. char *v;
  911. if (wsp->ws_envidx + n >= wsp->ws_envsiz)
  912. {
  913. size_t sz;
  914. char **newenv;
  915. if (!wsp->ws_envbuf)
  916. {
  917. if (wsp->ws_flags & WRDSF_ENV)
  918. {
  919. size_t i = 0, j;
  920. if (wsp->ws_env)
  921. {
  922. for (; wsp->ws_env[i]; i++)
  923. ;
  924. }
  925. sz = i + n + 1;
  926. newenv = calloc (sz, sizeof(newenv[0]));
  927. if (!newenv)
  928. return _wsplt_nomem (wsp);
  929. for (j = 0; j < i; j++)
  930. {
  931. newenv[j] = strdup (wsp->ws_env[j]);
  932. if (!newenv[j])
  933. {
  934. for (; j > 1; j--)
  935. free (newenv[j-1]);
  936. free (newenv[j-1]);
  937. free (newenv);
  938. return _wsplt_nomem (wsp);
  939. }
  940. }
  941. newenv[j] = NULL;
  942. wsp->ws_envbuf = newenv;
  943. wsp->ws_envidx = i;
  944. wsp->ws_envsiz = sz;
  945. wsp->ws_env = (const char**) wsp->ws_envbuf;
  946. }
  947. else
  948. {
  949. newenv = calloc (WORDSPLIT_ENV_INIT, sizeof(newenv[0]));
  950. if (!newenv)
  951. return _wsplt_nomem (wsp);
  952. wsp->ws_envbuf = newenv;
  953. wsp->ws_envidx = 0;
  954. wsp->ws_envsiz = WORDSPLIT_ENV_INIT;
  955. wsp->ws_env = (const char**) wsp->ws_envbuf;
  956. wsp->ws_flags |= WRDSF_ENV;
  957. }
  958. }
  959. else
  960. {
  961. wsp->ws_envsiz *= 2;
  962. newenv = realloc (wsp->ws_envbuf,
  963. wsp->ws_envsiz * sizeof (wsp->ws_envbuf[0]));
  964. if (!newenv)
  965. return _wsplt_nomem (wsp);
  966. wsp->ws_envbuf = newenv;
  967. wsp->ws_env = (const char**) wsp->ws_envbuf;
  968. }
  969. }
  970. if (wsp->ws_flags & WRDSF_ENV_KV)
  971. {
  972. /* A key-value pair environment */
  973. char *p = malloc (namelen + 1);
  974. if (!p)
  975. return _wsplt_nomem (wsp);
  976. memcpy (p, name, namelen);
  977. p[namelen] = 0;
  978. v = strdup (value);
  979. if (!v)
  980. {
  981. free (p);
  982. return _wsplt_nomem (wsp);
  983. }
  984. wsp->ws_env[wsp->ws_envidx++] = p;
  985. wsp->ws_env[wsp->ws_envidx++] = v;
  986. }
  987. else
  988. {
  989. v = malloc (namelen + strlen(value) + 2);
  990. if (!v)
  991. return _wsplt_nomem (wsp);
  992. memcpy (v, name, namelen);
  993. v[namelen++] = '=';
  994. strcpy(v + namelen, value);
  995. wsp->ws_env[wsp->ws_envidx++] = v;
  996. }
  997. wsp->ws_env[wsp->ws_envidx++] = NULL;
  998. return WRDSE_OK;
  999. }
  1000. static int
  1001. expvar (struct wordsplit *wsp, const char *str, size_t len,
  1002. struct wordsplit_node **ptail, const char **pend, unsigned flg)
  1003. {
  1004. size_t i = 0;
  1005. const char *defstr = NULL;
  1006. char *value;
  1007. const char *vptr;
  1008. struct wordsplit_node *newnode;
  1009. const char *start = str - 1;
  1010. int rc;
  1011. struct wordsplit ws;
  1012. if (ISVARBEG (str[0]))
  1013. {
  1014. for (i = 1; i < len; i++)
  1015. if (!ISVARCHR (str[i]))
  1016. break;
  1017. *pend = str + i - 1;
  1018. }
  1019. else if (str[0] == '{')
  1020. {
  1021. str++;
  1022. len--;
  1023. for (i = 1; i < len; i++)
  1024. {
  1025. if (str[i] == ':')
  1026. {
  1027. size_t j;
  1028. defstr = str + i + 1;
  1029. if (find_closing_paren (str, i + 1, len, &j, "{}"))
  1030. return _wsplt_seterr (wsp, WRDSE_CBRACE);
  1031. *pend = str + j;
  1032. break;
  1033. }
  1034. else if (str[i] == '}')
  1035. {
  1036. defstr = NULL;
  1037. *pend = str + i;
  1038. break;
  1039. }
  1040. else if (strchr ("-+?=", str[i]))
  1041. {
  1042. size_t j;
  1043. defstr = str + i;
  1044. if (find_closing_paren (str, i, len, &j, "{}"))
  1045. return _wsplt_seterr (wsp, WRDSE_CBRACE);
  1046. *pend = str + j;
  1047. break;
  1048. }
  1049. }
  1050. if (i == len)
  1051. return _wsplt_seterr (wsp, WRDSE_CBRACE);
  1052. }
  1053. else
  1054. {
  1055. newnode = wsnode_new (wsp);
  1056. if (!newnode)
  1057. return 1;
  1058. wsnode_insert (wsp, newnode, *ptail, 0);
  1059. *ptail = newnode;
  1060. newnode->flags = _WSNF_WORD | flg;
  1061. newnode->v.word = malloc (3);
  1062. if (!newnode->v.word)
  1063. return _wsplt_nomem (wsp);
  1064. newnode->v.word[0] = '$';
  1065. newnode->v.word[1] = str[0];
  1066. newnode->v.word[2] = 0;
  1067. *pend = str;
  1068. return 0;
  1069. }
  1070. /* Actually expand the variable */
  1071. /* str - start of the variable name
  1072. i - its length
  1073. defstr - default replacement str */
  1074. if (defstr && strchr("-+?=", defstr[0]) == 0)
  1075. {
  1076. rc = WRDSE_UNDEF;
  1077. defstr = NULL;
  1078. }
  1079. else
  1080. {
  1081. rc = wordsplit_find_env (wsp, str, i, &vptr);
  1082. if (rc == WRDSE_OK)
  1083. {
  1084. if (vptr)
  1085. {
  1086. value = strdup (vptr);
  1087. if (!value)
  1088. rc = WRDSE_NOSPACE;
  1089. }
  1090. else
  1091. rc = WRDSE_UNDEF;
  1092. }
  1093. else if (wsp->ws_flags & WRDSF_GETVAR)
  1094. rc = wsp->ws_getvar (&value, str, i, wsp->ws_closure);
  1095. else
  1096. rc = WRDSE_UNDEF;
  1097. if (rc == WRDSE_OK
  1098. && (!value || value[0] == 0)
  1099. && defstr && defstr[-1] == ':')
  1100. {
  1101. free (value);
  1102. rc = WRDSE_UNDEF;
  1103. }
  1104. }
  1105. switch (rc)
  1106. {
  1107. case WRDSE_OK:
  1108. if (defstr && *defstr == '+')
  1109. {
  1110. size_t size = *pend - ++defstr;
  1111. rc = _wsplt_subsplit (wsp, &ws, defstr, size,
  1112. WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
  1113. (wsp->ws_flags &
  1114. (WRDSF_NOVAR | WRDSF_NOCMD)), 1);
  1115. if (rc)
  1116. return rc;
  1117. free (value);
  1118. value = ws.ws_wordv[0];
  1119. ws.ws_wordv[0] = NULL;
  1120. wordsplit_free (&ws);
  1121. }
  1122. break;
  1123. case WRDSE_UNDEF:
  1124. if (defstr)
  1125. {
  1126. size_t size;
  1127. if (*defstr == '-' || *defstr == '=')
  1128. {
  1129. size = *pend - ++defstr;
  1130. rc = _wsplt_subsplit (wsp, &ws, defstr, size,
  1131. WRDSF_NOSPLIT | WRDSF_WS | WRDSF_QUOTE |
  1132. (wsp->ws_flags &
  1133. (WRDSF_NOVAR | WRDSF_NOCMD)),
  1134. 1);
  1135. if (rc)
  1136. return rc;
  1137. value = ws.ws_wordv[0];
  1138. ws.ws_wordv[0] = NULL;
  1139. wordsplit_free (&ws);
  1140. if (defstr[-1] == '=')
  1141. wsplt_assign_var (wsp, str, i, value);
  1142. }
  1143. else
  1144. {
  1145. if (*defstr == '?')
  1146. {
  1147. size = *pend - ++defstr;
  1148. if (size == 0)
  1149. wsp->ws_error (_("%.*s: variable null or not set"),
  1150. (int) i, str);
  1151. else
  1152. {
  1153. rc = _wsplt_subsplit (wsp, &ws, defstr, size,
  1154. WRDSF_NOSPLIT | WRDSF_WS |
  1155. WRDSF_QUOTE |
  1156. (wsp->ws_flags &
  1157. (WRDSF_NOVAR | WRDSF_NOCMD)),
  1158. 1);
  1159. if (rc == 0)
  1160. wsp->ws_error ("%.*s: %s",
  1161. (int) i, str, ws.ws_wordv[0]);
  1162. else
  1163. wsp->ws_error ("%.*s: %.*s",
  1164. (int) i, str, (int) size, defstr);
  1165. wordsplit_free (&ws);
  1166. }
  1167. }
  1168. value = NULL;
  1169. }
  1170. }
  1171. else if (wsp->ws_flags & WRDSF_UNDEF)
  1172. {
  1173. _wsplt_seterr (wsp, WRDSE_UNDEF);
  1174. return 1;
  1175. }
  1176. else
  1177. {
  1178. if (wsp->ws_flags & WRDSF_WARNUNDEF)
  1179. wsp->ws_error (_("warning: undefined variable `%.*s'"),
  1180. (int) i, str);
  1181. if (wsp->ws_flags & WRDSF_KEEPUNDEF)
  1182. value = NULL;
  1183. else
  1184. {
  1185. value = strdup ("");
  1186. if (!value)
  1187. return _wsplt_nomem (wsp);
  1188. }
  1189. }
  1190. break;
  1191. case WRDSE_NOSPACE:
  1192. return _wsplt_nomem (wsp);
  1193. case WRDSE_USERERR:
  1194. if (wsp->ws_errno == WRDSE_USERERR)
  1195. free (wsp->ws_usererr);
  1196. wsp->ws_usererr = value;
  1197. FALLTHROUGH;
  1198. default:
  1199. _wsplt_seterr (wsp, rc);
  1200. return 1;
  1201. }
  1202. if (value)
  1203. {
  1204. if (flg & _WSNF_QUOTE)
  1205. {
  1206. newnode = wsnode_new (wsp);
  1207. if (!newnode)
  1208. {
  1209. free (value);
  1210. return 1;
  1211. }
  1212. wsnode_insert (wsp, newnode, *ptail, 0);
  1213. *ptail = newnode;
  1214. newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
  1215. newnode->v.word = value;
  1216. }
  1217. else if (*value == 0)
  1218. {
  1219. free (value);
  1220. /* Empty string is a special case */
  1221. newnode = wsnode_new (wsp);
  1222. if (!newnode)
  1223. return 1;
  1224. wsnode_insert (wsp, newnode, *ptail, 0);
  1225. *ptail = newnode;
  1226. newnode->flags = _WSNF_NULL;
  1227. }
  1228. else
  1229. {
  1230. struct wordsplit ws;
  1231. int rc;
  1232. rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
  1233. WRDSF_NOVAR | WRDSF_NOCMD |
  1234. WRDSF_QUOTE
  1235. | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0) ,
  1236. 0);
  1237. free (value);
  1238. if (rc)
  1239. {
  1240. _wsplt_seterr_sub (wsp, &ws);
  1241. wordsplit_free (&ws);
  1242. return 1;
  1243. }
  1244. wsnode_insert (wsp, ws.ws_head, *ptail, 0);
  1245. *ptail = ws.ws_tail;
  1246. ws.ws_head = ws.ws_tail = NULL;
  1247. wordsplit_free (&ws);
  1248. }
  1249. }
  1250. else if (wsp->ws_flags & WRDSF_KEEPUNDEF)
  1251. {
  1252. size_t size = *pend - start + 1;
  1253. newnode = wsnode_new (wsp);
  1254. if (!newnode)
  1255. return 1;
  1256. wsnode_insert (wsp, newnode, *ptail, 0);
  1257. *ptail = newnode;
  1258. newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
  1259. newnode->v.word = malloc (size + 1);
  1260. if (!newnode->v.word)
  1261. return _wsplt_nomem (wsp);
  1262. memcpy (newnode->v.word, start, size);
  1263. newnode->v.word[size] = 0;
  1264. }
  1265. else
  1266. {
  1267. newnode = wsnode_new (wsp);
  1268. if (!newnode)
  1269. return 1;
  1270. wsnode_insert (wsp, newnode, *ptail, 0);
  1271. *ptail = newnode;
  1272. newnode->flags = _WSNF_NULL;
  1273. }
  1274. return 0;
  1275. }
  1276. static int
  1277. begin_var_p (int c)
  1278. {
  1279. return c == '{' || ISVARBEG (c);
  1280. }
  1281. static int
  1282. node_expand (struct wordsplit *wsp, struct wordsplit_node *node,
  1283. int (*beg_p) (int),
  1284. int (*ws_exp_fn) (struct wordsplit *wsp,
  1285. const char *str, size_t len,
  1286. struct wordsplit_node **ptail,
  1287. const char **pend,
  1288. unsigned flg))
  1289. {
  1290. const char *str = wsnode_ptr (wsp, node);
  1291. size_t slen = wsnode_len (node);
  1292. const char *end = str + slen;
  1293. const char *p;
  1294. size_t off = 0;
  1295. struct wordsplit_node *tail = node;
  1296. for (p = str; p < end; p++)
  1297. {
  1298. if (*p == '\\')
  1299. {
  1300. p++;
  1301. continue;
  1302. }
  1303. if (*p == '$' && beg_p (p[1]))
  1304. {
  1305. size_t n = p - str;
  1306. if (tail != node)
  1307. tail->flags |= _WSNF_JOIN;
  1308. if (node_split_prefix (wsp, &tail, node, off, n, _WSNF_JOIN))
  1309. return 1;
  1310. p++;
  1311. if (ws_exp_fn (wsp, p, slen - n, &tail, &p,
  1312. node->flags & (_WSNF_JOIN | _WSNF_QUOTE)))
  1313. return 1;
  1314. off += p - str + 1;
  1315. str = p + 1;
  1316. }
  1317. }
  1318. if (p > str)
  1319. {
  1320. if (tail != node)
  1321. tail->flags |= _WSNF_JOIN;
  1322. if (node_split_prefix (wsp, &tail, node, off, p - str,
  1323. node->flags & (_WSNF_JOIN|_WSNF_QUOTE)))
  1324. return 1;
  1325. }
  1326. if (tail != node)
  1327. {
  1328. wsnode_remove (wsp, node);
  1329. wsnode_free (node);
  1330. }
  1331. return 0;
  1332. }
  1333. /* Remove NULL nodes from the list */
  1334. static void
  1335. wsnode_nullelim (struct wordsplit *wsp)
  1336. {
  1337. struct wordsplit_node *p;
  1338. for (p = wsp->ws_head; p;)
  1339. {
  1340. struct wordsplit_node *next = p->next;
  1341. if (p->flags & _WSNF_DELIM && p->prev)
  1342. p->prev->flags &= ~_WSNF_JOIN;
  1343. if (p->flags & _WSNF_NULL)
  1344. {
  1345. wsnode_remove (wsp, p);
  1346. wsnode_free (p);
  1347. }
  1348. p = next;
  1349. }
  1350. }
  1351. static int
  1352. wordsplit_varexp (struct wordsplit *wsp)
  1353. {
  1354. struct wordsplit_node *p;
  1355. for (p = wsp->ws_head; p;)
  1356. {
  1357. struct wordsplit_node *next = p->next;
  1358. if (!(p->flags & (_WSNF_NOEXPAND|_WSNF_DELIM)))
  1359. if (node_expand (wsp, p, begin_var_p, expvar))
  1360. return 1;
  1361. p = next;
  1362. }
  1363. wsnode_nullelim (wsp);
  1364. return 0;
  1365. }
  1366. static int
  1367. begin_cmd_p (int c)
  1368. {
  1369. return c == '(';
  1370. }
  1371. static int
  1372. expcmd (struct wordsplit *wsp, const char *str, size_t len,
  1373. struct wordsplit_node **ptail, const char **pend, unsigned flg)
  1374. {
  1375. int rc;
  1376. size_t j;
  1377. char *value;
  1378. struct wordsplit_node *newnode;
  1379. str++;
  1380. len--;
  1381. if (find_closing_paren (str, 0, len, &j, "()"))
  1382. {
  1383. _wsplt_seterr (wsp, WRDSE_PAREN);
  1384. return 1;
  1385. }
  1386. *pend = str + j;
  1387. if (wsp->ws_options & WRDSO_ARGV)
  1388. {
  1389. struct wordsplit ws;
  1390. rc = _wsplt_subsplit (wsp, &ws, str, j, WRDSF_WS | WRDSF_QUOTE, 1);
  1391. if (rc)
  1392. {
  1393. _wsplt_seterr_sub (wsp, &ws);
  1394. wordsplit_free (&ws);
  1395. return 1;
  1396. }
  1397. rc = wsp->ws_command (&value, str, j, ws.ws_wordv, wsp->ws_closure);
  1398. wordsplit_free (&ws);
  1399. }
  1400. else
  1401. rc = wsp->ws_command (&value, str, j, NULL, wsp->ws_closure);
  1402. if (rc == WRDSE_NOSPACE)
  1403. return _wsplt_nomem (wsp);
  1404. else if (rc)
  1405. {
  1406. if (rc == WRDSE_USERERR)
  1407. {
  1408. if (wsp->ws_errno == WRDSE_USERERR)
  1409. free (wsp->ws_usererr);
  1410. wsp->ws_usererr = value;
  1411. }
  1412. _wsplt_seterr (wsp, rc);
  1413. return 1;
  1414. }
  1415. if (value)
  1416. {
  1417. if (flg & _WSNF_QUOTE)
  1418. {
  1419. newnode = wsnode_new (wsp);
  1420. if (!newnode)
  1421. return 1;
  1422. wsnode_insert (wsp, newnode, *ptail, 0);
  1423. *ptail = newnode;
  1424. newnode->flags = _WSNF_WORD | _WSNF_NOEXPAND | flg;
  1425. newnode->v.word = value;
  1426. }
  1427. else if (*value == 0)
  1428. {
  1429. free (value);
  1430. /* Empty string is a special case */
  1431. newnode = wsnode_new (wsp);
  1432. if (!newnode)
  1433. return 1;
  1434. wsnode_insert (wsp, newnode, *ptail, 0);
  1435. *ptail = newnode;
  1436. newnode->flags = _WSNF_NULL;
  1437. }
  1438. else
  1439. {
  1440. struct wordsplit ws;
  1441. int rc;
  1442. rc = _wsplt_subsplit (wsp, &ws, value, strlen (value),
  1443. WRDSF_NOVAR | WRDSF_NOCMD
  1444. | WRDSF_WS | WRDSF_QUOTE
  1445. | (WSP_RETURN_DELIMS (wsp) ? WRDSF_RETURN_DELIMS : 0),
  1446. 0);
  1447. free (value);
  1448. if (rc)
  1449. {
  1450. _wsplt_seterr_sub (wsp, &ws);
  1451. wordsplit_free (&ws);
  1452. return 1;
  1453. }
  1454. wsnode_insert (wsp, ws.ws_head, *ptail, 0);
  1455. *ptail = ws.ws_tail;
  1456. ws.ws_head = ws.ws_tail = NULL;
  1457. wordsplit_free (&ws);
  1458. }
  1459. }
  1460. else
  1461. {
  1462. newnode = wsnode_new (wsp);
  1463. if (!newnode)
  1464. return 1;
  1465. wsnode_insert (wsp, newnode, *ptail, 0);
  1466. *ptail = newnode;
  1467. newnode->flags = _WSNF_NULL;
  1468. }
  1469. return 0;
  1470. }
  1471. static int
  1472. wordsplit_cmdexp (struct wordsplit *wsp)
  1473. {
  1474. struct wordsplit_node *p;
  1475. for (p = wsp->ws_head; p;)
  1476. {
  1477. struct wordsplit_node *next = p->next;
  1478. if (!(p->flags & _WSNF_NOEXPAND))
  1479. if (node_expand (wsp, p, begin_cmd_p, expcmd))
  1480. return 1;
  1481. p = next;
  1482. }
  1483. wsnode_nullelim (wsp);
  1484. return 0;
  1485. }
  1486. /* Strip off any leading and trailing whitespace. This function is called
  1487. right after the initial scanning, therefore it assumes that every
  1488. node in the list is a text reference node. */
  1489. static int
  1490. wordsplit_trimws (struct wordsplit *wsp)
  1491. {
  1492. struct wordsplit_node *p;
  1493. for (p = wsp->ws_head; p; p = p->next)
  1494. {
  1495. size_t n;
  1496. if (!(p->flags & _WSNF_QUOTE))
  1497. {
  1498. /* Skip leading whitespace: */
  1499. for (n = p->v.segm.beg; n < p->v.segm.end && ISWS (wsp->ws_input[n]);
  1500. n++)
  1501. ;
  1502. p->v.segm.beg = n;
  1503. }
  1504. while (p->next && (p->flags & _WSNF_JOIN))
  1505. p = p->next;
  1506. if (p->flags & _WSNF_QUOTE)
  1507. continue;
  1508. /* Trim trailing whitespace */
  1509. for (n = p->v.segm.end;
  1510. n > p->v.segm.beg && ISWS (wsp->ws_input[n - 1]); n--);
  1511. p->v.segm.end = n;
  1512. if (p->v.segm.beg == p->v.segm.end)
  1513. p->flags |= _WSNF_NULL;
  1514. }
  1515. wsnode_nullelim (wsp);
  1516. return 0;
  1517. }
  1518. static int
  1519. wordsplit_tildexpand (struct wordsplit *wsp)
  1520. {
  1521. struct wordsplit_node *p;
  1522. char *uname = NULL;
  1523. size_t usize = 0;
  1524. for (p = wsp->ws_head; p; p = p->next)
  1525. {
  1526. const char *str;
  1527. if (p->flags & _WSNF_QUOTE)
  1528. continue;
  1529. str = wsnode_ptr (wsp, p);
  1530. if (str[0] == '~')
  1531. {
  1532. size_t i, size, dlen;
  1533. size_t slen = wsnode_len (p);
  1534. struct passwd *pw;
  1535. char *newstr;
  1536. for (i = 1; i < slen && str[i] != '/'; i++)
  1537. ;
  1538. if (i == slen)
  1539. continue;
  1540. if (i > 1)
  1541. {
  1542. if (i > usize)
  1543. {
  1544. char *p = realloc (uname, i);
  1545. if (!p)
  1546. {
  1547. free (uname);
  1548. return _wsplt_nomem (wsp);
  1549. }
  1550. uname = p;
  1551. usize = i;
  1552. }
  1553. --i;
  1554. memcpy (uname, str + 1, i);
  1555. uname[i] = 0;
  1556. pw = getpwnam (uname);
  1557. }
  1558. else
  1559. pw = getpwuid (getuid ());
  1560. if (!pw)
  1561. continue;
  1562. dlen = strlen (pw->pw_dir);
  1563. size = slen - i + dlen;
  1564. newstr = malloc (size);
  1565. if (!newstr)
  1566. {
  1567. free (uname);
  1568. return _wsplt_nomem (wsp);
  1569. }
  1570. --size;
  1571. memcpy (newstr, pw->pw_dir, dlen);
  1572. memcpy (newstr + dlen, str + i + 1, slen - i - 1);
  1573. newstr[size] = 0;
  1574. if (p->flags & _WSNF_WORD)
  1575. free (p->v.word);
  1576. p->v.word = newstr;
  1577. p->flags |= _WSNF_WORD;
  1578. }
  1579. }
  1580. free (uname);
  1581. return 0;
  1582. }
  1583. static int
  1584. isglob (const char *s, int l)
  1585. {
  1586. while (l--)
  1587. {
  1588. if (strchr ("*?[", *s++))
  1589. return 1;
  1590. }
  1591. return 0;
  1592. }
  1593. static int
  1594. wordsplit_pathexpand (struct wordsplit *wsp)
  1595. {
  1596. struct wordsplit_node *p, *next;
  1597. size_t slen;
  1598. int flags = 0;
  1599. #ifdef GLOB_PERIOD
  1600. if (wsp->ws_options & WRDSO_DOTGLOB)
  1601. flags = GLOB_PERIOD;
  1602. #endif
  1603. for (p = wsp->ws_head; p; p = next)
  1604. {
  1605. const char *str;
  1606. next = p->next;
  1607. if (p->flags & _WSNF_QUOTE)
  1608. continue;
  1609. str = wsnode_ptr (wsp, p);
  1610. slen = wsnode_len (p);
  1611. if (isglob (str, slen))
  1612. {
  1613. int i;
  1614. glob_t g;
  1615. struct wordsplit_node *prev;
  1616. char *pattern;
  1617. pattern = malloc (slen + 1);
  1618. if (!pattern)
  1619. return _wsplt_nomem (wsp);
  1620. memcpy (pattern, str, slen);
  1621. pattern[slen] = 0;
  1622. switch (glob (pattern, flags, NULL, &g))
  1623. {
  1624. case 0:
  1625. free (pattern);
  1626. break;
  1627. case GLOB_NOSPACE:
  1628. free (pattern);
  1629. return _wsplt_nomem (wsp);
  1630. case GLOB_NOMATCH:
  1631. if (wsp->ws_options & WRDSO_NULLGLOB)
  1632. {
  1633. wsnode_remove (wsp, p);
  1634. wsnode_free (p);
  1635. }
  1636. else if (wsp->ws_options & WRDSO_FAILGLOB)
  1637. {
  1638. char buf[128];
  1639. if (wsp->ws_errno == WRDSE_USERERR)
  1640. free (wsp->ws_usererr);
  1641. snprintf (buf, sizeof (buf), _("no files match pattern %s"),
  1642. pattern);
  1643. free (pattern);
  1644. wsp->ws_usererr = strdup (buf);
  1645. if (!wsp->ws_usererr)
  1646. return _wsplt_nomem (wsp);
  1647. else
  1648. return _wsplt_seterr (wsp, WRDSE_USERERR);
  1649. }
  1650. free (pattern);
  1651. continue;
  1652. default:
  1653. free (pattern);
  1654. return _wsplt_seterr (wsp, WRDSE_GLOBERR);
  1655. }
  1656. prev = p;
  1657. for (i = 0; i < g.gl_pathc; i++)
  1658. {
  1659. struct wordsplit_node *newnode = wsnode_new (wsp);
  1660. char *newstr;
  1661. if (!newnode)
  1662. return 1;
  1663. newstr = strdup (g.gl_pathv[i]);
  1664. if (!newstr)
  1665. {
  1666. wsnode_free (newnode);
  1667. return _wsplt_nomem (wsp);
  1668. }
  1669. newnode->v.word = newstr;
  1670. newnode->flags |= _WSNF_WORD|_WSNF_QUOTE;
  1671. wsnode_insert (wsp, newnode, prev, 0);
  1672. prev = newnode;
  1673. }
  1674. globfree (&g);
  1675. wsnode_remove (wsp, p);
  1676. wsnode_free (p);
  1677. }
  1678. }
  1679. return 0;
  1680. }
  1681. static int
  1682. skip_sed_expr (const char *command, size_t i, size_t len)
  1683. {
  1684. int state;
  1685. do
  1686. {
  1687. int delim;
  1688. if (command[i] == ';')
  1689. i++;
  1690. if (!(command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1])))
  1691. break;
  1692. delim = command[++i];
  1693. state = 1;
  1694. for (i++; i < len; i++)
  1695. {
  1696. if (state == 3)
  1697. {
  1698. if (command[i] == delim || !ISALNUM (command[i]))
  1699. break;
  1700. }
  1701. else if (command[i] == '\\')
  1702. i++;
  1703. else if (command[i] == delim)
  1704. state++;
  1705. }
  1706. }
  1707. while (state == 3 && i < len && command[i] == ';');
  1708. return i;
  1709. }
  1710. /* wsp->ws_endp points to a delimiter character. If RETURN_DELIMS
  1711. is true, return its value, otherwise return the index past it. */
  1712. static size_t
  1713. skip_delim_internal (struct wordsplit *wsp, int return_delims)
  1714. {
  1715. return return_delims ? wsp->ws_endp : wsp->ws_endp + 1;
  1716. }
  1717. static size_t
  1718. skip_delim (struct wordsplit *wsp)
  1719. {
  1720. return skip_delim_internal (wsp, WSP_RETURN_DELIMS (wsp));
  1721. }
  1722. static size_t
  1723. skip_delim_real (struct wordsplit *wsp)
  1724. {
  1725. return skip_delim_internal (wsp, wsp->ws_flags & WRDSF_RETURN_DELIMS);
  1726. }
/* Result codes of the scanning functions (scan_word, scan_qstring). */
#define _WRDS_EOF   0   /* nothing left to scan; also returned by scan_word
                           after each word in incremental mode */
#define _WRDS_OK    1   /* a word was scanned */
#define _WRDS_ERR   2   /* scanning failed */
  1730. static int
  1731. scan_qstring (struct wordsplit *wsp, size_t start, size_t *end)
  1732. {
  1733. size_t j;
  1734. const char *command = wsp->ws_input;
  1735. size_t len = wsp->ws_len;
  1736. char q = command[start];
  1737. for (j = start + 1; j < len && command[j] != q; j++)
  1738. if (q == '"' && command[j] == '\\')
  1739. j++;
  1740. if (j < len && command[j] == q)
  1741. {
  1742. unsigned flags = _WSNF_QUOTE | _WSNF_EMPTYOK;
  1743. if (q == '\'')
  1744. flags |= _WSNF_NOEXPAND;
  1745. if (wordsplit_add_segm (wsp, start + 1, j, flags))
  1746. return _WRDS_ERR;
  1747. *end = j;
  1748. }
  1749. else
  1750. {
  1751. wsp->ws_endp = start;
  1752. _wsplt_seterr (wsp, WRDSE_QUOTE);
  1753. return _WRDS_ERR;
  1754. }
  1755. return 0;
  1756. }
  1757. static int
  1758. scan_word (struct wordsplit *wsp, size_t start, int consume_all)
  1759. {
  1760. size_t len = wsp->ws_len;
  1761. const char *command = wsp->ws_input;
  1762. const char *comment = wsp->ws_comment;
  1763. int join = 0;
  1764. unsigned flags = 0;
  1765. struct wordsplit_node *np = wsp->ws_tail;
  1766. size_t i = start;
  1767. if (i >= len)
  1768. {
  1769. wsp->ws_errno = WRDSE_EOF;
  1770. return _WRDS_EOF;
  1771. }
  1772. start = i;
  1773. if (wsp->ws_flags & WRDSF_SED_EXPR
  1774. && command[i] == 's' && i + 3 < len && ISPUNCT (command[i + 1]))
  1775. {
  1776. flags = _WSNF_SEXP;
  1777. i = skip_sed_expr (command, i, len);
  1778. }
  1779. else if (consume_all || !ISDELIM (wsp, command[i]))
  1780. {
  1781. while (i < len)
  1782. {
  1783. if (comment && strchr (comment, command[i]) != NULL)
  1784. {
  1785. size_t j;
  1786. for (j = i + 1; j < len && command[j] != '\n'; j++)
  1787. ;
  1788. if (wordsplit_add_segm (wsp, start, i, 0))
  1789. return _WRDS_ERR;
  1790. wsp->ws_endp = j;
  1791. return _WRDS_OK;
  1792. }
  1793. if (wsp->ws_flags & WRDSF_QUOTE)
  1794. {
  1795. if (command[i] == '\\')
  1796. {
  1797. if (++i == len)
  1798. break;
  1799. i++;
  1800. continue;
  1801. }
  1802. if (((wsp->ws_flags & WRDSF_SQUOTE) && command[i] == '\'') ||
  1803. ((wsp->ws_flags & WRDSF_DQUOTE) && command[i] == '"'))
  1804. {
  1805. if (join && wsp->ws_tail)
  1806. wsp->ws_tail->flags |= _WSNF_JOIN;
  1807. if (wordsplit_add_segm (wsp, start, i, _WSNF_JOIN))
  1808. return _WRDS_ERR;
  1809. if (scan_qstring (wsp, i, &i))
  1810. return _WRDS_ERR;
  1811. start = i + 1;
  1812. join = 1;
  1813. }
  1814. }
  1815. if (command[i] == '$')
  1816. {
  1817. if (!(wsp->ws_flags & WRDSF_NOVAR)
  1818. && command[i+1] == '{'
  1819. && find_closing_paren (command, i + 2, len, &i, "{}") == 0)
  1820. continue;
  1821. if (!(wsp->ws_flags & WRDSF_NOCMD)
  1822. && command[i+1] == '('
  1823. && find_closing_paren (command, i + 2, len, &i, "()") == 0)
  1824. continue;
  1825. }
  1826. if (!consume_all && ISDELIM (wsp, command[i]))
  1827. break;
  1828. else
  1829. i++;
  1830. }
  1831. }
  1832. else if (WSP_RETURN_DELIMS (wsp))
  1833. {
  1834. i++;
  1835. flags |= _WSNF_DELIM;
  1836. }
  1837. else if (!(wsp->ws_flags & WRDSF_SQUEEZE_DELIMS))
  1838. flags |= _WSNF_EMPTYOK;
  1839. if (join && i > start && wsp->ws_tail)
  1840. wsp->ws_tail->flags |= _WSNF_JOIN;
  1841. if (wordsplit_add_segm (wsp, start, i, flags))
  1842. return _WRDS_ERR;
  1843. wsp->ws_endp = i;
  1844. if (wsp->ws_flags & WRDSF_INCREMENTAL)
  1845. return _WRDS_EOF;
  1846. if (consume_all)
  1847. {
  1848. if (!np)
  1849. np = wsp->ws_head;
  1850. while (np)
  1851. {
  1852. np->flags |= _WSNF_QUOTE;
  1853. np = np->next;
  1854. }
  1855. }
  1856. return _WRDS_OK;
  1857. }
  1858. #define to_num(c) \
  1859. (ISDIGIT(c) ? c - '0' : (ISXDIGIT(c) ? toupper(c) - 'A' + 10 : 255 ))
  1860. static int
  1861. xtonum (int *pval, const char *src, int base, int cnt)
  1862. {
  1863. int i, val;
  1864. for (i = 0, val = 0; i < cnt; i++, src++)
  1865. {
  1866. int n = *(unsigned char *) src;
  1867. if (n > 127 || (n = to_num (n)) >= base)
  1868. break;
  1869. val = val * base + n;
  1870. }
  1871. *pval = val;
  1872. return i;
  1873. }
  1874. size_t
  1875. wordsplit_c_quoted_length (const char *str, int quote_hex, int *quote)
  1876. {
  1877. size_t len = 0;
  1878. *quote = 0;
  1879. for (; *str; str++)
  1880. {
  1881. if (strchr (" \"", *str))
  1882. *quote = 1;
  1883. if (*str == ' ')
  1884. len++;
  1885. else if (*str == '"')
  1886. len += 2;
  1887. else if (*str != '\t' && *str != '\\' && ISPRINT (*str))
  1888. len++;
  1889. else if (quote_hex)
  1890. len += 3;
  1891. else
  1892. {
  1893. if (wordsplit_c_quote_char (*str))
  1894. len += 2;
  1895. else
  1896. len += 4;
  1897. }
  1898. }
  1899. return len;
  1900. }
  1901. static int
  1902. wsplt_unquote_char (const char *transtab, int c)
  1903. {
  1904. while (*transtab && transtab[1])
  1905. {
  1906. if (*transtab++ == c)
  1907. return *transtab;
  1908. ++transtab;
  1909. }
  1910. return 0;
  1911. }
  1912. static int
  1913. wsplt_quote_char (const char *transtab, int c)
  1914. {
  1915. for (; *transtab && transtab[1]; transtab += 2)
  1916. {
  1917. if (transtab[1] == c)
  1918. return *transtab;
  1919. }
  1920. return 0;
  1921. }
  1922. int
  1923. wordsplit_c_unquote_char (int c)
  1924. {
  1925. return wsplt_unquote_char (wordsplit_c_escape_tab, c);
  1926. }
  1927. int
  1928. wordsplit_c_quote_char (int c)
  1929. {
  1930. return wsplt_quote_char (wordsplit_c_escape_tab, c);
  1931. }
  1932. void
  1933. wordsplit_string_unquote_copy (struct wordsplit *ws, int inquote,
  1934. char *dst, const char *src, size_t n)
  1935. {
  1936. int i = 0;
  1937. int c;
  1938. inquote = !!inquote;
  1939. while (i < n)
  1940. {
  1941. if (src[i] == '\\')
  1942. {
  1943. ++i;
  1944. if (WRDSO_ESC_TEST (ws, inquote, WRDSO_XESC)
  1945. && (src[i] == 'x' || src[i] == 'X'))
  1946. {
  1947. if (n - i < 2)
  1948. {
  1949. *dst++ = '\\';
  1950. *dst++ = src[i++];
  1951. }
  1952. else
  1953. {
  1954. int off = xtonum (&c, src + i + 1,
  1955. 16, 2);
  1956. if (off == 0)
  1957. {
  1958. *dst++ = '\\';
  1959. *dst++ = src[i++];
  1960. }
  1961. else
  1962. {
  1963. *dst++ = c;
  1964. i += off + 1;
  1965. }
  1966. }
  1967. }
  1968. else if (WRDSO_ESC_TEST (ws, inquote, WRDSO_OESC)
  1969. && (unsigned char) src[i] < 128 && ISDIGIT (src[i]))
  1970. {
  1971. if (n - i < 1)
  1972. {
  1973. *dst++ = '\\';
  1974. *dst++ = src[i++];
  1975. }
  1976. else
  1977. {
  1978. int off = xtonum (&c, src + i, 8, 3);
  1979. if (off == 0)
  1980. {
  1981. *dst++ = '\\';
  1982. *dst++ = src[i++];
  1983. }
  1984. else
  1985. {
  1986. *dst++ = c;
  1987. i += off;
  1988. }
  1989. }
  1990. }
  1991. else if ((c = wsplt_unquote_char (ws->ws_escape[inquote], src[i])))
  1992. {
  1993. *dst++ = c;
  1994. ++i;
  1995. }
  1996. else
  1997. {
  1998. if (WRDSO_ESC_TEST (ws, inquote, WRDSO_BSKEEP))
  1999. *dst++ = '\\';
  2000. *dst++ = src[i++];
  2001. }
  2002. }
  2003. else
  2004. *dst++ = src[i++];
  2005. }
  2006. *dst = 0;
  2007. }
  /* Copy the NUL-terminated string SRC to DST, quoting characters that
     need it:

       - a double quote is written as \" ;
       - any other character accepted by ISPRINT, except tab and
         backslash, is copied unchanged;
       - everything else is written as %XX (two upper-case hex digits) if
         QUOTE_HEX is non-zero; otherwise as a backslash followed by the
         letter returned by wordsplit_c_quote_char or, when there is no
         such letter, by three octal digits.

     No terminating NUL is written to DST; the caller must provide a
     buffer that is large enough and terminate it as needed. */
  void
  wordsplit_c_quote_copy (char *dst, const char *src, int quote_hex)
  {
    const char *in;
    char *out = dst;

    for (in = src; *in != 0; in++)
      {
        char buf[4];
        int letter;

        if (*in == '"')
          {
            out[0] = '\\';
            out[1] = '"';
            out += 2;
            continue;
          }

        if (*in != '\t' && *in != '\\' && ISPRINT (*in))
          {
            *out++ = *in;
            continue;
          }

        if (quote_hex)
          {
            /* Format into a scratch buffer so that snprintf's terminating
               NUL never lands in the output.  */
            snprintf (buf, sizeof buf, "%%%02X", (unsigned char) *in);
            memcpy (out, buf, 3);
            out += 3;
            continue;
          }

        letter = wordsplit_c_quote_char (*in);
        *out++ = '\\';
        if (letter)
          *out++ = letter;
        else
          {
            snprintf (buf, sizeof buf, "%03o", (unsigned char) *in);
            memcpy (out, buf, 3);
            out += 3;
          }
      }
  }
  /* Description of a single expansion phase.  Phases are collected in the
     exptab[] array. */
  struct exptab
  {
    /* Textual description (for debugging) */
    char const *descr;
    /* WRDSF_ bit that controls this phase */
    int flag;
    /* Entry-specific options (see EXPOPT_ flags below) */
    int opt;
    /* Expansion function */
    int (*expansion) (struct wordsplit *ws);
  };
  /* The following options control expansions: */

  /* Normally the exptab entry is run if its flag bit is set in struct
     wordsplit.  The EXPOPT_NEG option negates this test so that expansion
     is performed if its associated flag bit is not set in struct
     wordsplit. */
  #define EXPOPT_NEG      0x01
  /* All bits in flag must be set in order for entry to match */
  #define EXPOPT_ALLOF    0x02
  /* Misspelled legacy name of EXPOPT_ALLOF, kept so that existing uses
     continue to compile. */
  #define EXPORT_ALLOF    EXPOPT_ALLOF
  /* Coalesce the input list before running the expansion. */
  #define EXPOPT_COALESCE 0x04
  2062. static struct exptab exptab[] = {
  2063. { N_("WS trimming"), WRDSF_WS, 0,
  2064. wordsplit_trimws },
  2065. { N_("command substitution"), WRDSF_NOCMD, EXPOPT_NEG|EXPOPT_COALESCE,
  2066. wordsplit_cmdexp },
  2067. { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
  2068. NULL },
  2069. { N_("tilde expansion"), WRDSF_PATHEXPAND, 0,
  2070. wordsplit_tildexpand },
  2071. { N_("variable expansion"), WRDSF_NOVAR, EXPOPT_NEG,
  2072. wordsplit_varexp },
  2073. { N_("quote removal"), 0, EXPOPT_NEG,
  2074. wsnode_quoteremoval },
  2075. { N_("coalesce list"), 0, EXPOPT_NEG|EXPOPT_COALESCE,
  2076. NULL },
  2077. { N_("path expansion"), WRDSF_PATHEXPAND, 0,
  2078. wordsplit_pathexpand },
  2079. { NULL }
  2080. };
  2081. static int
  2082. exptab_matches (struct exptab *p, struct wordsplit *wsp)
  2083. {
  2084. int result;
  2085. result = (wsp->ws_flags & p->flag);
  2086. if (p->opt & EXPORT_ALLOF)
  2087. result = result == p->flag;
  2088. if (p->opt & EXPOPT_NEG)
  2089. result = !result;
  2090. return result;
  2091. }
  2092. static int
  2093. wordsplit_process_list (struct wordsplit *wsp, size_t start)
  2094. {
  2095. struct exptab *p;
  2096. if (wsp->ws_flags & WRDSF_SHOWDBG)
  2097. wsp->ws_debug (_("(%02d) Input:%.*s;"),
  2098. wsp->ws_lvl, (int) wsp->ws_len, wsp->ws_input);
  2099. if ((wsp->ws_flags & WRDSF_NOSPLIT)
  2100. || ((wsp->ws_options & WRDSO_MAXWORDS)
  2101. && wsp->ws_wordi + 1 == wsp->ws_maxwords))
  2102. {
  2103. /* Treat entire input as a single word */
  2104. if (scan_word (wsp, start, 1) == _WRDS_ERR)
  2105. return wsp->ws_errno;
  2106. }
  2107. else
  2108. {
  2109. int rc;
  2110. while ((rc = scan_word (wsp, start, 0)) == _WRDS_OK)
  2111. start = skip_delim (wsp);
  2112. /* Make sure tail element is not joinable */
  2113. if (wsp->ws_tail)
  2114. wsp->ws_tail->flags &= ~_WSNF_JOIN;
  2115. if (rc == _WRDS_ERR)
  2116. return wsp->ws_errno;
  2117. }
  2118. if (wsp->ws_flags & WRDSF_SHOWDBG)
  2119. {
  2120. wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _("Initial list:"));
  2121. wordsplit_dump_nodes (wsp);
  2122. }
  2123. for (p = exptab; p->descr; p++)
  2124. {
  2125. if (exptab_matches(p, wsp))
  2126. {
  2127. if (p->opt & EXPOPT_COALESCE)
  2128. {
  2129. if (wsnode_coalesce (wsp))
  2130. break;
  2131. if (wsp->ws_flags & WRDSF_SHOWDBG)
  2132. {
  2133. wsp->ws_debug ("(%02d) %s", wsp->ws_lvl,
  2134. _("Coalesced list:"));
  2135. wordsplit_dump_nodes (wsp);
  2136. }
  2137. }
  2138. if (p->expansion)
  2139. {
  2140. if (p->expansion (wsp))
  2141. break;
  2142. if (wsp->ws_flags & WRDSF_SHOWDBG)
  2143. {
  2144. wsp->ws_debug ("(%02d) %s", wsp->ws_lvl, _(p->descr));
  2145. wordsplit_dump_nodes (wsp);
  2146. }
  2147. }
  2148. }
  2149. }
  2150. return wsp->ws_errno;
  2151. }
  2152. static int
  2153. wordsplit_run (const char *command, size_t length, struct wordsplit *wsp,
  2154. unsigned flags, int lvl)
  2155. {
  2156. int rc;
  2157. size_t start;
  2158. if (!command)
  2159. {
  2160. if (!(flags & WRDSF_INCREMENTAL))
  2161. return _wsplt_seterr (wsp, WRDSE_USAGE);
  2162. if (wsp->ws_head)
  2163. return wordsplit_finish (wsp);
  2164. start = skip_delim_real (wsp);
  2165. if (wsp->ws_endp == wsp->ws_len)
  2166. return _wsplt_seterr (wsp, WRDSE_NOINPUT);
  2167. wsp->ws_flags |= WRDSF_REUSE;
  2168. wordsplit_init0 (wsp);
  2169. }
  2170. else
  2171. {
  2172. start = 0;
  2173. rc = wordsplit_init (wsp, command, length, flags);
  2174. if (rc)
  2175. return rc;
  2176. wsp->ws_lvl = lvl;
  2177. }
  2178. rc = wordsplit_process_list (wsp, start);
  2179. if (rc)
  2180. return rc;
  2181. return wordsplit_finish (wsp);
  2182. }
  /* Split the first LENGTH bytes of COMMAND into words, storing the
     result in WSP.  FLAGS are passed on unchanged.  This runs
     wordsplit_run at nesting level 0 and returns its result. */
  int
  wordsplit_len (const char *command, size_t length, struct wordsplit *wsp,
                 unsigned flags)
  {
    int rc = wordsplit_run (command, length, wsp, flags, 0);

    return rc;
  }
  /* Split the NUL-terminated string COMMAND into words, storing the
     result in WS.  COMMAND may be NULL, in which case a length of 0 is
     passed on.  Returns the result of wordsplit_len. */
  int
  wordsplit (const char *command, struct wordsplit *ws, unsigned flags)
  {
    size_t length = 0;

    if (command)
      length = strlen (command);
    return wordsplit_len (command, length, ws, flags);
  }
  2194. void
  2195. wordsplit_free_words (struct wordsplit *ws)
  2196. {
  2197. size_t i;
  2198. for (i = 0; i < ws->ws_wordc; i++)
  2199. {
  2200. char *p = ws->ws_wordv[ws->ws_offs + i];
  2201. if (p)
  2202. {
  2203. free (p);
  2204. ws->ws_wordv[ws->ws_offs + i] = NULL;
  2205. }
  2206. }
  2207. ws->ws_wordc = 0;
  2208. }
  2209. void
  2210. wordsplit_free_envbuf (struct wordsplit *ws)
  2211. {
  2212. if (ws->ws_flags & WRDSF_NOCMD)
  2213. return;
  2214. if (ws->ws_envbuf)
  2215. {
  2216. size_t i;
  2217. for (i = 0; ws->ws_envbuf[i]; i++)
  2218. free (ws->ws_envbuf[i]);
  2219. free (ws->ws_envbuf);
  2220. ws->ws_envidx = ws->ws_envsiz = 0;
  2221. ws->ws_envbuf = NULL;
  2222. }
  2223. }
  2224. void
  2225. wordsplit_clearerr (struct wordsplit *ws)
  2226. {
  2227. if (ws->ws_errno == WRDSE_USERERR)
  2228. free (ws->ws_usererr);
  2229. ws->ws_usererr = NULL;
  2230. ws->ws_errno = WRDSE_OK;
  2231. }
  2232. void
  2233. wordsplit_free (struct wordsplit *ws)
  2234. {
  2235. wordsplit_free_nodes (ws);
  2236. wordsplit_free_words (ws);
  2237. free (ws->ws_wordv);
  2238. ws->ws_wordv = NULL;
  2239. wordsplit_free_envbuf (ws);
  2240. }
  2241. int
  2242. wordsplit_get_words (struct wordsplit *ws, size_t *wordc, char ***wordv)
  2243. {
  2244. char **p = realloc (ws->ws_wordv,
  2245. (ws->ws_wordc + 1) * sizeof (ws->ws_wordv[0]));
  2246. if (!p)
  2247. return -1;
  2248. *wordv = p;
  2249. *wordc = ws->ws_wordc;
  2250. ws->ws_wordv = NULL;
  2251. ws->ws_wordc = 0;
  2252. ws->ws_wordn = 0;
  2253. return 0;
  2254. }
  2255. const char *_wordsplit_errstr[] = {
  2256. N_("no error"),
  2257. N_("missing closing quote"),
  2258. N_("memory exhausted"),
  2259. N_("invalid wordsplit usage"),
  2260. N_("unbalanced curly brace"),
  2261. N_("undefined variable"),
  2262. N_("input exhausted"),
  2263. N_("unbalanced parenthesis"),
  2264. N_("globbing error")
  2265. };
  2266. int _wordsplit_nerrs =
  2267. sizeof (_wordsplit_errstr) / sizeof (_wordsplit_errstr[0]);
  2268. const char *
  2269. wordsplit_strerror (struct wordsplit *ws)
  2270. {
  2271. if (ws->ws_errno == WRDSE_USERERR)
  2272. return ws->ws_usererr;
  2273. if (ws->ws_errno < _wordsplit_nerrs)
  2274. return _wordsplit_errstr[ws->ws_errno];
  2275. return N_("unknown error");
  2276. }
  2277. void
  2278. wordsplit_perror (struct wordsplit *wsp)
  2279. {
  2280. switch (wsp->ws_errno)
  2281. {
  2282. case WRDSE_QUOTE:
  2283. wsp->ws_error (_("missing closing %c (start near #%lu)"),
  2284. wsp->ws_input[wsp->ws_endp],
  2285. (unsigned long) wsp->ws_endp);
  2286. break;
  2287. default:
  2288. wsp->ws_error ("%s", wordsplit_strerror (wsp));
  2289. }
  2290. }