PageRenderTime 65ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 2ms

/src/bin/bash/subst.c

http://github.com/Barrett17/Haiku-services-branch
C | 8841 lines | 7859 code | 439 blank | 543 comment | 787 complexity | d804a2ddf1a7916284077ec07a47ec1e MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, LGPL-2.0, LGPL-2.1, BSD-2-Clause, ISC, Apache-2.0, AGPL-1.0, MIT, MPL-2.0-no-copyleft-exception, Unlicense, BSD-3-Clause, LGPL-3.0

Large files are truncated, but you can click here to view the full file

  1. /* subst.c -- The part of the shell that does parameter, command, arithmetic,
  2. and globbing substitutions. */
  3. /* ``Have a little faith, there's magic in the night. You ain't a
  4. beauty, but, hey, you're alright.'' */
  5. /* Copyright (C) 1987-2009 Free Software Foundation, Inc.
  6. This file is part of GNU Bash, the Bourne Again SHell.
  7. Bash is free software: you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation, either version 3 of the License, or
  10. (at your option) any later version.
  11. Bash is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with Bash. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #include "config.h"
  19. #include "bashtypes.h"
  20. #include <stdio.h>
  21. #include "chartypes.h"
  22. #if defined (HAVE_PWD_H)
  23. # include <pwd.h>
  24. #endif
  25. #include <signal.h>
  26. #include <errno.h>
  27. #if defined (HAVE_UNISTD_H)
  28. # include <unistd.h>
  29. #endif
  30. #include "bashansi.h"
  31. #include "posixstat.h"
  32. #include "bashintl.h"
  33. #include "shell.h"
  34. #include "flags.h"
  35. #include "jobs.h"
  36. #include "execute_cmd.h"
  37. #include "filecntl.h"
  38. #include "trap.h"
  39. #include "pathexp.h"
  40. #include "mailcheck.h"
  41. #include "shmbutil.h"
  42. #include "builtins/getopt.h"
  43. #include "builtins/common.h"
  44. #include "builtins/builtext.h"
  45. #include <tilde/tilde.h>
  46. #include <glob/strmatch.h>
  47. #if !defined (errno)
  48. extern int errno;
  49. #endif /* !errno */
  /* The size that strings change by.  sub_append_string() rounds new
     allocations up to a multiple of DEFAULT_ARRAY_SIZE. */
  #define DEFAULT_INITIAL_ARRAY_SIZE 112
  #define DEFAULT_ARRAY_SIZE 128

  /* Variable types. */
  #define VT_VARIABLE 0
  #define VT_POSPARMS 1
  #define VT_ARRAYVAR 2
  #define VT_ARRAYMEMBER 3
  #define VT_ASSOCVAR 4
  #define VT_STARSUB 128 /* $* or ${array[*]} -- used to split */

  /* Flags for quoted_strchr */
  #define ST_BACKSL 0x01
  #define ST_CTLESC 0x02
  #define ST_SQUOTE 0x04 /* unused yet */
  #define ST_DQUOTE 0x08 /* unused yet */

  /* Flags for the `pflags' argument to param_expand() */
  #define PF_NOCOMSUB 0x01 /* Do not perform command substitution */
  #define PF_IGNUNBOUND 0x02 /* ignore unbound vars even if -u set */

  /* These defs make it easier to use the editor. */
  #define LBRACE '{'
  #define RBRACE '}'
  #define LPAREN '('
  #define RPAREN ')'

  /* Evaluates to 1 if C is one of the shell's special parameters whose length
     can be taken, but is also one of the special expansion characters. */
  #define VALID_SPECIAL_LENGTH_PARAM(c) \
    ((c) == '-' || (c) == '?' || (c) == '#')

  /* Evaluates to 1 if C is one of the shell's special parameters for which an
     indirect variable reference may be made. */
  #define VALID_INDIR_PARAM(c) \
    ((c) == '#' || (c) == '?' || (c) == '@' || (c) == '*')

  /* Evaluates to non-zero if C is one of the OP characters that follows the
     parameter in ${parameter[:]OPword}.  C is parenthesized before the cast
     so that an expression argument (e.g. `s[i] + 1') is converted as a whole
     rather than having the cast bind to its first operand only. */
  #define VALID_PARAM_EXPAND_CHAR(c) (sh_syntaxtab[(unsigned char)(c)] & CSUBSTOP)

  /* Evaluates to 1 if this is one of the shell's special variables.  NAME and
     WI are parenthesized so that expression arguments (`p + 1', `a | b')
     expand correctly.  NAME is evaluated more than once, so it must not have
     side effects. */
  #define SPECIAL_VAR(name, wi) \
    ((DIGIT (*(name)) && all_digits (name)) || \
     ((name)[1] == '\0' && (sh_syntaxtab[(unsigned char)*(name)] & CSPECVAR)) || \
     ((wi) && (name)[2] == '\0' && VALID_INDIR_PARAM ((name)[1])))
  89. /* An expansion function that takes a string and a quoted flag and returns
  90. a WORD_LIST *. Used as the type of the third argument to
  91. expand_string_if_necessary() and expand_string_to_string_internal(). */
  92. typedef WORD_LIST *EXPFUNC __P((char *, int));
  93. /* Process ID of the last command executed within command substitution. */
  94. pid_t last_command_subst_pid = NO_PID;
  95. pid_t current_command_subst_pid = NO_PID; /* NOTE(review): not covered by the comment above; presumably the pid of the command substitution currently in progress -- confirm. */
  96. /* Variables used to keep track of the characters in IFS. */
  97. SHELL_VAR *ifs_var;
  98. char *ifs_value;
  99. unsigned char ifs_cmap[UCHAR_MAX + 1]; /* one slot for every possible unsigned char value */
  100. #if defined (HANDLE_MULTIBYTE)
  101. unsigned char ifs_firstc[MB_LEN_MAX]; /* NOTE(review): presumably the first (possibly multibyte) character of IFS -- confirm. */
  102. size_t ifs_firstc_len; /* NOTE(review): presumably the number of bytes used in ifs_firstc -- confirm. */
  103. #else
  104. unsigned char ifs_firstc; /* NOTE(review): presumably the first character of IFS -- confirm. */
  105. #endif
  106. int assigning_in_environment; /* NOTE(review): never set in the part of the file reviewed here; presumably non-zero while assignments are being made into a command's temporary environment -- confirm. */
  107. /* Extern functions and variables from different files. */
  108. extern int last_command_exit_value, last_command_exit_signal;
  109. extern int subshell_environment;
  110. extern int subshell_level, parse_and_execute_level;
  111. extern int eof_encountered;
  112. extern int return_catch_flag, return_catch_value;
  113. extern pid_t dollar_dollar_pid;
  114. extern int posixly_correct;
  115. extern char *this_command_name;
  116. extern struct fd_bitmap *current_fds_to_close;
  117. extern int wordexp_only;
  118. extern int expanding_redir;
  119. extern int tempenv_assign_error;
  120. #if !defined (HAVE_WCSDUP) && defined (HANDLE_MULTIBYTE)
  121. extern wchar_t *wcsdup __P((const wchar_t *));
  122. #endif
  123. /* Non-zero means to allow unmatched globbed filenames to expand to
  124. a null file. */
  125. int allow_null_glob_expansion;
  126. /* Non-zero means to throw an error when globbing fails to match anything. */
  127. int fail_glob_expansion;
  128. #if 0
  129. /* Variables to keep track of which words in an expanded word list (the
  130. output of expand_word_list_internal) are the result of globbing
  131. expansions. GLOB_ARGV_FLAGS is used by execute_cmd.c.
  132. (CURRENTLY UNUSED). */
  133. char *glob_argv_flags;
  134. static int glob_argv_flags_size;
  135. #endif
  /* Sentinel objects used as distinguished error return values: a function
     returns the address of one of these instead of a real result.  For
     example, string_extract() returns &extract_string_error when SX_REQMATCH
     is set and no closing delimiter is found.
     NOTE(review): the `fatal' variants and the word/wdesc/param sentinels are
     used outside the portion of the file reviewed here -- confirm their
     exact meaning against their users. */
  136. static WORD_LIST expand_word_error, expand_word_fatal;
  137. static WORD_DESC expand_wdesc_error, expand_wdesc_fatal;
  138. static char expand_param_error, expand_param_fatal;
  139. static char extract_string_error, extract_string_fatal;
  140. /* Tell the expansion functions to not longjmp back to top_level on fatal
  141. errors. Enabled when doing completion and prompt string expansion. */
  142. static int no_longjmp_on_fatal_error = 0;
  143. /* Set by expand_word_unsplit; used to inhibit splitting and re-joining
  144. $* on $IFS, primarily when doing assignment statements. */
  145. static int expand_no_split_dollar_star = 0;
  146. /* Used to hold a list of variable assignments preceding a command. Global
  147. so the SIGCHLD handler in jobs.c can unwind-protect it when it runs a
  148. SIGCHLD trap. */
  149. WORD_LIST *subst_assign_varlist = (WORD_LIST *)NULL;
  150. /* A WORD_LIST of words to be expanded by expand_word_list_internal,
  151. without any leading variable assignments. */
  152. static WORD_LIST *garglist = (WORD_LIST *)NULL;
  153. static char *quoted_substring __P((char *, int, int));
  154. static int quoted_strlen __P((char *));
  155. static char *quoted_strchr __P((char *, int, int));
  156. static char *expand_string_if_necessary __P((char *, int, EXPFUNC *));
  157. static inline char *expand_string_to_string_internal __P((char *, int, EXPFUNC *));
  158. static WORD_LIST *call_expand_word_internal __P((WORD_DESC *, int, int, int *, int *));
  159. static WORD_LIST *expand_string_internal __P((char *, int));
  160. static WORD_LIST *expand_string_leave_quoted __P((char *, int));
  161. static WORD_LIST *expand_string_for_rhs __P((char *, int, int *, int *));
  162. static WORD_LIST *list_quote_escapes __P((WORD_LIST *));
  163. static char *make_quoted_char __P((int));
  164. static WORD_LIST *quote_list __P((WORD_LIST *));
  165. static int unquoted_substring __P((char *, char *));
  166. static int unquoted_member __P((int, char *));
  167. #if defined (ARRAY_VARS)
  168. static SHELL_VAR *do_compound_assignment __P((char *, char *, int));
  169. #endif
  170. static int do_assignment_internal __P((const WORD_DESC *, int));
  171. static char *string_extract_verbatim __P((char *, size_t, int *, char *, int));
  172. static char *string_extract __P((char *, int *, char *, int));
  173. static char *string_extract_double_quoted __P((char *, int *, int));
  174. static inline char *string_extract_single_quoted __P((char *, int *));
  175. static inline int skip_single_quoted __P((const char *, size_t, int));
  176. static int skip_double_quoted __P((char *, size_t, int));
  177. static char *extract_delimited_string __P((char *, int *, char *, char *, char *, int));
  178. static char *extract_dollar_brace_string __P((char *, int *, int, int));
  179. static int skip_matched_pair __P((const char *, int, int, int, int));
  180. static char *pos_params __P((char *, int, int, int));
  181. static unsigned char *mb_getcharlens __P((char *, int));
  182. static char *remove_upattern __P((char *, char *, int));
  183. #if defined (HANDLE_MULTIBYTE)
  184. static wchar_t *remove_wpattern __P((wchar_t *, size_t, wchar_t *, int));
  185. #endif
  186. static char *remove_pattern __P((char *, char *, int));
  187. static int match_pattern_char __P((char *, char *));
  188. static int match_upattern __P((char *, char *, int, char **, char **));
  189. #if defined (HANDLE_MULTIBYTE)
  190. static int match_pattern_wchar __P((wchar_t *, wchar_t *));
  191. static int match_wpattern __P((wchar_t *, char **, size_t, wchar_t *, int, char **, char **));
  192. #endif
  193. static int match_pattern __P((char *, char *, int, char **, char **));
  194. static int getpatspec __P((int, char *));
  195. static char *getpattern __P((char *, int, int));
  196. static char *variable_remove_pattern __P((char *, char *, int, int));
  197. static char *list_remove_pattern __P((WORD_LIST *, char *, int, int, int));
  198. static char *parameter_list_remove_pattern __P((int, char *, int, int));
  199. #ifdef ARRAY_VARS
  200. static char *array_remove_pattern __P((SHELL_VAR *, char *, int, char *, int));
  201. #endif
  202. static char *parameter_brace_remove_pattern __P((char *, char *, char *, int, int));
  203. static char *process_substitute __P((char *, int));
  204. static char *read_comsub __P((int, int, int *));
  205. #ifdef ARRAY_VARS
  206. static arrayind_t array_length_reference __P((char *));
  207. #endif
  208. static int valid_brace_expansion_word __P((char *, int));
  209. static int chk_atstar __P((char *, int, int *, int *));
  210. static int chk_arithsub __P((const char *, int));
  211. static WORD_DESC *parameter_brace_expand_word __P((char *, int, int, int));
  212. static WORD_DESC *parameter_brace_expand_indir __P((char *, int, int, int *, int *));
  213. static WORD_DESC *parameter_brace_expand_rhs __P((char *, char *, int, int, int *, int *));
  214. static void parameter_brace_expand_error __P((char *, char *));
  215. static int valid_length_expression __P((char *));
  216. static intmax_t parameter_brace_expand_length __P((char *));
  217. static char *skiparith __P((char *, int));
  218. static int verify_substring_values __P((SHELL_VAR *, char *, char *, int, intmax_t *, intmax_t *));
  219. static int get_var_and_type __P((char *, char *, int, SHELL_VAR **, char **));
  220. static char *mb_substring __P((char *, int, int));
  221. static char *parameter_brace_substring __P((char *, char *, char *, int));
  222. static char *pos_params_pat_subst __P((char *, char *, char *, int));
  223. static char *parameter_brace_patsub __P((char *, char *, char *, int));
  224. static char *pos_params_casemod __P((char *, char *, int, int));
  225. static char *parameter_brace_casemod __P((char *, char *, int, char *, int));
  226. static WORD_DESC *parameter_brace_expand __P((char *, int *, int, int *, int *));
  227. static WORD_DESC *param_expand __P((char *, int *, int, int *, int *, int *, int *, int));
  228. static WORD_LIST *expand_word_internal __P((WORD_DESC *, int, int, int *, int *));
  229. static WORD_LIST *word_list_split __P((WORD_LIST *));
  230. static void exp_jump_to_top_level __P((int));
  231. static WORD_LIST *separate_out_assignments __P((WORD_LIST *));
  232. static WORD_LIST *glob_expand_word_list __P((WORD_LIST *, int));
  233. #ifdef BRACE_EXPANSION
  234. static WORD_LIST *brace_expand_word_list __P((WORD_LIST *, int));
  235. #endif
  236. #if defined (ARRAY_VARS)
  237. static int make_internal_declare __P((char *, char *));
  238. #endif
  239. static WORD_LIST *shell_expand_word_list __P((WORD_LIST *, int));
  240. static WORD_LIST *expand_word_list_internal __P((WORD_LIST *, int));
  241. /* **************************************************************** */
  242. /* */
  243. /* Utility Functions */
  244. /* */
  245. /* **************************************************************** */
  246. #ifdef INCLUDE_UNUSED
  247. static char *
  248. quoted_substring (string, start, end)
  249. char *string;
  250. int start, end;
  251. {
  252. register int len, l;
  253. register char *result, *s, *r;
  254. len = end - start;
  255. /* Move to string[start], skipping quoted characters. */
  256. for (s = string, l = 0; *s && l < start; )
  257. {
  258. if (*s == CTLESC)
  259. {
  260. s++;
  261. continue;
  262. }
  263. l++;
  264. if (*s == 0)
  265. break;
  266. }
  267. r = result = (char *)xmalloc (2*len + 1); /* save room for quotes */
  268. /* Copy LEN characters, including quote characters. */
  269. s = string + l;
  270. for (l = 0; l < len; s++)
  271. {
  272. if (*s == CTLESC)
  273. *r++ = *s++;
  274. *r++ = *s;
  275. l++;
  276. if (*s == 0)
  277. break;
  278. }
  279. *r = '\0';
  280. return result;
  281. }
  282. #endif
  283. #ifdef INCLUDE_UNUSED
  284. /* Return the length of S, skipping over quoted characters */
  285. static int
  286. quoted_strlen (s)
  287. char *s;
  288. {
  289. register char *p;
  290. int i;
  291. i = 0;
  292. for (p = s; *p; p++)
  293. {
  294. if (*p == CTLESC)
  295. {
  296. p++;
  297. if (*p == 0)
  298. return (i + 1);
  299. }
  300. i++;
  301. }
  302. return i;
  303. }
  304. #endif
  305. /* Find the first occurrence of character C in string S, obeying shell
  306. quoting rules. If (FLAGS & ST_BACKSL) is non-zero, backslash-escaped
  307. characters are skipped. If (FLAGS & ST_CTLESC) is non-zero, characters
  308. escaped with CTLESC are skipped. */
  309. static char *
  310. quoted_strchr (s, c, flags)
  311. char *s;
  312. int c, flags;
  313. {
  314. register char *p;
  315. for (p = s; *p; p++)
  316. {
  317. if (((flags & ST_BACKSL) && *p == '\\')
  318. || ((flags & ST_CTLESC) && *p == CTLESC))
  319. {
  320. p++;
  321. if (*p == '\0')
  322. return ((char *)NULL);
  323. continue;
  324. }
  325. else if (*p == c)
  326. return p;
  327. }
  328. return ((char *)NULL);
  329. }
  330. /* Return 1 if CHARACTER appears in an unquoted portion of
  331. STRING. Return 0 otherwise. CHARACTER must be a single-byte character. */
  332. static int
  333. unquoted_member (character, string)
  334. int character;
  335. char *string;
  336. {
  337. size_t slen;
  338. int sindex, c;
  339. DECLARE_MBSTATE;
  340. slen = strlen (string);
  341. sindex = 0;
  342. while (c = string[sindex])
  343. {
  344. if (c == character)
  345. return (1);
  346. switch (c)
  347. {
  348. default:
  349. ADVANCE_CHAR (string, slen, sindex);
  350. break;
  351. case '\\':
  352. sindex++;
  353. if (string[sindex])
  354. ADVANCE_CHAR (string, slen, sindex);
  355. break;
  356. case '\'':
  357. sindex = skip_single_quoted (string, slen, ++sindex);
  358. break;
  359. case '"':
  360. sindex = skip_double_quoted (string, slen, ++sindex);
  361. break;
  362. }
  363. }
  364. return (0);
  365. }
  366. /* Return 1 if SUBSTR appears in an unquoted portion of STRING. */
  367. static int
  368. unquoted_substring (substr, string)
  369. char *substr, *string;
  370. {
  371. size_t slen;
  372. int sindex, c, sublen;
  373. DECLARE_MBSTATE;
  374. if (substr == 0 || *substr == '\0')
  375. return (0);
  376. slen = strlen (string);
  377. sublen = strlen (substr);
  378. for (sindex = 0; c = string[sindex]; )
  379. {
  380. if (STREQN (string + sindex, substr, sublen))
  381. return (1);
  382. switch (c)
  383. {
  384. case '\\':
  385. sindex++;
  386. if (string[sindex])
  387. ADVANCE_CHAR (string, slen, sindex);
  388. break;
  389. case '\'':
  390. sindex = skip_single_quoted (string, slen, ++sindex);
  391. break;
  392. case '"':
  393. sindex = skip_double_quoted (string, slen, ++sindex);
  394. break;
  395. default:
  396. ADVANCE_CHAR (string, slen, sindex);
  397. break;
  398. }
  399. }
  400. return (0);
  401. }
  402. /* Most of the substitutions must be done in parallel. In order
  403. to avoid using tons of unclear goto's, I have some functions
  404. for manipulating malloc'ed strings. They all take INDX, a
  405. pointer to an integer which is the offset into the string
  406. where manipulation is taking place. They also take SIZE, a
  407. pointer to an integer which is the current length of the
  408. character array for this string. */
  409. /* Append SOURCE to TARGET at INDEX. SIZE is the current amount
  410. of space allocated to TARGET. SOURCE can be NULL, in which
  411. case nothing happens. Gets rid of SOURCE by freeing it.
  412. Returns TARGET in case the location has changed. */
  413. INLINE char *
  414. sub_append_string (source, target, indx, size)
  415. char *source, *target;
  416. int *indx, *size;
  417. {
  418. if (source)
  419. {
  420. int srclen, n;
  421. srclen = STRLEN (source);
  422. if (srclen >= (int)(*size - *indx))
  423. {
  424. n = srclen + *indx;
  425. n = (n + DEFAULT_ARRAY_SIZE) - (n % DEFAULT_ARRAY_SIZE);
  426. target = (char *)xrealloc (target, (*size = n));
  427. }
  428. FASTCOPY (source, target + *indx, srclen);
  429. *indx += srclen;
  430. target[*indx] = '\0';
  431. free (source);
  432. }
  433. return (target);
  434. }
  435. #if 0
  436. /* UNUSED */
  /* Convert NUMBER to text with itos() and append it to TARGET via
     sub_append_string(), which also frees the converted string.
     INDX and SIZE are as in SUB_APPEND_STRING. */
  char *
  sub_append_number (number, target, indx, size)
       intmax_t number;
       char *target;
       int *indx, *size;
  {
    return (sub_append_string (itos (number), target, indx, size));
  }
  449. #endif
  450. /* Extract a substring from STRING, starting at SINDEX and ending with
  451. one of the characters in CHARLIST. Don't make the ending character
  452. part of the string. Leave SINDEX pointing at the ending character.
  453. Understand about backslashes in the string. If (flags & SX_VARNAME)
  454. is non-zero, and array variables have been compiled into the shell,
  455. everything between a `[' and a corresponding `]' is skipped over.
  456. If (flags & SX_NOALLOC) is non-zero, don't return the substring, just
  457. update SINDEX. If (flags & SX_REQMATCH) is non-zero, the string must
  458. contain a closing character from CHARLIST. */
  459. static char *
  460. string_extract (string, sindex, charlist, flags)
  461. char *string;
  462. int *sindex;
  463. char *charlist;
  464. int flags;
  465. {
  466. register int c, i;
  467. int found;
  468. size_t slen;
  469. char *temp;
  470. DECLARE_MBSTATE;
  471. slen = (MB_CUR_MAX > 1) ? strlen (string + *sindex) + *sindex : 0;
  472. i = *sindex;
  473. found = 0;
  474. while (c = string[i])
  475. {
  476. if (c == '\\')
  477. {
  478. if (string[i + 1])
  479. i++;
  480. else
  481. break;
  482. }
  483. #if defined (ARRAY_VARS)
  484. else if ((flags & SX_VARNAME) && c == '[')
  485. {
  486. int ni;
  487. /* If this is an array subscript, skip over it and continue. */
  488. ni = skipsubscript (string, i);
  489. if (string[ni] == ']')
  490. i = ni;
  491. }
  492. #endif
  493. else if (MEMBER (c, charlist))
  494. {
  495. found = 1;
  496. break;
  497. }
  498. ADVANCE_CHAR (string, slen, i);
  499. }
  500. /* If we had to have a matching delimiter and didn't find one, return an
  501. error and let the caller deal with it. */
  502. if ((flags & SX_REQMATCH) && found == 0)
  503. {
  504. *sindex = i;
  505. return (&extract_string_error);
  506. }
  507. temp = (flags & SX_NOALLOC) ? (char *)NULL : substring (string, *sindex, i);
  508. *sindex = i;
  509. return (temp);
  510. }
  511. /* Extract the contents of STRING as if it is enclosed in double quotes.
  512. SINDEX, when passed in, is the offset of the character immediately
  513. following the opening double quote; on exit, SINDEX is left pointing after
  514. the closing double quote. If STRIPDQ is non-zero, unquoted double
  515. quotes are stripped and the string is terminated by a null byte.
  516. Backslashes between the embedded double quotes are processed. If STRIPDQ
  517. is zero, an unquoted `"' terminates the string.
  The result is returned in a buffer obtained from xmalloc(). If the end of
  STRING is reached before a terminating quote is seen, SINDEX is left at the
  terminating null byte instead. `$(...)' and `${...}' constructs, and text
  between backquotes, are copied through to the result. */
  518. static char *
  519. string_extract_double_quoted (string, sindex, stripdq)
  520. char *string;
  521. int *sindex, stripdq;
  522. {
  523. size_t slen;
  524. char *send;
  525. int j, i, t; /* j indexes TEMP (output); i indexes STRING (input) */
  526. unsigned char c;
  527. char *temp, *ret; /* The new string we return. */
  528. int pass_next, backquote, si; /* State variables for the machine. */
  529. int dquote; /* flipped at each unquoted `"' when STRIPDQ is non-zero */
  530. DECLARE_MBSTATE;
  531. slen = strlen (string + *sindex) + *sindex; /* total length of STRING */
  532. send = string + slen;
  533. pass_next = backquote = dquote = 0;
  534. temp = (char *)xmalloc (1 + slen - *sindex); /* everything from *SINDEX to the end, plus the null */
  535. j = 0;
  536. i = *sindex;
  537. while (c = string[i])
  538. {
  539. /* Process a character that was quoted by a backslash. */
  540. if (pass_next)
  541. {
  542. /* Posix.2 sez:
  543. ``The backslash shall retain its special meaning as an escape
  544. character only when followed by one of the characters:
  545. $ ` " \ <newline>''.
  546. If STRIPDQ is zero, we handle the double quotes here and let
  547. expand_word_internal handle the rest. If STRIPDQ is non-zero,
  548. we have already been through one round of backslash stripping,
  549. and want to strip these backslashes only if DQUOTE is non-zero,
  550. indicating that we are inside an embedded double-quoted string. */
  551. /* If we are in an embedded quoted string, then don't strip
  552. backslashes before characters for which the backslash
  553. retains its special meaning, but remove backslashes in
  554. front of other characters. If we are not in an
  555. embedded quoted string, don't strip backslashes at all.
  556. This mess is necessary because the string was already
  557. surrounded by double quotes (and sh has some really weird
  558. quoting rules).
  559. The returned string will be run through expansion as if
  560. it were double-quoted. */
  561. if ((stripdq == 0 && c != '"') ||
  562. (stripdq && ((dquote && (sh_syntaxtab[c] & CBSDQUOTE)) || dquote == 0)))
  563. temp[j++] = '\\'; /* re-emit the backslash that was skipped on the previous iteration */
  564. pass_next = 0;
  565. add_one_character: /* also the target of the goto for ordinary characters below */
  566. COPY_CHAR_I (temp, j, string, send, i);
  567. continue;
  568. }
  569. /* A backslash protects the next character. The code just above
  570. handles preserving the backslash in front of any character but
  571. a double quote. */
  572. if (c == '\\')
  573. {
  574. pass_next++;
  575. i++; /* the backslash is not copied here; see the pass_next branch above */
  576. continue;
  577. }
  578. /* Inside backquotes, ``the portion of the quoted string from the
  579. initial backquote and the characters up to the next backquote
  580. that is not preceded by a backslash, having escape characters
  581. removed, defines that command''. */
  582. if (backquote)
  583. {
  584. if (c == '`')
  585. backquote = 0;
  586. temp[j++] = c;
  587. i++;
  588. continue;
  589. }
  590. if (c == '`')
  591. {
  592. temp[j++] = c;
  593. backquote++;
  594. i++;
  595. continue;
  596. }
  597. /* Pass everything between `$(' and the matching `)' or a quoted
  598. ${ ... } pair through according to the Posix.2 specification. */
  599. if (c == '$' && ((string[i + 1] == LPAREN) || (string[i + 1] == LBRACE)))
  600. {
  601. int free_ret = 1;
  602. si = i + 2; /* first character after the two-character opener */
  603. if (string[i + 1] == LPAREN)
  604. ret = extract_command_subst (string, &si, 0);
  605. else
  606. ret = extract_dollar_brace_string (string, &si, 1, 0);
  607. temp[j++] = '$';
  608. temp[j++] = string[i + 1];
  609. /* Just paranoia; ret will not be 0 unless no_longjmp_on_fatal_error
  610. is set. */
  611. if (ret == 0 && no_longjmp_on_fatal_error)
  612. {
  613. free_ret = 0; /* RET now points into STRING and must not be freed */
  614. ret = string + i + 2;
  615. }
  616. for (t = 0; ret[t]; t++, j++)
  617. temp[j] = ret[t];
  618. temp[j] = string[si]; /* the character the extractor stopped on */
  619. if (string[si])
  620. {
  621. j++;
  622. i = si + 1;
  623. }
  624. else
  625. i = si; /* stopped at the end of STRING: stay on the null byte */
  626. if (free_ret)
  627. free (ret);
  628. continue;
  629. }
  630. /* Add any character but a double quote to the quoted string we're
  631. accumulating. */
  632. if (c != '"')
  633. goto add_one_character;
  634. /* c == '"' */
  635. if (stripdq)
  636. {
  637. dquote ^= 1;
  638. i++; /* the quote itself is dropped from the output */
  639. continue;
  640. }
  641. break; /* unquoted `"' with STRIPDQ zero: end of the quoted string */
  642. }
  643. temp[j] = '\0';
  644. /* Point to after the closing quote. */
  645. if (c) /* non-zero only when the loop was left through the break above */
  646. i++;
  647. *sindex = i;
  648. return (temp);
  649. }
  650. /* This should really be another option to string_extract_double_quoted. */
  651. static int
  652. skip_double_quoted (string, slen, sind)
  653. char *string;
  654. size_t slen;
  655. int sind;
  656. {
  657. int c, i;
  658. char *ret;
  659. int pass_next, backquote, si;
  660. DECLARE_MBSTATE;
  661. pass_next = backquote = 0;
  662. i = sind;
  663. while (c = string[i])
  664. {
  665. if (pass_next)
  666. {
  667. pass_next = 0;
  668. ADVANCE_CHAR (string, slen, i);
  669. continue;
  670. }
  671. else if (c == '\\')
  672. {
  673. pass_next++;
  674. i++;
  675. continue;
  676. }
  677. else if (backquote)
  678. {
  679. if (c == '`')
  680. backquote = 0;
  681. ADVANCE_CHAR (string, slen, i);
  682. continue;
  683. }
  684. else if (c == '`')
  685. {
  686. backquote++;
  687. i++;
  688. continue;
  689. }
  690. else if (c == '$' && ((string[i + 1] == LPAREN) || (string[i + 1] == LBRACE)))
  691. {
  692. si = i + 2;
  693. if (string[i + 1] == LPAREN)
  694. ret = extract_command_subst (string, &si, SX_NOALLOC);
  695. else
  696. ret = extract_dollar_brace_string (string, &si, 1, SX_NOALLOC);
  697. i = si + 1;
  698. continue;
  699. }
  700. else if (c != '"')
  701. {
  702. ADVANCE_CHAR (string, slen, i);
  703. continue;
  704. }
  705. else
  706. break;
  707. }
  708. if (c)
  709. i++;
  710. return (i);
  711. }
  712. /* Extract the contents of STRING as if it is enclosed in single quotes.
  713. SINDEX, when passed in, is the offset of the character immediately
  714. following the opening single quote; on exit, SINDEX is left pointing after
  715. the closing single quote. */
  716. static inline char *
  717. string_extract_single_quoted (string, sindex)
  718. char *string;
  719. int *sindex;
  720. {
  721. register int i;
  722. size_t slen;
  723. char *t;
  724. DECLARE_MBSTATE;
  725. /* Don't need slen for ADVANCE_CHAR unless multibyte chars possible. */
  726. slen = (MB_CUR_MAX > 1) ? strlen (string + *sindex) + *sindex : 0;
  727. i = *sindex;
  728. while (string[i] && string[i] != '\'')
  729. ADVANCE_CHAR (string, slen, i);
  730. t = substring (string, *sindex, i);
  731. if (string[i])
  732. i++;
  733. *sindex = i;
  734. return (t);
  735. }
  736. static inline int
  737. skip_single_quoted (string, slen, sind)
  738. const char *string;
  739. size_t slen;
  740. int sind;
  741. {
  742. register int c;
  743. DECLARE_MBSTATE;
  744. c = sind;
  745. while (string[c] && string[c] != '\'')
  746. ADVANCE_CHAR (string, slen, c);
  747. if (string[c])
  748. c++;
  749. return c;
  750. }
  751. /* Just like string_extract, but doesn't hack backslashes or any of
  752. that other stuff. Obeys CTLESC quoting. Used to do splitting on $IFS. */
  753. static char *
  754. string_extract_verbatim (string, slen, sindex, charlist, flags)
  755. char *string;
  756. size_t slen;
  757. int *sindex;
  758. char *charlist;
  759. int flags;
  760. {
  761. register int i = *sindex;
  762. #if defined (HANDLE_MULTIBYTE)
  763. size_t clen;
  764. wchar_t *wcharlist;
  765. #endif
  766. int c;
  767. char *temp;
  768. DECLARE_MBSTATE;
  769. if (charlist[0] == '\'' && charlist[1] == '\0')
  770. {
  771. temp = string_extract_single_quoted (string, sindex);
  772. --*sindex; /* leave *sindex at separator character */
  773. return temp;
  774. }
  775. i = *sindex;
  776. #if 0
  777. /* See how the MBLEN and ADVANCE_CHAR macros work to understand why we need
  778. this only if MB_CUR_MAX > 1. */
  779. slen = (MB_CUR_MAX > 1) ? strlen (string + *sindex) + *sindex : 1;
  780. #endif
  781. #if defined (HANDLE_MULTIBYTE)
  782. clen = strlen (charlist);
  783. wcharlist = 0;
  784. #endif
  785. while (c = string[i])
  786. {
  787. #if defined (HANDLE_MULTIBYTE)
  788. size_t mblength;
  789. #endif
  790. if ((flags & SX_NOCTLESC) == 0 && c == CTLESC)
  791. {
  792. i += 2;
  793. continue;
  794. }
  795. /* Even if flags contains SX_NOCTLESC, we let CTLESC quoting CTLNUL
  796. through, to protect the CTLNULs from later calls to
  797. remove_quoted_nulls. */
  798. else if ((flags & SX_NOESCCTLNUL) == 0 && c == CTLESC && string[i+1] == CTLNUL)
  799. {
  800. i += 2;
  801. continue;
  802. }
  803. #if defined (HANDLE_MULTIBYTE)
  804. mblength = MBLEN (string + i, slen - i);
  805. if (mblength > 1)
  806. {
  807. wchar_t wc;
  808. mblength = mbtowc (&wc, string + i, slen - i);
  809. if (MB_INVALIDCH (mblength))
  810. {
  811. if (MEMBER (c, charlist))
  812. break;
  813. }
  814. else
  815. {
  816. if (wcharlist == 0)
  817. {
  818. size_t len;
  819. len = mbstowcs (wcharlist, charlist, 0);
  820. if (len == -1)
  821. len = 0;
  822. wcharlist = (wchar_t *)xmalloc (sizeof (wchar_t) * (len + 1));
  823. mbstowcs (wcharlist, charlist, len + 1);
  824. }
  825. if (wcschr (wcharlist, wc))
  826. break;
  827. }
  828. }
  829. else
  830. #endif
  831. if (MEMBER (c, charlist))
  832. break;
  833. ADVANCE_CHAR (string, slen, i);
  834. }
  835. #if defined (HANDLE_MULTIBYTE)
  836. FREE (wcharlist);
  837. #endif
  838. temp = substring (string, *sindex, i);
  839. *sindex = i;
  840. return (temp);
  841. }
  842. /* Extract the $( construct in STRING, and return a new string.
  843. Start extracting at (SINDEX) as if we had just seen "$(".
  844. Make (SINDEX) get the position of the matching ")". )
  845. XFLAGS is additional flags to pass to other extraction functions, */
  846. char *
  847. extract_command_subst (string, sindex, xflags)
  848. char *string;
  849. int *sindex;
  850. int xflags;
  851. {
  852. if (string[*sindex] == '(') /*)*/
  853. return (extract_delimited_string (string, sindex, "$(", "(", ")", xflags|SX_COMMAND)); /*)*/
  854. else
  855. {
  856. xflags |= (no_longjmp_on_fatal_error ? SX_NOLONGJMP : 0);
  857. return (xparse_dolparen (string, string+*sindex, sindex, xflags));
  858. }
  859. }
  /* Extract the $[ construct in STRING, and return a new string. (])
     *SINDEX is the offset just after the "$["; the work is done by
     extract_delimited_string with opener "$[", alternate opener "[",
     closer "]" and no flags. */
  char *
  extract_arithmetic_subst (string, sindex)
       char *string;
       int *sindex;
  {
    char *expr;

    expr = extract_delimited_string (string, sindex, "$[", "[", "]", 0); /*]*/
    return (expr);
  }
  #if defined (PROCESS_SUBSTITUTION)
  /* Pull the text of a <( or >( construct out of STRING and return it as a
     new string.  STARTER is the two-character opener that was seen ("<(" or
     ">("); *SINDEX is the offset just past it and, on return, is the position
     of the matching ")".  Nested STARTERs and bare "(" are both balanced
     against ")". */ /*))*/
  char *
  extract_process_subst (string, starter, sindex)
       char *string;
       char *starter;
       int *sindex;
  {
    char *body;

    body = extract_delimited_string (string, sindex, starter, "(", ")", 0);
    return (body);
  }
  #endif /* PROCESS_SUBSTITUTION */
  #if defined (ARRAY_VARS)
  /* Return a new string holding STRING from offset *SINDEX up to, but not
     including, its final character, provided that final character is a right
     paren; *SINDEX is then set to the index of that paren.  Return NULL, and
     leave *SINDEX alone, if STRING is empty or does not end in a right paren.

     This can be fooled by unquoted right parens in the passed string: only
     the last character is examined, nothing is balanced.  If each caller
     verifies that the last character in STRING is a right paren, we don't
     even need to call extract_delimited_string. */
  char *
  extract_array_assignment_list (string, sindex)
       char *string;
       int *sindex;
  {
    int slen;
    char *ret;

    slen = strlen (string);	/* ( */
    /* Guard the empty string: string[slen - 1] would otherwise read the byte
       before the start of the buffer. */
    if (slen == 0 || string[slen - 1] != ')')
      return ((char *)NULL);

    ret = substring (string, *sindex, slen - 1);
    *sindex = slen - 1;
    return ret;
  }
  #endif
  904. /* Extract and create a new string from the contents of STRING, a
  905. character string delimited with OPENER and CLOSER. SINDEX is
  906. the address of an int describing the current offset in STRING;
  907. it should point to just after the first OPENER found. On exit,
  908. SINDEX gets the position of the last character of the matching CLOSER.
  909. If OPENER is more than a single character, ALT_OPENER, if non-null,
  910. contains a character string that can also match CLOSER and thus
  911. needs to be skipped. */
  912. static char *
  913. extract_delimited_string (string, sindex, opener, alt_opener, closer, flags)
  914. char *string;
  915. int *sindex;
  916. char *opener, *alt_opener, *closer;
  917. int flags;
  918. {
  919. int i, c, si;
  920. size_t slen;
  921. char *t, *result;
  922. int pass_character, nesting_level, in_comment;
  923. int len_closer, len_opener, len_alt_opener;
  924. DECLARE_MBSTATE;
  925. slen = strlen (string + *sindex) + *sindex;
  926. len_opener = STRLEN (opener);
  927. len_alt_opener = STRLEN (alt_opener);
  928. len_closer = STRLEN (closer);
  929. pass_character = in_comment = 0;
  930. nesting_level = 1;
  931. i = *sindex;
  932. while (nesting_level)
  933. {
  934. c = string[i];
  935. if (c == 0)
  936. break;
  937. if (in_comment)
  938. {
  939. if (c == '\n')
  940. in_comment = 0;
  941. ADVANCE_CHAR (string, slen, i);
  942. continue;
  943. }
  944. if (pass_character) /* previous char was backslash */
  945. {
  946. pass_character = 0;
  947. ADVANCE_CHAR (string, slen, i);
  948. continue;
  949. }
  950. /* Not exactly right yet; should handle shell metacharacters and
  951. multibyte characters, too. */
  952. if ((flags & SX_COMMAND) && c == '#' && (i == 0 || string[i - 1] == '\n' || shellblank (string[i - 1])))
  953. {
  954. in_comment = 1;
  955. ADVANCE_CHAR (string, slen, i);
  956. continue;
  957. }
  958. if (c == CTLESC || c == '\\')
  959. {
  960. pass_character++;
  961. i++;
  962. continue;
  963. }
  964. /* Process a nested OPENER. */
  965. if (STREQN (string + i, opener, len_opener))
  966. {
  967. si = i + len_opener;
  968. t = extract_delimited_string (string, &si, opener, alt_opener, closer, flags|SX_NOALLOC);
  969. i = si + 1;
  970. continue;
  971. }
  972. /* Process a nested ALT_OPENER */
  973. if (len_alt_opener && STREQN (string + i, alt_opener, len_alt_opener))
  974. {
  975. si = i + len_alt_opener;
  976. t = extract_delimited_string (string, &si, alt_opener, alt_opener, closer, flags|SX_NOALLOC);
  977. i = si + 1;
  978. continue;
  979. }
  980. /* If the current substring terminates the delimited string, decrement
  981. the nesting level. */
  982. if (STREQN (string + i, closer, len_closer))
  983. {
  984. i += len_closer - 1; /* move to last byte of the closer */
  985. nesting_level--;
  986. if (nesting_level == 0)
  987. break;
  988. }
  989. /* Pass old-style command substitution through verbatim. */
  990. if (c == '`')
  991. {
  992. si = i + 1;
  993. t = string_extract (string, &si, "`", flags|SX_NOALLOC);
  994. i = si + 1;
  995. continue;
  996. }
  997. /* Pass single-quoted and double-quoted strings through verbatim. */
  998. if (c == '\'' || c == '"')
  999. {
  1000. si = i + 1;
  1001. i = (c == '\'') ? skip_single_quoted (string, slen, si)
  1002. : skip_double_quoted (string, slen, si);
  1003. continue;
  1004. }
  1005. /* move past this character, which was not special. */
  1006. ADVANCE_CHAR (string, slen, i);
  1007. }
  1008. if (c == 0 && nesting_level)
  1009. {
  1010. if (no_longjmp_on_fatal_error == 0)
  1011. {
  1012. report_error (_("bad substitution: no closing `%s' in %s"), closer, string);
  1013. last_command_exit_value = EXECUTION_FAILURE;
  1014. exp_jump_to_top_level (DISCARD);
  1015. }
  1016. else
  1017. {
  1018. *sindex = i;
  1019. return (char *)NULL;
  1020. }
  1021. }
  1022. si = i - *sindex - len_closer + 1;
  1023. if (flags & SX_NOALLOC)
  1024. result = (char *)NULL;
  1025. else
  1026. {
  1027. result = (char *)xmalloc (1 + si);
  1028. strncpy (result, string + *sindex, si);
  1029. result[si] = '\0';
  1030. }
  1031. *sindex = i;
  1032. return (result);
  1033. }
  1034. /* Extract a parameter expansion expression within ${ and } from STRING.
  1035. Obey the Posix.2 rules for finding the ending `}': count braces while
  1036. skipping over enclosed quoted strings and command substitutions.
  1037. SINDEX is the address of an int describing the current offset in STRING;
  1038. it should point to just after the first `{' found. On exit, SINDEX
  1039. gets the position of the matching `}'. QUOTED is non-zero if this
  1040. occurs inside double quotes. */
  1041. /* XXX -- this is very similar to extract_delimited_string -- XXX */
  1042. static char *
  1043. extract_dollar_brace_string (string, sindex, quoted, flags)
  1044. char *string;
  1045. int *sindex, quoted, flags;
  1046. {
  1047. register int i, c;
  1048. size_t slen;
  1049. int pass_character, nesting_level, si;
  1050. char *result, *t;
  1051. DECLARE_MBSTATE;
  1052. pass_character = 0;
  1053. nesting_level = 1;
  1054. slen = strlen (string + *sindex) + *sindex;
  1055. i = *sindex;
  1056. while (c = string[i])
  1057. {
  1058. if (pass_character)
  1059. {
  1060. pass_character = 0;
  1061. ADVANCE_CHAR (string, slen, i);
  1062. continue;
  1063. }
  1064. /* CTLESCs and backslashes quote the next character. */
  1065. if (c == CTLESC || c == '\\')
  1066. {
  1067. pass_character++;
  1068. i++;
  1069. continue;
  1070. }
  1071. if (string[i] == '$' && string[i+1] == LBRACE)
  1072. {
  1073. nesting_level++;
  1074. i += 2;
  1075. continue;
  1076. }
  1077. if (c == RBRACE)
  1078. {
  1079. nesting_level--;
  1080. if (nesting_level == 0)
  1081. break;
  1082. i++;
  1083. continue;
  1084. }
  1085. /* Pass the contents of old-style command substitutions through
  1086. verbatim. */
  1087. if (c == '`')
  1088. {
  1089. si = i + 1;
  1090. t = string_extract (string, &si, "`", flags|SX_NOALLOC);
  1091. i = si + 1;
  1092. continue;
  1093. }
  1094. /* Pass the contents of new-style command substitutions and
  1095. arithmetic substitutions through verbatim. */
  1096. if (string[i] == '$' && string[i+1] == LPAREN)
  1097. {
  1098. si = i + 2;
  1099. t = extract_command_subst (string, &si, flags|SX_NOALLOC);
  1100. i = si + 1;
  1101. continue;
  1102. }
  1103. /* Pass the contents of single-quoted and double-quoted strings
  1104. through verbatim. */
  1105. if (c == '\'' || c == '"')
  1106. {
  1107. si = i + 1;
  1108. i = (c == '\'') ? skip_single_quoted (string, slen, si)
  1109. : skip_double_quoted (string, slen, si);
  1110. /* skip_XXX_quoted leaves index one past close quote */
  1111. continue;
  1112. }
  1113. /* move past this character, which was not special. */
  1114. ADVANCE_CHAR (string, slen, i);
  1115. }
  1116. if (c == 0 && nesting_level)
  1117. {
  1118. if (no_longjmp_on_fatal_error == 0)
  1119. { /* { */
  1120. report_error (_("bad substitution: no closing `%s' in %s"), "}", string);
  1121. last_command_exit_value = EXECUTION_FAILURE;
  1122. exp_jump_to_top_level (DISCARD);
  1123. }
  1124. else
  1125. {
  1126. *sindex = i;
  1127. return ((char *)NULL);
  1128. }
  1129. }
  1130. result = (flags & SX_NOALLOC) ? (char *)NULL : substring (string, *sindex, i);
  1131. *sindex = i;
  1132. return (result);
  1133. }
  1134. /* Remove backslashes which are quoting backquotes from STRING. Modifies
  1135. STRING, and returns a pointer to it. */
  1136. char *
  1137. de_backslash (string)
  1138. char *string;
  1139. {
  1140. register size_t slen;
  1141. register int i, j, prev_i;
  1142. DECLARE_MBSTATE;
  1143. slen = strlen (string);
  1144. i = j = 0;
  1145. /* Loop copying string[i] to string[j], i >= j. */
  1146. while (i < slen)
  1147. {
  1148. if (string[i] == '\\' && (string[i + 1] == '`' || string[i + 1] == '\\' ||
  1149. string[i + 1] == '$'))
  1150. i++;
  1151. prev_i = i;
  1152. ADVANCE_CHAR (string, slen, i);
  1153. if (j < prev_i)
  1154. do string[j++] = string[prev_i++]; while (prev_i < i);
  1155. else
  1156. j = i;
  1157. }
  1158. string[j] = '\0';
  1159. return (string);
  1160. }
  #if 0
  /*UNUSED*/
  /* Rewrite STRING in place so that each \! sequence becomes a plain !.
     The result is built in a scratch buffer and copied back. */
  void
  unquote_bang (string)
       char *string;
  {
    register int i, j;
    register char *temp;

    temp = (char *)xmalloc (1 + strlen (string));

    i = j = 0;
    while ((temp[j] = string[i]) != '\0')
      {
        /* A backslash followed by `!': keep only the `!'. */
        if (temp[j] == '\\' && string[i + 1] == '!')
          temp[j] = string[++i];
        i++;
        j++;
      }

    strcpy (string, temp);
    free (temp);
  }
  #endif
  1183. #define CQ_RETURN(x) do { no_longjmp_on_fatal_error = 0; return (x); } while (0)
  1184. /* This function assumes s[i] == open; returns with s[ret] == close; used to
  1185. parse array subscripts. FLAGS currently unused. */
  1186. static int
  1187. skip_matched_pair (string, start, open, close, flags)
  1188. const char *string;
  1189. int start, open, close, flags;
  1190. {
  1191. int i, pass_next, backq, si, c, count;
  1192. size_t slen;
  1193. char *temp, *ss;
  1194. DECLARE_MBSTATE;
  1195. slen = strlen (string + start) + start;
  1196. no_longjmp_on_fatal_error = 1;
  1197. i = start + 1; /* skip over leading bracket */
  1198. count = 1;
  1199. pass_next = backq = 0;
  1200. ss = (char *)string;
  1201. while (c = string[i])
  1202. {
  1203. if (pass_next)
  1204. {
  1205. pass_next = 0;
  1206. if (c == 0)
  1207. CQ_RETURN(i);
  1208. ADVANCE_CHAR (string, slen, i);
  1209. continue;
  1210. }
  1211. else if (c == '\\')
  1212. {
  1213. pass_next = 1;
  1214. i++;
  1215. continue;
  1216. }
  1217. else if (backq)
  1218. {
  1219. if (c == '`')
  1220. backq = 0;
  1221. ADVANCE_CHAR (string, slen, i);
  1222. continue;
  1223. }
  1224. else if (c == '`')
  1225. {
  1226. backq = 1;
  1227. i++;
  1228. continue;
  1229. }
  1230. else if (c == open)
  1231. {
  1232. count++;
  1233. i++;
  1234. continue;
  1235. }
  1236. else if (c == close)
  1237. {
  1238. count--;
  1239. if (count == 0)
  1240. break;
  1241. i++;
  1242. continue;
  1243. }
  1244. else if (c == '\'' || c == '"')
  1245. {
  1246. i = (c == '\'') ? skip_single_quoted (ss, slen, ++i)
  1247. : skip_double_quoted (ss, slen, ++i);
  1248. /* no increment, the skip functions increment past the closing quote. */
  1249. }
  1250. else if (c == '$' && (string[i+1] == LPAREN || string[i+1] == LBRACE))
  1251. {
  1252. si = i + 2;
  1253. if (string[si] == '\0')
  1254. CQ_RETURN(si);
  1255. if (string[i+1] == LPAREN)
  1256. temp = extract_delimited_string (ss, &si, "$(", "(", ")", SX_NOALLOC|SX_COMMAND); /* ) */
  1257. else
  1258. temp = extract_dollar_brace_string (ss, &si, 0, SX_NOALLOC);
  1259. i = si;
  1260. if (string[i] == '\0') /* don't increment i past EOS in loop */
  1261. break;
  1262. i++;
  1263. continue;
  1264. }
  1265. else
  1266. ADVANCE_CHAR (string, slen, i);
  1267. }
  1268. CQ_RETURN(i);
  1269. }
  #if defined (ARRAY_VARS)
  /* STRING[START] is taken to be a `['.  Return the index produced by
     skip_matched_pair when balancing it against `]'. */
  int
  skipsubscript (string, start)
       const char *string;
       int start;
  {
    int close_index;

    close_index = skip_matched_pair (string, start, '[', ']', 0);
    return (close_index);
  }
  #endif
  1279. /* Skip characters in STRING until we find a character in DELIMS, and return
  1280. the index of that character. START is the index into string at which we
  1281. begin. This is similar in spirit to strpbrk, but it returns an index into
  1282. STRING and takes a starting index. This little piece of code knows quite
  1283. a lot of shell syntax. It's very similar to skip_double_quoted and other
  1284. functions of that ilk. */
  1285. int
  1286. skip_to_delim (string, start, delims, flags)
  1287. char *string;
  1288. int start;
  1289. char *delims;
  1290. int flags;
  1291. {
  1292. int i, pass_next, backq, si, c, invert;
  1293. size_t slen;
  1294. char *temp;
  1295. DECLARE_MBSTATE;
  1296. slen = strlen (string + start) + start;
  1297. if (flags & SD_NOJMP)
  1298. no_longjmp_on_fatal_error = 1;
  1299. invert = (flags & SD_INVERT);
  1300. i = start;
  1301. pass_next = backq = 0;
  1302. while (c = string[i])
  1303. {
  1304. if (pass_next)
  1305. {
  1306. pass_next = 0;
  1307. if (c == 0)
  1308. CQ_RETURN(i);
  1309. ADVANCE_CHAR (string, slen, i);
  1310. continue;
  1311. }
  1312. else if (c == '\\')
  1313. {
  1314. pass_next = 1;
  1315. i++;
  1316. continue;
  1317. }
  1318. else if (backq)
  1319. {
  1320. if (c == '`')
  1321. backq = 0;
  1322. ADVANCE_CHAR (string, slen, i);
  1323. continue;
  1324. }
  1325. else if (c == '`')
  1326. {
  1327. backq = 1;
  1328. i++;
  1329. continue;
  1330. }
  1331. else if (invert == 0 && member (c, delims))
  1332. break;
  1333. else if (c == '\'' || c == '"')
  1334. {
  1335. i = (c == '\'') ? skip_single_quoted (string, slen, ++i)
  1336. : skip_double_quoted (string, slen, ++i);
  1337. /* no increment, the skip functions increment past the closing quote. */
  1338. }
  1339. else if (c == '$' && (string[i+1] == LPAREN || string[i+1] == LBRACE))
  1340. {
  1341. si = i + 2;
  1342. if (string[si] == '\0')
  1343. CQ_RETURN(si);
  1344. if (string[i+1] == LPAREN)
  1345. temp = extract_delimited_string (string, &si, "$(", "(", ")", SX_NOALLOC|SX_COMMAND); /* ) */
  1346. else
  1347. temp = extract_dollar_brace_string (string, &si, 0, SX_NOALLOC);
  1348. i = si;
  1349. if (string[i] == '\0') /* don't increment i past EOS in loop */
  1350. break;
  1351. i++;
  1352. continue;
  1353. }
  1354. else if (invert && (member (c, delims) == 0))
  1355. break;
  1356. else
  1357. ADVANCE_CHAR (string, slen, i);
  1358. }
  1359. CQ_RETURN(i);
  1360. }
  1361. #if defined (READLINE)
  1362. /* Return 1 if the portion of STRING ending at EINDEX is quoted (there is
  1363. an unclosed quoted string), or if the character at EINDEX is quoted
  1364. by a backslash. NO_LONGJMP_ON_FATAL_ERROR is used to flag that the various
  1365. single and double-quoted string parsing functions should not return an
  1366. error if there are unclosed quotes or braces. The characters that this
  1367. recognizes need to be the same as the contents of
  1368. rl_completer_quote_characters. */
  1369. int
  1370. char_is_quoted (string, eindex)
  1371. char *string;
  1372. int eindex;
  1373. {
  1374. int i, pass_next, c;
  1375. size_t slen;
  1376. DECLARE_MBSTATE;
  1377. slen = strlen (string);
  1378. no_longjmp_on_fatal_error = 1;
  1379. i = pass_next = 0;
  1380. while (i <= eindex)
  1381. {
  1382. c = string[i];
  1383. if (pass_next)
  1384. {
  1385. pass_next = 0;
  1386. if (i >= eindex) /* XXX was if (i >= eindex - 1) */
  1387. CQ_RETURN(1);
  1388. ADVANCE_CHAR (string, slen, i);
  1389. continue;
  1390. }
  1391. else if (c == '\\')
  1392. {
  1393. pass_next = 1;
  1394. i++;
  1395. continue;
  1396. }
  1397. else if (c == '\'' || c == '"')
  1398. {
  1399. i = (c == '\'') ? skip_single_quoted (string, slen, ++i)
  1400. : skip_double_quoted (string, slen, ++i);
  1401. if (i > eindex)
  1402. CQ_RETURN(1);
  1403. /* no increment, the skip_xxx functions go one past end */
  1404. }
  1405. else
  1406. ADVANCE_CHAR (string, slen, i);
  1407. }
  1408. CQ_RETURN(0);
  1409. }
  1410. int
  1411. unclosed_pair (string, eindex, openstr)
  1412. char *string;
  1413. int eindex;
  1414. char *openstr;
  1415. {
  1416. int i, pass_next, openc, olen;
  1417. size_t slen;
  1418. DECLARE_MBSTATE;
  1419. slen = strlen (string);
  1420. olen = strlen (openstr);
  1421. i = pass_next = openc = 0;
  1422. while (i <= eindex)
  1423. {
  1424. if (pass_next)
  1425. {
  1426. pass_next = 0;
  1427. if (i >= eindex) /* XXX was if (i >= eindex - 1) */
  1428. return 0;
  1429. ADVANCE_CHAR (string, slen, i);
  1430. continue;
  1431. }
  1432. else if (string[i] == '\\')
  1433. {
  1434. pass_next = 1;
  1435. i++;
  1436. continue;
  1437. }
  1438. else if (STREQN (string + i, openstr, olen))
  1439. {
  1440. openc = 1 - openc;
  1441. i += olen;
  1442. }
  1443. else if (string[i] == '\'' || string[i] == '"')
  1444. {
  1445. i = (string[i] == '\'') ? skip_single_quoted (string, slen, i)
  1446. : skip_double_quoted (string, slen, i);
  1447. if (i > eindex)
  1448. return 0;
  1449. }
  1450. else
  1451. ADVANCE_CHAR (string, slen, i);
  1452. }
  1453. return (openc);
  1454. }
  1455. /* Split STRING (length SLEN) at DELIMS, and return a WORD_LIST with the
  1456. individual words. If DELIMS is NULL, the current value of $IFS is used
  1457. to split the string, and the function follows the shell field splitting
  1458. rules. SENTINEL is an index to look for. NWP, if non-NULL,
  1459. gets the number of words in the returned list. CWP, if non-NULL, gets
  1460. the index of the word containing SENTINEL. Non-whitespace chars in
  1461. DELIMS delimit separate fields. */
  1462. WORD_LIST *
  1463. split_at_delims (string, slen, delims, sentinel, nwp, cwp)
  1464. char *string;
  1465. int slen;
  1466. char *delims;
  1467. int sentinel;
  1468. int *nwp, *cwp;
  1469. {
  1470. int ts, te, i, nw, cw, ifs_split;
  1471. char *token, *d, *d2;
  1472. WORD_LIST *ret, *tl;
  1473. if (string == 0 || *string == '\0')
  1474. {
  1475. if (nwp)
  1476. *nwp = 0;
  1477. if (cwp)
  1478. *cwp = 0;
  1479. return ((WORD_LIST *)NULL);
  1480. }
  1481. d = (delims == 0) ? ifs_value : delims;
  1482. ifs_split = delims == 0;
  1483. /* Make d2 the non-whitespace characters in delims */
  1484. d2 = 0;
  1485. if (delims)
  1486. {
  1487. size_t slength;
  1488. #if defined (HANDLE_MULTIBYTE)
  1489. size_t mblength = 1;
  1490. #endif
  1491. DECLARE_MBSTATE;
  1492. slength = strlen (delims);
  1493. d2 = (char *)xmalloc (slength + 1);
  1494. i = ts = 0;
  1495. while (delims[i])
  1496. {
  1497. #if defined (HANDLE_MULTIBYTE)
  1498. mbstate_t state_bak;
  1499. state_bak = state;
  1500. mblength = MBRLEN (delims + i, slength, &state);
  1501. if (MB_INVALIDCH (mblength))
  1502. state = state_bak;
  1503. else if (mblength > 1)
  1504. {
  1505. memcpy (d2 + ts, delims + i, mblength);
  1506. ts += mblength;
  1507. i += mblength;
  1508. slength -= mblength;
  1509. continue;
  1510. }
  1511. #endif
  1512. if (whitespace (delims[i]) == 0)
  1513. d2[ts++] = delims[i];
  1514. i++;
  1515. slength--;
  1516. }
  1517. d2[ts] = '\0';
  1518. }
  1519. ret = (WORD_LIST *)NULL;
  1520. /* Remove sequences of whitspace characters at the start of the string, as
  1521. long as those characters are delimiters. */
  1522. for (i = 0; member (string[i], d) && spctabnl (string[i]); i++)
  1523. ;
  1524. if (string[i] == '\0')
  1525. return (ret);
  1526. ts = i;
  1527. nw = 0;
  1528. cw = -1;
  1529. while (1)
  1530. {
  1531. te = skip_to_delim (string, ts, d, SD_NOJMP);
  1532. /* If we have a non-whitespace delimiter character, use it to make a
  1533. separate field. This is just about what $IFS splitting does and
  1534. is closer to the behavior of the shell parser. */
  1535. if (ts == te && d2 && member (string[ts], d2))
  1536. {
  1537. te = ts + 1;
  1538. /* If we're using IFS splitting, the non-whitespace delimiter char
  1539. and any additional IFS whitespace delimits a field. */
  1540. if (ifs_split)
  1541. while (member (string[te], d) && spctabnl (string[te]))
  1542. te++;
  1543. else
  1544. while (member (string[te], d2))
  1545. te++;
  1546. }
  1547. token = substring (string, ts, te);
  1548. ret = add_string_to_list (token, ret);
  1549. free (token);
  1550. nw++;
  1551. if (sentinel >= ts && sentinel <= te)
  1552. cw = nw;
  1553. /* If the cursor is at whitespace just before word start, set the
  1554. sentinel word to the current word. */
  1555. if (cwp && cw == -1 && sentinel == ts-1)
  1556. cw = nw;
  1557. /* If the cursor is at whitespace between two words, make a new, empty
  1558. word, add it before (well, after, since the list is in reverse order)
  1559. the word we just added, and set the current word to that one. */
  1560. if (cwp && cw == -1 && sentinel < ts)
  1561. {
  1562. tl = make_word_list (make_word (""), ret->next);
  1563. ret->next = tl;
  1564. cw = nw;
  1565. nw++;
  1566. }
  1567. if (string[te] == 0)
  1568. break;
  1569. i = te;
  1570. while (member (string[i], d) && (ifs_split || spctabnl(string[i])))
  1571. i++;
  1572. if (string[i])
  1573. ts = i;
  1574. else
  1575. break;
  1576. }
  1577. /* Special case for SENTINEL at the end of STRING. If we haven't found
  1578. the word containing SENTINEL yet, and the index we're looking for is at
  1579. the end of STRING, add an additional null argument and set the current
  1580. word pointer to that. */
  1581. if (cwp && cw == -1 && sentinel >= slen)
  1582. {
  1583. if (whitespace (string[sentinel - 1]))
  1584. {
  1585. token = "";
  1586. ret = add_string_to_list (token, ret);
  1587. nw++;
  1588. }
  1589. cw = nw;
  1590. }
  1591. if (nwp)
  1592. *nwp = nw;
  1593. if (cwp)
  1594. *cwp = cw;
  1595. return (REVERSE_LIST (ret, WORD_LIST *));
  1596. }
  1597. #endif /* READLINE */
  #if 0
  /* UNUSED */
  /* Return a new string holding the first OFFSET characters of the
     assignment string STRING, where OFFSET is what assignment (STRING, 0)
     returns, or NULL when that offset is 0. */
  char *
  assignment_name (string)
       char *string;
  {
    int offset;

    offset = assignment (string, 0);
    return (offset ? substring (string, 0, offset) : (char *)NULL);
  }
  #endif
  1614. /* **************************************************************** */
  1615. /* */
  1616. /* Functions to convert strings to WORD_LISTs and vice versa */
  1617. /* */
  1618. /* **************************************************************** */
  1619. /* Return a single string of all the words in LIST. SEP is the separator
  1620. to put between individual elements of LIST in the output string. */
  1621. char *
  1622. string_list_internal (list, sep)
  1623. WORD_LIST *list;
  1624. char *sep;
  1625. {
  1626. register WORD_LIST *t;
  1627. char *result, *r;
  1628. int word_len, sep_len, result_size;
  1629. if (list == 0)
  1630. return ((char *)NULL);
  1631. /* Short-circuit quickly if we don't need to separate anything. */
  1632. if (list->next == 0)
  1633. return (savestring (list->word->word));
  1634. /* This is nearly always called with either sep[0] == 0 or sep[1] == 0. */
  1635. sep_len = STRLEN (sep);
  1636. result_size = 0;
  1637. for (t = list; t; t = t->next)
  1638. {
  1639. if (t != list)
  1640. result_size += sep_len;
  1641. result_size += strlen (t->word->word);
  1642. }
  1643. r = result = (char *)xmalloc (result_size + 1);
  1644. for (t = list; t; t = t->next)
  1645. {
  1646. if (t != list && sep_len)
  1647. {
  1648. if (sep_len > 1)
  1649. {
  1650. FASTCOPY (sep, r, sep_len);
  1651. r += sep_len;
  1652. }
  1653. else
  1654. *r++ = sep[0];
  1655. }
  1656. word_len = strlen (t->word->word);
  1657. FASTCOPY (t->word->word, r, word_len);
  1658. r += word_len;
  1659. }
  1660. *r = '\0';
  1661. return (result);
  1662. }
  1663. /* Return a single string of all the words present in LIST, separating
  1664. each word with a space. */
  1665. char *
  1666. string_list (list)
  1667. WORD_LIST *list;
  1668. {
  1669. return (string_list_internal (list, " "));
  1670. }
  1671. /* An external interface that can be used by the rest of the shell to
  1672. obtain a string containing the first character in $IFS. Handles all
  1673. the multibyte complications. If LENP is non-null, it is set to the
  1674. length of the returned string. */
  1675. char *
  1676. ifs_firstchar (lenp)
  1677. int *lenp;
  1678. {
  1679. char *ret;
  1680. int len;
  1681. ret = xmalloc (MB_LEN_MAX + 1);
  1682. #if defined (HANDLE_MULTIBYTE)
  1683. if (ifs_firstc_len == 1)
  1684. {
  1685. ret[0] = ifs_firstc[0];
  1686. ret[1] = '\0';
  1687. len = ret[0] ? 1 : 0;
  1688. }
  1689. else
  1690. {
  1691. memcpy (ret, ifs_firstc, ifs_firstc_len);
  1692. ret[len = ifs_firstc_len] = '\0';
  1693. }
  1694. #else
  1695. ret[0] = ifs_firstc;
  1696. ret[1] = '\0';
  1697. len = ret[0] ? 0 : 1;
  1698. #endif
  1699. if (lenp)
  1700. *lenp = len;
  1701. return ret;
  1702. }
  1703. /* Return a single string of all the words present in LIST, obeying the
  1704. quoting rules for "$*", to wit: (P1003.2, draft 11, 3.5.2) "If the
  1705. expansion [of $*] appears within a double quoted string, it expands
  1706. to a single field with the value of each parameter separated by the
  1707. first character of the IFS variable, or by a <space> if IFS is unset." */
  1708. char *
  1709. string_list_dollar_star (list)
  1710. WORD_LIST *list;
  1711. {
  1712. char *ret;
  1713. #if defined (HANDLE_MULTIBYTE)
  1714. # if defined (__GNUC__)
  1715. char sep[MB_CUR_MAX + 1];
  1716. # else
  1717. char *sep = 0;
  1718. # endif
  1719. #else
  1720. char sep[2];
  1721. #endif
  1722. #if defined (HANDLE_MULTI

Large files are truncated, but you can click here to view the full file