PageRenderTime 76ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 1ms

/usr.bin/tr/str.c

https://gitlab.com/tlevine/DragonFlyBSD
C | 354 lines | 277 code | 29 blank | 48 comment | 70 complexity | 0382ac4628eee7dc5820412402d75ae9 MD5 | raw file
  1. /*-
  2. * Copyright (c) 1991, 1993
  3. * The Regents of the University of California. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. * 3. Neither the name of the University nor the names of its contributors
  14. * may be used to endorse or promote products derived from this software
  15. * without specific prior written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. *
  29. * @(#)str.c 8.2 (Berkeley) 4/28/95
  30. * $FreeBSD: src/usr.bin/tr/str.c,v 1.10.2.2 2002/07/29 12:59:33 tjr Exp $
  31. */
  32. #include <sys/types.h>
  33. #include <ctype.h>
  34. #include <err.h>
  35. #include <stddef.h>
  36. #include <stdio.h>
  37. #include <stdlib.h>
  38. #include <string.h>
  39. #include "extern.h"
  /*
   * Forward declarations for the file-local helpers defined below.
   * next() is the only function in this section without internal linkage.
   */
  static int backslash(STR *);
  static int bracket(STR *);
  static int c_class(const void *, const void *);
  static void genclass(STR *);
  static void genequiv(STR *);
  static int genrange(STR *);
  static void genseq(STR *);
  47. int
  48. next(STR *s)
  49. {
  50. int ch;
  51. switch (s->state) {
  52. case EOS:
  53. return (0);
  54. case INFINITE:
  55. return (1);
  56. case NORMAL:
  57. switch (ch = (u_char)*s->str) {
  58. case '\0':
  59. s->state = EOS;
  60. return (0);
  61. case '\\':
  62. s->lastch = backslash(s);
  63. break;
  64. case '[':
  65. if (bracket(s))
  66. return (next(s));
  67. /* FALLTHROUGH */
  68. default:
  69. ++s->str;
  70. s->lastch = ch;
  71. break;
  72. }
  73. /* We can start a range at any time. */
  74. if (s->str[0] == '-' && genrange(s))
  75. return (next(s));
  76. return (1);
  77. case RANGE:
  78. if (s->cnt-- == 0) {
  79. s->state = NORMAL;
  80. return (next(s));
  81. }
  82. ++s->lastch;
  83. return (1);
  84. case SEQUENCE:
  85. if (s->cnt-- == 0) {
  86. s->state = NORMAL;
  87. return (next(s));
  88. }
  89. return (1);
  90. case SET:
  91. if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
  92. s->state = NORMAL;
  93. return (next(s));
  94. }
  95. return (1);
  96. default:
  97. return (0);
  98. }
  99. /* NOTREACHED */
  100. }
  101. static int
  102. bracket(STR *s)
  103. {
  104. char *p;
  105. switch (s->str[1]) {
  106. case ':': /* "[:class:]" */
  107. if ((p = strchr(s->str + 2, ']')) == NULL)
  108. return (0);
  109. if (*(p - 1) != ':' || p - s->str < 4)
  110. goto repeat;
  111. *(p - 1) = '\0';
  112. s->str += 2;
  113. genclass(s);
  114. s->str = p + 1;
  115. return (1);
  116. case '=': /* "[=equiv=]" */
  117. if ((p = strchr(s->str + 2, ']')) == NULL)
  118. return (0);
  119. if (*(p - 1) != '=' || p - s->str < 4)
  120. goto repeat;
  121. s->str += 2;
  122. genequiv(s);
  123. return (1);
  124. default: /* "[\###*n]" or "[#*n]" */
  125. repeat:
  126. if ((p = strpbrk(s->str + 2, "*]")) == NULL)
  127. return (0);
  128. if (p[0] != '*' || strchr(p, ']') == NULL)
  129. return (0);
  130. s->str += 1;
  131. genseq(s);
  132. return (1);
  133. }
  134. /* NOTREACHED */
  135. }
  /*
   * One entry per character class name accepted in "[:name:]".
   */
  typedef struct class_entry {
      const char *name;        /* class name as written between the colons */
      int (*func)(int);        /* <ctype.h> predicate deciding membership */
      int *set;                /* member table, filled in by genclass() */
  } CLASS;

  /*
   * Drop any macro versions of the predicates so that each name below
   * refers to the real function, whose address goes into the table.
   */
  #undef isalnum
  #undef isalpha
  #undef isblank
  #undef iscntrl
  #undef isdigit
  #undef isgraph
  #undef islower
  #undef isprint
  #undef ispunct
  #undef isspace
  #undef isupper
  #undef isxdigit

  /*
   * Keep this table sorted by name: genclass() looks entries up with
   * bsearch() using a strcmp()-based comparison.
   */
  static CLASS classes[] = {
      { "alnum",  isalnum,  NULL },
      { "alpha",  isalpha,  NULL },
      { "blank",  isblank,  NULL },
      { "cntrl",  iscntrl,  NULL },
      { "digit",  isdigit,  NULL },
      { "graph",  isgraph,  NULL },
      { "lower",  islower,  NULL },
      { "print",  isprint,  NULL },
      { "punct",  ispunct,  NULL },
      { "space",  isspace,  NULL },
      { "upper",  isupper,  NULL },
      { "xdigit", isxdigit, NULL },
  };
  167. static void
  168. genclass(STR *s)
  169. {
  170. int cnt, (*func)(int);
  171. CLASS *cp, tmp;
  172. int *p;
  173. tmp.name = s->str;
  174. if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
  175. sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
  176. errx(1, "unknown class %s", s->str);
  177. if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
  178. err(1, "malloc");
  179. bzero(p, (NCHARS + 1) * sizeof(int));
  180. for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
  181. if ((func)(cnt))
  182. *p++ = cnt;
  183. *p = OOBCH;
  184. s->cnt = 0;
  185. s->state = SET;
  186. s->set = cp->set;
  187. }
  188. static int
  189. c_class(const void *a, const void *b)
  190. {
  191. return (strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name));
  192. }
  193. static void
  194. genequiv(STR *s)
  195. {
  196. int i, p, pri;
  197. char src[2], dst[3];
  198. if (*s->str == '\\') {
  199. s->equiv[0] = backslash(s);
  200. if (*s->str != '=')
  201. errx(1, "misplaced equivalence equals sign");
  202. s->str += 2;
  203. } else {
  204. s->equiv[0] = s->str[0];
  205. if (s->str[1] != '=')
  206. errx(1, "misplaced equivalence equals sign");
  207. s->str += 3;
  208. }
  209. /*
  210. * Calculate the set of all characters in the same equivalence class
  211. * as the specified character (they will have the same primary
  212. * collation weights).
  213. * XXX Knows too much about how strxfrm() is implemented. Assumes
  214. * it fills the string with primary collation weight bytes. Only one-
  215. * to-one mappings are supported.
  216. */
  217. src[0] = s->equiv[0];
  218. src[1] = '\0';
  219. if (strxfrm(dst, src, sizeof(dst)) == 1) {
  220. pri = (unsigned char)*dst;
  221. for (p = 1, i = 1; i < NCHARS; i++) {
  222. *src = i;
  223. if (strxfrm(dst, src, sizeof(dst)) == 1 && pri &&
  224. pri == (unsigned char)*dst)
  225. s->equiv[p++] = i;
  226. }
  227. s->equiv[p] = OOBCH;
  228. }
  229. s->cnt = 0;
  230. s->state = SET;
  231. s->set = s->equiv;
  232. }
  233. static int
  234. genrange(STR *s)
  235. {
  236. int stopval;
  237. char *savestart;
  238. savestart = s->str;
  239. stopval = *++s->str == '\\' ? backslash(s) : (u_char)*s->str++;
  240. if (stopval < (u_char)s->lastch) {
  241. s->str = savestart;
  242. return (0);
  243. }
  244. s->cnt = stopval - s->lastch + 1;
  245. s->state = RANGE;
  246. --s->lastch;
  247. return (1);
  248. }
  249. static void
  250. genseq(STR *s)
  251. {
  252. char *ep;
  253. if (s->which == STRING1)
  254. errx(1, "sequences only valid in string2");
  255. if (*s->str == '\\')
  256. s->lastch = backslash(s);
  257. else
  258. s->lastch = *s->str++;
  259. if (*s->str != '*')
  260. errx(1, "misplaced sequence asterisk");
  261. switch (*++s->str) {
  262. case '\\':
  263. s->cnt = backslash(s);
  264. break;
  265. case ']':
  266. s->cnt = 0;
  267. ++s->str;
  268. break;
  269. default:
  270. if (isdigit((u_char)*s->str)) {
  271. s->cnt = strtol(s->str, &ep, 0);
  272. if (*ep == ']') {
  273. s->str = ep + 1;
  274. break;
  275. }
  276. }
  277. errx(1, "illegal sequence count");
  278. /* NOTREACHED */
  279. }
  280. s->state = s->cnt ? SEQUENCE : INFINITE;
  281. }
  282. /*
  283. * Translate \??? into a character. Up to 3 octal digits, if no digits either
  284. * an escape code or a literal character.
  285. */
  286. static int
  287. backslash(STR *s)
  288. {
  289. int ch, cnt, val;
  290. for (cnt = val = 0;;) {
  291. ch = (u_char)*++s->str;
  292. if (!isascii(ch) || !isdigit(ch))
  293. break;
  294. val = val * 8 + ch - '0';
  295. if (++cnt == 3) {
  296. ++s->str;
  297. break;
  298. }
  299. }
  300. if (cnt)
  301. return (val);
  302. if (ch != '\0')
  303. ++s->str;
  304. switch (ch) {
  305. case 'a': /* escape characters */
  306. return ('\7');
  307. case 'b':
  308. return ('\b');
  309. case 'f':
  310. return ('\f');
  311. case 'n':
  312. return ('\n');
  313. case 'r':
  314. return ('\r');
  315. case 't':
  316. return ('\t');
  317. case 'v':
  318. return ('\13');
  319. case '\0': /* \" -> \ */
  320. s->state = EOS;
  321. return ('\\');
  322. default: /* \x" -> x */
  323. return (ch);
  324. }
  325. }