/usr.bin/tr/cset.c

https://bitbucket.org/freebsd/freebsd-head/ · C · 290 lines · 185 code · 30 blank · 75 comment · 50 complexity · f893333d11fdf31f380b25d63ceff548 MD5 · raw file

  1. /*-
  2. * Copyright (c) 2004 Tim J. Robbins.
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24. * SUCH DAMAGE.
  25. */
  26. /*
  27. * "Set of characters" ADT implemented as a splay tree of extents, with
  28. * a lookup table cache to simplify looking up the first bunch of
  29. * characters (which are presumably more common than others).
  30. */
  31. #include <sys/cdefs.h>
  32. __FBSDID("$FreeBSD$");
  33. #include <assert.h>
  34. #include <stdbool.h>
  35. #include <stdlib.h>
  36. #include <wchar.h>
  37. #include <wctype.h>
  38. #include "cset.h"
  39. static struct csnode * cset_delete(struct csnode *, wchar_t);
  40. static __inline int cset_rangecmp(struct csnode *, wchar_t);
  41. static struct csnode * cset_splay(struct csnode *, wchar_t);
  42. /*
  43. * cset_alloc --
  44. * Allocate a set of characters.
  45. */
  46. struct cset *
  47. cset_alloc(void)
  48. {
  49. struct cset *cs;
  50. if ((cs = malloc(sizeof(*cs))) == NULL)
  51. return (NULL);
  52. cs->cs_root = NULL;
  53. cs->cs_classes = NULL;
  54. cs->cs_havecache = false;
  55. cs->cs_invert = false;
  56. return (cs);
  57. }
  58. /*
  59. * cset_add --
  60. * Add a character to the set.
  61. */
  62. bool
  63. cset_add(struct cset *cs, wchar_t ch)
  64. {
  65. struct csnode *csn, *ncsn;
  66. wchar_t oval;
  67. cs->cs_havecache = false;
  68. /*
  69. * Inserting into empty tree; new item becomes the root.
  70. */
  71. if (cs->cs_root == NULL) {
  72. csn = malloc(sizeof(*cs->cs_root));
  73. if (csn == NULL)
  74. return (false);
  75. csn->csn_left = csn->csn_right = NULL;
  76. csn->csn_min = csn->csn_max = ch;
  77. cs->cs_root = csn;
  78. return (true);
  79. }
  80. /*
  81. * Splay to check whether the item already exists, and otherwise,
  82. * where we should put it.
  83. */
  84. csn = cs->cs_root = cset_splay(cs->cs_root, ch);
  85. /*
  86. * Avoid adding duplicate nodes.
  87. */
  88. if (cset_rangecmp(csn, ch) == 0)
  89. return (true);
  90. /*
  91. * Allocate a new node and make it the new root.
  92. */
  93. ncsn = malloc(sizeof(*ncsn));
  94. if (ncsn == NULL)
  95. return (false);
  96. ncsn->csn_min = ncsn->csn_max = ch;
  97. if (cset_rangecmp(csn, ch) < 0) {
  98. ncsn->csn_left = csn->csn_left;
  99. ncsn->csn_right = csn;
  100. csn->csn_left = NULL;
  101. } else {
  102. ncsn->csn_right = csn->csn_right;
  103. ncsn->csn_left = csn;
  104. csn->csn_right = NULL;
  105. }
  106. cs->cs_root = ncsn;
  107. /*
  108. * Coalesce with left and right neighbours if possible.
  109. */
  110. if (ncsn->csn_left != NULL) {
  111. ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
  112. if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
  113. oval = ncsn->csn_left->csn_min;
  114. ncsn->csn_left = cset_delete(ncsn->csn_left,
  115. ncsn->csn_left->csn_min);
  116. ncsn->csn_min = oval;
  117. }
  118. }
  119. if (ncsn->csn_right != NULL) {
  120. ncsn->csn_right = cset_splay(ncsn->csn_right,
  121. ncsn->csn_max + 1);
  122. if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
  123. oval = ncsn->csn_right->csn_max;
  124. ncsn->csn_right = cset_delete(ncsn->csn_right,
  125. ncsn->csn_right->csn_min);
  126. ncsn->csn_max = oval;
  127. }
  128. }
  129. return (true);
  130. }
  131. /*
  132. * cset_in_hard --
  133. * Determine whether a character is in the set without using
  134. * the cache.
  135. */
  136. bool
  137. cset_in_hard(struct cset *cs, wchar_t ch)
  138. {
  139. struct csclass *csc;
  140. for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
  141. if (csc->csc_invert ^ (iswctype(ch, csc->csc_type) != 0))
  142. return (cs->cs_invert ^ true);
  143. if (cs->cs_root != NULL) {
  144. cs->cs_root = cset_splay(cs->cs_root, ch);
  145. return (cs->cs_invert ^ (cset_rangecmp(cs->cs_root, ch) == 0));
  146. }
  147. return (cs->cs_invert ^ false);
  148. }
  149. /*
  150. * cset_cache --
  151. * Update the cache.
  152. */
  153. void
  154. cset_cache(struct cset *cs)
  155. {
  156. wchar_t i;
  157. for (i = 0; i < CS_CACHE_SIZE; i++)
  158. cs->cs_cache[i] = cset_in_hard(cs, i);
  159. cs->cs_havecache = true;
  160. }
  161. /*
  162. * cset_invert --
  163. * Invert the character set.
  164. */
  165. void
  166. cset_invert(struct cset *cs)
  167. {
  168. cs->cs_invert ^= true;
  169. cs->cs_havecache = false;
  170. }
  171. /*
  172. * cset_addclass --
  173. * Add a wctype()-style character class to the set, optionally
  174. * inverting it.
  175. */
  176. bool
  177. cset_addclass(struct cset *cs, wctype_t type, bool invert)
  178. {
  179. struct csclass *csc;
  180. csc = malloc(sizeof(*csc));
  181. if (csc == NULL)
  182. return (false);
  183. csc->csc_type = type;
  184. csc->csc_invert = invert;
  185. csc->csc_next = cs->cs_classes;
  186. cs->cs_classes = csc;
  187. cs->cs_havecache = false;
  188. return (true);
  189. }
  190. static __inline int
  191. cset_rangecmp(struct csnode *t, wchar_t ch)
  192. {
  193. if (ch < t->csn_min)
  194. return (-1);
  195. if (ch > t->csn_max)
  196. return (1);
  197. return (0);
  198. }
  199. static struct csnode *
  200. cset_splay(struct csnode *t, wchar_t ch)
  201. {
  202. struct csnode N, *l, *r, *y;
  203. /*
  204. * Based on public domain code from Sleator.
  205. */
  206. assert(t != NULL);
  207. N.csn_left = N.csn_right = NULL;
  208. l = r = &N;
  209. for (;;) {
  210. if (cset_rangecmp(t, ch) < 0) {
  211. if (t->csn_left != NULL &&
  212. cset_rangecmp(t->csn_left, ch) < 0) {
  213. y = t->csn_left;
  214. t->csn_left = y->csn_right;
  215. y->csn_right = t;
  216. t = y;
  217. }
  218. if (t->csn_left == NULL)
  219. break;
  220. r->csn_left = t;
  221. r = t;
  222. t = t->csn_left;
  223. } else if (cset_rangecmp(t, ch) > 0) {
  224. if (t->csn_right != NULL &&
  225. cset_rangecmp(t->csn_right, ch) > 0) {
  226. y = t->csn_right;
  227. t->csn_right = y->csn_left;
  228. y->csn_left = t;
  229. t = y;
  230. }
  231. if (t->csn_right == NULL)
  232. break;
  233. l->csn_right = t;
  234. l = t;
  235. t = t->csn_right;
  236. } else
  237. break;
  238. }
  239. l->csn_right = t->csn_left;
  240. r->csn_left = t->csn_right;
  241. t->csn_left = N.csn_right;
  242. t->csn_right = N.csn_left;
  243. return (t);
  244. }
  245. static struct csnode *
  246. cset_delete(struct csnode *t, wchar_t ch)
  247. {
  248. struct csnode *x;
  249. assert(t != NULL);
  250. t = cset_splay(t, ch);
  251. assert(cset_rangecmp(t, ch) == 0);
  252. if (t->csn_left == NULL)
  253. x = t->csn_right;
  254. else {
  255. x = cset_splay(t->csn_left, ch);
  256. x->csn_right = t->csn_right;
  257. }
  258. free(t);
  259. return x;
  260. }