PageRenderTime 32ms CodeModel.GetById 13ms app.highlight 16ms RepoModel.GetById 1ms app.codeStats 0ms

/usr.bin/tr/cset.c

https://bitbucket.org/freebsd/freebsd-head/
C | 290 lines | 185 code | 30 blank | 75 comment | 50 complexity | f893333d11fdf31f380b25d63ceff548 MD5 | raw file
  1/*-
  2 * Copyright (c) 2004 Tim J. Robbins.
  3 * All rights reserved.
  4 *
  5 * Redistribution and use in source and binary forms, with or without
  6 * modification, are permitted provided that the following conditions
  7 * are met:
  8 * 1. Redistributions of source code must retain the above copyright
  9 *    notice, this list of conditions and the following disclaimer.
 10 * 2. Redistributions in binary form must reproduce the above copyright
 11 *    notice, this list of conditions and the following disclaimer in the
 12 *    documentation and/or other materials provided with the distribution.
 13 *
 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 24 * SUCH DAMAGE.
 25 */
 26/*
 27 * "Set of characters" ADT implemented as a splay tree of extents, with
 28 * a lookup table cache to simplify looking up the first bunch of
 29 * characters (which are presumably more common than others).
 30 */
 31
 32#include <sys/cdefs.h>
 33__FBSDID("$FreeBSD$");
 34
 35#include <assert.h>
 36#include <stdbool.h>
 37#include <stdlib.h>
 38#include <wchar.h>
 39#include <wctype.h>
 40#include "cset.h"
 41
 42static struct csnode *	cset_delete(struct csnode *, wchar_t);
 43static __inline int	cset_rangecmp(struct csnode *, wchar_t);
 44static struct csnode *	cset_splay(struct csnode *, wchar_t);
 45
 46/*
 47 * cset_alloc --
 48 *	Allocate a set of characters.
 49 */
 50struct cset *
 51cset_alloc(void)
 52{
 53	struct cset *cs;
 54
 55	if ((cs = malloc(sizeof(*cs))) == NULL)
 56		return (NULL);
 57	cs->cs_root = NULL;
 58	cs->cs_classes = NULL;
 59	cs->cs_havecache = false;
 60	cs->cs_invert = false;
 61	return (cs);
 62}
 63
 64/*
 65 * cset_add --
 66 *	Add a character to the set.
 67 */
 68bool
 69cset_add(struct cset *cs, wchar_t ch)
 70{
 71	struct csnode *csn, *ncsn;
 72	wchar_t oval;
 73
 74	cs->cs_havecache = false;
 75
 76	/*
 77	 * Inserting into empty tree; new item becomes the root.
 78	 */
 79	if (cs->cs_root == NULL) {
 80		csn = malloc(sizeof(*cs->cs_root));
 81		if (csn == NULL)
 82			return (false);
 83		csn->csn_left = csn->csn_right = NULL;
 84		csn->csn_min = csn->csn_max = ch;
 85		cs->cs_root = csn;
 86		return (true);
 87	}
 88
 89	/*
 90	 * Splay to check whether the item already exists, and otherwise,
 91	 * where we should put it.
 92	 */
 93	csn = cs->cs_root = cset_splay(cs->cs_root, ch);
 94
 95	/*
 96	 * Avoid adding duplicate nodes.
 97	 */
 98	if (cset_rangecmp(csn, ch) == 0)
 99		return (true);
100
101	/*
102	 * Allocate a new node and make it the new root.
103	 */
104	ncsn = malloc(sizeof(*ncsn));
105	if (ncsn == NULL)
106		return (false);
107	ncsn->csn_min = ncsn->csn_max = ch;
108	if (cset_rangecmp(csn, ch) < 0) {
109		ncsn->csn_left = csn->csn_left;
110		ncsn->csn_right = csn;
111		csn->csn_left = NULL;
112	} else {
113		ncsn->csn_right = csn->csn_right;
114		ncsn->csn_left = csn;
115		csn->csn_right = NULL;
116	}
117	cs->cs_root = ncsn;
118
119	/*
120	 * Coalesce with left and right neighbours if possible.
121	 */
122	if (ncsn->csn_left != NULL) {
123		ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
124		if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
125			oval = ncsn->csn_left->csn_min;
126			ncsn->csn_left = cset_delete(ncsn->csn_left,
127			    ncsn->csn_left->csn_min);
128			ncsn->csn_min = oval;
129		}
130	}
131	if (ncsn->csn_right != NULL) {
132		ncsn->csn_right = cset_splay(ncsn->csn_right,
133		    ncsn->csn_max + 1);
134		if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
135			oval = ncsn->csn_right->csn_max;
136			ncsn->csn_right = cset_delete(ncsn->csn_right,
137			    ncsn->csn_right->csn_min);
138			ncsn->csn_max = oval;
139		}
140	}
141
142	return (true);
143}
144
145/*
146 * cset_in_hard --
147 *	Determine whether a character is in the set without using
148 *	the cache.
149 */
150bool
151cset_in_hard(struct cset *cs, wchar_t ch)
152{
153	struct csclass *csc;
154
155	for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
156		if (csc->csc_invert ^ (iswctype(ch, csc->csc_type) != 0))
157			return (cs->cs_invert ^ true);
158	if (cs->cs_root != NULL) {
159		cs->cs_root = cset_splay(cs->cs_root, ch);
160		return (cs->cs_invert ^ (cset_rangecmp(cs->cs_root, ch) == 0));
161	}
162	return (cs->cs_invert ^ false);
163}
164
165/*
166 * cset_cache --
167 *	Update the cache.
168 */
169void
170cset_cache(struct cset *cs)
171{
172	wchar_t i;
173
174	for (i = 0; i < CS_CACHE_SIZE; i++)
175		cs->cs_cache[i] = cset_in_hard(cs, i);
176
177	cs->cs_havecache = true;
178}
179
180/*
181 * cset_invert --
182 *	Invert the character set.
183 */
184void
185cset_invert(struct cset *cs)
186{
187
188	cs->cs_invert ^= true;
189	cs->cs_havecache = false;
190}
191
192/*
193 * cset_addclass --
194 *	Add a wctype()-style character class to the set, optionally
195 *	inverting it.
196 */
197bool
198cset_addclass(struct cset *cs, wctype_t type, bool invert)
199{
200	struct csclass *csc;
201
202	csc = malloc(sizeof(*csc));
203	if (csc == NULL)
204		return (false);
205	csc->csc_type = type;
206	csc->csc_invert = invert;
207	csc->csc_next = cs->cs_classes;
208	cs->cs_classes = csc;
209	cs->cs_havecache = false;
210	return (true);
211}
212
213static __inline int
214cset_rangecmp(struct csnode *t, wchar_t ch)
215{
216
217	if (ch < t->csn_min)
218		return (-1);
219	if (ch > t->csn_max)
220		return (1);
221	return (0);
222}
223
224static struct csnode *
225cset_splay(struct csnode *t, wchar_t ch)
226{
227	struct csnode N, *l, *r, *y;
228
229	/*
230	 * Based on public domain code from Sleator.
231	 */
232
233	assert(t != NULL);
234
235	N.csn_left = N.csn_right = NULL;
236	l = r = &N;
237	for (;;) {
238		if (cset_rangecmp(t, ch) < 0) {
239			if (t->csn_left != NULL &&
240			    cset_rangecmp(t->csn_left, ch) < 0) {
241				y = t->csn_left;
242				t->csn_left = y->csn_right;
243				y->csn_right = t;
244				t = y;
245			}
246			if (t->csn_left == NULL)
247				break;
248			r->csn_left = t;
249			r = t;
250			t = t->csn_left;
251		} else if (cset_rangecmp(t, ch) > 0) {
252			if (t->csn_right != NULL &&
253			    cset_rangecmp(t->csn_right, ch) > 0) {
254				y = t->csn_right;
255				t->csn_right = y->csn_left;
256				y->csn_left = t;
257				t = y;
258			}
259			if (t->csn_right == NULL)
260				break;
261			l->csn_right = t;
262			l = t;
263			t = t->csn_right;
264		} else
265			break;
266	}
267	l->csn_right = t->csn_left;
268	r->csn_left = t->csn_right;
269	t->csn_left = N.csn_right;
270	t->csn_right = N.csn_left;
271	return (t);
272}
273
274static struct csnode *
275cset_delete(struct csnode *t, wchar_t ch)
276{
277	struct csnode *x;
278
279	assert(t != NULL);
280	t = cset_splay(t, ch);
281	assert(cset_rangecmp(t, ch) == 0);
282	if (t->csn_left == NULL)
283		x = t->csn_right;
284	else {
285		x = cset_splay(t->csn_left, ch);
286		x->csn_right = t->csn_right;
287	}
288	free(t);
289	return x;
290}