/usr.bin/tr/cset.c
https://bitbucket.org/freebsd/freebsd-head/ · C · 290 lines · 185 code · 30 blank · 75 comment · 50 complexity · f893333d11fdf31f380b25d63ceff548 MD5 · raw file
- /*-
- * Copyright (c) 2004 Tim J. Robbins.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- /*
- * "Set of characters" ADT implemented as a splay tree of extents, with
- * a lookup table cache to simplify looking up the first bunch of
- * characters (which are presumably more common than others).
- */
- #include <sys/cdefs.h>
- __FBSDID("$FreeBSD$");
- #include <assert.h>
- #include <stdbool.h>
- #include <stdlib.h>
- #include <wchar.h>
- #include <wctype.h>
- #include "cset.h"
- static struct csnode * cset_delete(struct csnode *, wchar_t);
- static __inline int cset_rangecmp(struct csnode *, wchar_t);
- static struct csnode * cset_splay(struct csnode *, wchar_t);
- /*
- * cset_alloc --
- * Allocate a set of characters.
- */
- struct cset *
- cset_alloc(void)
- {
- struct cset *cs;
- if ((cs = malloc(sizeof(*cs))) == NULL)
- return (NULL);
- cs->cs_root = NULL;
- cs->cs_classes = NULL;
- cs->cs_havecache = false;
- cs->cs_invert = false;
- return (cs);
- }
- /*
- * cset_add --
- * Add a character to the set.
- */
- bool
- cset_add(struct cset *cs, wchar_t ch)
- {
- struct csnode *csn, *ncsn;
- wchar_t oval;
- cs->cs_havecache = false;
- /*
- * Inserting into empty tree; new item becomes the root.
- */
- if (cs->cs_root == NULL) {
- csn = malloc(sizeof(*cs->cs_root));
- if (csn == NULL)
- return (false);
- csn->csn_left = csn->csn_right = NULL;
- csn->csn_min = csn->csn_max = ch;
- cs->cs_root = csn;
- return (true);
- }
- /*
- * Splay to check whether the item already exists, and otherwise,
- * where we should put it.
- */
- csn = cs->cs_root = cset_splay(cs->cs_root, ch);
- /*
- * Avoid adding duplicate nodes.
- */
- if (cset_rangecmp(csn, ch) == 0)
- return (true);
- /*
- * Allocate a new node and make it the new root.
- */
- ncsn = malloc(sizeof(*ncsn));
- if (ncsn == NULL)
- return (false);
- ncsn->csn_min = ncsn->csn_max = ch;
- if (cset_rangecmp(csn, ch) < 0) {
- ncsn->csn_left = csn->csn_left;
- ncsn->csn_right = csn;
- csn->csn_left = NULL;
- } else {
- ncsn->csn_right = csn->csn_right;
- ncsn->csn_left = csn;
- csn->csn_right = NULL;
- }
- cs->cs_root = ncsn;
- /*
- * Coalesce with left and right neighbours if possible.
- */
- if (ncsn->csn_left != NULL) {
- ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
- if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
- oval = ncsn->csn_left->csn_min;
- ncsn->csn_left = cset_delete(ncsn->csn_left,
- ncsn->csn_left->csn_min);
- ncsn->csn_min = oval;
- }
- }
- if (ncsn->csn_right != NULL) {
- ncsn->csn_right = cset_splay(ncsn->csn_right,
- ncsn->csn_max + 1);
- if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
- oval = ncsn->csn_right->csn_max;
- ncsn->csn_right = cset_delete(ncsn->csn_right,
- ncsn->csn_right->csn_min);
- ncsn->csn_max = oval;
- }
- }
- return (true);
- }
- /*
- * cset_in_hard --
- * Determine whether a character is in the set without using
- * the cache.
- */
- bool
- cset_in_hard(struct cset *cs, wchar_t ch)
- {
- struct csclass *csc;
- for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
- if (csc->csc_invert ^ (iswctype(ch, csc->csc_type) != 0))
- return (cs->cs_invert ^ true);
- if (cs->cs_root != NULL) {
- cs->cs_root = cset_splay(cs->cs_root, ch);
- return (cs->cs_invert ^ (cset_rangecmp(cs->cs_root, ch) == 0));
- }
- return (cs->cs_invert ^ false);
- }
- /*
- * cset_cache --
- * Update the cache.
- */
- void
- cset_cache(struct cset *cs)
- {
- wchar_t i;
- for (i = 0; i < CS_CACHE_SIZE; i++)
- cs->cs_cache[i] = cset_in_hard(cs, i);
- cs->cs_havecache = true;
- }
- /*
- * cset_invert --
- * Invert the character set.
- */
- void
- cset_invert(struct cset *cs)
- {
- cs->cs_invert ^= true;
- cs->cs_havecache = false;
- }
- /*
- * cset_addclass --
- * Add a wctype()-style character class to the set, optionally
- * inverting it.
- */
- bool
- cset_addclass(struct cset *cs, wctype_t type, bool invert)
- {
- struct csclass *csc;
- csc = malloc(sizeof(*csc));
- if (csc == NULL)
- return (false);
- csc->csc_type = type;
- csc->csc_invert = invert;
- csc->csc_next = cs->cs_classes;
- cs->cs_classes = csc;
- cs->cs_havecache = false;
- return (true);
- }
- static __inline int
- cset_rangecmp(struct csnode *t, wchar_t ch)
- {
- if (ch < t->csn_min)
- return (-1);
- if (ch > t->csn_max)
- return (1);
- return (0);
- }
- static struct csnode *
- cset_splay(struct csnode *t, wchar_t ch)
- {
- struct csnode N, *l, *r, *y;
- /*
- * Based on public domain code from Sleator.
- */
- assert(t != NULL);
- N.csn_left = N.csn_right = NULL;
- l = r = &N;
- for (;;) {
- if (cset_rangecmp(t, ch) < 0) {
- if (t->csn_left != NULL &&
- cset_rangecmp(t->csn_left, ch) < 0) {
- y = t->csn_left;
- t->csn_left = y->csn_right;
- y->csn_right = t;
- t = y;
- }
- if (t->csn_left == NULL)
- break;
- r->csn_left = t;
- r = t;
- t = t->csn_left;
- } else if (cset_rangecmp(t, ch) > 0) {
- if (t->csn_right != NULL &&
- cset_rangecmp(t->csn_right, ch) > 0) {
- y = t->csn_right;
- t->csn_right = y->csn_left;
- y->csn_left = t;
- t = y;
- }
- if (t->csn_right == NULL)
- break;
- l->csn_right = t;
- l = t;
- t = t->csn_right;
- } else
- break;
- }
- l->csn_right = t->csn_left;
- r->csn_left = t->csn_right;
- t->csn_left = N.csn_right;
- t->csn_right = N.csn_left;
- return (t);
- }
- static struct csnode *
- cset_delete(struct csnode *t, wchar_t ch)
- {
- struct csnode *x;
- assert(t != NULL);
- t = cset_splay(t, ch);
- assert(cset_rangecmp(t, ch) == 0);
- if (t->csn_left == NULL)
- x = t->csn_right;
- else {
- x = cset_splay(t->csn_left, ch);
- x->csn_right = t->csn_right;
- }
- free(t);
- return x;
- }