zend_string.cpp | searchcode

/extensions/hphp/runtime/base/zend/zend_string.cpp

Large files are truncated, but you can click here to view the full file

/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com)         |
   | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
   +----------------------------------------------------------------------+
   | This source file is subject to version 2.00 of the Zend license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.zend.com/license/2_00.txt.                                |
   | If you did not receive a copy of the Zend license and are unable to  |
   | obtain it through the world-wide-web, please send a note to          |
   | license@zend.com so we can mail you a copy immediately.              |
   +----------------------------------------------------------------------+
*/

#include <runtime/base/zend/zend_string.h>
#include <runtime/base/zend/zend_printf.h>
#include <runtime/base/zend/zend_math.h>

//#include <util/lock.h>
#include <limits.h>
#include <math.h>
#include <monetary.h>

#include <runtime/base/util/exceptions.h>
#include <runtime/base/complex_types.h>
//#include <runtime/base/util/string_buffer.h>
//#include <runtime/base/runtime_error.h>
//#include <runtime/base/type_conversions.h>
//#include <runtime/base/builtin_functions.h>

#ifdef __APPLE__
#ifndef isnan
#define isnan(x)  \
  ( sizeof (x) == sizeof(float )  ? __inline_isnanf((float)(x)) \
  : sizeof (x) == sizeof(double)  ? __inline_isnand((double)(x))  \
  : __inline_isnan ((long double)(x)))
#endif

#ifndef isinf
#define isinf(x)  \
  ( sizeof (x) == sizeof(float )  ? __inline_isinff((float)(x)) \
  : sizeof (x) == sizeof(double)  ? __inline_isinfd((double)(x))  \
  : __inline_isinf ((long double)(x)))
#endif
#endif


#define PHP_QPRINT_MAXL 75

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
// helpers

/**
 * Normalizes a (start, length) pair against a string of "len" bytes.
 *
 * @param len     length of the string being sliced
 * @param f       start offset; a negative value counts back from the end and
 *                is rewritten in place to the equivalent non-negative offset
 * @param l       requested length; a negative value means "stop that many
 *                bytes before the end" and is rewritten in place; a length
 *                running past the end of the string is clamped
 * @param strict  when true, a start offset equal to "len" is rejected
 * @return false if the normalized start or length falls outside the string,
 *         true otherwise (with f and l updated)
 */
bool string_substr_check(int len, int &f, int &l, bool strict /* = true */) {
  // A negative start is an offset counted back from the end of the string.
  if (f < 0) {
    f += len;
    if (f < 0) return false;
  }
  if (f > len) return false;
  if (strict && f == len) return false;

  // A negative length is converted to the number of bytes that remain after
  // dropping that many from the end.
  if (l < 0) {
    l += len - f;
    if (l < 0) return false;
  }

  // Unsigned arithmetic keeps the sum from overflowing a signed int.
  const unsigned int requested_end = (unsigned int)f + (unsigned int)l;
  if (requested_end > (unsigned int)len) {
    l = len - f;
  }
  return true;
}

/**
 * Builds a 256-entry membership table from a character list.
 *
 * Each byte of the list sets mask[byte] to 1. A four-byte sequence of the
 * form "a..z" whose right bound is not smaller than its left bound sets every
 * entry from the left bound to the right bound inclusive.
 *
 * @param sinput  character list (need not be NUL-terminated)
 * @param len     number of bytes in sinput
 * @param mask    output table; must have room for 256 bytes
 */
void string_charmask(const char *sinput, int len, char *mask) {
  const unsigned char *const last = (unsigned char *)sinput + len;

  memset(mask, 0, 256);
  for (const unsigned char *cur = (unsigned char *)sinput; cur < last; ++cur) {
    const unsigned char lo = *cur;

    // Well-formed ascending range: mark the whole span and jump past it.
    if (cur + 3 < last && cur[1] == '.' && cur[2] == '.' && cur[3] >= lo) {
      memset(mask + lo, 1, cur[3] - lo + 1);
      cur += 3;
      continue;
    }

    // A ".." that is not part of a well-formed range is skipped without
    // touching the mask.
    // FIXME: this should raise throw_invalid_argument for the individual
    // cases (missing left of '..', missing right of '..', range not
    // incrementing, or a generic invalid '..'-range).
    if (cur + 1 < last && cur[0] == '.' && cur[1] == '.') {
      continue;
    }

    mask[lo] = 1;
  }
}

/**
 * Bounded string copy in the style of strlcpy().
 *
 * Copies at most siz - 1 bytes of the NUL-terminated string src into dst and
 * NUL-terminates dst whenever siz is positive.
 *
 * @param dst  destination buffer of at least siz bytes
 * @param src  NUL-terminated source string
 * @param siz  size of dst; zero or negative means nothing is written
 * @return the length of src (excluding its NUL), so a return value >= siz
 *         tells the caller the copy was truncated
 */
int string_copy(char *dst, const char *src, int siz) {
  char *d = dst;
  const char *s = src;
  // A negative size would otherwise convert to a huge size_t and turn this
  // into an unbounded copy; treat it as "no room at all".
  size_t n = siz > 0 ? (size_t)siz : 0;

  /* Copy as many bytes as will fit */
  if (n != 0 && --n != 0) {
    do {
      if ((*d++ = *s++) == 0) {
        break;
      }
    } while (--n != 0);
  }

  /* Not enough room in dst, add NUL and traverse rest of src */
  if (n == 0) {
    if (siz > 0) {
      *d = '\0';    /* NUL-terminate dst */
    }
    while (*s++) {
    }
  }

  return (int)(s - src - 1);  /* count does not include NUL */
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Joins two byte strings into a newly malloc()ed, NUL-terminated buffer.
 *
 * @param s1, len1  first string and its length
 * @param s2, len2  second string and its length
 * @param len       receives len1 + len2
 * @return the new buffer
 * @throws FatalErrorException if the allocation fails
 */
char *string_concat(const char *s1, int len1, const char *s2, int len2,
                    int &len) {
  const int total = len1 + len2;
  len = total;

  char *joined = (char *)malloc(total + 1);
  if (!joined) {
    throw FatalErrorException(0, "malloc failed: %d", len);
  }

  char *tail = joined + len1;
  memcpy(joined, s1, len1);
  memcpy(tail, s2, len2);
  tail[len2] = 0;
  return joined;
}

///////////////////////////////////////////////////////////////////////////////
// comparisons

/**
 * Length-aware comparison of two byte strings.
 *
 * Bytes are compared as plain `char` over the common prefix; if the prefix
 * is identical the shorter string orders first.
 *
 * @return -1, 0 or 1
 */
int string_cmp(const char *s1, int len1, const char *s2, int len2) {
  const int common = (len1 <= len2) ? len1 : len2;

  for (int idx = 0; idx < common; ++idx) {
    const char lhs = s1[idx];
    const char rhs = s2[idx];
    if (lhs != rhs) {
      return lhs > rhs ? 1 : -1;
    }
  }

  if (len1 == len2) {
    return 0;
  }
  return len1 < len2 ? -1 : 1;
}

/**
 * Case-insensitive, length-aware comparison of two byte strings.
 *
 * Each byte is upper-cased with toupper() before being compared over the
 * common prefix; if the prefix is identical the shorter string orders first.
 *
 * @return -1, 0 or 1
 */
int string_casecmp(const char *s1, int len1, const char *s2, int len2) {
  const int common = (len1 <= len2) ? len1 : len2;

  for (int i = 0; i < common; i++) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so plain char must be converted before the call.
    char c1 = toupper((unsigned char)s1[i]);
    char c2 = toupper((unsigned char)s2[i]);
    if (c1 > c2) return 1;
    if (c1 < c2) return -1;
  }

  if (len1 == len2) {
    return 0;
  }
  return len1 < len2 ? -1 : 1;
}

/**
 * Compares the first "len" bytes of two buffers as plain `char`.
 *
 * @return -1, 0 or 1 according to the first differing byte; 0 if none differ
 */
int string_ncmp(const char *s1, const char *s2, int len) {
  int idx = 0;
  while (idx < len) {
    const char lhs = s1[idx];
    const char rhs = s2[idx];
    if (lhs != rhs) {
      return lhs > rhs ? 1 : -1;
    }
    ++idx;
  }
  return 0;
}

/**
 * Case-insensitively compares the first "len" bytes of two buffers.
 *
 * @return -1, 0 or 1 according to the first byte that differs after
 *         upper-casing; 0 if none differ
 */
int string_ncasecmp(const char *s1, const char *s2, int len) {
  for (int i = 0; i < len; i++) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so plain char must be converted before the call.
    char c1 = toupper((unsigned char)s1[i]);
    char c2 = toupper((unsigned char)s2[i]);
    if (c1 > c2) return 1;
    if (c1 < c2) return -1;
  }
  return 0;
}

/**
 * Compares two right-aligned digit runs, advancing both cursors.
 *
 * The longer run of digits wins. When both runs have the same length, the
 * first differing digit decides; that decision is remembered in "bias"
 * because it only counts once both runs are known to end together.
 *
 * @param a, b        cursors into the two strings; advanced past the digits
 *                    that were examined
 * @param aend, bend  one-past-the-end pointers for the two strings
 * @return -1, 0 or +1
 */
static int compare_right(char const **a, char const *aend,
                         char const **b, char const *bend) {
  int bias = 0;

  while (1) {
    const int a_in_run = (*a != aend) && isdigit((int)(unsigned char)**a);
    const int b_in_run = (*b != bend) && isdigit((int)(unsigned char)**b);

    if (!a_in_run || !b_in_run) {
      if (a_in_run) return +1;   /* b ran out of digits first */
      if (b_in_run) return -1;   /* a ran out of digits first */
      return bias;               /* same length: first difference decides */
    }

    if (bias == 0 && **a != **b) {
      bias = (**a < **b) ? -1 : +1;
    }

    ++*a;
    ++*b;
  }
}

/**
 * Compares two left-aligned digit runs, advancing both cursors.
 *
 * The first position at which the runs differ decides the result; a run
 * that ends earlier than the other orders first.
 *
 * @param a, b        cursors into the two strings; advanced past the digits
 *                    that compared equal
 * @param aend, bend  one-past-the-end pointers for the two strings
 * @return -1, 0 or +1
 */
static int compare_left(char const **a, char const *aend,
                        char const **b, char const *bend) {
  while (1) {
    const int a_in_run = (*a != aend) && isdigit((int)(unsigned char)**a);
    const int b_in_run = (*b != bend) && isdigit((int)(unsigned char)**b);

    if (!a_in_run || !b_in_run) {
      if (a_in_run) return +1;   /* b ran out of digits first */
      if (b_in_run) return -1;   /* a ran out of digits first */
      return 0;
    }

    if (**a != **b) {
      return (**a < **b) ? -1 : +1;
    }

    ++*a;
    ++*b;
  }
}

/**
 * "Natural order" comparison: runs of digits embedded in the strings are
 * compared as numbers (via compare_left / compare_right), everything else
 * byte by byte.
 *
 * @param a, a_len   first string and its length
 * @param b, b_len   second string and its length
 * @param fold_case  non-zero to upper-case both bytes before comparing them
 * @return a negative value, 0 or a positive value. When either string is
 *         empty the result is a_len - b_len converted to int; otherwise it
 *         is -1, 0 or +1.
 */
int string_natural_cmp(char const *a, size_t a_len,
                       char const *b, size_t b_len, int fold_case) {
  char ca, cb;
  char const *ap, *bp;
  char const *aend = a + a_len, *bend = b + b_len;
  int fractional, result;

  if (a_len == 0 || b_len == 0)
    return a_len - b_len;

  ap = a;
  bp = b;
  while (1) {
    ca = *ap; cb = *bp;

    /* skip over leading whitespace */
    /* NOTE(review): these loops do not test against aend/bend; they appear
       to rely on a non-space byte (e.g. a NUL terminator) following the
       string data -- confirm against callers. */
    while (isspace((int)(unsigned char)ca))
      ca = *++ap;

    while (isspace((int)(unsigned char)cb))
      cb = *++bp;

    /* process run of digits */
    if (isdigit((int)(unsigned char)ca)  &&  isdigit((int)(unsigned char)cb)) {
      /* a run where either side starts with '0' is compared left-aligned,
         any other run right-aligned */
      fractional = (ca == '0' || cb == '0');

      if (fractional)
        result = compare_left(&ap, aend, &bp, bend);
      else
        result = compare_right(&ap, aend, &bp, bend);

      if (result != 0)
        return result;
      else if (ap == aend && bp == bend)
        /* End of the strings. Let caller sort them out. */
        return 0;
      else {
        /* Keep on comparing from the current point. */
        /* NOTE(review): if only one cursor reached its end, this reads the
           byte at aend/bend -- presumably the terminator; confirm. */
        ca = *ap; cb = *bp;
      }
    }

    if (fold_case) {
      ca = toupper((int)(unsigned char)ca);
      cb = toupper((int)(unsigned char)cb);
    }

    if (ca < cb)
      return -1;
    else if (ca > cb)
      return +1;

    /* bytes are equal: advance both cursors and check for end of input */
    ++ap; ++bp;
    if (ap >= aend && bp >= bend)
      /* The strings compare the same.  Perhaps the caller
         will want to call strcmp to break the tie. */
      return 0;
    else if (ap >= aend)
      return -1;
    else if (bp >= bend)
      return 1;
  }
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Returns a newly malloc()ed, NUL-terminated copy of the first "len" bytes
 * of s with every byte passed through tolower().
 */
char *string_to_lower(const char *s, int len) {
  ASSERT(s);
  char *ret = (char *)malloc(len + 1);
  for (int i = 0; i < len; i++) {
    // tolower() is only defined for values representable as unsigned char
    // (or EOF), so plain char must be converted before the call.
    ret[i] = tolower((unsigned char)s[i]);
  }
  ret[len] = '\0';
  return ret;
}

/**
 * Returns a newly malloc()ed, NUL-terminated copy of the first "len" bytes
 * of s with every byte passed through toupper().
 */
char *string_to_upper(const char *s, int len) {
  ASSERT(s);
  char *ret = (char *)malloc(len + 1);
  for (int i = 0; i < len; i++) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so plain char must be converted before the call.
    ret[i] = toupper((unsigned char)s[i]);
  }
  ret[len] = '\0';
  return ret;
}

/**
 * Returns a copy of s (made with string_duplicate) whose first byte has been
 * passed through toupper(). A copy whose first byte is NUL is returned as-is.
 */
char *string_to_upper_first(const char *s, int len) {
  ASSERT(s);
  char *ret = string_duplicate(s, len);
  if (*ret) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so plain char must be converted before the call.
    *ret = toupper((unsigned char)*ret);
  }
  return ret;
}

/**
 * Returns a copy of s (made with string_duplicate) in which the first byte,
 * and every byte that directly follows a whitespace byte, has been passed
 * through toupper(). A copy whose first byte is NUL is returned as-is.
 */
char *string_to_upper_words(const char *s, int len) {
  ASSERT(s);
  char *ret = string_duplicate(s, len);
  if (*ret) {
    // The <ctype.h> functions are only defined for values representable as
    // unsigned char (or EOF), so plain char must be converted before each
    // call.
    *ret = toupper((unsigned char)*ret);
    for (int i = 1; i < len; i++) {
      if (isspace((unsigned char)ret[i-1])) {
        ret[i] = toupper((unsigned char)ret[i]);
      }
    }
  }
  return ret;
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Strips bytes belonging to a character list from one or both ends of s.
 *
 * @param s            input string
 * @param len          in: length of s; out: length of the trimmed result
 * @param charlist     bytes to strip, in string_charmask() syntax
 * @param charlistlen  length of charlist
 * @param mode         bit 1 (value 1) trims the left end, bit 2 (value 2)
 *                     trims the right end
 * @return a copy of the remaining bytes made with string_duplicate
 */
char *string_trim(const char *s, int &len,
                  const char *charlist, int charlistlen, int mode) {
  ASSERT(s);
  char mask[256];
  string_charmask(charlist, charlistlen, mask);

  const char *begin = s;
  int remaining = len;

  if (mode & 1) {
    while (remaining > 0 && mask[(unsigned char)*begin]) {
      ++begin;
      --remaining;
    }
  }
  if (mode & 2) {
    while (remaining > 0 && mask[(unsigned char)begin[remaining - 1]]) {
      --remaining;
    }
  }

  len = remaining;
  return string_duplicate(begin, remaining);
}

#define STR_PAD_LEFT            0
#define STR_PAD_RIGHT           1
#define STR_PAD_BOTH            2

/**
 * Pads a string to a target length with a repeating pad string.
 *
 * @param input        string to pad
 * @param len          in: length of input; out: length of the padded result
 *                     (left unchanged when the input is returned unpadded or
 *                     NULL is returned)
 * @param pad_length   desired total length
 * @param pad_string   bytes used for padding, repeated as needed
 * @param pad_str_len  length of pad_string
 * @param pad_type     STR_PAD_LEFT, STR_PAD_RIGHT or STR_PAD_BOTH
 * @return a newly allocated NUL-terminated string; a plain copy of the input
 *         when pad_length is negative or not larger than len; NULL when
 *         pad_string is empty or pad_type is not one of the three constants
 */
char *string_pad(const char *input, int &len, int pad_length,
                 const char *pad_string, int pad_str_len,
                 int pad_type) {
  ASSERT(input);
  int num_pad_chars = pad_length - len;

  /* If resulting string turns out to be shorter than input string,
     we simply copy the input and return. */
  if (pad_length < 0 || num_pad_chars < 0) {
    return string_duplicate(input, len);
  }

  /* Setup the padding string values if specified. */
  if (pad_str_len == 0) {
    // FIXME: throw_invalid_argument("pad_string: (empty)");
    return NULL;
  }

  /* We need to figure out the left/right padding lengths. This is done
     before allocating so that an invalid pad_type cannot leak the buffer. */
  int left_pad, right_pad;
  switch (pad_type) {
  case STR_PAD_RIGHT:
    left_pad = 0;
    right_pad = num_pad_chars;
    break;
  case STR_PAD_LEFT:
    left_pad = num_pad_chars;
    right_pad = 0;
    break;
  case STR_PAD_BOTH:
    /* an odd number of pad bytes puts the extra one on the right */
    left_pad = num_pad_chars / 2;
    right_pad = num_pad_chars - left_pad;
    break;
  default:
    // throw_invalid_argument("pad_type: %d", pad_type);
    return NULL;
  }

  char *result = (char *)malloc(pad_length + 1);

  /* First we pad on the left. */
  int result_len = 0;
  for (int i = 0; i < left_pad; i++) {
    result[result_len++] = pad_string[i % pad_str_len];
  }

  /* Then we copy the input string. */
  memcpy(result + result_len, input, len);
  result_len += len;

  /* Finally, we pad on the right. */
  for (int i = 0; i < right_pad; i++) {
    result[result_len++] = pad_string[i % pad_str_len];
  }
  result[result_len] = '\0';

  len = result_len;
  return result;
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Extracts a substring, accepting negative start/length values as handled by
 * string_substr_check().
 *
 * @param s         input string
 * @param len       in: length of s; out: length of the result (0 when the
 *                  requested range is rejected)
 * @param start     start offset
 * @param length    requested length
 * @param nullable  when the range is rejected, return NULL if true, or a
 *                  duplicated empty string if false
 * @return a copy of the selected bytes made with string_duplicate
 */
char *string_substr(const char *s, int &len, int start, int length,
                    bool nullable) {
  ASSERT(s);

  if (!string_substr_check(len, start, length)) {
    len = 0;
    return nullable ? (char *)NULL : string_duplicate("", 0);
  }

  len = length;
  return string_duplicate(s + start, length);
}

/**
 * Finds the first occurrence of a single byte at or after "pos".
 *
 * @param input           string to search
 * @param len             length of input
 * @param ch              byte to look for
 * @param pos             start offset; negative values are resolved by
 *                        string_substr_check()
 * @param case_sensitive  when false, both the input and ch are lower-cased
 *                        before searching
 * @return the offset of the match within input, or -1 if there is none
 */
int string_find(const char *input, int len, char ch, int pos,
                bool case_sensitive) {
  ASSERT(input);
  if (!len || pos >= len) {
    return -1;
  }

  if (!case_sensitive) {
    // Search a lower-cased copy for the lower-cased byte.
    char *folded = string_to_lower(input, len);
    const int found = string_find(folded, len, (char)tolower(ch), pos, true);
    free(folded);
    return found;
  }

  int span = 1;
  if (!string_substr_check(len, pos, span)) {
    return -1;
  }

  const void *hit = memchr(input + pos, ch, len - pos);
  if (hit == NULL) {
    return -1;
  }
  return (int)((const char *)hit - input);
}

/**
 * Finds the last occurrence of a single byte.
 *
 * A non-negative "pos" limits the search to offsets >= pos. A negative "pos"
 * is resolved by string_substr_check() to an offset from the end, and the
 * search then covers offsets from that position down to 0.
 *
 * @param case_sensitive  when false, both the input and ch are lower-cased
 *                        before searching
 * @return the offset of the match within input, or -1 if there is none
 */
int string_rfind(const char *input, int len, char ch, int pos,
                 bool case_sensitive) {
  ASSERT(input);

  if (len <= 0 || pos >= len) {
    return -1;
  }

  if (!case_sensitive) {
    // Search a lower-cased copy for the lower-cased byte.
    char *folded = string_to_lower(input, len);
    const int found = string_rfind(folded, len, (char)tolower(ch), pos, true);
    free(folded);
    return found;
  }

  // Must be captured before string_substr_check() rewrites a negative pos.
  const bool stop_at_offset = (pos >= 0);
  int span = 0;
  if (!string_substr_check(len, pos, span)) {
    return -1;
  }

  const int highest = stop_at_offset ? len - 1 : pos;
  const int lowest = stop_at_offset ? pos : 0;
  for (int idx = highest; idx >= lowest; --idx) {
    if (input[idx] == ch) {
      return idx;
    }
  }
  return -1;
}

/**
 * Finds the first occurrence of the byte string s at or after "pos".
 *
 * @param input           string to search
 * @param len             length of input
 * @param s, s_len        needle and its length; an empty needle never matches
 * @param pos             start offset; negative values are resolved by
 *                        string_substr_check()
 * @param case_sensitive  when false, lower-cased copies of both strings are
 *                        searched instead
 * @return the offset of the match within input, or -1 if there is none
 */
int string_find(const char *input, int len, const char *s, int s_len,
                int pos, bool case_sensitive) {
  ASSERT(input);
  ASSERT(s);
  if (s_len == 0 || len == 0 || pos >= len) {
    return -1;
  }

  if (!case_sensitive) {
    char *folded_needle = string_to_lower(s, s_len);
    char *folded_input = string_to_lower(input, len);
    const int found =
      string_find(folded_input, len, folded_needle, s_len, pos, true);
    free(folded_input);
    free(folded_needle);
    return found;
  }

  int span = 1;
  if (!string_substr_check(len, pos, span)) {
    return -1;
  }

  void *hit = memmem(input + pos, len - pos, s, s_len);
  if (hit == NULL) {
    return -1;
  }
  return (int)((const char *)hit - input);
}

/**
 * Finds the last occurrence of the byte string s.
 *
 * A non-negative "pos" limits the search to matches starting at offsets
 * >= pos. A negative "pos" is resolved by string_substr_check() to an offset
 * from the end, and matches starting below that offset are considered.
 *
 * @param input           string to search
 * @param len             length of input
 * @param s, s_len        needle and its length; an empty needle never matches
 * @param case_sensitive  when false, lower-cased copies of both strings are
 *                        searched instead
 * @return the offset of the match within input, or -1 if there is none
 */
int string_rfind(const char *input, int len, const char *s, int s_len,
                 int pos, bool case_sensitive) {
  ASSERT(input);
  ASSERT(s);
  if (!s_len) {
    return -1;
  }

  if (len && pos < len) {
    if (!case_sensitive) {
      char *lowered_s = string_to_lower(s, s_len);
      char *lowered = string_to_lower(input, len);
      int ret = string_rfind(lowered, len, lowered_s, s_len, pos, true);
      free(lowered);
      free(lowered_s);
      return ret;
    }

    int l = 0;
    // Must be captured before string_substr_check() rewrites a negative pos.
    bool stop_at_offset = (pos >= 0);
    if (!string_substr_check(len, pos, l)) {
      return -1;
    }

    int start = len, stop = 0;
    if (stop_at_offset) {
      stop = pos;
    } else {
      start = pos;
    }

    // A match must fit entirely inside the input; starting any later would
    // make memcmp() read past input + len.
    int first = start - 1;
    const int last_fit = len - s_len;
    if (first > last_fit) {
      first = last_fit;
    }

    for (int i = first; i >= stop; i--) {
      if (input[i] == s[0] && memcmp(input+i, s, s_len) == 0) {
        return i;
      }
    }
  }
  return -1;
}

/**
 * Finds the first occurrence of a byte string inside a memory range.
 *
 * @param haystack    start of the range to search
 * @param needle      bytes to look for
 * @param needle_len  number of bytes in needle
 * @param end         one past the last byte of the range
 * @return pointer to the first match, or NULL when there is none. NULL is
 *         also returned for an empty (or negative-length) needle and when
 *         the needle is longer than the range.
 */
const char *string_memnstr(const char *haystack, const char *needle,
                           int needle_len, const char *end) {
  // Reject inputs for which needle[needle_len - 1] or end - needle_len
  // would fall outside the respective buffers.
  if (needle_len <= 0 || end - haystack < needle_len) {
    return NULL;
  }

  const char first = needle[0];
  const char last = needle[needle_len - 1];
  const char *limit = end - needle_len;   /* last possible match start */
  const char *p = haystack;

  while (p <= limit) {
    p = (const char *)memchr(p, first, limit - p + 1);
    if (p == NULL) {
      return NULL;
    }
    // Check the last byte first as a cheap filter, then the rest.
    if (p[needle_len - 1] == last && !memcmp(needle, p, needle_len - 1)) {
      return p;
    }
    p++;
  }
  return NULL;
}

/**
 * Finds the last occurrence of a byte in a memory range.
 *
 * @param s  start of the range
 * @param c  byte to look for (converted to unsigned char)
 * @param n  number of bytes in the range
 * @return pointer to the last matching byte, or NULL if there is none
 */
void *string_memrchr(const void *s, int c, size_t n) {
  const unsigned char *base = (const unsigned char *)s;
  const unsigned char wanted = (unsigned char)c;

  // Walk by index so that no pointer below the start of the range is ever
  // formed.
  while (n > 0) {
    --n;
    if (base[n] == wanted) {
      return (void *)(base + n);
    }
  }
  return NULL;
}

/**
 * Replaces the range [start, start + length) of s with "replacement".
 *
 * start and length accept negative values as handled by
 * string_substr_check() (non-strict, so start may equal the string length).
 *
 * @param s            input string
 * @param len          in: length of s; out: length of the result
 * @param replacement  bytes to insert
 * @param len_repl     length of replacement
 * @return a newly allocated NUL-terminated string; a duplicated empty string
 *         (with len set to 0) when the range is rejected
 */
char *string_replace(const char *s, int &len, int start, int length,
                     const char *replacement, int len_repl) {
  ASSERT(s);
  ASSERT(replacement);
  if (!string_substr_check(len, start, length, false)) {
    len = 0;
    return string_duplicate("", 0);
  }

  char *ret = (char *)malloc(len + len_repl - length + 1);
  char *out = ret;

  // Head: everything before the replaced range.
  memcpy(out, s, start);
  out += start;

  // The replacement itself.
  memcpy(out, replacement, len_repl);
  out += len_repl;

  // Tail: everything after the replaced range.
  const int tail_len = len - (start + length);
  memcpy(out, s + start + length, tail_len);
  out += tail_len;

  *out = '\0';
  len = out - ret;
  return ret;
}

/**
 * Replaces every non-overlapping occurrence of "search" in "input".
 *
 * @param input           string to process
 * @param len             in: length of input; out: length of the result
 *                        (unchanged when NULL is returned)
 * @param search          bytes to look for; must be non-empty
 * @param len_search      length of search
 * @param replacement     bytes to substitute for each occurrence
 * @param len_replace     length of replacement
 * @param count           receives the number of replacements made; it is set
 *                        to 0 whenever NULL is returned
 * @param case_sensitive  passed through to string_find()
 * @return a newly malloc()ed NUL-terminated string, or NULL when the input
 *         is empty or contains no occurrence of search
 */
char *string_replace(const char *input, int &len,
                     const char *search, int len_search,
                     const char *replacement, int len_replace,
                     int &count, bool case_sensitive) {
  ASSERT(input);
  ASSERT(search && len_search);

  if (len == 0) {
    // Keep the out-parameter defined on this path as well.
    count = 0;
    return NULL;
  }

  // First pass: record the offset of every match.
  std::vector<int> founds;
  founds.reserve(16);
  if (len_search == 1) {
    for (int pos = string_find(input, len, *search, 0, case_sensitive);
         pos >= 0;
         pos = string_find(input, len, *search, pos + len_search,
                           case_sensitive)) {
      founds.push_back(pos);
    }
  } else {
    for (int pos = string_find(input, len, search, len_search, 0,
                               case_sensitive);
         pos >= 0;
         pos = string_find(input, len, search, len_search,
                           pos + len_search, case_sensitive)) {
      founds.push_back(pos);
    }
  }

  count = founds.size();
  if (count == 0) {
    return NULL; // not found
  }

  // Second pass: the exact output size is now known, so build the result by
  // alternating untouched input spans with the replacement.
  char *ret = (char *)malloc(len + (len_replace - len_search) * count + 1);
  char *p = ret;
  int pos = 0; // last position in input that hasn't been copied over yet
  int n;
  for (unsigned int i = 0; i < founds.size(); i++) {
    n = founds[i];
    if (n > pos) {
      n -= pos;
      memcpy(p, input, n);
      p += n;
      input += n;
      pos += n;
    }
    if (len_replace) {
      memcpy(p, replacement, len_replace);
      p += len_replace;
    }
    // skip over the matched bytes in the input
    input += len_search;
    pos += len_search;
  }
  // copy whatever follows the last match
  n = len;
  if (n > pos) {
    n -= pos;
    memcpy(p, input, n);
    p += n;
  }
  *p = '\0';

  len = p - ret;
  return ret;
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Returns a newly malloc()ed, NUL-terminated copy of the first "len" bytes
 * of s in reverse order.
 */
char *string_reverse(const char *s, int len) {
  ASSERT(s);
  char *reversed = (char *)malloc(len + 1);

  int written = 0;
  for (int src = len - 1; src >= 0; --src) {
    reversed[written++] = s[src];
  }

  reversed[written] = '\0';
  return reversed;
}

char *string_repeat(const char *s, int &len, int count) {
  ASSERT(s);

  if (len == 0 || count <= 0) {
    return NULL;
  }

  char *ret = (char *)malloc(len * count + 1);
  if (len == 1) {
    memset(ret, *s, count);
    len = count;
  } else {
    char *p = ret;
    for (int i = 0; i < count; i++) {
      memcpy(p, s, len);
      p += len;
    }
    len = p - ret;
  }
  ret[len] = '\0';
  return ret;
}

/**
 * Returns a randomly shuffled copy of str, using rand() as the source of
 * randomness.
 *
 * @param str  string to shuffle
 * @param len  length of str
 * @return a shuffled copy made with string_duplicate, or NULL when len <= 1
 *         (nothing to shuffle)
 */
char *string_shuffle(const char *str, int len) {
  ASSERT(str);
  if (len <= 1) {
    return NULL;
  }

  char *ret = string_duplicate(str, len);

  // Fisher-Yates: position n_left receives a byte chosen from the inclusive
  // range [0, n_left]. Excluding n_left itself would forbid a byte from ever
  // staying in place (e.g. "ab" would always become "ba").
  for (int n_left = len - 1; n_left > 0; --n_left) {
    int rnd_idx = rand() % (n_left + 1);
    if (rnd_idx != n_left) {
      char temp = ret[n_left];
      ret[n_left] = ret[rnd_idx];
      ret[rnd_idx] = temp;
    }
  }
  return ret;
}

/**
 * Splits src into chunks of "chunklen" bytes and appends "end" after each
 * chunk, including a final partial chunk.
 *
 * @param src       input string
 * @param srclen    in: length of src; out: length of the result (unchanged
 *                  when NULL is returned)
 * @param end       separator appended after every chunk
 * @param endlen    length of end
 * @param chunklen  chunk size in bytes; must be positive
 * @return a newly malloc()ed NUL-terminated string, or NULL when chunklen is
 *         not positive
 */
char *string_chunk_split(const char *src, int &srclen, const char *end,
                         int endlen, int chunklen) {
  // A zero chunk length would divide by zero below; a negative one makes no
  // sense either.
  if (chunklen <= 0) {
    return NULL;
  }

  int chunks = srclen / chunklen; // complete chunks!
  int restlen = srclen - chunks * chunklen; /* srclen % chunklen */

  // one separator per complete chunk, one for the remainder, plus the NUL
  int out_len = (chunks + 1) * endlen + srclen + 1;
  char *dest = (char *)malloc(out_len);

  const char *p = src;
  char *q = dest;
  for (int i = 0; i < chunks; i++) {
    memcpy(q, p, chunklen);
    q += chunklen;
    memcpy(q, end, endlen);
    q += endlen;
    p += chunklen;
  }

  if (restlen) {
    memcpy(q, p, restlen);
    q += restlen;
    memcpy(q, end, endlen);
    q += endlen;
  }

  *q = '\0';
  srclen = q - dest;
  return(dest);
}

///////////////////////////////////////////////////////////////////////////////

#define PHP_TAG_BUF_SIZE 1023

/**
 * Check if tag is in a set of tags.
 *
 * The tag is first normalized: it is lower-cased, leading whitespace and a
 * leading '/' are dropped, and everything from the first whitespace after
 * the tag name (or from '>') onwards is cut off. So <a whatever...> becomes
 * <a> and </tag> becomes <tag>. The normalized form is then looked up in
 * "set" with strstr().
 *
 * states:
 *
 * 0 start tag
 * 1 first non-whitespace char seen
 *
 * @param tag  tag text; must contain a '>' or a whitespace byte after the
 *             tag name, since that is what ends the scan
 * @param len  length of tag; used to size the scratch buffer
 * @param set  NUL-terminated list of allowed tags, e.g. "<a><b>"
 * @return 1 if the normalized tag occurs in set, 0 otherwise (including
 *         when len <= 0)
 */
static int string_tag_find(const char *tag, int len, char *set) {
  if (len <= 0) {
    return 0;
  }

  char *norm = (char *)malloc(len+1);
  char *n = norm;
  int state = 0;

  for (const char *t = tag; ; ++t) {
    // The <ctype.h> functions are only defined for values representable as
    // unsigned char (or EOF), so plain char must be converted first.
    const char c = (char)tolower((unsigned char)*t);

    if (c == '>') {
      break;
    }
    if (c == '<') {
      *(n++) = c;
      continue;
    }
    if (isspace((unsigned char)c)) {
      if (state == 1) {
        break;          /* whitespace after the name ends the tag */
      }
      continue;         /* leading whitespace is skipped */
    }
    if (state == 0) {
      state = 1;
      if (c == '/') {
        continue;       /* closing tags are matched like opening tags */
      }
    }
    *(n++) = c;
  }
  *(n++) = '>';
  *n = '\0';

  const int found = (strstr(set, norm) != NULL) ? 1 : 0;
  free(norm);
  return found;
}

/**
 * A simple little state-machine to strip out html and php tags
 *
 * States:
 *   0  output state: characters are copied to the result
 *   1  inside a normal html tag
 *   2  inside a php tag ("<?")
 *   3  inside a "<!" construct
 *   4  inside a "<!--" comment; left again on "-->"
 *
 * The state variable is passed in to allow a function like fgetss
 * to maintain state across calls to the function.
 *
 * lc holds the last significant character read and br is a bracket
 * counter.
 *
 * When an allow string is passed in we keep track of the string
 * in state 1 and when the tag is closed check it against the
 * allow string to see if we should allow it.
 *
 * swm: Added ability to strip <?xml tags without assuming it PHP
 * code.
 *
 * @param rbuf              buffer holding the input; the stripped result is
 *                          written back into it
 * @param len               number of input bytes in rbuf
 * @param stateptr          optional; supplies the initial state and receives
 *                          the final one
 * @param allow             optional NUL-terminated list of allowed tags; it
 *                          is lower-cased in place
 * @param allow_len         not used by this function
 * @param allow_tag_spaces  when false, a '<' followed by whitespace is
 *                          treated as an ordinary character
 * @return number of bytes written to rbuf
 */
static size_t strip_tags_impl(char *rbuf, int len, int *stateptr,
                              char *allow, int allow_len,
                              bool allow_tag_spaces) {
  /* buf/p: private copy of the input and the read cursor into it;
     rp: write cursor into rbuf;
     tbuf/tp: scratch buffer collecting the text of the current tag and its
     write cursor (only used when an allow list is given) */
  char *tbuf, *buf, *p, *tp, *rp, c, lc;
  /* depth counts '<' seen while already inside a tag; in_q holds the quote
     character of the quoted section we are in, or 0 */
  int br, i=0, depth=0, in_q = 0;
  int state = 0;

  if (stateptr)
    state = *stateptr;

  buf = string_duplicate(rbuf, len);
  c = *buf;
  lc = '\0';
  p = buf;
  rp = rbuf;
  br = 0;
  if (allow) {
    for (char *tmp = allow; *tmp; tmp++) {
      *tmp = tolower((int)*(unsigned char *)tmp);
    }
    tbuf = (char *)malloc(PHP_TAG_BUF_SIZE+1);
    tp = tbuf;
  } else {
    tbuf = tp = NULL;
  }

  /* Throughout the loop, whenever the tag buffer is full the write cursor
     wraps back to its start before the next byte is stored. */
  while (i < len) {
    switch (c) {
    case '\0':
      /* NUL bytes are never copied to the output */
      break;
    case '<':
      if (isspace(*(p + 1)) && !allow_tag_spaces) {
        goto reg_char;
      }
      if (state == 0) {
        lc = '<';
        state = 1;
        if (allow) {
          tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
          *(tp++) = '<';
        }
      } else if (state == 1) {
        depth++;
      }
      break;

    case '(':
      if (state == 2) {
        if (lc != '"' && lc != '\'') {
          lc = '(';
          br++;
        }
      } else if (allow && state == 1) {
        tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
        *(tp++) = c;
      } else if (state == 0) {
        *(rp++) = c;
      }
      break;

    case ')':
      if (state == 2) {
        if (lc != '"' && lc != '\'') {
          lc = ')';
          br--;
        }
      } else if (allow && state == 1) {
        tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
        *(tp++) = c;
      } else if (state == 0) {
        *(rp++) = c;
      }
      break;

    case '>':
      /* a '>' that closes a nested '<' only reduces the nesting depth */
      if (depth) {
        depth--;
        break;
      }

      /* a '>' inside a quoted section does not close anything */
      if (in_q) {
        break;
      }

      switch (state) {
      case 1: /* HTML/XML */
        lc = '>';
        in_q = state = 0;
        if (allow) {
          tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
          *(tp++) = '>';
          *tp='\0';
          /* an allowed tag is copied to the output verbatim */
          if (string_tag_find(tbuf, tp-tbuf, allow)) {
            memcpy(rp, tbuf, tp-tbuf);
            rp += tp-tbuf;
          }
          tp = tbuf;
        }
        break;

      case 2: /* PHP */
        /* only "?>" outside brackets and double quotes ends the php tag */
        if (!br && lc != '\"' && *(p-1) == '?') {
          in_q = state = 0;
          tp = tbuf;
        }
        break;

      case 3:
        in_q = state = 0;
        tp = tbuf;
        break;

      case 4: /* JavaScript/CSS/etc... */
        /* only "-->" ends this state */
        if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
          in_q = state = 0;
          tp = tbuf;
        }
        break;

      default:
        *(rp++) = c;
        break;
      }
      break;

    case '"':
    case '\'':
      if (state == 2 && *(p-1) != '\\') {
        if (lc == c) {
          lc = '\0';
        } else if (lc != '\\') {
          lc = c;
        }
      } else if (state == 0) {
        *(rp++) = c;
      } else if (allow && state == 1) {
        tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
        *(tp++) = c;
      }
      /* toggle quote tracking on an unescaped quote: open a quoted section,
         or close it when the same quote character is seen again */
      if (state && p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
        if (in_q) {
          in_q = 0;
        } else {
          in_q = *p;
        }
      }
      break;

    case '!':
      /* JavaScript & Other HTML scripting languages */
      /* NOTE(review): *(p-1) is read without a p > buf check; this looks
         safe only if state 1 cannot be carried in through stateptr with
         '!' as the first byte -- confirm. */
      if (state == 1 && *(p-1) == '<') {
        state = 3;
        lc = c;
      } else {
        if (state == 0) {
          *(rp++) = c;
        } else if (allow && state == 1) {
          tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
          *(tp++) = c;
        }
      }
      break;

    case '-':
      /* "<!--" switches from state 3 to state 4 */
      if (state == 3 && p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
        state = 4;
      } else {
        goto reg_char;
      }
      break;

    case '?':

      /* "<?" opens a php tag */
      if (state == 1 && *(p-1) == '<') {
        br=0;
        state=2;
        break;
      }
      /* fall-through */

    case 'E':
    case 'e':
      /* !DOCTYPE exception */
      if (state==3 && p > buf+6
          && tolower(*(p-1)) == 'p'
          && tolower(*(p-2)) == 'y'
          && tolower(*(p-3)) == 't'
          && tolower(*(p-4)) == 'c'
          && tolower(*(p-5)) == 'o'
          && tolower(*(p-6)) == 'd') {
        state = 1;
        break;
      }
      /* fall-through */

    case 'l':

      /* swm: If we encounter '<?xml' then we shouldn't be in
       * state == 2 (PHP). Switch back to HTML.
       */

      if (state == 2 && p > buf+2 && *(p-1) == 'm' && *(p-2) == 'x') {
        state = 1;
        break;
      }

      /* fall-through */
    default:
    reg_char:
      /* ordinary character: copy it to the output in state 0, or collect it
         in the tag buffer while inside an html tag */
      if (state == 0) {
        *(rp++) = c;
      } else if (allow && state == 1) {
        tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
        *(tp++) = c;
      }
      break;
    }
    c = *(++p);
    i++;
  }
  /* the result is only NUL-terminated when it is shorter than the input */
  if (rp < rbuf + len) {
    *rp = '\0';
  }
  free(buf);
  if (allow)
    free(tbuf);
  if (stateptr)
    *stateptr = state;

  return (size_t)(rp - rbuf);
}

/**
 * Returns a copy of s with html/php tags removed, keeping tags listed in
 * "allow".
 *
 * Both s and allow are duplicated first, so neither caller buffer is
 * modified by strip_tags_impl().
 *
 * @param s          input string
 * @param len        in: length of s; out: length of the stripped result
 * @param allow      list of allowed tags
 * @param allow_len  length of allow
 * @return the stripped copy
 */
char *string_strip_tags(const char *s, int &len, const char *allow,
                        int allow_len) {
  ASSERT(s);
  ASSERT(allow);

  char *allowed = string_duplicate(allow, allow_len);
  char *stripped = string_duplicate(s, len);

  len = strip_tags_impl(stripped, len, NULL, allowed, allow_len, false);

  free(allowed);
  return stripped;
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Wraps text to a given line length by inserting a break string.
 *
 * @param text          input text
 * @param textlen       in: length of text; out: length of the result on the
 *                      multi-character / forced-cut path (the single
 *                      character path keeps the length unchanged)
 * @param linelength    maximum line length
 * @param breakchar     string inserted at each break
 * @param breakcharlen  length of breakchar
 * @param docut         when true, words longer than linelength are cut
 * @return a newly allocated string; an allocated empty string for empty
 *         input; NULL when breakchar is empty, or when linelength is 0 and
 *         docut is set
 */
char *string_wordwrap(const char *text, int &textlen, int linelength,
                      const char *breakchar, int breakcharlen, bool docut) {
  ASSERT(text);
  ASSERT(breakchar);

  char *newtext;
  int newtextlen, chk;
  size_t alloced;
  long current = 0, laststart = 0, lastspace = 0;

  if (textlen == 0) {
    // Return an allocated empty string like every other successful path,
    // rather than a string literal the caller cannot release or modify.
    return string_duplicate("", 0);
  }

  if (breakcharlen == 0) {
    // throw_invalid_argument("wordbreak: (empty)");
    return NULL;
  }

  if (linelength == 0 && docut) {
    // throw_invalid_argument("width", "can't force cut when width = 0");
    return NULL;
  }

  /* Special case for a single-character break as it needs no
     additional storage space */
  if (breakcharlen == 1 && !docut) {
    newtext = string_duplicate(text, textlen);

    laststart = lastspace = 0;
    for (current = 0; current < textlen; current++) {
      if (text[current] == breakchar[0]) {
        laststart = lastspace = current;
      } else if (text[current] == ' ') {
        if (current - laststart >= linelength) {
          newtext[current] = breakchar[0];
          laststart = current + 1;
        }
        lastspace = current;
      } else if (current - laststart >= linelength && laststart != lastspace) {
        newtext[lastspace] = breakchar[0];
        laststart = lastspace + 1;
      }
    }

    return newtext;
  }

  /* Multiple character line break or forced cut */
  /* chk is the number of breaks the current allocation still has room for */
  if (linelength > 0) {
    chk = (int)(textlen/linelength + 1);
    alloced = textlen + chk * breakcharlen + 1;
  } else {
    chk = textlen;
    alloced = textlen * (breakcharlen + 1) + 1;
  }
  newtext = (char *)malloc(alloced);

  /* now keep track of the actual new text length */
  newtextlen = 0;

  laststart = lastspace = 0;
  for (current = 0; current < textlen; current++) {
    /* out of reserved breaks: grow the buffer for the rest of the text */
    if (chk <= 0) {
      alloced += (int) (((textlen - current + 1)/linelength + 1) *
                        breakcharlen) + 1;
      newtext = (char *)realloc(newtext, alloced);
      chk = (int) ((textlen - current)/linelength) + 1;
    }
    /* when we hit an existing break, copy to new buffer, and
     * fix up laststart and lastspace */
    if (text[current] == breakchar[0]
        && current + breakcharlen < textlen
        && !strncmp(text+current, breakchar, breakcharlen)) {
      memcpy(newtext+newtextlen, text+laststart,
             current-laststart+breakcharlen);
      newtextlen += current-laststart+breakcharlen;
      current += breakcharlen - 1;
      laststart = lastspace = current + 1;
      chk--;
    }
    /* if it is a space, check if it is at the line boundary,
     * copy and insert a break, or just keep track of it */
    else if (text[current] == ' ') {
      if (current - laststart >= linelength) {
        memcpy(newtext+newtextlen, text+laststart, current-laststart);
        newtextlen += current - laststart;
        memcpy(newtext+newtextlen, breakchar, breakcharlen);
        newtextlen += breakcharlen;
        laststart = current + 1;
        chk--;
      }
      lastspace = current;
    }
    /* if we are cutting, and we've accumulated enough
     * characters, and we haven't see a space for this line,
     * copy and insert a break. */
    else if (current - laststart >= linelength
             && docut && laststart >= lastspace) {
      memcpy(newtext+newtextlen, text+laststart, current-laststart);
      newtextlen += current - laststart;
      memcpy(newtext+newtextlen, breakchar, breakcharlen);
      newtextlen += breakcharlen;
      laststart = lastspace = current;
      chk--;
    }
    /* if the current word puts us over the linelength, copy
     * back up until the last space, insert a break, and move
     * up the laststart */
    else if (current - laststart >= linelength
             && laststart < lastspace) {
      memcpy(newtext+newtextlen, text+laststart, lastspace-laststart);
      newtextlen += lastspace - laststart;
      memcpy(newtext+newtextlen, breakchar, breakcharlen);
      newtextlen += breakcharlen;
      laststart = lastspace = lastspace + 1;
      chk--;
    }
  }

  /* copy over any stragglers */
  if (laststart != current) {
    memcpy(newtext+newtextlen, text+laststart, current-laststart);
    newtextlen += current - laststart;
  }

  textlen = newtextlen;
  newtext[newtextlen] = '\0';
  return newtext;
}

///////////////////////////////////////////////////////////////////////////////

/**
 * C-style escaping of selected bytes.
 *
 * A 256-entry mask is filled by string_charmask(what, wlength, ...); every
 * input byte whose mask entry is non-zero is prefixed with a backslash.
 * Flagged bytes outside the printable range 32..126 are written as a
 * mnemonic escape (\n \t \r \a \v \b \f) when one exists and as a
 * three-digit octal escape otherwise.
 *
 * @param str     input bytes; exactly `length` bytes are read
 * @param length  in: number of input bytes; out: number of bytes produced,
 *                not counting the terminating NUL
 * @param what    character list handed to string_charmask()
 * @param wlength length of `what`
 * @return malloc()ed, NUL-terminated buffer
 */
char *string_addcslashes(const char *str, int &length, const char *what,
                         int wlength) {
  ASSERT(str);
  ASSERT(what);

  char flags[256];
  string_charmask(what, wlength, flags);

  /* worst case: every byte becomes a four-byte octal escape */
  char *escaped = (char *)malloc((length << 2) + 1);
  char *out = escaped;
  const char *const stop = str + length;

  for (const char *in = str; in < stop; ++in) {
    const char c = *in;
    const unsigned char byte = (unsigned char) c;

    /* bytes that are not selected are copied untouched */
    if (!flags[byte]) {
      *out++ = c;
      continue;
    }

    *out++ = '\\';

    /* printable ASCII only needs the backslash in front of it */
    if (byte >= 32 && byte <= 126) {
      *out++ = c;
      continue;
    }

    char mnemonic = '\0';
    switch (c) {
    case '\n': mnemonic = 'n'; break;
    case '\t': mnemonic = 't'; break;
    case '\r': mnemonic = 'r'; break;
    case '\a': mnemonic = 'a'; break;
    case '\v': mnemonic = 'v'; break;
    case '\b': mnemonic = 'b'; break;
    case '\f': mnemonic = 'f'; break;
    default: break;
    }

    if (mnemonic != '\0') {
      *out++ = mnemonic;
    } else {
      out += sprintf(out, "%03o", byte);
    }
  }

  *out = 0;
  length = out - escaped;
  return escaped;
}

/**
 * Reverse C-style escaping: turns \n, \r, \a, \t, \v, \b, \f, \\,
 * \xH / \xHH (hex) and \o / \oo / \ooo (octal) sequences back into the
 * bytes they stand for. A backslash followed by any other character yields
 * that character; a backslash that is the very last byte is kept as is.
 *
 * The work is done in place on a copy obtained from string_duplicate();
 * the result is never longer than the input.
 *
 * @param input escaped bytes
 * @param nlen  in: number of input bytes; out: number of bytes in the result
 * @return NULL when nlen is 0, otherwise the buffer returned by
 *         string_duplicate(), rewritten and NUL-terminated
 *         (presumably string_duplicate() allocates nlen + 1 bytes -- the
 *         final terminator write relies on that; confirm)
 */
char *string_stripcslashes(const char *input, int &nlen) {
  ASSERT(input);
  if (nlen == 0) {
    return NULL;
  }

  char *str = string_duplicate(input, nlen);

  char *source, *target, *end;
  int i;
  char numtmp[4];

  for (source=str, end=str+nlen, target=str; source < end; source++) {
    if (*source == '\\' && source+1 < end) {
      source++;
      switch (*source) {
      case 'n':  *target++='\n'; break;
      case 'r':  *target++='\r'; break;
      case 'a':  *target++='\a'; break;
      case 't':  *target++='\t'; break;
      case 'v':  *target++='\v'; break;
      case 'b':  *target++='\b'; break;
      case 'f':  *target++='\f'; break;
      case '\\': *target++='\\'; break;
      case 'x':
        /* <ctype.h> functions require a value representable as unsigned
         * char; a plain (possibly negative) char is undefined behaviour */
        if (source+1 < end && isxdigit((unsigned char)source[1])) {
          numtmp[0] = *++source;
          if (source+1 < end && isxdigit((unsigned char)source[1])) {
            numtmp[1] = *++source;
            numtmp[2] = '\0';
          } else {
            numtmp[1] = '\0';
          }
          *target++=(char)strtol(numtmp, NULL, 16);
          break;
        }
        /* no break on purpose: "\x" without a hex digit is treated like
         * any other unrecognised escape */
      default:
        /* up to three octal digits */
        i=0;
        while (source < end && *source >= '0' && *source <= '7' && i<3) {
          numtmp[i++] = *source++;
        }
        if (i) {
          numtmp[i]='\0';
          *target++=(char)strtol(numtmp, NULL, 8);
          /* the for-loop increment will step past the last digit */
          source--;
        } else {
          /* unknown escape: drop the backslash, keep the character */
          *target++=*source;
        }
      }
    } else {
      *target++=*source;
    }
  }
  *target='\0';
  /* the output length is simply how far the write cursor advanced */
  nlen = target - str;
  return str;
}

/**
 * Escape a string with backslashes: single quote, double quote and
 * backslash get a backslash in front of them, and a NUL byte is written as
 * the two characters "\0".
 *
 * @param str    input bytes; exactly `length` bytes are read
 * @param length in: number of input bytes; out: number of bytes produced,
 *               not counting the terminating NUL
 * @return NULL when length is 0, otherwise a malloc()ed, NUL-terminated
 *         buffer
 */
char *string_addslashes(const char *str, int &length) {
  ASSERT(str);
  if (length == 0) {
    return NULL;
  }

  /* worst case: every byte doubles in size */
  char *escaped = (char *)malloc((length << 1) + 1);
  char *out = escaped;
  const char *const stop = str + length;

  for (const char *in = str; in < stop; ++in) {
    const char c = *in;

    if (c == '\0') {
      *out++ = '\\';
      *out++ = '0';
      continue;
    }

    if (c == '\'' || c == '\"' || c == '\\') {
      *out++ = '\\';
    }
    *out++ = c;
  }

  *out = 0;
  length = out - escaped;
  return escaped;
}

/**
 * Remove backslash escaping: "\0" (backslash + digit zero) becomes a NUL
 * byte, a backslash followed by any other character yields that character,
 * and a lone trailing backslash is dropped.
 *
 * The work is done in place on a copy obtained from string_duplicate().
 *
 * @param input escaped bytes
 * @param l     in: number of input bytes; out: number of bytes in the result
 * @return NULL when l is 0, otherwise the rewritten copy
 */
char *string_stripslashes(const char *input, int &l) {
  ASSERT(input);
  /* Emptiness is decided by the length, like the sibling helpers do.
   * Testing the first byte instead would discard any binary string that
   * merely starts with a NUL byte. */
  if (l == 0) {
    return NULL;
  }

  char *str = string_duplicate(input, l);
  char *s, *t;
  s = str;   /* write cursor */
  t = str;   /* read cursor  */

  while (l > 0) {
    if (*t == '\\') {
      t++;        /* skip the slash */
      l--;
      if (l > 0) {
        if (*t == '0') {
          *s++='\0';
          t++;
        } else {
          *s++ = *t++;  /* preserve the next character */
        }
        l--;
      }
    } else {
      *s++ = *t++;
      l--;
    }
  }
  /* Only re-terminate when something was removed; otherwise the copy is
   * left exactly as string_duplicate() produced it (presumably already
   * NUL-terminated -- confirm against string_duplicate). */
  if (s != t) {
    *s = '\0';
  }
  l = s - str;
  return str;
}

/**
 * Put a backslash in front of each of the characters
 * . \ + * ? [ ^ ] $ ( )
 *
 * @param input input bytes; exactly `len` bytes are read
 * @param len   in: number of input bytes; out: number of bytes produced,
 *              not counting the terminating NUL
 * @return NULL when len is 0, otherwise a malloc()ed, NUL-terminated buffer
 */
char *string_quotemeta(const char *input, int &len) {
  ASSERT(input);
  if (len == 0) {
    return NULL;
  }

  /* worst case: every byte needs a backslash */
  char *ret = (char *)malloc((len << 1) + 1);
  char *q = ret;
  /* Walk exactly len bytes. The output buffer is sized from len, so
   * scanning for a NUL terminator instead could run past both the input
   * and the allocation, and would cut the result short at an embedded
   * NUL byte. */
  const char *end = input + len;
  for (const char *p = input; p < end; p++) {
    char c = *p;
    switch (c) {
    case '.':
    case '\\':
    case '+':
    case '*':
    case '?':
    case '[':
    case '^':
    case ']':
    case '$':
    case '(':
    case ')':
      *q++ = '\\';
      /* break is missing _intentionally_ */
    default:
      *q++ = c;
    }
  }
  *q = 0;
  len = q - ret;
  return ret;
}

///////////////////////////////////////////////////////////////////////////////

/**
 * Numeric value of one hexadecimal digit character.
 *
 * @param c character code of the digit
 * @return 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'; -1 for anything else
 */
static char string_hex2int(int c) {
  int value = -1;

  if (isdigit(c)) {
    value = c - '0';
  } else if ('A' <= c && c <= 'F') {
    value = c - 'A' + 10;
  } else if ('a' <= c && c <= 'f') {
    value = c - 'a' + 10;
  }

  return value;
}

/**
 * Quoted-printable encoder.
 *
 * - A CR LF pair is copied through unchanged and resets the line counter.
 * - Control characters, 0x7f, bytes with the high bit set, '=' and a space
 *   that is immediately followed by CR are written as "=XX" (upper-case
 *   hex).
 * - Everything else is copied literally.
 * - Whenever the next item would push the current line past
 *   PHP_QPRINT_MAXL characters, a soft line break "=\r\n" is emitted first.
 *
 * @param input input bytes; exactly `len` bytes are read
 * @param len   in: number of input bytes; out: number of bytes produced,
 *              not counting the terminating NUL
 * @return malloc()ed, NUL-terminated buffer
 */
char *string_quoted_printable_encode(const char *input, int &len) {
  const char *hex = "0123456789ABCDEF";

  /* 3 bytes per encoded input byte, plus room for the soft line breaks;
   * the trailing "+ 1" group also leaves space for the terminator */
  unsigned char *ret =
    (unsigned char *)malloc(3 * len + 3 * (((3 * len)/PHP_QPRINT_MAXL) + 1));
  unsigned char *d = ret;

  int length = len;
  unsigned char c;
  unsigned long lp = 0;   /* characters already on the current output line */
  while (length--) {
    c = *input++;
    /* From here on `length` is the number of bytes still unread, so it is
     * checked BEFORE peeking at *input; peeking first would read one byte
     * past the end when the final input byte is CR or a space. */
    if (c == '\015' && length > 0 && *input == '\012') {
      *d++ = '\015';
      *d++ = *input++;
      length--;
      lp = 0;
    } else {
      if (iscntrl (c) || (c == 0x7f) || (c & 0x80) || (c == '=') ||
          ((c == ' ') && length > 0 && (*input == '\015'))) {
        if ((lp += 3) > PHP_QPRINT_MAXL) {
          *d++ = '=';
          *d++ = '\015';
          *d++ = '\012';
          lp = 3;
        }
        *d++ = '=';
        *d++ = hex[c >> 4];
        *d++ = hex[c & 0xf];
      } else {
        if ((++lp) > PHP_QPRINT_MAXL) {
          *d++ = '=';
          *d++ = '\015';
          *d++ = '\012';
          lp = 1;
        }
        *d++ = c;
      }
    }
  }
  *d = '\0';
  len = d - ret;
  return (char*)ret;
}

/**
 * Quoted-printable decoder.
 *
 * - "=XX" with two hex digits becomes the byte with that value.
 * - '=' followed by optional spaces/tabs and then end of string, CRLF, CR
 *   or LF is a soft line break and produces no output.
 * - Any other '=' is copied literally.
 * - When is_q is true, '_' is decoded as a space.
 *
 * Decoding stops at `len` bytes or at the first NUL byte, whichever comes
 * first.
 *
 * @param input encoded bytes
 * @param len   in: number of input bytes; out: number of decoded bytes
 * @param is_q  treat '_' as an encoded space
 * @return NULL when len is 0, otherwise a malloc()ed, NUL-terminated buffer
 *
 * NOTE(review): the soft-line-break scan indexes str_in[i + k] (and
 * str_in[i + k + 1]) without comparing against len; it appears to rely on
 * the input being NUL-terminated -- confirm with callers.
 */
char *string_quoted_printable_decode(const char *input, int &len, bool is_q) {
  ASSERT(input);
  if (len == 0) {
    return NULL;
  }

  /* i: read index, j: write index, k: look-ahead offset after '=' */
  int i = 0, j = 0, k;
  const char *str_in = input;
  /* the output is never longer than the input */
  char *str_out = (char *)malloc(len + 1);
  while (i < len && str_in[i]) {
    switch (str_in[i]) {
    case '=':
      /* "=XX": both hex digits must lie inside the input */
      if (i + 2 < len && str_in[i + 1] && str_in[i + 2] &&
          isxdigit((int) str_in[i + 1]) && isxdigit((int) str_in[i + 2]))
        {
          str_out[j++] = (string_hex2int((int) str_in[i + 1]) << 4)
            + string_hex2int((int) str_in[i + 2]);
          i += 3;
        } else  /* check for soft line break according to RFC 2045*/ {
        k = 1;
        while (str_in[i + k] &&
               ((str_in[i + k] == 32) || (str_in[i + k] == 9))) {
          /* Possibly, skip spaces/tabs at the end of line */
          k++;
        }
        if (!str_in[i + k]) {
          /* End of line reached */
          i += k;
        }
        else if ((str_in[i + k] == 13) && (str_in[i + k + 1] == 10)) {
          /* CRLF */
          i += k + 2;
        }
        else if ((str_in[i + k] == 13) || (str_in[i + k] == 10)) {
          /* CR or LF */
          i += k + 1;
        }
        else {
          /* not a soft break after all: keep the '=' as a literal */
          str_out[j++] = str_in[i++];
        }
      }
      break;
    case '_':
      if (is_q) {
        str_out[j++] = ' ';
        i++;
      } else {
        str_out[j++] = str_in[i++];
      }
      break;
    default:
      str_out[j++] = str_in[i++];
    }
  }
  str_out[j] = '\0';
  len = j;
  return str_out;
}

/**
 * Render every input byte as two lower-case hexadecimal digits.
 *
 * @param input input bytes; exactly `len` bytes are read
 * @param len   in: number of input bytes; out: number of hex digits written
 * @return NULL when len is 0, otherwise a malloc()ed, NUL-terminated buffer
 */
char *string_bin2hex(const char *input, int &len) {
  static char hexconvtab[] = "0123456789abcdef";

  ASSERT(input);
  if (len == 0) {
    return NULL;
  }

  char *result = (char *)malloc((len << 1) + 1);
  char *out = result;

  for (int pos = 0; pos < len; ++pos) {
    const unsigned char byte = (unsigned char)input[pos];
    *out++ = hexconvtab[byte >> 4];   /* high nibble first */
    *out++ = hexconvtab[byte & 15];
  }

  *out = '\0';
  len = out - result;
  return result;
}

/**
 * Value of a single hex digit for string_hex2bin().
 *
 * @return 0..15 for '0'-'9', 'a'-'f', 'A'-'F'; -1 for any other character
 */
static int hex2bin_nibble(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  if (c >= 'a' && c <= 'f') {
    return c - 'a' + 10;
  }
  if (c >= 'A' && c <= 'F') {
    return c - 'A' + 10;
  }
  return -1;
}

/**
 * Decode pairs of hexadecimal digits into bytes.
 *
 * `len` is halved up front, so a trailing unpaired digit is ignored.
 *
 * @param input hex digits; 2 * (len / 2) characters are read
 * @param len   in: number of input characters; out: number of decoded bytes
 * @return malloc()ed, NUL-terminated buffer
 * @throws InvalidArgumentException when a non-hex character is met; the
 *         reported position is the 1-based index of the offending character
 */
char *string_hex2bin(const char *input, int &len) {
  len >>= 1;
  char *str = (char *)malloc(len + 1);

  int pos = 0;  /* number of input characters consumed so far */
  for (int i = 0; i < len; i++) {
    const int high = hex2bin_nibble(input[pos++]);
    if (high < 0) {
      free(str);
      throw InvalidArgumentException("bad encoding at position", pos);
    }

    const int low = hex2bin_nibble(input[pos++]);
    if (low < 0) {
      free(str);
      throw InvalidArgumentException("bad encoding at position", pos);
    }

    str[i] = (high << 4) | low;
  }

  str[len] = '\0';
  return str;
}

/**
 * Parse a number written in the given base.
 *
 * Characters that are not digits of that base ('0'-'9', then letters in
 * either case for values 10 and up) are silently skipped. The value is
 * accumulated as an integer for as long as it stays within LONG_MAX; from
 * the first digit that would exceed it, accumulation continues in floating
 * point.
 *
 * @param s    characters to parse
 * @param len  number of characters to read from s
 * @param base numeric base, checked with string_validate_base()
 * @return the integer accumulator, or the double accumulator once the
 *         integer range was exceeded
 */
Variant string_base_to_numeric(const char *s, int len, int base) {
  ASSERT(string_validate_base(base));

  /* the largest accumulator (and last digit) that can still take one more
   * digit without passing LONG_MAX */
  const int64 cutoff = LONG_MAX / base;
  const int cutlim = LONG_MAX % base;

  int64 num = 0;
  double fnum = 0;
  bool as_float = false;

  for (int pos = 0; pos < len; ++pos) {
    const char c = s[pos];
    int digit;

    /* might not work for EBCDIC */
    if (c >= '0' && c <= '9') {
      digit = c - '0';
    } else if (c >= 'A' && c <= 'Z') {
      digit = c - 'A' + 10;
    } else if (c >= 'a' && c <= 'z') {
      digit = c - 'a' + 10;
    } else {
      continue;
    }

    if (digit >= base) {
      continue;
    }

    if (!as_float) {
      if (num < cutoff || (num == cutoff && digit <= cutlim)) {
        num = num * base + digit;
        continue;
      }
      /* this digit does not fit any more: carry on in floating point */
      fnum = num;
      as_float = true;
    }
    fnum = fnum * base + digit;
  }

  if (as_float) {
    return fnum;
  }
  return num;
}

/**
 * Render an unsigned integer in the given base using the digits
 * 0-9 and a-z.
 *
 * @param value number to convert
 * @param base  numeric base, checked with string_validate_base()
 * @return the result of string_duplicate() on the generated digits
 */
char *string_long_to_base(unsigned long value, int base) {
  static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";

  ASSERT(string_validate_base(base));

  /* one character per bit is enough for any supported base, plus the NUL */
  char buf[(sizeof(unsigned long) << 3) + 1];
  char *const last = buf + sizeof(buf) - 1;
  char *first = last;
  *first = '\0';

  /* digits come out least significant first, so fill the buffer backwards */
  do {
    *--first = digits[value % base];
    value /= base;
  } while (first > buf && value);

  return string_duplicate(first, last - first);
}

/**
 * Render an integer or double value in the given base using the digits
 * 0-9 and a-z.
 *
 * - Doubles are floored first; +/- infinity yields an empty string.
 * - Integers are delegated to string_long_to_base().
 * - Any other kind of value yields an empty string.
 *
 * @param value value to convert
 * @param base  numeric base, checked with string_validate_base()
 * @return the result of string_duplicate() / string_long_to_base()
 */
char *string_numeric_to_base(CVarRef value, int base) {
  static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";

  ASSERT(string_validate_base(base));

  if (value.isDouble()) {
    double remaining = floor(value); /* floor it just in case */

    /* Don't try to convert +/- infinity */
    if (remaining == HUGE_VAL || remaining == -HUGE_VAL) {
      // raise_warning("Number too large");
      return string_duplicate("", 0);
    }

    char buf[(sizeof(double) << 3) + 1];
    char *const last = buf + sizeof(buf) - 1;
    char *first = last;
    *first = '\0';

    /* least significant digit first, written from the back of the buffer;
     * stops when the buffer is full or nothing is left above 1 */
    do {
      *--first = digits[(int) fmod(remaining, base)];
      remaining /= base;
    } while (first > buf && fabs(remaining) >= 1);

    return string_duplicate(first, last - first);
  }

  if (value.isInteger()) {
    return string_long_to_base(value.toInt64(), base);
  }

  /* neither integer nor double */
  return string_duplicate("", 0);
}

///////////////////////////////////////////////////////////////////////////////
// uuencode

/*
 * uuencode helpers. Note that every macro evaluates its argument more
 * than once, so arguments must be free of side effects.
 *
 * PHP_UU_ENC(c)    - printable character for a value: the low six bits
 *                    plus ' ', except that 0 is written as '`'.
 * PHP_UU_ENC_C2(c) - second output character of a 3-byte group at c
 *                    (low 2 bits of c[0], high 4 bits of c[1]).
 * PHP_UU_ENC_C3(c) - third output character of a 3-byte group at c
 *                    (low 4 bits of c[1], high 2 bits of c[2]).
 * PHP_UU_DEC(c)    - inverse of PHP_UU_ENC: six-bit value of a character.
 *
 * Every use of the macro argument is parenthesized so that expression
 * arguments (e.g. a conditional expression) are grouped correctly.
 */
#define PHP_UU_ENC(c) \
  ((c) ? ((c) & 077) + ' ' : '`')
#define PHP_UU_ENC_C2(c) \
  PHP_UU_ENC(((*(c) << 4) & 060) | ((*((c) + 1) >> 4) & 017))
#define PHP_UU_ENC_C3(c) \
  PHP_UU_ENC(((*((c) + 1) << 2) & 074) | ((*((c) + 2) >> 6) & 03))
#define PHP_UU_DEC(c) \
  (((c) - ' ') & 077)

/**
 * uuencode a block of bytes.
 *
 * The output consists of lines that each start with one character encoding
 * the number of input bytes on that line (at most 45), followed by four
 * output characters for every three input bytes. The data is followed by a
 * terminating line holding a single '`' and a final newline.
 *
 * @param src      input bytes (asserted non-NULL)
 * @param src_len  number of input bytes (asserted non-zero)
 * @param dest_len out: number of characters written, not counting the NUL
 * @return malloc()ed, NUL-terminated buffer
 *
 * NOTE(review): when the final group holds a single byte, PHP_UU_ENC_C2(s)
 * below still reads *(s + 1), i.e. src[src_len]; this looks like it relies
 * on the input being NUL-terminated -- confirm with callers.
 */
char *string_uuencode(const char *src, int src_len, int &dest_len) {
  ASSERT(src);
  ASSERT(src_len);

  /* number of input bytes on the current line; 45 means "a full line" */
  int len = 45;
  char *p;
  const char *s, *e, *ee;
  char *dest;

  /* encoded length is ~ 38% greater than the original */
  p = dest = (char *)malloc((int)ceil(src_len * 1.38) + 46);
  s = src;
  e = src + src_len;

  /* whole lines, as long as more than three input bytes remain */
  while ((s + 3) < e) {
    ee = s + len;
    if (ee > e) {
      /* last, short line: only whole 3-byte groups are handled here, the
       * remainder is left for the tail code after the loop */
      ee = e;
      len = ee - s;
      if (len % 3) {
        ee = s + (int) (floor(len / 3) * 3);
      }
    }
    /* line prefix: number of input bytes on this line */
    *p++ = PHP_UU_ENC(len);

    while (s < ee) {
      *p++ = PHP_UU_ENC(*s >> 2);
      *p++ = PHP_UU_ENC_C2(s);
      *p++ = PHP_UU_ENC_C3(s);
      *p++ = PHP_UU_ENC(*(s + 2) & 077);

      s += 3;
    }

    if (len == 45) {
      *p++ = '\n';
    }
  }

  /* tail: one last group of up to three bytes, padded with '`' */
  if (s < e) {
    if (len == 45) {
      /* the tail starts a line of its own, so it needs its own prefix;
       * len = 0 makes the "len < 45" check below close that line */
      *p++ = PHP_UU_ENC(e - s);
      len = 0;
    }

    *p++ = PHP_UU_ENC(*s >> 2);
    *p++ = PHP_UU_ENC_C2(s);
    *p++ = ((e - s) > 1) ? PHP_UU_ENC_C3(s) : PHP_UU_ENC('\0');
    *p++ = ((e - s) > 2) ? PHP_UU_ENC(*(s + 2) & 077) : PHP_UU_ENC('\0');
  }

  /* a short last line has not been closed yet */
  if (len < 45) {
    *p++ = '\n';
  }

  /* terminating zero-length line */
  *p++ = PHP_UU_ENC('\0');
  *p++ = '\n';
  *p = '\0';

  dest_len = p - dest;
  return dest;
}

/**
 * Decode uuencoded data.
 *
 * Each line starts with a character that decodes to the number of bytes on
 * that line; the rest of the line is read in groups of four characters that
 * yield three bytes each. Decoding ends at a line whose length character
 * decodes to 0, after a line shorter than 45 bytes, or at the end of the
 * input.
 *
 * @param src       encoded characters
 * @param src_len   number of encoded characters
 * @param total_len out: sum of the per-line byte counts, i.e. the length of
 *                  the decoded data
 * @return malloc()ed buffer terminated with NUL at total_len, or NULL when a
 *         sanity check fails (total_len may already be non-zero then)
 *
 * NOTE(review): the inner loop reads s[0..3] for every group without
 * checking s + 3 against e, and for len < 45 the line end `ee` is only an
 * estimate (floor(len * 1.33)) -- confirm that malformed/truncated input
 * cannot make it read past the buffer.
 */
char *string_uudecode(const char *src, int src_len, int &total_len) {
  total_len = 0;
  int len;
  const char *s, *e, *ee;
  char *p, *dest;

  p = dest = (char *)malloc((int)ceil(src_len * 0.75) + 1);
  s = src;
  e = src + src_len;

  while (s < e) {
    /* first character of a line: number of decoded bytes on it */
    if ((len = PHP_UU_DEC(*s++)) <= 0) {
      break;
    }
    /* sanity check */
    if (len > src_len) {
      goto err;
    }

    total_len += len;

    /* end of the encoded characters of this line: a full line is 60
     * characters, a short one roughly 4/3 of its byte count */
    ee = s + (len == 45 ? 60 : (int) floor(len * 1.33));
    /* sanity check */
    if (ee > e) {
      goto err;
    }

    /* four characters -> three bytes */
    while (s < ee) {
      *p++ = PHP_UU_DEC(*s) << 2 | PHP_UU_DEC(*(s + 1)) >> 4;
      *p++ = PHP_UU_DEC(*(s + 1)) << 4 | PHP_UU_DEC(*(s + 2)) >> 2;
      *p++ = PHP_UU_DEC(*(s + 2)) << 6 | PHP_UU_DEC(*(s + 3));
      s += 4;
    }

    /* a short line is the last data line */
    if (len < 45) {
      break;
    }

    /* skip \n */
    s++;
  }

  /* NOTE(review): as written this assigns the RESULT OF THE COMPARISON to
   * len (0 or 1), so the "len > 1" and "len > 2" branches below can never
   * be taken. Presumably "total_len - (p - dest)" was intended -- confirm
   * before changing. */
  if ((len = total_len > (p - dest))) {
    *p++ = PHP_UU_DEC(*s) << 2 | PHP_UU_DEC(*(s + 1)) >> 4;
    if (len > 1) {
      *p++ = PHP_UU_DEC(*(s + 1)) << 4 | PHP_UU_DEC(*(s + 2)) >> 2;
      if (len > 2) {
        *p++ = PHP_UU_DEC(*(s + 2)) << 6 | PHP_UU_DEC(*(s + 3));
      }
    }
  }

  /* the group-wise loop may have written padding bytes beyond total_len;
   * the terminator cuts the result at the declared length */
  *(dest + total_len) = '\0';

  return dest;

 err:
  free(dest);
  return NULL;
}

///////////////////////////////////////////////////////////////////////////////
// base64

/* The 64 characters of the base64 alphabet, indexed by 6-bit value
 * (NUL-terminated). */
static const char base64_table[] = {
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0'
};

/* Character used to pad the final output group to four characters. */
static const char base64_pad = '=';

/* Reverse lookup, indexed by byte value:
 *   0..63  the 6-bit value of an alphabet character,
 *   -1     TAB, LF, CR and SPACE,
 *   -2     every other byte (including '='). */
static const short base64_reverse_table[256] = {
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
  -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
  -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
  -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
};

/**
 * Base64-encode a block of bytes.
 *
 * @param str        input bytes
 * @param length     number of input bytes
 * @param ret_length optional out: number of characters written, not
 *                   counting the terminating NUL; set to 0 on failure
 * @return malloc()ed, NUL-terminated buffer, or NULL when length is
 *         negative, when the encoded size would not fit in an int, or when
 *         the allocation fails
 */
static unsigned char *php_base64_encode(const unsigned char *str, int length,
                                        int *ret_length) {
  const unsigned char *current = str;
  unsigned char *p;
  unsigned char *result;

  /* The output needs 4 * ceil(length / 3) + 1 bytes and its length is
   * reported through an int. Reject a negative length (it would slip into
   * the tail code below and write past a 1-byte buffer) and any length
   * whose encoded size would overflow int. */
  if (length < 0 || length > ((INT_MAX - 1) / 4) * 3) {
    if (ret_length != NULL) {
      *ret_length = 0;
    }
    return NULL;
  }

  result = (unsigned char *)malloc((size_t)((length + 2) / 3) * 4 + 1);
  if (result == NULL) {
    if (ret_length != NULL) {
      *ret_length = 0;
    }
    return NULL;
  }
  p = result;

  while (length > 2) { /* keep going until we have less than 24 bits */
    *p++ = base64_table[current[0] >> 2];
    *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
    *p++ = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)];
    *p++ = base64_table[current[2] & 0x3f];

    current += 3;
    length -= 3; /* we just handle 3 octets of data */
  }

  /* now deal with the tail end of things: one or two leftover bytes are
   * encoded and the group is padded to four characters */
  if (length != 0) {
    *p++ = base64_table[current[0] >> 2];
    if (length > 1) {
      *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
      *p++ = base64_table[(current[1] & 0x0f) << 2];
      *p++ = base64_pad;
    } else {
      *p++ = base64_table[(current[0] & 0x03) << 4];
      *p++ = base64_pad;
      *p++ = base64_pad;
    }
  }
  if (ret_length != NULL) {
    *ret_length = (int)(p - result);
  }
  *p = '\0';
  return result;
}

static unsigned char *php_base64_decode(const unsigned char *str,
                                        int length, int *ret_length,
                                        bool strict) {
  const unsigned char *current = str;
  int ch, i = 0, j = 0, k;
  /* this sucks for threaded environments */
  unsigned char *result;

  result = (unsigned char *)malloc(length + 1);

  /* run through the whole string, converting as we go */
  while ((ch = *current++) != '\0' && length-- > 0) {
    if (ch == base64_pad) {
      if (*current != '=' && (i % 4) == 1) {
        free(result);
        return NULL;
      }
      continue;
    }

    ch = base64_reverse_table[ch];
    if ((!strict && ch < 0) || ch == -1) {
      /* a space or some other separator character, we simply skip over */
      continue;
    } else if (ch == -2) {
…
Large files files are truncated, but you can click here to view the full file