tokenize.cpp | searchcode

/src/tokenize.cpp

http://github.com/bengardner/uncrustify
C++ | 2047 lines | 1590 code | 201 blank | 256 comment | 371 complexity | e2414d84fb693b154ad3e8152180e063 MD5 | raw file
Possible License(s): GPL-2.0

/**
 * @file tokenize.cpp
 * This file breaks up the text stream into tokens or chunks.
 *
 * Each routine needs to set pc.len and pc.type.
 *
 * @author  Ben Gardner
 * @license GPL v2+
 */
#include "tokenize.h"
#include "uncrustify_types.h"
#include "char_table.h"
#include "prototypes.h"
#include "chunk_list.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "unc_ctype.h"
#include "uncrustify.h"
#include "keywords.h"


/**
 * Snapshot of a read position in the input: the index of the next character
 * plus the row/column bookkeeping that goes with it.
 */
struct tok_info
{
   int last_ch;   /* last character that was consumed, 0 before any read */
   int idx;       /* index of the next character to read */
   int row;       /* line number, starts at 1 */
   int col;       /* column number, starts at 1 */

   /* Start at the very beginning of the input: row 1, column 1 */
   tok_info()
      : last_ch(0), idx(0), row(1), col(1)
   {
   }
};

/**
 * Read cursor over the input characters.
 *
 * Keeps the current position (c) and one saved position (s) so that a parser
 * can attempt something, and rewind if the attempt fails.
 * The referenced deque is not copied; it must outlive the tok_ctx.
 */
struct tok_ctx
{
   tok_ctx(const deque<int> &d)
      : data(d)
   {
   }


   /* save before trying to parse something that may fail */
   void save()
   {
      save(s);
   }


   /* copy the current position into a caller-provided slot */
   void save(tok_info &info)
   {
      info = c;
   }


   /* restore previous saved state */
   void restore()
   {
      restore(s);
   }


   /* rewind to a position previously captured with save(info) */
   void restore(const tok_info &info)
   {
      c = info;
   }


   /* @return true if at least one more character can be read */
   bool more()
   {
      return(c.idx < (int)data.size());
   }


   /* @return the next character without consuming it, or -1 at end of input */
   int peek()
   {
      return(more() ? data[c.idx] : -1);
   }


   /**
    * Looks at the character 'idx' positions away from the current one without
    * consuming anything.
    *
    * @return the character, or -1 if that position lies outside the input
    */
   int peek(int idx)
   {
      idx += c.idx;
      /* reject positions before the start as well as past the end */
      return(((idx >= 0) && (idx < (int)data.size())) ? data[idx] : -1);
   }


   /**
    * Consumes one character and updates row/col.
    *
    * @return the character, or -1 at end of input (nothing is consumed)
    */
   int get()
   {
      if (more())
      {
         int ch = data[c.idx++];
         switch (ch)
         {
         case '\t':
            c.col = calc_next_tab_column(c.col, cpd.settings[UO_input_tab_size].u);
            break;

         case '\n':
            /* the LF of a CRLF pair was already counted when the CR was read */
            if (c.last_ch != '\r')
            {
               c.row++;
               c.col = 1;
            }
            break;

         case '\r':
            c.row++;
            c.col = 1;
            break;

         default:
            c.col++;
            break;
         }
         c.last_ch = ch;
         return(ch);
      }
      return(-1);
   }


   /**
    * Consumes the next character only if it equals ch.
    *
    * @return true if the character matched and was consumed
    */
   bool expect(int ch)
   {
      if (peek() == ch)
      {
         get();
         return(true);
      }
      return(false);
   }

   const deque<int> &data;
   tok_info          c; /* current */
   tok_info          s; /* saved */
};


/**
 * Count the number of characters in a quoted string.
 * The next bit of text starts with a quote char " or ' or <.
 * Count the number of characters until the matching character.
 *
 * @param pc   The structure to update, str is an input.
 * @return     Whether a string was parsed
 */
static bool parse_string(tok_ctx &ctx, chunk_t &pc, int quote_idx, bool allow_escape);


/**
 * Literal string, ends with single "
 * Two "" don't end the string.
 *
 * @param pc   The structure to update, str is an input.
 * @return     Whether a string was parsed
 */
static bool parse_cs_string(tok_ctx &ctx, chunk_t &pc);


/**
 * Interpolated strings start with $" end with a single "
 * Double quotes are escaped by doubling.
 * Need to track embedded { } pairs and ignore anything between.
 *
 * @param pc   The structure to update, str is an input.
 * @return     Whether a string was parsed
 */
static bool parse_cs_interpolated_string(tok_ctx &ctx, chunk_t &pc);


/**
 * VALA verbatim string, ends with three quotes (""")
 *
 * @param pc   The structure to update, str is an input.
 */
static void parse_verbatim_string(tok_ctx &ctx, chunk_t &pc);


static bool tag_compare(const deque<int> &d, int a_idx, int b_idx, int len);


/**
 * Parses a C++0x 'R' string. R"( xxx )" R"tag(  )tag" u8R"(x)" uR"(x)"
 * Newlines may be in the string.
 */
static bool parse_cr_string(tok_ctx &ctx, chunk_t &pc, int q_idx);


/**
 * Count the number of whitespace characters.
 *
 * @param pc   The structure to update, str is an input.
 * @return     Whether whitespace was parsed
 */
static bool parse_whitespace(tok_ctx &ctx, chunk_t &pc);


/**
 * Called when we hit a backslash.
 * If there is nothing but whitespace until the newline, then this is a
 * backslash newline
 */
static bool parse_bs_newline(tok_ctx &ctx, chunk_t &pc);


/**
 * Parses any number of tab or space chars followed by a newline.
 * Does not change pc.len if a newline isn't found.
 * This is not the same as parse_whitespace() because it only consumes until
 * a single newline is encountered.
 */
static bool parse_newline(tok_ctx &ctx);


/**
 * PAWN #define is different than C/C++.
 *   #define PATTERN REPLACEMENT_TEXT
 * The PATTERN may not contain a space or '[' or ']'.
 * A generic whitespace check should be good enough.
 * Do not change the pattern.
 */
static void parse_pawn_pattern(tok_ctx &ctx, chunk_t &pc, c_token_t tt);


static bool parse_ignored(tok_ctx &ctx, chunk_t &pc);


/**
 * Skips the next bit of whatever and returns the type of block.
 *
 * pc.str is the input text.
 * pc.len is the output length.
 * pc.type is the output type
 * pc.column is output column
 *
 * @param pc      The structure to update, str is an input.
 * @return        true/false - whether anything was parsed
 */
static bool parse_next(tok_ctx &ctx, chunk_t &pc);


/**
 * Parses all legal D string constants.
 *
 * Quoted strings:
 *   r"Wysiwyg"      # WYSIWYG string
 *   x"hexstring"    # Hexadecimal array
 *   `Wysiwyg`       # WYSIWYG string
 *   'char'          # single character
 *   "reg_string"    # regular string
 *
 * Non-quoted strings:
 * \x12              # 1-byte hex constant
 * \u1234            # 2-byte hex constant
 * \U12345678        # 4-byte hex constant
 * \123              # octal constant
 * \&amp;            # named entity
 * \n                # single character
 *
 * @param pc   The structure to update, str is an input.
 * @return     Whether a string was parsed
 */
static bool d_parse_string(tok_ctx &ctx, chunk_t &pc);


/**
 * Figure out the length of the comment at text.
 * The next bit of text starts with a '/', so it might be a comment.
 * There are three types of comments:
 *  - C comments that start with  '/ *' and end with '* /'
 *  - C++ comments that start with //
 *  - D nestable comments '/+' '+/'
 *
 * @param pc   The structure to update, str is an input.
 * @return     Whether a comment was parsed
 */
static bool parse_comment(tok_ctx &ctx, chunk_t &pc);


/**
 * Figure out the length of the code placeholder at text, if present.
 * This is only for Xcode which sometimes inserts temporary code placeholder chunks, which in plaintext <#look like this#>.
 *
 * @param pc   The structure to update, str is an input.
 * @return     Whether a placeholder was parsed.
 */
static bool parse_code_placeholder(tok_ctx &ctx, chunk_t &pc);


/**
 * Parse any attached suffix, which may be a user-defined literal suffix.
 * If for a string, explicitly exclude common format and scan specifiers, ie,
 * PRIx32 and SCNx64.
 */
static void parse_suffix(tok_ctx &ctx, chunk_t &pc, bool forstring);


static bool is_bin(int ch);
static bool is_bin_(int ch);
static bool is_oct(int ch);
static bool is_oct_(int ch);
static bool is_dec(int ch);
static bool is_dec_(int ch);
static bool is_hex(int ch);
static bool is_hex_(int ch);


/**
 * Count the number of characters in the number.
 * The next bit of text starts with a number (0-9 or '.'), so it is a number.
 * Count the number of characters in the number.
 *
 * This should cover all number formats for all languages.
 * Note that this is not a strict parser. It will happily parse numbers in
 * an invalid format.
 *
 * For example, only D allows underscores in the numbers, but they are
 * allowed in all formats.
 *
 * @param pc   The structure to update, str is an input.
 * @return     Whether a number was parsed
 */
static bool parse_number(tok_ctx &ctx, chunk_t &pc);


/**
 * Parses a D string constant at the current position.
 *
 * Quoted forms (", ', `, r"...", x"...") are handed to parse_string().
 * A run of backslash escapes (\x12, \u1234, \U12345678, \123, \&name;, \n)
 * is collected into one CT_STRING chunk. Escapes cut short by the end of the
 * input are truncated rather than padded.
 *
 * @param ctx  tokenizer cursor
 * @param pc   chunk to fill (str and type)
 * @return     true if a string was parsed; false leaves ctx where it was
 */
static bool d_parse_string(tok_ctx &ctx, chunk_t &pc)
{
   int ch = ctx.peek();

   if ((ch == '"') || (ch == '\'') || (ch == '`'))
   {
      return(parse_string(ctx, pc, 0, true));
   }
   else if (ch == '\\')
   {
      ctx.save();
      int cnt;
      pc.str.clear();
      while (ctx.peek() == '\\')
      {
         pc.str.append(ctx.get());
         /* Every branch below stops at end of file instead of appending -1 */
         switch (ctx.peek())
         {
         case 'x':
            /* \x HexDigit HexDigit */
            cnt = 3;
            while ((cnt-- > 0) && ctx.more())
            {
               pc.str.append(ctx.get());
            }
            break;

         case 'u':
            /* \u HexDigit HexDigit HexDigit HexDigit */
            cnt = 5;
            while ((cnt-- > 0) && ctx.more())
            {
               pc.str.append(ctx.get());
            }
            break;

         case 'U':
            /* \U HexDigit (x8) */
            cnt = 9;
            while ((cnt-- > 0) && ctx.more())
            {
               pc.str.append(ctx.get());
            }
            break;

         case '0':
         case '1':
         case '2':
         case '3':
         case '4':
         case '5':
         case '6':
         case '7':
            /* handle up to 3 octal digits */
            pc.str.append(ctx.get());
            ch = ctx.peek();
            if ((ch >= '0') && (ch <= '7'))
            {
               pc.str.append(ctx.get());
               ch = ctx.peek();
               if ((ch >= '0') && (ch <= '7'))
               {
                  pc.str.append(ctx.get());
               }
            }
            break;

         case '&':
            /* \& NamedCharacterEntity ; */
            pc.str.append(ctx.get());
            while (unc_isalpha(ctx.peek()))
            {
               pc.str.append(ctx.get());
            }
            if (ctx.peek() == ';')
            {
               pc.str.append(ctx.get());
            }
            break;

         default:
            /* Everything else is a single character, if there is one */
            if (ctx.more())
            {
               pc.str.append(ctx.get());
            }
            break;
         } // switch
      }

      /* a lone backslash is not a string */
      if (pc.str.size() > 1)
      {
         pc.type = CT_STRING;
         return(true);
      }
      ctx.restore();
   }
   else if (((ch == 'r') || (ch == 'x')) && (ctx.peek(1) == '"'))
   {
      /* one prefix character precedes the opening quote; no escapes */
      return(parse_string(ctx, pc, 1, false));
   }
   return(false);
} // d_parse_string


#if 0


/**
 * Finds the first occurrence of a NUL-terminated needle inside a haystack
 * whose length is given explicitly (the haystack need not be terminated).
 *
 * @return pointer to the match inside haystack, or NULL if there is none
 */
static const char *str_search(const char *needle, const char *haystack, int haystack_len)
{
   const int needle_len = strlen(needle);

   /* slide the window until fewer than needle_len characters remain */
   for (const char *pos = haystack; haystack_len >= needle_len; pos++, haystack_len--)
   {
      if (memcmp(needle, pos, needle_len) == 0)
      {
         return(pos);
      }
   }
   return(NULL);
}
#endif


/**
 * Parses one comment starting at the current position.
 *
 * Handles '//' comments (including backslash-newline continuation, except in
 * C#), '/ *' comments (adjacent ones separated only by spaces/tabs are joined
 * into one chunk) and, for D only, nestable '/+ +/' comments.
 * The comment text is stored in pc.str, pc.type is set to CT_COMMENT_CPP,
 * CT_COMMENT or CT_COMMENT_MULTI and pc.nl_count counts the contained
 * newlines. Afterwards the text is searched for the enable/disable
 * processing markers and cpd.unc_off is toggled accordingly.
 *
 * @param ctx  tokenizer cursor
 * @param pc   chunk to fill
 * @return     true if a comment was parsed; false leaves ctx where it was
 */
static bool parse_comment(tok_ctx &ctx, chunk_t &pc)
{
   bool is_d    = (cpd.lang_flags & LANG_D) != 0;          // forcing value to bool
   bool is_cs   = (cpd.lang_flags & LANG_CS) != 0;         // forcing value to bool
   int  d_level = 0;

   /* does this start with '/ /' or '/ *' or '/ +' (d) */
   if ((ctx.peek() != '/') ||
       ((ctx.peek(1) != '*') && (ctx.peek(1) != '/') &&
        ((ctx.peek(1) != '+') || !is_d)))
   {
      return(false);
   }

   ctx.save();

   /* account for opening two chars */
   pc.str = ctx.get();   /* opening '/' */
   int ch = ctx.get();
   pc.str.append(ch);    /* second char */

   if (ch == '/')
   {
      pc.type = CT_COMMENT_CPP;
      /* each pass of the outer loop consumes one physical line */
      while (true)
      {
         /* number of consecutive backslashes seen at the end of the line so far */
         int bs_cnt = 0;
         while (ctx.more())
         {
            ch = ctx.peek();
            if ((ch == '\r') || (ch == '\n'))
            {
               break;
            }
            if ((ch == '\\') && !is_cs) /* backslashes aren't special in comments in C# */
            {
               bs_cnt++;
            }
            else
            {
               bs_cnt = 0;
            }
            pc.str.append(ctx.get());
         }

         /* If we hit an odd number of backslashes right before the newline,
          * then we keep going.
          */
         if (((bs_cnt & 1) == 0) || !ctx.more())
         {
            break;
         }
         /* the line ending (CR, LF or CRLF) becomes part of the comment text */
         if (ctx.peek() == '\r')
         {
            pc.str.append(ctx.get());
         }
         if (ctx.peek() == '\n')
         {
            pc.str.append(ctx.get());
         }
         pc.nl_count++;
         cpd.did_newline = true;
      }
   }
   else if (!ctx.more())
   {
      /* unexpected end of file */
      ctx.restore();
      return(false);
   }
   else if (ch == '+')
   {
      /* D nesting comment: d_level tracks how many '/+' are still open */
      pc.type = CT_COMMENT;
      d_level++;
      while ((d_level > 0) && ctx.more())
      {
         if ((ctx.peek() == '+') && (ctx.peek(1) == '/'))
         {
            pc.str.append(ctx.get());  /* store the '+' */
            pc.str.append(ctx.get());  /* store the '/' */
            d_level--;
            continue;
         }

         if ((ctx.peek() == '/') && (ctx.peek(1) == '+'))
         {
            pc.str.append(ctx.get());  /* store the '/' */
            pc.str.append(ctx.get());  /* store the '+' */
            d_level++;
            continue;
         }

         ch = ctx.get();
         pc.str.append(ch);
         if ((ch == '\n') || (ch == '\r'))
         {
            /* a newline inside the comment makes it a multi-line comment */
            pc.type = CT_COMMENT_MULTI;
            pc.nl_count++;

            /* tally which line-ending style was seen */
            if (ch == '\r')
            {
               if (ctx.peek() == '\n')
               {
                  cpd.le_counts[LE_CRLF]++;
                  pc.str.append(ctx.get());  /* store the '\n' */
               }
               else
               {
                  cpd.le_counts[LE_CR]++;
               }
            }
            else
            {
               cpd.le_counts[LE_LF]++;
            }
         }
      }
   }
   else  /* must be '/ *' */
   {
      pc.type = CT_COMMENT;
      while (ctx.more())
      {
         if ((ctx.peek() == '*') && (ctx.peek(1) == '/'))
         {
            pc.str.append(ctx.get());  /* store the '*' */
            pc.str.append(ctx.get());  /* store the '/' */

            /* remember the position right after the closing marker */
            tok_info ss;
            ctx.save(ss);
            int      oldsize = pc.str.size();

            /* If there is another C comment right after this one, combine them */
            while ((ctx.peek() == ' ') || (ctx.peek() == '\t'))
            {
               pc.str.append(ctx.get());
            }
            if ((ctx.peek() != '/') || (ctx.peek(1) != '*'))
            {
               /* undo the attempt to join */
               ctx.restore(ss);
               pc.str.resize(oldsize);
               break;
            }
            /* otherwise fall through: the next comment's text is appended to this chunk */
         }

         ch = ctx.get();
         pc.str.append(ch);
         if ((ch == '\n') || (ch == '\r'))
         {
            /* a newline inside the comment makes it a multi-line comment */
            pc.type = CT_COMMENT_MULTI;
            pc.nl_count++;

            /* tally which line-ending style was seen */
            if (ch == '\r')
            {
               if (ctx.peek() == '\n')
               {
                  cpd.le_counts[LE_CRLF]++;
                  pc.str.append(ctx.get());  /* store the '\n' */
               }
               else
               {
                  cpd.le_counts[LE_CR]++;
               }
            }
            else
            {
               cpd.le_counts[LE_LF]++;
            }
         }
      }
   }

   /* Look for the processing on/off marker inside the comment text */
   if (cpd.unc_off)
   {
      /* processing is currently disabled: look for the text that re-enables it */
      const char *ontext = cpd.settings[UO_enable_processing_cmt].str;
      if ((ontext == NULL) || !ontext[0])
      {
         /* option not set: use the built-in marker */
         ontext = UNCRUSTIFY_ON_TEXT;
      }

      if (pc.str.find(ontext) >= 0)
      {
         LOG_FMT(LBCTRL, "Found '%s' on line %zu\n", ontext, pc.orig_line);
         cpd.unc_off = false;
      }
   }
   else
   {
      /* processing is currently enabled: look for the text that disables it */
      const char *offtext = cpd.settings[UO_disable_processing_cmt].str;
      if ((offtext == NULL) || !offtext[0])
      {
         /* option not set: use the built-in marker */
         offtext = UNCRUSTIFY_OFF_TEXT;
      }

      if (pc.str.find(offtext) >= 0)
      {
         LOG_FMT(LBCTRL, "Found '%s' on line %zu\n", offtext, pc.orig_line);
         cpd.unc_off = true;
         // Issue #842
         cpd.unc_off_used = true;
      }
   }
   return(true);
} // parse_comment


/**
 * Parses an Xcode-style placeholder of the form <# ... #>.
 *
 * @param ctx  tokenizer cursor
 * @param pc   chunk to fill; pc.type becomes CT_WORD on success
 * @return     true if a complete placeholder was read; otherwise the cursor
 *             is rewound and false is returned
 */
static bool parse_code_placeholder(tok_ctx &ctx, chunk_t &pc)
{
   /* a placeholder always opens with '<#' */
   const bool has_opener = (ctx.peek() == '<') && (ctx.peek(1) == '#');

   if (!has_opener)
   {
      return(false);
   }

   ctx.save();

   /* take the '<' and the '#' */
   pc.str = ctx.get();
   pc.str.append(ctx.get());

   /* scan for the closing '#>'; the '#' of the opener does not count */
   int prev = 0;
   while (ctx.more())
   {
      const int cur = ctx.get();
      pc.str.append(cur);

      if ((prev == '#') && (cur == '>'))
      {
         pc.type = CT_WORD;
         return(true);
      }
      prev = cur;
   }

   /* ran out of input before the closer was seen */
   ctx.restore();
   return(false);
}


/**
 * Appends an identifier-like suffix that directly follows a literal.
 *
 * @param ctx        tokenizer cursor, positioned right after the literal
 * @param pc         chunk whose str receives the suffix
 * @param forstring  true when the literal is a string: L" / L' / S" are then
 *                   left alone, and a suffix of 4+ characters starting with
 *                   PRI or SCN is not attached
 */
static void parse_suffix(tok_ctx &ctx, chunk_t &pc, bool forstring = false)
{
   /* nothing to do unless a keyword-start character follows */
   if (!CharTable::IsKw1(ctx.peek()))
   {
      return;
   }

   if (forstring)
   {
      /* don't add the suffix if we see L" or L' or S" */
      const int  first     = ctx.peek();
      const int  second    = ctx.peek(1);
      const bool is_l_quot = (first == 'L') && ((second == '"') || (second == '\''));
      const bool is_s_quot = (first == 'S') && (second == '"');

      if (is_l_quot || is_s_quot)
      {
         return;
      }
   }

   /* remember where we started so the suffix can be dropped again */
   const int start_size = pc.str.size();
   tok_info  start_pos;
   ctx.save(start_pos);

   int suffix_len = 0;
   for ( ; ctx.more() && CharTable::IsKw2(ctx.peek()); suffix_len++)
   {
      pc.str.append(ctx.get());
   }

   if (!forstring || (suffix_len < 4))
   {
      return;
   }

   /* format/scan macros such as PRIx32 or SCNx64 are not suffixes */
   if (pc.str.startswith("PRI", start_size) ||
       pc.str.startswith("SCN", start_size))
   {
      ctx.restore(start_pos);
      pc.str.resize(start_size);
   }
}


/* @return true if ch is a binary digit ('0' or '1') */
static bool is_bin(int ch)
{
   switch (ch)
   {
   case '0':
   case '1':
      return(true);

   default:
      return(false);
   }
}


/* @return true if ch is a binary digit or the '_' digit separator */
static bool is_bin_(int ch)
{
   if (ch == '_')
   {
      return(true);
   }
   return(is_bin(ch));
}


/* @return true if ch is an octal digit ('0'..'7') */
static bool is_oct(int ch)
{
   if (ch < '0')
   {
      return(false);
   }
   return(ch <= '7');
}


/* @return true if ch is an octal digit or the '_' digit separator */
static bool is_oct_(int ch)
{
   if (ch == '_')
   {
      return(true);
   }
   return(is_oct(ch));
}


/* @return true if ch is a decimal digit ('0'..'9') */
static bool is_dec(int ch)
{
   return(!((ch < '0') || (ch > '9')));
}


/* @return true if ch is a decimal digit or the '_' digit separator */
static bool is_dec_(int ch)
{
   if (ch == '_')
   {
      return(true);
   }
   return(is_dec(ch));
}


/* @return true if ch is a hexadecimal digit (0-9, a-f, A-F) */
static bool is_hex(int ch)
{
   if ((ch >= '0') && (ch <= '9'))
   {
      return(true);
   }
   if ((ch >= 'a') && (ch <= 'f'))
   {
      return(true);
   }
   return((ch >= 'A') && (ch <= 'F'));
}


/* @return true if ch is a hexadecimal digit or the '_' digit separator */
static bool is_hex_(int ch)
{
   if (ch == '_')
   {
      return(true);
   }
   return(is_hex(ch));
}


/**
 * Parses a numeric literal at the current position and appends it to pc.str.
 *
 * This is deliberately lenient: it accepts hex/binary/octal prefixes, the
 * Microsoft trailing-'h' hex form, '_' separators, a fraction, an exponent
 * and any mix of type suffixes, regardless of the language. Whatever
 * identifier characters remain afterwards are attached via parse_suffix().
 *
 * @param ctx  tokenizer cursor
 * @param pc   chunk to fill; pc.type becomes CT_NUMBER or CT_NUMBER_FP
 * @return     false (nothing consumed) if the text does not start a number
 */
static bool parse_number(tok_ctx &ctx, chunk_t &pc)
{
   /* A number must start with a digit or a dot, followed by a digit */
   if (!is_dec(ctx.peek()) &&
       ((ctx.peek() != '.') || !is_dec(ctx.peek(1))))
   {
      return(false);
   }

   bool is_float = (ctx.peek() == '.');
   if (is_float && (ctx.peek(1) == '.'))
   {
      return(false);
   }

   /* Check for Hex, Octal, or Binary
    * Note that only D and Pawn support binary, but who cares?
    */
   bool did_hex = false;
   if (ctx.peek() == '0')
   {
      pc.str.append(ctx.get());  /* store the '0' */
      int     ch;
      chunk_t pc_temp;
      size_t  pc_length;

      pc_temp.str.append('0');
      // MS constant might have an "h" at the end. Look for it
      /* scan ahead over all identifier characters, then rewind */
      ctx.save();
      while (ctx.more() && CharTable::IsKw2(ctx.peek()))
      {
         ch = ctx.get();
         pc_temp.str.append(ch);
      }
      pc_length = pc_temp.len();
      ch        = pc_temp.str[pc_length - 1];
      ctx.restore();
      LOG_FMT(LGUY, "%s(%d): pc_temp:%s\n", __func__, __LINE__, pc_temp.text());
      if (ch == 'h')
      {
         // we have an MS hexadecimal number with "h" at the end
         LOG_FMT(LGUY, "%s(%d): MS hexadecimal number\n", __func__, __LINE__);
         did_hex = true;
         do
         {
            pc.str.append(ctx.get()); /* store the rest */
         } while (is_hex_(ctx.peek()));
         /* For "0h" the loop above has already taken the 'h'; only store an
          * 'h' that is really there, never the character after the number.
          */
         if (ctx.peek() == 'h')
         {
            pc.str.append(ctx.get());    /* store the h */
         }
         LOG_FMT(LGUY, "%s(%d): pc:%s\n", __func__, __LINE__, pc.text());
      }
      else
      {
         switch (unc_toupper(ctx.peek()))
         {
         case 'X':               /* hex */
            did_hex = true;
            do
            {
               pc.str.append(ctx.get());  /* store the 'x' and then the rest */
            } while (is_hex_(ctx.peek()));
            break;

         case 'B':               /* binary */
            do
            {
               pc.str.append(ctx.get());  /* store the 'b' and then the rest */
            } while (is_bin_(ctx.peek()));
            break;

         case '0':                /* octal or decimal */
         case '1':
         case '2':
         case '3':
         case '4':
         case '5':
         case '6':
         case '7':
         case '8':
         case '9':
            do
            {
               pc.str.append(ctx.get());
            } while (is_oct_(ctx.peek()));
            break;

         default:
            /* either just 0 or 0.1 or 0UL, etc */
            break;
         }
      }
   }
   else
   {
      /* Regular int or float */
      while (is_dec_(ctx.peek()))
      {
         pc.str.append(ctx.get());
      }
   }

   /* Check if we stopped on a decimal point & make sure it isn't '..' */
   if ((ctx.peek() == '.') && (ctx.peek(1) != '.'))
   {
      pc.str.append(ctx.get());
      is_float = true;
      if (did_hex)
      {
         while (is_hex_(ctx.peek()))
         {
            pc.str.append(ctx.get());
         }
      }
      else
      {
         while (is_dec_(ctx.peek()))
         {
            pc.str.append(ctx.get());
         }
      }
   }

   /* Check exponent
    * Valid exponents per language (not that it matters):
    * C/C++/D/Java: eEpP
    * C#/Pawn:      eE
    */
   int exp_ch = unc_toupper(ctx.peek());
   if ((exp_ch == 'E') || (exp_ch == 'P'))
   {
      is_float = true;
      pc.str.append(ctx.get());
      if ((ctx.peek() == '+') || (ctx.peek() == '-'))
      {
         pc.str.append(ctx.get());
      }
      while (is_dec_(ctx.peek()))
      {
         pc.str.append(ctx.get());
      }
   }

   /* Check the suffixes
    * Valid suffixes per language (not that it matters):
    *        Integer       Float
    * C/C++: uUlL64        lLfF
    * C#:    uUlL          fFdDMm
    * D:     uUL           ifFL
    * Java:  lL            fFdD
    * Pawn:  (none)        (none)
    *
    * Note that i, f, d, and m only appear in floats.
    */
   while (1)
   {
      int suffix_ch = unc_toupper(ctx.peek());
      if ((suffix_ch == 'I') || (suffix_ch == 'F') || (suffix_ch == 'D') || (suffix_ch == 'M'))
      {
         is_float = true;
      }
      else if ((suffix_ch != 'L') && (suffix_ch != 'U'))
      {
         break;
      }
      pc.str.append(ctx.get());
   }

   /* skip the Microsoft-specific '64' suffix */
   if ((ctx.peek() == '6') && (ctx.peek(1) == '4'))
   {
      pc.str.append(ctx.get());
      pc.str.append(ctx.get());
   }

   pc.type = is_float ? CT_NUMBER_FP : CT_NUMBER;

   /* If there is anything left, then we are probably dealing with garbage or
    * some sick macro junk. Eat it.
    */
   parse_suffix(ctx, pc);

   return(true);
} // parse_number


/**
 * Parses a quoted string at the current position.
 *
 * The closing character is looked up from the opening one via CharTable.
 * Escape handling follows the string_escape_char / string_escape_char2
 * settings; tabs are rewritten as <escape>t when string_replace_tab_chars is
 * set for a C-family language. A newline inside the string (LF, CRLF or a
 * lone CR) turns the chunk into CT_STRING_MULTI. Any identifier suffix that
 * follows is attached via parse_suffix().
 *
 * @param ctx           tokenizer cursor
 * @param pc            chunk to fill (str, type, nl_count)
 * @param quote_idx     number of prefix characters before the opening quote
 * @param allow_escape  whether escape_char2 directly before the end character
 *                      escapes it
 * @return              always true
 */
static bool parse_string(tok_ctx &ctx, chunk_t &pc, int quote_idx, bool allow_escape)
{
   char escape_char        = cpd.settings[UO_string_escape_char].n;
   char escape_char2       = cpd.settings[UO_string_escape_char2].n;
   bool should_escape_tabs = cpd.settings[UO_string_replace_tab_chars].b && (cpd.lang_flags & LANG_ALLC);

   /* copy the prefix, if any */
   pc.str.clear();
   while (quote_idx-- > 0)
   {
      pc.str.append(ctx.get());
   }

   pc.type = CT_STRING;
   int end_ch = CharTable::Get(ctx.peek()) & 0xff;
   pc.str.append(ctx.get());  /* store the " */

   bool escaped = false;
   while (ctx.more())
   {
      int lastcol = ctx.c.col;
      int ch      = ctx.get();

      if ((ch == '\t') && should_escape_tabs)
      {
         /* the tab is replaced by two characters, so the column advances by 2 */
         ctx.c.col = lastcol + 2;
         pc.str.append(escape_char);
         pc.str.append('t');
         continue;
      }

      pc.str.append(ch);

      /* LF, or a CR that is not the first half of a CRLF, ends a line.
       * The character after a lone CR must NOT be consumed here: it may be
       * the closing quote or the start of an escape. For CRLF the newline is
       * counted when the LF is read on the next pass.
       */
      if ((ch == '\n') ||
          ((ch == '\r') && (ctx.peek() != '\n')))
      {
         pc.nl_count++;
         pc.type = CT_STRING_MULTI;
         escaped = false;
         continue;
      }
      if (!escaped)
      {
         if (ch == escape_char)
         {
            /* an escape char of 0 means escaping is turned off */
            escaped = (escape_char != 0);
         }
         else if ((ch == escape_char2) && (ctx.peek() == end_ch))
         {
            escaped = allow_escape;
         }
         else if (ch == end_ch)
         {
            break;
         }
      }
      else
      {
         /* this character was escaped; the next one is not */
         escaped = false;
      }
   }

   parse_suffix(ctx, pc, true);
   return(true);
} // parse_string


/**
 * Parses a C# literal (verbatim) string. The two opening characters are taken
 * as-is; the string ends at a '"' that is not followed by another '"'.
 * Tabs cannot be escaped in such a string, so when tab replacement is
 * requested a warning is logged once instead.
 *
 * @param ctx  tokenizer cursor
 * @param pc   chunk to fill (str, type, nl_count)
 * @return     always true
 */
static bool parse_cs_string(tok_ctx &ctx, chunk_t &pc)
{
   /* the two-character opener */
   pc.str = ctx.get();
   pc.str.append(ctx.get());
   pc.type = CT_STRING;

   const bool want_tab_escape = cpd.settings[UO_string_replace_tab_chars].b;

   /* read until the end of the input or an unpaired " */
   while (ctx.more())
   {
      const int ch = ctx.get();
      pc.str.append(ch);

      switch (ch)
      {
      case '\n':
      case '\r':
         pc.type = CT_STRING_MULTI;
         pc.nl_count++;
         break;

      case '\t':
         if (want_tab_escape && !cpd.warned_unable_string_replace_tab_chars)
         {
            /* only complain about this once */
            cpd.warned_unable_string_replace_tab_chars = true;

            log_sev_t warnlevel = (log_sev_t)cpd.settings[UO_warn_level_tabs_found_in_verbatim_string_literals].n;

            /* escapes have no meaning inside this kind of string, so the tab has to stay */
            LOG_FMT(warnlevel, "%s:%zu Detected non-replaceable tab char in literal string\n", cpd.filename, pc.orig_line);
            if (warnlevel < LWARN)
            {
               cpd.error_count++;
            }
         }
         break;

      case '"':
         if (ctx.peek() != '"')
         {
            /* unpaired quote: end of the string */
            return(true);
         }
         /* "" is an embedded quote */
         pc.str.append(ctx.get());
         break;

      default:
         break;
      }
   }

   return(true);
} // parse_cs_string


/**
 * Parses a C# interpolated string ($"...").
 * Outside of braces, "" is an embedded quote and {{ is a literal brace;
 * a single { opens an embedded expression. While inside braces only } is
 * looked at: }} is skipped and a single } closes one level.
 *
 * @param ctx  tokenizer cursor
 * @param pc   chunk to fill (str, type)
 * @return     always true
 */
static bool parse_cs_interpolated_string(tok_ctx &ctx, chunk_t &pc)
{
   pc.str = ctx.get();        // '$'
   pc.str.append(ctx.get());  // '"'
   pc.type = CT_STRING;

   int brace_level = 0;

   /* read until the end of the input or an unpaired " outside of braces */
   while (ctx.more())
   {
      const int ch = ctx.get();
      pc.str.append(ch);

      if (brace_level > 0)
      {
         /* inside { }: everything except '}' is ignored */
         if (ch != '}')
         {
            continue;
         }
         if (ctx.peek() == '}')
         {
            // a doubled '}' leaves the level alone; take the second '}'
            pc.str.append(ctx.get());
         }
         else
         {
            brace_level--;
         }
         continue;
      }

      if (ch == '{')
      {
         if (ctx.peek() == '{')
         {
            // a doubled '{' is a literal brace; take the second '{'
            pc.str.append(ctx.get());
         }
         else
         {
            brace_level++;
         }
      }
      else if (ch == '"')
      {
         if (ctx.peek() != '"')
         {
            /* unpaired quote: end of the string */
            return(true);
         }
         pc.str.append(ctx.get());
      }
   }

   return(true);
} // parse_cs_interpolated_string


/**
 * Parses a triple-quoted verbatim string. The three opening characters are
 * taken as-is; reading stops after the next """ or at the end of the input.
 *
 * @param ctx  tokenizer cursor
 * @param pc   chunk to fill (str, type, nl_count)
 */
static void parse_verbatim_string(tok_ctx &ctx, chunk_t &pc)
{
   pc.type = CT_STRING;

   // the opening """
   pc.str = ctx.get();
   pc.str.append(ctx.get());
   pc.str.append(ctx.get());

   while (ctx.more())
   {
      const int ch = ctx.get();
      pc.str.append(ch);

      if (ch == '"')
      {
         if ((ctx.peek() == '"') && (ctx.peek(1) == '"'))
         {
            /* closing """: take the remaining two quotes and stop */
            pc.str.append(ctx.get());
            pc.str.append(ctx.get());
            return;
         }
      }
      else if ((ch == '\n') || (ch == '\r'))
      {
         pc.type = CT_STRING_MULTI;
         pc.nl_count++;
      }
   }
}


/**
 * Compares two runs of 'len' characters inside the same deque.
 *
 * @param d      the character data
 * @param a_idx  start index of the first run
 * @param b_idx  start index of the second run
 * @param len    number of characters to compare
 * @return       true if both runs are identical (always true when the start
 *               indices are equal or len is not positive); a run that leaves
 *               the bounds of d counts as a mismatch
 */
static bool tag_compare(const deque<int> &d, int a_idx, int b_idx, int len)
{
   if (a_idx == b_idx)
   {
      return(true);
   }

   const int size = (int)d.size();

   for (int off = 0; off < len; off++)
   {
      /* advance both positions together */
      const int a = a_idx + off;
      const int b = b_idx + off;

      if ((a < 0) || (b < 0) || (a >= size) || (b >= size))
      {
         return(false);
      }
      if (d[a] != d[b])
      {
         return(false);
      }
   }
   return(true);
}


/**
 * Parses a raw string of the form <prefix>"tag( ... )tag".
 *
 * @param ctx    tokenizer cursor, positioned at the first prefix character
 * @param pc     chunk to fill (str, type, nl_count)
 * @param q_idx  offset of the opening quote from the current position
 * @return       true if a complete raw string was read; otherwise the cursor
 *               is rewound and false is returned
 */
static bool parse_cr_string(tok_ctx &ctx, chunk_t &pc, int q_idx)
{
   /* the tag starts right after the opening quote */
   const int tag_start = ctx.c.idx + q_idx + 1;
   int       tag_size  = 0;

   ctx.save();

   /* prefix and opening quote */
   pc.str.clear();
   for (int left = q_idx + 1; left != 0; left--)
   {
      pc.str.append(ctx.get());
   }

   /* everything up to the '(' is the tag */
   for ( ; ctx.more() && (ctx.peek() != '('); tag_size++)
   {
      pc.str.append(ctx.get());
   }
   if (ctx.peek() != '(')
   {
      ctx.restore();
      return(false);
   }

   pc.type = CT_STRING;
   while (ctx.more())
   {
      /* the string ends with ')' + tag + '"' */
      const bool at_end = (ctx.peek() == ')') &&
                          (ctx.peek(tag_size + 1) == '"') &&
                          tag_compare(ctx.data, tag_start, ctx.c.idx + 1, tag_size);
      if (at_end)
      {
         /* the tag plus the ')' and the '"' */
         for (int left = tag_size + 2; left != 0; left--)
         {
            pc.str.append(ctx.get());
         }
         parse_suffix(ctx, pc);
         return(true);
      }

      const int ch = ctx.get();
      pc.str.append(ch);
      if (ch == '\n')
      {
         pc.nl_count++;
         pc.type = CT_STRING_MULTI;
      }
   }

   /* no terminator found */
   ctx.restore();
   return(false);
} // parse_cr_string


/**
 * Reads a word and decides what kind of token it is.
 * The first character is taken unconditionally; the caller has already
 * established that it may start a word.
 *
 * @param ctx        tokenizer cursor
 * @param pc         chunk to fill (str and type)
 * @param skipcheck  if true, the word stays CT_WORD and no macro, annotation
 *                   or keyword classification is done
 * @return           Whether a word was parsed (always true)
 */
bool parse_word(tok_ctx &ctx, chunk_t &pc, bool skipcheck)
{
   static unc_text intr_txt("@interface");

   pc.str.clear();
   pc.str.append(ctx.get());

   while (ctx.more())
   {
      const int  next    = ctx.peek();
      const bool kw_char = CharTable::IsKw2(next);
      /* a backslash followed by 'u' or 'U' is accepted inside a word */
      const bool bs_u = !kw_char && (next == '\\') &&
                        (unc_tolower(ctx.peek(1)) == 'u');

      if (!kw_char && !bs_u)
      {
         break;
      }

      pc.str.append(ctx.get());
      if (bs_u)
      {
         /* also take the 'u'; such a word is never looked up as a keyword */
         pc.str.append(ctx.get());
         skipcheck = true;
      }

      /* HACK: Non-ASCII character are only allowed in identifiers */
      if (next > 0x7f)
      {
         skipcheck = true;
      }
   }
   pc.type = CT_WORD;

   if (skipcheck)
   {
      return(true);
   }

   /* The first word after '#define' is the macro name */
   const bool is_define_name = (cpd.in_preproc == CT_PP_DEFINE) &&
                               (cpd.preproc_ncnl_count == 1);

   if (is_define_name)
   {
      pc.type = (ctx.peek() == '(') ? CT_MACRO_FUNC : CT_MACRO;
   }
   else if ((cpd.lang_flags & LANG_JAVA) && pc.str.startswith("@") &&
            !pc.str.equals(intr_txt))
   {
      /* Java: any '@word' other than the reserved '@interface' */
      pc.type = CT_ANNOTATION;
   }
   else
   {
      /* Look the word up in the keyword table */
      pc.type = find_keyword_type(pc.text(), pc.str.size());
   }

   return(true);
} // parse_word


/**
 * Consumes a run of whitespace, counting newlines and line-ending styles on
 * the way and tracking the amount of space since the last newline in
 * pc.orig_prev_sp.
 *
 * @param ctx  tokenizer cursor
 * @param pc   chunk to fill; becomes CT_NEWLINE if a newline was seen,
 *             CT_WHITESPACE otherwise
 * @return     true if at least one whitespace character was consumed
 */
static bool parse_whitespace(tok_ctx &ctx, chunk_t &pc)
{
   int  newlines = 0;
   bool consumed = false;

   /* REVISIT: use a better whitespace detector? */
   while (ctx.more() && unc_isspace(ctx.peek()))
   {
      const int ch = ctx.get();
      consumed = true;

      if (ch == '\r')
      {
         /* CRLF if a '\n' follows (it is consumed too), else a lone CR */
         if (ctx.expect('\n'))
         {
            cpd.le_counts[LE_CRLF]++;
         }
         else
         {
            cpd.le_counts[LE_CR]++;
         }
         newlines++;
         pc.orig_prev_sp = 0;
      }
      else if (ch == '\n')
      {
         cpd.le_counts[LE_LF]++;
         newlines++;
         pc.orig_prev_sp = 0;
      }
      else if (ch == '\t')
      {
         /* a tab counts as the distance to the next tab stop */
         pc.orig_prev_sp += calc_next_tab_column(cpd.column, cpd.settings[UO_input_tab_size].u) - cpd.column;
      }
      else if (ch == ' ')
      {
         pc.orig_prev_sp++;
      }
      /* any other whitespace character is dropped without bookkeeping */
   }

   if (!consumed)
   {
      return(false);
   }

   pc.str.clear();
   pc.nl_count  = newlines;
   pc.type      = newlines ? CT_NEWLINE : CT_WHITESPACE;
   pc.after_tab = (ctx.c.last_ch == '\t');
   return(true);
} // parse_whitespace


/**
 * Tries to parse a backslash-newline (line continuation).
 *
 * The caller has already seen a '\' at the current position. Whitespace
 * between the backslash and the line ending is skipped. If no line ending
 * follows, the context is restored to where it was on entry.
 *
 * @param ctx  the tokenizer context to read from
 * @param pc   on success: str = "\", type = CT_NL_CONT, nl_count = 1
 * @return     true if a continuation was consumed, false otherwise
 */
static bool parse_bs_newline(tok_ctx &ctx, chunk_t &pc)
{
   ctx.save();
   ctx.get(); /* skip the '\' */

   while (ctx.more())
   {
      const int next = ctx.peek();

      if (!unc_isspace(next))
      {
         break;
      }
      ctx.get();

      if (next == '\r')
      {
         /* swallow the LF of a CRLF pair, if present */
         ctx.expect('\n');
      }
      else if (next != '\n')
      {
         /* plain blank/tab between the backslash and the line ending */
         continue;
      }

      pc.str      = "\\";
      pc.type     = CT_NL_CONT;
      pc.nl_count = 1;
      return(true);
   }

   /* no line ending followed the backslash: undo everything */
   ctx.restore();
   return(false);
}


/**
 * Consumes optional spaces/tabs followed by exactly one line ending
 * (LF, CR or CRLF).
 *
 * @param ctx  the tokenizer context to read from
 * @return     true if a line ending was consumed; false (with the context
 *             restored to its state on entry) if something else was found
 */
static bool parse_newline(tok_ctx &ctx)
{
   ctx.save();

   /* Eat leading blanks and tabs */
   int ch = ctx.peek();
   while ((ch == ' ') || (ch == '\t'))
   {
      ctx.get();
      ch = ctx.peek();
   }

   if ((ch != '\r') && (ch != '\n'))
   {
      ctx.restore();
      return(false);
   }

   if (!ctx.expect('\n'))
   {
      /* it was a CR: take it, plus a LF if one follows */
      ctx.get();
      ctx.expect('\n');
   }
   return(true);
}


/**
 * Collects a PAWN pattern into pc.
 *
 * Characters are appended to pc.str until whitespace, an escaped newline
 * (a '\' immediately followed by CR or LF) or the end of the input is
 * reached. The terminating character is left unconsumed.
 *
 * @param ctx  the tokenizer context to read from
 * @param pc   the chunk to fill in; str is cleared first
 * @param tt   the token type to assign to pc
 */
static void parse_pawn_pattern(tok_ctx &ctx, chunk_t &pc, c_token_t tt)
{
   pc.str.clear();
   pc.type = tt;

   /* peek() returns -1 once the data is exhausted; the more() guard keeps
    * the loop from appending forever when the pattern runs to end of input */
   while (ctx.more() && !unc_isspace(ctx.peek()))
   {
      /* end the pattern on an escaped newline */
      if (ctx.peek() == '\\')
      {
         int ch = ctx.peek(1);
         if ((ch == '\n') || (ch == '\r'))
         {
            break;
         }
      }
      pc.str.append(ctx.get());
   }
}


static bool parse_ignored(tok_ctx &ctx, chunk_t &pc)
{
   int nl_count = 0;

   /* Parse off newlines/blank lines */
   while (parse_newline(ctx))
   {
      nl_count++;
   }
   if (nl_count > 0)
   {
      pc.nl_count = nl_count;
      pc.type     = CT_NEWLINE;
      return(true);
   }

   /* See if the UO_enable_processing_cmt text is on this line */
   ctx.save();
   pc.str.clear();
   while (ctx.more() &&
          (ctx.peek() != '\r') &&
          (ctx.peek() != '\n'))
   {
      pc.str.append(ctx.get());
   }
   if (pc.str.size() == 0)
   {
      /* end of file? */
      return(false);
   }
   /* Note that we aren't actually making sure this is in a comment, yet */
   const char *ontext = cpd.settings[UO_enable_processing_cmt].str;
   if (ontext == NULL)
   {
      ontext = UNCRUSTIFY_ON_TEXT;
   }
   if (pc.str.find(ontext) < 0)
   {
      pc.type = CT_IGNORED;
      return(true);
   }
   ctx.restore();

   /* parse off whitespace leading to the comment */
   if (parse_whitespace(ctx, pc))
   {
      pc.type = CT_IGNORED;
      return(true);
   }

   /* Look for the ending comment and let it pass */
   if (parse_comment(ctx, pc) && !cpd.unc_off)
   {
      return(true);
   }

   /* Reset the chunk & scan to until a newline */
   pc.str.clear();
   while (ctx.more() &&
          (ctx.peek() != '\r') &&
          (ctx.peek() != '\n'))
   {
      pc.str.append(ctx.get());
   }
   if (pc.str.size() > 0)
   {
      pc.type = CT_IGNORED;
      return(true);
   }
   return(false);
} // parse_ignored


/**
 * Extracts the next chunk from the input and stores it in pc.
 *
 * The specialised parsers are tried in a fixed order; the first one that
 * succeeds decides the chunk type. If nothing matches, a single character is
 * consumed as CT_UNKNOWN and an error is counted, so every call that starts
 * with input available consumes something and returns true.
 *
 * @param ctx  the tokenizer context to read from
 * @param pc   the chunk to fill in; orig_line, column, orig_col, type,
 *             nl_count and flags are reset before parsing starts
 * @return     false only if no input remains, true otherwise
 */
static bool parse_next(tok_ctx &ctx, chunk_t &pc)
{
   if (!ctx.more())
   {
      //fprintf(stderr, "All done!\n");
      return(false);
   }

   /* Save off the current column */
   pc.orig_line = ctx.c.row;
   pc.column    = ctx.c.col;
   pc.orig_col  = ctx.c.col;
   pc.type      = CT_NONE;
   pc.nl_count  = 0;
   pc.flags     = 0;

   /* While processing is turned off, parse_ignored() emits the text as
    * CT_IGNORED / CT_NEWLINE chunks. If it returns false we fall through
    * to the normal parsing below. */
   if (cpd.unc_off)
   {
      if (parse_ignored(ctx, pc))
      {
         return(true);
      }
   }

   /**
    * Parse whitespace
    */
   if (parse_whitespace(ctx, pc))
   {
      return(true);
   }

   /**
    * Handle unknown/unhandled preprocessors
    */
   if ((cpd.in_preproc > CT_PP_BODYCHUNK) &&
       (cpd.in_preproc <= CT_PP_OTHER))
   {
      pc.str.clear();
      /* ss always holds the position just before the last consumed char */
      tok_info ss;
      ctx.save(ss);
      /* Chunk to a newline or comment */
      pc.type = CT_PREPROC_BODY;
      int last = 0;
      while (ctx.more())
      {
         int ch = ctx.peek();

         if ((ch == '\n') || (ch == '\r'))
         {
            /* Back off if this is an escaped newline: un-consume the
             * backslash and drop it from the text so that it can be
             * parsed as a line continuation afterwards */
            if (last == '\\')
            {
               ctx.restore(ss);
               pc.str.pop_back();
            }
            break;
         }

         /* Quit on a C++ comment start */
         if ((ch == '/') && (ctx.peek(1) == '/'))
         {
            break;
         }
         last = ch;
         ctx.save(ss);

         pc.str.append(ctx.get());
      }
      /* An empty body falls through to the regular parsers */
      if (pc.str.size() > 0)
      {
         return(true);
      }
   }

   /**
    * Detect backslash-newline
    */
   if ((ctx.peek() == '\\') && parse_bs_newline(ctx, pc))
   {
      return(true);
   }

   /**
    * Parse comments
    */
   if (parse_comment(ctx, pc))
   {
      return(true);
   }

   /* Parse code placeholders */
   if (parse_code_placeholder(ctx, pc))
   {
      return(true);
   }

   /* Check for C# literal strings, i.e. @"hello", and identifiers such as @for */
   if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '@'))
   {
      if (ctx.peek(1) == '"')
      {
         parse_cs_string(ctx, pc);
         return(true);
      }
      /* check for non-keyword identifiers such as @if @switch, etc */
      if (CharTable::IsKw1(ctx.peek(1)))
      {
         parse_word(ctx, pc, true);
         return(true);
      }
   }

   /* Check for C# Interpolated strings */
   if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '$') && (ctx.peek(1) == '"'))
   {
      parse_cs_interpolated_string(ctx, pc);
      return(true);
   }

   /* handle VALA """ strings """ */
   if ((cpd.lang_flags & LANG_VALA) &&
       (ctx.peek() == '"') &&
       (ctx.peek(1) == '"') &&
       (ctx.peek(2) == '"'))
   {
      parse_verbatim_string(ctx, pc);
      return(true);
   }

   /* handle C++0x strings u8"x" u"x" U"x" R"x" u8R"XXX(I'm a "raw UTF-8" string.)XXX" */
   int ch = ctx.peek();
   if ((cpd.lang_flags & LANG_CPP) &&
       ((ch == 'u') || (ch == 'U') || (ch == 'R')))
   {
      /* idx counts the prefix characters in front of the opening quote;
       * is_real is set when the prefix ends in 'R' (a raw string) */
      int  idx     = 0;
      bool is_real = false;

      if ((ch == 'u') && (ctx.peek(1) == '8'))
      {
         idx = 2;
      }
      else if (unc_tolower(ch) == 'u')
      {
         idx++;
      }

      if (ctx.peek(idx) == 'R')
      {
         idx++;
         is_real = true;
      }
      if (ctx.peek(idx) == '"')
      {
         if (is_real)
         {
            if (parse_cr_string(ctx, pc, idx))
            {
               return(true);
            }
         }
         else
         {
            if (parse_string(ctx, pc, idx, true))
            {
               parse_suffix(ctx, pc, true);
               return(true);
            }
         }
      }
      /* not a prefixed string after all: keep trying the parsers below */
   }

   /* PAWN specific stuff */
   if (cpd.lang_flags & LANG_PAWN)
   {
      /* The first token after '#define' / '#emit' is taken as a pattern */
      if ((cpd.preproc_ncnl_count == 1) &&
          ((cpd.in_preproc == CT_PP_DEFINE) ||
           (cpd.in_preproc == CT_PP_EMIT)))
      {
         parse_pawn_pattern(ctx, pc, CT_MACRO);
         return(true);
      }
      /* Check for PAWN strings: \"hi" or !"hi" or !\"hi" or \!"hi" */
      if ((ctx.peek() == '\\') || (ctx.peek() == '!'))
      {
         if (ctx.peek(1) == '"')
         {
            parse_string(ctx, pc, 1, (ctx.peek() == '!'));
            return(true);
         }
         else if (((ctx.peek(1) == '\\') || (ctx.peek(1) == '!')) &&
                  (ctx.peek(2) == '"'))
         {
            parse_string(ctx, pc, 2, false);
            return(true);
         }
      }

      /* handle PAWN preprocessor args %0 .. %9 */
      if ((cpd.in_preproc == CT_PP_DEFINE) &&
          (ctx.peek() == '%') &&
          unc_isdigit(ctx.peek(1)))
      {
         pc.str.clear();
         pc.str.append(ctx.get());
         pc.str.append(ctx.get());
         pc.type = CT_WORD;
         return(true);
      }
   }

   /**
    * Parse numbers first, then strings and character constants
    */

//parse_word(ctx, pc_temp, true);
//ctx.restore(ctx.c);
   if (parse_number(ctx, pc))
   {
      return(true);
   }

   if (cpd.lang_flags & LANG_D)
   {
      /* D specific stuff */
      if (d_parse_string(ctx, pc))
      {
         return(true);
      }
   }
   else
   {
      /* Not D stuff */

      /* Check for L'a', L"abc", S'a', S"abc", 'a', "abc" strings, and
       * <abc> when inside an #include */
      ch = ctx.peek();
      int ch1 = ctx.peek(1);
      if ((((ch == 'L') || (ch == 'S')) &&
           ((ch1 == '"') || (ch1 == '\''))) ||
          (ch == '"') ||
          (ch == '\'') ||
          ((ch == '<') && (cpd.in_preproc == CT_PP_INCLUDE)))
      {
         /* a one-letter prefix (L or S) means the quote is at offset 1 */
         parse_string(ctx, pc, unc_isalpha(ch) ? 1 : 0, true);
         return(true);
      }

      if ((ch == '<') && (cpd.in_preproc == CT_PP_DEFINE))
      {
         if (chunk_get_tail()->type == CT_MACRO)
         {
            /* We have "#define XXX <", assume '<' starts an include string */
            parse_string(ctx, pc, 0, false);
            return(true);
         }
      }
   }

   /* Check for Objective C literals and VALA identifiers ('@1', '@if') */
   if ((cpd.lang_flags & (LANG_OC | LANG_VALA)) && (ctx.peek() == '@'))
   {
      int nc = ctx.peek(1);
      if ((nc == '"') || (nc == '\''))
      {
         /* literal string */
         parse_string(ctx, pc, 1, true);
         return(true);
      }
      else if ((nc >= '0') && (nc <= '9'))
      {
         /* literal number */
         pc.str.append(ctx.get());  /* store the '@' */
         parse_number(ctx, pc);
         return(true);
      }
   }

   /* Check for pawn/ObjectiveC/Java and normal identifiers:
    * a keyword start character, a '\u' / '\U' escape, or '@' + keyword start */
   if (CharTable::IsKw1(ctx.peek()) ||
       ((ctx.peek() == '\\') && (unc_tolower(ctx.peek(1)) == 'u')) ||
       ((ctx.peek() == '@') && CharTable::IsKw1(ctx.peek(1))))
   {
      parse_word(ctx, pc, false);
      return(true);
   }

   /* see if we have a punctuator; the lookup gets the next four characters
    * (peek() yields -1 for positions past the end of the data) */
   char punc_txt[4];
   punc_txt[0] = ctx.peek();
   punc_txt[1] = ctx.peek(1);
   punc_txt[2] = ctx.peek(2);
   punc_txt[3] = ctx.peek(3);
   const chunk_tag_t *punc;
   if ((punc = find_punctuator(punc_txt, cpd.lang_flags)) != NULL)
   {
      /* consume exactly as many characters as the matched tag is long */
      int cnt = strlen(punc->tag);
      while (cnt--)
      {
         pc.str.append(ctx.get());
      }
      pc.type   = punc->type;
      pc.flags |= PCF_PUNCTUATOR;
      return(true);
   }

   /* Nothing matched: consume one character as CT_UNKNOWN, log it and
    * count an error */
   pc.type = CT_UNKNOWN;
   pc.str.append(ctx.get());

   LOG_FMT(LWARN, "%s:%zu Garbage in col %d: %x\n",
           cpd.filename, pc.orig_line, (int)ctx.c.col, pc.str[0]);
   cpd.error_count++;
   return(true);
} // parse_next


/**
 * This function parses or tokenizes the whole buffer into a list.
 * It has to do some tricks to parse preprocessors.
 *
 * If output_text() were called immediately after, two things would happen:
 *  - trailing whitespace is removed.
 *  - leading spaces & tabs are converted to the appropriate format.
 *
 * All the tokens are inserted before ref. If ref is NULL, they are inserted
 * at the end of the list.  Line numbers are relative to the start of the data.
 *
 * Besides building the chunk list, this sets cpd.unc_stage, tracks the
 * preprocessor state in cpd.in_preproc / cpd.preproc_ncnl_count while
 * parsing, and finally picks cpd.newline from the configured or detected
 * line-ending style.
 *
 * @param data  the text to tokenize, one character per element
 * @param ref   chunk to insert before, or NULL to append
 */
void tokenize(const deque<int> &data, chunk_t *ref)
{
   tok_ctx       ctx(data);
   chunk_t       chunk;          /* scratch chunk filled in by parse_next() */
   chunk_t       *pc    = NULL;  /* the chunk most recently added to the list */
   chunk_t       *rprev = NULL;  /* the chunk added before pc */
   /* NOTE(review): frm is zeroed below but not otherwise referenced in this function */
   parse_frame_t frm;
   bool          last_was_tab = false;
   int           prev_sp      = 0;   /* orig_prev_sp of the last whitespace run */

   cpd.unc_stage = US_TOKENIZE;

   memset(&frm, 0, sizeof(frm));

   while (ctx.more())
   {
      chunk.reset();
      if (!parse_next(ctx, chunk))
      {
         LOG_FMT(LERR, "%s:%d Bailed before the end?\n",
                 cpd.filename, ctx.c.row);
         cpd.error_count++;
         break;
      }

      /* Don't create an entry for whitespace; just remember its properties
       * so they can be attached to the chunk that follows */
      if (chunk.type == CT_WHITESPACE)
      {
         last_was_tab = chunk.after_tab;
         prev_sp      = chunk.orig_prev_sp;
         continue;
      }
      chunk.orig_prev_sp = prev_sp;
      prev_sp            = 0;

      if (chunk.type == CT_NEWLINE)
      {
         last_was_tab    = chunk.after_tab;
         chunk.after_tab = false;
         chunk.str.clear();
      }
      else if (chunk.type == CT_NL_CONT)
      {
         last_was_tab    = chunk.after_tab;
         chunk.after_tab = false;
         chunk.str       = "\\\n";
      }
      else
      {
         chunk.after_tab = last_was_tab;
         last_was_tab    = false;
      }

      /* Strip trailing whitespace (for CPP comments and PP blocks) */
      while ((chunk.str.size() > 0) &&
             ((chunk.str[chunk.str.size() - 1] == ' ') ||
              (chunk.str[chunk.str.size() - 1] == '\t')))
      {
         // If comment contains backslash '\' followed by whitespace chars, keep last one;
         // this will prevent it from turning '\' into line continuation.
         if ((chunk.str.size() > 1) && (chunk.str[chunk.str.size() - 2] == '\\'))
         {
            break;
         }
         chunk.str.pop_back();
      }

      /* Store off the end column */
      chunk.orig_col_end = ctx.c.col;

      /* Add the chunk to the list */
      /* NOTE(review): pc has not been reassigned yet, so rprev and pc point
       * to the same (previously added) chunk inside this block: the flag
       * copy reads and writes the same chunk, and the newline test looks at
       * the previous chunk rather than the one about to be added. Confirm
       * whether 'chunk' was the intended target. */
      rprev = pc;
      if (rprev != NULL)
      {
         chunk_flags_set(pc, rprev->flags & PCF_COPY_FLAGS);

         /* a newline can't be in a preprocessor */
         if (pc->type == CT_NEWLINE)
         {
            chunk_flags_clr(pc, PCF_IN_PREPROC);
         }
      }
      /* Chunks placed in front of an existing chunk are flagged as inserted */
      if (ref != NULL)
      {
         chunk.flags |= PCF_INSERTED;
      }
      else
      {
         chunk.flags &= ~PCF_INSERTED;
      }
      pc = chunk_add_before(&chunk, ref);

      /* A newline marks the end of a preprocessor */
      if (pc->type == CT_NEWLINE) // || (pc->type == CT_COMMENT_MULTI))
      {
         cpd.in_preproc         = CT_NONE;
         cpd.preproc_ncnl_count = 0;
      }

      /* Special handling for preprocessor stuff */
      if (cpd.in_preproc != CT_NONE)
      {
         chunk_flags_set(pc, PCF_IN_PREPROC);

         /* Count words after the preprocessor */
         if (!chunk_is_comment(pc) && !chunk_is_newline(pc))
         {
            cpd.preproc_ncnl_count++;
         }

         /* Figure out the type of preprocessor for #include parsing:
          * the first chunk after the '#' names the directive; anything
          * outside CT_PP_DEFINE..CT_PP_OTHER is treated as CT_PP_OTHER */
         if (cpd.in_preproc == CT_PREPROC)
         {
            if ((pc->type < CT_PP_DEFINE) || (pc->type > CT_PP_OTHER))
            {
               set_chunk_type(pc, CT_PP_OTHER);
            }
            cpd.in_preproc = pc->type;
         }
      }
      else
      {
         /* Check for a preprocessor start: a '#' that is the first chunk
          * or directly follows a newline chunk */
         if ((pc->type == CT_POUND) &&
             ((rprev == NULL) || (rprev->type == CT_NEWLINE)))
         {
            set_chunk_type(pc, CT_PREPROC);
            pc->flags     |= PCF_IN_PREPROC;
            cpd.in_preproc = CT_PREPROC;
         }
      }
      if (pc->type == CT_NEWLINE)
      {
         LOG_FMT(LGUY, "%s(%d): (%zu)<NL> col=%zu\n",
                 __func__, __LINE__, pc->orig_line, pc->orig_col);
      }
      else
      {
         LOG_FMT(LGUY, "%s(%d): text():%s, type:%s, orig_col=%zu, orig_col_end=%d\n",
                 __func__, __LINE__, pc->text(), get_token_name(pc->type), pc->orig_col, pc->orig_col_end);
      }
   }

   /* Set the cpd.newline string for this file. An explicit UO_newlines
    * setting wins; with LE_AUTO the most frequent ending seen is used,
    * ties going to LF first and then to CRLF. */
   if ((cpd.settings[UO_newlines].le == LE_LF) ||
       ((cpd.settings[UO_newlines].le == LE_AUTO) &&
        (cpd.le_counts[LE_LF] >= cpd.le_counts[LE_CRLF]) &&
        (cpd.le_counts[LE_LF] >= cpd.le_counts[LE_CR])))
   {
      /* LF line ends */
      cpd.newline = "\n";
      LOG_FMT(LLINEENDS, "Using LF line endings\n");
   }
   else if ((cpd.settings[UO_newlines].le == LE_CRLF) ||
            ((cpd.settings[UO_newlines].le == LE_AUTO) &&
             (cpd.le_counts[LE_CRLF] >= cpd.le_counts[LE_LF]) &&
             (cpd.le_counts[LE_CRLF] >= cpd.le_counts[LE_CR])))
   {
      /* CRLF line ends */
      cpd.newline = "\r\n";
      LOG_FMT(LLINEENDS, "Using CRLF line endings\n");
   }
   else
   {
      /* CR line ends */
      cpd.newline = "\r";
      LOG_FMT(LLINEENDS, "Using CR line endings\n");
   }
} // tokenize


// /**
//  * A simplistic fixed-sized needle in the fixed-size haystack string search.
//  */
// int str_find(const char *needle, int needle_len,
//              const char *haystack, int haystack_len)
// {
//    for (int idx = 0; idx < (haystack_len - needle_len); idx++)
//    {
//       if (memcmp(needle, haystack + idx, needle_len) == 0)
//       {
//          return(idx);
//       }
//    }
//    return(-1);
// }