/src/tokenize.cpp
C++ | 2047 lines | 1590 code | 201 blank | 256 comment | 371 complexity | e2414d84fb693b154ad3e8152180e063 MD5 | raw file
Possible License(s): GPL-2.0
- /**
- * @file tokenize.cpp
- * This file breaks up the text stream into tokens or chunks.
- *
- * Each routine needs to set pc.len and pc.type.
- *
- * @author Ben Gardner
- * @license GPL v2+
- */
- #include "tokenize.h"
- #include "uncrustify_types.h"
- #include "char_table.h"
- #include "prototypes.h"
- #include "chunk_list.h"
- #include <cstdio>
- #include <cstdlib>
- #include <cstring>
- #include "unc_ctype.h"
- #include "uncrustify.h"
- #include "keywords.h"
/**
 * Snapshot of the tokenizer position within the input.
 * A fresh snapshot points at the first character: index 0, row 1, column 1.
 */
struct tok_info
{
   int last_ch; /* character most recently consumed (0 before any input) */
   int idx;     /* index of the next character to read */
   int row;     /* current line, starting at 1 */
   int col;     /* current column, starting at 1 */

   tok_info() : last_ch(0), idx(0), row(1), col(1)
   {
   }
};
- struct tok_ctx
- {
- tok_ctx(const deque<int> &d)
- : data(d)
- {
- }
- /* save before trying to parse something that may fail */
- void save()
- {
- save(s);
- }
- void save(tok_info &info)
- {
- info = c;
- }
- /* restore previous saved state */
- void restore()
- {
- restore(s);
- }
- void restore(const tok_info &info)
- {
- c = info;
- }
- bool more()
- {
- return(c.idx < (int)data.size());
- }
- int peek()
- {
- return(more() ? data[c.idx] : -1);
- }
- int peek(int idx)
- {
- idx += c.idx;
- return((idx < (int)data.size()) ? data[idx] : -1);
- }
- int get()
- {
- if (more())
- {
- int ch = data[c.idx++];
- switch (ch)
- {
- case '\t':
- c.col = calc_next_tab_column(c.col, cpd.settings[UO_input_tab_size].u);
- break;
- case '\n':
- if (c.last_ch != '\r')
- {
- c.row++;
- c.col = 1;
- }
- break;
- case '\r':
- c.row++;
- c.col = 1;
- break;
- default:
- c.col++;
- break;
- }
- c.last_ch = ch;
- return(ch);
- }
- return(-1);
- }
- bool expect(int ch)
- {
- if (peek() == ch)
- {
- get();
- return(true);
- }
- return(false);
- }
- const deque<int> &data;
- tok_info c; /* current */
- tok_info s; /* saved */
- };
- /**
- * Count the number of characters in a quoted string.
- * The next bit of text starts with a quote char " or ' or <.
- * Count the number of characters until the matching character.
- *
- * @param pc The structure to update, str is an input.
- * @return Whether a string was parsed
- */
- static bool parse_string(tok_ctx &ctx, chunk_t &pc, int quote_idx, bool allow_escape);
- /**
- * Literal string, ends with single "
- * Two "" don't end the string.
- *
- * @param pc The structure to update, str is an input.
- * @return Whether a string was parsed
- */
- static bool parse_cs_string(tok_ctx &ctx, chunk_t &pc);
- /**
- * Interpolated strings start with $" end with a single "
- * Double quotes are escaped by doubling.
- * Need to track embedded { } pairs and ignore anything between.
- *
- * @param pc The structure to update, str is an input.
- * @return Whether a string was parsed
- */
- static bool parse_cs_interpolated_string(tok_ctx &ctx, chunk_t &pc);
- /**
- * VALA verbatim string, ends with three quotes (""")
- *
- * @param pc The structure to update, str is an input.
- */
- static void parse_verbatim_string(tok_ctx &ctx, chunk_t &pc);
- static bool tag_compare(const deque<int> &d, int a_idx, int b_idx, int len);
- /**
- * Parses a C++0x 'R' string. R"( xxx )" R"tag( )tag" u8R"(x)" uR"(x)"
- * Newlines may be in the string.
- */
- static bool parse_cr_string(tok_ctx &ctx, chunk_t &pc, int q_idx);
- /**
- * Count the number of whitespace characters.
- *
- * @param pc The structure to update, str is an input.
- * @return Whether whitespace was parsed
- */
- static bool parse_whitespace(tok_ctx &ctx, chunk_t &pc);
- /**
- * Called when we hit a backslash.
- * If there is nothing but whitespace until the newline, then this is a
- * backslash newline
- */
- static bool parse_bs_newline(tok_ctx &ctx, chunk_t &pc);
- /**
- * Parses any number of tab or space chars followed by a newline.
- * Does not change pc.len if a newline isn't found.
- * This is not the same as parse_whitespace() because it only consumes until
- * a single newline is encountered.
- */
- static bool parse_newline(tok_ctx &ctx);
- /**
- * PAWN #define is different than C/C++.
- * #define PATTERN REPLACEMENT_TEXT
- * The PATTERN may not contain a space or '[' or ']'.
- * A generic whitespace check should be good enough.
- * Do not change the pattern.
- */
- static void parse_pawn_pattern(tok_ctx &ctx, chunk_t &pc, c_token_t tt);
- static bool parse_ignored(tok_ctx &ctx, chunk_t &pc);
- /**
- * Skips the next bit of whatever and returns the type of block.
- *
- * pc.str is the input text.
- * pc.len is the output length.
- * pc.type is the output type
- * pc.column is output column
- *
- * @param pc The structure to update, str is an input.
- * @return true/false - whether anything was parsed
- */
- static bool parse_next(tok_ctx &ctx, chunk_t &pc);
- /**
- * Parses all legal D string constants.
- *
- * Quoted strings:
- * r"Wysiwyg" # WYSIWYG string
- * x"hexstring" # Hexadecimal array
- * `Wysiwyg` # WYSIWYG string
- * 'char' # single character
- * "reg_string" # regular string
- *
- * Non-quoted strings:
- * \x12 # 1-byte hex constant
- * \u1234 # 2-byte hex constant
- * \U12345678 # 4-byte hex constant
- * \123 # octal constant
- * \& # named entity
- * \n # single character
- *
- * @param pc The structure to update, str is an input.
- * @return Whether a string was parsed
- */
- static bool d_parse_string(tok_ctx &ctx, chunk_t &pc);
- /**
- * Figure out the length of the comment at text.
- * The next bit of text starts with a '/', so it might be a comment.
- * There are three types of comments:
- * - C comments that start with '/ *' and end with '* /'
- * - C++ comments that start with //
- * - D nestable comments '/+' '+/'
- *
- * @param pc The structure to update, str is an input.
- * @return Whether a comment was parsed
- */
- static bool parse_comment(tok_ctx &ctx, chunk_t &pc);
- /**
- * Figure out the length of the code placeholder at text, if present.
- * This is only for Xcode which sometimes inserts temporary code placeholder chunks, which in plaintext <#look like this#>.
- *
- * @param pc The structure to update, str is an input.
- * @return Whether a placeholder was parsed.
- */
- static bool parse_code_placeholder(tok_ctx &ctx, chunk_t &pc);
- /**
- * Parse any attached suffix, which may be a user-defined literal suffix.
- * If for a string, explicitly exclude common format and scan specifiers, ie,
- * PRIx32 and SCNx64.
- */
- static void parse_suffix(tok_ctx &ctx, chunk_t &pc, bool forstring);
- static bool is_bin(int ch);
- static bool is_bin_(int ch);
- static bool is_oct(int ch);
- static bool is_oct_(int ch);
- static bool is_dec(int ch);
- static bool is_dec_(int ch);
- static bool is_hex(int ch);
- static bool is_hex_(int ch);
- /**
- * Count the number of characters in the number.
- * The next bit of text starts with a number (0-9 or '.'), so it is a number.
- * Count the number of characters in the number.
- *
- * This should cover all number formats for all languages.
- * Note that this is not a strict parser. It will happily parse numbers in
- * an invalid format.
- *
- * For example, only D allows underscores in the numbers, but they are
- * allowed in all formats.
- *
- * @param pc The structure to update, str is an input.
- * @return Whether a number was parsed
- */
- static bool parse_number(tok_ctx &ctx, chunk_t &pc);
- static bool d_parse_string(tok_ctx &ctx, chunk_t &pc)
- {
- int ch = ctx.peek();
- if ((ch == '"') || (ch == '\'') || (ch == '`'))
- {
- return(parse_string(ctx, pc, 0, true));
- }
- else if (ch == '\\')
- {
- ctx.save();
- int cnt;
- pc.str.clear();
- while (ctx.peek() == '\\')
- {
- pc.str.append(ctx.get());
- /* Check for end of file */
- switch (ctx.peek())
- {
- case 'x':
- /* \x HexDigit HexDigit */
- cnt = 3;
- while (cnt--)
- {
- pc.str.append(ctx.get());
- }
- break;
- case 'u':
- /* \u HexDigit HexDigit HexDigit HexDigit */
- cnt = 5;
- while (cnt--)
- {
- pc.str.append(ctx.get());
- }
- break;
- case 'U':
- /* \U HexDigit (x8) */
- cnt = 9;
- while (cnt--)
- {
- pc.str.append(ctx.get());
- }
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- /* handle up to 3 octal digits */
- pc.str.append(ctx.get());
- ch = ctx.peek();
- if ((ch >= '0') && (ch <= '7'))
- {
- pc.str.append(ctx.get());
- ch = ctx.peek();
- if ((ch >= '0') && (ch <= '7'))
- {
- pc.str.append(ctx.get());
- }
- }
- break;
- case '&':
- /* \& NamedCharacterEntity ; */
- pc.str.append(ctx.get());
- while (unc_isalpha(ctx.peek()))
- {
- pc.str.append(ctx.get());
- }
- if (ctx.peek() == ';')
- {
- pc.str.append(ctx.get());
- }
- break;
- default:
- /* Everything else is a single character */
- pc.str.append(ctx.get());
- break;
- } // switch
- }
- if (pc.str.size() > 1)
- {
- pc.type = CT_STRING;
- return(true);
- }
- ctx.restore();
- }
- else if (((ch == 'r') || (ch == 'x')) && (ctx.peek(1) == '"'))
- {
- return(parse_string(ctx, pc, 1, false));
- }
- return(false);
- } // d_parse_string
#if 0

/**
 * Looks for the NUL-terminated needle inside the first haystack_len bytes of
 * haystack. Like strstr(), but with an explicit haystack length.
 *
 * @return pointer to the first match, or NULL if there is none
 */
static const char *str_search(const char *needle, const char *haystack, int haystack_len)
{
   int needle_len = strlen(needle);

   for ( ; haystack_len >= needle_len; haystack_len--, haystack++)
   {
      if (memcmp(needle, haystack, needle_len) == 0)
      {
         return(haystack);
      }
   }
   return(NULL);
}

#endif
- static bool parse_comment(tok_ctx &ctx, chunk_t &pc)
- {
- bool is_d = (cpd.lang_flags & LANG_D) != 0; // forcing value to bool
- bool is_cs = (cpd.lang_flags & LANG_CS) != 0; // forcing value to bool
- int d_level = 0;
- /* does this start with '/ /' or '/ *' or '/ +' (d) */
- if ((ctx.peek() != '/') ||
- ((ctx.peek(1) != '*') && (ctx.peek(1) != '/') &&
- ((ctx.peek(1) != '+') || !is_d)))
- {
- return(false);
- }
- ctx.save();
- /* account for opening two chars */
- pc.str = ctx.get(); /* opening '/' */
- int ch = ctx.get();
- pc.str.append(ch); /* second char */
- if (ch == '/')
- {
- pc.type = CT_COMMENT_CPP;
- while (true)
- {
- int bs_cnt = 0;
- while (ctx.more())
- {
- ch = ctx.peek();
- if ((ch == '\r') || (ch == '\n'))
- {
- break;
- }
- if ((ch == '\\') && !is_cs) /* backslashes aren't special in comments in C# */
- {
- bs_cnt++;
- }
- else
- {
- bs_cnt = 0;
- }
- pc.str.append(ctx.get());
- }
- /* If we hit an odd number of backslashes right before the newline,
- * then we keep going.
- */
- if (((bs_cnt & 1) == 0) || !ctx.more())
- {
- break;
- }
- if (ctx.peek() == '\r')
- {
- pc.str.append(ctx.get());
- }
- if (ctx.peek() == '\n')
- {
- pc.str.append(ctx.get());
- }
- pc.nl_count++;
- cpd.did_newline = true;
- }
- }
- else if (!ctx.more())
- {
- /* unexpected end of file */
- ctx.restore();
- return(false);
- }
- else if (ch == '+')
- {
- pc.type = CT_COMMENT;
- d_level++;
- while ((d_level > 0) && ctx.more())
- {
- if ((ctx.peek() == '+') && (ctx.peek(1) == '/'))
- {
- pc.str.append(ctx.get()); /* store the '+' */
- pc.str.append(ctx.get()); /* store the '/' */
- d_level--;
- continue;
- }
- if ((ctx.peek() == '/') && (ctx.peek(1) == '+'))
- {
- pc.str.append(ctx.get()); /* store the '/' */
- pc.str.append(ctx.get()); /* store the '+' */
- d_level++;
- continue;
- }
- ch = ctx.get();
- pc.str.append(ch);
- if ((ch == '\n') || (ch == '\r'))
- {
- pc.type = CT_COMMENT_MULTI;
- pc.nl_count++;
- if (ch == '\r')
- {
- if (ctx.peek() == '\n')
- {
- cpd.le_counts[LE_CRLF]++;
- pc.str.append(ctx.get()); /* store the '\n' */
- }
- else
- {
- cpd.le_counts[LE_CR]++;
- }
- }
- else
- {
- cpd.le_counts[LE_LF]++;
- }
- }
- }
- }
- else /* must be '/ *' */
- {
- pc.type = CT_COMMENT;
- while (ctx.more())
- {
- if ((ctx.peek() == '*') && (ctx.peek(1) == '/'))
- {
- pc.str.append(ctx.get()); /* store the '*' */
- pc.str.append(ctx.get()); /* store the '/' */
- tok_info ss;
- ctx.save(ss);
- int oldsize = pc.str.size();
- /* If there is another C comment right after this one, combine them */
- while ((ctx.peek() == ' ') || (ctx.peek() == '\t'))
- {
- pc.str.append(ctx.get());
- }
- if ((ctx.peek() != '/') || (ctx.peek(1) != '*'))
- {
- /* undo the attempt to join */
- ctx.restore(ss);
- pc.str.resize(oldsize);
- break;
- }
- }
- ch = ctx.get();
- pc.str.append(ch);
- if ((ch == '\n') || (ch == '\r'))
- {
- pc.type = CT_COMMENT_MULTI;
- pc.nl_count++;
- if (ch == '\r')
- {
- if (ctx.peek() == '\n')
- {
- cpd.le_counts[LE_CRLF]++;
- pc.str.append(ctx.get()); /* store the '\n' */
- }
- else
- {
- cpd.le_counts[LE_CR]++;
- }
- }
- else
- {
- cpd.le_counts[LE_LF]++;
- }
- }
- }
- }
- if (cpd.unc_off)
- {
- const char *ontext = cpd.settings[UO_enable_processing_cmt].str;
- if ((ontext == NULL) || !ontext[0])
- {
- ontext = UNCRUSTIFY_ON_TEXT;
- }
- if (pc.str.find(ontext) >= 0)
- {
- LOG_FMT(LBCTRL, "Found '%s' on line %zu\n", ontext, pc.orig_line);
- cpd.unc_off = false;
- }
- }
- else
- {
- const char *offtext = cpd.settings[UO_disable_processing_cmt].str;
- if ((offtext == NULL) || !offtext[0])
- {
- offtext = UNCRUSTIFY_OFF_TEXT;
- }
- if (pc.str.find(offtext) >= 0)
- {
- LOG_FMT(LBCTRL, "Found '%s' on line %zu\n", offtext, pc.orig_line);
- cpd.unc_off = true;
- // Issue #842
- cpd.unc_off_used = true;
- }
- }
- return(true);
- } // parse_comment
- static bool parse_code_placeholder(tok_ctx &ctx, chunk_t &pc)
- {
- if ((ctx.peek() != '<') || (ctx.peek(1) != '#'))
- {
- return(false);
- }
- ctx.save();
- /* account for opening two chars '<#' */
- pc.str = ctx.get();
- pc.str.append(ctx.get());
- /* grab everything until '#>', fail if not found. */
- int last1 = 0;
- while (ctx.more())
- {
- int last2 = last1;
- last1 = ctx.get();
- pc.str.append(last1);
- if ((last2 == '#') && (last1 == '>'))
- {
- pc.type = CT_WORD;
- return(true);
- }
- }
- ctx.restore();
- return(false);
- }
- static void parse_suffix(tok_ctx &ctx, chunk_t &pc, bool forstring = false)
- {
- if (CharTable::IsKw1(ctx.peek()))
- {
- int slen = 0;
- int oldsize = pc.str.size();
- /* don't add the suffix if we see L" or L' or S" */
- int p1 = ctx.peek();
- int p2 = ctx.peek(1);
- if (forstring &&
- (((p1 == 'L') && ((p2 == '"') || (p2 == '\''))) ||
- ((p1 == 'S') && (p2 == '"'))))
- {
- return;
- }
- tok_info ss;
- ctx.save(ss);
- while (ctx.more() && CharTable::IsKw2(ctx.peek()))
- {
- slen++;
- pc.str.append(ctx.get());
- }
- if (forstring && (slen >= 4) &&
- (pc.str.startswith("PRI", oldsize) ||
- pc.str.startswith("SCN", oldsize)))
- {
- ctx.restore(ss);
- pc.str.resize(oldsize);
- }
- }
- }
/* true for the binary digits '0' and '1' */
static bool is_bin(int ch)
{
   switch (ch)
   {
   case '0':
   case '1':
      return(true);

   default:
      return(false);
   }
}


/* true for a binary digit or an underscore */
static bool is_bin_(int ch)
{
   if (ch == '_')
   {
      return(true);
   }
   return(is_bin(ch));
}
/* true for the octal digits '0' through '7' */
static bool is_oct(int ch)
{
   if ((ch < '0') || (ch > '7'))
   {
      return(false);
   }
   return(true);
}


/* true for an octal digit or an underscore */
static bool is_oct_(int ch)
{
   if (ch == '_')
   {
      return(true);
   }
   return(is_oct(ch));
}
/* true for the decimal digits '0' through '9' */
static bool is_dec(int ch)
{
   if ((ch < '0') || (ch > '9'))
   {
      return(false);
   }
   return(true);
}


/* true for a decimal digit or an underscore */
static bool is_dec_(int ch)
{
   if (ch == '_')
   {
      return(true);
   }
   return(is_dec(ch));
}
/* true for the hexadecimal digits 0-9, a-f and A-F */
static bool is_hex(int ch)
{
   if ((ch >= '0') && (ch <= '9'))
   {
      return(true);
   }
   if ((ch >= 'a') && (ch <= 'f'))
   {
      return(true);
   }
   return((ch >= 'A') && (ch <= 'F'));
}


/* true for a hexadecimal digit or an underscore */
static bool is_hex_(int ch)
{
   if (ch == '_')
   {
      return(true);
   }
   return(is_hex(ch));
}
- static bool parse_number(tok_ctx &ctx, chunk_t &pc)
- {
- /* A number must start with a digit or a dot, followed by a digit */
- if (!is_dec(ctx.peek()) &&
- ((ctx.peek() != '.') || !is_dec(ctx.peek(1))))
- {
- return(false);
- }
- bool is_float = (ctx.peek() == '.');
- if (is_float && (ctx.peek(1) == '.'))
- {
- return(false);
- }
- /* Check for Hex, Octal, or Binary
- * Note that only D and Pawn support binary, but who cares?
- */
- bool did_hex = false;
- if (ctx.peek() == '0')
- {
- pc.str.append(ctx.get()); /* store the '0' */
- int ch;
- chunk_t pc_temp;
- size_t pc_length;
- pc_temp.str.append('0');
- // MS constant might have an "h" at the end. Look for it
- ctx.save();
- while (ctx.more() && CharTable::IsKw2(ctx.peek()))
- {
- ch = ctx.get();
- pc_temp.str.append(ch);
- }
- pc_length = pc_temp.len();
- ch = pc_temp.str[pc_length - 1];
- ctx.restore();
- LOG_FMT(LGUY, "%s(%d): pc_temp:%s\n", __func__, __LINE__, pc_temp.text());
- if (ch == 'h')
- {
- // we have an MS hexadecimal number with "h" at the end
- LOG_FMT(LGUY, "%s(%d): MS hexadecimal number\n", __func__, __LINE__);
- did_hex = true;
- do
- {
- pc.str.append(ctx.get()); /* store the rest */
- } while (is_hex_(ctx.peek()));
- pc.str.append(ctx.get()); /* store the h */
- LOG_FMT(LGUY, "%s(%d): pc:%s\n", __func__, __LINE__, pc.text());
- }
- else
- {
- switch (unc_toupper(ctx.peek()))
- {
- case 'X': /* hex */
- did_hex = true;
- do
- {
- pc.str.append(ctx.get()); /* store the 'x' and then the rest */
- } while (is_hex_(ctx.peek()));
- break;
- case 'B': /* binary */
- do
- {
- pc.str.append(ctx.get()); /* store the 'b' and then the rest */
- } while (is_bin_(ctx.peek()));
- break;
- case '0': /* octal or decimal */
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- do
- {
- pc.str.append(ctx.get());
- } while (is_oct_(ctx.peek()));
- break;
- default:
- /* either just 0 or 0.1 or 0UL, etc */
- break;
- }
- }
- }
- else
- {
- /* Regular int or float */
- while (is_dec_(ctx.peek()))
- {
- pc.str.append(ctx.get());
- }
- }
- /* Check if we stopped on a decimal point & make sure it isn't '..' */
- if ((ctx.peek() == '.') && (ctx.peek(1) != '.'))
- {
- pc.str.append(ctx.get());
- is_float = true;
- if (did_hex)
- {
- while (is_hex_(ctx.peek()))
- {
- pc.str.append(ctx.get());
- }
- }
- else
- {
- while (is_dec_(ctx.peek()))
- {
- pc.str.append(ctx.get());
- }
- }
- }
- /* Check exponent
- * Valid exponents per language (not that it matters):
- * C/C++/D/Java: eEpP
- * C#/Pawn: eE
- */
- int tmp = unc_toupper(ctx.peek());
- if ((tmp == 'E') || (tmp == 'P'))
- {
- is_float = true;
- pc.str.append(ctx.get());
- if ((ctx.peek() == '+') || (ctx.peek() == '-'))
- {
- pc.str.append(ctx.get());
- }
- while (is_dec_(ctx.peek()))
- {
- pc.str.append(ctx.get());
- }
- }
- /* Check the suffixes
- * Valid suffixes per language (not that it matters):
- * Integer Float
- * C/C++: uUlL64 lLfF
- * C#: uUlL fFdDMm
- * D: uUL ifFL
- * Java: lL fFdD
- * Pawn: (none) (none)
- *
- * Note that i, f, d, and m only appear in floats.
- */
- while (1)
- {
- int tmp = unc_toupper(ctx.peek());
- if ((tmp == 'I') || (tmp == 'F') || (tmp == 'D') || (tmp == 'M'))
- {
- is_float = true;
- }
- else if ((tmp != 'L') && (tmp != 'U'))
- {
- break;
- }
- pc.str.append(ctx.get());
- }
- /* skip the Microsoft-specific '64' suffix */
- if ((ctx.peek() == '6') && (ctx.peek(1) == '4'))
- {
- pc.str.append(ctx.get());
- pc.str.append(ctx.get());
- }
- pc.type = is_float ? CT_NUMBER_FP : CT_NUMBER;
- /* If there is anything left, then we are probably dealing with garbage or
- * some sick macro junk. Eat it.
- */
- parse_suffix(ctx, pc);
- return(true);
- } // parse_number
- static bool parse_string(tok_ctx &ctx, chunk_t &pc, int quote_idx, bool allow_escape)
- {
- char escape_char = cpd.settings[UO_string_escape_char].n;
- char escape_char2 = cpd.settings[UO_string_escape_char2].n;
- bool should_escape_tabs = cpd.settings[UO_string_replace_tab_chars].b && (cpd.lang_flags & LANG_ALLC);
- pc.str.clear();
- while (quote_idx-- > 0)
- {
- pc.str.append(ctx.get());
- }
- pc.type = CT_STRING;
- int end_ch = CharTable::Get(ctx.peek()) & 0xff;
- pc.str.append(ctx.get()); /* store the " */
- bool escaped = false;
- while (ctx.more())
- {
- int lastcol = ctx.c.col;
- int ch = ctx.get();
- if ((ch == '\t') && should_escape_tabs)
- {
- ctx.c.col = lastcol + 2;
- pc.str.append(escape_char);
- pc.str.append('t');
- continue;
- }
- pc.str.append(ch);
- if (ch == '\n')
- {
- pc.nl_count++;
- pc.type = CT_STRING_MULTI;
- escaped = false;
- continue;
- }
- if ((ch == '\r') && (ctx.peek() != '\n'))
- {
- pc.str.append(ctx.get());
- pc.nl_count++;
- pc.type = CT_STRING_MULTI;
- escaped = false;
- continue;
- }
- if (!escaped)
- {
- if (ch == escape_char)
- {
- escaped = (escape_char != 0);
- }
- else if ((ch == escape_char2) && (ctx.peek() == end_ch))
- {
- escaped = allow_escape;
- }
- else if (ch == end_ch)
- {
- break;
- }
- }
- else
- {
- escaped = false;
- }
- }
- parse_suffix(ctx, pc, true);
- return(true);
- } // parse_string
- static bool parse_cs_string(tok_ctx &ctx, chunk_t &pc)
- {
- pc.str = ctx.get();
- pc.str.append(ctx.get());
- pc.type = CT_STRING;
- bool should_escape_tabs = cpd.settings[UO_string_replace_tab_chars].b;
- /* go until we hit a zero (end of file) or a single " */
- while (ctx.more())
- {
- int ch = ctx.get();
- pc.str.append(ch);
- if ((ch == '\n') || (ch == '\r'))
- {
- pc.type = CT_STRING_MULTI;
- pc.nl_count++;
- }
- else if (ch == '\t')
- {
- if (should_escape_tabs && !cpd.warned_unable_string_replace_tab_chars)
- {
- cpd.warned_unable_string_replace_tab_chars = true;
- log_sev_t warnlevel = (log_sev_t)cpd.settings[UO_warn_level_tabs_found_in_verbatim_string_literals].n;
- /* a tab char can't be replaced with \\t because escapes don't work in here-strings. best we can do is warn. */
- LOG_FMT(warnlevel, "%s:%zu Detected non-replaceable tab char in literal string\n", cpd.filename, pc.orig_line);
- if (warnlevel < LWARN)
- {
- cpd.error_count++;
- }
- }
- }
- else if (ch == '"')
- {
- if (ctx.peek() == '"')
- {
- pc.str.append(ctx.get());
- }
- else
- {
- break;
- }
- }
- }
- return(true);
- } // parse_cs_string
- static bool parse_cs_interpolated_string(tok_ctx &ctx, chunk_t &pc)
- {
- pc.str = ctx.get(); // '$'
- pc.str.append(ctx.get()); // '"'
- pc.type = CT_STRING;
- int depth = 0;
- /* go until we hit a zero (end of file) or a single " */
- while (ctx.more())
- {
- int ch = ctx.get();
- pc.str.append(ch);
- /* if we are inside a { }, then we only look for a } */
- if (depth > 0)
- {
- if (ch == '}')
- {
- if (ctx.peek() == '}')
- {
- // }} doesn't decrease the depth
- pc.str.append(ctx.get()); // '{'
- }
- else
- {
- depth--;
- }
- }
- }
- else
- {
- if (ch == '{')
- {
- if (ctx.peek() == '{')
- {
- // {{ doesn't increase the depth
- pc.str.append(ctx.get());
- }
- else
- {
- depth++;
- }
- }
- else if (ch == '"')
- {
- if (ctx.peek() == '"')
- {
- pc.str.append(ctx.get());
- }
- else
- {
- break;
- }
- }
- }
- }
- return(true);
- } // parse_cs_interpolated_string
- static void parse_verbatim_string(tok_ctx &ctx, chunk_t &pc)
- {
- pc.type = CT_STRING;
- // consume the initial """
- pc.str = ctx.get();
- pc.str.append(ctx.get());
- pc.str.append(ctx.get());
- /* go until we hit a zero (end of file) or a """ */
- while (ctx.more())
- {
- int ch = ctx.get();
- pc.str.append(ch);
- if ((ch == '"') &&
- (ctx.peek() == '"') &&
- (ctx.peek(1) == '"'))
- {
- pc.str.append(ctx.get());
- pc.str.append(ctx.get());
- break;
- }
- if ((ch == '\n') || (ch == '\r'))
- {
- pc.type = CT_STRING_MULTI;
- pc.nl_count++;
- }
- }
- }
/**
 * Compares two runs of len characters inside d, one starting at a_idx and
 * the other at b_idx.
 *
 * @param d      The character data
 * @param a_idx  Start index of the first run
 * @param b_idx  Start index of the second run
 * @param len    Number of characters to compare; nothing is compared if <= 0
 * @return       true if the runs are equal (or are the same run), false if
 *               they differ or either run leaves the bounds of d
 */
static bool tag_compare(const deque<int> &d, int a_idx, int b_idx, int len)
{
   if (a_idx == b_idx)
   {
      /* a run always equals itself */
      return(true);
   }

   const int size = (int)d.size();

   for ( ; len > 0; len--, a_idx++, b_idx++)
   {
      if ((a_idx < 0) || (b_idx < 0) || (a_idx >= size) || (b_idx >= size))
      {
         return(false);
      }
      if (d[a_idx] != d[b_idx])
      {
         return(false);
      }
   }
   return(true);
}
- static bool parse_cr_string(tok_ctx &ctx, chunk_t &pc, int q_idx)
- {
- int tag_idx = ctx.c.idx + q_idx + 1;
- int tag_len = 0;
- ctx.save();
- /* Copy the prefix + " to the string */
- pc.str.clear();
- int cnt = q_idx + 1;
- while (cnt--)
- {
- pc.str.append(ctx.get());
- }
- /* Add the tag and get the length of the tag */
- while (ctx.more() && (ctx.peek() != '('))
- {
- tag_len++;
- pc.str.append(ctx.get());
- }
- if (ctx.peek() != '(')
- {
- ctx.restore();
- return(false);
- }
- pc.type = CT_STRING;
- while (ctx.more())
- {
- if ((ctx.peek() == ')') &&
- (ctx.peek(tag_len + 1) == '"') &&
- tag_compare(ctx.data, tag_idx, ctx.c.idx + 1, tag_len))
- {
- cnt = tag_len + 2; /* for the )" */
- while (cnt--)
- {
- pc.str.append(ctx.get());
- }
- parse_suffix(ctx, pc);
- return(true);
- }
- if (ctx.peek() == '\n')
- {
- pc.str.append(ctx.get());
- pc.nl_count++;
- pc.type = CT_STRING_MULTI;
- }
- else
- {
- pc.str.append(ctx.get());
- }
- }
- ctx.restore();
- return(false);
- } // parse_cr_string
- /**
- * Count the number of characters in a word.
- * The first character is already valid for a keyword
- *
- * @param pc The structure to update, str is an input.
- * @return Whether a word was parsed (always true)
- */
- bool parse_word(tok_ctx &ctx, chunk_t &pc, bool skipcheck)
- {
- static unc_text intr_txt("@interface");
- /* The first character is already valid */
- pc.str.clear();
- pc.str.append(ctx.get());
- while (ctx.more())
- {
- int ch = ctx.peek();
- if (CharTable::IsKw2(ch))
- {
- pc.str.append(ctx.get());
- }
- else if ((ch == '\\') && (unc_tolower(ctx.peek(1)) == 'u'))
- {
- pc.str.append(ctx.get());
- pc.str.append(ctx.get());
- skipcheck = true;
- }
- else
- {
- break;
- }
- /* HACK: Non-ASCII character are only allowed in identifiers */
- if (ch > 0x7f)
- {
- skipcheck = true;
- }
- }
- pc.type = CT_WORD;
- if (skipcheck)
- {
- return(true);
- }
- /* Detect pre-processor functions now */
- if ((cpd.in_preproc == CT_PP_DEFINE) &&
- (cpd.preproc_ncnl_count == 1))
- {
- if (ctx.peek() == '(')
- {
- pc.type = CT_MACRO_FUNC;
- }
- else
- {
- pc.type = CT_MACRO;
- }
- }
- else
- {
- /* '@interface' is reserved, not an interface itself */
- if ((cpd.lang_flags & LANG_JAVA) && pc.str.startswith("@") &&
- !pc.str.equals(intr_txt))
- {
- pc.type = CT_ANNOTATION;
- }
- else
- {
- /* Turn it into a keyword now */
- pc.type = find_keyword_type(pc.text(), pc.str.size());
- }
- }
- return(true);
- } // parse_word
- static bool parse_whitespace(tok_ctx &ctx, chunk_t &pc)
- {
- int nl_count = 0;
- int ch = -2;
- /* REVISIT: use a better whitespace detector? */
- while (ctx.more() && unc_isspace(ctx.peek()))
- {
- ch = ctx.get(); /* throw away the whitespace char */
- switch (ch)
- {
- case '\r':
- if (ctx.expect('\n'))
- {
- /* CRLF ending */
- cpd.le_counts[LE_CRLF]++;
- }
- else
- {
- /* CR ending */
- cpd.le_counts[LE_CR]++;
- }
- nl_count++;
- pc.orig_prev_sp = 0;
- break;
- case '\n':
- /* LF ending */
- cpd.le_counts[LE_LF]++;
- nl_count++;
- pc.orig_prev_sp = 0;
- break;
- case '\t':
- pc.orig_prev_sp += calc_next_tab_column(cpd.column, cpd.settings[UO_input_tab_size].u) - cpd.column;
- break;
- case ' ':
- pc.orig_prev_sp++;
- break;
- default:
- break;
- }
- }
- if (ch != -2)
- {
- pc.str.clear();
- pc.nl_count = nl_count;
- pc.type = nl_count ? CT_NEWLINE : CT_WHITESPACE;
- pc.after_tab = (ctx.c.last_ch == '\t');
- return(true);
- }
- return(false);
- } // parse_whitespace
- static bool parse_bs_newline(tok_ctx &ctx, chunk_t &pc)
- {
- ctx.save();
- ctx.get(); /* skip the '\' */
- int ch;
- while (ctx.more() && unc_isspace(ch = ctx.peek()))
- {
- ctx.get();
- if ((ch == '\r') || (ch == '\n'))
- {
- if (ch == '\r')
- {
- ctx.expect('\n');
- }
- pc.str = "\\";
- pc.type = CT_NL_CONT;
- pc.nl_count = 1;
- return(true);
- }
- }
- ctx.restore();
- return(false);
- }
- static bool parse_newline(tok_ctx &ctx)
- {
- ctx.save();
- /* Eat whitespace */
- while ((ctx.peek() == ' ') || (ctx.peek() == '\t'))
- {
- ctx.get();
- }
- if ((ctx.peek() == '\r') || (ctx.peek() == '\n'))
- {
- if (!ctx.expect('\n'))
- {
- ctx.get();
- ctx.expect('\n');
- }
- return(true);
- }
- ctx.restore();
- return(false);
- }
- static void parse_pawn_pattern(tok_ctx &ctx, chunk_t &pc, c_token_t tt)
- {
- pc.str.clear();
- pc.type = tt;
- while (!unc_isspace(ctx.peek()))
- {
- /* end the pattern on an escaped newline */
- if (ctx.peek() == '\\')
- {
- int ch = ctx.peek(1);
- if ((ch == '\n') || (ch == '\r'))
- {
- break;
- }
- }
- pc.str.append(ctx.get());
- }
- }
- static bool parse_ignored(tok_ctx &ctx, chunk_t &pc)
- {
- int nl_count = 0;
- /* Parse off newlines/blank lines */
- while (parse_newline(ctx))
- {
- nl_count++;
- }
- if (nl_count > 0)
- {
- pc.nl_count = nl_count;
- pc.type = CT_NEWLINE;
- return(true);
- }
- /* See if the UO_enable_processing_cmt text is on this line */
- ctx.save();
- pc.str.clear();
- while (ctx.more() &&
- (ctx.peek() != '\r') &&
- (ctx.peek() != '\n'))
- {
- pc.str.append(ctx.get());
- }
- if (pc.str.size() == 0)
- {
- /* end of file? */
- return(false);
- }
- /* Note that we aren't actually making sure this is in a comment, yet */
- const char *ontext = cpd.settings[UO_enable_processing_cmt].str;
- if (ontext == NULL)
- {
- ontext = UNCRUSTIFY_ON_TEXT;
- }
- if (pc.str.find(ontext) < 0)
- {
- pc.type = CT_IGNORED;
- return(true);
- }
- ctx.restore();
- /* parse off whitespace leading to the comment */
- if (parse_whitespace(ctx, pc))
- {
- pc.type = CT_IGNORED;
- return(true);
- }
- /* Look for the ending comment and let it pass */
- if (parse_comment(ctx, pc) && !cpd.unc_off)
- {
- return(true);
- }
- /* Reset the chunk & scan to until a newline */
- pc.str.clear();
- while (ctx.more() &&
- (ctx.peek() != '\r') &&
- (ctx.peek() != '\n'))
- {
- pc.str.append(ctx.get());
- }
- if (pc.str.size() > 0)
- {
- pc.type = CT_IGNORED;
- return(true);
- }
- return(false);
- } // parse_ignored
- static bool parse_next(tok_ctx &ctx, chunk_t &pc)
- {
- if (!ctx.more())
- {
- //fprintf(stderr, "All done!\n");
- return(false);
- }
- /* Save off the current column */
- pc.orig_line = ctx.c.row;
- pc.column = ctx.c.col;
- pc.orig_col = ctx.c.col;
- pc.type = CT_NONE;
- pc.nl_count = 0;
- pc.flags = 0;
- /* If it is turned off, we put everything except newlines into CT_UNKNOWN */
- if (cpd.unc_off)
- {
- if (parse_ignored(ctx, pc))
- {
- return(true);
- }
- }
- /**
- * Parse whitespace
- */
- if (parse_whitespace(ctx, pc))
- {
- return(true);
- }
- /**
- * Handle unknown/unhandled preprocessors
- */
- if ((cpd.in_preproc > CT_PP_BODYCHUNK) &&
- (cpd.in_preproc <= CT_PP_OTHER))
- {
- pc.str.clear();
- tok_info ss;
- ctx.save(ss);
- /* Chunk to a newline or comment */
- pc.type = CT_PREPROC_BODY;
- int last = 0;
- while (ctx.more())
- {
- int ch = ctx.peek();
- if ((ch == '\n') || (ch == '\r'))
- {
- /* Back off if this is an escaped newline */
- if (last == '\\')
- {
- ctx.restore(ss);
- pc.str.pop_back();
- }
- break;
- }
- /* Quit on a C++ comment start */
- if ((ch == '/') && (ctx.peek(1) == '/'))
- {
- break;
- }
- last = ch;
- ctx.save(ss);
- pc.str.append(ctx.get());
- }
- if (pc.str.size() > 0)
- {
- return(true);
- }
- }
- /**
- * Detect backslash-newline
- */
- if ((ctx.peek() == '\\') && parse_bs_newline(ctx, pc))
- {
- return(true);
- }
- /**
- * Parse comments
- */
- if (parse_comment(ctx, pc))
- {
- return(true);
- }
- /* Parse code placeholders */
- if (parse_code_placeholder(ctx, pc))
- {
- return(true);
- }
- /* Check for C# literal strings, ie @"hello" and identifiers @for*/
- if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '@'))
- {
- if (ctx.peek(1) == '"')
- {
- parse_cs_string(ctx, pc);
- return(true);
- }
- /* check for non-keyword identifiers such as @if @switch, etc */
- if (CharTable::IsKw1(ctx.peek(1)))
- {
- parse_word(ctx, pc, true);
- return(true);
- }
- }
- /* Check for C# Interpolated strings */
- if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '$') && (ctx.peek(1) == '"'))
- {
- parse_cs_interpolated_string(ctx, pc);
- return(true);
- }
- /* handle VALA """ strings """ */
- if ((cpd.lang_flags & LANG_VALA) &&
- (ctx.peek() == '"') &&
- (ctx.peek(1) == '"') &&
- (ctx.peek(2) == '"'))
- {
- parse_verbatim_string(ctx, pc);
- return(true);
- }
- /* handle C++0x strings u8"x" u"x" U"x" R"x" u8R"XXX(I'm a "raw UTF-8" string.)XXX" */
- int ch = ctx.peek();
- if ((cpd.lang_flags & LANG_CPP) &&
- ((ch == 'u') || (ch == 'U') || (ch == 'R')))
- {
- int idx = 0;
- bool is_real = false;
- if ((ch == 'u') && (ctx.peek(1) == '8'))
- {
- idx = 2;
- }
- else if (unc_tolower(ch) == 'u')
- {
- idx++;
- }
- if (ctx.peek(idx) == 'R')
- {
- idx++;
- is_real = true;
- }
- if (ctx.peek(idx) == '"')
- {
- if (is_real)
- {
- if (parse_cr_string(ctx, pc, idx))
- {
- return(true);
- }
- }
- else
- {
- if (parse_string(ctx, pc, idx, true))
- {
- parse_suffix(ctx, pc, true);
- return(true);
- }
- }
- }
- }
- /* PAWN specific stuff */
- if (cpd.lang_flags & LANG_PAWN)
- {
- if ((cpd.preproc_ncnl_count == 1) &&
- ((cpd.in_preproc == CT_PP_DEFINE) ||
- (cpd.in_preproc == CT_PP_EMIT)))
- {
- parse_pawn_pattern(ctx, pc, CT_MACRO);
- return(true);
- }
- /* Check for PAWN strings: \"hi" or !"hi" or !\"hi" or \!"hi" */
- if ((ctx.peek() == '\\') || (ctx.peek() == '!'))
- {
- if (ctx.peek(1) == '"')
- {
- parse_string(ctx, pc, 1, (ctx.peek() == '!'));
- return(true);
- }
- else if (((ctx.peek(1) == '\\') || (ctx.peek(1) == '!')) &&
- (ctx.peek(2) == '"'))
- {
- parse_string(ctx, pc, 2, false);
- return(true);
- }
- }
- /* handle PAWN preprocessor args %0 .. %9 */
- if ((cpd.in_preproc == CT_PP_DEFINE) &&
- (ctx.peek() == '%') &&
- unc_isdigit(ctx.peek(1)))
- {
- pc.str.clear();
- pc.str.append(ctx.get());
- pc.str.append(ctx.get());
- pc.type = CT_WORD;
- return(true);
- }
- }
- /**
- * Parse strings and character constants
- */
- //parse_word(ctx, pc_temp, true);
- //ctx.restore(ctx.c);
- if (parse_number(ctx, pc))
- {
- return(true);
- }
- if (cpd.lang_flags & LANG_D)
- {
- /* D specific stuff */
- if (d_parse_string(ctx, pc))
- {
- return(true);
- }
- }
- else
- {
- /* Not D stuff */
- /* Check for L'a', L"abc", 'a', "abc", <abc> strings */
- ch = ctx.peek();
- int ch1 = ctx.peek(1);
- if ((((ch == 'L') || (ch == 'S')) &&
- ((ch1 == '"') || (ch1 == '\''))) ||
- (ch == '"') ||
- (ch == '\'') ||
- ((ch == '<') && (cpd.in_preproc == CT_PP_INCLUDE)))
- {
- parse_string(ctx, pc, unc_isalpha(ch) ? 1 : 0, true);
- return(true);
- }
- if ((ch == '<') && (cpd.in_preproc == CT_PP_DEFINE))
- {
- if (chunk_get_tail()->type == CT_MACRO)
- {
- /* We have "#define XXX <", assume '<' starts an include string */
- parse_string(ctx, pc, 0, false);
- return(true);
- }
- }
- }
- /* Check for Objective C literals and VALA identifiers ('@1', '@if')*/
- if ((cpd.lang_flags & (LANG_OC | LANG_VALA)) && (ctx.peek() == '@'))
- {
- int nc = ctx.peek(1);
- if ((nc == '"') || (nc == '\''))
- {
- /* literal string */
- parse_string(ctx, pc, 1, true);
- return(true);
- }
- else if ((nc >= '0') && (nc <= '9'))
- {
- /* literal number */
- pc.str.append(ctx.get()); /* store the '@' */
- parse_number(ctx, pc);
- return(true);
- }
- }
- /* Check for pawn/ObjectiveC/Java and normal identifiers */
- if (CharTable::IsKw1(ctx.peek()) ||
- ((ctx.peek() == '\\') && (unc_tolower(ctx.peek(1)) == 'u')) ||
- ((ctx.peek() == '@') && CharTable::IsKw1(ctx.peek(1))))
- {
- parse_word(ctx, pc, false);
- return(true);
- }
- /* see if we have a punctuator */
- char punc_txt[4];
- punc_txt[0] = ctx.peek();
- punc_txt[1] = ctx.peek(1);
- punc_txt[2] = ctx.peek(2);
- punc_txt[3] = ctx.peek(3);
- const chunk_tag_t *punc;
- if ((punc = find_punctuator(punc_txt, cpd.lang_flags)) != NULL)
- {
- int cnt = strlen(punc->tag);
- while (cnt--)
- {
- pc.str.append(ctx.get());
- }
- pc.type = punc->type;
- pc.flags |= PCF_PUNCTUATOR;
- return(true);
- }
- /* throw away this character */
- pc.type = CT_UNKNOWN;
- pc.str.append(ctx.get());
- LOG_FMT(LWARN, "%s:%zu Garbage in col %d: %x\n",
- cpd.filename, pc.orig_line, (int)ctx.c.col, pc.str[0]);
- cpd.error_count++;
- return(true);
- } // parse_next
- /**
- * This function parses or tokenizes the whole buffer into a list.
- * It has to do some tricks to parse preprocessors.
- *
- * If output_text() were called immediately after, two things would happen:
- * - trailing whitespace are removed.
- * - leading space & tabs are converted to the appropriate format.
- *
- * All the tokens are inserted before ref. If ref is NULL, they are inserted
- * at the end of the list. Line numbers are relative to the start of the data.
- */
- void tokenize(const deque<int> &data, chunk_t *ref)
- {
- tok_ctx ctx(data);
- chunk_t chunk;
- chunk_t *pc = NULL;
- chunk_t *rprev = NULL;
- parse_frame_t frm;
- bool last_was_tab = false;
- int prev_sp = 0;
- cpd.unc_stage = US_TOKENIZE;
- memset(&frm, 0, sizeof(frm));
- while (ctx.more())
- {
- chunk.reset();
- if (!parse_next(ctx, chunk))
- {
- LOG_FMT(LERR, "%s:%d Bailed before the end?\n",
- cpd.filename, ctx.c.row);
- cpd.error_count++;
- break;
- }
- /* Don't create an entry for whitespace */
- if (chunk.type == CT_WHITESPACE)
- {
- last_was_tab = chunk.after_tab;
- prev_sp = chunk.orig_prev_sp;
- continue;
- }
- chunk.orig_prev_sp = prev_sp;
- prev_sp = 0;
- if (chunk.type == CT_NEWLINE)
- {
- last_was_tab = chunk.after_tab;
- chunk.after_tab = false;
- chunk.str.clear();
- }
- else if (chunk.type == CT_NL_CONT)
- {
- last_was_tab = chunk.after_tab;
- chunk.after_tab = false;
- chunk.str = "\\\n";
- }
- else
- {
- chunk.after_tab = last_was_tab;
- last_was_tab = false;
- }
- /* Strip trailing whitespace (for CPP comments and PP blocks) */
- while ((chunk.str.size() > 0) &&
- ((chunk.str[chunk.str.size() - 1] == ' ') ||
- (chunk.str[chunk.str.size() - 1] == '\t')))
- {
- // If comment contains backslash '\' followed by whitespace chars, keep last one;
- // this will prevent it from turning '\' into line continuation.
- if ((chunk.str.size() > 1) && (chunk.str[chunk.str.size() - 2] == '\\'))
- {
- break;
- }
- chunk.str.pop_back();
- }
- /* Store off the end column */
- chunk.orig_col_end = ctx.c.col;
- /* Add the chunk to the list */
- rprev = pc;
- if (rprev != NULL)
- {
- chunk_flags_set(pc, rprev->flags & PCF_COPY_FLAGS);
- /* a newline can't be in a preprocessor */
- if (pc->type == CT_NEWLINE)
- {
- chunk_flags_clr(pc, PCF_IN_PREPROC);
- }
- }
- if (ref != NULL)
- {
- chunk.flags |= PCF_INSERTED;
- }
- else
- {
- chunk.flags &= ~PCF_INSERTED;
- }
- pc = chunk_add_before(&chunk, ref);
- /* A newline marks the end of a preprocessor */
- if (pc->type == CT_NEWLINE) // || (pc->type == CT_COMMENT_MULTI))
- {
- cpd.in_preproc = CT_NONE;
- cpd.preproc_ncnl_count = 0;
- }
- /* Special handling for preprocessor stuff */
- if (cpd.in_preproc != CT_NONE)
- {
- chunk_flags_set(pc, PCF_IN_PREPROC);
- /* Count words after the preprocessor */
- if (!chunk_is_comment(pc) && !chunk_is_newline(pc))
- {
- cpd.preproc_ncnl_count++;
- }
- /* Figure out the type of preprocessor for #include parsing */
- if (cpd.in_preproc == CT_PREPROC)
- {
- if ((pc->type < CT_PP_DEFINE) || (pc->type > CT_PP_OTHER))
- {
- set_chunk_type(pc, CT_PP_OTHER);
- }
- cpd.in_preproc = pc->type;
- }
- }
- else
- {
- /* Check for a preprocessor start */
- if ((pc->type == CT_POUND) &&
- ((rprev == NULL) || (rprev->type == CT_NEWLINE)))
- {
- set_chunk_type(pc, CT_PREPROC);
- pc->flags |= PCF_IN_PREPROC;
- cpd.in_preproc = CT_PREPROC;
- }
- }
- if (pc->type == CT_NEWLINE)
- {
- LOG_FMT(LGUY, "%s(%d): (%zu)<NL> col=%zu\n",
- __func__, __LINE__, pc->orig_line, pc->orig_col);
- }
- else
- {
- LOG_FMT(LGUY, "%s(%d): text():%s, type:%s, orig_col=%zu, orig_col_end=%d\n",
- __func__, __LINE__, pc->text(), get_token_name(pc->type), pc->orig_col, pc->orig_col_end);
- }
- }
- /* Set the cpd.newline string for this file */
- if ((cpd.settings[UO_newlines].le == LE_LF) ||
- ((cpd.settings[UO_newlines].le == LE_AUTO) &&
- (cpd.le_counts[LE_LF] >= cpd.le_counts[LE_CRLF]) &&
- (cpd.le_counts[LE_LF] >= cpd.le_counts[LE_CR])))
- {
- /* LF line ends */
- cpd.newline = "\n";
- LOG_FMT(LLINEENDS, "Using LF line endings\n");
- }
- else if ((cpd.settings[UO_newlines].le == LE_CRLF) ||
- ((cpd.settings[UO_newlines].le == LE_AUTO) &&
- (cpd.le_counts[LE_CRLF] >= cpd.le_counts[LE_LF]) &&
- (cpd.le_counts[LE_CRLF] >= cpd.le_counts[LE_CR])))
- {
- /* CRLF line ends */
- cpd.newline = "\r\n";
- LOG_FMT(LLINEENDS, "Using CRLF line endings\n");
- }
- else
- {
- /* CR line ends */
- cpd.newline = "\r";
- LOG_FMT(LLINEENDS, "Using CR line endings\n");
- }
- } // tokenize
- // /**
- // * A simplistic fixed-sized needle in the fixed-size haystack string search.
- // */
- // int str_find(const char *needle, int needle_len,
- // const char *haystack, int haystack_len)
- // {
- // for (int idx = 0; idx < (haystack_len - needle_len); idx++)
- // {
- // if (memcmp(needle, haystack + idx, needle_len) == 0)
- // {
- // return(idx);
- // }
- // }
- // return(-1);
- // }