PageRenderTime 71ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/omnetpp-4.1/src/common/linetokenizer.cc

https://bitbucket.org/indigopony/omnetproject
C++ | 201 lines | 161 code | 18 blank | 22 comment | 25 complexity | cfc4d55bd13b6ee2f87775bf45a890d1 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, GPL-2.0, Apache-2.0, JSON
  1. //=========================================================================
  2. // LINETOKENIZER.CC - part of
  3. // OMNeT++/OMNEST
  4. // Discrete System Simulation in C++
  5. //
  6. // Author: Andras Varga
  7. //
  8. //=========================================================================
  9. /*--------------------------------------------------------------*
  10. Copyright (C) 2006-2008 OpenSim Ltd.
  11. This file is distributed WITHOUT ANY WARRANTY. See the file
  12. `license' for details on this and other legal matters.
  13. *--------------------------------------------------------------*/
  14. #include <assert.h>
  15. #include <sstream>
  16. #include <string.h>
  17. #include "exception.h"
  18. #include "linetokenizer.h"
  19. USING_NAMESPACE
  20. LineTokenizer::LineTokenizer(int bufferSize, int maxTokenNum, char sep1, char sep2)
  21. : sep1(sep1), sep2(sep2)
  22. {
  23. if (maxTokenNum < 0)
  24. maxTokenNum = bufferSize/4;
  25. vecsize = maxTokenNum;
  26. vec = new char *[vecsize];
  27. lineBufferSize = bufferSize;
  28. lineBuffer = new char[lineBufferSize];
  29. }
  30. LineTokenizer::~LineTokenizer()
  31. {
  32. delete [] vec;
  33. delete [] lineBuffer;
  34. }
  // Converts a single hexadecimal digit character to its numeric value (0..15).
  // Accepts both uppercase and lowercase letters; returns -1 for any character
  // that is not a hex digit.
  inline int h2d(char c)
  {
      int value = -1;
      if ('a' <= c && c <= 'f')
          value = 10 + (c - 'a');
      else if ('A' <= c && c <= 'F')
          value = 10 + (c - 'A');
      else if ('0' <= c && c <= '9')
          value = c - '0';
      return value;
  }
  42. inline int h2d(char *&s)
  43. {
  44. int a = h2d(*s);
  45. if (a<0) return 0;
  46. s++;
  47. int b = h2d(*s);
  48. if (b<0) return a;
  49. s++;
  50. return a*16+b;
  51. }
  52. static void interpretBackslashes(char *buffer)
  53. {
  54. // interpret backslashes in-place. This works because the output
  55. // is always shorter (or equal) than the input.
  56. char *s = buffer;
  57. char *d = buffer;
  58. for (; *s; s++, d++)
  59. {
  60. if (*s=='\\')
  61. {
  62. // allow backslash as quote character, also interpret backslash sequences
  63. // note: this must be kept consistent with opp_quotestr()/opp_parsequotedstr()
  64. s++;
  65. switch(*s)
  66. {
  67. case 'b': *d = '\b'; break;
  68. case 'f': *d = '\f'; break;
  69. case 'n': *d = '\n'; break;
  70. case 'r': *d = '\r'; break;
  71. case 't': *d = '\t'; break;
  72. case 'x': s++; *d = h2d(s); s--; break; // hex code
  73. case '"': *d = '"'; break; // quote needs to be backslashed
  74. case '\\': *d = '\\'; break; // backslash needs to be backslashed
  75. case '\n': d--; break; // don't store line continuation (backslash followed by newline)
  76. case '\0': d--; s--; break; // string ends in stray backslash
  77. default: *d = *s; // be tolerant with unrecogized backslash sequences
  78. }
  79. }
  80. else
  81. {
  82. *d = *s;
  83. }
  84. }
  85. *d = '\0';
  86. }
  87. int LineTokenizer::tokenize(const char *line, int length)
  88. {
  89. if (length >= lineBufferSize)
  90. throw opp_runtime_error("Cannot tokenize lines longer than %d", lineBufferSize - 1);
  91. strncpy(lineBuffer, line, length);
  92. lineBuffer[length] = '\0'; // guard
  93. char *s = lineBuffer + length - 1;
  94. while (s >= lineBuffer && (*s == '\r' || *s == '\n'))
  95. *s-- = '\0';
  96. numtokens = 0;
  97. s = lineBuffer;
  98. // loop through the tokens on the line
  99. for (;;)
  100. {
  101. // skip separators before token
  102. while (*s==sep1 || *s==sep2) s++;
  103. char *token;
  104. if (!*s)
  105. {
  106. // end of line found -- exit loop
  107. break;
  108. }
  109. else if (*s=='"')
  110. {
  111. // parse quoted string
  112. token = s+1;
  113. s++;
  114. // try to find end of quoted string
  115. bool containsBackslash = false;
  116. while (*s && *s!='"')
  117. if (*s++=='\\')
  118. {s++; containsBackslash = true;}
  119. // check we found the close quote
  120. if (*s!='"')
  121. throw opp_runtime_error("Unmatched quote in file");
  122. // terminate quoted string with zero, overwriting close quote
  123. *s++ = 0;
  124. // if token contained a backslash (rare!), we need post-processing
  125. // to interpret the escape sequences
  126. if (containsBackslash)
  127. interpretBackslashes(token);
  128. }
  129. else
  130. {
  131. // parse unquoted string
  132. token = s;
  133. // try find end of string
  134. while (*s && *s!=sep1 && *s!=sep2) s++;
  135. // terminate string with zero (if we are not already at end of the line)
  136. if (*s) *s++ = 0;
  137. }
  138. // add token to the array (if there's room); s points to the rest of the string
  139. if (numtokens==vecsize)
  140. throw opp_runtime_error("Too many tokens on a line, max %d allowed", vecsize-1);
  141. vec[numtokens++] = token;
  142. }
  143. return numtokens;
  144. }
  145. /*
  146. Example code:
  147. #include <string.h>
  148. #include <iostream>
  149. using namespace std;
  150. void tok(const char *s)
  151. {
  152. char *buf = new char[strlen(s)+1];
  153. strcpy(buf, s);
  154. cout << buf << " --> ";
  155. LineTokenizer t;
  156. bool ok = t.tokenize(buf);
  157. if (!ok)
  158. cout << t.errorMsg(1) << endl;
  159. int numtokens = t.numTokens();
  160. char **vec = t.tokens();
  161. for (int i=0; i<numtokens; i++)
  162. cout << (i==0?"":":") << vec[i];
  163. cout << "\n";
  164. }
  165. int main(int argc, char **argv)
  166. {
  167. tok("E 121.1344 e434");
  168. tok("E \"121.1344 e434\" 222");
  169. return 0;
  170. }
  171. */