/src/io/FileTokenizer.cpp

https://bitbucket.org/pshriwise/moab · C++ · 445 lines · 297 code · 81 blank · 67 comment · 97 complexity · b86b5c9e103417301db5b800c2a52ae9 MD5 · raw file

  1. /**
  2. * MOAB, a Mesh-Oriented datABase, is a software component for creating,
  3. * storing and accessing finite element mesh data.
  4. *
  5. * Copyright 2004 Sandia Corporation. Under the terms of Contract
  6. * DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government
  7. * retains certain rights in this software.
  8. *
  9. * This library is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. */
  15. #include "FileTokenizer.hpp"
  16. #include "moab/ReadUtilIface.hpp"
  17. #include "moab/ErrorHandler.hpp"
  18. #include <cstring>
  19. #include <cctype>
  20. #include <string>
  21. #include <cstdlib>
  22. namespace moab
  23. {
  24. using namespace std;
  25. FileTokenizer::FileTokenizer( FILE* file_ptr, ReadUtilIface* )
  26. : filePtr( file_ptr ), nextToken( buffer ), bufferEnd( buffer ), lineNumber( 1 ), lastChar( '\0' )
  27. {
  28. }
  29. FileTokenizer::~FileTokenizer()
  30. {
  31. fclose( filePtr );
  32. }
  33. bool FileTokenizer::eof() const
  34. {
  35. return nextToken == bufferEnd && feof( filePtr );
  36. }
  37. const char* FileTokenizer::get_string()
  38. {
  39. // If the whitespace character marking the end of the
  40. // last token was a newline, increment the line count.
  41. if( lastChar == '\n' ) ++lineNumber;
  42. // Loop until either found the start of a token to return or have
  43. // reached the end of the file.
  44. for( ;; )
  45. {
  46. // If the buffer is empty, read more.
  47. if( nextToken == bufferEnd )
  48. {
  49. size_t count = fread( buffer, 1, sizeof( buffer ) - 1, filePtr );
  50. if( 0 == count )
  51. {
  52. if( feof( filePtr ) )
  53. return NULL;
  54. else
  55. MB_SET_ERR_RET_VAL( "I/O Error", NULL );
  56. }
  57. nextToken = buffer;
  58. bufferEnd = buffer + count;
  59. }
  60. // If the current character is not a space, we've found a token.
  61. if( !isspace( *nextToken ) ) break;
  62. // If the current space character is a newline,
  63. // increment the line number count.
  64. if( *nextToken == '\n' ) ++lineNumber;
  65. ++nextToken;
  66. }
  67. // Store the start of the token in "result" and
  68. // advance "nextToken" to one past the end of the
  69. // token.
  70. char* result = nextToken;
  71. while( nextToken != bufferEnd && !isspace( static_cast< unsigned char >( *nextToken ) ) )
  72. ++nextToken;
  73. // If we have reached the end of the buffer without finding
  74. // a whitespace character terminating the token, we need to
  75. // read more from the file. Only try once. If the token is
  76. // too large to fit in the buffer, give up.
  77. if( nextToken == bufferEnd )
  78. {
  79. // Shift the (possibly) partial token to the start of the buffer.
  80. size_t remaining = bufferEnd - result;
  81. memmove( buffer, result, remaining );
  82. result = buffer;
  83. nextToken = result + remaining;
  84. // Fill the remainder of the buffer after the token.
  85. size_t count = fread( nextToken, 1, sizeof( buffer ) - remaining - 1, filePtr );
  86. if( 0 == count && !feof( filePtr ) ) MB_SET_ERR_RET_VAL( "I/O Error", NULL );
  87. bufferEnd = nextToken + count;
  88. // Continue to advance nextToken until we find the space
  89. // terminating the token.
  90. while( nextToken != bufferEnd && !isspace( *nextToken ) )
  91. ++nextToken;
  92. if( nextToken == bufferEnd )
  93. { // EOF
  94. *bufferEnd = '\0';
  95. ++bufferEnd;
  96. }
  97. }
  98. // Save terminating whitespace character (or NULL char if EOF).
  99. lastChar = *nextToken;
  100. // Put null in buffer to mark end of current token.
  101. *nextToken = '\0';
  102. // Advance nextToken to the next character to search next time.
  103. ++nextToken;
  104. return result;
  105. }
  106. bool FileTokenizer::get_double_internal( double& result )
  107. {
  108. // Get a token
  109. const char *token_end, *token = get_string();
  110. if( !token ) return false;
  111. // Check for hex value -- on some platforms (e.g. Linux), strtod
  112. // will accept hex values, on others (e.g. Sun) it will not. Force
  113. // failure on hex numbers for consistency.
  114. if( token[0] && token[1] && token[0] == '0' && toupper( token[1] ) == 'X' )
  115. MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
  116. false );
  117. // Parse token as double
  118. result = strtod( token, (char**)&token_end );
  119. // If the one past the last char read by strtod is
  120. // not the NULL character terminating the string,
  121. // then parse failed.
  122. if( *token_end )
  123. MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
  124. false );
  125. return true;
  126. }
  127. bool FileTokenizer::get_float_internal( float& result )
  128. {
  129. double d;
  130. if( !get_double_internal( d ) ) return false;
  131. result = (float)d;
  132. return true;
  133. }
  134. bool FileTokenizer::get_long_int_internal( long& result )
  135. {
  136. // Get a token
  137. const char *token_end, *token = get_string();
  138. if( !token ) return false;
  139. // Parse token as long
  140. result = strtol( token, (char**)&token_end, 0 );
  141. // If the one past the last char read by strtol is
  142. // not the NULL character terminating the string,
  143. // then parse failed.
  144. if( *token_end )
  145. MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected number, got \"" << token << "\"",
  146. false );
  147. return true;
  148. }
  149. bool FileTokenizer::get_byte_internal( unsigned char& result )
  150. {
  151. long i;
  152. if( !get_long_int_internal( i ) ) return false;
  153. result = (unsigned char)i;
  154. if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
  155. return true;
  156. }
  157. bool FileTokenizer::get_short_int_internal( short& result )
  158. {
  159. long i;
  160. if( !get_long_int_internal( i ) ) return false;
  161. result = (short)i;
  162. if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
  163. return true;
  164. }
  165. bool FileTokenizer::get_integer_internal( int& result )
  166. {
  167. long i;
  168. if( !get_long_int_internal( i ) ) return false;
  169. result = (int)i;
  170. if( i != (long)result ) MB_SET_ERR_RET_VAL( "Numeric overflow at line " << line_number(), false );
  171. return true;
  172. }
  173. bool FileTokenizer::get_boolean_internal( bool& result )
  174. {
  175. // Get a token
  176. const char* token = get_string();
  177. if( !token ) return false;
  178. if( token[1] || ( token[0] != '0' && token[0] != '1' ) )
  179. MB_SET_ERR_RET_VAL( "Syntax error at line " << line_number() << ": expected 0 or 1, got \"" << token << "\"",
  180. false );
  181. result = token[0] == '1';
  182. return true;
  183. }
  184. bool FileTokenizer::get_floats( size_t count, float* array )
  185. {
  186. for( size_t i = 0; i < count; ++i )
  187. {
  188. if( !get_float_internal( *array ) ) return false;
  189. ++array;
  190. }
  191. return true;
  192. }
  193. bool FileTokenizer::get_doubles( size_t count, double* array )
  194. {
  195. for( size_t i = 0; i < count; ++i )
  196. {
  197. if( !get_double_internal( *array ) ) return false;
  198. ++array;
  199. }
  200. return true;
  201. }
  202. bool FileTokenizer::get_bytes( size_t count, unsigned char* array )
  203. {
  204. for( size_t i = 0; i < count; ++i )
  205. {
  206. if( !get_byte_internal( *array ) ) return false;
  207. ++array;
  208. }
  209. return true;
  210. }
  211. bool FileTokenizer::get_short_ints( size_t count, short* array )
  212. {
  213. for( size_t i = 0; i < count; ++i )
  214. {
  215. if( !get_short_int_internal( *array ) ) return false;
  216. ++array;
  217. }
  218. return true;
  219. }
  220. bool FileTokenizer::get_integers( size_t count, int* array )
  221. {
  222. for( size_t i = 0; i < count; ++i )
  223. {
  224. if( !get_integer_internal( *array ) ) return false;
  225. ++array;
  226. }
  227. return true;
  228. }
  229. bool FileTokenizer::get_long_ints( size_t count, long* array )
  230. {
  231. for( size_t i = 0; i < count; ++i )
  232. {
  233. if( !get_long_int_internal( *array ) ) return false;
  234. ++array;
  235. }
  236. return true;
  237. }
  238. bool FileTokenizer::get_booleans( size_t count, bool* array )
  239. {
  240. for( size_t i = 0; i < count; ++i )
  241. {
  242. if( !get_boolean_internal( *array ) ) return false;
  243. ++array;
  244. }
  245. return true;
  246. }
  247. void FileTokenizer::unget_token()
  248. {
  249. if( nextToken - buffer < 2 ) return;
  250. --nextToken;
  251. *nextToken = lastChar;
  252. --nextToken;
  253. while( nextToken > buffer && *nextToken )
  254. --nextToken;
  255. if( !*nextToken ) ++nextToken;
  256. lastChar = '\0';
  257. }
  258. bool FileTokenizer::match_token( const char* str, bool print_error )
  259. {
  260. // Get a token
  261. const char* token = get_string();
  262. if( !token ) return false;
  263. // Check if it matches
  264. if( 0 == strcmp( token, str ) ) return true;
  265. // Construct error message
  266. if( print_error )
  267. MB_SET_ERR_CONT( "Syntax error at line " << line_number() << ": expected \"" << str << "\", got \"" << token
  268. << "\"" );
  269. return false;
  270. }
  271. int FileTokenizer::match_token( const char* const* list, bool print_error )
  272. {
  273. // Get a token
  274. const char* token = get_string();
  275. if( !token ) return 0;
  276. // Check if it matches any input string
  277. const char* const* ptr;
  278. for( ptr = list; *ptr; ++ptr )
  279. {
  280. if( 0 == strcmp( token, *ptr ) ) return ptr - list + 1;
  281. }
  282. if( !print_error ) return 0;
  283. // No match, constuct error message
  284. std::string message( "Parsing error at line " );
  285. char lineno[16];
  286. sprintf( lineno, "%d", line_number() );
  287. message += lineno;
  288. message += ": expected one of {";
  289. for( ptr = list; *ptr; ++ptr )
  290. {
  291. message += " ";
  292. message += *ptr;
  293. }
  294. message += " } got \"";
  295. message += token;
  296. message += "\"";
  297. MB_SET_ERR_CONT( message.c_str() );
  298. return 0;
  299. }
  300. bool FileTokenizer::get_newline( bool report_error )
  301. {
  302. if( lastChar == '\n' )
  303. {
  304. lastChar = ' ';
  305. ++lineNumber;
  306. return true;
  307. }
  308. // Loop until either we a) find a newline, b) find a non-whitespace
  309. // character or c) reach the end of the file.
  310. for( ;; )
  311. {
  312. // If the buffer is empty, read more.
  313. if( nextToken == bufferEnd )
  314. {
  315. size_t count = fread( buffer, 1, sizeof( buffer ), filePtr );
  316. if( 0 == count )
  317. {
  318. if( eof() )
  319. MB_SET_ERR_RET_VAL( "File truncated at line " << line_number(), false );
  320. else
  321. MB_SET_ERR_RET_VAL( "I/O Error", false );
  322. }
  323. nextToken = buffer;
  324. bufferEnd = buffer + count;
  325. }
  326. // If the current character is not a space, the we've failed.
  327. if( !isspace( *nextToken ) )
  328. if( report_error ) MB_SET_ERR_RET_VAL( "Expected newline at line " << line_number(), false );
  329. // If the current space character is a newline,
  330. // increment the line number count.
  331. if( *nextToken == '\n' )
  332. {
  333. ++lineNumber;
  334. ++nextToken;
  335. lastChar = ' ';
  336. return true;
  337. }
  338. ++nextToken;
  339. }
  340. return false;
  341. }
  342. bool FileTokenizer::get_binary( size_t size, void* mem )
  343. {
  344. // If data in buffer
  345. if( nextToken != bufferEnd )
  346. {
  347. // If requested size is less than buffer contents,
  348. // just pass back part of the buffer
  349. if( bufferEnd - nextToken <= (int)size )
  350. {
  351. memcpy( mem, nextToken, size );
  352. nextToken += size;
  353. return true;
  354. }
  355. // Copy buffer contents into memory and clear buffer
  356. memcpy( mem, nextToken, bufferEnd - nextToken );
  357. size -= bufferEnd - nextToken;
  358. mem = reinterpret_cast< char* >( mem ) + ( bufferEnd - nextToken );
  359. nextToken = bufferEnd;
  360. }
  361. // Read any additional data from file
  362. return size == fread( mem, 1, size, filePtr );
  363. }
  364. } // namespace moab