PageRenderTime 57ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/udunits-2.1.24/lib/parser.y

#
Happy | 598 lines | 522 code | 76 blank | 0 comment | 0 complexity | c63513564f3dd356ab81b35d556990b6 MD5 | raw file
  1. %{
  2. /*
  3. * Copyright 2008, 2009 University Corporation for Atmospheric Research
  4. *
  5. * This file is part of the UDUNITS-2 package. See the file LICENSE
  6. * in the top-level source-directory of the package for copying and
  7. * redistribution conditions.
  8. */
  9. /*
  10. * bison(1)-based parser for decoding formatted unit specifications.
  11. *
  12. * This module is thread-compatible but not thread-safe. Multi-threaded
  13. * access must be externally synchronized.
  14. */
  15. /*LINTLIBRARY*/
  16. #ifndef _XOPEN_SOURCE
  17. # define _XOPEN_SOURCE 500
  18. #endif
  19. #include <assert.h>
  20. #include <ctype.h>
  21. #include <errno.h>
  22. #include <stdlib.h>
  23. #include <stdio.h>
  24. #include <string.h>
  25. #include <strings.h>
  26. #include "udunits2.h"
  27. static ut_unit* _finalUnit; /* fully-parsed specification */
  28. static ut_system* _unitSystem; /* The unit-system to use */
  29. static char* _errorMessage; /* last error-message */
  30. static ut_encoding _encoding; /* encoding of string to be parsed */
  31. static int _restartScanner;/* restart scanner? */
  32. /*
  33. * Removes leading and trailing whitespace from a string.
  34. *
  35. * Arguments:
  36. * string NUL-terminated string. Will be modified if it
  37. * contains whitespace.
  38. * encoding The character-encoding of "string".
  39. * Returns:
  40. * "string"
  41. */
  42. char*
  43. ut_trim(
  44. char* const string,
  45. const ut_encoding encoding)
  46. {
  47. static const char* asciiSpace = " \t\n\r\f\v";
  48. static const char* latin1Space = " \t\n\r\f\v\xa0"; /* add NBSP */
  49. const char* whiteSpace;
  50. char* start;
  51. char* stop;
  52. size_t len;
  53. whiteSpace =
  54. encoding == UT_LATIN1
  55. ? latin1Space
  56. : asciiSpace;
  57. start = string + strspn(string, whiteSpace);
  58. for (stop = start + strlen(start); stop > start; --stop)
  59. if (strchr(whiteSpace, stop[-1]) == NULL)
  60. break;
  61. len = stop - start;
  62. (void)memmove(string, start, len);
  63. string[len] = 0;
  64. ut_set_status(UT_SUCCESS);
  65. return start;
  66. }
  67. /*
  68. * YACC error routine:
  69. */
  70. void
  71. uterror(
  72. char *s)
  73. {
  74. static char* nomem = "uterror(): out of memory";
  75. if (_errorMessage != NULL && _errorMessage != nomem)
  76. free(_errorMessage);
  77. _errorMessage = strdup(s);
  78. if (_errorMessage == NULL)
  79. _errorMessage = nomem;
  80. }
  81. %}
  82. %union {
  83. char* id; /* identifier */
  84. ut_unit* unit; /* "unit" structure */
  85. double rval; /* floating-point numerical value */
  86. long ival; /* integer numerical value */
  87. }
  88. %token ERR
  89. %token SHIFT
  90. %token MULTIPLY
  91. %token DIVIDE
  92. %token <ival> INT
  93. %token <ival> EXPONENT
  94. %token <rval> REAL
  95. %token <id> ID
  96. %token <rval> DATE
  97. %token <rval> CLOCK
  98. %token <rval> TIMESTAMP
  99. %token <rval> LOGREF
  100. %type <unit> unit_spec
  101. %type <unit> shift_exp
  102. %type <unit> product_exp
  103. %type <unit> power_exp
  104. %type <unit> basic_exp
  105. %type <rval> timestamp
  106. %type <rval> number
  107. %%
  108. unit_spec: /* nothing */ {
  109. _finalUnit = ut_get_dimensionless_unit_one(_unitSystem);
  110. YYACCEPT;
  111. } |
  112. shift_exp {
  113. _finalUnit = $1;
  114. YYACCEPT;
  115. } |
  116. error {
  117. YYABORT;
  118. }
  119. ;
  120. shift_exp: product_exp {
  121. $$ = $1;
  122. } |
  123. product_exp SHIFT REAL {
  124. $$ = ut_offset($1, $3);
  125. ut_free($1);
  126. if ($$ == NULL)
  127. YYERROR;
  128. } |
  129. product_exp SHIFT INT {
  130. $$ = ut_offset($1, $3);
  131. ut_free($1);
  132. if ($$ == NULL)
  133. YYERROR;
  134. } |
  135. product_exp SHIFT timestamp {
  136. $$ = ut_offset_by_time($1, $3);
  137. ut_free($1);
  138. if ($$ == NULL)
  139. YYERROR;
  140. } |
  141. product_exp SHIFT error {
  142. ut_status prev = ut_get_status();
  143. ut_free($1);
  144. ut_set_status(prev);
  145. YYERROR;
  146. }
  147. ;
  148. product_exp: power_exp {
  149. $$ = $1;
  150. } |
  151. product_exp power_exp {
  152. $$ = ut_multiply($1, $2);
  153. ut_free($1);
  154. ut_free($2);
  155. if ($$ == NULL)
  156. YYERROR;
  157. } |
  158. product_exp error {
  159. ut_status prev = ut_get_status();
  160. ut_free($1);
  161. ut_set_status(prev);
  162. YYERROR;
  163. } |
  164. product_exp MULTIPLY power_exp {
  165. $$ = ut_multiply($1, $3);
  166. ut_free($1);
  167. ut_free($3);
  168. if ($$ == NULL)
  169. YYERROR;
  170. } |
  171. product_exp MULTIPLY error {
  172. ut_status prev = ut_get_status();
  173. ut_free($1);
  174. ut_set_status(prev);
  175. YYERROR;
  176. } |
  177. product_exp DIVIDE power_exp {
  178. $$ = ut_divide($1, $3);
  179. ut_free($1);
  180. ut_free($3);
  181. if ($$ == NULL)
  182. YYERROR;
  183. } |
  184. product_exp DIVIDE error {
  185. ut_status prev = ut_get_status();
  186. ut_free($1);
  187. ut_set_status(prev);
  188. YYERROR;
  189. }
  190. ;
  191. power_exp: basic_exp {
  192. $$ = $1;
  193. } |
  194. basic_exp INT {
  195. $$ = ut_raise($1, $2);
  196. ut_free($1);
  197. if ($$ == NULL)
  198. YYERROR;
  199. } |
  200. basic_exp EXPONENT {
  201. $$ = ut_raise($1, $2);
  202. ut_free($1);
  203. if ($$ == NULL)
  204. YYERROR;
  205. } |
  206. basic_exp error {
  207. ut_status prev = ut_get_status();
  208. ut_free($1);
  209. ut_set_status(prev);
  210. YYERROR;
  211. }
  212. ;
  213. basic_exp: ID {
  214. double prefix = 1;
  215. ut_unit* unit = NULL;
  216. char* cp = $1;
  217. int symbolPrefixSeen = 0;
  218. while (*cp) {
  219. size_t nchar;
  220. double value;
  221. unit = ut_get_unit_by_name(_unitSystem, cp);
  222. if (unit != NULL)
  223. break;
  224. unit = ut_get_unit_by_symbol(_unitSystem, cp);
  225. if (unit != NULL)
  226. break;
  227. if (utGetPrefixByName(_unitSystem, cp, &value, &nchar)
  228. == UT_SUCCESS) {
  229. prefix *= value;
  230. cp += nchar;
  231. }
  232. else {
  233. if (!symbolPrefixSeen &&
  234. utGetPrefixBySymbol(_unitSystem, cp, &value,
  235. &nchar) == UT_SUCCESS) {
  236. symbolPrefixSeen = 1;
  237. prefix *= value;
  238. cp += nchar;
  239. }
  240. else {
  241. break;
  242. }
  243. }
  244. }
  245. free($1);
  246. if (unit == NULL) {
  247. ut_set_status(UT_UNKNOWN);
  248. YYERROR;
  249. }
  250. $$ = ut_scale(prefix, unit);
  251. ut_free(unit);
  252. if ($$ == NULL)
  253. YYERROR;
  254. } |
  255. '(' shift_exp ')' {
  256. $$ = $2;
  257. } |
  258. '(' shift_exp error {
  259. ut_status status = ut_get_status();
  260. ut_free($2);
  261. ut_set_status(status);
  262. YYERROR;
  263. } |
  264. LOGREF product_exp ')' {
  265. $$ = ut_log($1, $2);
  266. ut_free($2);
  267. if ($$ == NULL)
  268. YYERROR;
  269. } |
  270. LOGREF product_exp error {
  271. ut_status status = ut_get_status();
  272. ut_free($2);
  273. ut_set_status(status);
  274. YYERROR;
  275. } |
  276. number {
  277. $$ = ut_scale($1,
  278. ut_get_dimensionless_unit_one(_unitSystem));
  279. }
  280. ;
  281. number: INT {
  282. $$ = $1;
  283. } |
  284. REAL {
  285. $$ = $1;
  286. }
  287. ;
  288. timestamp: DATE {
  289. $$ = $1;
  290. } |
  291. DATE CLOCK {
  292. $$ = $1 + $2;
  293. } |
  294. DATE CLOCK CLOCK {
  295. $$ = $1 + ($2 - $3);
  296. } |
  297. DATE CLOCK INT {
  298. int mag = $3 >= 0 ? $3 : -$3;
  299. if (mag <= 24) {
  300. $$ = $1 + ($2 - ut_encode_clock($3, 0, 0));
  301. }
  302. else if (mag >= 100 && mag <= 2400) {
  303. $$ = $1 + ($2 - ut_encode_clock($3/100, $3%100, 0));
  304. }
  305. else {
  306. ut_set_status(UT_SYNTAX);
  307. YYERROR;
  308. }
  309. } |
  310. DATE CLOCK ID {
  311. int error = 0;
  312. if (strcasecmp($3, "UTC") != 0 &&
  313. strcasecmp($3, "GMT") != 0 &&
  314. strcasecmp($3, "Z") != 0) {
  315. ut_set_status(UT_UNKNOWN);
  316. error = 1;
  317. }
  318. free($3);
  319. if (!error) {
  320. $$ = $1 + $2;
  321. }
  322. else {
  323. YYERROR;
  324. }
  325. } |
  326. TIMESTAMP {
  327. $$ = $1;
  328. } |
  329. TIMESTAMP CLOCK {
  330. $$ = $1 - $2;
  331. } |
  332. TIMESTAMP INT {
  333. int mag = $2 >= 0 ? $2 : -$2;
  334. if (mag <= 24) {
  335. $$ = $1 - ut_encode_clock($2, 0, 0);
  336. }
  337. else if (mag >= 100 && mag <= 2400) {
  338. $$ = $1 - ut_encode_clock($2/100, $2%100, 0);
  339. }
  340. else {
  341. ut_set_status(UT_SYNTAX);
  342. YYERROR;
  343. }
  344. } |
  345. TIMESTAMP ID {
  346. int error = 0;
  347. if (strcasecmp($2, "UTC") != 0 &&
  348. strcasecmp($2, "GMT") != 0 &&
  349. strcasecmp($2, "Z") != 0) {
  350. ut_set_status(UT_UNKNOWN);
  351. error = 1;
  352. }
  353. free($2);
  354. if (!error) {
  355. $$ = $1;
  356. }
  357. else {
  358. YYERROR;
  359. }
  360. }
  361. ;
  362. %%
  363. #define yymaxdepth utmaxdepth
  364. #define yylval utlval
  365. #define yychar utchar
  366. #define yypact utpact
  367. #define yyr1 utr1
  368. #define yyr2 utr2
  369. #define yydef utdef
  370. #define yychk utchk
  371. #define yypgo utpgo
  372. #define yyact utact
  373. #define yyexca utexca
  374. #define yyerrflag uterrflag
  375. #define yynerrs utnerrs
  376. #define yyps utps
  377. #define yypv utpv
  378. #define yys uts
  379. #define yy_yys utyys
  380. #define yystate utstate
  381. #define yytmp uttmp
  382. #define yyv utv
  383. #define yy_yyv utyyv
  384. #define yyval utval
  385. #define yylloc utlloc
  386. #define yyreds utreds
  387. #define yytoks uttoks
  388. #define yylhs utyylhs
  389. #define yylen utyylen
  390. #define yydefred utyydefred
  391. #define yydgoto utyydgoto
  392. #define yysindex utyysindex
  393. #define yyrindex utyyrindex
  394. #define yygindex utyygindex
  395. #define yytable utyytable
  396. #define yycheck utyycheck
  397. #define yyname utyyname
  398. #define yyrule utyyrule
  399. #include "scanner.c"
  400. /*
  401. * Converts a string in the Latin-1 character set (ISO 8859-1) to the UTF-8
  402. * character set.
  403. *
  404. * Arguments:
  405. * latin1String Pointer to the string to be converted. May be freed
  406. * upon return.
  407. * Returns:
  408. * NULL Failure. ut_handle_error_message() was called.
  409. * else Pointer to UTF-8 representation of "string". Must not
  410. * be freed. Subsequent calls may overwrite.
  411. */
  412. static const char*
  413. latin1ToUtf8(
  414. const char* const latin1String)
  415. {
  416. static char* utf8String = NULL;
  417. static size_t bufSize = 0;
  418. size_t size;
  419. const unsigned char* in;
  420. unsigned char* out;
  421. assert(latin1String != NULL);
  422. size = 2 * strlen(latin1String) + 1;
  423. if (size > bufSize) {
  424. char* buf = realloc(utf8String, size);
  425. if (buf != NULL) {
  426. utf8String = buf;
  427. bufSize = size;
  428. }
  429. else {
  430. ut_handle_error_message("Couldn't allocate %ld-byte buffer: %s",
  431. (unsigned long)size, strerror(errno));
  432. return NULL;
  433. }
  434. }
  435. for (in = (const unsigned char*)latin1String,
  436. out = (unsigned char*)utf8String; *in; ++in) {
  437. # define IS_ASCII(c) (((c) & 0x80) == 0)
  438. if (IS_ASCII(*in)) {
  439. *out++ = *in;
  440. }
  441. else {
  442. *out++ = 0xC0 | ((0xC0 & *in) >> 6);
  443. *out++ = 0x80 | (0x3F & *in);
  444. }
  445. }
  446. *out = 0;
  447. return utf8String;
  448. }
  449. /*
  450. * Returns the binary representation of a unit corresponding to a string
  451. * representation.
  452. *
  453. * Arguments:
  454. * system Pointer to the unit-system in which the parsing will
  455. * occur.
  456. * string The string to be parsed (e.g., "millimeters"). There
  457. * should be no leading or trailing whitespace in the
  458. * string. See ut_trim().
  459. * encoding The encoding of "string".
  460. * Returns:
  461. * NULL Failure. "ut_get_status()" will be one of
  462. * UT_BAD_ARG "system" or "string" is NULL.
  463. * UT_SYNTAX "string" contained a syntax
  464. * error.
  465. * UT_UNKNOWN "string" contained an unknown
  466. * identifier.
  467. * UT_OS Operating-system failure. See
  468. * "errno".
  469. * else Pointer to the unit corresponding to "string".
  470. */
  471. ut_unit*
  472. ut_parse(
  473. const ut_system* const system,
  474. const char* const string,
  475. ut_encoding encoding)
  476. {
  477. ut_unit* unit = NULL; /* failure */
  478. if (system == NULL || string == NULL) {
  479. ut_set_status(UT_BAD_ARG);
  480. }
  481. else {
  482. const char* utf8String;
  483. if (encoding != UT_LATIN1) {
  484. utf8String = string;
  485. }
  486. else {
  487. utf8String = latin1ToUtf8(string);
  488. encoding = UT_UTF8;
  489. if (utf8String == NULL)
  490. ut_set_status(UT_OS);
  491. }
  492. if (utf8String != NULL) {
  493. YY_BUFFER_STATE buf = ut_scan_string(utf8String);
  494. _unitSystem = (ut_system*)system;
  495. _encoding = encoding;
  496. _restartScanner = 1;
  497. #if YYDEBUG
  498. utdebug = 0;
  499. ut_flex_debug = 0;
  500. #endif
  501. _finalUnit = NULL;
  502. if (utparse() == 0) {
  503. int status;
  504. int n = yy_c_buf_p - buf->yy_ch_buf;
  505. if (n >= strlen(utf8String)) {
  506. unit = _finalUnit; /* success */
  507. status = UT_SUCCESS;
  508. }
  509. else {
  510. /*
  511. * Parsing terminated before the end of the string.
  512. */
  513. ut_free(_finalUnit);
  514. status = UT_SYNTAX;
  515. }
  516. ut_set_status(status);
  517. }
  518. ut_delete_buffer(buf);
  519. } /* utf8String != NULL */
  520. } /* valid arguments */
  521. return unit;
  522. }