PageRenderTime 40ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/indra/llmessage/llmessagetemplateparser.cpp

https://bitbucket.org/lindenlab/viewer-beta/
C++ | 761 lines | 615 code | 75 blank | 71 comment | 161 complexity | cb1db1c274742306b50ff45bfe4ecfb6 MD5 | raw file
Possible License(s): LGPL-2.1
  1. /**
  2. * @file llmessagetemplateparser.cpp
  3. * @brief LLMessageTemplateParser implementation
  4. *
  5. * $LicenseInfo:firstyear=2007&license=viewerlgpl$
  6. * Second Life Viewer Source Code
  7. * Copyright (C) 2010, Linden Research, Inc.
  8. *
  9. * This library is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation;
  12. * version 2.1 of the License only.
  13. *
  14. * This library is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with this library; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. *
  23. * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
  24. * $/LicenseInfo$
  25. */
  26. #include "linden_common.h"
  27. #include "llmessagetemplateparser.h"
  28. #include <boost/tokenizer.hpp>
  29. // What follows is a bunch of C functions to do validation.
  30. // Let's support a small subset of regular expressions here
  31. // Syntax is a string made up of:
  32. // a - checks against alphanumeric ([A-Za-z0-9])
  33. // c - checks against character ([A-Za-z])
  34. // f - checks against first variable character ([A-Za-z_])
  35. // v - checks against variable ([A-Za-z0-9_])
  36. // s - checks against sign of integer ([-0-9])
  37. // d - checks against integer digit ([0-9])
  38. // * - repeat last check
  39. // checks 'a'
  40. BOOL b_return_alphanumeric_ok(char c)
  41. {
  42. if ( ( (c < 'A')
  43. ||(c > 'Z'))
  44. &&( (c < 'a')
  45. ||(c > 'z'))
  46. &&( (c < '0')
  47. ||(c > '9')))
  48. {
  49. return FALSE;
  50. }
  51. return TRUE;
  52. }
  53. // checks 'c'
  54. BOOL b_return_character_ok(char c)
  55. {
  56. if ( ( (c < 'A')
  57. ||(c > 'Z'))
  58. &&( (c < 'a')
  59. ||(c > 'z')))
  60. {
  61. return FALSE;
  62. }
  63. return TRUE;
  64. }
  65. // checks 'f'
  66. BOOL b_return_first_variable_ok(char c)
  67. {
  68. if ( ( (c < 'A')
  69. ||(c > 'Z'))
  70. &&( (c < 'a')
  71. ||(c > 'z'))
  72. &&(c != '_'))
  73. {
  74. return FALSE;
  75. }
  76. return TRUE;
  77. }
  78. // checks 'v'
  79. BOOL b_return_variable_ok(char c)
  80. {
  81. if ( ( (c < 'A')
  82. ||(c > 'Z'))
  83. &&( (c < 'a')
  84. ||(c > 'z'))
  85. &&( (c < '0')
  86. ||(c > '9'))
  87. &&(c != '_'))
  88. {
  89. return FALSE;
  90. }
  91. return TRUE;
  92. }
  93. // checks 's'
  94. BOOL b_return_signed_integer_ok(char c)
  95. {
  96. if ( ( (c < '0')
  97. ||(c > '9'))
  98. &&(c != '-'))
  99. {
  100. return FALSE;
  101. }
  102. return TRUE;
  103. }
  104. // checks 'd'
  105. BOOL b_return_integer_ok(char c)
  106. {
  107. if ( (c < '0')
  108. ||(c > '9'))
  109. {
  110. return FALSE;
  111. }
  112. return TRUE;
  113. }
  114. BOOL (*gParseCheckCharacters[])(char c) =
  115. {
  116. b_return_alphanumeric_ok,
  117. b_return_character_ok,
  118. b_return_first_variable_ok,
  119. b_return_variable_ok,
  120. b_return_signed_integer_ok,
  121. b_return_integer_ok
  122. };
  123. S32 get_checker_number(char checker)
  124. {
  125. switch(checker)
  126. {
  127. case 'a':
  128. return 0;
  129. case 'c':
  130. return 1;
  131. case 'f':
  132. return 2;
  133. case 'v':
  134. return 3;
  135. case 's':
  136. return 4;
  137. case 'd':
  138. return 5;
  139. case '*':
  140. return 9999;
  141. default:
  142. return -1;
  143. }
  144. }
  145. // check token based on passed simplified regular expression
  146. BOOL b_check_token(const char *token, const char *regexp)
  147. {
  148. S32 tptr, rptr = 0;
  149. S32 current_checker, next_checker = 0;
  150. current_checker = get_checker_number(regexp[rptr++]);
  151. if (current_checker == -1)
  152. {
  153. llerrs << "Invalid regular expression value!" << llendl;
  154. return FALSE;
  155. }
  156. if (current_checker == 9999)
  157. {
  158. llerrs << "Regular expression can't start with *!" << llendl;
  159. return FALSE;
  160. }
  161. for (tptr = 0; token[tptr]; tptr++)
  162. {
  163. if (current_checker == -1)
  164. {
  165. llerrs << "Input exceeds regular expression!\nDid you forget a *?" << llendl;
  166. return FALSE;
  167. }
  168. if (!gParseCheckCharacters[current_checker](token[tptr]))
  169. {
  170. return FALSE;
  171. }
  172. if (next_checker != 9999)
  173. {
  174. next_checker = get_checker_number(regexp[rptr++]);
  175. if (next_checker != 9999)
  176. {
  177. current_checker = next_checker;
  178. }
  179. }
  180. }
  181. return TRUE;
  182. }
  183. // C variable can be made up of upper or lower case letters, underscores, or numbers, but can't start with a number
  184. BOOL b_variable_ok(const char *token)
  185. {
  186. if (!b_check_token(token, "fv*"))
  187. {
  188. llwarns << "Token '" << token << "' isn't a variable!" << llendl;
  189. return FALSE;
  190. }
  191. return TRUE;
  192. }
  193. // An integer is made up of the digits 0-9 and may be preceded by a '-'
  194. BOOL b_integer_ok(const char *token)
  195. {
  196. if (!b_check_token(token, "sd*"))
  197. {
  198. llwarns << "Token isn't an integer!" << llendl;
  199. return FALSE;
  200. }
  201. return TRUE;
  202. }
  203. // An integer is made up of the digits 0-9
  204. BOOL b_positive_integer_ok(const char *token)
  205. {
  206. if (!b_check_token(token, "d*"))
  207. {
  208. llwarns << "Token isn't an integer!" << llendl;
  209. return FALSE;
  210. }
  211. return TRUE;
  212. }
  213. // Done with C functions, here's the tokenizer.
  214. typedef boost::tokenizer< boost::char_separator<char> > tokenizer;
  215. LLTemplateTokenizer::LLTemplateTokenizer(const std::string & contents) : mStarted(false), mTokens()
  216. {
  217. boost::char_separator<char> newline("\r\n", "", boost::keep_empty_tokens);
  218. boost::char_separator<char> spaces(" \t");
  219. U32 line_counter = 1;
  220. tokenizer line_tokens(contents, newline);
  221. for(tokenizer::iterator line_iter = line_tokens.begin();
  222. line_iter != line_tokens.end();
  223. ++line_iter, ++line_counter)
  224. {
  225. tokenizer word_tokens(*line_iter, spaces);
  226. for(tokenizer::iterator word_iter = word_tokens.begin();
  227. word_iter != word_tokens.end();
  228. ++word_iter)
  229. {
  230. if((*word_iter)[0] == '/')
  231. {
  232. break; // skip to end of line on comments
  233. }
  234. positioned_token pt;// = new positioned_token();
  235. pt.str = std::string(*word_iter);
  236. pt.line = line_counter;
  237. mTokens.push_back(pt);
  238. }
  239. }
  240. mCurrent = mTokens.begin();
  241. }
  242. void LLTemplateTokenizer::inc()
  243. {
  244. if(atEOF())
  245. {
  246. error("trying to increment token of EOF");
  247. }
  248. else if(mStarted)
  249. {
  250. ++mCurrent;
  251. }
  252. else
  253. {
  254. mStarted = true;
  255. mCurrent = mTokens.begin();
  256. }
  257. }
  258. void LLTemplateTokenizer::dec()
  259. {
  260. if(mCurrent == mTokens.begin())
  261. {
  262. if(mStarted)
  263. {
  264. mStarted = false;
  265. }
  266. else
  267. {
  268. error("trying to decrement past beginning of file");
  269. }
  270. }
  271. else
  272. {
  273. mCurrent--;
  274. }
  275. }
  276. std::string LLTemplateTokenizer::get() const
  277. {
  278. if(atEOF())
  279. {
  280. error("trying to get EOF");
  281. }
  282. return mCurrent->str;
  283. }
  284. U32 LLTemplateTokenizer::line() const
  285. {
  286. if(atEOF())
  287. {
  288. return 0;
  289. }
  290. return mCurrent->line;
  291. }
  292. bool LLTemplateTokenizer::atEOF() const
  293. {
  294. return mCurrent == mTokens.end();
  295. }
  296. std::string LLTemplateTokenizer::next()
  297. {
  298. inc();
  299. return get();
  300. }
  301. bool LLTemplateTokenizer::want(const std::string & token)
  302. {
  303. if(atEOF()) return false;
  304. inc();
  305. if(atEOF()) return false;
  306. if(get() != token)
  307. {
  308. dec(); // back up a step
  309. return false;
  310. }
  311. return true;
  312. }
  313. bool LLTemplateTokenizer::wantEOF()
  314. {
  315. // see if the next token is EOF
  316. if(atEOF()) return true;
  317. inc();
  318. if(!atEOF())
  319. {
  320. dec(); // back up a step
  321. return false;
  322. }
  323. return true;
  324. }
  325. void LLTemplateTokenizer::error(std::string message) const
  326. {
  327. if(atEOF())
  328. {
  329. llerrs << "Unexpected end of file: " << message << llendl;
  330. }
  331. else
  332. {
  333. llerrs << "Problem parsing message template at line "
  334. << line() << ", with token '" << get() << "' : "
  335. << message << llendl;
  336. }
  337. }
  338. // Done with tokenizer, next is the parser.
  339. LLTemplateParser::LLTemplateParser(LLTemplateTokenizer & tokens):
  340. mVersion(0.f),
  341. mMessages()
  342. {
  343. // the version number should be the first thing in the file
  344. if (tokens.want("version"))
  345. {
  346. // version number
  347. std::string vers_string = tokens.next();
  348. mVersion = (F32)atof(vers_string.c_str());
  349. llinfos << "### Message template version " << mVersion << " ###" << llendl;
  350. }
  351. else
  352. {
  353. llerrs << "Version must be first in the message template, found "
  354. << tokens.next() << llendl;
  355. }
  356. while(LLMessageTemplate * templatep = parseMessage(tokens))
  357. {
  358. if (templatep->getDeprecation() != MD_DEPRECATED)
  359. {
  360. mMessages.push_back(templatep);
  361. }
  362. else
  363. {
  364. delete templatep;
  365. }
  366. }
  367. if(!tokens.wantEOF())
  368. {
  369. llerrs << "Expected end of template or a message, instead found: "
  370. << tokens.next() << " at " << tokens.line() << llendl;
  371. }
  372. }
  373. F32 LLTemplateParser::getVersion() const
  374. {
  375. return mVersion;
  376. }
  377. LLTemplateParser::message_iterator LLTemplateParser::getMessagesBegin() const
  378. {
  379. return mMessages.begin();
  380. }
  381. LLTemplateParser::message_iterator LLTemplateParser::getMessagesEnd() const
  382. {
  383. return mMessages.end();
  384. }
  385. // static
  386. LLMessageTemplate * LLTemplateParser::parseMessage(LLTemplateTokenizer & tokens)
  387. {
  388. LLMessageTemplate *templatep = NULL;
  389. if(!tokens.want("{"))
  390. {
  391. return NULL;
  392. }
  393. // name first
  394. std::string template_name = tokens.next();
  395. // is name a legit C variable name
  396. if (!b_variable_ok(template_name.c_str()))
  397. {
  398. llerrs << "Not legit variable name: " << template_name << " at " << tokens.line() << llendl;
  399. }
  400. // ok, now get Frequency ("High", "Medium", or "Low")
  401. EMsgFrequency frequency = MFT_LOW;
  402. std::string freq_string = tokens.next();
  403. if (freq_string == "High")
  404. {
  405. frequency = MFT_HIGH;
  406. }
  407. else if (freq_string == "Medium")
  408. {
  409. frequency = MFT_MEDIUM;
  410. }
  411. else if (freq_string == "Low" || freq_string == "Fixed")
  412. {
  413. frequency = MFT_LOW;
  414. }
  415. else
  416. {
  417. llerrs << "Expected frequency, got " << freq_string << " at " << tokens.line() << llendl;
  418. }
  419. // TODO more explicit checking here pls
  420. U32 message_number = strtoul(tokens.next().c_str(),NULL,0);
  421. switch (frequency) {
  422. case MFT_HIGH:
  423. break;
  424. case MFT_MEDIUM:
  425. message_number = (255 << 8) | message_number;
  426. break;
  427. case MFT_LOW:
  428. message_number = (255 << 24) | (255 << 16) | message_number;
  429. break;
  430. default:
  431. llerrs << "Unknown frequency enum: " << frequency << llendl;
  432. }
  433. templatep = new LLMessageTemplate(
  434. template_name.c_str(),
  435. message_number,
  436. frequency);
  437. // Now get trust ("Trusted", "NotTrusted")
  438. std::string trust = tokens.next();
  439. if (trust == "Trusted")
  440. {
  441. templatep->setTrust(MT_TRUST);
  442. }
  443. else if (trust == "NotTrusted")
  444. {
  445. templatep->setTrust(MT_NOTRUST);
  446. }
  447. else
  448. {
  449. llerrs << "Bad trust " << trust << " at " << tokens.line() << llendl;
  450. }
  451. // get encoding
  452. std::string encoding = tokens.next();
  453. if(encoding == "Unencoded")
  454. {
  455. templatep->setEncoding(ME_UNENCODED);
  456. }
  457. else if(encoding == "Zerocoded")
  458. {
  459. templatep->setEncoding(ME_ZEROCODED);
  460. }
  461. else
  462. {
  463. llerrs << "Bad encoding " << encoding << " at " << tokens.line() << llendl;
  464. }
  465. // get deprecation
  466. if(tokens.want("Deprecated"))
  467. {
  468. templatep->setDeprecation(MD_DEPRECATED);
  469. }
  470. else if (tokens.want("UDPDeprecated"))
  471. {
  472. templatep->setDeprecation(MD_UDPDEPRECATED);
  473. }
  474. else if (tokens.want("UDPBlackListed"))
  475. {
  476. templatep->setDeprecation(MD_UDPBLACKLISTED);
  477. }
  478. else if (tokens.want("NotDeprecated"))
  479. {
  480. // this is the default value, but it can't hurt to set it twice
  481. templatep->setDeprecation(MD_NOTDEPRECATED);
  482. }
  483. else {
  484. // It's probably a brace, let's just start block processing
  485. }
  486. while(LLMessageBlock * blockp = parseBlock(tokens))
  487. {
  488. templatep->addBlock(blockp);
  489. }
  490. if(!tokens.want("}"))
  491. {
  492. llerrs << "Expecting closing } for message " << template_name
  493. << " at " << tokens.line() << llendl;
  494. }
  495. return templatep;
  496. }
  497. // static
  498. LLMessageBlock * LLTemplateParser::parseBlock(LLTemplateTokenizer & tokens)
  499. {
  500. LLMessageBlock * blockp = NULL;
  501. if(!tokens.want("{"))
  502. {
  503. return NULL;
  504. }
  505. // name first
  506. std::string block_name = tokens.next();
  507. // is name a legit C variable name
  508. if (!b_variable_ok(block_name.c_str()))
  509. {
  510. llerrs << "not a legal block name: " << block_name
  511. << " at " << tokens.line() << llendl;
  512. }
  513. // now, block type ("Single", "Multiple", or "Variable")
  514. std::string block_type = tokens.next();
  515. // which one is it?
  516. if (block_type == "Single")
  517. {
  518. // ok, we can create a block
  519. blockp = new LLMessageBlock(block_name.c_str(), MBT_SINGLE);
  520. }
  521. else if (block_type == "Multiple")
  522. {
  523. // need to get the number of repeats
  524. std::string repeats = tokens.next();
  525. // is it a legal integer
  526. if (!b_positive_integer_ok(repeats.c_str()))
  527. {
  528. llerrs << "not a legal integer for block multiple count: "
  529. << repeats << " at " << tokens.line() << llendl;
  530. }
  531. // ok, we can create a block
  532. blockp = new LLMessageBlock(block_name.c_str(),
  533. MBT_MULTIPLE,
  534. atoi(repeats.c_str()));
  535. }
  536. else if (block_type == "Variable")
  537. {
  538. // ok, we can create a block
  539. blockp = new LLMessageBlock(block_name.c_str(), MBT_VARIABLE);
  540. }
  541. else
  542. {
  543. llerrs << "bad block type: " << block_type
  544. << " at " << tokens.line() << llendl;
  545. }
  546. while(LLMessageVariable * varp = parseVariable(tokens))
  547. {
  548. blockp->addVariable(varp->getName(),
  549. varp->getType(),
  550. varp->getSize());
  551. delete varp;
  552. }
  553. if(!tokens.want("}"))
  554. {
  555. llerrs << "Expecting closing } for block " << block_name
  556. << " at " << tokens.line() << llendl;
  557. }
  558. return blockp;
  559. }
  560. // static
  561. LLMessageVariable * LLTemplateParser::parseVariable(LLTemplateTokenizer & tokens)
  562. {
  563. LLMessageVariable * varp = NULL;
  564. if(!tokens.want("{"))
  565. {
  566. return NULL;
  567. }
  568. std::string var_name = tokens.next();
  569. if (!b_variable_ok(var_name.c_str()))
  570. {
  571. llerrs << "Not a legit variable name: " << var_name
  572. << " at " << tokens.line() << llendl;
  573. }
  574. std::string var_type = tokens.next();
  575. if (var_type == "U8")
  576. {
  577. varp = new LLMessageVariable(var_name.c_str(), MVT_U8, 1);
  578. }
  579. else if (var_type == "U16")
  580. {
  581. varp = new LLMessageVariable(var_name.c_str(), MVT_U16, 2);
  582. }
  583. else if (var_type == "U32")
  584. {
  585. varp = new LLMessageVariable(var_name.c_str(), MVT_U32, 4);
  586. }
  587. else if (var_type == "U64")
  588. {
  589. varp = new LLMessageVariable(var_name.c_str(), MVT_U64, 8);
  590. }
  591. else if (var_type == "S8")
  592. {
  593. varp = new LLMessageVariable(var_name.c_str(), MVT_S8, 1);
  594. }
  595. else if (var_type == "S16")
  596. {
  597. varp = new LLMessageVariable(var_name.c_str(), MVT_S16, 2);
  598. }
  599. else if (var_type == "S32")
  600. {
  601. varp = new LLMessageVariable(var_name.c_str(), MVT_S32, 4);
  602. }
  603. else if (var_type == "S64")
  604. {
  605. varp = new LLMessageVariable(var_name.c_str(), MVT_S64, 8);
  606. }
  607. else if (var_type == "F32")
  608. {
  609. varp = new LLMessageVariable(var_name.c_str(), MVT_F32, 4);
  610. }
  611. else if (var_type == "F64")
  612. {
  613. varp = new LLMessageVariable(var_name.c_str(), MVT_F64, 8);
  614. }
  615. else if (var_type == "LLVector3")
  616. {
  617. varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3, 12);
  618. }
  619. else if (var_type == "LLVector3d")
  620. {
  621. varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3d, 24);
  622. }
  623. else if (var_type == "LLVector4")
  624. {
  625. varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector4, 16);
  626. }
  627. else if (var_type == "LLQuaternion")
  628. {
  629. varp = new LLMessageVariable(var_name.c_str(), MVT_LLQuaternion, 12);
  630. }
  631. else if (var_type == "LLUUID")
  632. {
  633. varp = new LLMessageVariable(var_name.c_str(), MVT_LLUUID, 16);
  634. }
  635. else if (var_type == "BOOL")
  636. {
  637. varp = new LLMessageVariable(var_name.c_str(), MVT_BOOL, 1);
  638. }
  639. else if (var_type == "IPADDR")
  640. {
  641. varp = new LLMessageVariable(var_name.c_str(), MVT_IP_ADDR, 4);
  642. }
  643. else if (var_type == "IPPORT")
  644. {
  645. varp = new LLMessageVariable(var_name.c_str(), MVT_IP_PORT, 2);
  646. }
  647. else if (var_type == "Fixed" || var_type == "Variable")
  648. {
  649. std::string variable_size = tokens.next();
  650. if (!b_positive_integer_ok(variable_size.c_str()))
  651. {
  652. llerrs << "not a legal integer variable size: " << variable_size
  653. << " at " << tokens.line() << llendl;
  654. }
  655. EMsgVariableType type_enum;
  656. if(var_type == "Variable")
  657. {
  658. type_enum = MVT_VARIABLE;
  659. }
  660. else if(var_type == "Fixed")
  661. {
  662. type_enum = MVT_FIXED;
  663. }
  664. else
  665. {
  666. type_enum = MVT_FIXED; // removes a warning
  667. llerrs << "bad variable type: " << var_type
  668. << " at " << tokens.line() << llendl;
  669. }
  670. varp = new LLMessageVariable(
  671. var_name.c_str(),
  672. type_enum,
  673. atoi(variable_size.c_str()));
  674. }
  675. else
  676. {
  677. llerrs << "bad variable type:" << var_type
  678. << " at " << tokens.line() << llendl;
  679. }
  680. if(!tokens.want("}"))
  681. {
  682. llerrs << "Expecting closing } for variable " << var_name
  683. << " at " << tokens.line() << llendl;
  684. }
  685. return varp;
  686. }