/Parser.cpp

http://github.com/fawek/cjango · C++ · 216 lines · 179 code · 11 blank · 26 comment · 22 complexity · 740d508d60d6b191c63e67b89d39bd98 MD5 · raw file

  1. /**
  2. * Copyright (C) 2010 Jakub Wieczorek <fawek@fawek.net>
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a copy
  5. * of this software and associated documentation files (the "Software"), to deal
  6. * in the Software without restriction, including without limitation the rights
  7. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. * copies of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20. * SOFTWARE.
  21. */
  22. #include "Parser.h"
  23. #include "ASCIIUtils.h"
  24. #include "CommentNode.h"
  25. #include "Node.h"
  26. #include "NullTagNode.h"
  27. #include "TagNode.h"
  28. #include "TagNodeFactory.h"
  29. #include "TemplateNode.h"
  30. #include "TextNode.h"
  31. #include "Tokenizer.h"
  32. #include "TagNodeFactory.h"
  33. #include "VariableNode.h"
  34. #include <cassert>
  35. Parser::Parser(std::istream* stream)
  36. : m_tokenizer(new Tokenizer(stream))
  37. , m_lastTokenType(Tokenizer::Token::None)
  38. , m_insideClause(false)
  39. {
  40. }
  41. Parser::~Parser()
  42. {
  43. delete m_tokenizer;
  44. }
  45. // This could go even further and tokenize the parameters into a list although
  46. // that does not leave as much flexibility for the tag nodes as it should.
  47. // For instance, what would we do with literals? Should we allow spaces to appear
  48. // in them without being split? But then, a tag might want to treat a single " as
  49. // a normal character, which it could not because we'd treat as a parse error.
  50. bool splitTagExpression(const std::string& expression, std::string& tagName, std::vector<std::string>& parameters)
  51. {
  52. int i = 0;
  53. int j = expression.size() - 1;
  54. while (i < j && isWhitespace(expression[i]))
  55. ++i;
  56. while (j > i && isWhitespace(expression[j]))
  57. --j;
  58. if (i == j)
  59. return false;
  60. int k = i;
  61. while (k <= j && !isWhitespace(expression[k]))
  62. ++k;
  63. tagName = expression.substr(i, k - i);
  64. assert(parameters.empty());
  65. while (k <= j && isWhitespace(expression[k]))
  66. ++k;
  67. if (k != j) {
  68. while (k <= j && isWhitespace(expression[k]))
  69. ++k;
  70. bool inLiteral = false;
  71. while (k <= j) {
  72. while (k <= j && isWhitespace(expression[k]))
  73. ++k;
  74. i = k;
  75. while (k <= j && (inLiteral || !isWhitespace(expression[k]))) {
  76. if (expression[k] == '"')
  77. inLiteral = !inLiteral;
  78. ++k;
  79. }
  80. parameters.push_back(std::string());
  81. parameters.back() = expression.substr(i, k - i);
  82. }
  83. }
  84. return true;
  85. }
  86. // For now, this just removes trailing whitespaces.
  87. // I'm still unsure where the variable parsing should take place.
  88. bool splitVariableExpression(const std::string& expression, std::string& variable)
  89. {
  90. int i = 0;
  91. int j = expression.size() - 1;
  92. while (i < j && isWhitespace(expression[i]))
  93. ++i;
  94. while (j > i && isWhitespace(expression[j]))
  95. --j;
  96. if (i == j)
  97. return false;
  98. variable = expression.substr(i, j - i + 1);
  99. return true;
  100. }
  101. // FIXME: Most of the assertions below should be turn into parse errors and be
  102. // reported in a cleaner way to the client side.
  103. TemplateNode* Parser::parse()
  104. {
  105. TemplateNode* root = new TemplateNode();
  106. Node* current = root;
  107. m_insideClause = false;
  108. Tokenizer::Token token;
  109. do {
  110. assert(m_lastTokenType != Tokenizer::Token::EndOfInput);
  111. m_tokenizer->nextToken(token);
  112. assert(token.type != Tokenizer::Token::None);
  113. switch (token.type) {
  114. case Tokenizer::Token::Text:
  115. assert(m_lastTokenType != Tokenizer::Token::Text);
  116. switch (m_lastTokenType) {
  117. case Tokenizer::Token::OpenComment: {
  118. assert(m_insideClause);
  119. CommentNode* commentNode = new CommentNode(current);
  120. commentNode->setText(token.contents);
  121. break;
  122. }
  123. case Tokenizer::Token::OpenVariable: {
  124. assert(m_insideClause);
  125. std::string variable;
  126. assert(splitVariableExpression(token.contents, variable));
  127. VariableNode* variableNode = new VariableNode(current, variable);
  128. break;
  129. }
  130. case Tokenizer::Token::OpenTag: {
  131. assert(m_insideClause);
  132. std::string tagName;
  133. std::vector<std::string> parameters;
  134. assert(splitTagExpression(token.contents, tagName, parameters));
  135. if (tagName.size() > 3 && tagName.substr(0, 3) == "end") {
  136. std::string tagBaseName = tagName.substr(3);
  137. if (TagNodeFactory::self()->isTagRegistered(tagBaseName.c_str())) {
  138. assert(current->type() == Node::Tag);
  139. TagNode* tagNode = static_cast<TagNode*>(current);
  140. assert(tagNode->name() == tagBaseName);
  141. assert(!tagNode->isSelfClosing());
  142. current = current->parent();
  143. } else {
  144. TagNode* tagNode = new NullTagNode(current);
  145. assert(tagNode->isSelfClosing());
  146. }
  147. } else {
  148. if (TagNodeFactory::self()->isTagRegistered(tagName.c_str())) {
  149. TagNode* tagNode = TagNodeFactory::self()->create(tagName.c_str(), current);
  150. assert(tagNode);
  151. tagNode->setName(tagName);
  152. tagNode->setParameters(parameters);
  153. if (!tagNode->isSelfClosing())
  154. current = tagNode;
  155. } else {
  156. TagNode* tagNode = new NullTagNode(current);
  157. assert(tagNode->isSelfClosing());
  158. }
  159. }
  160. break;
  161. }
  162. default: {
  163. TextNode* textNode = new TextNode(current);
  164. textNode->setText(token.contents);
  165. break;
  166. }
  167. }
  168. break;
  169. case Tokenizer::Token::OpenComment:
  170. case Tokenizer::Token::OpenVariable:
  171. case Tokenizer::Token::OpenTag:
  172. assert(!m_insideClause);
  173. m_insideClause = true;
  174. break;
  175. case Tokenizer::Token::CloseComment:
  176. case Tokenizer::Token::CloseVariable:
  177. case Tokenizer::Token::CloseTag:
  178. assert(m_insideClause);
  179. m_insideClause = false;
  180. break;
  181. case Tokenizer::Token::EndOfInput:
  182. // Make sure all opening tags have their corresponding closing tags.
  183. assert(current == root);
  184. break;
  185. }
  186. m_lastTokenType = token.type;
  187. assert(current);
  188. } while (token.type != Tokenizer::Token::EndOfInput);
  189. return root;
  190. }