/Parser.cpp
http://github.com/fawek/cjango · C++ · 216 lines · 179 code · 11 blank · 26 comment · 22 complexity · 740d508d60d6b191c63e67b89d39bd98 MD5 · raw file
- /**
- * Copyright (C) 2010 Jakub Wieczorek <fawek@fawek.net>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
- #include "Parser.h"
- #include "ASCIIUtils.h"
- #include "CommentNode.h"
- #include "Node.h"
- #include "NullTagNode.h"
- #include "TagNode.h"
- #include "TagNodeFactory.h"
- #include "TemplateNode.h"
- #include "TextNode.h"
- #include "Tokenizer.h"
- #include "TagNodeFactory.h"
- #include "VariableNode.h"
- #include <cassert>
- Parser::Parser(std::istream* stream)
- : m_tokenizer(new Tokenizer(stream))
- , m_lastTokenType(Tokenizer::Token::None)
- , m_insideClause(false)
- {
- }
- Parser::~Parser()
- {
- delete m_tokenizer;
- }
- // This could go even further and tokenize the parameters into a list although
- // that does not leave as much flexibility for the tag nodes as it should.
- // For instance, what would we do with literals? Should we allow spaces to appear
- // in them without being split? But then, a tag might want to treat a single " as
- // a normal character, which it could not because we'd treat as a parse error.
- bool splitTagExpression(const std::string& expression, std::string& tagName, std::vector<std::string>& parameters)
- {
- int i = 0;
- int j = expression.size() - 1;
- while (i < j && isWhitespace(expression[i]))
- ++i;
- while (j > i && isWhitespace(expression[j]))
- --j;
- if (i == j)
- return false;
- int k = i;
- while (k <= j && !isWhitespace(expression[k]))
- ++k;
- tagName = expression.substr(i, k - i);
- assert(parameters.empty());
- while (k <= j && isWhitespace(expression[k]))
- ++k;
- if (k != j) {
- while (k <= j && isWhitespace(expression[k]))
- ++k;
- bool inLiteral = false;
- while (k <= j) {
- while (k <= j && isWhitespace(expression[k]))
- ++k;
- i = k;
- while (k <= j && (inLiteral || !isWhitespace(expression[k]))) {
- if (expression[k] == '"')
- inLiteral = !inLiteral;
- ++k;
- }
- parameters.push_back(std::string());
- parameters.back() = expression.substr(i, k - i);
- }
- }
-
- return true;
- }
- // For now, this just removes trailing whitespaces.
- // I'm still unsure where the variable parsing should take place.
- bool splitVariableExpression(const std::string& expression, std::string& variable)
- {
- int i = 0;
- int j = expression.size() - 1;
- while (i < j && isWhitespace(expression[i]))
- ++i;
- while (j > i && isWhitespace(expression[j]))
- --j;
- if (i == j)
- return false;
- variable = expression.substr(i, j - i + 1);
- return true;
- }
- // FIXME: Most of the assertions below should be turn into parse errors and be
- // reported in a cleaner way to the client side.
- TemplateNode* Parser::parse()
- {
- TemplateNode* root = new TemplateNode();
- Node* current = root;
- m_insideClause = false;
- Tokenizer::Token token;
- do {
- assert(m_lastTokenType != Tokenizer::Token::EndOfInput);
- m_tokenizer->nextToken(token);
- assert(token.type != Tokenizer::Token::None);
- switch (token.type) {
- case Tokenizer::Token::Text:
- assert(m_lastTokenType != Tokenizer::Token::Text);
- switch (m_lastTokenType) {
- case Tokenizer::Token::OpenComment: {
- assert(m_insideClause);
-
- CommentNode* commentNode = new CommentNode(current);
- commentNode->setText(token.contents);
- break;
- }
- case Tokenizer::Token::OpenVariable: {
- assert(m_insideClause);
- std::string variable;
- assert(splitVariableExpression(token.contents, variable));
- VariableNode* variableNode = new VariableNode(current, variable);
- break;
- }
- case Tokenizer::Token::OpenTag: {
- assert(m_insideClause);
- std::string tagName;
- std::vector<std::string> parameters;
- assert(splitTagExpression(token.contents, tagName, parameters));
-
- if (tagName.size() > 3 && tagName.substr(0, 3) == "end") {
- std::string tagBaseName = tagName.substr(3);
- if (TagNodeFactory::self()->isTagRegistered(tagBaseName.c_str())) {
- assert(current->type() == Node::Tag);
- TagNode* tagNode = static_cast<TagNode*>(current);
- assert(tagNode->name() == tagBaseName);
- assert(!tagNode->isSelfClosing());
- current = current->parent();
- } else {
- TagNode* tagNode = new NullTagNode(current);
- assert(tagNode->isSelfClosing());
- }
- } else {
- if (TagNodeFactory::self()->isTagRegistered(tagName.c_str())) {
- TagNode* tagNode = TagNodeFactory::self()->create(tagName.c_str(), current);
- assert(tagNode);
- tagNode->setName(tagName);
- tagNode->setParameters(parameters);
- if (!tagNode->isSelfClosing())
- current = tagNode;
- } else {
- TagNode* tagNode = new NullTagNode(current);
- assert(tagNode->isSelfClosing());
- }
- }
- break;
- }
- default: {
- TextNode* textNode = new TextNode(current);
- textNode->setText(token.contents);
- break;
- }
- }
- break;
- case Tokenizer::Token::OpenComment:
- case Tokenizer::Token::OpenVariable:
- case Tokenizer::Token::OpenTag:
- assert(!m_insideClause);
- m_insideClause = true;
- break;
- case Tokenizer::Token::CloseComment:
- case Tokenizer::Token::CloseVariable:
- case Tokenizer::Token::CloseTag:
- assert(m_insideClause);
- m_insideClause = false;
- break;
- case Tokenizer::Token::EndOfInput:
- // Make sure all opening tags have their corresponding closing tags.
- assert(current == root);
- break;
- }
- m_lastTokenType = token.type;
- assert(current);
- } while (token.type != Tokenizer::Token::EndOfInput);
- return root;
- }