/Parser.cpp
C++ | 216 lines | 179 code | 11 blank | 26 comment | 22 complexity | 740d508d60d6b191c63e67b89d39bd98 MD5 | raw file
1/** 2 * Copyright (C) 2010 Jakub Wieczorek <fawek@fawek.net> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23#include "Parser.h" 24 25#include "ASCIIUtils.h" 26#include "CommentNode.h" 27#include "Node.h" 28#include "NullTagNode.h" 29#include "TagNode.h" 30#include "TagNodeFactory.h" 31#include "TemplateNode.h" 32#include "TextNode.h" 33#include "Tokenizer.h" 34#include "TagNodeFactory.h" 35#include "VariableNode.h" 36 37#include <cassert> 38 39Parser::Parser(std::istream* stream) 40 : m_tokenizer(new Tokenizer(stream)) 41 , m_lastTokenType(Tokenizer::Token::None) 42 , m_insideClause(false) 43{ 44} 45 46Parser::~Parser() 47{ 48 delete m_tokenizer; 49} 50 51// This could go even further and tokenize the parameters into a list although 52// that does not leave as much flexibility for the tag nodes as it should. 53// For instance, what would we do with literals? Should we allow spaces to appear 54// in them without being split? But then, a tag might want to treat a single " as 55// a normal character, which it could not because we'd treat as a parse error. 56bool splitTagExpression(const std::string& expression, std::string& tagName, std::vector<std::string>& parameters) 57{ 58 int i = 0; 59 int j = expression.size() - 1; 60 while (i < j && isWhitespace(expression[i])) 61 ++i; 62 while (j > i && isWhitespace(expression[j])) 63 --j; 64 if (i == j) 65 return false; 66 67 int k = i; 68 while (k <= j && !isWhitespace(expression[k])) 69 ++k; 70 71 tagName = expression.substr(i, k - i); 72 73 assert(parameters.empty()); 74 while (k <= j && isWhitespace(expression[k])) 75 ++k; 76 77 if (k != j) { 78 while (k <= j && isWhitespace(expression[k])) 79 ++k; 80 81 bool inLiteral = false; 82 while (k <= j) { 83 while (k <= j && isWhitespace(expression[k])) 84 ++k; 85 i = k; 86 while (k <= j && (inLiteral || !isWhitespace(expression[k]))) { 87 if (expression[k] == '"') 88 inLiteral = !inLiteral; 89 ++k; 90 } 91 92 parameters.push_back(std::string()); 93 parameters.back() = expression.substr(i, k - i); 94 } 95 } 96 97 return true; 98} 99 100// For now, this just removes trailing whitespaces. 101// I'm still unsure where the variable parsing should take place. 102bool splitVariableExpression(const std::string& expression, std::string& variable) 103{ 104 int i = 0; 105 int j = expression.size() - 1; 106 while (i < j && isWhitespace(expression[i])) 107 ++i; 108 while (j > i && isWhitespace(expression[j])) 109 --j; 110 if (i == j) 111 return false; 112 113 variable = expression.substr(i, j - i + 1); 114 115 return true; 116} 117 118// FIXME: Most of the assertions below should be turn into parse errors and be 119// reported in a cleaner way to the client side. 120TemplateNode* Parser::parse() 121{ 122 TemplateNode* root = new TemplateNode(); 123 Node* current = root; 124 125 m_insideClause = false; 126 127 Tokenizer::Token token; 128 do { 129 assert(m_lastTokenType != Tokenizer::Token::EndOfInput); 130 m_tokenizer->nextToken(token); 131 assert(token.type != Tokenizer::Token::None); 132 133 switch (token.type) { 134 case Tokenizer::Token::Text: 135 assert(m_lastTokenType != Tokenizer::Token::Text); 136 switch (m_lastTokenType) { 137 case Tokenizer::Token::OpenComment: { 138 assert(m_insideClause); 139 140 CommentNode* commentNode = new CommentNode(current); 141 commentNode->setText(token.contents); 142 break; 143 } 144 case Tokenizer::Token::OpenVariable: { 145 assert(m_insideClause); 146 std::string variable; 147 assert(splitVariableExpression(token.contents, variable)); 148 149 VariableNode* variableNode = new VariableNode(current, variable); 150 break; 151 } 152 case Tokenizer::Token::OpenTag: { 153 assert(m_insideClause); 154 std::string tagName; 155 std::vector<std::string> parameters; 156 assert(splitTagExpression(token.contents, tagName, parameters)); 157 158 if (tagName.size() > 3 && tagName.substr(0, 3) == "end") { 159 std::string tagBaseName = tagName.substr(3); 160 if (TagNodeFactory::self()->isTagRegistered(tagBaseName.c_str())) { 161 assert(current->type() == Node::Tag); 162 TagNode* tagNode = static_cast<TagNode*>(current); 163 assert(tagNode->name() == tagBaseName); 164 assert(!tagNode->isSelfClosing()); 165 current = current->parent(); 166 } else { 167 TagNode* tagNode = new NullTagNode(current); 168 assert(tagNode->isSelfClosing()); 169 } 170 } else { 171 if (TagNodeFactory::self()->isTagRegistered(tagName.c_str())) { 172 TagNode* tagNode = TagNodeFactory::self()->create(tagName.c_str(), current); 173 assert(tagNode); 174 tagNode->setName(tagName); 175 tagNode->setParameters(parameters); 176 if (!tagNode->isSelfClosing()) 177 current = tagNode; 178 } else { 179 TagNode* tagNode = new NullTagNode(current); 180 assert(tagNode->isSelfClosing()); 181 } 182 } 183 184 break; 185 } 186 default: { 187 TextNode* textNode = new TextNode(current); 188 textNode->setText(token.contents); 189 break; 190 } 191 } 192 break; 193 case Tokenizer::Token::OpenComment: 194 case Tokenizer::Token::OpenVariable: 195 case Tokenizer::Token::OpenTag: 196 assert(!m_insideClause); 197 m_insideClause = true; 198 break; 199 case Tokenizer::Token::CloseComment: 200 case Tokenizer::Token::CloseVariable: 201 case Tokenizer::Token::CloseTag: 202 assert(m_insideClause); 203 m_insideClause = false; 204 break; 205 case Tokenizer::Token::EndOfInput: 206 // Make sure all opening tags have their corresponding closing tags. 207 assert(current == root); 208 break; 209 } 210 211 m_lastTokenType = token.type; 212 assert(current); 213 } while (token.type != Tokenizer::Token::EndOfInput); 214 215 return root; 216}