PageRenderTime 71ms CodeModel.GetById 32ms app.highlight 22ms RepoModel.GetById 2ms app.codeStats 0ms

/Parser.cpp

http://github.com/fawek/cjango
C++ | 216 lines | 179 code | 11 blank | 26 comment | 22 complexity | 740d508d60d6b191c63e67b89d39bd98 MD5 | raw file
  1/**
  2 * Copyright (C) 2010 Jakub Wieczorek <fawek@fawek.net>
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 * of this software and associated documentation files (the "Software"), to deal
  6 * in the Software without restriction, including without limitation the rights
  7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 * copies of the Software, and to permit persons to whom the Software is
  9 * furnished to do so, subject to the following conditions:
 10 * 
 11 * The above copyright notice and this permission notice shall be included in
 12 * all copies or substantial portions of the Software.
 13 * 
 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 20 * SOFTWARE.
 21 */
 22
 23#include "Parser.h"
 24
 25#include "ASCIIUtils.h"
 26#include "CommentNode.h"
 27#include "Node.h"
 28#include "NullTagNode.h"
 29#include "TagNode.h"
 30#include "TagNodeFactory.h"
 31#include "TemplateNode.h"
 32#include "TextNode.h"
 33#include "Tokenizer.h"
 34#include "TagNodeFactory.h"
 35#include "VariableNode.h"
 36
 37#include <cassert>
 38
 39Parser::Parser(std::istream* stream)
 40    : m_tokenizer(new Tokenizer(stream))
 41    , m_lastTokenType(Tokenizer::Token::None)
 42    , m_insideClause(false)
 43{
 44}
 45
 46Parser::~Parser()
 47{
 48    delete m_tokenizer;
 49}
 50
 51// This could go even further and tokenize the parameters into a list although
 52// that does not leave as much flexibility for the tag nodes as it should.
 53// For instance, what would we do with literals? Should we allow spaces to appear
 54// in them without being split? But then, a tag might want to treat a single " as
 55// a normal character, which it could not because we'd treat as a parse error.
 56bool splitTagExpression(const std::string& expression, std::string& tagName, std::vector<std::string>& parameters)
 57{
 58    int i = 0;
 59    int j = expression.size() - 1;
 60    while (i < j && isWhitespace(expression[i]))
 61        ++i;
 62    while (j > i && isWhitespace(expression[j]))
 63        --j;
 64    if (i == j)
 65        return false;
 66
 67    int k = i;
 68    while (k <= j && !isWhitespace(expression[k]))
 69        ++k;
 70
 71    tagName = expression.substr(i, k - i);
 72
 73    assert(parameters.empty());
 74    while (k <= j && isWhitespace(expression[k]))
 75        ++k;
 76
 77    if (k != j) {
 78        while (k <= j && isWhitespace(expression[k]))
 79            ++k;
 80
 81        bool inLiteral = false;
 82        while (k <= j) {
 83            while (k <= j && isWhitespace(expression[k]))
 84                ++k;
 85            i = k;
 86            while (k <= j && (inLiteral || !isWhitespace(expression[k]))) {
 87                if (expression[k] == '"')
 88                    inLiteral = !inLiteral;
 89                ++k;
 90            }
 91
 92            parameters.push_back(std::string());
 93            parameters.back() = expression.substr(i, k - i);
 94        }
 95    }
 96    
 97    return true;
 98}
 99
100// For now, this just removes trailing whitespaces.
101// I'm still unsure where the variable parsing should take place.
102bool splitVariableExpression(const std::string& expression, std::string& variable)
103{
104    int i = 0;
105    int j = expression.size() - 1;
106    while (i < j && isWhitespace(expression[i]))
107        ++i;
108    while (j > i && isWhitespace(expression[j]))
109        --j;
110    if (i == j)
111        return false;
112
113    variable = expression.substr(i, j - i + 1);
114
115    return true;
116}
117
118// FIXME: Most of the assertions below should be turn into parse errors and be
119// reported in a cleaner way to the client side.
120TemplateNode* Parser::parse()
121{
122    TemplateNode* root = new TemplateNode();
123    Node* current = root;
124
125    m_insideClause = false;
126
127    Tokenizer::Token token;
128    do {
129        assert(m_lastTokenType != Tokenizer::Token::EndOfInput);
130        m_tokenizer->nextToken(token);
131        assert(token.type != Tokenizer::Token::None);
132
133        switch (token.type) {
134            case Tokenizer::Token::Text:
135                assert(m_lastTokenType != Tokenizer::Token::Text);
136                switch (m_lastTokenType) {
137                    case Tokenizer::Token::OpenComment: {
138                        assert(m_insideClause);
139                        
140                        CommentNode* commentNode = new CommentNode(current);
141                        commentNode->setText(token.contents);
142                        break;
143                    }
144                    case Tokenizer::Token::OpenVariable: {
145                        assert(m_insideClause);
146                        std::string variable;
147                        assert(splitVariableExpression(token.contents, variable));
148
149                        VariableNode* variableNode = new VariableNode(current, variable);
150                        break;
151                    }
152                    case Tokenizer::Token::OpenTag: {
153                        assert(m_insideClause);
154                        std::string tagName;
155                        std::vector<std::string> parameters;
156                        assert(splitTagExpression(token.contents, tagName, parameters));
157                        
158                        if (tagName.size() > 3 && tagName.substr(0, 3) == "end") {
159                            std::string tagBaseName = tagName.substr(3);
160                            if (TagNodeFactory::self()->isTagRegistered(tagBaseName.c_str())) {
161                                assert(current->type() == Node::Tag);
162                                TagNode* tagNode = static_cast<TagNode*>(current);
163                                assert(tagNode->name() == tagBaseName);
164                                assert(!tagNode->isSelfClosing());
165                                current = current->parent();
166                            } else {
167                                TagNode* tagNode = new NullTagNode(current);
168                                assert(tagNode->isSelfClosing());
169                            }
170                        } else {
171                            if (TagNodeFactory::self()->isTagRegistered(tagName.c_str())) {
172                                TagNode* tagNode = TagNodeFactory::self()->create(tagName.c_str(), current);
173                                assert(tagNode);
174                                tagNode->setName(tagName);
175                                tagNode->setParameters(parameters);
176                                if (!tagNode->isSelfClosing())
177                                    current = tagNode;
178                            } else {
179                                TagNode* tagNode = new NullTagNode(current);
180                                assert(tagNode->isSelfClosing());
181                            }
182                        }
183
184                        break;
185                    }
186                    default: {
187                        TextNode* textNode = new TextNode(current);
188                        textNode->setText(token.contents);
189                        break;
190                    }
191                }
192                break;
193            case Tokenizer::Token::OpenComment:
194            case Tokenizer::Token::OpenVariable:
195            case Tokenizer::Token::OpenTag:
196                assert(!m_insideClause);
197                m_insideClause = true;
198                break;
199            case Tokenizer::Token::CloseComment:
200            case Tokenizer::Token::CloseVariable:
201            case Tokenizer::Token::CloseTag:
202                assert(m_insideClause);
203                m_insideClause = false;
204                break;
205            case Tokenizer::Token::EndOfInput:
206                // Make sure all opening tags have their corresponding closing tags.
207                assert(current == root);
208                break;
209        }
210
211        m_lastTokenType = token.type;
212        assert(current);
213    } while (token.type != Tokenizer::Token::EndOfInput);
214
215    return root;
216}