PageRenderTime 421ms CodeModel.GetById 231ms app.highlight 7ms RepoModel.GetById 175ms app.codeStats 0ms

/mordor/xml/xml_parser.rl

http://github.com/mozy/mordor
Unknown | 166 lines | 135 code | 31 blank | 0 comment | 0 complexity | 94b5b075289d657ecaf626f1dc5596f2 MD5 | raw file
  1// Copyright (c) 2009 - Mozy, Inc.
  2
  3#include "mordor/pch.h"
  4
  5#include "mordor/xml/parser.h"
  6
  7using namespace Mordor;
  8
  9%%{
 10    machine xml_parser;
 11
 12    action mark { mark = fpc;}
 13    action done { fbreak; }
 14    prepush {
 15        prepush();
 16    }
 17    postpop {
 18        postpop();
 19    }
 20
 21    Char = '\t' | '\n' | '\r' | [' '-255];
 22    S = (' ' | '\t' | '\r' | '\n')+;
 23
 24    NameStartChar = ':' | [A-Z] | '_' | [a-z] | 0xC0..0xD6 | 0xD8..0xF6 | 0xF8..0xFF;
 25    NameChar = NameStartChar | '-' | '.' | [0-9] | 0xB7;
 26    Name = NameStartChar NameChar*;
 27    Names = Name (' ' Name)*;
 28    Nmtoken = NameChar+;
 29    Nmtokens = Nmtoken (' ' Nmtoken)*;
 30
 31    CharData = [^<&]* - ([^<&]* ']]>' [^<&]*);
 32
 33    action reference
 34    {
 35        m_handler.onReference(std::string(mark, fpc-mark));
 36        mark = NULL;
 37    }
 38
 39    CharRef = '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';';
 40    EntityRef = '&' Name ';';
 41    Reference = (EntityRef | CharRef) >mark %reference;
 42    PEReference = '%' Name ';';
 43
 44    action attrib_value
 45    {
 46        if (fpc != mark) {
 47            m_handler.onAttributeValue(std::string(mark, fpc-mark));
 48            mark = NULL;
 49        }
 50    }
 51
 52    EntityValue = '"' ([^%&"] | PEReference | Reference)* '"' |
 53                  "'" ([^%&'] | PEReference | Reference)* '"';
 54
 55    AttValue = '"' [^"<]* >mark %attrib_value '"' |
 56               "'" [^'<]* >mark %attrib_value "'" ;
 57    SystemLiteral = ('"' [^"]* '"') | ("'" [^']* "'");
 58    PubidChar = ' ' | '\r' | '\n' | [a-zA-Z0-9] | ['()+,./:=?;!*#@$_%] | '-';
 59    PubidLiteral = '"' PubidChar* '"' | "'" (PubidChar* -- "'") "'";
 60
 61    Comment = '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->';
 62
 63    PITarget = Name - ([Xx][Mm][Ll]);
 64    PI = '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>';
 65
 66    Misc = Comment | PI | S;
 67
 68    Eq = S? '=' S?;
 69    VersionNum = '1.' [0-9]+;
 70    VersionInfo = S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"');
 71    EncName = [A-Za-z] ([A-Za-z0-9._] | '-')*;
 72    EncodingDecl = S 'encoding' Eq ('"' EncName '"' | "'" EncName "'");
 73    SDDecl = S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'));
 74    XMLDecl = '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>';
 75
 76    ExternalID = 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral;
 77    #markupdecl = elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment;
 78    #intSubset = (markupdecl | DeclSep)*;
 79    intSubset = '';
 80    doctypedecl = '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>';
 81    prolog = XMLDecl? Misc* (doctypedecl Misc*)?;
 82
 83    CDStart = '<![CDATA[';
 84    CData = (Char* - (Char* ']]>' Char*));
 85    CDEnd = ']]>';
 86    CDSect = CDStart CData CDEnd;
 87
 88    action start_tag {
 89        m_handler.onStartTag(std::string(mark, fpc-mark));
 90        mark = NULL;
 91    }
 92    action end_tag {
 93        m_handler.onEndTag(std::string(mark, fpc-mark));
 94        mark = NULL;
 95    }
 96    action empty_tag {
 97        m_handler.onEmptyTag();
 98    }
 99
100    action attrib_name
101    {
102        m_handler.onAttributeName(std::string(mark, fpc-mark));
103        mark = NULL;
104    }
105
106    Attribute = Name >mark %attrib_name Eq AttValue;
107    STag = '<' Name >mark %start_tag (S Attribute)* S? '>';
108    ETag = '</' Name >mark %end_tag S? '>';
109    EmptyElemTag = '<' Name >mark %start_tag (S Attribute)* S? '/>' %empty_tag;
110    action call_parse_content {
111        fcall *xml_parser_en_parse_content;
112    }
113    element = EmptyElemTag | STag @call_parse_content; #content ETag;
114
115    action inner_text
116    {
117        if (fpc != mark) {
118			m_handler.onInnerText(std::string(mark, fpc-mark));
119			mark = NULL;
120	    }
121    }
122
123    content = CharData? >mark %inner_text ((element | Reference | CDSect | PI | Comment) CharData? >mark %inner_text)*;
124
125    action element_finished {
126        fret;
127    }
128    parse_content := parse_content_lbl: content ETag @element_finished;
129
130    document = prolog element Misc*;
131
132    main := document;
133    write data;
134}%%
135
136void
137XMLParser::init()
138{
139    RagelParserWithStack::init();
140    %% write init;
141}
142
143void
144XMLParser::exec()
145{
146#ifdef MSVC
147#pragma warning(push)
148#pragma warning(disable : 4244)
149#endif
150        %% write exec;
151#ifdef MSVC
152#pragma warning(pop)
153#endif
154}
155
156bool
157XMLParser::final() const
158{
159    return cs >= xml_parser_first_final;
160}
161
162bool
163XMLParser::error() const
164{
165    return cs == xml_parser_error;
166}