PageRenderTime 23ms CodeModel.GetById 1ms app.highlight 17ms RepoModel.GetById 1ms app.codeStats 0ms

/tags/jsdoc_toolkit-2.2.1/jsdoc-toolkit/app/handlers/XMLDOC/XMLParse.js

http://jsdoc-toolkit.googlecode.com/
JavaScript | 292 lines | 160 code | 51 blank | 81 comment | 32 complexity | 0f165b84327fa450e6d10b9ca95e0a06 MD5 | raw file
  1LOG.inform("XMLDOC.Parser loaded");
  2
  3/**
  4 * XML Parser object.  Returns an {@link #XMLDOC.Parser.node} which is
  5 * the root element of the parsed document.
  6 * <p/>
  7 * By default, this parser will only handle well formed XML.  To
  8 * allow the parser to handle HTML, set the <tt>XMLDOC.Parser.strictMode</tt>
  9 * variable to <tt>false</tt> before calling <tt>XMLDOC.Parser.parse()</tt>.
 10 * <p/>
 11 * <i>Note: If you pass poorly formed XML, it will cause the parser to throw
 12 * an exception.</i>
 13 *
 14 * @author Brett Fattori (bfattori@fry.com)
 15 * @author $Author: micmath $
 16 * @version $Revision: 497 $
 17 */
 18XMLDOC.Parser = {};
 19
 20/**
 21 * Strict mode setting.  Setting this to false allows HTML-style source to
 22 * be parsed.  Normally, well formed XML has defined end tags, or empty tags
 23 * are properly formed.  Default: <tt>true</tt>
 24 * @type Boolean
 25 */
 26XMLDOC.Parser.strictMode = true;
 27
 28/**
 29 * A node in an XML Document.  Node types are ROOT, ELEMENT, COMMENT, PI, and TEXT.
 30 * @param parent {XMLDOC.Parser.node} The parent node
 31 * @param name {String} The node name
 32 * @param type {String} One of the types
 33 */
 34XMLDOC.Parser.node = function(parent, name, type)
 35{
 36   this.name = name;
 37   this.type = type || "ELEMENT";
 38   this.parent = parent;
 39   this.charData = "";
 40   this.attrs = {};
 41   this.nodes = [];
 42   this.cPtr = 0;
 43
 44   XMLDOC.Parser.node.prototype.getAttributeNames = function() {
 45      var a = [];
 46      for (var o in this.attrs)
 47      {
 48         a.push(o);
 49      }
 50
 51      return a;
 52   };
 53
 54   XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
 55      return this.attrs[attr];
 56   };
 57
 58   XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
 59      this.attrs[attr] = val;
 60   };
 61
 62   XMLDOC.Parser.node.prototype.getChild = function(idx) {
 63      return this.nodes[idx];
 64   };
 65
 66   XMLDOC.Parser.node.prototype.parentNode = function() {
 67      return this.parent;
 68   };
 69
 70   XMLDOC.Parser.node.prototype.firstChild = function() {
 71      return this.nodes[0];
 72   };
 73
 74   XMLDOC.Parser.node.prototype.lastChild = function() {
 75      return this.nodes[this.nodes.length - 1];
 76   };
 77
 78   XMLDOC.Parser.node.prototype.nextSibling = function() {
 79      var p = this.parent;
 80      if (p && (p.nodes.indexOf(this) + 1 != p.nodes.length))
 81      {
 82         return p.getChild(p.nodes.indexOf(this) + 1);
 83      }
 84      return null;
 85   };
 86
 87   XMLDOC.Parser.node.prototype.prevSibling = function() {
 88      var p = this.parent;
 89      if (p && (p.nodes.indexOf(this) - 1 >= 0))
 90      {
 91         return p.getChild(p.nodes.indexOf(this) - 1);
 92      }
 93      return null;
 94   };
 95};
 96
 97/**
 98 * Parse an XML Document from the specified source.  The XML should be
 99 * well formed, unless strict mode is disabled, then the parser will
100 * handle HTML-style XML documents.
101 * @param src {String} The source to parse
102 */
103XMLDOC.Parser.parse = function(src)
104{
105   var A = [];
106
107   // Normailize whitespace
108   A = src.split("\r\n");
109   src = A.join("\n");
110   A = src.split("\r");
111   src = A.join("\n");
112
113   // Remove XML and DOCTYPE specifier
114   src.replace(/<\?XML .*\?>/i, "");
115   src.replace(/<!DOCTYPE .*\>/i, "");
116
117   // The document is the root node and cannot be modified or removed
118   var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");
119
120   // Let's break it down
121   XMLDOC.Parser.eat(doc, src);
122
123   return doc;
124};
125
/**
 * The XML fragment processing routine.  This method is private and should not be called
 * directly.
 * <p/>
 * It repeatedly finds the next tag in <tt>src</tt>, turns any text in front
 * of it into a TEXT node, turns the tag itself into a COMMENT, PI or ELEMENT
 * node, removes what was consumed from <tt>src</tt> and continues.  The
 * content of an enclosing element is processed by a recursive call.  Every
 * node created is appended to <tt>parentNode.nodes</tt>.
 * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
 * @param src {String} The source within the fragment to process
 * @throws {Error} When a start tag has no usable end tag (in strict mode), or
 *    the element cannot be matched as an enclosing tag.  The error carries
 *    the unparsed remainder in its <tt>src</tt> property.
 * @private
 */
XMLDOC.Parser.eat = function(parentNode, src)
{
   // A simple tag def
   var reTag = new RegExp("<(!|)(\\?|--|)((.|\\s)*?)\\2>","g");

   // Special tag types
   var reCommentTag = /<!--((.|\s)*?)-->/;
   var rePITag = /<\?((.|\s)*?)\?>/;

   // A start tag (with potential empty marker).  The attribute value is
   // greedy on purpose: applied to a single tag, capture group 2 then spans
   // the whole attribute list, which reAttributes splits up afterwards.
   var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;

   // An empty HTML style tag (not proper XML, but we'll accept it so we can process HTML)
   var reHTMLEmptyTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*>/;

   // Lazy twins of the two patterns above, used only to remove a consumed
   // tag from the remaining source.  With the greedy versions an attribute
   // value could stretch to the last quote on the line and take every
   // following tag on that line with it.
   var reEatStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*(\/)?>/;
   var reEatHTMLEmptyTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*>/;

   // Fully enclosing tag with nested tags
   var reEnclosingTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*>((.|\s)*?)<\/\1>/;

   // Breaks down attributes
   var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");

   // Strips surrounding whitespace from text that has it on both ends
   var reTrim = /^[ \t\n]+((.|\n)*?)[ \t\n]+$/;

   // Builds the error reported for input that cannot be parsed
   var malformed = function(badSrc) {
      var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
      err.src = badSrc;
      return err;
   };

   // Find us a tag
   var tag;
   while ((tag = reTag.exec(src)) != null)
   {
      if (tag.index > 0)
      {
         // The next tag has some text before it
         var text = src.substring(0, tag.index).replace(reTrim, "$1");

         if (text.length > 0 && (text != "\n"))
         {
            var txtnode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
            txtnode.charData = text;

            // Append the new text node
            parentNode.nodes.push(txtnode);
         }

         // Shift the lastIndex of reTag by the amount of text being dropped
         reTag.lastIndex -= tag.index;

         // Eat the text
         src = src.substring(tag.index);
      }

      // From here on src begins with the tag that was just found
      var m;
      if ((m = reCommentTag.exec(tag[0])) != null)
      {
         // Is this a comment?
         var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
         comment.charData = m[1];

         // Append the comment
         parentNode.nodes.push(comment);

         // Move the lastIndex of reTag
         reTag.lastIndex -= tag[0].length;

         // Eat the tag
         src = src.replace(reCommentTag, "");
      }
      else if ((m = rePITag.exec(tag[0])) != null)
      {
         // Is this a processing instruction?
         var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
         pi.charData = m[1];

         // Append the processing instruction
         parentNode.nodes.push(pi);

         // Move the lastIndex of reTag
         reTag.lastIndex -= tag[0].length;

         // Eat the tag
         src = src.replace(rePITag, "");
      }
      else if ((m = reStartTag.exec(tag[0])) != null)
      {
         // Break it down
         var e = m;
         var elem = new XMLDOC.Parser.node(parentNode, e[1], "ELEMENT");

         // Get attributes from the tag (group 2 is undefined without any)
         var attrSrc = e[2] || "";
         var a;
         while ((a = reAttributes.exec(attrSrc)) != null )
         {
            elem.attrs[a[1]] = a[3];
         }

         // Is this an empty XML-style tag?
         if (e[6] == "/")
         {
            // Append the empty element
            parentNode.nodes.push(elem);

            // Eat just this tag
            src = src.replace(reEatStartTag, "");
         }
         else
         {
            // Check for malformed XML tags
            var htmlStartTag = reHTMLEmptyTag.exec(src);
            var startName = htmlStartTag ? htmlStartTag[1] : e[1];

            // See if there is an end tag within this block.  The name is
            // searched for literally, so characters that are special in a
            // regular expression cannot cause false hits or a SyntaxError.
            var hasEndTag = src.indexOf("</" + startName + ">") != -1;

            if (!hasEndTag)
            {
               if (XMLDOC.Parser.strictMode)
               {
                  // Poorly formed XML fails in strict mode
                  throw malformed(src);
               }

               // This is an HTML-style empty tag, store the element for it in non-strict mode
               parentNode.nodes.push(elem);

               // Eat just this tag
               src = src.replace(reEatHTMLEmptyTag, "");
            }
            else
            {
               // It must be an enclosing tag
               var enc = reEnclosingTag.exec(src);
               if (enc == null)
               {
                  // An end tag exists but the element cannot be matched as a whole
                  throw malformed(src);
               }

               // Go deeper into the document
               XMLDOC.Parser.eat(elem, enc[6]);

               // Append the new element node
               parentNode.nodes.push(elem);

               // Eat the element; splicing by index avoids matching it a second time
               src = src.substring(0, enc.index) + src.substring(enc.index + enc[0].length);
            }
         }

         // Reset the lastIndex of reTag so scanning restarts at the new start of src
         reTag.lastIndex = 0;
      }
   }

   // No tag was found... append the text if there is any
   src = src.replace(reTrim, "$1");
   if (src.length > 0 && (src != "\n"))
   {
      var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
      txtNode.charData = src;

      // Append the new text node
      parentNode.nodes.push(txtNode);
   }
};