/tags/jsdoc_toolkit-2.2.1/jsdoc-toolkit/app/handlers/XMLDOC/XMLParse.js

http://jsdoc-toolkit.googlecode.com/ · JavaScript · 292 lines · 160 code · 51 blank · 81 comment · 32 complexity · 0f165b84327fa450e6d10b9ca95e0a06 MD5 · raw file

  1. LOG.inform("XMLDOC.Parser loaded");
  2. /**
  3. * XML Parser object. Returns an {@link #XMLDOC.Parser.node} which is
  4. * the root element of the parsed document.
  5. * <p/>
  6. * By default, this parser will only handle well formed XML. To
  7. * allow the parser to handle HTML, set the <tt>XMLDOC.Parser.strictMode</tt>
  8. * variable to <tt>false</tt> before calling <tt>XMLDOC.Parser.parse()</tt>.
  9. * <p/>
  10. * <i>Note: If you pass poorly formed XML, it will cause the parser to throw
  11. * an exception.</i>
  12. *
  13. * @author Brett Fattori (bfattori@fry.com)
  14. * @author $Author: micmath $
  15. * @version $Revision: 497 $
  16. */
  17. XMLDOC.Parser = {};
  18. /**
  19. * Strict mode setting. Setting this to false allows HTML-style source to
  20. * be parsed. Normally, well formed XML has defined end tags, or empty tags
  21. * are properly formed. Default: <tt>true</tt>
  22. * @type Boolean
  23. */
  24. XMLDOC.Parser.strictMode = true;
  25. /**
  26. * A node in an XML Document. Node types are ROOT, ELEMENT, COMMENT, PI, and TEXT.
  27. * @param parent {XMLDOC.Parser.node} The parent node
  28. * @param name {String} The node name
  29. * @param type {String} One of the types
  30. */
  31. XMLDOC.Parser.node = function(parent, name, type)
  32. {
  33. this.name = name;
  34. this.type = type || "ELEMENT";
  35. this.parent = parent;
  36. this.charData = "";
  37. this.attrs = {};
  38. this.nodes = [];
  39. this.cPtr = 0;
  40. XMLDOC.Parser.node.prototype.getAttributeNames = function() {
  41. var a = [];
  42. for (var o in this.attrs)
  43. {
  44. a.push(o);
  45. }
  46. return a;
  47. };
  48. XMLDOC.Parser.node.prototype.getAttribute = function(attr) {
  49. return this.attrs[attr];
  50. };
  51. XMLDOC.Parser.node.prototype.setAttribute = function(attr, val) {
  52. this.attrs[attr] = val;
  53. };
  54. XMLDOC.Parser.node.prototype.getChild = function(idx) {
  55. return this.nodes[idx];
  56. };
  57. XMLDOC.Parser.node.prototype.parentNode = function() {
  58. return this.parent;
  59. };
  60. XMLDOC.Parser.node.prototype.firstChild = function() {
  61. return this.nodes[0];
  62. };
  63. XMLDOC.Parser.node.prototype.lastChild = function() {
  64. return this.nodes[this.nodes.length - 1];
  65. };
  66. XMLDOC.Parser.node.prototype.nextSibling = function() {
  67. var p = this.parent;
  68. if (p && (p.nodes.indexOf(this) + 1 != p.nodes.length))
  69. {
  70. return p.getChild(p.nodes.indexOf(this) + 1);
  71. }
  72. return null;
  73. };
  74. XMLDOC.Parser.node.prototype.prevSibling = function() {
  75. var p = this.parent;
  76. if (p && (p.nodes.indexOf(this) - 1 >= 0))
  77. {
  78. return p.getChild(p.nodes.indexOf(this) - 1);
  79. }
  80. return null;
  81. };
  82. };
  83. /**
  84. * Parse an XML Document from the specified source. The XML should be
  85. * well formed, unless strict mode is disabled, then the parser will
  86. * handle HTML-style XML documents.
  87. * @param src {String} The source to parse
  88. */
  89. XMLDOC.Parser.parse = function(src)
  90. {
  91. var A = [];
  92. // Normailize whitespace
  93. A = src.split("\r\n");
  94. src = A.join("\n");
  95. A = src.split("\r");
  96. src = A.join("\n");
  97. // Remove XML and DOCTYPE specifier
  98. src.replace(/<\?XML .*\?>/i, "");
  99. src.replace(/<!DOCTYPE .*\>/i, "");
  100. // The document is the root node and cannot be modified or removed
  101. var doc = new XMLDOC.Parser.node(null, "ROOT", "DOCUMENT");
  102. // Let's break it down
  103. XMLDOC.Parser.eat(doc, src);
  104. return doc;
  105. };
  106. /**
  107. * The XML fragment processing routine. This method is private and should not be called
  108. * directly.
  109. * @param parentNode {XMLDOC.Parser.node} The node which is the parent of this fragment
  110. * @param src {String} The source within the fragment to process
  111. * @private
  112. */
  113. XMLDOC.Parser.eat = function(parentNode, src)
  114. {
  115. // A simple tag def
  116. var reTag = new RegExp("<(!|)(\\?|--|)((.|\\s)*?)\\2>","g");
  117. // Special tag types
  118. var reCommentTag = /<!--((.|\s)*?)-->/;
  119. var rePITag = /<\?((.|\s)*?)\?>/;
  120. // A start tag (with potential empty marker)
  121. var reStartTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*(\/)?>/;
  122. // An empty HTML style tag (not proper XML, but we'll accept it so we can process HTML)
  123. var reHTMLEmptyTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*)\4)*>/;
  124. // Fully enclosing tag with nested tags
  125. var reEnclosingTag = /<(.*?)( +([\w_\-]*)=(\"|')(.*?)\4)*>((.|\s)*?)<\/\1>/;
  126. // Breaks down attributes
  127. var reAttributes = new RegExp(" +([\\w_\\-]*)=(\"|')(.*?)\\2","g");
  128. // Find us a tag
  129. var tag;
  130. while ((tag = reTag.exec(src)) != null)
  131. {
  132. if (tag.index > 0)
  133. {
  134. // The next tag has some text before it
  135. var text = src.substring(0, tag.index).replace(/^[ \t\n]+((.|\n)*?)[ \t\n]+$/, "$1");
  136. if (text.length > 0 && (text != "\n"))
  137. {
  138. var txtnode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
  139. txtnode.charData = text;
  140. // Append the new text node
  141. parentNode.nodes.push(txtnode);
  142. }
  143. // Reset the lastIndex of reTag
  144. reTag.lastIndex -= src.substring(0, tag.index).length;
  145. // Eat the text
  146. src = src.substring(tag.index);
  147. }
  148. if (reCommentTag.test(tag[0]))
  149. {
  150. // Is this a comment?
  151. var comment = new XMLDOC.Parser.node(parentNode, "", "COMMENT");
  152. comment.charData = reCommentTag.exec(tag[0])[1];
  153. // Append the comment
  154. parentNode.nodes.push(comment);
  155. // Move the lastIndex of reTag
  156. reTag.lastIndex -= tag[0].length;
  157. // Eat the tag
  158. src = src.replace(reCommentTag, "");
  159. }
  160. else if (rePITag.test(tag[0]))
  161. {
  162. // Is this a processing instruction?
  163. var pi = new XMLDOC.Parser.node(parentNode, "", "PI");
  164. pi.charData = rePITag.exec(tag[0])[1];
  165. // Append the processing instruction
  166. parentNode.nodes.push(pi);
  167. // Move the lastIndex of reTag
  168. reTag.lastIndex -= tag[0].length;
  169. // Eat the tag
  170. src = src.replace(rePITag, "");
  171. }
  172. else if (reStartTag.test(tag[0]))
  173. {
  174. // Break it down
  175. var e = reStartTag.exec(tag[0]);
  176. var elem = new XMLDOC.Parser.node(parentNode, e[1], "ELEMENT");
  177. // Get attributes from the tag
  178. var a;
  179. while ((a = reAttributes.exec(e[2])) != null )
  180. {
  181. elem.attrs[a[1]] = a[3];
  182. }
  183. // Is this an empty XML-style tag?
  184. if (e[6] == "/")
  185. {
  186. // Append the empty element
  187. parentNode.nodes.push(elem);
  188. // Move the lastIndex of reTag (include the start tag length)
  189. reTag.lastIndex -= e[0].length;
  190. // Eat the tag
  191. src = src.replace(reStartTag, "");
  192. }
  193. else
  194. {
  195. // Check for malformed XML tags
  196. var htmlParsed = false;
  197. var htmlStartTag = reHTMLEmptyTag.exec(src);
  198. // See if there isn't an end tag within this block
  199. var reHTMLEndTag = new RegExp("</" + htmlStartTag[1] + ">");
  200. var htmlEndTag = reHTMLEndTag.exec(src);
  201. if (XMLDOC.Parser.strictMode && htmlEndTag == null)
  202. {
  203. // Poorly formed XML fails in strict mode
  204. var err = new Error("Malformed XML passed to XMLDOC.Parser... Error contains malformed 'src'");
  205. err.src = src;
  206. throw err;
  207. }
  208. else if (htmlEndTag == null)
  209. {
  210. // This is an HTML-style empty tag, store the element for it in non-strict mode
  211. parentNode.nodes.push(elem);
  212. // Eat the tag
  213. src = src.replace(reHTMLEmptyTag, "");
  214. htmlParsed = true;
  215. }
  216. // If we didn't parse HTML-style, it must be an enclosing tag
  217. if (!htmlParsed)
  218. {
  219. var enc = reEnclosingTag.exec(src);
  220. // Go deeper into the document
  221. XMLDOC.Parser.eat(elem, enc[6]);
  222. // Append the new element node
  223. parentNode.nodes.push(elem);
  224. // Eat the tag
  225. src = src.replace(reEnclosingTag, "");
  226. }
  227. }
  228. // Reset the lastIndex of reTag
  229. reTag.lastIndex = 0;
  230. }
  231. }
  232. // No tag was found... append the text if there is any
  233. src = src.replace(/^[ \t\n]+((.|\n)*?)[ \t\n]+$/, "$1");
  234. if (src.length > 0 && (src != "\n"))
  235. {
  236. var txtNode = new XMLDOC.Parser.node(parentNode, "", "TEXT");
  237. txtNode.charData = src;
  238. // Append the new text node
  239. parentNode.nodes.push(txtNode);
  240. }
  241. };