PageRenderTime 59ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/src/net/sf/sketchel/TrivialDOM.java

https://code.google.com/p/ingatan/
Java | 608 lines | 532 code | 40 blank | 36 comment | 57 complexity | 613b9856677e89fe68e8ae1654d11a0f MD5 | raw file
Possible License(s): LGPL-3.0, GPL-3.0
  1. /*
  2. Sketch Elements: Chemistry molecular diagram drawing tool.
  3. (c) 2008 Dr. Alex M. Clark
  4. Released as GNUware, under the Gnu Public License (GPL)
  5. See www.gnu.org for details.
  6. */
  7. package net.sf.sketchel;
  8. import java.io.*;
  9. import java.util.*;
  10. import java.util.regex.*;
  11. /*
  12. An incredibly lightweight implementation of DOM-style access to XML content. Only a subset of XML files are supported, that
  13. being simple combinations of elements, attributes and text. Overly sophisticated input files may break the reader. Also, some
  14. of the pedantic XML treatment of whitespace is simplified (which suits the rest of this application nicely). Malformed XML
  15. should generate vaguely helpful explanations, by and large.
  16. This class is intended to be used with XML documents which describe hierarchical data, rather than text with XML markup.
  17. Manipulation of the underlying content liberally rearranges whitespace to emphasise human-readability of the hierarchy,
  18. as well as removing unnecessary whitespace text nodes which get in the way of programmatically traversing the document.
  19. */
  20. public class TrivialDOM
  21. {
  22. public static final int TYPE_NODE=1;
  23. public static final int TYPE_TEXT=2;
  24. public static class Node
  25. {
  26. private Node parentNode=null;
  27. private String nodeName;
  28. private Hashtable<String,String> nodeAttr;
  29. private ArrayList<Object> children;
  30. public Node(String NodeName)
  31. {
  32. nodeName=NodeName;
  33. nodeAttr=new Hashtable<String,String>();
  34. children=new ArrayList<Object>();
  35. }
  36. // makes a superficial copy of the node, i.e. the returned node has the same name and attributes, but no children
  37. public Node clone()
  38. {
  39. Node ret=new Node(nodeName);
  40. Set<String> attr=nodeAttr.keySet();
  41. String[] names=new String[attr.size()];
  42. attr.toArray(names);
  43. for (int n=0;n<names.length;n++) ret.setAttribute(names[n],nodeAttr.get(names[n]));
  44. return ret;
  45. }
  46. // makes a deep copy of the node: a new node is created with the same name and attributes, and all of its children are
  47. // similarly duplicated; changing the new node will not affect the original
  48. public Node deepClone()
  49. {
  50. Node ret=clone();
  51. for (int n=0;n<children.size();n++)
  52. {
  53. Object o=children.get(n);
  54. if (o instanceof Node) ret.appendChild(((Node)o).deepClone());
  55. else if (o instanceof Text)
  56. {
  57. Text t=(Text)o;
  58. ret.appendText(t.get(),t.preserve());
  59. }
  60. }
  61. return ret;
  62. }
  63. public Node parent() {return parentNode;}
  64. public void setParent(Node Parent) {parentNode=Parent;}
  65. public String nodeName() {return nodeName;}
  66. public void setNodeName(String Name) {nodeName=Name;}
  67. public String attribute(String Attr) {return nodeAttr.containsKey(Attr) ? nodeAttr.get(Attr) : null;}
  68. public void setAttribute(String Attr,String Value) {nodeAttr.put(Attr,Value);}
  69. public String[] getAttributeNames()
  70. {
  71. Set<String> attr=nodeAttr.keySet();
  72. String[] names=new String[attr.size()];
  73. return attr.toArray(names);
  74. }
  75. public int numChildren() {return children.size();}
  76. public int childType(int N)
  77. {
  78. Object child=children.get(N);
  79. if (child instanceof Node) return TYPE_NODE;
  80. if (child instanceof Text) return TYPE_TEXT;
  81. return 0;
  82. }
  83. public Node getChildNode(int N) {return (Node)children.get(N);}
  84. public Text getChildText(int N) {return (Text)children.get(N);}
  85. public void clear() {children.clear();}
  86. public void deleteChild(int N) {children.remove(N);}
  87. public void setText(String Txt,boolean Preserve)
  88. {
  89. clear();
  90. Text txt=new Text(Txt,Preserve);
  91. txt.setParent(this);
  92. children.add(txt);
  93. }
  94. public String getText()
  95. {
  96. String txt="";
  97. for (int n=0;n<numChildren();n++)
  98. {
  99. if (childType(n)==TYPE_TEXT) txt+=getChildText(n).get();
  100. else txt+=getChildNode(n).getText();
  101. }
  102. return txt;
  103. }
  104. public Node appendChild(Node Nod) {Nod.setParent(this); children.add(Nod); return Nod;}
  105. public Text appendChild(Text Txt) {Txt.setParent(this); children.add(Txt); return Txt;}
  106. public Node insertChild(int N,Node Nod) {Nod.setParent(this); children.add(N,Nod); return Nod;}
  107. public Text insertChild(int N,Text Txt) {Txt.setParent(this); children.add(N,Txt); return Txt;}
  108. public Node appendNode(String Name)
  109. {
  110. Node nod=new Node(Name);
  111. nod.setParent(this);
  112. children.add(nod);
  113. return nod;
  114. }
  115. public Text appendText(String Txt,boolean Preserve)
  116. {
  117. Text txt=new Text(Txt,Preserve);
  118. txt.setParent(this);
  119. children.add(txt);
  120. return txt;
  121. }
  122. // scans the child-list for the first instance of the named node; several variations
  123. public int findChildIndex(String name) {return findChildIndex(name,0);}
  124. public int findChildIndex(String name,int startAt)
  125. {
  126. for (int n=startAt;n<numChildren();n++)
  127. if (childType(n)==TYPE_NODE && getChildNode(n).nodeName().equals(name)) return n;
  128. return -1;
  129. }
  130. public Node findChildNode(String name)
  131. {
  132. int i=findChildIndex(name);
  133. if (i>=0) return getChildNode(i);
  134. return null;
  135. }
  136. // returns a list of only the node children, or null if there are none
  137. public Node[] listChildNodes()
  138. {
  139. int count=0;
  140. for (int n=0;n<numChildren();n++) if (childType(n)==TYPE_NODE) count++;
  141. if (count==0) return null;
  142. Node[] list=new Node[count];
  143. for (int n=0,p=0;n<numChildren();n++) if (childType(n)==TYPE_NODE) list[p++]=getChildNode(n);
  144. return list;
  145. }
  146. // a convenience function: given a stream of XML source, adds each piece as a child of the current node; e.g.
  147. // "ning<nang>nong</nang>"
  148. // adds two children: one text ("ning") and one non-empty element (<nang>nong</nang>)
  149. public void appendRawXML(String raw) throws IOException {appendRawXML(raw,true);}
  150. public void appendRawXML(String raw,boolean trimWS) throws IOException
  151. {
  152. TrivialDOM.Node head=TrivialDOM.readString("<z>"+raw+"</z>",trimWS).document();
  153. for (int n=0;n<head.numChildren();n++)
  154. {
  155. if (head.childType(n)==TYPE_NODE) appendChild(head.getChildNode(n));
  156. else if (head.childType(n)==TYPE_TEXT) appendChild(head.getChildText(n));
  157. }
  158. }
  159. // returns a string containing the entire contents of this element, including the tag & attributes
  160. public String toString() {return new TrivialDOM(this).toString(true);}
  161. public String toString(boolean prettyPrint) {return new TrivialDOM(this).toString(prettyPrint);}
  162. // returns an XML string containing all of the child nodes, but not the enclosing element wrapper; note that this
  163. // is never pretty-printed
  164. public String getRawXML()
  165. {
  166. StringBuffer buff=new StringBuffer();
  167. for (int n=0;n<numChildren();n++)
  168. {
  169. if (childType(n)==TYPE_NODE) buff.append(new TrivialDOM(getChildNode(n)).toString(false));
  170. else if (childType(n)==TYPE_TEXT) buff.append(getChildText(n).get());
  171. }
  172. return buff.toString();
  173. }
  174. }
  175. public static class Text
  176. {
  177. private Node parentNode=null;
  178. private String text;
  179. private boolean preserve; // if true, is CDATA type; otherwise may be freely trimmed for whitespace
  180. public Text(String Text,boolean Preserve) {text=Text; preserve=Preserve;}
  181. public Node parent() {return parentNode;}
  182. public void setParent(Node Parent) {parentNode=Parent;}
  183. public String get() {return text;}
  184. public void set(String Txt) {text=Txt;}
  185. public boolean preserve() {return preserve;}
  186. }
  187. public Node createNode(String Name) {return new Node(Name);}
  188. public Text createText(String Text,boolean Preserve) {return new Text(Text,Preserve);}
  189. protected Node doc=null;
  190. // constructors
  191. public TrivialDOM() {}
  192. public TrivialDOM(String docName)
  193. {
  194. doc=new Node(docName);
  195. }
  196. public TrivialDOM(TrivialDOM cpy)
  197. {
  198. doc=cpy.document(); // !! CLONE
  199. }
  200. public TrivialDOM(Node docNode)
  201. {
  202. doc=docNode; // !! CLONE
  203. }
  204. public Node document() {return doc;}
  205. public String toString() {return toString(true);}
  206. public String toString(boolean prettyPrint)
  207. {
  208. StringWriter out=new StringWriter();
  209. try {writeXML(out,this,false,true,prettyPrint);}
  210. catch (IOException e) {return e.getMessage();}
  211. return out.toString();
  212. }
  213. // parsing input from a prespecified string
  214. public static TrivialDOM readString(String str) throws IOException {return readString(str,true);}
  215. public static TrivialDOM readString(String str,boolean trimWS) throws IOException
  216. {
  217. return readXML(new BufferedReader(new StringReader(str)),trimWS);
  218. }
  219. // parsing input files
  220. public static TrivialDOM readXML(BufferedReader in) throws IOException {return readXML(in,true);}
  221. public static TrivialDOM readXML(BufferedReader in,boolean trimWS) throws IOException
  222. {
  223. final String EOF="ReadXML: unexpected end of file during parsing";
  224. // PART 1: read the input file one character at a time, and carve it up into chunks, which are preserved as strings; these
  225. // include tag start & end, text, CDATA, and comments.
  226. ArrayList<String> chunks=new ArrayList<String>();
  227. StringBuffer strbuf=new StringBuffer(8192);
  228. while (true)
  229. {
  230. int ich;
  231. if (strbuf.length()==0)
  232. {
  233. ich=in.read();
  234. if (ich<0) break;
  235. strbuf.delete(0,strbuf.length());
  236. strbuf.append((char)ich);
  237. }
  238. if (strbuf.charAt(0)=='<') // either a tag or a CDATA
  239. {
  240. for (int n=0;n<2;n++)
  241. {
  242. ich=in.read();
  243. if (ich<0) throw new TrivialDOMException(EOF);
  244. strbuf.append((char)ich);
  245. }
  246. if (strbuf.length()>=3 && strbuf.substring(0,3).equals("<![")) // it's a CDATA
  247. {
  248. while (true)
  249. {
  250. ich=in.read();
  251. if (ich<0) throw new TrivialDOMException(EOF);
  252. strbuf.append((char)ich);
  253. if (strbuf.length()>=6 && strbuf.substring(strbuf.length()-3,strbuf.length()).equals("]]>"))
  254. {
  255. chunks.add(strbuf.toString());
  256. strbuf.delete(0,strbuf.length());
  257. break;
  258. }
  259. }
  260. }
  261. else if (strbuf.length()>=3 && strbuf.substring(0,3).equals("<!-")) // it's a comment
  262. {
  263. while (true)
  264. {
  265. ich=in.read();
  266. if (ich<0) throw new TrivialDOMException(EOF);
  267. strbuf.append((char)ich);
  268. if (strbuf.length()>=6 && strbuf.substring(strbuf.length()-3,strbuf.length()).equals("-->"))
  269. {
  270. chunks.add(strbuf.toString());
  271. strbuf.delete(0,strbuf.length());
  272. break;
  273. }
  274. }
  275. }
  276. else if (strbuf.length()==3 && strbuf.charAt(0)=='<' && strbuf.charAt(2)=='>') // very short tag
  277. {
  278. chunks.add(strbuf.toString());
  279. strbuf.delete(0,strbuf.length());
  280. }
  281. else // it's an opening tag, which will get closed later
  282. {
  283. boolean inquot=false;
  284. while (true)
  285. {
  286. ich=in.read();
  287. if (ich<0) throw new TrivialDOMException(EOF);
  288. strbuf.append((char)ich);
  289. if ((char)ich=='"') inquot=!inquot;
  290. else if ((char)ich=='>')
  291. {
  292. chunks.add(strbuf.toString());
  293. strbuf.delete(0,strbuf.length());
  294. break;
  295. }
  296. }
  297. }
  298. }
  299. else // must be plain text
  300. {
  301. boolean eof=false;
  302. while (true)
  303. {
  304. ich=in.read();
  305. if (ich<0) {eof=true; break;}
  306. if ((char)ich=='<')
  307. {
  308. chunks.add(strbuf.toString());
  309. strbuf.delete(0,strbuf.length());
  310. strbuf.append((char)ich);
  311. break;
  312. }
  313. strbuf.append((char)ich);
  314. }
  315. if (eof)
  316. {
  317. if (strbuf.toString().trim().length()==0) break; else throw new TrivialDOMException(EOF);
  318. }
  319. }
  320. }
  321. // PART 2: analyze the resulting pieces, and build up the node tree
  322. TrivialDOM xml=new TrivialDOM("unknown");
  323. Node node=null;
  324. String str=null;
  325. for (int n=0;n<chunks.size();n++)
  326. {
  327. str=chunks.get(n);
  328. if ((trimWS || node==null) && str.trim().length()==0) continue; // ignore chunks which are pure whitespace
  329. if (str.charAt(0)=='<' && str.length()>=2 && ((str.charAt(1)>='A' && str.charAt(1)<='Z') ||
  330. (str.charAt(1)>='a' && str.charAt(1)<='z')) && str.endsWith(">"))
  331. {
  332. str=str.substring(1,str.length()-1);
  333. boolean isclosed=str.endsWith("/");
  334. if (isclosed) str=str.substring(0,str.length()-1);
  335. String[] bits=splitSpace(str);
  336. Node newNode=null;
  337. if (node==null)
  338. {
  339. newNode=xml.document();
  340. newNode.setNodeName(bits[0]);
  341. }
  342. else newNode=node.appendNode(bits[0]);
  343. for (int i=1;i<bits.length;i++)
  344. {
  345. int spc=bits[i].indexOf("=");
  346. if (spc<=0) throw new TrivialDOMException("Malformed attribute: ["+snip(bits[i])+"].");
  347. String key=bits[i].substring(0,spc),val=bits[i].substring(spc+1);
  348. if (!val.startsWith("\"") || !val.endsWith("\""))
  349. throw new TrivialDOMException("Malformed attribute value: ["+snip(bits[i])+"].");
  350. val=val.substring(1,val.length()-1);
  351. newNode.setAttribute(key,val);
  352. }
  353. if (!isclosed) node=newNode;
  354. }
  355. else if (str.startsWith("</"))
  356. {
  357. if (node==null) throw new TrivialDOMException("Unexpected end tag: ["+snip(str)+"].");
  358. str=str.substring(2,str.length()-1);
  359. if (str.compareTo(node.nodeName())!=0)
  360. throw new TrivialDOMException("Closing tag does not match opening tag: ["+snip(str)+"].");
  361. node=node.parent();
  362. }
  363. else if (str.startsWith("<![CDATA["))
  364. {
  365. if (node==null) throw new TrivialDOMException("Unexpected CDATA node: ["+snip(str)+"].");
  366. if (!str.endsWith("]]>")) throw new TrivialDOMException("CDATA node not ended: ["+snip(str)+"].");
  367. str=str.substring(9,str.length()-3);
  368. node.appendText(str,true);
  369. }
  370. else if (str.startsWith("<!--"))
  371. {
  372. if (!str.endsWith("-->")) throw new TrivialDOMException("Unterminated comment: ["+snip(str)+"].");
  373. }
  374. else if (str.startsWith("<?")) {} // ignore
  375. else if (str.startsWith("<")) throw new TrivialDOMException("Unexpected angle bracket, near: ["+snip(str)+"].");
  376. else
  377. {
  378. if (node==null) throw new TrivialDOMException("Misplaced text-like block: ["+snip(str)+"].");
  379. String txt=unescapeText(str);
  380. if (trimWS) txt=txt.trim();
  381. node.appendText(txt,false);
  382. }
  383. }
  384. return xml;
  385. }
  386. // chop a string off if it's too big to go in an exception
  387. private static String snip(String str)
  388. {
  389. if (str.length()<60) return str;
  390. return str.substring(0,60)+"...";
  391. }
  392. // writing output files
  393. public static void writeXML(Writer out,TrivialDOM dom) throws IOException {writeXML(out,dom,true,true);}
  394. public static void writeXML(Writer out,TrivialDOM dom,boolean xmlHeader) throws IOException {writeXML(out,dom,xmlHeader,true);}
  395. public static void writeXML(Writer out,TrivialDOM dom,boolean xmlHeader,boolean shortClose) throws IOException {writeXML(out,dom,xmlHeader,true,true);}
  396. public static void writeXML(Writer out,TrivialDOM dom,boolean xmlHeader,boolean shortClose,boolean prettyPrint) throws IOException
  397. {
  398. if (xmlHeader) out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  399. recursiveWriteNode(out,dom.document(),0,shortClose,prettyPrint);
  400. out.flush();
  401. }
  402. private static void recursiveWriteNode(Writer out,Node nod,int level,boolean shortClose,boolean prettyPrint) throws IOException
  403. {
  404. // emit the node tag & attributes
  405. if (prettyPrint) for (int n=0;n<level;n++) out.write(" ");
  406. out.write("<"+nod.nodeName());
  407. String[] attr=nod.getAttributeNames();
  408. for (int n=0;n<attr.length;n++) out.write(" "+attr[n]+"=\""+escapeAttr(nod.attribute(attr[n]))+"\"");
  409. // special case for empty nodes
  410. if (nod.numChildren()==0)
  411. {
  412. if (shortClose) out.write("/>");
  413. else out.write("></"+nod.nodeName()+">");
  414. if (prettyPrint) out.write("\n");
  415. return;
  416. }
  417. out.write(">");
  418. boolean doIndent=true;
  419. if (!prettyPrint) doIndent=false;
  420. else if (nod.numChildren()==1 && nod.childType(0)==TYPE_TEXT) doIndent=false;
  421. else if (nod.numChildren()>0 && nod.childType(0)==TYPE_TEXT && nod.getChildText(0).preserve()) doIndent=false;
  422. if (doIndent) out.write("\n");
  423. // emit the child nodes
  424. for (int n=0;n<nod.numChildren();n++)
  425. {
  426. if (nod.childType(n)==TYPE_TEXT)
  427. {
  428. Text txt=nod.getChildText(n);
  429. if (doIndent) for (int i=0;i<level+1;i++) out.write(" ");
  430. if (txt.preserve())
  431. out.write("<![CDATA["+txt.get()+"]]>");
  432. else
  433. out.write(escapeText(txt.get()));
  434. if (doIndent) out.write("\n");
  435. }
  436. else recursiveWriteNode(out,nod.getChildNode(n),level+1,shortClose,prettyPrint);
  437. }
  438. // emit the closing tag
  439. if (doIndent) for (int n=0;n<level;n++) out.write(" ");
  440. out.write("</"+nod.nodeName()+">");
  441. if (prettyPrint) out.write("\n");
  442. }
  443. // miscellaneous
  444. // splits a string into pieces based on whitespace, but avoiding quotation marks
  445. private static String[] splitSpace(String str)
  446. {
  447. ArrayList<String> bits=new ArrayList<String>();
  448. boolean inquot=false;
  449. StringBuffer buff=new StringBuffer();
  450. for (int n=0;n<str.length();n++)
  451. {
  452. char ch=str.charAt(n);
  453. if (!inquot && (ch==' ' || ch=='\r' || ch=='\n' || ch=='\t'))
  454. {
  455. if (buff.length()>0) bits.add(buff.toString());
  456. buff.delete(0,buff.length());
  457. continue;
  458. }
  459. if (ch=='"') inquot=!inquot;
  460. buff.append(ch);
  461. }
  462. if (buff.length()>0) bits.add(buff.toString());
  463. if (bits.size()==0) bits.add("");
  464. return bits.toArray(new String[bits.size()]);
  465. }
  466. // make sure a string is suitable to encode in an attribute value (quoted)
  467. public static String escapeAttr(String S)
  468. {
  469. int i;
  470. while ((i=S.indexOf('"'))>=0) {S=S.substring(0,i)+"&quot;"+S.substring(i+1);}
  471. while ((i=S.indexOf('\''))>=0) {S=S.substring(0,i)+"&apos;"+S.substring(i+1);}
  472. return S;
  473. }
  474. // make sure a string is suitable for general XML text
  475. public static String escapeText(String S)
  476. {
  477. StringBuffer buff=new StringBuffer();
  478. for (int n=0;n<S.length();n++)
  479. {
  480. char ch=S.charAt(n);
  481. if (ch=='&') buff.append("&amp;");
  482. else if (ch=='<') buff.append("&lt;");
  483. else if (ch=='>') buff.append("&gt;");
  484. else if (ch>=127) buff.append("&#"+(int)ch+";");
  485. else buff.append(ch);
  486. }
  487. return buff.toString();
  488. }
  489. // convert any escaped entities back into regular text
  490. public static String unescapeText(String S)
  491. {
  492. Pattern hexcode=null;
  493. StringBuffer buff=new StringBuffer();
  494. int i=0;
  495. while (i<S.length())
  496. {
  497. if (i+5<=S.length() && S.substring(i,i+5).equals("&amp;")) {buff.append("&"); i+=5;}
  498. else if (i+4<=S.length() && S.substring(i,i+4).equals("&lt;")) {buff.append("<"); i+=4;}
  499. else if (i+4<=S.length() && S.substring(i,i+4).equals("&gt;")) {buff.append(">"); i+=4;}
  500. else if (i+6<=S.length() && S.substring(i,i+3).equals("&#x")) // hex unicode
  501. {
  502. if (hexcode==null) hexcode=Pattern.compile("\\&\\#x([0-9a-zA-Z]+)\\;");
  503. Matcher m=hexcode.matcher(S.substring(i));
  504. if (m.find())
  505. {
  506. String hex=m.group(1);
  507. try
  508. {
  509. char ch=(char)Integer.parseInt(hex,16);
  510. buff.append(ch);
  511. }
  512. catch (NumberFormatException ex) {} // oh well
  513. i+=4+hex.length();
  514. }
  515. else {buff.append(S.charAt(i)); i++;}
  516. }
  517. else if (i+4<=S.length() && S.substring(i,i+2).equals("&#")) // decimal unicode
  518. {
  519. int j=i+2;
  520. boolean bad=false;
  521. while (j<S.length())
  522. {
  523. if (S.charAt(j)==';') break;
  524. if (j>i+10 || S.charAt(j)<'0' || S.charAt(j)>'9') {bad=true; break;}
  525. j++;
  526. }
  527. if (bad) {buff.append(S.charAt(i)); i++;}
  528. else
  529. {
  530. int ch=Util.safeInt(S.substring(i+2,j));
  531. buff.append((char)ch);
  532. i+=j-i+1;
  533. }
  534. }
  535. else {buff.append(S.charAt(i)); i++;}
  536. }
  537. return buff.toString();
  538. }
  539. }