PageRenderTime 64ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/utilities/src/gov/nist/toolkit/utilities/xml/XmlFormatter.java

https://bitbucket.org/healthcare/ihe-open-source
Java | 538 lines | 465 code | 54 blank | 19 comment | 138 complexity | 0f065ab168119e18763c1dfb12c48dea MD5 | raw file
  1. package gov.nist.toolkit.utilities.xml;
  /**
   * Character-scanning pretty-printer for XML held in a {@code String}.
   *
   * <p>The input is first whitespace-normalized by {@link #normalize(String)} and then
   * walked one character at a time by {@code run()}, which starts a new line for every
   * tag and every attribute and indents by three columns per nesting level. Output is
   * either plain text ({@link #format(String, boolean)}) or HTML-escaped text using
   * {@code <br/>} and {@code &nbsp;} ({@link #htmlize(String, boolean)} and
   * {@link #format(String, boolean, String)}).
   *
   * <p>This is not an XML parser: comments, CDATA sections and attribute values that
   * contain {@code '<'}, {@code '>'} or {@code '='} get no special treatment.
   */
  public class XmlFormatter {
    /** Scheme prefix that marks an attribute value as a linkable UUID URN. */
    private static final String UUID_URN_PREFIX = "urn:uuid:";
    /** Length of {@code "urn:uuid:"} (9) plus a 36-character UUID. */
    private static final int UUID_URN_LENGTH = 45;

    /** Normalized input text. */
    private String in;
    /** Current indentation, in columns. */
    private int indent;
    /** Columns added or removed per nesting level. */
    private int indentAmount;
    /** Formatted output accumulated so far. */
    private StringBuilder buf;
    /** Index in {@link #in} of the character being processed. */
    private int currentPos;
    /** Cached {@code in.length()}. */
    private int length;
    /** Bounds of the next attribute: index of its name start and of its closing quote. */
    private int nextParmStart, nextParmEnd;
    /** Diagnostic log, appended to the result of {@code run()}. */
    StringBuffer diagBuf;
    /** When true, diagnostics are recorded in {@link #diagBuf}. */
    boolean diagOn;
    /** When false, a {@code <?...?>} header is dropped from the output. */
    boolean xmlHeader;
    /** While true, {@code out(..)} discards its argument. */
    boolean noOut;
    /** When true, emit HTML ({@code &lt;}, {@code <br/>}, {@code &nbsp;}) instead of plain text. */
    boolean useHtmlEscapes;
    /** When non-null, {@code urn:uuid:} attribute values are wrapped in links to this prefix. */
    String uriPrefix;

    /**
     * Extracts the name of the tag nearest to a buffer position, looking either to the
     * left or to the right, and records whether a {@code '/'} was passed on the way.
     */
    public class ElementParser {
      /** Name of the tag that was found; empty when no tag was found. */
      String tagName;
      /** True when a {@code '/'} was seen while scanning towards the tag delimiter. */
      boolean closes;
      StringBuffer b;
      int index;

      public String print() {
        return "ElementParser: tagName = " + tagName + " closes = " + closes;
      }

      /**
       * @param b         buffer to inspect (not modified)
       * @param index     position to start scanning from
       * @param direction {@code -1} to look left for the enclosing tag, anything else to look right
       */
      ElementParser(StringBuffer b, int index, int direction) {
        this.b = b;
        this.index = index;
        if (direction == -1) {
          this.parseBackwards();
        } else {
          this.parseForwards();
        }
      }

      /**
       * Scans left from {@code index} to the nearest {@code '<'} and reads the tag name
       * that follows it. When no {@code '<'} exists to the left, {@code tagName} is left
       * empty instead of indexing before the start of the buffer.
       */
      void parseBackwards() {
        closes = false;
        tagName = "";
        int i = index;
        while (i >= 0 && b.charAt(i) != '<') {
          if (b.charAt(i) == '/') {
            closes = true;
          }
          i--;
        }
        if (i < 0) {
          return; // ran off the front: there is no tag to the left
        }
        i++; // step over '<'
        if (i < b.length() && b.charAt(i) == '/') {
          i++;
        }
        tagName = b.substring(i, tagNameEnd(b, i));
      }

      /**
       * Scans right from {@code index} to the nearest {@code '>'}, then back to the
       * {@code '<'} that opens that tag, and reads the tag name. When no {@code '>'}
       * exists to the right, {@code tagName} is left empty.
       */
      void parseForwards() {
        closes = false;
        tagName = "";
        int i = index;
        while (i < b.length() && b.charAt(i) != '>') {
          if (b.charAt(i) == '/') {
            closes = true;
          }
          i++;
        }
        if (i >= b.length()) {
          return;
        }
        while (i >= 0 && b.charAt(i) != '<') {
          i--;
        }
        i++; // step over '<' (or land on 0 when none was found)
        if (i < b.length() && b.charAt(i) == '/') {
          i++;
        }
        tagName = b.substring(i, tagNameEnd(b, i));
      }
    }

    /**
     * Creates a formatter for the given XML text. The text is normalized immediately;
     * output options are set through the package-visible fields before calling {@code run()}.
     *
     * @param inXml XML text to format; {@code null} is treated as empty
     */
    public XmlFormatter(String inXml) {
      in = XmlFormatter.normalize(inXml);
      length = in.length();
      buf = new StringBuilder(in.length());
      diagBuf = new StringBuffer();
      indent = 0;
      currentPos = 0;
      nextParmStart = -1;
      nextParmEnd = -1;
      indentAmount = 3;
      diagOn = false;
      noOut = false;
      useHtmlEscapes = false;
      uriPrefix = null;
    }

    /** Creates an instance with no input; used only to instantiate {@link ElementParser}. */
    XmlFormatter() {}

    /** Appends a character to the output unless output is suppressed. */
    void out(char s) {
      if (!noOut) {
        buf.append(s);
      }
    }

    /** Appends a string to the output unless output is suppressed. */
    void out(String s) {
      if (!noOut) {
        buf.append(s);
      }
    }

    /** Appends to the diagnostic log when diagnostics are enabled. */
    void diag(String s) {
      if (diagOn) {
        diagBuf.append(s);
      }
    }

    /** Appends a line to the diagnostic log when diagnostics are enabled. */
    void diagln(String s) {
      diag(s);
      diag("\n");
    }

    /** Returns the input character at {@code pos}, or a space when {@code pos} is out of range. */
    char c(int pos) {
      return cvalid(pos) ? in.charAt(pos) : ' ';
    }

    /**
     * Returns the input character at {@code currentPos + offset}, or a space when that
     * position is out of range in either direction.
     */
    char co(int offset) {
      return covalid(offset) ? in.charAt(currentPos + offset) : ' ';
    }

    /** Returns true when the input at the current position starts with {@code prefix}. */
    boolean hasPrefix(String prefix) {
      for (int i = 0; i < prefix.length(); i++) {
        if (prefix.charAt(i) != co(i)) {
          return false;
        }
      }
      return true;
    }

    /**
     * If the 45 characters at the current position are immediately followed by a double
     * quote, emits them wrapped in an {@code <a href>} pointing at {@code uriPrefix} plus
     * those characters, and advances past them. Otherwise does nothing.
     */
    void hyperlinkUuid() {
      if (!covalid(UUID_URN_LENGTH) || co(UUID_URN_LENGTH) != '"') {
        return; // not valid uuid
      }
      // Copy all 45 characters, so the link text matches the span that is skipped.
      String uuid = in.substring(currentPos, currentPos + UUID_URN_LENGTH);
      buf.append("<a href=\"" + uriPrefix + uuid + "\">" + uuid + "</a>");
      currentPos += UUID_URN_LENGTH;
    }

    /** Returns true when {@code currentPos + offset} is a valid input index. */
    boolean covalid(int offset) {
      return cvalid(currentPos + offset);
    }

    /** Returns true when {@code pos} is a valid input index. */
    boolean cvalid(int pos) {
      return pos >= 0 && pos < length;
    }

    /** Returns true once every input character has been consumed. */
    boolean endOfBuffer() {
      return currentPos >= length;
    }

    /** Advances to the next input character. */
    void next() {
      currentPos++;
    }

    /** Returns the index of the first {@code ch} at or after {@code startAt}, or -1. */
    int findNext(int startAt, char ch) {
      for (int i = startAt; cvalid(i); i++) {
        if (c(i) == ch) {
          return i;
        }
      }
      return -1;
    }

    /** Returns the index of the first {@code ch} at or after the current position, or -1. */
    int findNext(char ch) {
      return findNext(currentPos, ch);
    }

    /** Returns the index of the last {@code ch} at or before {@code startAt}, or -1. */
    int findPrev(int startAt, char ch) {
      for (int i = startAt; cvalid(i); i--) {
        if (c(i) == ch) {
          return i;
        }
      }
      return -1;
    }

    /** Returns the index of the last {@code ch} at or before the current position, or -1. */
    int findPrev(char ch) {
      return findPrev(currentPos, ch);
    }

    /**
     * Given the index of an attribute's {@code '='}, returns the index of the first
     * character of the attribute name.
     */
    int parmStart(int eq) {
      int i = eq;
      // pass spaces
      while (cvalid(i) && c(i) == ' ') i--;
      if (cvalid(i) && c(i) == '=') {
        i--;
        // pass spaces
        while (cvalid(i) && c(i) == ' ') i--;
      }
      // back up to the start of the parm name
      while (cvalid(i) && c(i) != ' ' && c(i) != '\t') i--;
      return i + 1;
    }

    /**
     * Given the index of an attribute's {@code '='}, returns the index of the double
     * quote that ends the attribute value, or the input length when it is unterminated.
     */
    int parmEnd(int eq) {
      int i = eq;
      // find the opening quote
      while (cvalid(i) && c(i) != '"') i++;
      // pass "
      i++;
      // find end of quote
      while (cvalid(i) && c(i) != '"') i++;
      return i;
    }

    /**
     * Returns the five input characters centered on {@code centeredAt}, or a single
     * space when that window does not fit inside the input. Used for diagnostics.
     */
    String snippet(int centeredAt) {
      int amount = 2;
      if (!cvalid(centeredAt - amount) || !cvalid(centeredAt + amount)) {
        return " ";
      }
      return in.substring(centeredAt - amount, centeredAt + amount + 1);
    }

    /**
     * Locates the next attribute at or after {@code startingAt} that lies before the
     * next {@code '>'} and records it in {@code nextParmStart}/{@code nextParmEnd}.
     * {@code nextParmStart} is set to -1 when there is none.
     */
    void findNextParm(int startingAt) {
      if (diagOn) {
        diagln("strt@ " + snippet(startingAt));
      }
      nextParmStart = -1;
      int nextEq = findNext(startingAt, '=');
      if (!cvalid(nextEq)) {
        return;
      }
      int nextClose = findNext(startingAt, '>');
      if (diagOn) {
        diagln("=@ " + snippet(nextEq));
        diagln(">@ " + snippet(nextClose));
      }
      if (nextClose < nextEq) {
        return; // the '=' belongs to a later tag or to text
      }
      nextParmStart = parmStart(nextEq);
      nextParmEnd = parmEnd(nextEq);
      if (diagOn) {
        diagln("a@ " + snippet(nextParmStart));
        diagln("z@ " + snippet(nextParmEnd));
        // parmEnd() returns the input length for an unterminated value; clamp the slice.
        diagln("parm " + in.substring(nextParmStart, Math.min(nextParmEnd + 1, length)));
      }
    }

    /** Locates the next attribute starting from the current position. */
    void findNextParm() {
      findNextParm(currentPos);
    }

    /** Returns true when the current position is the start of the next attribute. */
    boolean atParmStart() {
      return nextParmStart == currentPos;
    }

    /** Emits a line break followed by the current indentation. */
    void doIndent() {
      if (useHtmlEscapes) {
        out("<br/>");
      } else {
        out('\n');
      }
      for (int j = 0; j < indent; j++) {
        if (useHtmlEscapes) {
          out("&nbsp;");
        } else {
          out(' ');
        }
      }
    }

    /**
     * When the next double-quoted value starts with {@code urn:uuid:}, copies the
     * attribute through its closing quote with the value wrapped in an
     * {@code <a target="mainFrame">} link to {@code uriPrefix} plus the value. Leaves the
     * position untouched when the value is missing, unterminated, or not a UUID URN.
     */
    void handleUri() {
      int openQuote = findNext('"');
      if (openQuote < 0) {
        return;
      }
      int closeQuote = findNext(openQuote + 1, '"');
      if (closeQuote < 0) {
        return;
      }
      String uri = in.substring(openQuote + 1, closeQuote);
      if (!uri.startsWith(UUID_URN_PREFIX)) {
        return;
      }
      diagln("uri is " + uri);
      // attribute name, '=' and the opening quote
      while (currentPos <= openQuote) {
        out(co(0));
        next();
      }
      buf.append("<a target=\"mainFrame\" href=\"" + uriPrefix + uri + "\">");
      while (currentPos < closeQuote) {
        out(co(0));
        next();
      }
      buf.append("</a>");
      out(co(0)); // output "
      next();
    }

    /** Returns true for space, newline, tab and carriage return. */
    boolean isWhite(char c) {
      return c == ' ' || c == '\n' || c == '\t' || c == '\r';
    }

    /**
     * Returns true when the nearest non-whitespace character before the current
     * position exists and is not {@code '>'}, i.e. text precedes the current tag.
     */
    boolean prevIsTextNode() {
      for (int i = -1; ; i--) {
        if (!covalid(i)) {
          return false;
        }
        char ch = co(i);
        if (ch == '>') {
          return false;
        }
        if (!isWhite(ch)) {
          return true;
        }
      }
    }

    /** Starts a new line for an attribute, links it if applicable, and finds the next one. */
    void doParmStart() {
      doIndent();
      if (uriPrefix != null) {
        handleUri();
      }
      findNextParm(nextParmEnd);
    }

    /** Hook run when an opening tag is reached: locates its first attribute. */
    void newOpen() {
      findNextParm();
    }

    /** Hook run when a closing tag is reached; intentionally empty. */
    void newClose() {
    }

    /**
     * Formats the whole input.
     *
     * @return the formatted text, then a blank-line separator ({@code "\n\n"} or
     *         {@code "<br/><br/>"}), then the diagnostic log (empty unless {@code diagOn})
     */
    String run() {
      while (!endOfBuffer()) {
        if (atParmStart()) {
          doParmStart();
        }
        char ch = co(0); // read after doParmStart(), which may have advanced the position
        if (ch == '<') {
          char following = co(1);
          if (following == '/') {
            indent -= indentAmount;
            // A closing tag directly after text stays on the text's line.
            if (!prevIsTextNode()) {
              doIndent();
              newClose();
            }
          } else if (following == '?') {
            doIndent();
            if (!xmlHeader) {
              noOut = true; // swallow the header up to its '>'
            }
          } else {
            doIndent();
            indent += indentAmount;
            newOpen();
          }
          out(useHtmlEscapes ? "&lt;" : "<");
        } else if (ch == '>') {
          out(useHtmlEscapes ? "&gt;" : ">");
          noOut = false;
          if (co(-1) == '/') {
            indent -= indentAmount; // self-closing tag: undo the indent added on open
          }
        } else if (ch == '&') {
          // Escape only for HTML display; plain output must not double-escape entities.
          out(useHtmlEscapes ? "&amp;" : "&");
        } else {
          out(ch);
        }
        next();
      }
      String separator = useHtmlEscapes ? "<br/><br/>" : "\n\n";
      return buf.toString() + separator + diagBuf.toString();
    }

    /**
     * Collapses insignificant whitespace and rewrites empty {@code <x></x>} pairs as
     * {@code <x/>}.
     *
     * <p>Newlines, tabs and carriage returns become spaces; runs of spaces outside text
     * nodes, spaces after {@code '>'} and spaces before {@code '<'} are removed; trailing
     * spaces are stripped. The scan stops a few characters before the end of the input,
     * so the final characters are copied unchanged apart from trailing-space removal.
     *
     * @param inXml text to normalize; {@code null} is treated as empty
     * @return the normalized text, never {@code null}
     */
    static public String normalize(String inXml) {
      if (inXml == null || inXml.length() == 0) {
        return "";
      }
      StringBuffer b = new StringBuffer(inXml);
      b.append(" ");
      for (int i = 0; i < b.length() - 5; ) {
        char prev = (i == 0) ? 'z' : b.charAt(i - 1);
        char cur = b.charAt(i);
        char following = b.charAt(i + 1);
        if (cur == '\n' || cur == '\t' || cur == '\r') {
          b.setCharAt(i, ' '); // re-examined as a space on the next pass
          continue;
        }
        if (cur == ' ' && following == ' ' && !isTextNode(b, i)) {
          b.deleteCharAt(i);
          continue;
        }
        if (prev == '>' && cur == ' ') {
          b.deleteCharAt(i);
          continue;
        }
        if (cur == ' ' && following == '<') {
          b.deleteCharAt(i);
          continue;
        }
        if (prev == '>' && cur == '<' && following == '/' && closesPrecedingOpenTag(b, i)) {
          deleteSimpleElement(b, i);
          b.insert(i - 1, '/');
          continue;
        }
        i++;
      }
      int end = b.length();
      while (end > 0 && b.charAt(end - 1) == ' ') {
        end--;
      }
      b.setLength(end);
      return b.toString();
    }

    /**
     * Returns true when the closing tag starting at {@code closeTagStart} directly
     * follows an opening tag of the same name, so the pair can be collapsed. A preceding
     * self-closing or closing tag never qualifies, even when the names match.
     */
    private static boolean closesPrecedingOpenTag(StringBuffer b, int closeTagStart) {
      int gt = closeTagStart - 1; // the '>' that ends the preceding tag
      if (gt < 1 || b.charAt(gt - 1) == '/') {
        return false;
      }
      int lt = gt;
      while (lt > 0 && b.charAt(lt) != '<') {
        lt--;
      }
      if (b.charAt(lt) != '<' || b.charAt(lt + 1) == '/') {
        return false;
      }
      return tagName(b, gt).equals(tagName(b, closeTagStart));
    }

    /** Returns true when {@code ch} terminates a tag name. */
    private static boolean endsTagName(char ch) {
      return ch == ' ' || ch == '/' || ch == '>';
    }

    /** Returns the index just past the tag name starting at {@code from}, capped at the buffer end. */
    private static int tagNameEnd(StringBuffer b, int from) {
      int end = from;
      while (end < b.length() && !endsTagName(b.charAt(end))) {
        end++;
      }
      return end;
    }

    /**
     * Returns true when {@code index} lies in the text between an opening tag and the
     * closing tag of the same name.
     */
    static public boolean isTextNode(StringBuffer b, int index) {
      XmlFormatter xf = new XmlFormatter();
      return xf.isaTextNode(b, index);
    }

    private boolean isaTextNode(StringBuffer b, int index) {
      ElementParser left = new ElementParser(b, index, -1);
      ElementParser right = new ElementParser(b, index, 1);
      // An empty name on the left means no tag was found there at all.
      return left.tagName.length() > 0
          && left.tagName.equals(right.tagName)
          && !left.closes
          && right.closes;
    }

    /**
     * Deletes the tag that contains {@code index}: from the nearest {@code '<'} at or
     * before it through the nearest {@code '>'} at or after it. An unterminated tag is
     * deleted through the end of the buffer.
     */
    static public void deleteSimpleElement(StringBuffer b, int index) {
      int start = index;
      while (start > 0 && b.charAt(start) != '<') {
        start--;
      }
      int end = index;
      while (end < b.length() && b.charAt(end) != '>') {
        end++;
      }
      b.delete(start, end + 1); // delete() clamps an end beyond the buffer length
    }

    /**
     * Returns the name of the tag that contains {@code index}, without any leading
     * {@code '/'}. The name ends at the first space, {@code '/'} or {@code '>'}, or at
     * the end of the buffer.
     */
    static public String tagName(StringBuffer b, int index) {
      int start = index;
      while (start > 0 && b.charAt(start) != '<') {
        start--;
      }
      start++;
      if (start < b.length() && b.charAt(start) == '/') {
        start++;
      }
      return b.substring(start, tagNameEnd(b, start));
    }

    /** Builds a formatter with the given options and runs it. */
    private static String render(String inXml, boolean xmlHeader, boolean html, String uriPrefix) {
      XmlFormatter f = new XmlFormatter(inXml);
      f.xmlHeader = xmlHeader;
      f.useHtmlEscapes = html;
      f.uriPrefix = uriPrefix;
      return f.run();
    }

    /**
     * Formats XML as HTML-escaped text.
     *
     * @param xmlHeader whether to keep a {@code <?...?>} header in the output
     */
    static public String htmlize(String inXml, boolean xmlHeader) {
      return render(inXml, xmlHeader, true, null);
    }

    /** Formats XML as HTML-escaped text, keeping any {@code <?...?>} header. */
    static public String htmlize(String inXml) {
      return htmlize(inXml, true);
    }

    /**
     * Formats XML as plain indented text.
     *
     * @param xmlHeader whether to keep a {@code <?...?>} header in the output
     */
    static public String format(String inXml, boolean xmlHeader) {
      return render(inXml, xmlHeader, false, null);
    }

    /**
     * Formats XML as HTML-escaped text in which {@code urn:uuid:} attribute values are
     * hyperlinked.
     *
     * @param xmlHeader whether to keep a {@code <?...?>} header in the output
     * @param uriPrefix prepended to each UUID URN to form the link target
     */
    static public String format(String inXml, boolean xmlHeader, String uriPrefix) {
      return render(inXml, xmlHeader, true, uriPrefix);
    }

    /** Formats a small sample document with diagnostics on and prints the result. */
    static public void main(String[] argvs) {
      String inXml = "<foo><a>Hi</a></foo>";
      XmlFormatter f = new XmlFormatter(inXml);
      f.xmlHeader = false;
      f.useHtmlEscapes = false;
      f.diagOn = true;
      f.uriPrefix = "http://localhost:8084/hl7services/get?id=";
      System.out.println(f.run());
    }
  }