/xmlenc-0.52/src/main/org/znerd/xmlenc/XMLChecker.java

# · Java · 948 lines · 522 code · 49 blank · 377 comment · 710 complexity · 0cf68ccd23ab7d9a292d15a974d81c33 MD5 · raw file

  1. /*
  2. * $Id: XMLChecker.java,v 1.11 2005/09/12 08:40:02 znerd Exp $
  3. */
  4. package org.znerd.xmlenc;
  5. /**
  6. * Utility class that provides XML checking functionality.
  7. *
  8. * @version $Revision: 1.11 $ $Date: 2005/09/12 08:40:02 $
  9. * @author Ernst de Haan (<a href="mailto:wfe.dehaan@gmail.com">wfe.dehaan@gmail.com</a>)
  10. *
  11. * @since xmlenc 0.41
  12. */
  13. public final class XMLChecker extends Object {
  14. //-------------------------------------------------------------------------
  15. // Class functions
  16. //-------------------------------------------------------------------------
  17. /**
  18. * Checks if the specified string matches the <em>S</em> (white space)
  19. * production.
  20. *
  21. * <p>See:
  22. * <a href="http://www.w3.org/TR/REC-xml#NT-S">Definition of S</a>.
  23. *
  24. * @param s
  25. * the character string to check, cannot be <code>null</code>.
  26. *
  27. * @throws NullPointerException
  28. * if <code>s == null</code>.
  29. *
  30. * @throws InvalidXMLException
  31. * if the specified character string does not match the <em>S</em>
  32. * production.
  33. */
  34. public static final void checkS(String s)
  35. throws NullPointerException {
  36. checkS(s.toCharArray(), 0, s.length());
  37. }
  38. /**
  39. * Checks if the specified part of a character array matches the <em>S</em>
  40. * (white space) production.
  41. *
  42. * <p>See:
  43. * <a href="http://www.w3.org/TR/REC-xml#NT-S">Definition of S</a>.
  44. *
  45. * @param ch
  46. * the character array that contains the characters to be checked,
  47. * cannot be <code>null</code>.
  48. *
  49. * @param start
  50. * the start index into <code>ch</code>, must be &gt;= 0.
  51. *
  52. * @param length
  53. * the number of characters to take from <code>ch</code>, starting at
  54. * the <code>start</code> index.
  55. *
  56. * @throws NullPointerException
  57. * if <code>ch == null</code>.
  58. *
  59. * @throws IndexOutOfBoundsException
  60. * if <code>start &lt; 0
  61. * || start + length &gt; ch.length</code>.
  62. *
  63. * @throws InvalidXMLException
  64. * if the specified character string does not match the <em>S</em>
  65. * production.
  66. */
  67. public static final void checkS(char[] ch, int start, int length)
  68. throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException {
  69. // Loop through the array and check each character
  70. for (int i = start; i < length; i++) {
  71. int c = (int) ch[i];
  72. if (c != 0x20 && c != 0x9 && c != 0xD && c != 0xA) {
  73. throw new InvalidXMLException("The character 0x" + Integer.toHexString(c) + " is not valid for the 'S' production (white space).");
  74. }
  75. }
  76. }
  77. /**
  78. * Determines if the specified string matches the <em>Name</em> production.
  79. *
  80. * <p>See:
  81. * <a href="http://www.w3.org/TR/REC-xml#NT-Name">Definition of Name</a>.
  82. *
  83. * @param s
  84. * the character string to check, cannot be <code>null</code>.
  85. *
  86. * @throws NullPointerException
  87. * if <code>s == null</code>.
  88. *
  89. * @return
  90. * <code>true</code> if the {@link String} matches the production, or
  91. * <code>false</code> otherwise.
  92. */
  93. public static final boolean isName(String s)
  94. throws NullPointerException {
  95. try {
  96. checkName(s);
  97. return true;
  98. } catch (InvalidXMLException exception) {
  99. return false;
  100. }
  101. }
  102. /**
  103. * Checks if the specified string matches the <em>Name</em> production.
  104. *
  105. * <p>See:
  106. * <a href="http://www.w3.org/TR/REC-xml#NT-Name">Definition of Name</a>.
  107. *
  108. * @param s
  109. * the character string to check, cannot be <code>null</code>.
  110. *
  111. * @throws NullPointerException
  112. * if <code>s == null</code>.
  113. *
  114. * @throws InvalidXMLException
  115. * if the specified character string does not match the <em>Name</em>
  116. * production.
  117. */
  118. public static final void checkName(String s)
  119. throws NullPointerException, InvalidXMLException {
  120. checkName(s.toCharArray(), 0, s.length());
  121. }
  122. /**
  123. * Checks if the specified part of a character array matches the
  124. * <em>Name</em> production.
  125. *
  126. * <p>See:
  127. * <a href="http://www.w3.org/TR/REC-xml#NT-Name">Definition of Name</a>.
  128. *
  129. * @param ch
  130. * the character array that contains the characters to be checked,
  131. * cannot be <code>null</code>.
  132. *
  133. * @param start
  134. * the start index into <code>ch</code>, must be &gt;= 0.
  135. *
  136. * @param length
  137. * the number of characters to take from <code>ch</code>, starting at
  138. * the <code>start</code> index.
  139. *
  140. * @throws NullPointerException
  141. * if <code>ch == null</code>.
  142. *
  143. * @throws IndexOutOfBoundsException
  144. * if <code>start &lt; 0
  145. * || start + length &gt; ch.length</code>.
  146. *
  147. * @throws InvalidXMLException
  148. * if the specified character string does not match the <em>Name</em>
  149. * production.
  150. */
  151. public static final void checkName(char[] ch, int start, int length)
  152. throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException {
  153. // Minimum length is 1
  154. if (length < 1) {
  155. throw new InvalidXMLException("An empty string does not match the 'Name' production.");
  156. }
  157. // First char must match: (Letter | '_' | ':')
  158. int i = start;
  159. char c = ch[i];
  160. if (c != '_' && c != ':' && !isLetter(c)) {
  161. throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is invalid as a starting character in the 'Name' production.");
  162. }
  163. // Loop through the array and check each character
  164. for (i++; i < length; i++) {
  165. c = ch[i];
  166. if (!isNameChar(c)) {
  167. throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid for the 'Name' production.");
  168. }
  169. }
  170. }
  171. /**
  172. * Determines if the specified string matches the <em>SystemLiteral</em>
  173. * production.
  174. *
  175. * <p>See:
  176. * <a href="http://www.w3.org/TR/REC-xml#NT-SystemLiteral">Definition of SystemLiteral</a>.
  177. *
  178. * @param s
  179. * the character string to check, cannot be <code>null</code>.
  180. *
  181. * @throws NullPointerException
  182. * if <code>s == null</code>.
  183. *
  184. * @return
  185. * <code>true</code> if the {@link String} matches the production, or
  186. * <code>false</code> otherwise.
  187. */
  188. public static final boolean isSystemLiteral(String s)
  189. throws NullPointerException {
  190. try {
  191. checkSystemLiteral(s);
  192. return true;
  193. } catch (InvalidXMLException exception) {
  194. return false;
  195. }
  196. }
  197. /**
  198. * Checks if the specified string matches the <em>SystemLiteral</em>
  199. * production.
  200. *
  201. * <p>See:
  202. * <a href="http://www.w3.org/TR/REC-xml#NT-SystemLiteral">Definition of SystemLiteral</a>.
  203. *
  204. * @param s
  205. * the character string to check, cannot be <code>null</code>.
  206. *
  207. * @throws NullPointerException
  208. * if <code>s == null</code>.
  209. *
  210. * @throws InvalidXMLException
  211. * if the specified character string does not match the
  212. * <em>PubidLiteral</em> production.
  213. */
  214. public static final void checkSystemLiteral(String s)
  215. throws NullPointerException, InvalidXMLException {
  216. checkSystemLiteral(s.toCharArray(), 0, s.length());
  217. }
  218. /**
  219. * Checks if the specified part of a character array matches the
  220. * <em>SystemLiteral</em> production.
  221. *
  222. * <p>See:
  223. * <a href="http://www.w3.org/TR/REC-xml#NT-SystemLiteral">Definition of SystemLiteral</a>.
  224. *
  225. * @param ch
  226. * the character array that contains the characters to be checked,
  227. * cannot be <code>null</code>.
  228. *
  229. * @param start
  230. * the start index into <code>ch</code>, must be &gt;= 0.
  231. *
  232. * @param length
  233. * the number of characters to take from <code>ch</code>, starting at
  234. * the <code>start</code> index.
  235. *
  236. * @throws NullPointerException
  237. * if <code>ch == null</code>.
  238. *
  239. * @throws IndexOutOfBoundsException
  240. * if <code>start &lt; 0
  241. * || start + length &gt; ch.length</code>.
  242. *
  243. * @throws InvalidXMLException
  244. * if the specified character string does not match the
  245. * <em>SystemLiteral</em> production.
  246. */
  247. public static final void checkSystemLiteral(char[] ch,
  248. int start,
  249. int length)
  250. throws NullPointerException,
  251. IndexOutOfBoundsException,
  252. InvalidXMLException {
  253. // Minimum length is 3
  254. if (length < 3) {
  255. throw new InvalidXMLException("Minimum length for the 'SystemLiteral' production is 3 characters.");
  256. }
  257. int lastIndex = start + length - 1;
  258. char firstChar = ch[0];
  259. char lastChar = ch[lastIndex];
  260. // First and last char: single qoute (apostrophe)
  261. String otherAllowedChars;
  262. if (firstChar == '\'') {
  263. if (lastChar != '\'') {
  264. throw new InvalidXMLException("First character is '\\'', but the "
  265. + "last character is 0x"
  266. + Integer.toHexString((int) lastChar)
  267. + '.');
  268. }
  269. otherAllowedChars = "-()+,./:=?;!*#@$_%";
  270. // First and last char: double qoute character
  271. } else if (firstChar == '"') {
  272. if (lastChar != '"') {
  273. throw new InvalidXMLException("First character is '\"', but the "
  274. + "last character is 0x"
  275. + Integer.toHexString((int) lastChar)
  276. + '.');
  277. }
  278. otherAllowedChars = "-'()+,./:=?;!*#@$_%";
  279. // First character is invalid
  280. } else {
  281. throw new InvalidXMLException("First char must either be '\\'' or "
  282. + "'\"' instead of 0x"
  283. + Integer.toHexString((int) firstChar)
  284. + '.');
  285. }
  286. // Check each character
  287. for (int i = 1; i < (length - 1); i++) {
  288. char c = ch[i];
  289. if (c == firstChar) {
  290. if (firstChar == '\'') {
  291. throw new InvalidXMLException("Found '\\'' at position " + i + '.');
  292. } else {
  293. throw new InvalidXMLException("Found '\"' at position " + i + '.');
  294. }
  295. }
  296. }
  297. }
  298. /**
  299. * Determines if the specified string matches the <em>PubidLiteral</em>
  300. * production.
  301. *
  302. * <p>See:
  303. * <a href="http://www.w3.org/TR/REC-xml#NT-PubidLiteral">Definition of PubidLiteral</a>.
  304. *
  305. * @param s
  306. * the character string to check, cannot be <code>null</code>.
  307. *
  308. * @throws NullPointerException
  309. * if <code>s == null</code>.
  310. *
  311. * @return
  312. * <code>true</code> if the {@link String} matches the production, or
  313. * <code>false</code> otherwise.
  314. */
  315. public static final boolean isPubidLiteral(String s)
  316. throws NullPointerException {
  317. try {
  318. checkPubidLiteral(s);
  319. return true;
  320. } catch (InvalidXMLException exception) {
  321. return false;
  322. }
  323. }
  324. /**
  325. * Checks if the specified string matches the <em>PubidLiteral</em>
  326. * production.
  327. *
  328. * <p>See:
  329. * <a href="http://www.w3.org/TR/REC-xml#NT-PubidLiteral">Definition of PubidLiteral</a>.
  330. *
  331. * @param s
  332. * the character string to check, cannot be <code>null</code>.
  333. *
  334. * @throws NullPointerException
  335. * if <code>s == null</code>.
  336. *
  337. * @throws InvalidXMLException
  338. * if the specified character string does not match the
  339. * <em>PubidLiteral</em> production.
  340. */
  341. public static final void checkPubidLiteral(String s)
  342. throws NullPointerException, InvalidXMLException {
  343. checkPubidLiteral(s.toCharArray(), 0, s.length());
  344. }
  345. /**
  346. * Checks if the specified part of a character array matches the
  347. * <em>PubidLiteral</em> production.
  348. *
  349. * <p>See:
  350. * <a href="http://www.w3.org/TR/REC-xml#NT-PubidLiteral">Definition of PubidLiteral</a>.
  351. *
  352. * @param ch
  353. * the character array that contains the characters to be checked,
  354. * cannot be <code>null</code>.
  355. *
  356. * @param start
  357. * the start index into <code>ch</code>, must be &gt;= 0.
  358. *
  359. * @param length
  360. * the number of characters to take from <code>ch</code>, starting at
  361. * the <code>start</code> index.
  362. *
  363. * @throws NullPointerException
  364. * if <code>ch == null</code>.
  365. *
  366. * @throws IndexOutOfBoundsException
  367. * if <code>start &lt; 0
  368. * || start + length &gt; ch.length</code>.
  369. *
  370. * @throws InvalidXMLException
  371. * if the specified character string does not match the
  372. * <em>PubidLiteral</em> production.
  373. */
  374. public static final void checkPubidLiteral(char[] ch,
  375. int start,
  376. int length)
  377. throws NullPointerException,
  378. IndexOutOfBoundsException,
  379. InvalidXMLException {
  380. // Minimum length is 3
  381. if (length < 3) {
  382. throw new InvalidXMLException("Minimum length for the 'PubidLiteral' production is 3 characters.");
  383. }
  384. int lastIndex = start + length - 1;
  385. char firstChar = ch[0];
  386. char lastChar = ch[lastIndex];
  387. // First and last char: single qoute (apostrophe)
  388. String otherAllowedChars;
  389. if (firstChar == '\'') {
  390. if (lastChar != '\'') {
  391. throw new InvalidXMLException("First character is '\\'', but the "
  392. + "last character is 0x"
  393. + Integer.toHexString((int) lastChar)
  394. + '.');
  395. }
  396. otherAllowedChars = "-()+,./:=?;!*#@$_%";
  397. // First and last char: double qoute character
  398. } else if (firstChar == '"') {
  399. if (lastChar != '"') {
  400. throw new InvalidXMLException("First character is '\"', but the "
  401. + "last character is 0x"
  402. + Integer.toHexString((int) lastChar)
  403. + '.');
  404. }
  405. otherAllowedChars = "-'()+,./:=?;!*#@$_%";
  406. // First character is invalid
  407. } else {
  408. throw new InvalidXMLException("First char must either be '\\'' or "
  409. + "'\"' instead of 0x"
  410. + Integer.toHexString((int) firstChar)
  411. + '.');
  412. }
  413. // Check each character
  414. for (int i = 1; i < (length - 1); i++) {
  415. char c = ch[i];
  416. if (c != 0x20 && c != 0x0D && c != 0x0A && !isLetter(c) && !isDigit(c)
  417. && otherAllowedChars.indexOf(c) < 0) {
  418. // TODO: Quote character properly, even if it is an apostrophe
  419. throw new InvalidXMLException("The character '"
  420. + c
  421. + "' (0x"
  422. + Integer.toHexString((int) c)
  423. + ") is not valid for the "
  424. + "'PubidLiteral' production.");
  425. }
  426. }
  427. }
  428. /**
  429. * Determines if the specified character matches the <em>NameChar</em>
  430. * production.
  431. *
  432. * <p>See:
  433. * <a href="http://www.w3.org/TR/REC-xml#NT-NameChar">Definition of NameChar</a>.
  434. *
  435. * @param c
  436. * the character to check.
  437. *
  438. * @return
  439. * <code>true</code> if the character matches the production, or
  440. * <code>false</code> if it does not.
  441. */
  442. private static final boolean isNameChar(char c) {
  443. return c == '.'
  444. || c == '-'
  445. || c == '_'
  446. || c == ':'
  447. || isDigit(c)
  448. || isLetter(c)
  449. || isCombiningChar(c)
  450. || isExtender(c);
  451. }
  452. /**
  453. * Determines if the specified character matches the <em>Letter</em>
  454. * production.
  455. *
  456. * <p>See:
  457. * <a href="http://www.w3.org/TR/REC-xml#NT-Letter">Definition of Letter</a>.
  458. *
  459. * @param c
  460. * the character to check.
  461. *
  462. * @return
  463. * <code>true</code> if the character matches the production, or
  464. * <code>false</code> if it does not.
  465. */
  466. private static final boolean isLetter(char c) {
  467. return isBaseChar(c) || isIdeographic(c);
  468. }
  469. /**
  470. * Determines if the specified character matches the <em>BaseChar</em>
  471. * production.
  472. *
  473. * <p>See:
  474. * <a href="http://www.w3.org/TR/REC-xml#NT-BaseChar">Definition of BaseChar</a>.
  475. *
  476. * @param c
  477. * the character to check.
  478. *
  479. * @return
  480. * <code>true</code> if the character matches the production, or
  481. * <code>false</code> if it does not.
  482. */
  483. private static final boolean isBaseChar(char c) {
  484. int n = (int) c;
  485. return (n >= 0x0041 && n <= 0x005A)
  486. || (n >= 0x0061 && n <= 0x007A)
  487. || (n >= 0x00C0 && n <= 0x00D6)
  488. || (n >= 0x00D8 && n <= 0x00F6)
  489. || (n >= 0x00F8 && n <= 0x00FF)
  490. || (n >= 0x0100 && n <= 0x0131)
  491. || (n >= 0x0134 && n <= 0x013E)
  492. || (n >= 0x0141 && n <= 0x0148)
  493. || (n >= 0x014A && n <= 0x017E)
  494. || (n >= 0x0180 && n <= 0x01C3)
  495. || (n >= 0x01CD && n <= 0x01F0)
  496. || (n >= 0x01F4 && n <= 0x01F5)
  497. || (n >= 0x01FA && n <= 0x0217)
  498. || (n >= 0x0250 && n <= 0x02A8)
  499. || (n >= 0x02BB && n <= 0x02C1)
  500. || (n == 0x0386)
  501. || (n >= 0x0388 && n <= 0x038A)
  502. || (n == 0x038C)
  503. || (n >= 0x038E && n <= 0x03A1)
  504. || (n >= 0x03A3 && n <= 0x03CE)
  505. || (n >= 0x03D0 && n <= 0x03D6)
  506. || (n == 0x03DA)
  507. || (n == 0x03DC)
  508. || (n == 0x03DE)
  509. || (n == 0x03E0)
  510. || (n >= 0x03E2 && n <= 0x03F3)
  511. || (n >= 0x0401 && n <= 0x040C)
  512. || (n >= 0x040E && n <= 0x044F)
  513. || (n >= 0x0451 && n <= 0x045C)
  514. || (n >= 0x045E && n <= 0x0481)
  515. || (n >= 0x0490 && n <= 0x04C4)
  516. || (n >= 0x04C7 && n <= 0x04C8)
  517. || (n >= 0x04CB && n <= 0x04CC)
  518. || (n >= 0x04D0 && n <= 0x04EB)
  519. || (n >= 0x04EE && n <= 0x04F5)
  520. || (n >= 0x04F8 && n <= 0x04F9)
  521. || (n >= 0x0531 && n <= 0x0556)
  522. || (n == 0x0559)
  523. || (n >= 0x0561 && n <= 0x0586)
  524. || (n >= 0x05D0 && n <= 0x05EA)
  525. || (n >= 0x05F0 && n <= 0x05F2)
  526. || (n >= 0x0621 && n <= 0x063A)
  527. || (n >= 0x0641 && n <= 0x064A)
  528. || (n >= 0x0671 && n <= 0x06B7)
  529. || (n >= 0x06BA && n <= 0x06BE)
  530. || (n >= 0x06C0 && n <= 0x06CE)
  531. || (n >= 0x06D0 && n <= 0x06D3)
  532. || (n == 0x06D5)
  533. || (n >= 0x06E5 && n <= 0x06E6)
  534. || (n >= 0x0905 && n <= 0x0939)
  535. || (n == 0x093D)
  536. || (n >= 0x0958 && n <= 0x0961)
  537. || (n >= 0x0985 && n <= 0x098C)
  538. || (n >= 0x098F && n <= 0x0990)
  539. || (n >= 0x0993 && n <= 0x09A8)
  540. || (n >= 0x09AA && n <= 0x09B0)
  541. || (n == 0x09B2)
  542. || (n >= 0x09B6 && n <= 0x09B9)
  543. || (n >= 0x09DC && n <= 0x09DD)
  544. || (n >= 0x09DF && n <= 0x09E1)
  545. || (n >= 0x09F0 && n <= 0x09F1)
  546. || (n >= 0x0A05 && n <= 0x0A0A)
  547. || (n >= 0x0A0F && n <= 0x0A10)
  548. || (n >= 0x0A13 && n <= 0x0A28)
  549. || (n >= 0x0A2A && n <= 0x0A30)
  550. || (n >= 0x0A32 && n <= 0x0A33)
  551. || (n >= 0x0A35 && n <= 0x0A36)
  552. || (n >= 0x0A38 && n <= 0x0A39)
  553. || (n >= 0x0A59 && n <= 0x0A5C)
  554. || (n == 0x0A5E)
  555. || (n >= 0x0A72 && n <= 0x0A74)
  556. || (n >= 0x0A85 && n <= 0x0A8B)
  557. || (n == 0x0A8D)
  558. || (n >= 0x0A8F && n <= 0x0A91)
  559. || (n >= 0x0A93 && n <= 0x0AA8)
  560. || (n >= 0x0AAA && n <= 0x0AB0)
  561. || (n >= 0x0AB2 && n <= 0x0AB3)
  562. || (n >= 0x0AB5 && n <= 0x0AB9)
  563. || (n == 0x0ABD)
  564. || (n == 0x0AE0)
  565. || (n >= 0x0B05 && n <= 0x0B0C)
  566. || (n >= 0x0B0F && n <= 0x0B10)
  567. || (n >= 0x0B13 && n <= 0x0B28)
  568. || (n >= 0x0B2A && n <= 0x0B30)
  569. || (n >= 0x0B32 && n <= 0x0B33)
  570. || (n >= 0x0B36 && n <= 0x0B39)
  571. || (n == 0x0B3D)
  572. || (n >= 0x0B5C && n <= 0x0B5D)
  573. || (n >= 0x0B5F && n <= 0x0B61)
  574. || (n >= 0x0B85 && n <= 0x0B8A)
  575. || (n >= 0x0B8E && n <= 0x0B90)
  576. || (n >= 0x0B92 && n <= 0x0B95)
  577. || (n >= 0x0B99 && n <= 0x0B9A)
  578. || (n == 0x0B9C)
  579. || (n >= 0x0B9E && n <= 0x0B9F)
  580. || (n >= 0x0BA3 && n <= 0x0BA4)
  581. || (n >= 0x0BA8 && n <= 0x0BAA)
  582. || (n >= 0x0BAE && n <= 0x0BB5)
  583. || (n >= 0x0BB7 && n <= 0x0BB9)
  584. || (n >= 0x0C05 && n <= 0x0C0C)
  585. || (n >= 0x0C0E && n <= 0x0C10)
  586. || (n >= 0x0C12 && n <= 0x0C28)
  587. || (n >= 0x0C2A && n <= 0x0C33)
  588. || (n >= 0x0C35 && n <= 0x0C39)
  589. || (n >= 0x0C60 && n <= 0x0C61)
  590. || (n >= 0x0C85 && n <= 0x0C8C)
  591. || (n >= 0x0C8E && n <= 0x0C90)
  592. || (n >= 0x0C92 && n <= 0x0CA8)
  593. || (n >= 0x0CAA && n <= 0x0CB3)
  594. || (n >= 0x0CB5 && n <= 0x0CB9)
  595. || (n == 0x0CDE)
  596. || (n >= 0x0CE0 && n <= 0x0CE1)
  597. || (n >= 0x0D05 && n <= 0x0D0C)
  598. || (n >= 0x0D0E && n <= 0x0D10)
  599. || (n >= 0x0D12 && n <= 0x0D28)
  600. || (n >= 0x0D2A && n <= 0x0D39)
  601. || (n >= 0x0D60 && n <= 0x0D61)
  602. || (n >= 0x0E01 && n <= 0x0E2E)
  603. || (n == 0x0E30)
  604. || (n >= 0x0E32 && n <= 0x0E33)
  605. || (n >= 0x0E40 && n <= 0x0E45)
  606. || (n >= 0x0E81 && n <= 0x0E82)
  607. || (n == 0x0E84)
  608. || (n >= 0x0E87 && n <= 0x0E88)
  609. || (n == 0x0E8A)
  610. || (n == 0x0E8D)
  611. || (n >= 0x0E94 && n <= 0x0E97)
  612. || (n >= 0x0E99 && n <= 0x0E9F)
  613. || (n >= 0x0EA1 && n <= 0x0EA3)
  614. || (n == 0x0EA5)
  615. || (n == 0x0EA7)
  616. || (n >= 0x0EAA && n <= 0x0EAB)
  617. || (n >= 0x0EAD && n <= 0x0EAE)
  618. || (n == 0x0EB0)
  619. || (n >= 0x0EB2 && n <= 0x0EB3)
  620. || (n == 0x0EBD)
  621. || (n >= 0x0EC0 && n <= 0x0EC4)
  622. || (n >= 0x0F40 && n <= 0x0F47)
  623. || (n >= 0x0F49 && n <= 0x0F69)
  624. || (n >= 0x10A0 && n <= 0x10C5)
  625. || (n >= 0x10D0 && n <= 0x10F6)
  626. || (n == 0x1100)
  627. || (n >= 0x1102 && n <= 0x1103)
  628. || (n >= 0x1105 && n <= 0x1107)
  629. || (n == 0x1109)
  630. || (n >= 0x110B && n <= 0x110C)
  631. || (n >= 0x110E && n <= 0x1112)
  632. || (n == 0x113C)
  633. || (n == 0x113E)
  634. || (n == 0x1140)
  635. || (n == 0x114C)
  636. || (n == 0x114E)
  637. || (n == 0x1150)
  638. || (n >= 0x1154 && n <= 0x1155)
  639. || (n == 0x1159)
  640. || (n >= 0x115F && n <= 0x1161)
  641. || (n == 0x1163)
  642. || (n == 0x1165)
  643. || (n == 0x1167)
  644. || (n == 0x1169)
  645. || (n >= 0x116D && n <= 0x116E)
  646. || (n >= 0x1172 && n <= 0x1173)
  647. || (n == 0x1175)
  648. || (n == 0x119E)
  649. || (n == 0x11A8)
  650. || (n == 0x11AB)
  651. || (n >= 0x11AE && n <= 0x11AF)
  652. || (n >= 0x11B7 && n <= 0x11B8)
  653. || (n == 0x11BA)
  654. || (n >= 0x11BC && n <= 0x11C2)
  655. || (n == 0x11EB)
  656. || (n == 0x11F0)
  657. || (n == 0x11F9)
  658. || (n >= 0x1E00 && n <= 0x1E9B)
  659. || (n >= 0x1EA0 && n <= 0x1EF9)
  660. || (n >= 0x1F00 && n <= 0x1F15)
  661. || (n >= 0x1F18 && n <= 0x1F1D)
  662. || (n >= 0x1F20 && n <= 0x1F45)
  663. || (n >= 0x1F48 && n <= 0x1F4D)
  664. || (n >= 0x1F50 && n <= 0x1F57)
  665. || (n == 0x1F59)
  666. || (n == 0x1F5B)
  667. || (n == 0x1F5D)
  668. || (n >= 0x1F5F && n <= 0x1F7D)
  669. || (n >= 0x1F80 && n <= 0x1FB4)
  670. || (n >= 0x1FB6 && n <= 0x1FBC)
  671. || (n == 0x1FBE)
  672. || (n >= 0x1FC2 && n <= 0x1FC4)
  673. || (n >= 0x1FC6 && n <= 0x1FCC)
  674. || (n >= 0x1FD0 && n <= 0x1FD3)
  675. || (n >= 0x1FD6 && n <= 0x1FDB)
  676. || (n >= 0x1FE0 && n <= 0x1FEC)
  677. || (n >= 0x1FF2 && n <= 0x1FF4)
  678. || (n >= 0x1FF6 && n <= 0x1FFC)
  679. || (n == 0x2126)
  680. || (n >= 0x212A && n <= 0x212B)
  681. || (n == 0x212E)
  682. || (n >= 0x2180 && n <= 0x2182)
  683. || (n >= 0x3041 && n <= 0x3094)
  684. || (n >= 0x30A1 && n <= 0x30FA)
  685. || (n >= 0x3105 && n <= 0x312C)
  686. || (n >= 0xAC00 && n <= 0xD7A3);
  687. }
  688. /**
  689. * Determines if the specified character matches the <em>Ideographic</em>
  690. * production.
  691. *
  692. * <p>See:
  693. * <a href="http://www.w3.org/TR/REC-xml#NT-Ideographic">Definition of Ideographic</a>.
  694. *
  695. * @param c
  696. * the character to check.
  697. *
  698. * @return
  699. * <code>true</code> if the character matches the production, or
  700. * <code>false</code> if it does not.
  701. */
  702. private static final boolean isIdeographic(char c) {
  703. int n = (int) c;
  704. return (n >= 0x4E00 && n <= 0x9FA5)
  705. || (n == 0x3007)
  706. || (n >= 0x3021 && n <= 0x3029);
  707. }
  708. /**
  709. * Determines if the specified character matches the <em>CombiningChar</em>
  710. * production.
  711. *
  712. * <p>See:
  713. * <a href="http://www.w3.org/TR/REC-xml#NT-CombiningChar">Definition of CombiningChar</a>.
  714. *
  715. * @param c
  716. * the character to check.
  717. *
  718. * @return
  719. * <code>true</code> if the character matches the production, or
  720. * <code>false</code> if it does not.
  721. */
  722. private static final boolean isCombiningChar(char c) {
  723. int n = (int) c;
  724. return (n >= 0x0300 && n <= 0x0345)
  725. || (n >= 0x0360 && n <= 0x0361)
  726. || (n >= 0x0483 && n <= 0x0486)
  727. || (n >= 0x0591 && n <= 0x05A1)
  728. || (n >= 0x05A3 && n <= 0x05B9)
  729. || (n >= 0x05BB && n <= 0x05BD)
  730. || (n == 0x05BF)
  731. || (n >= 0x05C1 && n <= 0x05C2)
  732. || (n == 0x05C4)
  733. || (n >= 0x064B && n <= 0x0652)
  734. || (n == 0x0670)
  735. || (n >= 0x06D6 && n <= 0x06DC)
  736. || (n >= 0x06DD && n <= 0x06DF)
  737. || (n >= 0x06E0 && n <= 0x06E4)
  738. || (n >= 0x06E7 && n <= 0x06E8)
  739. || (n >= 0x06EA && n <= 0x06ED)
  740. || (n >= 0x0901 && n <= 0x0903)
  741. || (n == 0x093C)
  742. || (n >= 0x093E && n <= 0x094C)
  743. || (n == 0x094D)
  744. || (n >= 0x0951 && n <= 0x0954)
  745. || (n >= 0x0962 && n <= 0x0963)
  746. || (n >= 0x0981 && n <= 0x0983)
  747. || (n == 0x09BC)
  748. || (n == 0x09BE)
  749. || (n == 0x09BF)
  750. || (n >= 0x09C0 && n <= 0x09C4)
  751. || (n >= 0x09C7 && n <= 0x09C8)
  752. || (n >= 0x09CB && n <= 0x09CD)
  753. || (n == 0x09D7)
  754. || (n >= 0x09E2 && n <= 0x09E3)
  755. || (n == 0x0A02)
  756. || (n == 0x0A3C)
  757. || (n == 0x0A3E)
  758. || (n == 0x0A3F)
  759. || (n >= 0x0A40 && n <= 0x0A42)
  760. || (n >= 0x0A47 && n <= 0x0A48)
  761. || (n >= 0x0A4B && n <= 0x0A4D)
  762. || (n >= 0x0A70 && n <= 0x0A71)
  763. || (n >= 0x0A81 && n <= 0x0A83)
  764. || (n == 0x0ABC)
  765. || (n >= 0x0ABE && n <= 0x0AC5)
  766. || (n >= 0x0AC7 && n <= 0x0AC9)
  767. || (n >= 0x0ACB && n <= 0x0ACD)
  768. || (n >= 0x0B01 && n <= 0x0B03)
  769. || (n == 0x0B3C)
  770. || (n >= 0x0B3E && n <= 0x0B43)
  771. || (n >= 0x0B47 && n <= 0x0B48)
  772. || (n >= 0x0B4B && n <= 0x0B4D)
  773. || (n >= 0x0B56 && n <= 0x0B57)
  774. || (n >= 0x0B82 && n <= 0x0B83)
  775. || (n >= 0x0BBE && n <= 0x0BC2)
  776. || (n >= 0x0BC6 && n <= 0x0BC8)
  777. || (n >= 0x0BCA && n <= 0x0BCD)
  778. || (n == 0x0BD7)
  779. || (n >= 0x0C01 && n <= 0x0C03)
  780. || (n >= 0x0C3E && n <= 0x0C44)
  781. || (n >= 0x0C46 && n <= 0x0C48)
  782. || (n >= 0x0C4A && n <= 0x0C4D)
  783. || (n >= 0x0C55 && n <= 0x0C56)
  784. || (n >= 0x0C82 && n <= 0x0C83)
  785. || (n >= 0x0CBE && n <= 0x0CC4)
  786. || (n >= 0x0CC6 && n <= 0x0CC8)
  787. || (n >= 0x0CCA && n <= 0x0CCD)
  788. || (n >= 0x0CD5 && n <= 0x0CD6)
  789. || (n >= 0x0D02 && n <= 0x0D03)
  790. || (n >= 0x0D3E && n <= 0x0D43)
  791. || (n >= 0x0D46 && n <= 0x0D48)
  792. || (n >= 0x0D4A && n <= 0x0D4D)
  793. || (n == 0x0D57)
  794. || (n == 0x0E31)
  795. || (n >= 0x0E34 && n <= 0x0E3A)
  796. || (n >= 0x0E47 && n <= 0x0E4E)
  797. || (n == 0x0EB1)
  798. || (n >= 0x0EB4 && n <= 0x0EB9)
  799. || (n >= 0x0EBB && n <= 0x0EBC)
  800. || (n >= 0x0EC8 && n <= 0x0ECD)
  801. || (n >= 0x0F18 && n <= 0x0F19)
  802. || (n == 0x0F35)
  803. || (n == 0x0F37)
  804. || (n == 0x0F39)
  805. || (n == 0x0F3E)
  806. || (n == 0x0F3F)
  807. || (n >= 0x0F71 && n <= 0x0F84)
  808. || (n >= 0x0F86 && n <= 0x0F8B)
  809. || (n >= 0x0F90 && n <= 0x0F95)
  810. || (n == 0x0F97)
  811. || (n >= 0x0F99 && n <= 0x0FAD)
  812. || (n >= 0x0FB1 && n <= 0x0FB7)
  813. || (n == 0x0FB9)
  814. || (n >= 0x20D0 && n <= 0x20DC)
  815. || (n == 0x20E1)
  816. || (n >= 0x302A && n <= 0x302F)
  817. || (n == 0x3099)
  818. || (n == 0x309A);
  819. }
  820. /**
  821. * Determines if the specified character matches the <em>Digit</em>
  822. * production.
  823. *
  824. * <p>See:
  825. * <a href="http://www.w3.org/TR/REC-xml#NT-Digit">Definition of Digit</a>.
  826. *
  827. * @param c
  828. * the character to check.
  829. *
  830. * @return
  831. * <code>true</code> if the character matches the production, or
  832. * <code>false</code> if it does not.
  833. */
  834. private static final boolean isDigit(char c) {
  835. int n = (int) c;
  836. return (n >= 0x0030 && n <= 0x0039)
  837. || (n >= 0x0660 && n <= 0x0669)
  838. || (n >= 0x06F0 && n <= 0x06F9)
  839. || (n >= 0x0966 && n <= 0x096F)
  840. || (n >= 0x09E6 && n <= 0x09EF)
  841. || (n >= 0x0A66 && n <= 0x0A6F)
  842. || (n >= 0x0AE6 && n <= 0x0AEF)
  843. || (n >= 0x0B66 && n <= 0x0B6F)
  844. || (n >= 0x0BE7 && n <= 0x0BEF)
  845. || (n >= 0x0C66 && n <= 0x0C6F)
  846. || (n >= 0x0CE6 && n <= 0x0CEF)
  847. || (n >= 0x0D66 && n <= 0x0D6F)
  848. || (n >= 0x0E50 && n <= 0x0E59)
  849. || (n >= 0x0ED0 && n <= 0x0ED9)
  850. || (n >= 0x0F20 && n <= 0x0F29);
  851. }
  852. /**
  853. * Determines if the specified character matches the <em>Extender</em>
  854. * production.
  855. *
  856. * <p>See:
  857. * <a href="http://www.w3.org/TR/REC-xml#NT-Extender">Definition of Extender</a>.
  858. *
  859. * @param c
  860. * the character to check.
  861. *
  862. * @return
  863. * <code>true</code> if the character matches the production, or
  864. * <code>false</code> if it does not.
  865. */
  866. private static final boolean isExtender(char c) {
  867. int n = (int) c;
  868. return (n == 0x00B7)
  869. || (n == 0x02D0)
  870. || (n == 0x02D1)
  871. || (n == 0x0387)
  872. || (n == 0x0640)
  873. || (n == 0x0E46)
  874. || (n == 0x0EC6)
  875. || (n == 0x3005)
  876. || (n >= 0x3031 && n <= 0x3035)
  877. || (n >= 0x309D && n <= 0x309E)
  878. || (n >= 0x30FC && n <= 0x30FE);
  879. }
  880. //-------------------------------------------------------------------------
  881. // Class fields
  882. //-------------------------------------------------------------------------
  883. //-------------------------------------------------------------------------
  884. // Constructor
  885. //-------------------------------------------------------------------------
  /**
   * Creates an <code>XMLChecker</code> instance. The constructor is private
   * because this class only offers static functions and is not meant to be
   * instantiated.
   */
  private XMLChecker() {
    // Same as the implicit superclass constructor call; nothing else to do
    super();
  }
  893. //-------------------------------------------------------------------------
  894. // Fields
  895. //-------------------------------------------------------------------------
  896. //-------------------------------------------------------------------------
  897. // Methods
  898. //-------------------------------------------------------------------------
  899. }