PageRenderTime 45ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/twitter4j-core/src/internal-json/java/twitter4j/HTMLEntity.java

http://github.com/yusuke/twitter4j
Java | 469 lines | 397 code | 19 blank | 53 comment | 47 complexity | 55f76f8048da89b3195720002d99c694 MD5 | raw file
Possible License(s): Apache-2.0
  1. /*
  2. * Copyright 2007 Yusuke Yamamoto
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package twitter4j;
  17. import java.util.Arrays;
  18. import java.util.HashMap;
  19. import java.util.Map;
  20. final class HTMLEntity {
  21. static String escape(String original) {
  22. StringBuilder buf = new StringBuilder(original);
  23. escape(buf);
  24. return buf.toString();
  25. }
  26. static void escape(StringBuilder original) {
  27. int index = 0;
  28. String escaped;
  29. while (index < original.length()) {
  30. escaped = entityEscapeMap.get(original.substring(index, index + 1));
  31. if (escaped != null) {
  32. original.replace(index, index + 1, escaped);
  33. index += escaped.length();
  34. } else {
  35. index++;
  36. }
  37. }
  38. }
  39. static String unescape(String original) {
  40. String returnValue = null;
  41. if (original != null) {
  42. StringBuilder buf = new StringBuilder(original);
  43. unescape(buf);
  44. returnValue = buf.toString();
  45. }
  46. return returnValue;
  47. }
  48. static void unescape(StringBuilder original) {
  49. int index = 0;
  50. int semicolonIndex;
  51. String escaped;
  52. String entity;
  53. while (index < original.length()) {
  54. index = original.indexOf("&", index);
  55. if (-1 == index) {
  56. break;
  57. }
  58. semicolonIndex = original.indexOf(";", index);
  59. if (-1 != semicolonIndex) {
  60. escaped = original.substring(index, semicolonIndex + 1);
  61. entity = escapeEntityMap.get(escaped);
  62. if (entity != null) {
  63. original.replace(index, semicolonIndex + 1, entity);
  64. }
  65. index++;
  66. } else {
  67. break;
  68. }
  69. }
  70. }
  71. /**
  72. * @author Yusuke Yamamoto - yusuke at mac.com
  73. * @author Philip Hachey - philip dot hachey at gmail dot com
  74. */
  75. static String unescapeAndSlideEntityIncdices(String text, UserMentionEntity[] userMentionEntities,
  76. URLEntity[] urlEntities, HashtagEntity[] hashtagEntities,
  77. MediaEntity[] mediaEntities) {
  78. int entityIndexesLength = 0;
  79. entityIndexesLength += userMentionEntities == null ? 0 : userMentionEntities.length;
  80. entityIndexesLength += urlEntities == null ? 0 : urlEntities.length;
  81. entityIndexesLength += hashtagEntities == null ? 0 : hashtagEntities.length;
  82. entityIndexesLength += mediaEntities == null ? 0 : mediaEntities.length;
  83. EntityIndex[] entityIndexes = new EntityIndex[entityIndexesLength];
  84. int copyStartIndex = 0;
  85. if (userMentionEntities != null) {
  86. System.arraycopy(userMentionEntities, 0, entityIndexes, copyStartIndex, userMentionEntities.length);
  87. copyStartIndex += userMentionEntities.length;
  88. }
  89. if (urlEntities != null) {
  90. System.arraycopy(urlEntities, 0, entityIndexes, copyStartIndex, urlEntities.length);
  91. copyStartIndex += urlEntities.length;
  92. }
  93. if (hashtagEntities != null) {
  94. System.arraycopy(hashtagEntities, 0, entityIndexes, copyStartIndex, hashtagEntities.length);
  95. copyStartIndex += hashtagEntities.length;
  96. }
  97. if (mediaEntities != null) {
  98. System.arraycopy(mediaEntities, 0, entityIndexes, copyStartIndex, mediaEntities.length);
  99. }
  100. Arrays.sort(entityIndexes);
  101. boolean handlingStart = true;
  102. int entityIndex = 0;
  103. int delta = 0;
  104. int semicolonIndex;
  105. String escaped;
  106. String entity;
  107. StringBuilder unescaped = new StringBuilder(text.length());
  108. /*
  109. * Slide indices of twitter entities not only when replacing character
  110. * entity references but also adjust the twitter code point based
  111. * indexes with Java standard character indexes. See: HTMLEntityTest.
  112. * testUnescapeAndSlideEntityIncdicesWithSurrogateCodePoints
  113. */
  114. int textCodePointLength = text.codePointCount(0, text.length());
  115. int codePoint;
  116. for (int index = 0, twitterIndex = 0; index < text.length(); index +=
  117. Character.charCount(codePoint), twitterIndex++) {
  118. codePoint = text.codePointAt(index);
  119. if (codePoint == '&') {
  120. semicolonIndex = text.indexOf(";", index);
  121. if (-1 != semicolonIndex) {
  122. escaped = text.substring(index, semicolonIndex + 1);
  123. entity = escapeEntityMap.get(escaped);
  124. if (entity != null) {
  125. unescaped.append(entity);
  126. index = semicolonIndex;
  127. twitterIndex = text.codePointCount(0, semicolonIndex);
  128. delta = 1 - escaped.length();
  129. } else {
  130. unescaped.appendCodePoint(codePoint);
  131. }
  132. } else {
  133. unescaped.appendCodePoint(codePoint);
  134. }
  135. } else {
  136. unescaped.appendCodePoint(codePoint);
  137. }
  138. if (entityIndex < entityIndexes.length) {
  139. if (handlingStart) {
  140. if (entityIndexes[entityIndex].getStart() == (delta + twitterIndex)) {
  141. entityIndexes[entityIndex]
  142. .setStart(unescaped.length() - Character.charCount(text.codePointAt(index)));
  143. handlingStart = false;
  144. }
  145. } else if (entityIndexes[entityIndex].getEnd() == (delta + twitterIndex)) {
  146. entityIndexes[entityIndex]
  147. .setEnd(unescaped.length() - Character.charCount(text.codePointAt(index)));
  148. entityIndex++;
  149. handlingStart = true;
  150. }
  151. }
  152. delta = 0;
  153. }
  154. if (entityIndex < entityIndexes.length) {
  155. if (entityIndexes[entityIndex].getEnd() == textCodePointLength) {
  156. entityIndexes[entityIndex].setEnd(unescaped.length());
  157. }
  158. }
  159. return unescaped.toString();
  160. }
  /** Maps a literal character to its named entity reference, e.g. {@code "<"} to {@code "&lt;"}. */
  private static final Map<String, String> entityEscapeMap = new HashMap<String, String>();
  /** Maps a named or numeric entity reference to the literal character it denotes. */
  private static final Map<String, String> escapeEntityMap = new HashMap<String, String>();

  static {
    // Each row is {named reference, numeric reference, literal character}.
    String[][] table = {
        /* Latin-1 */
        {"&nbsp;", "&#160;", "\u00A0"}, // no-break space = non-breaking space
        {"&iexcl;", "&#161;", "\u00A1"}, // inverted exclamation mark
        {"&cent;", "&#162;", "\u00A2"}, // cent sign
        {"&pound;", "&#163;", "\u00A3"}, // pound sign
        {"&curren;", "&#164;", "\u00A4"}, // currency sign
        {"&yen;", "&#165;", "\u00A5"}, // yen sign = yuan sign
        {"&brvbar;", "&#166;", "\u00A6"}, // broken bar = broken vertical bar
        {"&sect;", "&#167;", "\u00A7"}, // section sign
        {"&uml;", "&#168;", "\u00A8"}, // diaeresis = spacing diaeresis
        {"&copy;", "&#169;", "\u00A9"}, // copyright sign
        {"&ordf;", "&#170;", "\u00AA"}, // feminine ordinal indicator
        {"&laquo;", "&#171;", "\u00AB"}, // left-pointing double angle quotation mark = left pointing guillemet
        {"&not;", "&#172;", "\u00AC"}, // not sign
        {"&shy;", "&#173;", "\u00AD"}, // soft hyphen = discretionary hyphen
        {"&reg;", "&#174;", "\u00AE"}, // registered sign = registered trade mark sign
        {"&macr;", "&#175;", "\u00AF"}, // macron = spacing macron = overline = APL overbar
        {"&deg;", "&#176;", "\u00B0"}, // degree sign
        {"&plusmn;", "&#177;", "\u00B1"}, // plus-minus sign = plus-or-minus sign
        {"&sup2;", "&#178;", "\u00B2"}, // superscript two = superscript digit two = squared
        {"&sup3;", "&#179;", "\u00B3"}, // superscript three = superscript digit three = cubed
        {"&acute;", "&#180;", "\u00B4"}, // acute accent = spacing acute
        {"&micro;", "&#181;", "\u00B5"}, // micro sign
        {"&para;", "&#182;", "\u00B6"}, // pilcrow sign = paragraph sign
        {"&middot;", "&#183;", "\u00B7"}, // middle dot = Georgian comma = Greek middle dot
        {"&cedil;", "&#184;", "\u00B8"}, // cedilla = spacing cedilla
        {"&sup1;", "&#185;", "\u00B9"}, // superscript one = superscript digit one
        {"&ordm;", "&#186;", "\u00BA"}, // masculine ordinal indicator
        {"&raquo;", "&#187;", "\u00BB"}, // right-pointing double angle quotation mark = right pointing guillemet
        {"&frac14;", "&#188;", "\u00BC"}, // vulgar fraction one quarter = fraction one quarter
        {"&frac12;", "&#189;", "\u00BD"}, // vulgar fraction one half = fraction one half
        {"&frac34;", "&#190;", "\u00BE"}, // vulgar fraction three quarters = fraction three quarters
        {"&iquest;", "&#191;", "\u00BF"}, // inverted question mark = turned question mark
        {"&Agrave;", "&#192;", "\u00C0"}, // latin capital letter A with grave
        {"&Aacute;", "&#193;", "\u00C1"}, // latin capital letter A with acute
        {"&Acirc;", "&#194;", "\u00C2"}, // latin capital letter A with circumflex
        {"&Atilde;", "&#195;", "\u00C3"}, // latin capital letter A with tilde
        {"&Auml;", "&#196;", "\u00C4"}, // latin capital letter A with diaeresis
        {"&Aring;", "&#197;", "\u00C5"}, // latin capital letter A with ring above
        {"&AElig;", "&#198;", "\u00C6"}, // latin capital letter AE = latin capital ligature AE
        {"&Ccedil;", "&#199;", "\u00C7"}, // latin capital letter C with cedilla
        {"&Egrave;", "&#200;", "\u00C8"}, // latin capital letter E with grave
        {"&Eacute;", "&#201;", "\u00C9"}, // latin capital letter E with acute
        {"&Ecirc;", "&#202;", "\u00CA"}, // latin capital letter E with circumflex
        {"&Euml;", "&#203;", "\u00CB"}, // latin capital letter E with diaeresis
        {"&Igrave;", "&#204;", "\u00CC"}, // latin capital letter I with grave
        {"&Iacute;", "&#205;", "\u00CD"}, // latin capital letter I with acute
        {"&Icirc;", "&#206;", "\u00CE"}, // latin capital letter I with circumflex
        {"&Iuml;", "&#207;", "\u00CF"}, // latin capital letter I with diaeresis
        {"&ETH;", "&#208;", "\u00D0"}, // latin capital letter ETH
        {"&Ntilde;", "&#209;", "\u00D1"}, // latin capital letter N with tilde
        {"&Ograve;", "&#210;", "\u00D2"}, // latin capital letter O with grave
        {"&Oacute;", "&#211;", "\u00D3"}, // latin capital letter O with acute
        {"&Ocirc;", "&#212;", "\u00D4"}, // latin capital letter O with circumflex
        {"&Otilde;", "&#213;", "\u00D5"}, // latin capital letter O with tilde
        {"&Ouml;", "&#214;", "\u00D6"}, // latin capital letter O with diaeresis
        {"&times;", "&#215;", "\u00D7"}, // multiplication sign
        {"&Oslash;", "&#216;", "\u00D8"}, // latin capital letter O with stroke = O slash
        {"&Ugrave;", "&#217;", "\u00D9"}, // latin capital letter U with grave
        {"&Uacute;", "&#218;", "\u00DA"}, // latin capital letter U with acute
        {"&Ucirc;", "&#219;", "\u00DB"}, // latin capital letter U with circumflex
        {"&Uuml;", "&#220;", "\u00DC"}, // latin capital letter U with diaeresis
        {"&Yacute;", "&#221;", "\u00DD"}, // latin capital letter Y with acute
        {"&THORN;", "&#222;", "\u00DE"}, // latin capital letter THORN
        {"&szlig;", "&#223;", "\u00DF"}, // latin small letter sharp s = ess-zed
        {"&agrave;", "&#224;", "\u00E0"}, // latin small letter a with grave
        {"&aacute;", "&#225;", "\u00E1"}, // latin small letter a with acute
        {"&acirc;", "&#226;", "\u00E2"}, // latin small letter a with circumflex
        {"&atilde;", "&#227;", "\u00E3"}, // latin small letter a with tilde
        {"&auml;", "&#228;", "\u00E4"}, // latin small letter a with diaeresis
        {"&aring;", "&#229;", "\u00E5"}, // latin small letter a with ring above
        {"&aelig;", "&#230;", "\u00E6"}, // latin small letter ae = latin small ligature ae
        {"&ccedil;", "&#231;", "\u00E7"}, // latin small letter c with cedilla
        {"&egrave;", "&#232;", "\u00E8"}, // latin small letter e with grave
        {"&eacute;", "&#233;", "\u00E9"}, // latin small letter e with acute
        {"&ecirc;", "&#234;", "\u00EA"}, // latin small letter e with circumflex
        {"&euml;", "&#235;", "\u00EB"}, // latin small letter e with diaeresis
        {"&igrave;", "&#236;", "\u00EC"}, // latin small letter i with grave
        {"&iacute;", "&#237;", "\u00ED"}, // latin small letter i with acute
        {"&icirc;", "&#238;", "\u00EE"}, // latin small letter i with circumflex
        {"&iuml;", "&#239;", "\u00EF"}, // latin small letter i with diaeresis
        {"&eth;", "&#240;", "\u00F0"}, // latin small letter eth
        {"&ntilde;", "&#241;", "\u00F1"}, // latin small letter n with tilde
        {"&ograve;", "&#242;", "\u00F2"}, // latin small letter o with grave
        {"&oacute;", "&#243;", "\u00F3"}, // latin small letter o with acute
        {"&ocirc;", "&#244;", "\u00F4"}, // latin small letter o with circumflex
        {"&otilde;", "&#245;", "\u00F5"}, // latin small letter o with tilde
        {"&ouml;", "&#246;", "\u00F6"}, // latin small letter o with diaeresis
        {"&divide;", "&#247;", "\u00F7"}, // division sign
        {"&oslash;", "&#248;", "\u00F8"}, // latin small letter o with stroke = o slash
        {"&ugrave;", "&#249;", "\u00F9"}, // latin small letter u with grave
        {"&uacute;", "&#250;", "\u00FA"}, // latin small letter u with acute
        {"&ucirc;", "&#251;", "\u00FB"}, // latin small letter u with circumflex
        {"&uuml;", "&#252;", "\u00FC"}, // latin small letter u with diaeresis
        {"&yacute;", "&#253;", "\u00FD"}, // latin small letter y with acute
        {"&thorn;", "&#254;", "\u00FE"}, // latin small letter thorn
        {"&yuml;", "&#255;", "\u00FF"}, // latin small letter y with diaeresis
        {"&fnof;", "&#402;", "\u0192"}, // latin small f with hook = function = florin
        /* Greek */
        {"&Alpha;", "&#913;", "\u0391"}, // greek capital letter alpha
        {"&Beta;", "&#914;", "\u0392"}, // greek capital letter beta
        {"&Gamma;", "&#915;", "\u0393"}, // greek capital letter gamma
        {"&Delta;", "&#916;", "\u0394"}, // greek capital letter delta
        {"&Epsilon;", "&#917;", "\u0395"}, // greek capital letter epsilon
        {"&Zeta;", "&#918;", "\u0396"}, // greek capital letter zeta
        {"&Eta;", "&#919;", "\u0397"}, // greek capital letter eta
        {"&Theta;", "&#920;", "\u0398"}, // greek capital letter theta
        {"&Iota;", "&#921;", "\u0399"}, // greek capital letter iota
        {"&Kappa;", "&#922;", "\u039A"}, // greek capital letter kappa
        {"&Lambda;", "&#923;", "\u039B"}, // greek capital letter lambda
        {"&Mu;", "&#924;", "\u039C"}, // greek capital letter mu
        {"&Nu;", "&#925;", "\u039D"}, // greek capital letter nu
        {"&Xi;", "&#926;", "\u039E"}, // greek capital letter xi
        {"&Omicron;", "&#927;", "\u039F"}, // greek capital letter omicron
        {"&Pi;", "&#928;", "\u03A0"}, // greek capital letter pi
        {"&Rho;", "&#929;", "\u03A1"}, // greek capital letter rho
        // there is no Sigmaf and no U+03A2
        {"&Sigma;", "&#931;", "\u03A3"}, // greek capital letter sigma
        {"&Tau;", "&#932;", "\u03A4"}, // greek capital letter tau
        {"&Upsilon;", "&#933;", "\u03A5"}, // greek capital letter upsilon
        {"&Phi;", "&#934;", "\u03A6"}, // greek capital letter phi
        {"&Chi;", "&#935;", "\u03A7"}, // greek capital letter chi
        {"&Psi;", "&#936;", "\u03A8"}, // greek capital letter psi
        {"&Omega;", "&#937;", "\u03A9"}, // greek capital letter omega
        {"&alpha;", "&#945;", "\u03B1"}, // greek small letter alpha
        {"&beta;", "&#946;", "\u03B2"}, // greek small letter beta
        {"&gamma;", "&#947;", "\u03B3"}, // greek small letter gamma
        {"&delta;", "&#948;", "\u03B4"}, // greek small letter delta
        {"&epsilon;", "&#949;", "\u03B5"}, // greek small letter epsilon
        {"&zeta;", "&#950;", "\u03B6"}, // greek small letter zeta
        {"&eta;", "&#951;", "\u03B7"}, // greek small letter eta
        {"&theta;", "&#952;", "\u03B8"}, // greek small letter theta
        {"&iota;", "&#953;", "\u03B9"}, // greek small letter iota
        {"&kappa;", "&#954;", "\u03BA"}, // greek small letter kappa
        {"&lambda;", "&#955;", "\u03BB"}, // greek small letter lambda
        {"&mu;", "&#956;", "\u03BC"}, // greek small letter mu
        {"&nu;", "&#957;", "\u03BD"}, // greek small letter nu
        {"&xi;", "&#958;", "\u03BE"}, // greek small letter xi
        {"&omicron;", "&#959;", "\u03BF"}, // greek small letter omicron
        {"&pi;", "&#960;", "\u03C0"}, // greek small letter pi
        {"&rho;", "&#961;", "\u03C1"}, // greek small letter rho
        {"&sigmaf;", "&#962;", "\u03C2"}, // greek small letter final sigma
        {"&sigma;", "&#963;", "\u03C3"}, // greek small letter sigma
        {"&tau;", "&#964;", "\u03C4"}, // greek small letter tau
        {"&upsilon;", "&#965;", "\u03C5"}, // greek small letter upsilon
        {"&phi;", "&#966;", "\u03C6"}, // greek small letter phi
        {"&chi;", "&#967;", "\u03C7"}, // greek small letter chi
        {"&psi;", "&#968;", "\u03C8"}, // greek small letter psi
        {"&omega;", "&#969;", "\u03C9"}, // greek small letter omega
        {"&thetasym;", "&#977;", "\u03D1"}, // greek small letter theta symbol
        {"&upsih;", "&#978;", "\u03D2"}, // greek upsilon with hook symbol
        {"&piv;", "&#982;", "\u03D6"}, // greek pi symbol
        /* General Punctuation */
        {"&bull;", "&#8226;", "\u2022"}, // bullet = black small circle (NOT the bullet operator U+2219)
        {"&hellip;", "&#8230;", "\u2026"}, // horizontal ellipsis = three dot leader
        {"&prime;", "&#8242;", "\u2032"}, // prime = minutes = feet
        {"&Prime;", "&#8243;", "\u2033"}, // double prime = seconds = inches
        {"&oline;", "&#8254;", "\u203E"}, // overline = spacing overscore
        {"&frasl;", "&#8260;", "\u2044"}, // fraction slash
        /* Letterlike Symbols */
        {"&weierp;", "&#8472;", "\u2118"}, // script capital P = power set = Weierstrass p
        {"&image;", "&#8465;", "\u2111"}, // blackletter capital I = imaginary part
        {"&real;", "&#8476;", "\u211C"}, // blackletter capital R = real part symbol
        {"&trade;", "&#8482;", "\u2122"}, // trade mark sign
        {"&alefsym;", "&#8501;", "\u2135"}, // alef symbol = first transfinite cardinal (NOT hebrew letter alef U+05D0)
        /* Arrows */
        {"&larr;", "&#8592;", "\u2190"}, // leftwards arrow
        {"&uarr;", "&#8593;", "\u2191"}, // upwards arrow
        {"&rarr;", "&#8594;", "\u2192"}, // rightwards arrow
        {"&darr;", "&#8595;", "\u2193"}, // downwards arrow
        {"&harr;", "&#8596;", "\u2194"}, // left right arrow
        {"&crarr;", "&#8629;", "\u21B5"}, // downwards arrow with corner leftwards = carriage return
        // Unicode has no dedicated 'is implied by' character; ISOtech suggests lArr for it.
        {"&lArr;", "&#8656;", "\u21D0"}, // leftwards double arrow
        {"&uArr;", "&#8657;", "\u21D1"}, // upwards double arrow
        // Unicode has no dedicated 'implies' character; ISOtech suggests rArr for it.
        {"&rArr;", "&#8658;", "\u21D2"}, // rightwards double arrow
        {"&dArr;", "&#8659;", "\u21D3"}, // downwards double arrow
        {"&hArr;", "&#8660;", "\u21D4"}, // left right double arrow
        /* Mathematical Operators */
        {"&forall;", "&#8704;", "\u2200"}, // for all
        {"&part;", "&#8706;", "\u2202"}, // partial differential
        {"&exist;", "&#8707;", "\u2203"}, // there exists
        {"&empty;", "&#8709;", "\u2205"}, // empty set = null set = diameter
        {"&nabla;", "&#8711;", "\u2207"}, // nabla = backward difference
        {"&isin;", "&#8712;", "\u2208"}, // element of
        {"&notin;", "&#8713;", "\u2209"}, // not an element of
        {"&ni;", "&#8715;", "\u220B"}, // contains as member
        {"&prod;", "&#8719;", "\u220F"}, // n-ary product = product sign (NOT the same character as U+03A0)
        {"&sum;", "&#8721;", "\u2211"}, // n-ary summation (NOT the same character as U+03A3)
        {"&minus;", "&#8722;", "\u2212"}, // minus sign
        {"&lowast;", "&#8727;", "\u2217"}, // asterisk operator
        {"&radic;", "&#8730;", "\u221A"}, // square root = radical sign
        {"&prop;", "&#8733;", "\u221D"}, // proportional to
        {"&infin;", "&#8734;", "\u221E"}, // infinity
        {"&ang;", "&#8736;", "\u2220"}, // angle
        {"&and;", "&#8743;", "\u2227"}, // logical and = wedge
        {"&or;", "&#8744;", "\u2228"}, // logical or = vee
        {"&cap;", "&#8745;", "\u2229"}, // intersection = cap
        {"&cup;", "&#8746;", "\u222A"}, // union = cup
        {"&int;", "&#8747;", "\u222B"}, // integral
        {"&there4;", "&#8756;", "\u2234"}, // therefore
        {"&sim;", "&#8764;", "\u223C"}, // tilde operator = varies with = similar to (NOT the tilde U+007E)
        {"&cong;", "&#8773;", "\u2245"}, // approximately equal to
        {"&asymp;", "&#8776;", "\u2248"}, // almost equal to = asymptotic to
        {"&ne;", "&#8800;", "\u2260"}, // not equal to
        {"&equiv;", "&#8801;", "\u2261"}, // identical to
        {"&le;", "&#8804;", "\u2264"}, // less-than or equal to
        {"&ge;", "&#8805;", "\u2265"}, // greater-than or equal to
        {"&sub;", "&#8834;", "\u2282"}, // subset of
        {"&sup;", "&#8835;", "\u2283"}, // superset of
        {"&sube;", "&#8838;", "\u2286"}, // subset of or equal to
        {"&supe;", "&#8839;", "\u2287"}, // superset of or equal to
        {"&oplus;", "&#8853;", "\u2295"}, // circled plus = direct sum
        {"&otimes;", "&#8855;", "\u2297"}, // circled times = vector product
        {"&perp;", "&#8869;", "\u22A5"}, // up tack = orthogonal to = perpendicular
        {"&sdot;", "&#8901;", "\u22C5"}, // dot operator (NOT the same character as U+00B7)
        /* Miscellaneous Technical */
        {"&lceil;", "&#8968;", "\u2308"}, // left ceiling = apl upstile
        {"&rceil;", "&#8969;", "\u2309"}, // right ceiling
        {"&lfloor;", "&#8970;", "\u230A"}, // left floor = apl downstile
        {"&rfloor;", "&#8971;", "\u230B"}, // right floor
        {"&lang;", "&#9001;", "\u2329"}, // left-pointing angle bracket = bra (NOT U+003C)
        {"&rang;", "&#9002;", "\u232A"}, // right-pointing angle bracket = ket (NOT U+003E)
        /* Geometric Shapes */
        {"&loz;", "&#9674;", "\u25CA"}, // lozenge
        /* Miscellaneous Symbols ("black" here seems to mean filled as opposed to hollow) */
        {"&spades;", "&#9824;", "\u2660"}, // black spade suit
        {"&clubs;", "&#9827;", "\u2663"}, // black club suit = shamrock
        {"&hearts;", "&#9829;", "\u2665"}, // black heart suit = valentine
        {"&diams;", "&#9830;", "\u2666"}, // black diamond suit
        /* Markup-significant characters */
        {"&quot;", "&#34;", "\""}, // quotation mark = APL quote
        {"&amp;", "&#38;", "\u0026"}, // ampersand
        {"&lt;", "&#60;", "\u003C"}, // less-than sign
        {"&gt;", "&#62;", "\u003E"}, // greater-than sign
        /* Latin Extended-A */
        {"&OElig;", "&#338;", "\u0152"}, // latin capital ligature OE
        {"&oelig;", "&#339;", "\u0153"}, // latin small ligature oe (a separate character in some languages)
        {"&Scaron;", "&#352;", "\u0160"}, // latin capital letter S with caron
        {"&scaron;", "&#353;", "\u0161"}, // latin small letter s with caron
        {"&Yuml;", "&#376;", "\u0178"}, // latin capital letter Y with diaeresis
        /* Spacing Modifier Letters */
        {"&circ;", "&#710;", "\u02C6"}, // modifier letter circumflex accent
        {"&tilde;", "&#732;", "\u02DC"}, // small tilde
        /* General Punctuation */
        {"&ensp;", "&#8194;", "\u2002"}, // en space
        {"&emsp;", "&#8195;", "\u2003"}, // em space
        {"&thinsp;", "&#8201;", "\u2009"}, // thin space
        {"&zwnj;", "&#8204;", "\u200C"}, // zero width non-joiner
        {"&zwj;", "&#8205;", "\u200D"}, // zero width joiner
        {"&lrm;", "&#8206;", "\u200E"}, // left-to-right mark
        {"&rlm;", "&#8207;", "\u200F"}, // right-to-left mark
        {"&ndash;", "&#8211;", "\u2013"}, // en dash
        {"&mdash;", "&#8212;", "\u2014"}, // em dash
        {"&lsquo;", "&#8216;", "\u2018"}, // left single quotation mark
        {"&rsquo;", "&#8217;", "\u2019"}, // right single quotation mark
        {"&sbquo;", "&#8218;", "\u201A"}, // single low-9 quotation mark
        {"&ldquo;", "&#8220;", "\u201C"}, // left double quotation mark
        {"&rdquo;", "&#8221;", "\u201D"}, // right double quotation mark
        {"&bdquo;", "&#8222;", "\u201E"}, // double low-9 quotation mark
        {"&dagger;", "&#8224;", "\u2020"}, // dagger
        {"&Dagger;", "&#8225;", "\u2021"}, // double dagger
        {"&permil;", "&#8240;", "\u2030"}, // per mille sign
        {"&lsaquo;", "&#8249;", "\u2039"}, // single left-pointing angle quotation mark (proposed, not yet ISO standardized)
        {"&rsaquo;", "&#8250;", "\u203A"}, // single right-pointing angle quotation mark (proposed, not yet ISO standardized)
        {"&euro;", "&#8364;", "\u20AC"} // euro sign
    };
    for (int row = 0; row < table.length; row++) {
      String named = table[row][0];
      String numeric = table[row][1];
      String character = table[row][2];
      // Escaping always produces the named form; unescaping accepts both forms.
      entityEscapeMap.put(character, named);
      escapeEntityMap.put(named, character);
      escapeEntityMap.put(numeric, character);
    }
  }
  450. }