/twitter4j-core/src/internal-json/java/twitter4j/HTMLEntity.java
Java | 469 lines | 397 code | 19 blank | 53 comment | 47 complexity | 55f76f8048da89b3195720002d99c694 MD5 | raw file
Possible License(s): Apache-2.0
- /*
- * Copyright 2007 Yusuke Yamamoto
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package twitter4j;
- import java.util.Arrays;
- import java.util.HashMap;
- import java.util.Map;
- final class HTMLEntity {
- static String escape(String original) {
- StringBuilder buf = new StringBuilder(original);
- escape(buf);
- return buf.toString();
- }
- static void escape(StringBuilder original) {
- int index = 0;
- String escaped;
- while (index < original.length()) {
- escaped = entityEscapeMap.get(original.substring(index, index + 1));
- if (escaped != null) {
- original.replace(index, index + 1, escaped);
- index += escaped.length();
- } else {
- index++;
- }
- }
- }
- static String unescape(String original) {
- String returnValue = null;
- if (original != null) {
- StringBuilder buf = new StringBuilder(original);
- unescape(buf);
- returnValue = buf.toString();
- }
- return returnValue;
- }
- static void unescape(StringBuilder original) {
- int index = 0;
- int semicolonIndex;
- String escaped;
- String entity;
- while (index < original.length()) {
- index = original.indexOf("&", index);
- if (-1 == index) {
- break;
- }
- semicolonIndex = original.indexOf(";", index);
- if (-1 != semicolonIndex) {
- escaped = original.substring(index, semicolonIndex + 1);
- entity = escapeEntityMap.get(escaped);
- if (entity != null) {
- original.replace(index, semicolonIndex + 1, entity);
- }
- index++;
- } else {
- break;
- }
- }
- }
- /**
- * @author Yusuke Yamamoto - yusuke at mac.com
- * @author Philip Hachey - philip dot hachey at gmail dot com
- */
- static String unescapeAndSlideEntityIncdices(String text, UserMentionEntity[] userMentionEntities,
- URLEntity[] urlEntities, HashtagEntity[] hashtagEntities,
- MediaEntity[] mediaEntities) {
-
- int entityIndexesLength = 0;
- entityIndexesLength += userMentionEntities == null ? 0 : userMentionEntities.length;
- entityIndexesLength += urlEntities == null ? 0 : urlEntities.length;
- entityIndexesLength += hashtagEntities == null ? 0 : hashtagEntities.length;
- entityIndexesLength += mediaEntities == null ? 0 : mediaEntities.length;
- EntityIndex[] entityIndexes = new EntityIndex[entityIndexesLength];
- int copyStartIndex = 0;
- if (userMentionEntities != null) {
- System.arraycopy(userMentionEntities, 0, entityIndexes, copyStartIndex, userMentionEntities.length);
- copyStartIndex += userMentionEntities.length;
- }
-
- if (urlEntities != null) {
- System.arraycopy(urlEntities, 0, entityIndexes, copyStartIndex, urlEntities.length);
- copyStartIndex += urlEntities.length;
- }
-
- if (hashtagEntities != null) {
- System.arraycopy(hashtagEntities, 0, entityIndexes, copyStartIndex, hashtagEntities.length);
- copyStartIndex += hashtagEntities.length;
- }
-
- if (mediaEntities != null) {
- System.arraycopy(mediaEntities, 0, entityIndexes, copyStartIndex, mediaEntities.length);
- }
- Arrays.sort(entityIndexes);
- boolean handlingStart = true;
- int entityIndex = 0;
- int delta = 0;
- int semicolonIndex;
- String escaped;
- String entity;
- StringBuilder unescaped = new StringBuilder(text.length());
-
- /*
- * Slide indices of twitter entities not only when replacing character
- * entity references but also adjust the twitter code point based
- * indexes with Java standard character indexes. See: HTMLEntityTest.
- * testUnescapeAndSlideEntityIncdicesWithSurrogateCodePoints
- */
- int textCodePointLength = text.codePointCount(0, text.length());
- int codePoint;
- for (int index = 0, twitterIndex = 0; index < text.length(); index +=
- Character.charCount(codePoint), twitterIndex++) {
- codePoint = text.codePointAt(index);
- if (codePoint == '&') {
- semicolonIndex = text.indexOf(";", index);
- if (-1 != semicolonIndex) {
- escaped = text.substring(index, semicolonIndex + 1);
- entity = escapeEntityMap.get(escaped);
- if (entity != null) {
- unescaped.append(entity);
- index = semicolonIndex;
- twitterIndex = text.codePointCount(0, semicolonIndex);
- delta = 1 - escaped.length();
- } else {
- unescaped.appendCodePoint(codePoint);
- }
- } else {
- unescaped.appendCodePoint(codePoint);
- }
- } else {
- unescaped.appendCodePoint(codePoint);
- }
- if (entityIndex < entityIndexes.length) {
- if (handlingStart) {
- if (entityIndexes[entityIndex].getStart() == (delta + twitterIndex)) {
- entityIndexes[entityIndex]
- .setStart(unescaped.length() - Character.charCount(text.codePointAt(index)));
- handlingStart = false;
- }
- } else if (entityIndexes[entityIndex].getEnd() == (delta + twitterIndex)) {
- entityIndexes[entityIndex]
- .setEnd(unescaped.length() - Character.charCount(text.codePointAt(index)));
- entityIndex++;
- handlingStart = true;
- }
- }
- delta = 0;
- }
- if (entityIndex < entityIndexes.length) {
- if (entityIndexes[entityIndex].getEnd() == textCodePointLength) {
- entityIndexes[entityIndex].setEnd(unescaped.length());
- }
- }
- return unescaped.toString();
- }
- private static final Map<String, String> entityEscapeMap = new HashMap<String, String>();
- private static final Map<String, String> escapeEntityMap = new HashMap<String, String>();
- static {
- String[][] entities =
- {{" ", " "/* no-break space = non-breaking space */, "\u00A0"}
- , {"¡", "¡"/* inverted exclamation mark */, "\u00A1"}
- , {"¢", "¢"/* cent sign */, "\u00A2"}
- , {"£", "£"/* pound sign */, "\u00A3"}
- , {"¤", "¤"/* currency sign */, "\u00A4"}
- , {"¥", "¥"/* yen sign = yuan sign */, "\u00A5"}
- , {"¦", "¦"/* broken bar = broken vertical bar */, "\u00A6"}
- , {"§", "§"/* section sign */, "\u00A7"}
- , {"¨", "¨"/* diaeresis = spacing diaeresis */, "\u00A8"}
- , {"©", "©"/* copyright sign */, "\u00A9"}
- , {"ª", "ª"/* feminine ordinal indicator */, "\u00AA"}
- , {"«", "«"/* left-pointing double angle quotation mark = left pointing guillemet */, "\u00AB"}
- , {"¬", "¬"/* not sign = discretionary hyphen */, "\u00AC"}
- , {"­", "­"/* soft hyphen = discretionary hyphen */, "\u00AD"}
- , {"®", "®"/* registered sign = registered trade mark sign */, "\u00AE"}
- , {"¯", "¯"/* macron = spacing macron = overline = APL overbar */, "\u00AF"}
- , {"°", "°"/* degree sign */, "\u00B0"}
- , {"±", "±"/* plus-minus sign = plus-or-minus sign */, "\u00B1"}
- , {"²", "²"/* superscript two = superscript digit two = squared */, "\u00B2"}
- , {"³", "³"/* superscript three = superscript digit three = cubed */, "\u00B3"}
- , {"´", "´"/* acute accent = spacing acute */, "\u00B4"}
- , {"µ", "µ"/* micro sign */, "\u00B5"}
- , {"¶", "¶"/* pilcrow sign = paragraph sign */, "\u00B6"}
- , {"·", "·"/* middle dot = Georgian comma = Greek middle dot */, "\u00B7"}
- , {"¸", "¸"/* cedilla = spacing cedilla */, "\u00B8"}
- , {"¹", "¹"/* superscript one = superscript digit one */, "\u00B9"}
- , {"º", "º"/* masculine ordinal indicator */, "\u00BA"}
- , {"»", "»"/* right-pointing double angle quotation mark = right pointing guillemet */, "\u00BB"}
- , {"¼", "¼"/* vulgar fraction one quarter = fraction one quarter */, "\u00BC"}
- , {"½", "½"/* vulgar fraction one half = fraction one half */, "\u00BD"}
- , {"¾", "¾"/* vulgar fraction three quarters = fraction three quarters */, "\u00BE"}
- , {"¿", "¿"/* inverted question mark = turned question mark */, "\u00BF"}
- , {"À", "À"/* latin capital letter A with grave = latin capital letter A grave */, "\u00C0"}
- , {"Á", "Á"/* latin capital letter A with acute */, "\u00C1"}
- , {"Â", "Â"/* latin capital letter A with circumflex */, "\u00C2"}
- , {"Ã", "Ã"/* latin capital letter A with tilde */, "\u00C3"}
- , {"Ä", "Ä"/* latin capital letter A with diaeresis */, "\u00C4"}
- , {"Å", "Å"/* latin capital letter A with ring above = latin capital letter A ring */, "\u00C5"}
- , {"Æ", "Æ"/* latin capital letter AE = latin capital ligature AE */, "\u00C6"}
- , {"Ç", "Ç"/* latin capital letter C with cedilla */, "\u00C7"}
- , {"È", "È"/* latin capital letter E with grave */, "\u00C8"}
- , {"É", "É"/* latin capital letter E with acute */, "\u00C9"}
- , {"Ê", "Ê"/* latin capital letter E with circumflex */, "\u00CA"}
- , {"Ë", "Ë"/* latin capital letter E with diaeresis */, "\u00CB"}
- , {"Ì", "Ì"/* latin capital letter I with grave */, "\u00CC"}
- , {"Í", "Í"/* latin capital letter I with acute */, "\u00CD"}
- , {"Î", "Î"/* latin capital letter I with circumflex */, "\u00CE"}
- , {"Ï", "Ï"/* latin capital letter I with diaeresis */, "\u00CF"}
- , {"Ð", "Ð"/* latin capital letter ETH */, "\u00D0"}
- , {"Ñ", "Ñ"/* latin capital letter N with tilde */, "\u00D1"}
- , {"Ò", "Ò"/* latin capital letter O with grave */, "\u00D2"}
- , {"Ó", "Ó"/* latin capital letter O with acute */, "\u00D3"}
- , {"Ô", "Ô"/* latin capital letter O with circumflex */, "\u00D4"}
- , {"Õ", "Õ"/* latin capital letter O with tilde */, "\u00D5"}
- , {"Ö", "Ö"/* latin capital letter O with diaeresis */, "\u00D6"}
- , {"×", "×"/* multiplication sign */, "\u00D7"}
- , {"Ø", "Ø"/* latin capital letter O with stroke = latin capital letter O slash */, "\u00D8"}
- , {"Ù", "Ù"/* latin capital letter U with grave */, "\u00D9"}
- , {"Ú", "Ú"/* latin capital letter U with acute */, "\u00DA"}
- , {"Û", "Û"/* latin capital letter U with circumflex */, "\u00DB"}
- , {"Ü", "Ü"/* latin capital letter U with diaeresis */, "\u00DC"}
- , {"Ý", "Ý"/* latin capital letter Y with acute */, "\u00DD"}
- , {"Þ", "Þ"/* latin capital letter THORN */, "\u00DE"}
- , {"ß", "ß"/* latin small letter sharp s = ess-zed */, "\u00DF"}
- , {"à", "à"/* latin small letter a with grave = latin small letter a grave */, "\u00E0"}
- , {"á", "á"/* latin small letter a with acute */, "\u00E1"}
- , {"â", "â"/* latin small letter a with circumflex */, "\u00E2"}
- , {"ã", "ã"/* latin small letter a with tilde */, "\u00E3"}
- , {"ä", "ä"/* latin small letter a with diaeresis */, "\u00E4"}
- , {"å", "å"/* latin small letter a with ring above = latin small letter a ring */, "\u00E5"}
- , {"æ", "æ"/* latin small letter ae = latin small ligature ae */, "\u00E6"}
- , {"ç", "ç"/* latin small letter c with cedilla */, "\u00E7"}
- , {"è", "è"/* latin small letter e with grave */, "\u00E8"}
- , {"é", "é"/* latin small letter e with acute */, "\u00E9"}
- , {"ê", "ê"/* latin small letter e with circumflex */, "\u00EA"}
- , {"ë", "ë"/* latin small letter e with diaeresis */, "\u00EB"}
- , {"ì", "ì"/* latin small letter i with grave */, "\u00EC"}
- , {"í", "í"/* latin small letter i with acute */, "\u00ED"}
- , {"î", "î"/* latin small letter i with circumflex */, "\u00EE"}
- , {"ï", "ï"/* latin small letter i with diaeresis */, "\u00EF"}
- , {"ð", "ð"/* latin small letter eth */, "\u00F0"}
- , {"ñ", "ñ"/* latin small letter n with tilde */, "\u00F1"}
- , {"ò", "ò"/* latin small letter o with grave */, "\u00F2"}
- , {"ó", "ó"/* latin small letter o with acute */, "\u00F3"}
- , {"ô", "ô"/* latin small letter o with circumflex */, "\u00F4"}
- , {"õ", "õ"/* latin small letter o with tilde */, "\u00F5"}
- , {"ö", "ö"/* latin small letter o with diaeresis */, "\u00F6"}
- , {"÷", "÷"/* division sign */, "\u00F7"}
- , {"ø", "ø"/* latin small letter o with stroke = latin small letter o slash */, "\u00F8"}
- , {"ù", "ù"/* latin small letter u with grave */, "\u00F9"}
- , {"ú", "ú"/* latin small letter u with acute */, "\u00FA"}
- , {"û", "û"/* latin small letter u with circumflex */, "\u00FB"}
- , {"ü", "ü"/* latin small letter u with diaeresis */, "\u00FC"}
- , {"ý", "ý"/* latin small letter y with acute */, "\u00FD"}
- , {"þ", "þ"/* latin small letter thorn with */, "\u00FE"}
- , {"ÿ", "ÿ"/* latin small letter y with diaeresis */, "\u00FF"}
- , {"ƒ", "ƒ"/* latin small f with hook = function = florin */, "\u0192"}
- /* Greek */
- , {"Α", "Α"/* greek capital letter alpha */, "\u0391"}
- , {"Β", "Β"/* greek capital letter beta */, "\u0392"}
- , {"Γ", "Γ"/* greek capital letter gamma */, "\u0393"}
- , {"Δ", "Δ"/* greek capital letter delta */, "\u0394"}
- , {"Ε", "Ε"/* greek capital letter epsilon */, "\u0395"}
- , {"Ζ", "Ζ"/* greek capital letter zeta */, "\u0396"}
- , {"Η", "Η"/* greek capital letter eta */, "\u0397"}
- , {"Θ", "Θ"/* greek capital letter theta */, "\u0398"}
- , {"Ι", "Ι"/* greek capital letter iota */, "\u0399"}
- , {"Κ", "Κ"/* greek capital letter kappa */, "\u039A"}
- , {"Λ", "Λ"/* greek capital letter lambda */, "\u039B"}
- , {"Μ", "Μ"/* greek capital letter mu */, "\u039C"}
- , {"Ν", "Ν"/* greek capital letter nu */, "\u039D"}
- , {"Ξ", "Ξ"/* greek capital letter xi */, "\u039E"}
- , {"Ο", "Ο"/* greek capital letter omicron */, "\u039F"}
- , {"Π", "Π"/* greek capital letter pi */, "\u03A0"}
- , {"Ρ", "Ρ"/* greek capital letter rho */, "\u03A1"}
- /* there is no Sigmaf and no \u03A2 */
- , {"Σ", "Σ"/* greek capital letter sigma */, "\u03A3"}
- , {"Τ", "Τ"/* greek capital letter tau */, "\u03A4"}
- , {"Υ", "Υ"/* greek capital letter upsilon */, "\u03A5"}
- , {"Φ", "Φ"/* greek capital letter phi */, "\u03A6"}
- , {"Χ", "Χ"/* greek capital letter chi */, "\u03A7"}
- , {"Ψ", "Ψ"/* greek capital letter psi */, "\u03A8"}
- , {"Ω", "Ω"/* greek capital letter omega */, "\u03A9"}
- , {"α", "α"/* greek small letter alpha */, "\u03B1"}
- , {"β", "β"/* greek small letter beta */, "\u03B2"}
- , {"γ", "γ"/* greek small letter gamma */, "\u03B3"}
- , {"δ", "δ"/* greek small letter delta */, "\u03B4"}
- , {"ε", "ε"/* greek small letter epsilon */, "\u03B5"}
- , {"ζ", "ζ"/* greek small letter zeta */, "\u03B6"}
- , {"η", "η"/* greek small letter eta */, "\u03B7"}
- , {"θ", "θ"/* greek small letter theta */, "\u03B8"}
- , {"ι", "ι"/* greek small letter iota */, "\u03B9"}
- , {"κ", "κ"/* greek small letter kappa */, "\u03BA"}
- , {"λ", "λ"/* greek small letter lambda */, "\u03BB"}
- , {"μ", "μ"/* greek small letter mu */, "\u03BC"}
- , {"ν", "ν"/* greek small letter nu */, "\u03BD"}
- , {"ξ", "ξ"/* greek small letter xi */, "\u03BE"}
- , {"ο", "ο"/* greek small letter omicron */, "\u03BF"}
- , {"π", "π"/* greek small letter pi */, "\u03C0"}
- , {"ρ", "ρ"/* greek small letter rho */, "\u03C1"}
- , {"ς", "ς"/* greek small letter final sigma */, "\u03C2"}
- , {"σ", "σ"/* greek small letter sigma */, "\u03C3"}
- , {"τ", "τ"/* greek small letter tau */, "\u03C4"}
- , {"υ", "υ"/* greek small letter upsilon */, "\u03C5"}
- , {"φ", "φ"/* greek small letter phi */, "\u03C6"}
- , {"χ", "χ"/* greek small letter chi */, "\u03C7"}
- , {"ψ", "ψ"/* greek small letter psi */, "\u03C8"}
- , {"ω", "ω"/* greek small letter omega */, "\u03C9"}
- , {"ϑ", "ϑ"/* greek small letter theta symbol */, "\u03D1"}
- , {"ϒ", "ϒ"/* greek upsilon with hook symbol */, "\u03D2"}
- , {"ϖ", "ϖ"/* greek pi symbol */, "\u03D6"}
- /* General Punctuation */
- , {"•", "•"/* bullet = black small circle */, "\u2022"}
- /* bullet is NOT the same as bullet operator ,"\u2219*/
- , {"…", "…"/* horizontal ellipsis = three dot leader */, "\u2026"}
- , {"′", "′"/* prime = minutes = feet */, "\u2032"}
- , {"″", "″"/* double prime = seconds = inches */, "\u2033"}
- , {"‾", "‾"/* overline = spacing overscore */, "\u203E"}
- , {"⁄", "⁄"/* fraction slash */, "\u2044"}
- /* Letterlike Symbols */
- , {"℘", "℘"/* script capital P = power set = Weierstrass p */, "\u2118"}
- , {"ℑ", "ℑ"/* blackletter capital I = imaginary part */, "\u2111"}
- , {"ℜ", "ℜ"/* blackletter capital R = real part symbol */, "\u211C"}
- , {"™", "™"/* trade mark sign */, "\u2122"}
- , {"ℵ", "ℵ"/* alef symbol = first transfinite cardinal */, "\u2135"}
- /* alef symbol is NOT the same as hebrew letter alef ,"\u05D0"}*/
- /* Arrows */
- , {"←", "←"/* leftwards arrow */, "\u2190"}
- , {"↑", "↑"/* upwards arrow */, "\u2191"}
- , {"→", "→"/* rightwards arrow */, "\u2192"}
- , {"↓", "↓"/* downwards arrow */, "\u2193"}
- , {"↔", "↔"/* left right arrow */, "\u2194"}
- , {"↵", "↵"/* downwards arrow with corner leftwards = carriage return */, "\u21B5"}
- , {"⇐", "⇐"/* leftwards double arrow */, "\u21D0"}
- /* Unicode does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function. So ? lArr can be used for 'is implied by' as ISOtech suggests */
- , {"⇑", "⇑"/* upwards double arrow */, "\u21D1"}
- , {"⇒", "⇒"/* rightwards double arrow */, "\u21D2"}
- /* Unicode does not say this is the 'implies' character but does not have another character with this function so ? rArr can be used for 'implies' as ISOtech suggests */
- , {"⇓", "⇓"/* downwards double arrow */, "\u21D3"}
- , {"⇔", "⇔"/* left right double arrow */, "\u21D4"}
- /* Mathematical Operators */
- , {"∀", "∀"/* for all */, "\u2200"}
- , {"∂", "∂"/* partial differential */, "\u2202"}
- , {"∃", "∃"/* there exists */, "\u2203"}
- , {"∅", "∅"/* empty set = null set = diameter */, "\u2205"}
- , {"∇", "∇"/* nabla = backward difference */, "\u2207"}
- , {"∈", "∈"/* element of */, "\u2208"}
- , {"∉", "∉"/* not an element of */, "\u2209"}
- , {"∋", "∋"/* contains as member */, "\u220B"}
- /* should there be a more memorable name than 'ni'? */
- , {"∏", "∏"/* n-ary product = product sign */, "\u220F"}
- /* prod is NOT the same character as ,"\u03A0"}*/
- , {"∑", "∑"/* n-ary sumation */, "\u2211"}
- /* sum is NOT the same character as ,"\u03A3"}*/
- , {"−", "−"/* minus sign */, "\u2212"}
- , {"∗", "∗"/* asterisk operator */, "\u2217"}
- , {"√", "√"/* square root = radical sign */, "\u221A"}
- , {"∝", "∝"/* proportional to */, "\u221D"}
- , {"∞", "∞"/* infinity */, "\u221E"}
- , {"∠", "∠"/* angle */, "\u2220"}
- , {"∧", "∧"/* logical and = wedge */, "\u2227"}
- , {"∨", "∨"/* logical or = vee */, "\u2228"}
- , {"∩", "∩"/* intersection = cap */, "\u2229"}
- , {"∪", "∪"/* union = cup */, "\u222A"}
- , {"∫", "∫"/* integral */, "\u222B"}
- , {"∴", "∴"/* therefore */, "\u2234"}
- , {"∼", "∼"/* tilde operator = varies with = similar to */, "\u223C"}
- /* tilde operator is NOT the same character as the tilde ,"\u007E"}*/
- , {"≅", "≅"/* approximately equal to */, "\u2245"}
- , {"≈", "≈"/* almost equal to = asymptotic to */, "\u2248"}
- , {"≠", "≠"/* not equal to */, "\u2260"}
- , {"≡", "≡"/* identical to */, "\u2261"}
- , {"≤", "≤"/* less-than or equal to */, "\u2264"}
- , {"≥", "≥"/* greater-than or equal to */, "\u2265"}
- , {"⊂", "⊂"/* subset of */, "\u2282"}
- , {"⊃", "⊃"/* superset of */, "\u2283"}
- /* note that nsup 'not a superset of ,"\u2283"}*/
- , {"⊆", "⊆"/* subset of or equal to */, "\u2286"}
- , {"⊇", "⊇"/* superset of or equal to */, "\u2287"}
- , {"⊕", "⊕"/* circled plus = direct sum */, "\u2295"}
- , {"⊗", "⊗"/* circled times = vector product */, "\u2297"}
- , {"⊥", "⊥"/* up tack = orthogonal to = perpendicular */, "\u22A5"}
- , {"⋅", "⋅"/* dot operator */, "\u22C5"}
- /* dot operator is NOT the same character as ,"\u00B7"}
- /* Miscellaneous Technical */
- , {"⌈", "⌈"/* left ceiling = apl upstile */, "\u2308"}
- , {"⌉", "⌉"/* right ceiling */, "\u2309"}
- , {"⌊", "⌊"/* left floor = apl downstile */, "\u230A"}
- , {"⌋", "⌋"/* right floor */, "\u230B"}
- , {"⟨", "〈"/* left-pointing angle bracket = bra */, "\u2329"}
- /* lang is NOT the same character as ,"\u003C"}*/
- , {"⟩", "〉"/* right-pointing angle bracket = ket */, "\u232A"}
- /* rang is NOT the same character as ,"\u003E"}*/
- /* Geometric Shapes */
- , {"◊", "◊"/* lozenge */, "\u25CA"}
- /* Miscellaneous Symbols */
- , {"♠", "♠"/* black spade suit */, "\u2660"}
- /* black here seems to mean filled as opposed to hollow */
- , {"♣", "♣"/* black club suit = shamrock */, "\u2663"}
- , {"♥", "♥"/* black heart suit = valentine */, "\u2665"}
- , {"♦", "♦"/* black diamond suit */, "\u2666"}
- , {""", """ /* quotation mark = APL quote */, "\""}
- , {"&", "&" /* ampersand */, "\u0026"}
- , {"<", "<" /* less-than sign */, "\u003C"}
- , {">", ">" /* greater-than sign */, "\u003E"}
- /* Latin Extended-A */
- , {"Œ", "Œ" /* latin capital ligature OE */, "\u0152"}
- , {"œ", "œ" /* latin small ligature oe */, "\u0153"}
- /* ligature is a misnomer this is a separate character in some languages */
- , {"Š", "Š" /* latin capital letter S with caron */, "\u0160"}
- , {"š", "š" /* latin small letter s with caron */, "\u0161"}
- , {"Ÿ", "Ÿ" /* latin capital letter Y with diaeresis */, "\u0178"}
- /* Spacing Modifier Letters */
- , {"ˆ", "ˆ" /* modifier letter circumflex accent */, "\u02C6"}
- , {"˜", "˜" /* small tilde */, "\u02DC"}
- /* General Punctuation */
- , {" ", " "/* en space */, "\u2002"}
- , {" ", " "/* em space */, "\u2003"}
- , {" ", " "/* thin space */, "\u2009"}
- , {"‌", "‌"/* zero width non-joiner */, "\u200C"}
- , {"‍", "‍"/* zero width joiner */, "\u200D"}
- , {"‎", "‎"/* left-to-right mark */, "\u200E"}
- , {"‏", "‏"/* right-to-left mark */, "\u200F"}
- , {"–", "–"/* en dash */, "\u2013"}
- , {"—", "—"/* em dash */, "\u2014"}
- , {"‘", "‘"/* left single quotation mark */, "\u2018"}
- , {"’", "’"/* right single quotation mark */, "\u2019"}
- , {"‚", "‚"/* single low-9 quotation mark */, "\u201A"}
- , {"“", "“"/* left double quotation mark */, "\u201C"}
- , {"”", "”"/* right double quotation mark */, "\u201D"}
- , {"„", "„"/* double low-9 quotation mark */, "\u201E"}
- , {"†", "†"/* dagger */, "\u2020"}
- , {"‡", "‡"/* double dagger */, "\u2021"}
- , {"‰", "‰"/* per mille sign */, "\u2030"}
- , {"‹", "‹"/* single left-pointing angle quotation mark */, "\u2039"}
- /* lsaquo is proposed but not yet ISO standardized */
- , {"›", "›"/* single right-pointing angle quotation mark */, "\u203A"}
- /* rsaquo is proposed but not yet ISO standardized */
- , {"€", "€" /* euro sign */, "\u20AC"}};
- for (String[] entity : entities) {
- entityEscapeMap.put(entity[2], entity[0]);
- escapeEntityMap.put(entity[0], entity[2]);
- escapeEntityMap.put(entity[1], entity[2]);
- }
- }
- }