PageRenderTime 76ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 1ms

/presrc/com/substanceofcode/utils/EncodingUtil.java

http://mobile-rss-reader.googlecode.com/
Java | 1219 lines | 922 code | 67 blank | 230 comment | 131 complexity | e393850742e52f16b4ae5b25b99339e4 MD5 | raw file
Possible License(s): GPL-2.0
  1. /*
  2. * EncodingUtil.java
  3. TODO methods for booleans
  4. *
  5. * Copyright (C) 2005-2006 Tommi Laukkanen
  6. * http://www.substanceofcode.com
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  21. *
  22. */
  23. // Expand to define test define
  24. @DTESTDEF@
  25. // Expand to define logging define
  26. @DLOGDEF@
  27. package com.substanceofcode.utils;
  28. import java.io.IOException;
  29. import java.io.InputStream;
  30. import java.io.InputStreamReader;
  31. import java.io.UnsupportedEncodingException;
  32. import java.util.Hashtable;
  33. import java.util.Vector;
  34. import com.substanceofcode.utils.CauseException;
  35. //#ifdef DLOGGING
  36. import net.sf.jlogmicro.util.logging.Logger;
  37. import net.sf.jlogmicro.util.logging.Level;
  38. //#endif
  39. /**
  40. * Simple encoding handler to allow handling utf-16 and 1252.
  41. *
  42. * @author Irving Bunton Jr
  43. */
  44. public class EncodingUtil {
  45. final static public boolean m_midpIso = (System.getProperty(
  46. "microedition.encoding").toLowerCase().startsWith("iso-8859") ||
  47. System.getProperty(
  48. "microedition.encoding").toLowerCase().startsWith("iso8859"));
  49. final static public String m_isoEncoding = initIsoEncoding();
  50. final static public boolean m_midpWin = (System.getProperty(
  51. "microedition.encoding").toLowerCase().startsWith("cp") ||
  52. System.getProperty(
  53. "microedition.encoding").toLowerCase().startsWith("windows"));
  54. final static public String m_winEncoding = initWinEncoding();
  55. final static public boolean m_midpUni = System.getProperty(
  56. "microedition.encoding").toLowerCase().startsWith("utf-8");
  57. final static String[] m_isoCommonEntities =
  58. {"iexcl", "cent", "pound", "curren", "yen",
  59. "brvbar", "sect", "uml", "copy", "ordf",
  60. "laquo", "not", "shy", "reg", "macr",
  61. "deg", "plusmn", "sup2", "sup3", "acute",
  62. "micro", "para", "middot", "cedil", "sup1",
  63. "ordm", "raquo", "frac14", "frac12", "frac34",
  64. "iquest"};
  65. final static String[] m_isoSpecialEntities =
  66. {"ndash", // en dash
  67. "mdash", // em dash
  68. "lsquo", // left single quotation mark
  69. "rsquo", // right single quotation mark
  70. "sbquo", // single low-9 quotation mark
  71. "ldquo", // left double quotation mark
  72. "rdquo", // right double quotation mark
  73. "bdquo"}; // double low-9 quotation mark
  74. final static char[] m_isoSpecialValues =
  75. {'-', // en dash
  76. '-', // em dash
  77. '\'', // left single quotation mark
  78. '\'', // right single quotation mark
  79. '\'', // single low-9 quotation mark
  80. '\"', // left double quotation mark
  81. '\"', // right double quotation mark
  82. '\"'}; // double low-9 quotation mark
  83. final static char[] m_isoCommValues =
  84. {0xA1, 0xA2, 0xA3, 0xA4, 0xA5,
  85. 0xA6, 0xA7, 0xA8, 0xA9, 0xAA,
  86. 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
  87. 0xB0, 0xB1, 0xB2, 0xB3, 0xB4,
  88. 0xB5, 0xB6, 0xB7, 0xB8, 0xB9,
  89. 0xBA, 0xBB, 0xBC, 0xBD, 0xBE,
  90. 0xBF};
  91. final static String[] m_isoLatin1Entities =
  92. {"Agrave", "Aacute", "Acirc", "Atilde", "Auml",
  93. "Aring", "AElig", "Ccedil", "Egrave", "Eacute", "Ecirc", "Euml",
  94. "Igrave", "Iacute", "Icirc", "Iuml", "ETH", "Ntilde", "Ograve",
  95. "Oacute", "Ocirc", "Otilde", "Ouml", "times", "Oslash", "Ugrave",
  96. "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig", "agrave",
  97. "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
  98. "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc",
  99. "iuml", "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde",
  100. "ouml", "divide", "oslash", "ugrave", "uacute", "ucirc", "uuml",
  101. "yacute", "thorn", "yuml"};
  102. // Convert windows characters in iso 8859 control range to ISO
  103. // (not the actual character, but a good fix or remove if no equivalent)
  104. final public static char[] m_winIsoConvx80 = initWinIsoConv();
  105. // Convert uni chars to equivalent windows characters in the 0x80 - 0x9f
  106. // range.
  107. final public static char[] m_uniWinConvx80 = initUniWinConvx80();
  108. // See if windows cp-1252 is supported.
  109. final public static boolean m_hasWinEncoding = hasWinEncoding();
  110. // See if ISO8859-1 is supported.
  111. final public static boolean m_hasIso8859Encoding = hasIso8859Encoding();
  112. final private static String m_xmlEntKeys =
  113. "< >  & '"";
  114. final private static String[] m_xmlEntValues =
  115. {"<", ">", " ", "&", "'", "\""};
  116. // Left single quote in cp-1252 (Windows) encoding.
  117. public static final char CWSGL_LOW9_QUOTE = 0x82; // #130;
  118. public static final char CWDBL_LOW9_QUOTE = 0x84; // #132;
  119. public static final char CWLEFT_SGL_QUOTE = 0x91; // #145;
  120. public static final char CWRIGHT_SGL_QUOTE = 0x92; // #146;
  121. public static final char [] CAWRIGHT_SGL_QUOTE = {CWRIGHT_SGL_QUOTE};
  122. public static final String WRIGHT_SGL_QUOTE = new String(CAWRIGHT_SGL_QUOTE);
  123. public static final char CWLEFT_DBL_QUOTE = 0x93; // #147;
  124. public static final char CWRIGHT_DBL_QUOTE = 0x94; // #148;
  125. public static final char CWEN_DASH = 0x96; // #150;
  126. public static final char CWEM_DASH = 0x97; // #151;
  127. // Left single quote in Unicode (utf-16) encoding.
  128. // Long dash a.k.a en dash
  129. public static final char CEN_DASH = 0x2013;
  130. public static final char CEM_DASH = 0x2014;
  131. public static final char CLEFT_SGL_QUOTE = 0x2018;
  132. public static final char CRIGHT_SGL_QUOTE = 0x2019;
  133. public static final char [] CARIGHT_SGL_QUOTE = {CRIGHT_SGL_QUOTE};
  134. public static final String RIGHT_SGL_QUOTE = new String(CARIGHT_SGL_QUOTE);
  135. public static final char CSGL_LOW9_QUOTE = 0x201A;
  136. private static final char CLEFT_DBL_QUOTE = 0x201C;
  137. private static final char CRIGHT_DBL_QUOTE = 0x201D;
  138. public static final char CDBL_LOW9_QUOTE = 0x201E;
  139. public static final char CA_UMLAUTE = (char)228;
  140. private static final char CO_UMLAUTE = (char)246;
  141. public static final char CNON_BREAKING_SP = (char)160;
  142. private EncodingStreamReader m_encodingStreamReader;
  143. final private static Hashtable m_convXmlEntities = initXmlEntities();
  144. final private static Hashtable m_convIso88591 = initAlphaIso88591(false);
  145. final private static Hashtable m_convXmlIso88591 = initAlphaIso88591(true);
  146. final private static Hashtable m_convCp1252 = initAlphaCp1252(false);
  147. final private static Hashtable m_convXmlCp1252 = initAlphaCp1252(true);
  148. private String m_docEncoding = ""; // Default for XML is UTF-8.
  149. // unexpected UTF-16.
  150. private boolean m_utf = false; // Doc is utf.
  151. private boolean m_getPrologue = true;
  152. private boolean m_windows = false; // True if windows code space
  153. final private static boolean m_convWinUni = initConvWinUni();
  154. static Vector m_statExcs = null; // Exceptions encountered
  155. Vector m_excs = null; // Exceptions encountered
  156. //#ifdef DTEST
  157. final private static boolean m_debugTrace = false; // True if want to trace more
  158. //#endif
  159. //#ifdef DLOGGING
  160. final private Logger logger = Logger.getLogger("EncodingUtil");
  161. final private boolean fineLoggable = logger.isLoggable(Level.FINE);
  162. final private boolean finestLoggable = logger.isLoggable(Level.FINEST);
  163. //#endif
  /**
   * Creates a new instance of EncodingUtil that reads through an
   * EncodingStreamReader wrapping the given stream.
   *
   * @param inputStream stream handed to the new EncodingStreamReader
   */
  public EncodingUtil(InputStream inputStream) {
    this.m_encodingStreamReader = new EncodingStreamReader(inputStream);
  }
  168. /** Determine the encoding based on what is passed in as well
  169. as if/when strings are to be further encoded. Also decide to
  170. modify bytes read.
  171. **/
  172. public void getEncoding(final String fileEncoding, final String encoding) {
  173. getEncoding(m_hasIso8859Encoding, m_isoEncoding, m_hasWinEncoding,
  174. m_winEncoding, fileEncoding, encoding);
  175. }
  176. /** Determine the encoding based on what is passed in as well
  177. as if/when strings are to be further encoded. Also decide to
  178. modify bytes read.
  179. **/
  180. public void getEncoding(final boolean hasIso8859Encoding,
  181. final String isoEncoding, final boolean hasWinEncoding,
  182. final String winEncoding, final String fileEncoding,
  183. final String encoding) {
  184. String cencoding = encoding;
  185. // If there is a second char, don't stop splitting until we
  186. // return that char as input.
  187. if (cencoding == null) {
  188. cencoding = "UTF-8";
  189. }
  190. cencoding = cencoding.toUpperCase();
  191. boolean modUTF16 = m_encodingStreamReader.isModUTF16();
  192. boolean modEncoding = m_encodingStreamReader.isModEncoding();
  193. m_utf = false;
  194. m_windows = false;
  195. String docEncoding = fileEncoding;
  196. // Only need to convert from 2 byte to 1 byte and vsa versa.
  197. if ((cencoding.equals("UTF-8") || cencoding.equals("UTF8"))) {
  198. docEncoding = "UTF-8";
  199. modEncoding = false;
  200. m_utf = true;
  201. } else if (cencoding.equals("UTF-16") || cencoding.equals("UTF16")) {
  202. // If utf-16, don't set doc encoding as we are converting the
  203. // bytes to single chars.
  204. modUTF16 = true;
  205. m_utf = true;
  206. // Don't do doc encoding as the stream reader does it.
  207. docEncoding = "";
  208. } else if (cencoding.startsWith("ISO-8859")) {
  209. if (hasIso8859Encoding) {
  210. if (isoEncoding.indexOf("-") == -1) {
  211. docEncoding = StringUtil.replace(cencoding, "ISO-",
  212. "ISO");
  213. docEncoding = docEncoding.replace('-', '_');
  214. } else {
  215. docEncoding = cencoding;
  216. }
  217. } else {
  218. docEncoding = "";
  219. }
  220. modEncoding = false;
  221. } else if (cencoding.startsWith("ISO8859")) {
  222. if (hasIso8859Encoding) {
  223. if (isoEncoding.indexOf("-") >= 0) {
  224. docEncoding = StringUtil.replace(cencoding, "ISO",
  225. "ISO-");
  226. docEncoding = docEncoding.replace('_', '-');
  227. } else {
  228. docEncoding = cencoding;
  229. }
  230. } else {
  231. docEncoding = "";
  232. }
  233. modEncoding = false;
  234. } else if (cencoding.startsWith("WINDOWS-12")) {
  235. if (hasWinEncoding) {
  236. if (winEncoding.indexOf("-") == -1) {
  237. docEncoding = StringUtil.replace(cencoding, "WINDOWS-",
  238. "Cp");
  239. } else {
  240. docEncoding = cencoding;
  241. }
  242. } else {
  243. docEncoding = "";
  244. }
  245. modEncoding = false;
  246. m_windows = true;
  247. } else if (cencoding.indexOf("CP-") == 0) {
  248. if (hasWinEncoding) {
  249. if (winEncoding.indexOf("-") >= 0) {
  250. docEncoding = StringUtil.replace(cencoding, "CP-",
  251. "WINDOWS-");
  252. } else {
  253. docEncoding = StringUtil.replace(cencoding, "CP-",
  254. "Cp");
  255. }
  256. } else {
  257. docEncoding = "";
  258. }
  259. modEncoding = false;
  260. m_windows = true;
  261. } else if (cencoding.startsWith("CP")) {
  262. if (hasWinEncoding) {
  263. if (winEncoding.indexOf("-") >= 0) {
  264. docEncoding = StringUtil.replace(cencoding, "CP",
  265. "WINDOWS-");
  266. } else {
  267. docEncoding = StringUtil.replace(cencoding, "CP", "Cp");
  268. }
  269. } else {
  270. docEncoding = "";
  271. }
  272. modEncoding = false;
  273. m_windows = true;
  274. }
  275. if (docEncoding.equals(fileEncoding)) {
  276. m_docEncoding = "";
  277. } else {
  278. m_docEncoding = docEncoding;
  279. }
  280. if (m_docEncoding.length() != 0) {
  281. try {
  282. String a = new String("a".getBytes(), m_docEncoding);
  283. } catch (UnsupportedEncodingException e) {
  284. CauseException ce = new CauseException(
  285. "UnsupportedEncodingException while trying to " +
  286. "convert doc encoding: " + m_docEncoding, e);
  287. if (m_excs == null) {
  288. m_excs = new Vector();
  289. }
  290. m_excs.addElement(ce);
  291. //#ifdef DLOGGING
  292. logger.severe(ce.getMessage(), e);
  293. //#endif
  294. System.out.println(ce.getMessage());
  295. // If encoding problem, use the main encoding as it is
  296. // close enough.
  297. if (m_windows) {
  298. if (hasWinEncoding) {
  299. m_docEncoding = winEncoding;
  300. } else {
  301. m_docEncoding = "";
  302. }
  303. } else if (m_utf) {
  304. m_docEncoding = "";
  305. } else {
  306. if (hasIso8859Encoding) {
  307. m_docEncoding = isoEncoding;
  308. } else {
  309. m_docEncoding = "";
  310. }
  311. }
  312. try {
  313. String a = new String("a".getBytes(), m_docEncoding);
  314. } catch (UnsupportedEncodingException e2) {
  315. CauseException ce2 = new CauseException(
  316. "Second unsupportedEncodingException while " +
  317. " trying to convert doc encoding: " +
  318. m_docEncoding, e2);
  319. m_excs.addElement(ce2);
  320. //#ifdef DLOGGING
  321. logger.severe(ce2.getMessage(), e2);
  322. //#endif
  323. System.out.println(ce2.getMessage());
  324. m_docEncoding = "";
  325. }
  326. }
  327. }
  328. m_encodingStreamReader.setModEncoding(modEncoding);
  329. m_encodingStreamReader.setModUTF16(modUTF16);
  330. //#ifdef DLOGGING
  331. if (fineLoggable) {logger.fine("hasIso8859Encoding=" + hasIso8859Encoding);}
  332. if (fineLoggable) {logger.fine("isoEncoding=" + isoEncoding);}
  333. if (fineLoggable) {logger.fine("hasWinEncoding=" + hasWinEncoding);}
  334. if (fineLoggable) {logger.fine("winEncoding=" + winEncoding);}
  335. if (fineLoggable) {logger.fine("encoding=" + encoding);}
  336. if (fineLoggable) {logger.fine("cencoding=" + cencoding);}
  337. if (fineLoggable) {logger.fine("docEncoding=" + docEncoding);}
  338. if (fineLoggable) {logger.fine("m_docEncoding=" + m_docEncoding);}
  339. if (fineLoggable) {logger.fine("fileEncoding=" + fileEncoding);}
  340. if (fineLoggable) {logger.fine("m_windows=" + m_windows);}
  341. if (fineLoggable) {logger.fine("m_utf=" + m_utf);}
  342. if (fineLoggable) {logger.fine("modEncoding=" + modEncoding);}
  343. if (fineLoggable) {logger.fine("modUTF16=" + modUTF16);}
  344. //#endif
  345. }
  346. /* Replace special characters with valid ones for the specified
  347. encoding. */
  348. public static String replaceSpChars(String text, boolean isWindows,
  349. boolean isUtf) {
  350. return replaceSpChars(text, isWindows, isUtf, m_midpWin, m_midpUni);
  351. }
  352. /* Replace special characters with valid ones for the specified
  353. encoding. For callers which use an instance of this class. */
  354. public String replaceSpChars(String text) {
  355. return replaceSpChars(text, m_windows, m_utf, m_midpWin, m_midpUni);
  356. }
  357. /* Replace special characters with valid ones for the specified
  358. encoding. */
  359. public static String replaceSpChars(String text, final boolean isWindows,
  360. final boolean isUtf,
  361. final boolean midpWin,
  362. final boolean midpUni) {
  363. try {
  364. // No need to convert i diaeresis anymore as we do encoding
  365. // change.
  366. if (isWindows) {
  367. if (midpWin) {
  368. if (m_convWinUni) {
  369. text = replaceSpUniChars(text);
  370. return text;
  371. }
  372. /* If we are converting a windows doc, the windows special
  373. characters are control characters in other encodings,
  374. so change to ASCII. */
  375. } else if (m_convWinUni) {
  376. if (!midpUni) {
  377. text = replaceSpUniWinChars(text);
  378. }
  379. } else {
  380. char [] ctext = text.toCharArray();
  381. char [] ntext = new char[text.length()];
  382. int jc = 0;
  383. for (int ic = 0; ic < ctext.length; ic++) {
  384. final char cchr = ctext[ic];
  385. if ((0x80 <= (int)cchr) && ((int)cchr <= 0x9f)) {
  386. if (m_winIsoConvx80[(int)cchr - 0x80] != 0x01) {
  387. ntext[jc++] = m_winIsoConvx80[(int)cchr - 0x80];
  388. //#ifdef DTEST
  389. if (m_debugTrace) {System.out.println("array cchr,conv=" + cchr + "," + Integer.toHexString(cchr) + "," + ntext[jc - 1] + "," + Integer.toHexString(ntext[jc - 1]));}
  390. //#endif
  391. }
  392. } else {
  393. ntext[jc++] = cchr;
  394. //#ifdef DTEST
  395. if (m_debugTrace) {System.out.println("cchr,conv=" + cchr + "," + Integer.toHexString(cchr) + "," + ntext[jc - 1] + "," + Integer.toHexString(ntext[jc - 1]));}
  396. //#endif
  397. }
  398. }
  399. text = new String(ntext, 0, jc);
  400. //#ifdef DTEST
  401. if (m_debugTrace) {System.out.println( "text,len=" + text + "," + text.length());}
  402. //#endif
  403. }
  404. } else if (isUtf && !midpUni) {
  405. text = replaceSpUniChars(text);
  406. }
  407. text = text.replace(CNON_BREAKING_SP, ' ');
  408. } catch (Throwable t) {
  409. //#ifdef DLOGGING
  410. Logger logger = Logger.getLogger("EncodingUtil");
  411. logger.severe("replaceSpChars error ", t);
  412. //#endif
  413. System.out.println("replaceSpChars error " + t + "," +
  414. t.getMessage());
  415. }
  416. return text;
  417. }
  418. /* Replace Unicode special characters with valid ones for Windows
  419. encoding as they sometimes are valid even in iso8859_1 even though
  420. it shouldn't be. */
  421. public static String replaceSpUniWinChars(String text) {
  422. try {
  423. final char [] ctext = text.toCharArray();
  424. char [] ntext = new char[text.length()];
  425. int jc = 0;
  426. for (int ic = 0; ic < ctext.length; ic++) {
  427. final char c = ctext[ic];
  428. switch(c & 0xff00) {
  429. case 0x2000:
  430. switch(c) {
  431. case CEN_DASH:
  432. ntext[jc++] = '-';
  433. break;
  434. case CEM_DASH:
  435. ntext[jc++] = '-';
  436. break;
  437. case CLEFT_SGL_QUOTE:
  438. ntext[jc++] = '\'';
  439. break;
  440. case CRIGHT_SGL_QUOTE:
  441. ntext[jc++] = '\'';
  442. break;
  443. case CSGL_LOW9_QUOTE:
  444. ntext[jc++] = '\'';
  445. break;
  446. case CLEFT_DBL_QUOTE:
  447. ntext[jc++] = '\"';
  448. break;
  449. case CRIGHT_DBL_QUOTE:
  450. ntext[jc++] = '\"';
  451. break;
  452. case CDBL_LOW9_QUOTE:
  453. ntext[jc++] = '\"';
  454. break;
  455. case 0x2020:
  456. ntext[jc++] = 0x86;
  457. break;
  458. case 0x2021:
  459. ntext[jc++] = 0x87;
  460. break;
  461. case 0x2022:
  462. ntext[jc++] = 0x95;
  463. break;
  464. case 0x2026:
  465. ntext[jc++] = 0x85;
  466. break;
  467. case 0x2030:
  468. ntext[jc++] = 0x89;
  469. break;
  470. case 0x2039:
  471. ntext[jc++] = 0x8B;
  472. break;
  473. case 0x203A:
  474. ntext[jc++] = 0x9B;
  475. break;
  476. case 0x20AC:
  477. ntext[jc++] = 0x80;
  478. System.out.println("ic,c=" + c + "," + Integer.toHexString(ntext[jc-1]));
  479. break;
  480. default:
  481. ntext[jc++] = c;
  482. break;
  483. }
  484. break;
  485. default:
  486. ntext[jc++] = c;
  487. break;
  488. }
  489. }
  490. text = new String(ntext, 0, jc);
  491. } catch (Throwable t) {
  492. //#ifdef DLOGGING
  493. Logger logger = Logger.getLogger("EncodingUtil");
  494. logger.severe("replaceSpUniWinChars error ", t);
  495. //#endif
  496. System.out.println("replaceSpUniWinChars error " + t + "," +
  497. t.getMessage());
  498. }
  499. return text;
  500. }
  501. /* Replace Unicode special characters which have Windows (cp1252)
  502. equivalents into their windows equivalents except for those
  503. that have simi-equivalents (e.g. en dash to regular dash)*/
  504. public static String replaceSpUniChars(String text) {
  505. text = text.replace(CSGL_LOW9_QUOTE, '\'');
  506. text = text.replace(CLEFT_SGL_QUOTE, '\'');
  507. text = text.replace(CRIGHT_SGL_QUOTE, '\'');
  508. text = text.replace(CLEFT_DBL_QUOTE, '\"');
  509. text = text.replace(CRIGHT_DBL_QUOTE, '\"');
  510. text = text.replace(CDBL_LOW9_QUOTE, '\"');
  511. text = text.replace(CEN_DASH, '-');
  512. text = text.replace(CEM_DASH, '-');
  513. return text;
  514. }
  515. /* Replace Windows special characters with simi-equivalents
  516. (e.g. en dash to regular dash)*/
  517. public static String replaceSpWinChars(String text) {
  518. text = text.replace(CWSGL_LOW9_QUOTE, '\'');
  519. text = text.replace(CWLEFT_SGL_QUOTE, '\'');
  520. text = text.replace(CWRIGHT_SGL_QUOTE, '\'');
  521. text = text.replace(CWLEFT_DBL_QUOTE, '\"');
  522. text = text.replace(CWRIGHT_DBL_QUOTE, '\"');
  523. text = text.replace(CWDBL_LOW9_QUOTE, '\"');
  524. text = text.replace(CWEN_DASH, '-');
  525. text = text.replace(CWEM_DASH, '-');
  526. return text;
  527. }
  528. /* Replace all numeric entites e.g. &#228;
  529. * @param s String to alter.
  530. */
  531. public static String replaceNumEntity( String s) {
  532. if (s == null) return s;
  533. String snum = "";
  534. try {
  535. int index01 = s.indexOf( "&#" );
  536. char [] achar = new char[1];
  537. while (index01 != -1) {
  538. int index02 = s.indexOf( ';' , index01 );
  539. if (index02 == -1) {
  540. return s;
  541. }
  542. try {
  543. snum = s.substring(index01 + 2, index02);
  544. // TODO redo with StringBuffer?
  545. if (snum.length() == 0) {
  546. return s;
  547. }
  548. switch (snum.charAt(0)) {
  549. case 'x':
  550. case 'X':
  551. achar[0] = (char)Integer.parseInt(snum.substring(
  552. 1), 16);
  553. break;
  554. default:
  555. achar[0] = (char)Integer.parseInt(snum);
  556. break;
  557. }
  558. s = s.substring(0, index01) + new String(achar) +
  559. s.substring(index02 + 1);
  560. } catch (NumberFormatException e) {
  561. //#ifdef DLOGGING
  562. Logger logger = Logger.getLogger("EncodingUtil");
  563. logger.severe("replaceNumEntity NumberFormatException error for " + snum, e);
  564. //#endif
  565. System.out.println("replaceNumEntity error " + e + "," +
  566. e.getMessage());
  567. return s;
  568. }
  569. index01 = s.indexOf( "&#" );
  570. }
  571. } catch (Throwable t) {
  572. //#ifdef DLOGGING
  573. Logger logger = Logger.getLogger("EncodingUtil");
  574. logger.severe("replaceNumEntity error ", t);
  575. //#endif
  576. System.out.println("replaceNumEntity error " + t + "," +
  577. t.getMessage());
  578. }
  579. return s;
  580. }
  581. /**
  582. Replace alphabetic entities.
  583. */
  584. public static String replaceAlphaEntities(final boolean convXmlEnts,
  585. String text) {
  586. final Hashtable m_convEntities = (m_midpWin) ?
  587. (convXmlEnts ? m_convXmlCp1252 : m_convCp1252) :
  588. (convXmlEnts ? m_convXmlIso88591 : m_convIso88591);
  589. int beginPos = 0;
  590. int pos = -1;
  591. while ((pos = text.indexOf('&', beginPos)) >= 0) {
  592. int epos = text.indexOf(';', pos);
  593. if (epos < 0) {
  594. break;
  595. }
  596. int nbpos = text.indexOf('&', pos + 1);
  597. if ((nbpos >= 0) && (nbpos < epos)) {
  598. beginPos = nbpos;
  599. continue;
  600. }
  601. if ((pos + 1) == epos) {
  602. beginPos = epos + 1;
  603. continue;
  604. }
  605. String entity = text.substring(pos + 1, epos);
  606. Object oent = m_convEntities.get(entity);
  607. if (oent != null) {
  608. String ent = (String)oent;
  609. text = text.substring(0, pos) + ent + text.substring(epos + 1);
  610. // If we made a substitution, keep the position the same
  611. // as sometimes, we get a double substitution when
  612. // we substitute &amp; for & this may create another
  613. // entity (e.g. &amp;quot; becomes & &quot;)
  614. beginPos = pos;
  615. } else {
  616. beginPos = epos + 1;
  617. }
  618. }
  619. return text;
  620. }
  621. /**
  622. Replace alphabetic entities.
  623. */
  624. public static String replaceXmlEntities(String text) {
  625. int beginPos = 0;
  626. int pos = -1;
  627. while ((pos = text.indexOf('&', beginPos)) >= 0) {
  628. int epos = text.indexOf(';', pos);
  629. if (epos < 0) {
  630. break;
  631. }
  632. int nbpos = text.indexOf('&', pos + 1);
  633. if ((nbpos >= 0) && (nbpos < epos)) {
  634. beginPos = nbpos;
  635. continue;
  636. }
  637. if ((pos + 1) == epos) {
  638. beginPos = epos + 1;
  639. continue;
  640. }
  641. String entity = text.substring(pos, epos + 1);
  642. int spos = m_xmlEntKeys.indexOf(entity);
  643. if (spos >= 0) {
  644. String ent = m_xmlEntValues[spos / 6];
  645. text = text.substring(0, pos) + ent + text.substring(epos + 1);
  646. // If we made a substitution, keep the position the same
  647. // as sometimes, we get a double substitution when
  648. // we substitute &amp; for & this may create another
  649. // entity (e.g. &amp;quot; becomes & &quot;)
  650. beginPos = pos;
  651. } else {
  652. beginPos = epos + 1;
  653. }
  654. }
  655. return text;
  656. }
  657. /**
  658. Create table of XML entities.
  659. */
  660. public static Hashtable initXmlEntities() {
  661. Hashtable convEntities = new Hashtable();
  662. try {
  663. initHtmlCommEnts(convEntities);
  664. } catch (Throwable t) {
  665. //#ifdef DLOGGING
  666. Logger logger = Logger.getLogger("EncodingUtil");
  667. logger.severe("initXmlEntities", t);
  668. //#endif
  669. }
  670. return convEntities;
  671. }
  672. /**
  673. Create table of alpha entities for iso8859-1.
  674. */
  675. public static Hashtable initAlphaIso88591(final boolean convXmlEnts) {
  676. //#ifdef DTEST
  677. System.out.println( "m_midpIso=" + m_midpIso);
  678. //#endif
  679. final char isoLatin1Values[] =
  680. {0xC0, 0xC1, 0xC2, 0xC3, 0xC4,
  681. 0xC5, 0xC6, 0xC7, 0xC8, 0xC9,
  682. 0xCA, 0xCB, 0xCC, 0xCD, 0xCE,
  683. 0xCF, 0xD0, 0xD1, 0xD2, 0xD3,
  684. 0xD4, 0xD5, 0xD6, 0xD7, 0xD8,
  685. 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
  686. 0xDE, 0xDF, 0xE0, 0xE1, 0xE2,
  687. 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  688. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC,
  689. 0xED, 0xEE, 0xEF, 0xF0, 0xF1,
  690. 0xF2, 0xF3, 0xF4, 0xF5, 0xF6,
  691. 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
  692. 0xFC, 0xFD, 0xFE, 0xFF};
  693. Hashtable convEntities = new Hashtable();
  694. try {
  695. initEntVals(convEntities, m_isoCommonEntities, m_isoCommValues);
  696. initEntVals(convEntities, m_isoLatin1Entities, isoLatin1Values);
  697. initEntVals(convEntities, m_isoSpecialEntities, m_isoSpecialValues);
  698. if (convXmlEnts) {
  699. initHtmlCommEnts(convEntities);
  700. }
  701. } catch (Throwable t) {
  702. //#ifdef DLOGGING
  703. Logger logger = Logger.getLogger("EncodingUtil");
  704. logger.severe("initAlphaIso88591", t);
  705. //#endif
  706. }
  707. return convEntities;
  708. }
  709. /**
  710. Create table of alpha entities for windows 1252.
  711. */
  712. public static Hashtable initAlphaCp1252(final boolean convXmlEnts) {
  713. //#ifdef DTEST
  714. System.out.println( "m_midpWin=" + m_midpWin);
  715. //#endif
  716. char isoLatin1Values[] =
  717. {0xC0, 0xC1, 0xC2, 0xC3, 0xC4,
  718. 0xC5, 0xC6, 0xC7, 0xC8, 0xC9,
  719. 0xCA, 0xCB, 0xCC, 0xCD, 0xCE,
  720. 0xCF, 0xD0, 0xD1, 0xD2, 0xD3,
  721. 0xD4, 0xD5, 0xD6, 0xD7, 0xD8,
  722. 0xD9, 0xDA, 0xDB, 0xDC, 0xDD,
  723. 0xDE, 0xDF, 0xE0, 0xE1, 0xE2,
  724. 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  725. 0xE8, 0xE9, 0xEA, 0xEB, 0xEC,
  726. 0xED, 0xEE, 0xEF, 0xF0, 0xF1,
  727. 0xF2, 0xF3, 0xF4, 0xF5, 0xF6,
  728. 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
  729. 0xFC, 0xFD, 0xFE, 0xFF};
  730. Hashtable convEntities = new Hashtable();
  731. try {
  732. /* ISO common entities have same encodings as Cp1252 */
  733. initEntVals(convEntities, m_isoCommonEntities, m_isoCommValues);
  734. initEntVals(convEntities, m_isoLatin1Entities, isoLatin1Values);
  735. char wm_isoSpecialValues[] =
  736. {CWEN_DASH, // en dash
  737. CWEM_DASH, // em dash
  738. CWLEFT_SGL_QUOTE, // left single quotation mark
  739. CWRIGHT_SGL_QUOTE, // right single quotation mark
  740. 0x82, // single low-9 quotation mark
  741. CWLEFT_DBL_QUOTE, // left double quotation mark
  742. CWRIGHT_DBL_QUOTE, // right double quotation mark
  743. 0x84}; // double low-9 quotation mark
  744. initEntVals(convEntities, m_isoSpecialEntities, wm_isoSpecialValues);
  745. if (convXmlEnts) {
  746. initHtmlCommEnts(convEntities);
  747. }
  748. } catch (Throwable t) {
  749. //#ifdef DLOGGING
  750. Logger logger = Logger.getLogger("EncodingUtil");
  751. logger.severe("initAlphaCp1252", t);
  752. //#endif
  753. }
  754. return convEntities;
  755. }
  756. /* Initialize entries with passed in entity strings and character
  757. values turned into strings. */
  758. public static void initEntVals(Hashtable convEntities, String[] entities, char[] entValues) {
  759. try {
  760. //#ifdef DTEST
  761. System.out.println( "Entities, values len=" + entities.length + "," + entValues.length);
  762. //#endif
  763. for (int ic = 0; (ic < entities.length) && (ic < entValues.length);
  764. ic++) {
  765. char [] cvalue = {entValues[ic]};
  766. // Sometimes, this can produce an error in some default
  767. // encodings.
  768. try {
  769. String value = new String(cvalue);
  770. convEntities.put(entities[ic], value);
  771. } catch (Throwable t) {
  772. //#ifdef DLOGGING
  773. Logger logger = Logger.getLogger("EncodingUtil");
  774. logger.severe("initEntVals convert error bvalue=" +
  775. Integer.toHexString(cvalue[0]), t);
  776. //#endif
  777. }
  778. }
  779. } catch (Throwable t) {
  780. //#ifdef DLOGGING
  781. Logger logger = Logger.getLogger("EncodingUtil");
  782. logger.severe("initEntVals", t);
  783. //#endif
  784. }
  785. }
  786. /* Init windows (cp-1252) to Iso 8859 encoding. This has either 1
  787. if there is no equivalent (this is used to remove the equivalent char
  788. from the string to be converted). If not a 1, the character is
  789. used to replace the character in the string to be converted.
  790. The conversion starts at 0x80 and goes to including 0x9f.
  791. */
  792. private static char [] initWinIsoConv() {
  793. char [] convTable = new char[0x9f - 0x80 + 1];
  794. try {
  795. //#ifdef DTEST
  796. System.out.println( "convTable.length=" + convTable.length);
  797. //#endif
  798. convTable[0x80 - 0x80] = 0x20AC; //EURO SIGN
  799. convTable[0x81 - 0x80] = 0x01;
  800. convTable[0x82 - 0x80] = '\''; //SINGLE LOW-9 QUOTATION MARK
  801. convTable[0x83 - 0x80] = 0x0192; //LATIN SMALL LETTER F WITH HOOK
  802. convTable[0x84 - 0x80] = '\"'; //DOUBLE LOW-9 QUOTATION MARK
  803. convTable[0x85 - 0x80] = 0x2026; //HORIZONTAL ELLIPSIS
  804. convTable[0x86 - 0x80] = 0x2020; //DAGGER
  805. convTable[0x87 - 0x80] = 0x2021; //DOUBLE DAGGER
  806. convTable[0x88 - 0x80] = 0x02C6; //MODIFIER LETTER CIRCUMFLEX ACCENT
  807. convTable[0x89 - 0x80] = 0x2030; //PER MILLE SIGN
  808. convTable[0x8A - 0x80] = 0x0160; //LATIN CAPITAL LETTER S WITH CARON
  809. convTable[0x8B - 0x80] = 0x2039; //SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  810. convTable[0x8C - 0x80] = 0x0152; //LATIN CAPITAL LIGATURE OE
  811. convTable[0x8D - 0x80] = 0x01;
  812. convTable[0x8E - 0x80] = 0x017D; //LATIN CAPITAL LETTER Z WITH CARON
  813. convTable[0x8F - 0x80] = 0x01;
  814. convTable[0x90 - 0x80] = 0x01;
  815. convTable[0x91 - 0x80] = '\''; //LEFT SINGLE QUOTATION MARK
  816. convTable[0x92 - 0x80] = '\''; //RIGHT SINGLE QUOTATION MARK
  817. convTable[0x93 - 0x80] = '\"'; //LEFT DOUBLE QUOTATION MARK
  818. convTable[0x94 - 0x80] = '\"'; //RIGHT DOUBLE QUOTATION MARK
  819. convTable[0x95 - 0x80] = 0x2022; //BULLET
  820. convTable[0x96 - 0x80] = '-'; //EN DASH
  821. convTable[0x97 - 0x80] = '-'; //EM DASH
  822. convTable[0x98 - 0x80] = 0x02DC; //SMALL TILDE
  823. convTable[0x99 - 0x80] = 0x2122; //TRADE MARK SIGN
  824. convTable[0x9A - 0x80] = 0x0161; //LATIN SMALL LETTER S WITH CARON
  825. convTable[0x9B - 0x80] = 0x203A; //SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  826. convTable[0x9C - 0x80] = 0x0153; //LATIN SMALL LIGATURE OE
  827. convTable[0x9D - 0x80] = 0x01;
  828. convTable[0x9E - 0x80] = 0x017E; //LATIN SMALL LETTER Z WITH CARON
  829. convTable[0x9F - 0x80] = 0x0178; //LATIN CAPITAL LETTER Y WITH DIAERESIS
  830. } catch (Throwable t) {
  831. //#ifdef DLOGGING
  832. Logger logger = Logger.getLogger("EncodingUtil");
  833. logger.severe("initWinIsoConv", t);
  834. //#endif
  835. }
  836. return convTable;
  837. }
  838. /* Init unicode to windows (cp-1252). This has either 1
  839. if there is no equivalent (this is used to remove the equivalent char
  840. from the string to be converted). If not a 1, the character is
  841. used to replace the character in the string to be converted.
  842. The conversion starts at 0x80 and goes to including 0x9f.
  843. */
  844. private static char [] initUniWinConvx80() {
  845. char [] convTable = new char[0x9f - 0x80 + 1];
  846. try {
  847. //#ifdef DTEST
  848. System.out.println( "convTable.length=" + convTable.length);
  849. //#endif
  850. for (int ic = 0; ic < convTable.length; ic++) {
  851. char cc = (char)(ic + 0x80);
  852. switch (cc) {
  853. case CWSGL_LOW9_QUOTE:
  854. convTable[ic] = '\'';
  855. break;
  856. case CWDBL_LOW9_QUOTE:
  857. convTable[ic] = '\"';
  858. break;
  859. case CWLEFT_DBL_QUOTE:
  860. convTable[ic] = '\"';
  861. break;
  862. case CWRIGHT_DBL_QUOTE:
  863. convTable[ic] = '\"';
  864. break;
  865. case CWLEFT_SGL_QUOTE:
  866. convTable[ic] = '\'';
  867. break;
  868. case CWEN_DASH:
  869. convTable[ic] = '-';
  870. break;
  871. case CWEM_DASH:
  872. convTable[ic] = '-';
  873. break;
  874. default:
  875. convTable[ic] = 0x01;
  876. break;
  877. }
  878. }
  879. } catch (Throwable t) {
  880. //#ifdef DLOGGING
  881. Logger logger = Logger.getLogger("EncodingUtil");
  882. logger.severe("initUniWinConvx80", t);
  883. //#endif
  884. }
  885. return convTable;
  886. }
  887. /* Initialize entries for XML. */
  888. private static void initHtmlCommEnts(Hashtable convEntities) {
  889. String htmlCommonEntities[] =
  890. {"lt", "gt", "nbsp", "amp", "apos", "quot"};
  891. char htmlCommonValues[] = {'<', '>', ' ', '&', '\'', '\"'};
  892. initEntVals(convEntities, htmlCommonEntities, htmlCommonValues);
  893. }
  894. /* Determine if creating a string converts the windows chars to
  895. Unicode. */
  896. private static boolean initConvWinUni() {
  897. boolean rtn = false;
  898. try {
  899. byte[] blftSgl = {(byte)CWLEFT_SGL_QUOTE};
  900. try {
  901. String convStr = new String(blftSgl, "Cp1252");
  902. rtn = convStr.charAt(0) == CLEFT_SGL_QUOTE;
  903. } catch (UnsupportedEncodingException e) {
  904. //#ifdef DTEST
  905. System.out.println( "Unsupported encoding Cp1252");
  906. //#endif
  907. //#ifdef DLOGGING
  908. Logger logger = Logger.getLogger("EncodingUtil");
  909. logger.severe("UnsupportedEncodingException Cp1252", e);
  910. //#endif
  911. try {
  912. String convStr2 = new String(blftSgl, "Cp1252");
  913. rtn = convStr2.charAt(0) == CLEFT_SGL_QUOTE;
  914. } catch (UnsupportedEncodingException e2) {
  915. //#ifdef DTEST
  916. System.out.println( "Unsupported encoding WINDOWS-1252");
  917. //#endif
  918. //#ifdef DLOGGING
  919. logger.severe("UnsupportedEncodingException Cp1252", e2);
  920. //#endif
  921. }
  922. }
  923. //#ifdef DTEST
  924. System.out.println( "initConvWinUni()=" + rtn);
  925. //#endif
  926. } catch (Throwable t) {
  927. //#ifdef DLOGGING
  928. Logger logger = Logger.getLogger("EncodingUtil");
  929. logger.severe("initConvWinUni", t);
  930. //#endif
  931. }
  932. return rtn;
  933. }
  934. /* Determine ISO encoding string. */
  935. private static String initIsoEncoding() {
  936. try {
  937. try {
  938. String convStr = new String("a".getBytes(), "ISO8859_1");
  939. return "ISO8859_1";
  940. } catch (UnsupportedEncodingException e) {
  941. //#ifdef DTEST
  942. System.out.println( "Unsupported encoding ISO8859_1");
  943. //#endif
  944. //#ifdef DLOGGING
  945. Logger logger = Logger.getLogger("EncodingUtil");
  946. logger.severe("initIsoEncoding UnsupportedEncodingException ISO8859_1", e);
  947. //#endif
  948. try {
  949. String convStr2 = new String("a".getBytes(), "ISO-8859-1");
  950. return "ISO-8859-1";
  951. } catch (UnsupportedEncodingException e2) {
  952. //#ifdef DTEST
  953. System.out.println("initIsoEncoding Unsupported encoding ISO-8859-1");
  954. //#endif
  955. //#ifdef DLOGGING
  956. logger.severe("initIsoEncoding UnsupportedEncodingException ISO-8859-1", e2);
  957. //#endif
  958. }
  959. }
  960. } catch (Throwable t) {
  961. //#ifdef DLOGGING
  962. Logger logger = Logger.getLogger("EncodingUtil");
  963. logger.severe("initIsoEncoding initConvWinUni", t);
  964. //#endif
  965. }
  966. return "ISO8859_1";
  967. }
  968. /* Determine Windows encoding string. */
  969. private static String initWinEncoding() {
  970. try {
  971. try {
  972. String convStr = new String("a".getBytes(), "Cp1252");
  973. return "Cp1252";
  974. } catch (UnsupportedEncodingException e) {
  975. CauseException ce = new CauseException(
  976. "initWinEncoding UnsupportedEncodingException " +
  977. "while trying to convert encoding Cp1252.", e);
  978. if (m_statExcs == null) {
  979. m_statExcs = new Vector();
  980. }
  981. m_statExcs.addElement(ce);
  982. //#ifdef DTEST
  983. System.out.println(ce.getMessage());
  984. //#endif
  985. //#ifdef DLOGGING
  986. Logger logger = Logger.getLogger("EncodingUtil");
  987. logger.severe(ce.getMessage(), e);
  988. //#endif
  989. try {
  990. String convStr2 = new String("a".getBytes(), "WINDOWS-1252");
  991. return "WINDOWS-1252";
  992. } catch (UnsupportedEncodingException e2) {
  993. CauseException ce2 = new CauseException(
  994. "initWinEncoding second " +
  995. "unsupportedEncodingException while " +
  996. " trying to convert encoding WINDOWS-1252.", e2);
  997. m_statExcs.addElement(ce2);
  998. //#ifdef DTEST
  999. System.out.println(ce2.getMessage());
  1000. //#endif
  1001. //#ifdef DLOGGING
  1002. logger.severe(ce2.getMessage(), e2);
  1003. //#endif
  1004. }
  1005. }
  1006. } catch (Throwable t) {
  1007. //#ifdef DLOGGING
  1008. Logger logger = Logger.getLogger("EncodingUtil");
  1009. logger.severe("initWinEncoding() initConvWinUni", t);
  1010. //#endif
  1011. }
  1012. return "Cp1252";
  1013. }
  1014. /* Determine if windows encoding is supported. */
  1015. public static boolean hasWinEncoding() {
  1016. try {
  1017. try {
  1018. String convStr = new String("a".getBytes(), "Cp1252");
  1019. return true;
  1020. } catch (UnsupportedEncodingException e) {
  1021. CauseException ce = new CauseException(
  1022. "hasWinEncoding UnsupportedEncodingException " +
  1023. "while trying to convert encoding Cp1252.", e);
  1024. if (m_statExcs == null) {
  1025. m_statExcs = new Vector();
  1026. }
  1027. m_statExcs.addElement(ce);
  1028. //#ifdef DTEST
  1029. System.out.println(ce.getMessage());
  1030. //#endif
  1031. //#ifdef DLOGGING
  1032. Logger logger = Logger.getLogger("EncodingUtil");
  1033. logger.severe(ce.getMessage(), e);
  1034. //#endif
  1035. try {
  1036. String convStr2 = new String("a".getBytes(), "WINDOWS-1252");
  1037. return true;
  1038. } catch (UnsupportedEncodingException e2) {
  1039. CauseException ce2 = new CauseException(
  1040. "initWinEncoding second " +
  1041. "unsupportedEncodingException while " +
  1042. " trying to convert encoding WINDOWS-1252.", e2);
  1043. m_statExcs.addElement(ce2);
  1044. //#ifdef DTEST
  1045. System.out.println(ce2.getMessage());
  1046. //#endif
  1047. //#ifdef DLOGGING
  1048. logger.severe(ce2.getMessage(), e2);
  1049. //#endif
  1050. }
  1051. }
  1052. } catch (Throwable t) {
  1053. //#ifdef DLOGGING
  1054. Logger logger = Logger.getLogger("EncodingUtil");
  1055. logger.severe("hasWinEncoding initConvWinUni", t);
  1056. //#endif
  1057. }
  1058. return false;
  1059. }
  1060. /* Determine if iso-8859-1 encoding is supported. */
  1061. private static boolean hasIso8859Encoding() {
  1062. try {
  1063. try {
  1064. String convStr = new String("a".getBytes(), "ISO8859_1");
  1065. return true;
  1066. } catch (UnsupportedEncodingException e) {
  1067. //#ifdef DTEST
  1068. System.out.println( "Unsupported encoding ISO8859_1");
  1069. //#endif
  1070. //#ifdef DLOGGING
  1071. Logger logger = Logger.getLogger("EncodingUtil");
  1072. logger.severe("hasIso8859Encoding UnsupportedEncodingException ISO8859_1", e);
  1073. //#endif
  1074. try {
  1075. String convStr2 = new String("a".getBytes(), "ISO-8859-1");
  1076. return true;
  1077. } catch (UnsupportedEncodingException e2) {
  1078. //#ifdef DTEST
  1079. System.out.println("hasIso8859Encoding Unsupported encoding ISO-8859-1");
  1080. //#endif
  1081. //#ifdef DLOGGING
  1082. logger.severe("initIsoEncoding UnsupportedEncodingException ISO-8859-1", e2);
  1083. //#endif
  1084. }
  1085. }
  1086. } catch (Throwable t) {
  1087. //#ifdef DLOGGING
  1088. Logger logger = Logger.getLogger("EncodingUtil");
  1089. logger.severe("hasIso8859Encoding initConvWinUni", t);
  1090. //#endif
  1091. }
  1092. return false;
  1093. }
  1094. public void setDocEncoding(String m_docEncoding) {
  1095. this.m_docEncoding = m_docEncoding;
  1096. }
  1097. public String getDocEncoding() {
  1098. return (m_docEncoding);
  1099. }
  1100. public void setEncodingStreamReader(EncodingStreamReader m_encodingStreamReader) {
  1101. this.m_encodingStreamReader = m_encodingStreamReader;
  1102. }
  1103. public EncodingStreamReader getEncodingStreamReader() {
  1104. return (m_encodingStreamReader);
  1105. }
  1106. public boolean isWindows() {
  1107. return (m_windows);
  1108. }
  1109. public boolean isUtf() {
  1110. return (m_utf);
  1111. }
  1112. //#ifdef DTEST
  1113. public static String[] getIsoCommonEntities() {
  1114. return (m_isoCommonEntities);
  1115. }
  1116. public static Hashtable getConvIso88591() {
  1117. return (m_convIso88591);
  1118. }
  1119. public static Hashtable getConvCp1252() {
  1120. return (m_convCp1252);
  1121. }
  1122. static public String[] getIsoSpecialEntities() {
  1123. return (m_isoSpecialEntities);
  1124. }
  1125. static public String getWinEncoding() {
  1126. return (m_winEncoding);
  1127. }
  1128. public static boolean isConvWinUni() {
  1129. return (m_convWinUni);
  1130. }
  1131. public static boolean isHasWinEncoding() {
  1132. return (m_hasWinEncoding);
  1133. }
  1134. //#endif
  1135. static public String getIsoEncoding() {
  1136. return (m_isoEncoding);
  1137. }
  1138. public Vector getExcs() {
  1139. if (m_excs == null) {
  1140. return new Vector();
  1141. } else {
  1142. return (m_excs);
  1143. }
  1144. }
  1145. public static Vector getStatExcs() {
  1146. if (m_statExcs == null) {
  1147. return new Vector();
  1148. } else {
  1149. return (m_statExcs);
  1150. }
  1151. }
  1152. }