PageRenderTime 52ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/code/common/common/src/main/java/org/tomstools/common/util/CharsetCheck.java

https://github.com/lotomer/tomstools
Java | 2142 lines | 1978 code | 23 blank | 141 comment | 221 complexity | d6fa7650400b0b175ba90596ca5f3a47 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. package org.tomstools.common.util;
  2. import java.io.File;
  3. import java.io.FileInputStream;
  4. import java.io.InputStream;
  5. import java.net.URL;
  6. /**
  7. * 字符集检查
  8. * @date 2012-6-12
  9. * @time 下午02:52:04
  10. */
  11. public class CharsetCheck {
  12. static final int GB2312 = 0;
  13. static final int GBK = 1;
  14. static final int HZ = 2;
  15. static final int BIG5 = 3;
  16. static final int EUC_TW = 4;
  17. static final int ISO_2022_CN = 5;
  18. static final int UTF8 = 6;
  19. static final int UNICODE = 7;
  20. static final int ASCII = 8;
  21. static final int OTHER = 9;
  22. static final int TOTAL_ENCODINGS = 10;
  23. // Frequency tables to hold the GB, Big5, and EUC-TW character
  24. // frequencies
  25. int GBFreq[][];
  26. int GBKFreq[][];
  27. int Big5Freq[][];
  28. int EUC_TWFreq[][];
  29. // int UnicodeFreq[94][128];
  30. public static String[] nicename;
  31. public static String[] codings;
  32. public CharsetCheck() {
  33. // Initialize the Frequency Table for GB, Big5, EUC-TW
  34. GBFreq = new int[94][94];
  35. GBKFreq = new int[126][191];
  36. Big5Freq = new int[94][158];
  37. EUC_TWFreq = new int[94][94];
  38. codings = new String[TOTAL_ENCODINGS];
  39. codings[GB2312] = "GB2312";
  40. codings[GBK] = "GBK";
  41. codings[HZ] = "HZ";
  42. codings[BIG5] = "BIG5";
  43. codings[EUC_TW] = "CNS11643";
  44. codings[ISO_2022_CN] = "ISO2022CN";
  45. codings[UTF8] = "UTF8";
  46. codings[UNICODE] = "Unicode";
  47. codings[ASCII] = "ASCII";
  48. codings[OTHER] = "OTHER";
  49. nicename = new String[TOTAL_ENCODINGS];
  50. nicename[GB2312] = "GB2312";
  51. nicename[GBK] = "GBK";
  52. nicename[HZ] = "HZ";
  53. nicename[BIG5] = "Big5";
  54. nicename[EUC_TW] = "CNS 11643";
  55. nicename[ISO_2022_CN] = "ISO 2022-CN";
  56. nicename[UTF8] = "UTF-8";
  57. nicename[UNICODE] = "Unicode";
  58. nicename[ASCII] = "ASCII";
  59. nicename[OTHER] = "OTHER";
  60. initialize_frequencies();
  61. }
  62. public static String getCharsetName(String content) {
  63. CharsetCheck checker;
  64. int result = OTHER;
  65. checker = new CharsetCheck();
  66. if (content.startsWith("http://") == true) {
  67. try {
  68. result = checker.detectEncoding(new URL(content));
  69. } catch (Exception e) {
  70. System.err.println("Bad URL " + e.toString());
  71. }
  72. } else {
  73. // result = sinodetector.detectEncoding(new File(argc[0]));
  74. result = checker.detectEncoding(content.getBytes());
  75. }
  76. return nicename[result];
  77. }
  78. public static void main(String argc[]) {
  79. System.out.println(argc[0]);
  80. }
  81. /**
  82. * Function : detectEncoding Aruguments: URL Returns : One of the encodings
  83. * from the Encoding enumeration (GB2312, HZ, BIG5, EUC_TW, ASCII, or OTHER)
  84. * Description: This function looks at the URL contents and assigns it a
  85. * probability score for each encoding type. The encoding type with the
  86. * highest probability is returned.
  87. */
  88. public int detectEncoding(URL testurl) {
  89. byte[] rawtext = new byte[10000];
  90. int bytesread = 0, byteoffset = 0;
  91. int guess = OTHER;
  92. InputStream chinesestream;
  93. try {
  94. chinesestream = testurl.openStream();
  95. while ((bytesread = chinesestream
  96. .read(rawtext, byteoffset, rawtext.length - byteoffset)) > 0) {
  97. byteoffset += bytesread;
  98. }
  99. ;
  100. chinesestream.close();
  101. guess = detectEncoding(rawtext);
  102. } catch (Exception e) {
  103. System.err.println("Error loading or using URL " + e.toString());
  104. guess = OTHER;
  105. }
  106. return guess;
  107. }
  108. /**
  109. * Function : detectEncoding Aruguments: File Returns : One of the encodings
  110. * from the Encoding enumeration (GB2312, HZ, BIG5, EUC_TW, ASCII, or OTHER)
  111. * Description: This function looks at the file and assigns it a probability
  112. * score for each encoding type. The encoding type with the highest
  113. * probability is returned.
  114. */
  115. @SuppressWarnings("resource")
  116. public int detectEncoding(File testfile) {
  117. FileInputStream chinesefile;
  118. byte[] rawtext;
  119. rawtext = new byte[(int) testfile.length()];
  120. try {
  121. chinesefile = new FileInputStream(testfile);
  122. chinesefile.read(rawtext);
  123. } catch (Exception e) {
  124. System.err.println("Error: " + e);
  125. }
  126. return detectEncoding(rawtext);
  127. }
  128. /**
  129. * Function : detectEncoding Aruguments: byte array Returns : One of the
  130. * encodings from the Encoding enumeration (GB2312, HZ, BIG5, EUC_TW, ASCII,
  131. * or OTHER) Description: This function looks at the byte array and assigns
  132. * it a probability score for each encoding type. The encoding type with the
  133. * highest probability is returned.
  134. */
  135. public int detectEncoding(byte[] rawtext) {
  136. int[] scores;
  137. int index, maxscore = 0;
  138. int encoding_guess = OTHER;
  139. scores = new int[TOTAL_ENCODINGS];
  140. // Assign Scores
  141. scores[GB2312] = gb2312_probability(rawtext);
  142. scores[GBK] = gbk_probability(rawtext);
  143. scores[HZ] = hz_probability(rawtext);
  144. scores[BIG5] = big5_probability(rawtext);
  145. scores[EUC_TW] = euc_tw_probability(rawtext);
  146. scores[ISO_2022_CN] = iso_2022_cn_probability(rawtext);
  147. scores[UTF8] = utf8_probability(rawtext);
  148. scores[UNICODE] = utf16_probability(rawtext);
  149. scores[ASCII] = ascii_probability(rawtext);
  150. scores[OTHER] = 0;
  151. // Tabulate Scores
  152. for (index = 0; index < TOTAL_ENCODINGS; index++) {
  153. if (scores[index] > maxscore) {
  154. encoding_guess = index;
  155. maxscore = scores[index];
  156. }
  157. }
  158. // Return OTHER if nothing scored above 50
  159. if (maxscore <= 50) {
  160. encoding_guess = OTHER;
  161. }
  162. return encoding_guess;
  163. }
  164. /*
  165. * Function: gb2312_probability Argument: pointer to byte array Returns :
  166. * number from 0 to 100 representing probability text in array uses GB-2312
  167. * encoding
  168. */
  169. int gb2312_probability(byte[] rawtext) {
  170. int i, rawtextlen = 0;
  171. int dbchars = 1, gbchars = 1;
  172. long gbfreq = 0, totalfreq = 1;
  173. float rangeval = 0, freqval = 0;
  174. int row, column;
  175. // Stage 1: Check to see if characters fit into acceptable ranges
  176. rawtextlen = rawtext.length;
  177. for (i = 0; i < rawtextlen - 1; i++) {
  178. // System.err.println(rawtext[i]);
  179. if (rawtext[i] >= 0) {
  180. // asciichars++;
  181. } else {
  182. dbchars++;
  183. if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7
  184. && (byte) 0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) {
  185. gbchars++;
  186. totalfreq += 500;
  187. row = rawtext[i] + 256 - 0xA1;
  188. column = rawtext[i + 1] + 256 - 0xA1;
  189. if (GBFreq[row][column] != 0) {
  190. gbfreq += GBFreq[row][column];
  191. } else if (15 <= row && row < 55) {
  192. gbfreq += 200;
  193. }
  194. }
  195. i++;
  196. }
  197. }
  198. rangeval = 50 * ((float) gbchars / (float) dbchars);
  199. freqval = 50 * ((float) gbfreq / (float) totalfreq);
  200. return (int) (rangeval + freqval);
  201. }
  202. /*
  203. * Function: gb2312_probability Argument: pointer to byte array Returns :
  204. * number from 0 to 100 representing probability text in array uses GB-2312
  205. * encoding
  206. */
  207. int gbk_probability(byte[] rawtext) {
  208. int i, rawtextlen = 0;
  209. int dbchars = 1, gbchars = 1;
  210. long gbfreq = 0, totalfreq = 1;
  211. float rangeval = 0, freqval = 0;
  212. int row, column;
  213. // Stage 1: Check to see if characters fit into acceptable ranges
  214. rawtextlen = rawtext.length;
  215. for (i = 0; i < rawtextlen - 1; i++) {
  216. // System.err.println(rawtext[i]);
  217. if (rawtext[i] >= 0) {
  218. // asciichars++;
  219. } else {
  220. dbchars++;
  221. if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 && // Original
  222. // GB
  223. // range
  224. (byte) 0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) {
  225. gbchars++;
  226. totalfreq += 500;
  227. row = rawtext[i] + 256 - 0xA1;
  228. column = rawtext[i + 1] + 256 - 0xA1;
  229. // System.out.println("original row " + row + " column " +
  230. // column);
  231. if (GBFreq[row][column] != 0) {
  232. gbfreq += GBFreq[row][column];
  233. } else if (15 <= row && row < 55) {
  234. gbfreq += 200;
  235. }
  236. } else if ((byte) 0x81 <= rawtext[i]
  237. && rawtext[i] <= (byte) 0xFE
  238. && // Extended GB range
  239. (((byte) 0x80 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) || ((byte) 0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0x7E))) {
  240. gbchars++;
  241. totalfreq += 500;
  242. row = rawtext[i] + 256 - 0x81;
  243. if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) {
  244. column = rawtext[i + 1] - 0x40;
  245. } else {
  246. column = rawtext[i + 1] + 256 - 0x80;
  247. }
  248. // System.out.println("extended row " + row + " column " +
  249. // column + " rawtext[i] " + rawtext[i]);
  250. if (GBKFreq[row][column] != 0) {
  251. gbfreq += GBKFreq[row][column];
  252. }
  253. }
  254. i++;
  255. }
  256. }
  257. rangeval = 50 * ((float) gbchars / (float) dbchars);
  258. freqval = 50 * ((float) gbfreq / (float) totalfreq);
  259. // For regular GB files, this would give the same score, so I handicap
  260. // it slightly
  261. return (int) (rangeval + freqval) - 1;
  262. }
  263. /*
  264. * Function: hz_probability Argument: byte array Returns : number from 0 to
  265. * 100 representing probability text in array uses HZ encoding
  266. */
  267. int hz_probability(byte[] rawtext) {
  268. int i, rawtextlen;
  269. int hzchars = 0, dbchars = 1;
  270. long hzfreq = 0, totalfreq = 1;
  271. float rangeval = 0, freqval = 0;
  272. int hzstart = 0, hzend = 0;
  273. int row, column;
  274. rawtextlen = rawtext.length;
  275. for (i = 0; i < rawtextlen; i++) {
  276. if (rawtext[i] == '~') {
  277. if (rawtext[i + 1] == '{') {
  278. hzstart++;
  279. i += 2;
  280. while (i < rawtextlen - 1) {
  281. if (rawtext[i] == 0x0A || rawtext[i] == 0x0D) {
  282. break;
  283. } else if (rawtext[i] == '~' && rawtext[i + 1] == '}') {
  284. hzend++;
  285. i++;
  286. break;
  287. } else if ((0x21 <= rawtext[i] && rawtext[i] <= 0x77)
  288. && (0x21 <= rawtext[i + 1] && rawtext[i + 1] <= 0x77)) {
  289. hzchars += 2;
  290. row = rawtext[i] - 0x21;
  291. column = rawtext[i + 1] - 0x21;
  292. totalfreq += 500;
  293. if (GBFreq[row][column] != 0) {
  294. hzfreq += GBFreq[row][column];
  295. } else if (15 <= row && row < 55) {
  296. hzfreq += 200;
  297. }
  298. } else if ((0xA1 <= rawtext[i] && rawtext[i] <= 0xF7)
  299. && (0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= 0xF7)) {
  300. hzchars += 2;
  301. row = rawtext[i] + 256 - 0xA1;
  302. column = rawtext[i + 1] + 256 - 0xA1;
  303. totalfreq += 500;
  304. if (GBFreq[row][column] != 0) {
  305. hzfreq += GBFreq[row][column];
  306. } else if (15 <= row && row < 55) {
  307. hzfreq += 200;
  308. }
  309. }
  310. dbchars += 2;
  311. i += 2;
  312. }
  313. } else if (rawtext[i + 1] == '}') {
  314. hzend++;
  315. i++;
  316. } else if (rawtext[i + 1] == '~') {
  317. i++;
  318. }
  319. }
  320. }
  321. if (hzstart > 4) {
  322. rangeval = 50;
  323. } else if (hzstart > 1) {
  324. rangeval = 41;
  325. } else if (hzstart > 0) { // Only 39 in case the sequence happened to
  326. // occur
  327. rangeval = 39; // in otherwise non-Hz text
  328. } else {
  329. rangeval = 0;
  330. }
  331. freqval = 50 * ((float) hzfreq / (float) totalfreq);
  332. return (int) (rangeval + freqval);
  333. }
  334. /**
  335. * Function: big5_probability Argument: byte array Returns : number from 0
  336. * to 100 representing probability text in array uses Big5 encoding
  337. */
  338. int big5_probability(byte[] rawtext) {
  339. // int score = 0;
  340. int i, rawtextlen = 0;
  341. int dbchars = 1, bfchars = 1;
  342. float rangeval = 0, freqval = 0;
  343. long bffreq = 0, totalfreq = 1;
  344. int row, column;
  345. // Check to see if characters fit into acceptable ranges
  346. rawtextlen = rawtext.length;
  347. for (i = 0; i < rawtextlen - 1; i++) {
  348. if (rawtext[i] >= 0) {
  349. // asciichars++;
  350. } else {
  351. dbchars++;
  352. if ((byte) 0xA1 <= rawtext[i]
  353. && rawtext[i] <= (byte) 0xF9
  354. && (((byte) 0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0x7E) || ((byte) 0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE))) {
  355. bfchars++;
  356. totalfreq += 500;
  357. row = rawtext[i] + 256 - 0xA1;
  358. if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) {
  359. column = rawtext[i + 1] - 0x40;
  360. } else {
  361. column = rawtext[i + 1] + 256 - 0x61;
  362. }
  363. if (Big5Freq[row][column] != 0) {
  364. bffreq += Big5Freq[row][column];
  365. } else if (3 <= row && row <= 37) {
  366. bffreq += 200;
  367. }
  368. }
  369. i++;
  370. }
  371. }
  372. rangeval = 50 * ((float) bfchars / (float) dbchars);
  373. freqval = 50 * ((float) bffreq / (float) totalfreq);
  374. return (int) (rangeval + freqval);
  375. }
  376. /*
  377. * Function: euc_tw_probability Argument: byte array Returns : number from 0
  378. * to 100 representing probability text in array uses EUC-TW (CNS 11643)
  379. * encoding
  380. */
  381. int euc_tw_probability(byte[] rawtext) {
  382. int i, rawtextlen = 0;
  383. int dbchars = 1, cnschars = 1;
  384. long cnsfreq = 0, totalfreq = 1;
  385. float rangeval = 0, freqval = 0;
  386. int row, column;
  387. // Check to see if characters fit into acceptable ranges
  388. // and have expected frequency of use
  389. rawtextlen = rawtext.length;
  390. for (i = 0; i < rawtextlen - 1; i++) {
  391. if (rawtext[i] >= 0) { // in ASCII range
  392. // asciichars++;
  393. } else { // high bit set
  394. dbchars++;
  395. if (i + 3 < rawtextlen && (byte) 0x8E == rawtext[i]
  396. && (byte) 0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xB0
  397. && (byte) 0xA1 <= rawtext[i + 2] && rawtext[i + 2] <= (byte) 0xFE
  398. && (byte) 0xA1 <= rawtext[i + 3] && rawtext[i + 3] <= (byte) 0xFE) { // Planes
  399. // 1
  400. // -
  401. // 16
  402. cnschars++;
  403. // System.out.println("plane 2 or above CNS char");
  404. // These are all less frequent chars so just ignore freq
  405. i += 3;
  406. } else if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xFE && // Plane
  407. // 1
  408. (byte) 0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) {
  409. cnschars++;
  410. totalfreq += 500;
  411. row = rawtext[i] + 256 - 0xA1;
  412. column = rawtext[i + 1] + 256 - 0xA1;
  413. if (EUC_TWFreq[row][column] != 0) {
  414. cnsfreq += EUC_TWFreq[row][column];
  415. } else if (35 <= row && row <= 92) {
  416. cnsfreq += 150;
  417. }
  418. i++;
  419. }
  420. }
  421. }
  422. rangeval = 50 * ((float) cnschars / (float) dbchars);
  423. freqval = 50 * ((float) cnsfreq / (float) totalfreq);
  424. return (int) (rangeval + freqval);
  425. }
  426. /*
  427. * Function: iso_2022_cn_probability Argument: byte array Returns : number
  428. * from 0 to 100 representing probability text in array uses ISO 2022-CN
  429. * encoding WORKS FOR BASIC CASES, BUT STILL NEEDS MORE WORK
  430. */
  431. int iso_2022_cn_probability(byte[] rawtext) {
  432. int i, rawtextlen = 0;
  433. int dbchars = 1, isochars = 1;
  434. long isofreq = 0, totalfreq = 1;
  435. float rangeval = 0, freqval = 0;
  436. int row, column;
  437. // Check to see if characters fit into acceptable ranges
  438. // and have expected frequency of use
  439. rawtextlen = rawtext.length;
  440. for (i = 0; i < rawtextlen - 1; i++) {
  441. if (rawtext[i] == (byte) 0x1B && i + 3 < rawtextlen) { // Escape
  442. // char ESC
  443. if (rawtext[i + 1] == (byte) 0x24 && rawtext[i + 2] == 0x29
  444. && rawtext[i + 3] == (byte) 0x41) { // GB Escape $ ) A
  445. i += 4;
  446. while (rawtext[i] != (byte) 0x1B) {
  447. dbchars++;
  448. if ((0x21 <= rawtext[i] && rawtext[i] <= 0x77)
  449. && (0x21 <= rawtext[i + 1] && rawtext[i + 1] <= 0x77)) {
  450. isochars++;
  451. row = rawtext[i] - 0x21;
  452. column = rawtext[i + 1] - 0x21;
  453. totalfreq += 500;
  454. if (GBFreq[row][column] != 0) {
  455. isofreq += GBFreq[row][column];
  456. } else if (15 <= row && row < 55) {
  457. isofreq += 200;
  458. }
  459. i++;
  460. }
  461. i++;
  462. }
  463. } else if (i + 3 < rawtextlen && rawtext[i + 1] == (byte) 0x24
  464. && rawtext[i + 2] == (byte) 0x29 && rawtext[i + 3] == (byte) 0x47) {
  465. // CNS Escape $ ) G
  466. i += 4;
  467. while (rawtext[i] != (byte) 0x1B) {
  468. dbchars++;
  469. if ((byte) 0x21 <= rawtext[i] && rawtext[i] <= (byte) 0x7E
  470. && (byte) 0x21 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0x7E) {
  471. isochars++;
  472. totalfreq += 500;
  473. row = rawtext[i] - 0x21;
  474. column = rawtext[i + 1] - 0x21;
  475. if (EUC_TWFreq[row][column] != 0) {
  476. isofreq += EUC_TWFreq[row][column];
  477. } else if (35 <= row && row <= 92) {
  478. isofreq += 150;
  479. }
  480. i++;
  481. }
  482. i++;
  483. }
  484. }
  485. if (rawtext[i] == (byte) 0x1B && i + 2 < rawtextlen
  486. && rawtext[i + 1] == (byte) 0x28 && rawtext[i + 2] == (byte) 0x42) { // ASCII:
  487. // ESC
  488. // (
  489. // B
  490. i += 2;
  491. }
  492. }
  493. }
  494. rangeval = 50 * ((float) isochars / (float) dbchars);
  495. freqval = 50 * ((float) isofreq / (float) totalfreq);
  496. // System.out.println("isochars dbchars isofreq totalfreq " + isochars +
  497. // " " + dbchars + " " + isofreq + " " + totalfreq + " " + rangeval +
  498. // " " + freqval);
  499. return (int) (rangeval + freqval);
  500. // return 0;
  501. }
  502. /*
  503. * Function: utf8_probability Argument: byte array Returns : number from 0
  504. * to 100 representing probability text in array uses UTF-8 encoding of
  505. * Unicode
  506. */
  507. int utf8_probability(byte[] rawtext) {
  508. int score = 0;
  509. int i, rawtextlen = 0;
  510. int goodbytes = 0, asciibytes = 0;
  511. // Maybe also use UTF8 Byte Order Mark: EF BB BF
  512. // Check to see if characters fit into acceptable ranges
  513. rawtextlen = rawtext.length;
  514. for (i = 0; i < rawtextlen; i++) {
  515. if ((rawtext[i] & (byte) 0x7F) == rawtext[i]) { // One byte
  516. asciibytes++;
  517. // Ignore ASCII, can throw off count
  518. } else if (-64 <= rawtext[i] && rawtext[i] <= -33 && // Two bytes
  519. i + 1 < rawtextlen && -128 <= rawtext[i + 1] && rawtext[i + 1] <= -65) {
  520. goodbytes += 2;
  521. i++;
  522. } else if (-32 <= rawtext[i] && rawtext[i] <= -17
  523. && // Three bytes
  524. i + 2 < rawtextlen && -128 <= rawtext[i + 1] && rawtext[i + 1] <= -65
  525. && -128 <= rawtext[i + 2] && rawtext[i + 2] <= -65) {
  526. goodbytes += 3;
  527. i += 2;
  528. }
  529. }
  530. if (asciibytes == rawtextlen) {
  531. return 0;
  532. }
  533. score = (int) (100 * ((float) goodbytes / (float) (rawtextlen - asciibytes)));
  534. // If not above 98, reduce to zero to prevent coincidental matches
  535. // Allows for some (few) bad formed sequences
  536. if (score > 98) {
  537. return score;
  538. } else if (score > 95 && goodbytes > 30) {
  539. return score;
  540. } else {
  541. return 0;
  542. }
  543. }
  544. /*
  545. * Function: utf16_probability Argument: byte array Returns : number from 0
  546. * to 100 representing probability text in array uses UTF-16 encoding of
  547. * Unicode, guess based on BOM // NOT VERY GENERAL, NEEDS MUCH MORE WORK
  548. */
  549. int utf16_probability(byte[] rawtext) {
  550. // int score = 0;
  551. // int i, rawtextlen = 0;
  552. // int goodbytes = 0, asciibytes = 0;
  553. if (((byte) 0xFE == rawtext[0] && (byte) 0xFF == rawtext[1]) || // Big-endian
  554. ((byte) 0xFF == rawtext[0] && (byte) 0xFE == rawtext[1])) { // Little-endian
  555. return 100;
  556. }
  557. return 0;
  558. /*
  559. * // Check to see if characters fit into acceptable ranges rawtextlen =
  560. * rawtext.length; for (i = 0; i < rawtextlen; i++) { if ((rawtext[i] &
  561. * (byte)0x7F) == rawtext[i]) { // One byte goodbytes += 1;
  562. * asciibytes++; } else if ((rawtext[i] & (byte)0xDF) == rawtext[i]) {
  563. * // Two bytes if (i+1 < rawtextlen && (rawtext[i+1] & (byte)0xBF) ==
  564. * rawtext[i+1]) { goodbytes += 2; i++; } } else if ((rawtext[i] &
  565. * (byte)0xEF) == rawtext[i]) { // Three bytes if (i+2 < rawtextlen &&
  566. * (rawtext[i+1] & (byte)0xBF) == rawtext[i+1] && (rawtext[i+2] &
  567. * (byte)0xBF) == rawtext[i+2]) { goodbytes += 3; i+=2; } } } score =
  568. * (int)(100 * ((float)goodbytes/(float)rawtext.length)); // An all
  569. * ASCII file is also a good UTF8 file, but I'd rather it // get
  570. * identified as ASCII. Can delete following 3 lines otherwise if
  571. * (goodbytes == asciibytes) { score = 0; } // If not above 90, reduce
  572. * to zero to prevent coincidental matches if (score > 90) { return
  573. * score; } else { return 0; }
  574. */
  575. }
  576. /*
  577. * Function: ascii_probability Argument: byte array Returns : number from 0
  578. * to 100 representing probability text in array uses all ASCII Description:
  579. * Sees if array has any characters not in ASCII range, if so, score is
  580. * reduced
  581. */
  582. int ascii_probability(byte[] rawtext) {
  583. int score = 70;
  584. int i, rawtextlen;
  585. rawtextlen = rawtext.length;
  586. for (i = 0; i < rawtextlen; i++) {
  587. if (rawtext[i] < 0) {
  588. score = score - 5;
  589. } else if (rawtext[i] == (byte) 0x1B) { // ESC (used by ISO 2022)
  590. score = score - 5;
  591. }
  592. }
  593. return score;
  594. }
  595. void initialize_frequencies() {
  596. int i, j;
  597. for (i = 0; i < 93; i++) {
  598. for (j = 0; j < 93; j++) {
  599. GBFreq[i][j] = 0;
  600. }
  601. }
  602. for (i = 0; i < 126; i++) {
  603. for (j = 0; j < 191; j++) {
  604. GBKFreq[i][j] = 0;
  605. }
  606. }
  607. for (i = 0; i < 93; i++) {
  608. for (j = 0; j < 157; j++) {
  609. Big5Freq[i][j] = 0;
  610. }
  611. }
  612. for (i = 0; i < 93; i++) {
  613. for (j = 0; j < 93; j++) {
  614. EUC_TWFreq[i][j] = 0;
  615. }
  616. }
  617. GBFreq[20][35] = 599;
  618. GBFreq[49][26] = 598;
  619. GBFreq[41][38] = 597;
  620. GBFreq[17][26] = 596;
  621. GBFreq[32][42] = 595;
  622. GBFreq[39][42] = 594;
  623. GBFreq[45][49] = 593;
  624. GBFreq[51][57] = 592;
  625. GBFreq[50][47] = 591;
  626. GBFreq[42][90] = 590;
  627. GBFreq[52][65] = 589;
  628. GBFreq[53][47] = 588;
  629. GBFreq[19][82] = 587;
  630. GBFreq[31][19] = 586;
  631. GBFreq[40][46] = 585;
  632. GBFreq[24][89] = 584;
  633. GBFreq[23][85] = 583;
  634. GBFreq[20][28] = 582;
  635. GBFreq[42][20] = 581;
  636. GBFreq[34][38] = 580;
  637. GBFreq[45][9] = 579;
  638. GBFreq[54][50] = 578;
  639. GBFreq[25][44] = 577;
  640. GBFreq[35][66] = 576;
  641. GBFreq[20][55] = 575;
  642. GBFreq[18][85] = 574;
  643. GBFreq[20][31] = 573;
  644. GBFreq[49][17] = 572;
  645. GBFreq[41][16] = 571;
  646. GBFreq[35][73] = 570;
  647. GBFreq[20][34] = 569;
  648. GBFreq[29][44] = 568;
  649. GBFreq[35][38] = 567;
  650. GBFreq[49][9] = 566;
  651. GBFreq[46][33] = 565;
  652. GBFreq[49][51] = 564;
  653. GBFreq[40][89] = 563;
  654. GBFreq[26][64] = 562;
  655. GBFreq[54][51] = 561;
  656. GBFreq[54][36] = 560;
  657. GBFreq[39][4] = 559;
  658. GBFreq[53][13] = 558;
  659. GBFreq[24][92] = 557;
  660. GBFreq[27][49] = 556;
  661. GBFreq[48][6] = 555;
  662. GBFreq[21][51] = 554;
  663. GBFreq[30][40] = 553;
  664. GBFreq[42][92] = 552;
  665. GBFreq[31][78] = 551;
  666. GBFreq[25][82] = 550;
  667. GBFreq[47][0] = 549;
  668. GBFreq[34][19] = 548;
  669. GBFreq[47][35] = 547;
  670. GBFreq[21][63] = 546;
  671. GBFreq[43][75] = 545;
  672. GBFreq[21][87] = 544;
  673. GBFreq[35][59] = 543;
  674. GBFreq[25][34] = 542;
  675. GBFreq[21][27] = 541;
  676. GBFreq[39][26] = 540;
  677. GBFreq[34][26] = 539;
  678. GBFreq[39][52] = 538;
  679. GBFreq[50][57] = 537;
  680. GBFreq[37][79] = 536;
  681. GBFreq[26][24] = 535;
  682. GBFreq[22][1] = 534;
  683. GBFreq[18][40] = 533;
  684. GBFreq[41][33] = 532;
  685. GBFreq[53][26] = 531;
  686. GBFreq[54][86] = 530;
  687. GBFreq[20][16] = 529;
  688. GBFreq[46][74] = 528;
  689. GBFreq[30][19] = 527;
  690. GBFreq[45][35] = 526;
  691. GBFreq[45][61] = 525;
  692. GBFreq[30][9] = 524;
  693. GBFreq[41][53] = 523;
  694. GBFreq[41][13] = 522;
  695. GBFreq[50][34] = 521;
  696. GBFreq[53][86] = 520;
  697. GBFreq[47][47] = 519;
  698. GBFreq[22][28] = 518;
  699. GBFreq[50][53] = 517;
  700. GBFreq[39][70] = 516;
  701. GBFreq[38][15] = 515;
  702. GBFreq[42][88] = 514;
  703. GBFreq[16][29] = 513;
  704. GBFreq[27][90] = 512;
  705. GBFreq[29][12] = 511;
  706. GBFreq[44][22] = 510;
  707. GBFreq[34][69] = 509;
  708. GBFreq[24][10] = 508;
  709. GBFreq[44][11] = 507;
  710. GBFreq[39][92] = 506;
  711. GBFreq[49][48] = 505;
  712. GBFreq[31][46] = 504;
  713. GBFreq[19][50] = 503;
  714. GBFreq[21][14] = 502;
  715. GBFreq[32][28] = 501;
  716. GBFreq[18][3] = 500;
  717. GBFreq[53][9] = 499;
  718. GBFreq[34][80] = 498;
  719. GBFreq[48][88] = 497;
  720. GBFreq[46][53] = 496;
  721. GBFreq[22][53] = 495;
  722. GBFreq[28][10] = 494;
  723. GBFreq[44][65] = 493;
  724. GBFreq[20][10] = 492;
  725. GBFreq[40][76] = 491;
  726. GBFreq[47][8] = 490;
  727. GBFreq[50][74] = 489;
  728. GBFreq[23][62] = 488;
  729. GBFreq[49][65] = 487;
  730. GBFreq[28][87] = 486;
  731. GBFreq[15][48] = 485;
  732. GBFreq[22][7] = 484;
  733. GBFreq[19][42] = 483;
  734. GBFreq[41][20] = 482;
  735. GBFreq[26][55] = 481;
  736. GBFreq[21][93] = 480;
  737. GBFreq[31][76] = 479;
  738. GBFreq[34][31] = 478;
  739. GBFreq[20][66] = 477;
  740. GBFreq[51][33] = 476;
  741. GBFreq[34][86] = 475;
  742. GBFreq[37][67] = 474;
  743. GBFreq[53][53] = 473;
  744. GBFreq[40][88] = 472;
  745. GBFreq[39][10] = 471;
  746. GBFreq[24][3] = 470;
  747. GBFreq[27][25] = 469;
  748. GBFreq[26][15] = 468;
  749. GBFreq[21][88] = 467;
  750. GBFreq[52][62] = 466;
  751. GBFreq[46][81] = 465;
  752. GBFreq[38][72] = 464;
  753. GBFreq[17][30] = 463;
  754. GBFreq[52][92] = 462;
  755. GBFreq[34][90] = 461;
  756. GBFreq[21][7] = 460;
  757. GBFreq[36][13] = 459;
  758. GBFreq[45][41] = 458;
  759. GBFreq[32][5] = 457;
  760. GBFreq[26][89] = 456;
  761. GBFreq[23][87] = 455;
  762. GBFreq[20][39] = 454;
  763. GBFreq[27][23] = 453;
  764. GBFreq[25][59] = 452;
  765. GBFreq[49][20] = 451;
  766. GBFreq[54][77] = 450;
  767. GBFreq[27][67] = 449;
  768. GBFreq[47][33] = 448;
  769. GBFreq[41][17] = 447;
  770. GBFreq[19][81] = 446;
  771. GBFreq[16][66] = 445;
  772. GBFreq[45][26] = 444;
  773. GBFreq[49][81] = 443;
  774. GBFreq[53][55] = 442;
  775. GBFreq[16][26] = 441;
  776. GBFreq[54][62] = 440;
  777. GBFreq[20][70] = 439;
  778. GBFreq[42][35] = 438;
  779. GBFreq[20][57] = 437;
  780. GBFreq[34][36] = 436;
  781. GBFreq[46][63] = 435;
  782. GBFreq[19][45] = 434;
  783. GBFreq[21][10] = 433;
  784. GBFreq[52][93] = 432;
  785. GBFreq[25][2] = 431;
  786. GBFreq[30][57] = 430;
  787. GBFreq[41][24] = 429;
  788. GBFreq[28][43] = 428;
  789. GBFreq[45][86] = 427;
  790. GBFreq[51][56] = 426;
  791. GBFreq[37][28] = 425;
  792. GBFreq[52][69] = 424;
  793. GBFreq[43][92] = 423;
  794. GBFreq[41][31] = 422;
  795. GBFreq[37][87] = 421;
  796. GBFreq[47][36] = 420;
  797. GBFreq[16][16] = 419;
  798. GBFreq[40][56] = 418;
  799. GBFreq[24][55] = 417;
  800. GBFreq[17][1] = 416;
  801. GBFreq[35][57] = 415;
  802. GBFreq[27][50] = 414;
  803. GBFreq[26][14] = 413;
  804. GBFreq[50][40] = 412;
  805. GBFreq[39][19] = 411;
  806. GBFreq[19][89] = 410;
  807. GBFreq[29][91] = 409;
  808. GBFreq[17][89] = 408;
  809. GBFreq[39][74] = 407;
  810. GBFreq[46][39] = 406;
  811. GBFreq[40][28] = 405;
  812. GBFreq[45][68] = 404;
  813. GBFreq[43][10] = 403;
  814. GBFreq[42][13] = 402;
  815. GBFreq[44][81] = 401;
  816. GBFreq[41][47] = 400;
  817. GBFreq[48][58] = 399;
  818. GBFreq[43][68] = 398;
  819. GBFreq[16][79] = 397;
  820. GBFreq[19][5] = 396;
  821. GBFreq[54][59] = 395;
  822. GBFreq[17][36] = 394;
  823. GBFreq[18][0] = 393;
  824. GBFreq[41][5] = 392;
  825. GBFreq[41][72] = 391;
  826. GBFreq[16][39] = 390;
  827. GBFreq[54][0] = 389;
  828. GBFreq[51][16] = 388;
  829. GBFreq[29][36] = 387;
  830. GBFreq[47][5] = 386;
  831. GBFreq[47][51] = 385;
  832. GBFreq[44][7] = 384;
  833. GBFreq[35][30] = 383;
  834. GBFreq[26][9] = 382;
  835. GBFreq[16][7] = 381;
  836. GBFreq[32][1] = 380;
  837. GBFreq[33][76] = 379;
  838. GBFreq[34][91] = 378;
  839. GBFreq[52][36] = 377;
  840. GBFreq[26][77] = 376;
  841. GBFreq[35][48] = 375;
  842. GBFreq[40][80] = 374;
  843. GBFreq[41][92] = 373;
  844. GBFreq[27][93] = 372;
  845. GBFreq[15][17] = 371;
  846. GBFreq[16][76] = 370;
  847. GBFreq[51][12] = 369;
  848. GBFreq[18][20] = 368;
  849. GBFreq[15][54] = 367;
  850. GBFreq[50][5] = 366;
  851. GBFreq[33][22] = 365;
  852. GBFreq[37][57] = 364;
  853. GBFreq[28][47] = 363;
  854. GBFreq[42][31] = 362;
  855. GBFreq[18][2] = 361;
  856. GBFreq[43][64] = 360;
  857. GBFreq[23][47] = 359;
  858. GBFreq[28][79] = 358;
  859. GBFreq[25][45] = 357;
  860. GBFreq[23][91] = 356;
  861. GBFreq[22][19] = 355;
  862. GBFreq[25][46] = 354;
  863. GBFreq[22][36] = 353;
  864. GBFreq[54][85] = 352;
  865. GBFreq[46][20] = 351;
  866. GBFreq[27][37] = 350;
  867. GBFreq[26][81] = 349;
  868. GBFreq[42][29] = 348;
  869. GBFreq[31][90] = 347;
  870. GBFreq[41][59] = 346;
  871. GBFreq[24][65] = 345;
  872. GBFreq[44][84] = 344;
  873. GBFreq[24][90] = 343;
  874. GBFreq[38][54] = 342;
  875. GBFreq[28][70] = 341;
  876. GBFreq[27][15] = 340;
  877. GBFreq[28][80] = 339;
  878. GBFreq[29][8] = 338;
  879. GBFreq[45][80] = 337;
  880. GBFreq[53][37] = 336;
  881. GBFreq[28][65] = 335;
  882. GBFreq[23][86] = 334;
  883. GBFreq[39][45] = 333;
  884. GBFreq[53][32] = 332;
  885. GBFreq[38][68] = 331;
  886. GBFreq[45][78] = 330;
  887. GBFreq[43][7] = 329;
  888. GBFreq[46][82] = 328;
  889. GBFreq[27][38] = 327;
  890. GBFreq[16][62] = 326;
  891. GBFreq[24][17] = 325;
  892. GBFreq[22][70] = 324;
  893. GBFreq[52][28] = 323;
  894. GBFreq[23][40] = 322;
  895. GBFreq[28][50] = 321;
  896. GBFreq[42][91] = 320;
  897. GBFreq[47][76] = 319;
  898. GBFreq[15][42] = 318;
  899. GBFreq[43][55] = 317;
  900. GBFreq[29][84] = 316;
  901. GBFreq[44][90] = 315;
  902. GBFreq[53][16] = 314;
  903. GBFreq[22][93] = 313;
  904. GBFreq[34][10] = 312;
  905. GBFreq[32][53] = 311;
  906. GBFreq[43][65] = 310;
  907. GBFreq[28][7] = 309;
  908. GBFreq[35][46] = 308;
  909. GBFreq[21][39] = 307;
  910. GBFreq[44][18] = 306;
  911. GBFreq[40][10] = 305;
  912. GBFreq[54][53] = 304;
  913. GBFreq[38][74] = 303;
  914. GBFreq[28][26] = 302;
  915. GBFreq[15][13] = 301;
  916. GBFreq[39][34] = 300;
  917. GBFreq[39][46] = 299;
  918. GBFreq[42][66] = 298;
  919. GBFreq[33][58] = 297;
  920. GBFreq[15][56] = 296;
  921. GBFreq[18][51] = 295;
  922. GBFreq[49][68] = 294;
  923. GBFreq[30][37] = 293;
  924. GBFreq[51][84] = 292;
  925. GBFreq[51][9] = 291;
  926. GBFreq[40][70] = 290;
  927. GBFreq[41][84] = 289;
  928. GBFreq[28][64] = 288;
  929. GBFreq[32][88] = 287;
  930. GBFreq[24][5] = 286;
  931. GBFreq[53][23] = 285;
  932. GBFreq[42][27] = 284;
  933. GBFreq[22][38] = 283;
  934. GBFreq[32][86] = 282;
  935. GBFreq[34][30] = 281;
  936. GBFreq[38][63] = 280;
  937. GBFreq[24][59] = 279;
  938. GBFreq[22][81] = 278;
  939. GBFreq[32][11] = 277;
  940. GBFreq[51][21] = 276;
  941. GBFreq[54][41] = 275;
  942. GBFreq[21][50] = 274;
  943. GBFreq[23][89] = 273;
  944. GBFreq[19][87] = 272;
  945. GBFreq[26][7] = 271;
  946. GBFreq[30][75] = 270;
  947. GBFreq[43][84] = 269;
  948. GBFreq[51][25] = 268;
  949. GBFreq[16][67] = 267;
  950. GBFreq[32][9] = 266;
  951. GBFreq[48][51] = 265;
  952. GBFreq[39][7] = 264;
  953. GBFreq[44][88] = 263;
  954. GBFreq[52][24] = 262;
  955. GBFreq[23][34] = 261;
  956. GBFreq[32][75] = 260;
  957. GBFreq[19][10] = 259;
  958. GBFreq[28][91] = 258;
  959. GBFreq[32][83] = 257;
  960. GBFreq[25][75] = 256;
  961. GBFreq[53][45] = 255;
  962. GBFreq[29][85] = 254;
  963. GBFreq[53][59] = 253;
  964. GBFreq[16][2] = 252;
  965. GBFreq[19][78] = 251;
  966. GBFreq[15][75] = 250;
  967. GBFreq[51][42] = 249;
  968. GBFreq[45][67] = 248;
  969. GBFreq[15][74] = 247;
  970. GBFreq[25][81] = 246;
  971. GBFreq[37][62] = 245;
  972. GBFreq[16][55] = 244;
  973. GBFreq[18][38] = 243;
  974. GBFreq[23][23] = 242;
  975. GBFreq[38][30] = 241;
  976. GBFreq[17][28] = 240;
  977. GBFreq[44][73] = 239;
  978. GBFreq[23][78] = 238;
  979. GBFreq[40][77] = 237;
  980. GBFreq[38][87] = 236;
  981. GBFreq[27][19] = 235;
  982. GBFreq[38][82] = 234;
  983. GBFreq[37][22] = 233;
  984. GBFreq[41][30] = 232;
  985. GBFreq[54][9] = 231;
  986. GBFreq[32][30] = 230;
  987. GBFreq[30][52] = 229;
  988. GBFreq[40][84] = 228;
  989. GBFreq[53][57] = 227;
  990. GBFreq[27][27] = 226;
  991. GBFreq[38][64] = 225;
  992. GBFreq[18][43] = 224;
  993. GBFreq[23][69] = 223;
  994. GBFreq[28][12] = 222;
  995. GBFreq[50][78] = 221;
  996. GBFreq[50][1] = 220;
  997. GBFreq[26][88] = 219;
  998. GBFreq[36][40] = 218;
  999. GBFreq[33][89] = 217;
  1000. GBFreq[41][28] = 216;
  1001. GBFreq[31][77] = 215;
  1002. GBFreq[46][1] = 214;
  1003. GBFreq[47][19] = 213;
  1004. GBFreq[35][55] = 212;
  1005. GBFreq[41][21] = 211;
  1006. GBFreq[27][10] = 210;
  1007. GBFreq[32][77] = 209;
  1008. GBFreq[26][37] = 208;
  1009. GBFreq[20][33] = 207;
  1010. GBFreq[41][52] = 206;
  1011. GBFreq[32][18] = 205;
  1012. GBFreq[38][13] = 204;
  1013. GBFreq[20][18] = 203;
  1014. GBFreq[20][24] = 202;
  1015. GBFreq[45][19] = 201;
  1016. GBFreq[18][53] = 200;
  1017. Big5Freq[9][89] = 600;
  1018. Big5Freq[11][15] = 599;
  1019. Big5Freq[3][66] = 598;
  1020. Big5Freq[6][121] = 597;
  1021. Big5Freq[3][0] = 596;
  1022. Big5Freq[5][82] = 595;
  1023. Big5Freq[3][42] = 594;
  1024. Big5Freq[5][34] = 593;
  1025. Big5Freq[3][8] = 592;
  1026. Big5Freq[3][6] = 591;
  1027. Big5Freq[3][67] = 590;
  1028. Big5Freq[7][139] = 589;
  1029. Big5Freq[23][137] = 588;
  1030. Big5Freq[12][46] = 587;
  1031. Big5Freq[4][8] = 586;
  1032. Big5Freq[4][41] = 585;
  1033. Big5Freq[18][47] = 584;
  1034. Big5Freq[12][114] = 583;
  1035. Big5Freq[6][1] = 582;
  1036. Big5Freq[22][60] = 581;
  1037. Big5Freq[5][46] = 580;
  1038. Big5Freq[11][79] = 579;
  1039. Big5Freq[3][23] = 578;
  1040. Big5Freq[7][114] = 577;
  1041. Big5Freq[29][102] = 576;
  1042. Big5Freq[19][14] = 575;
  1043. Big5Freq[4][133] = 574;
  1044. Big5Freq[3][29] = 573;
  1045. Big5Freq[4][109] = 572;
  1046. Big5Freq[14][127] = 571;
  1047. Big5Freq[5][48] = 570;
  1048. Big5Freq[13][104] = 569;
  1049. Big5Freq[3][132] = 568;
  1050. Big5Freq[26][64] = 567;
  1051. Big5Freq[7][19] = 566;
  1052. Big5Freq[4][12] = 565;
  1053. Big5Freq[11][124] = 564;
  1054. Big5Freq[7][89] = 563;
  1055. Big5Freq[15][124] = 562;
  1056. Big5Freq[4][108] = 561;
  1057. Big5Freq[19][66] = 560;
  1058. Big5Freq[3][21] = 559;
  1059. Big5Freq[24][12] = 558;
  1060. Big5Freq[28][111] = 557;
  1061. Big5Freq[12][107] = 556;
  1062. Big5Freq[3][112] = 555;
  1063. Big5Freq[8][113] = 554;
  1064. Big5Freq[5][40] = 553;
  1065. Big5Freq[26][145] = 552;
  1066. Big5Freq[3][48] = 551;
  1067. Big5Freq[3][70] = 550;
  1068. Big5Freq[22][17] = 549;
  1069. Big5Freq[16][47] = 548;
  1070. Big5Freq[3][53] = 547;
  1071. Big5Freq[4][24] = 546;
  1072. Big5Freq[32][120] = 545;
  1073. Big5Freq[24][49] = 544;
  1074. Big5Freq[24][142] = 543;
  1075. Big5Freq[18][66] = 542;
  1076. Big5Freq[29][150] = 541;
  1077. Big5Freq[5][122] = 540;
  1078. Big5Freq[5][114] = 539;
  1079. Big5Freq[3][44] = 538;
  1080. Big5Freq[10][128] = 537;
  1081. Big5Freq[15][20] = 536;
  1082. Big5Freq[13][33] = 535;
  1083. Big5Freq[14][87] = 534;
  1084. Big5Freq[3][126] = 533;
  1085. Big5Freq[4][53] = 532;
  1086. Big5Freq[4][40] = 531;
  1087. Big5Freq[9][93] = 530;
  1088. Big5Freq[15][137] = 529;
  1089. Big5Freq[10][123] = 528;
  1090. Big5Freq[4][56] = 527;
  1091. Big5Freq[5][71] = 526;
  1092. Big5Freq[10][8] = 525;
  1093. Big5Freq[5][16] = 524;
  1094. Big5Freq[5][146] = 523;
  1095. Big5Freq[18][88] = 522;
  1096. Big5Freq[24][4] = 521;
  1097. Big5Freq[20][47] = 520;
  1098. Big5Freq[5][33] = 519;
  1099. Big5Freq[9][43] = 518;
  1100. Big5Freq[20][12] = 517;
  1101. Big5Freq[20][13] = 516;
  1102. Big5Freq[5][156] = 515;
  1103. Big5Freq[22][140] = 514;
  1104. Big5Freq[8][146] = 513;
  1105. Big5Freq[21][123] = 512;
  1106. Big5Freq[4][90] = 511;
  1107. Big5Freq[5][62] = 510;
  1108. Big5Freq[17][59] = 509;
  1109. Big5Freq[10][37] = 508;
  1110. Big5Freq[18][107] = 507;
  1111. Big5Freq[14][53] = 506;
  1112. Big5Freq[22][51] = 505;
  1113. Big5Freq[8][13] = 504;
  1114. Big5Freq[5][29] = 503;
  1115. Big5Freq[9][7] = 502;
  1116. Big5Freq[22][14] = 501;
  1117. Big5Freq[8][55] = 500;
  1118. Big5Freq[33][9] = 499;
  1119. Big5Freq[16][64] = 498;
  1120. Big5Freq[7][131] = 497;
  1121. Big5Freq[34][4] = 496;
  1122. Big5Freq[7][101] = 495;
  1123. Big5Freq[11][139] = 494;
  1124. Big5Freq[3][135] = 493;
  1125. Big5Freq[7][102] = 492;
  1126. Big5Freq[17][13] = 491;
  1127. Big5Freq[3][20] = 490;
  1128. Big5Freq[27][106] = 489;
  1129. Big5Freq[5][88] = 488;
  1130. Big5Freq[6][33] = 487;
  1131. Big5Freq[5][139] = 486;
  1132. Big5Freq[6][0] = 485;
  1133. Big5Freq[17][58] = 484;
  1134. Big5Freq[5][133] = 483;
  1135. Big5Freq[9][107] = 482;
  1136. Big5Freq[23][39] = 481;
  1137. Big5Freq[5][23] = 480;
  1138. Big5Freq[3][79] = 479;
  1139. Big5Freq[32][97] = 478;
  1140. Big5Freq[3][136] = 477;
  1141. Big5Freq[4][94] = 476;
  1142. Big5Freq[21][61] = 475;
  1143. Big5Freq[23][123] = 474;
  1144. Big5Freq[26][16] = 473;
  1145. Big5Freq[24][137] = 472;
  1146. Big5Freq[22][18] = 471;
  1147. Big5Freq[5][1] = 470;
  1148. Big5Freq[20][119] = 469;
  1149. Big5Freq[3][7] = 468;
  1150. Big5Freq[10][79] = 467;
  1151. Big5Freq[15][105] = 466;
  1152. Big5Freq[3][144] = 465;
  1153. Big5Freq[12][80] = 464;
  1154. Big5Freq[15][73] = 463;
  1155. Big5Freq[3][19] = 462;
  1156. Big5Freq[8][109] = 461;
  1157. Big5Freq[3][15] = 460;
  1158. Big5Freq[31][82] = 459;
  1159. Big5Freq[3][43] = 458;
  1160. Big5Freq[25][119] = 457;
  1161. Big5Freq[16][111] = 456;
  1162. Big5Freq[7][77] = 455;
  1163. Big5Freq[3][95] = 454;
  1164. Big5Freq[24][82] = 453;
  1165. Big5Freq[7][52] = 452;
  1166. Big5Freq[9][151] = 451;
  1167. Big5Freq[3][129] = 450;
  1168. Big5Freq[5][87] = 449;
  1169. Big5Freq[3][55] = 448;
  1170. Big5Freq[8][153] = 447;
  1171. Big5Freq[4][83] = 446;
  1172. Big5Freq[3][114] = 445;
  1173. Big5Freq[23][147] = 444;
  1174. Big5Freq[15][31] = 443;
  1175. Big5Freq[3][54] = 442;
  1176. Big5Freq[11][122] = 441;
  1177. Big5Freq[4][4] = 440;
  1178. Big5Freq[34][149] = 439;
  1179. Big5Freq[3][17] = 438;
  1180. Big5Freq[21][64] = 437;
  1181. Big5Freq[26][144] = 436;
  1182. Big5Freq[4][62] = 435;
  1183. Big5Freq[8][15] = 434;
  1184. Big5Freq[35][80] = 433;
  1185. Big5Freq[7][110] = 432;
  1186. Big5Freq[23][114] = 431;
  1187. Big5Freq[3][108] = 430;
  1188. Big5Freq[3][62] = 429;
  1189. Big5Freq[21][41] = 428;
  1190. Big5Freq[15][99] = 427;
  1191. Big5Freq[5][47] = 426;
  1192. Big5Freq[4][96] = 425;
  1193. Big5Freq[20][122] = 424;
  1194. Big5Freq[5][21] = 423;
  1195. Big5Freq[4][157] = 422;
  1196. Big5Freq[16][14] = 421;
  1197. Big5Freq[3][117] = 420;
  1198. Big5Freq[7][129] = 419;
  1199. Big5Freq[4][27] = 418;
  1200. Big5Freq[5][30] = 417;
  1201. Big5Freq[22][16] = 416;
  1202. Big5Freq[5][64] = 415;
  1203. Big5Freq[17][99] = 414;
  1204. Big5Freq[17][57] = 413;
  1205. Big5Freq[8][105] = 412;
  1206. Big5Freq[5][112] = 411;
  1207. Big5Freq[20][59] = 410;
  1208. Big5Freq[6][129] = 409;
  1209. Big5Freq[18][17] = 408;
  1210. Big5Freq[3][92] = 407;
  1211. Big5Freq[28][118] = 406;
  1212. Big5Freq[3][109] = 405;
  1213. Big5Freq[31][51] = 404;
  1214. Big5Freq[13][116] = 403;
  1215. Big5Freq[6][15] = 402;
  1216. Big5Freq[36][136] = 401;
  1217. Big5Freq[12][74] = 400;
  1218. Big5Freq[20][88] = 399;
  1219. Big5Freq[36][68] = 398;
  1220. Big5Freq[3][147] = 397;
  1221. Big5Freq[15][84] = 396;
  1222. Big5Freq[16][32] = 395;
  1223. Big5Freq[16][58] = 394;
  1224. Big5Freq[7][66] = 393;
  1225. Big5Freq[23][107] = 392;
  1226. Big5Freq[9][6] = 391;
  1227. Big5Freq[12][86] = 390;
  1228. Big5Freq[23][112] = 389;
  1229. Big5Freq[37][23] = 388;
  1230. Big5Freq[3][138] = 387;
  1231. Big5Freq[20][68] = 386;
  1232. Big5Freq[15][116] = 385;
  1233. Big5Freq[18][64] = 384;
  1234. Big5Freq[12][139] = 383;
  1235. Big5Freq[11][155] = 382;
  1236. Big5Freq[4][156] = 381;
  1237. Big5Freq[12][84] = 380;
  1238. Big5Freq[18][49] = 379;
  1239. Big5Freq[25][125] = 378;
  1240. Big5Freq[25][147] = 377;
  1241. Big5Freq[15][110] = 376;
  1242. Big5Freq[19][96] = 375;
  1243. Big5Freq[30][152] = 374;
  1244. Big5Freq[6][31] = 373;
  1245. Big5Freq[27][117] = 372;
  1246. Big5Freq[3][10] = 371;
  1247. Big5Freq[6][131] = 370;
  1248. Big5Freq[13][112] = 369;
  1249. Big5Freq[36][156] = 368;
  1250. Big5Freq[4][60] = 367;
  1251. Big5Freq[15][121] = 366;
  1252. Big5Freq[4][112] = 365;
  1253. Big5Freq[30][142] = 364;
  1254. Big5Freq[23][154] = 363;
  1255. Big5Freq[27][101] = 362;
  1256. Big5Freq[9][140] = 361;
  1257. Big5Freq[3][89] = 360;
  1258. Big5Freq[18][148] = 359;
  1259. Big5Freq[4][69] = 358;
  1260. Big5Freq[16][49] = 357;
  1261. Big5Freq[6][117] = 356;
  1262. Big5Freq[36][55] = 355;
  1263. Big5Freq[5][123] = 354;
  1264. Big5Freq[4][126] = 353;
  1265. Big5Freq[4][119] = 352;
  1266. Big5Freq[9][95] = 351;
  1267. Big5Freq[5][24] = 350;
  1268. Big5Freq[16][133] = 349;
  1269. Big5Freq[10][134] = 348;
  1270. Big5Freq[26][59] = 347;
  1271. Big5Freq[6][41] = 346;
  1272. Big5Freq[6][146] = 345;
  1273. Big5Freq[19][24] = 344;
  1274. Big5Freq[5][113] = 343;
  1275. Big5Freq[10][118] = 342;
  1276. Big5Freq[34][151] = 341;
  1277. Big5Freq[9][72] = 340;
  1278. Big5Freq[31][25] = 339;
  1279. Big5Freq[18][126] = 338;
  1280. Big5Freq[18][28] = 337;
  1281. Big5Freq[4][153] = 336;
  1282. Big5Freq[3][84] = 335;
  1283. Big5Freq[21][18] = 334;
  1284. Big5Freq[25][129] = 333;
  1285. Big5Freq[6][107] = 332;
  1286. Big5Freq[12][25] = 331;
  1287. Big5Freq[17][109] = 330;
  1288. Big5Freq[7][76] = 329;
  1289. Big5Freq[15][15] = 328;
  1290. Big5Freq[4][14] = 327;
  1291. Big5Freq[23][88] = 326;
  1292. Big5Freq[18][2] = 325;
  1293. Big5Freq[6][88] = 324;
  1294. Big5Freq[16][84] = 323;
  1295. Big5Freq[12][48] = 322;
  1296. Big5Freq[7][68] = 321;
  1297. Big5Freq[5][50] = 320;
  1298. Big5Freq[13][54] = 319;
  1299. Big5Freq[7][98] = 318;
  1300. Big5Freq[11][6] = 317;
  1301. Big5Freq[9][80] = 316;
  1302. Big5Freq[16][41] = 315;
  1303. Big5Freq[7][43] = 314;
  1304. Big5Freq[28][117] = 313;
  1305. Big5Freq[3][51] = 312;
  1306. Big5Freq[7][3] = 311;
  1307. Big5Freq[20][81] = 310;
  1308. Big5Freq[4][2] = 309;
  1309. Big5Freq[11][16] = 308;
  1310. Big5Freq[10][4] = 307;
  1311. Big5Freq[10][119] = 306;
  1312. Big5Freq[6][142] = 305;
  1313. Big5Freq[18][51] = 304;
  1314. Big5Freq[8][144] = 303;
  1315. Big5Freq[10][65] = 302;
  1316. Big5Freq[11][64] = 301;
  1317. Big5Freq[11][130] = 300;
  1318. Big5Freq[9][92] = 299;
  1319. Big5Freq[18][29] = 298;
  1320. Big5Freq[18][78] = 297;
  1321. Big5Freq[18][151] = 296;
  1322. Big5Freq[33][127] = 295;
  1323. Big5Freq[35][113] = 294;
  1324. Big5Freq[10][155] = 293;
  1325. Big5Freq[3][76] = 292;
  1326. Big5Freq[36][123] = 291;
  1327. Big5Freq[13][143] = 290;
  1328. Big5Freq[5][135] = 289;
  1329. Big5Freq[23][116] = 288;
  1330. Big5Freq[6][101] = 287;
  1331. Big5Freq[14][74] = 286;
  1332. Big5Freq[7][153] = 285;
  1333. Big5Freq[3][101] = 284;
  1334. Big5Freq[9][74] = 283;
  1335. Big5Freq[3][156] = 282;
  1336. Big5Freq[4][147] = 281;
  1337. Big5Freq[9][12] = 280;
  1338. Big5Freq[18][133] = 279;
  1339. Big5Freq[4][0] = 278;
  1340. Big5Freq[7][155] = 277;
  1341. Big5Freq[9][144] = 276;
  1342. Big5Freq[23][49] = 275;
  1343. Big5Freq[5][89] = 274;
  1344. Big5Freq[10][11] = 273;
  1345. Big5Freq[3][110] = 272;
  1346. Big5Freq[3][40] = 271;
  1347. Big5Freq[29][115] = 270;
  1348. Big5Freq[9][100] = 269;
  1349. Big5Freq[21][67] = 268;
  1350. Big5Freq[23][145] = 267;
  1351. Big5Freq[10][47] = 266;
  1352. Big5Freq[4][31] = 265;
  1353. Big5Freq[4][81] = 264;
  1354. Big5Freq[22][62] = 263;
  1355. Big5Freq[4][28] = 262;
  1356. Big5Freq[27][39] = 261;
  1357. Big5Freq[27][54] = 260;
  1358. Big5Freq[32][46] = 259;
  1359. Big5Freq[4][76] = 258;
  1360. Big5Freq[26][15] = 257;
  1361. Big5Freq[12][154] = 256;
  1362. Big5Freq[9][150] = 255;
  1363. Big5Freq[15][17] = 254;
  1364. Big5Freq[5][129] = 253;
  1365. Big5Freq[10][40] = 252;
  1366. Big5Freq[13][37] = 251;
  1367. Big5Freq[31][104] = 250;
  1368. Big5Freq[3][152] = 249;
  1369. Big5Freq[5][22] = 248;
  1370. Big5Freq[8][48] = 247;
  1371. Big5Freq[4][74] = 246;
  1372. Big5Freq[6][17] = 245;
  1373. Big5Freq[30][82] = 244;
  1374. Big5Freq[4][116] = 243;
  1375. Big5Freq[16][42] = 242;
  1376. Big5Freq[5][55] = 241;
  1377. Big5Freq[4][64] = 240;
  1378. Big5Freq[14][19] = 239;
  1379. Big5Freq[35][82] = 238;
  1380. Big5Freq[30][139] = 237;
  1381. Big5Freq[26][

Large files are truncated, but you can click here to view the full file