PageRenderTime 51ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/src-lucene-tokenizer/org/apache/lucene/analysis/ja/ToStringUtil.java

https://bitbucket.org/johtani/junsai-lucene-tokenizer
Java | 1020 lines | 979 code | 10 blank | 31 comment | 367 complexity | 6e6a317f5ff7c6a6f6ab9aae70a21b01 MD5 | raw file
  1. package org.apache.lucene.analysis.ja;
  2. /**
  3. * Copyright 2004 The Apache Software Foundation
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. import java.util.HashMap;
  18. public class ToStringUtil {
  19. // a translation map for parts of speech, only used for reflectWith
  20. private static final HashMap<String,String> posTranslations = new HashMap<String,String>();
  21. static {
  22. posTranslations.put("名詞", "noun");
  23. posTranslations.put("名詞-一般", "noun-common");
  24. posTranslations.put("名詞-固有名詞", "noun-proper");
  25. posTranslations.put("名詞-固有名詞-一般", "noun-proper-misc");
  26. posTranslations.put("名詞-固有名詞-人名", "noun-proper-person");
  27. posTranslations.put("名詞-固有名詞-人名-一般", "noun-proper-person-misc");
  28. posTranslations.put("名詞-固有名詞-人名-姓", "noun-proper-person-surname");
  29. posTranslations.put("名詞-固有名詞-人名-名", "noun-proper-person-given_name");
  30. posTranslations.put("名詞-固有名詞-組織", "noun-proper-organization");
  31. posTranslations.put("名詞-固有名詞-地域", "noun-proper-place");
  32. posTranslations.put("名詞-固有名詞-地域-一般", "noun-proper-place-misc");
  33. posTranslations.put("名詞-固有名詞-地域-国", "noun-proper-place-country");
  34. posTranslations.put("名詞-代名詞", "noun-pronoun");
  35. posTranslations.put("名詞-代名詞-一般", "noun-pronoun-misc");
  36. posTranslations.put("名詞-代名詞-縮約", "noun-pronoun-contraction");
  37. posTranslations.put("名詞-副詞可能", "noun-adverbial");
  38. posTranslations.put("名詞-サ変接続", "noun-verbal");
  39. posTranslations.put("名詞-形容動詞語幹", "noun-adjective-base");
  40. posTranslations.put("名詞-数", "noun-numeric");
  41. posTranslations.put("名詞-非自立", "noun-affix");
  42. posTranslations.put("名詞-非自立-一般", "noun-affix-misc");
  43. posTranslations.put("名詞-非自立-副詞可能", "noun-affix-adverbial");
  44. posTranslations.put("名詞-非自立-助動詞語幹", "noun-affix-aux");
  45. posTranslations.put("名詞-非自立-形容動詞語幹", "noun-affix-adjective-base");
  46. posTranslations.put("名詞-特殊", "noun-special");
  47. posTranslations.put("名詞-特殊-助動詞語幹", "noun-special-aux");
  48. posTranslations.put("名詞-接尾", "noun-suffix");
  49. posTranslations.put("名詞-接尾-一般", "noun-suffix-misc");
  50. posTranslations.put("名詞-接尾-人名", "noun-suffix-person");
  51. posTranslations.put("名詞-接尾-地域", "noun-suffix-place");
  52. posTranslations.put("名詞-接尾-サ変接続", "noun-suffix-verbal");
  53. posTranslations.put("名詞-接尾-助動詞語幹", "noun-suffix-aux");
  54. posTranslations.put("名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base");
  55. posTranslations.put("名詞-接尾-副詞可能", "noun-suffix-adverbial");
  56. posTranslations.put("名詞-接尾-助数詞", "noun-suffix-classifier");
  57. posTranslations.put("名詞-接尾-特殊", "noun-suffix-special");
  58. posTranslations.put("名詞-接続詞的", "noun-suffix-conjunctive");
  59. posTranslations.put("名詞-動詞非自立的", "noun-verbal_aux");
  60. posTranslations.put("名詞-引用文字列", "noun-quotation");
  61. posTranslations.put("名詞-ナイ形容詞語幹", "noun-nai_adjective");
  62. posTranslations.put("接頭詞", "prefix");
  63. posTranslations.put("接頭詞-名詞接続", "prefix-nominal");
  64. posTranslations.put("接頭詞-動詞接続", "prefix-verbal");
  65. posTranslations.put("接頭詞-形容詞接続", "prefix-adjectival");
  66. posTranslations.put("接頭詞-数接続", "prefix-numerical");
  67. posTranslations.put("動詞", "verb");
  68. posTranslations.put("動詞-自立", "verb-main");
  69. posTranslations.put("動詞-非自立", "verb-auxiliary");
  70. posTranslations.put("動詞-接尾", "verb-suffix");
  71. posTranslations.put("形容詞", "adjective");
  72. posTranslations.put("形容詞-自立", "adjective-main");
  73. posTranslations.put("形容詞-非自立", "adjective-auxiliary");
  74. posTranslations.put("形容詞-接尾", "adjective-suffix");
  75. posTranslations.put("副詞", "adverb");
  76. posTranslations.put("副詞-一般", "adverb-misc");
  77. posTranslations.put("副詞-助詞類接続", "adverb-particle_conjunction");
  78. posTranslations.put("連体詞", "adnominal");
  79. posTranslations.put("接続詞", "conjunction");
  80. posTranslations.put("助詞", "particle");
  81. posTranslations.put("助詞-格助詞", "particle-case");
  82. posTranslations.put("助詞-格助詞-一般", "particle-case-misc");
  83. posTranslations.put("助詞-格助詞-引用", "particle-case-quote");
  84. posTranslations.put("助詞-格助詞-連語", "particle-case-compound");
  85. posTranslations.put("助詞-接続助詞", "particle-conjunctive");
  86. posTranslations.put("助詞-係助詞", "particle-dependency");
  87. posTranslations.put("助詞-副助詞", "particle-adverbial");
  88. posTranslations.put("助詞-間投助詞", "particle-interjective");
  89. posTranslations.put("助詞-並立助詞", "particle-coordinate");
  90. posTranslations.put("助詞-終助詞", "particle-final");
  91. posTranslations.put("助詞-副助詞/並立助詞/終助詞", "particle-adverbial/conjunctive/final");
  92. posTranslations.put("助詞-連体化", "particle-adnominalizer");
  93. posTranslations.put("助詞-副詞化", "particle-adnominalizer");
  94. posTranslations.put("助詞-特殊", "particle-special");
  95. posTranslations.put("助動詞", "auxiliary-verb");
  96. posTranslations.put("感動詞", "interjection");
  97. posTranslations.put("記号", "symbol");
  98. posTranslations.put("記号-一般", "symbol-misc");
  99. posTranslations.put("記号-句点", "symbol-period");
  100. posTranslations.put("記号-読点", "symbol-comma");
  101. posTranslations.put("記号-空白", "symbol-space");
  102. posTranslations.put("記号-括弧開", "symbol-open_bracket");
  103. posTranslations.put("記号-括弧閉", "symbol-close_bracket");
  104. posTranslations.put("記号-アルファベット", "symbol-alphabetic");
  105. posTranslations.put("その他", "other");
  106. posTranslations.put("その他-間投", "other-interjection");
  107. posTranslations.put("フィラー", "filler");
  108. posTranslations.put("非言語音", "non-verbal");
  109. posTranslations.put("語断片", "fragment");
  110. posTranslations.put("未知語", "unknown");
  111. }
  112. /**
  113. * Get the english form of a POS tag
  114. */
  115. public static String getPOSTranslation(String s) {
  116. return posTranslations.get(s);
  117. }
  118. // a translation map for conjugational types, only used for reflectWith
  119. private static final HashMap<String,String> conjTypeTranslations = new HashMap<String,String>();
  120. static {
  121. conjTypeTranslations.put("*", "*");
  122. conjTypeTranslations.put("形容詞・アウオ段", "adj-group-a-o-u");
  123. conjTypeTranslations.put("形容詞・イ段", "adj-group-i");
  124. conjTypeTranslations.put("不変化型", "non-inflectional");
  125. conjTypeTranslations.put("特殊・タ", "special-da");
  126. conjTypeTranslations.put("特殊・ダ", "special-ta");
  127. conjTypeTranslations.put("文語・ゴトシ", "classical-gotoshi");
  128. conjTypeTranslations.put("特殊・ジャ", "special-ja");
  129. conjTypeTranslations.put("特殊・ナイ", "special-nai");
  130. conjTypeTranslations.put("五段・ラ行特殊", "5-row-cons-r-special");
  131. conjTypeTranslations.put("特殊・ヌ", "special-nu");
  132. conjTypeTranslations.put("文語・キ", "classical-ki");
  133. conjTypeTranslations.put("特殊・タイ", "special-tai");
  134. conjTypeTranslations.put("文語・ベシ", "classical-beshi");
  135. conjTypeTranslations.put("特殊・ヤ", "special-ya");
  136. conjTypeTranslations.put("文語・マジ", "classical-maji");
  137. conjTypeTranslations.put("下二・タ行", "2-row-lower-cons-t");
  138. conjTypeTranslations.put("特殊・デス", "special-desu");
  139. conjTypeTranslations.put("特殊・マス", "special-masu");
  140. conjTypeTranslations.put("五段・ラ行アル", "5-row-aru");
  141. conjTypeTranslations.put("文語・ナリ", "classical-nari");
  142. conjTypeTranslations.put("文語・リ", "classical-ri");
  143. conjTypeTranslations.put("文語・ケリ", "classical-keri");
  144. conjTypeTranslations.put("文語・ル", "classical-ru");
  145. conjTypeTranslations.put("五段・カ行イ音便", "5-row-cons-k-i-onbin");
  146. conjTypeTranslations.put("五段・サ行", "5-row-cons-s");
  147. conjTypeTranslations.put("一段", "1-row");
  148. conjTypeTranslations.put("五段・ワ行促音便", "5-row-cons-w-cons-onbin");
  149. conjTypeTranslations.put("五段・マ行", "5-row-cons-m");
  150. conjTypeTranslations.put("五段・タ行", "5-row-cons-t");
  151. conjTypeTranslations.put("五段・ラ行", "5-row-cons-r");
  152. conjTypeTranslations.put("サ変・−スル", "irregular-suffix-suru");
  153. conjTypeTranslations.put("五段・ガ行", "5-row-cons-g");
  154. conjTypeTranslations.put("サ変・−ズル", "irregular-suffix-zuru");
  155. conjTypeTranslations.put("五段・バ行 ", "5-row-cons-b");
  156. conjTypeTranslations.put("五段・ワ行ウ音便", "5-row-cons-w-u-onbin");
  157. conjTypeTranslations.put("下二・ダ行", "2-row-lower-cons-d");
  158. conjTypeTranslations.put("五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku");
  159. conjTypeTranslations.put("上二・ダ行", "2-row-upper-cons-d");
  160. conjTypeTranslations.put("五段・カ行促音便", "5-row-cons-k-cons-onbin");
  161. conjTypeTranslations.put("一段・得ル", "1-row-eru");
  162. conjTypeTranslations.put("四段・タ行", "4-row-cons-t");
  163. conjTypeTranslations.put("五段・ナ行", "5-row-cons-n");
  164. conjTypeTranslations.put("下二・ハ行", "2-row-lower-cons-h");
  165. conjTypeTranslations.put("四段・ハ行", "4-row-cons-h");
  166. conjTypeTranslations.put("四段・バ行", "4-row-cons-b");
  167. conjTypeTranslations.put("サ変・スル", "irregular-suru");
  168. conjTypeTranslations.put("上二・ハ行", "2-row-upper-cons-h");
  169. conjTypeTranslations.put("下二・マ行", "2-row-lower-cons-m");
  170. conjTypeTranslations.put("四段・サ行", "4-row-cons-s");
  171. conjTypeTranslations.put("下二・ガ行", "2-row-lower-cons-g");
  172. conjTypeTranslations.put("カ変・来ル", "kuru-kanji");
  173. conjTypeTranslations.put("一段・クレル", "1-row-kureru");
  174. conjTypeTranslations.put("下二・得", "2-row-lower-u");
  175. conjTypeTranslations.put("カ変・クル", "kuru-kana");
  176. conjTypeTranslations.put("ラ変", "irregular-cons-r");
  177. conjTypeTranslations.put("下二・カ行", "2-row-lower-cons-k");
  178. }
  179. /**
  180. * Get the english form of a conjugational type
  181. */
  182. public static String getConjTypeTranslation(String s) {
  183. return conjTypeTranslations.get(s);
  184. }
  185. // a translation map for conjugated forms, only used for reflectWith
  186. private static final HashMap<String,String> conjFormTranslations = new HashMap<String,String>();
  187. static {
  188. conjFormTranslations.put("*", "*");
  189. conjFormTranslations.put("基本形", "base");
  190. conjFormTranslations.put("文語基本形", "classical-base");
  191. conjFormTranslations.put("未然ヌ接続", "imperfective-nu-connection");
  192. conjFormTranslations.put("未然ウ接続", "imperfective-u-connection");
  193. conjFormTranslations.put("連用タ接続", "conjunctive-ta-connection");
  194. conjFormTranslations.put("連用テ接続", "conjunctive-te-connection");
  195. conjFormTranslations.put("連用ゴザイ接続", "conjunctive-gozai-connection");
  196. conjFormTranslations.put("体言接続", "uninflected-connection");
  197. conjFormTranslations.put("仮定形", "subjunctive");
  198. conjFormTranslations.put("命令e", "imperative-e");
  199. conjFormTranslations.put("仮定縮約1", "conditional-contracted-1");
  200. conjFormTranslations.put("仮定縮約2", "conditional-contracted-2");
  201. conjFormTranslations.put("ガル接続", "garu-connection");
  202. conjFormTranslations.put("未然形", "imperfective");
  203. conjFormTranslations.put("連用形", "conjunctive");
  204. conjFormTranslations.put("音便基本形", "onbin-base");
  205. conjFormTranslations.put("連用デ接続", "conjunctive-de-connection");
  206. conjFormTranslations.put("未然特殊", "imperfective-special");
  207. conjFormTranslations.put("命令i", "imperative-i");
  208. conjFormTranslations.put("連用ニ接続", "conjunctive-ni-connection");
  209. conjFormTranslations.put("命令yo", "imperative-yo");
  210. conjFormTranslations.put("体言接続特殊", "adnominal-special");
  211. conjFormTranslations.put("命令ro", "imperative-ro");
  212. conjFormTranslations.put("体言接続特殊2", "uninflected-special-connection-2");
  213. conjFormTranslations.put("未然レル接続", "imperfective-reru-connection");
  214. conjFormTranslations.put("現代基本形", "modern-base");
  215. }
  216. /**
  217. * Get the english form of a conjugated form
  218. */
  219. public static String getConjFormTranslation(String s) {
  220. return conjFormTranslations.get(s);
  221. }
  222. /**
  223. * Romanize katakana with modified hepburn
  224. */
  225. public static String getRomanization(String s) {
  226. StringBuilder builder = new StringBuilder();
  227. final int len = s.length();
  228. for (int i = 0; i < len; i++) {
  229. // maximum lookahead: 3
  230. char ch = s.charAt(i);
  231. char ch2 = (i < len - 1) ? s.charAt(i + 1) : 0;
  232. char ch3 = (i < len - 2) ? s.charAt(i + 2) : 0;
  233. main: switch (ch) {
  234. case 'ッ':
  235. switch (ch2) {
  236. case 'カ':
  237. case 'キ':
  238. case 'ク':
  239. case 'ケ':
  240. case 'コ':
  241. builder.append('k');
  242. break main;
  243. case 'サ':
  244. case 'シ':
  245. case 'ス':
  246. case 'セ':
  247. case 'ソ':
  248. builder.append('s');
  249. break main;
  250. case 'タ':
  251. case 'チ':
  252. case 'ツ':
  253. case 'テ':
  254. case 'ト':
  255. builder.append('t');
  256. break main;
  257. case 'パ':
  258. case 'ピ':
  259. case 'プ':
  260. case 'ペ':
  261. case 'ポ':
  262. builder.append('p');
  263. break main;
  264. }
  265. break;
  266. case 'ア':
  267. builder.append('a');
  268. break;
  269. case 'イ':
  270. if (ch2 == 'ィ') {
  271. builder.append("yi");
  272. i++;
  273. } else if (ch2 == 'ェ') {
  274. builder.append("ye");
  275. i++;
  276. } else {
  277. builder.append('i');
  278. }
  279. break;
  280. case 'ウ':
  281. switch(ch2) {
  282. case 'ァ':
  283. builder.append("wa");
  284. i++;
  285. break;
  286. case 'ィ':
  287. builder.append("wi");
  288. i++;
  289. break;
  290. case 'ゥ':
  291. builder.append("wu");
  292. i++;
  293. break;
  294. case 'ェ':
  295. builder.append("we");
  296. i++;
  297. break;
  298. case 'ォ':
  299. builder.append("wo");
  300. i++;
  301. break;
  302. case 'ュ':
  303. builder.append("wyu");
  304. i++;
  305. break;
  306. default:
  307. builder.append('u');
  308. break;
  309. }
  310. break;
  311. case 'エ':
  312. builder.append('e');
  313. break;
  314. case 'オ':
  315. if (ch2 == 'ウ') {
  316. builder.append('ō');
  317. i++;
  318. } else {
  319. builder.append('o');
  320. }
  321. break;
  322. case 'カ':
  323. builder.append("ka");
  324. break;
  325. case 'キ':
  326. if (ch2 == 'ョ' && ch3 == 'ウ') {
  327. builder.append("kyō");
  328. i += 2;
  329. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  330. builder.append("kyū");
  331. i += 2;
  332. } else if (ch2 == 'ャ') {
  333. builder.append("kya");
  334. i++;
  335. } else if (ch2 == 'ョ') {
  336. builder.append("kyo");
  337. i++;
  338. } else if (ch2 == 'ュ') {
  339. builder.append("kyu");
  340. i++;
  341. } else if (ch2 == 'ェ') {
  342. builder.append("kye");
  343. i++;
  344. } else {
  345. builder.append("ki");
  346. }
  347. break;
  348. case 'ク':
  349. switch(ch2) {
  350. case 'ァ':
  351. builder.append("kwa");
  352. i++;
  353. break;
  354. case 'ィ':
  355. builder.append("kwi");
  356. i++;
  357. break;
  358. case 'ェ':
  359. builder.append("kwe");
  360. i++;
  361. break;
  362. case 'ォ':
  363. builder.append("kwo");
  364. i++;
  365. break;
  366. case 'ヮ':
  367. builder.append("kwa");
  368. i++;
  369. break;
  370. default:
  371. builder.append("ku");
  372. break;
  373. }
  374. break;
  375. case 'ケ':
  376. builder.append("ke");
  377. break;
  378. case 'コ':
  379. if (ch2 == 'ウ') {
  380. builder.append("kō");
  381. i++;
  382. } else {
  383. builder.append("ko");
  384. }
  385. break;
  386. case 'サ':
  387. builder.append("sa");
  388. break;
  389. case 'シ':
  390. if (ch2 == 'ョ' && ch3 == 'ウ') {
  391. builder.append("shō");
  392. i += 2;
  393. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  394. builder.append("shū");
  395. i += 2;
  396. } else if (ch2 == 'ャ') {
  397. builder.append("sha");
  398. i++;
  399. } else if (ch2 == 'ョ') {
  400. builder.append("sho");
  401. i++;
  402. } else if (ch2 == 'ュ') {
  403. builder.append("shu");
  404. i++;
  405. } else if (ch2 == 'ェ') {
  406. builder.append("she");
  407. i++;
  408. } else {
  409. builder.append("shi");
  410. }
  411. break;
  412. case 'ス':
  413. if (ch2 == 'ィ') {
  414. builder.append("si");
  415. i++;
  416. } else {
  417. builder.append("su");
  418. }
  419. break;
  420. case 'セ':
  421. builder.append("se");
  422. break;
  423. case 'ソ':
  424. if (ch2 == 'ウ') {
  425. builder.append("sō");
  426. i++;
  427. } else {
  428. builder.append("so");
  429. }
  430. break;
  431. case 'タ':
  432. builder.append("ta");
  433. break;
  434. case 'チ':
  435. if (ch2 == 'ョ' && ch3 == 'ウ') {
  436. builder.append("chō");
  437. i += 2;
  438. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  439. builder.append("chū");
  440. i += 2;
  441. } else if (ch2 == 'ャ') {
  442. builder.append("cha");
  443. i++;
  444. } else if (ch2 == 'ョ') {
  445. builder.append("cho");
  446. i++;
  447. } else if (ch2 == 'ュ') {
  448. builder.append("chu");
  449. i++;
  450. } else if (ch2 == 'ェ') {
  451. builder.append("che");
  452. i++;
  453. } else {
  454. builder.append("chi");
  455. }
  456. break;
  457. case 'ツ':
  458. if (ch2 == 'ァ') {
  459. builder.append("tsa");
  460. i++;
  461. } else if (ch2 == 'ィ') {
  462. builder.append("tsi");
  463. i++;
  464. } else if (ch2 == 'ェ') {
  465. builder.append("tse");
  466. i++;
  467. } else if (ch2 == 'ォ') {
  468. builder.append("tso");
  469. i++;
  470. } else if (ch2 == 'ュ') {
  471. builder.append("tsyu");
  472. i++;
  473. } else {
  474. builder.append("tsu");
  475. }
  476. break;
  477. case 'テ':
  478. if (ch2 == 'ィ') {
  479. builder.append("ti");
  480. i++;
  481. } else if (ch2 == 'ゥ') {
  482. builder.append("tu");
  483. i++;
  484. } else if (ch2 == 'ュ') {
  485. builder.append("tyu");
  486. i++;
  487. } else {
  488. builder.append("te");
  489. }
  490. break;
  491. case 'ト':
  492. if (ch2 == 'ウ') {
  493. builder.append("tō");
  494. i++;
  495. } else {
  496. builder.append("to");
  497. }
  498. break;
  499. case 'ナ':
  500. builder.append("na");
  501. break;
  502. case 'ニ':
  503. if (ch2 == 'ョ' && ch3 == 'ウ') {
  504. builder.append("nyō");
  505. i += 2;
  506. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  507. builder.append("nyū");
  508. i += 2;
  509. } else if (ch2 == 'ャ') {
  510. builder.append("nya");
  511. i++;
  512. } else if (ch2 == 'ョ') {
  513. builder.append("nyo");
  514. i++;
  515. } else if (ch2 == 'ュ') {
  516. builder.append("nyu");
  517. i++;
  518. } else if (ch2 == 'ェ') {
  519. builder.append("nye");
  520. i++;
  521. } else {
  522. builder.append("ni");
  523. }
  524. break;
  525. case 'ヌ':
  526. builder.append("nu");
  527. break;
  528. case 'ネ':
  529. builder.append("ne");
  530. break;
  531. case 'ノ':
  532. if (ch2 == 'ウ') {
  533. builder.append("nō");
  534. i++;
  535. } else {
  536. builder.append("no");
  537. }
  538. break;
  539. case 'ハ':
  540. builder.append("ha");
  541. break;
  542. case 'ヒ':
  543. if (ch2 == 'ョ' && ch3 == 'ウ') {
  544. builder.append("hyō");
  545. i += 2;
  546. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  547. builder.append("hyū");
  548. i += 2;
  549. } else if (ch2 == 'ャ') {
  550. builder.append("hya");
  551. i++;
  552. } else if (ch2 == 'ョ') {
  553. builder.append("hyo");
  554. i++;
  555. } else if (ch2 == 'ュ') {
  556. builder.append("hyu");
  557. i++;
  558. } else if (ch2 == 'ェ') {
  559. builder.append("hye");
  560. i++;
  561. } else {
  562. builder.append("hi");
  563. }
  564. break;
  565. case 'フ':
  566. if (ch2 == 'ャ') {
  567. builder.append("fya");
  568. i++;
  569. } else if (ch2 == 'ュ') {
  570. builder.append("fyu");
  571. i++;
  572. } else if (ch2 == 'ィ' && ch3 == 'ェ') {
  573. builder.append("fye");
  574. i+=2;
  575. } else if (ch2 == 'ョ') {
  576. builder.append("fyo");
  577. i++;
  578. } else if (ch2 == 'ァ') {
  579. builder.append("fa");
  580. i++;
  581. } else if (ch2 == 'ィ') {
  582. builder.append("fi");
  583. i++;
  584. } else if (ch2 == 'ェ') {
  585. builder.append("fe");
  586. i++;
  587. } else if (ch2 == 'ォ') {
  588. builder.append("fo");
  589. i++;
  590. } else {
  591. builder.append("fu");
  592. }
  593. break;
  594. case 'ヘ':
  595. builder.append("he");
  596. break;
  597. case 'ホ':
  598. if (ch2 == 'ウ') {
  599. builder.append("hō");
  600. i++;
  601. } else if (ch2 == 'ゥ') {
  602. builder.append("hu");
  603. i++;
  604. } else {
  605. builder.append("ho");
  606. }
  607. break;
  608. case 'マ':
  609. builder.append("ma");
  610. break;
  611. case 'ミ':
  612. if (ch2 == 'ョ' && ch3 == 'ウ') {
  613. builder.append("myō");
  614. i += 2;
  615. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  616. builder.append("myū");
  617. i += 2;
  618. } else if (ch2 == 'ャ') {
  619. builder.append("mya");
  620. i++;
  621. } else if (ch2 == 'ョ') {
  622. builder.append("myo");
  623. i++;
  624. } else if (ch2 == 'ュ') {
  625. builder.append("myu");
  626. i++;
  627. } else if (ch2 == 'ェ') {
  628. builder.append("mye");
  629. i++;
  630. } else {
  631. builder.append("mi");
  632. }
  633. break;
  634. case 'ム':
  635. builder.append("mu");
  636. break;
  637. case 'メ':
  638. builder.append("mi");
  639. break;
  640. case 'モ':
  641. if (ch2 == 'ウ') {
  642. builder.append("mō");
  643. i++;
  644. } else {
  645. builder.append("mo");
  646. }
  647. break;
  648. case 'ヤ':
  649. builder.append("ya");
  650. break;
  651. case 'ユ':
  652. builder.append("yu");
  653. break;
  654. case 'ヨ':
  655. if (ch2 == 'ウ') {
  656. builder.append("yō");
  657. i++;
  658. } else {
  659. builder.append("yo");
  660. }
  661. break;
  662. case 'ラ':
  663. builder.append("ra");
  664. break;
  665. case 'リ':
  666. if (ch2 == 'ョ' && ch3 == 'ウ') {
  667. builder.append("ryō");
  668. i += 2;
  669. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  670. builder.append("ryū");
  671. i += 2;
  672. } else if (ch2 == 'ャ') {
  673. builder.append("rya");
  674. i++;
  675. } else if (ch2 == 'ョ') {
  676. builder.append("ryo");
  677. i++;
  678. } else if (ch2 == 'ュ') {
  679. builder.append("ryu");
  680. i++;
  681. } else if (ch2 == 'ェ') {
  682. builder.append("rye");
  683. i++;
  684. } else {
  685. builder.append("ri");
  686. }
  687. break;
  688. case 'ル':
  689. builder.append("ru");
  690. break;
  691. case 'レ':
  692. builder.append("re");
  693. break;
  694. case 'ロ':
  695. if (ch2 == 'ウ') {
  696. builder.append("rō");
  697. i++;
  698. } else {
  699. builder.append("ro");
  700. }
  701. break;
  702. case 'ワ':
  703. builder.append("wa");
  704. break;
  705. case 'ヰ':
  706. builder.append("i");
  707. break;
  708. case 'ヱ':
  709. builder.append("e");
  710. break;
  711. case 'ヲ':
  712. builder.append("o");
  713. break;
  714. case 'ン':
  715. switch (ch2) {
  716. case 'バ':
  717. case 'ビ':
  718. case 'ブ':
  719. case 'ベ':
  720. case 'ボ':
  721. case 'パ':
  722. case 'ピ':
  723. case 'プ':
  724. case 'ペ':
  725. case 'ポ':
  726. case 'マ':
  727. case 'ミ':
  728. case 'ム':
  729. case 'メ':
  730. case 'モ':
  731. builder.append('m');
  732. break main;
  733. case 'ヤ':
  734. case 'ユ':
  735. case 'ヨ':
  736. case 'ア':
  737. case 'イ':
  738. case 'ウ':
  739. case 'エ':
  740. case 'オ':
  741. builder.append("n'");
  742. break main;
  743. default:
  744. builder.append("n");
  745. break main;
  746. }
  747. case 'ガ':
  748. builder.append("ga");
  749. break;
  750. case 'ギ':
  751. if (ch2 == 'ョ' && ch3 == 'ウ') {
  752. builder.append("gyō");
  753. i += 2;
  754. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  755. builder.append("gyū");
  756. i += 2;
  757. } else if (ch2 == 'ャ') {
  758. builder.append("gya");
  759. i++;
  760. } else if (ch2 == 'ョ') {
  761. builder.append("gyo");
  762. i++;
  763. } else if (ch2 == 'ュ') {
  764. builder.append("gyu");
  765. i++;
  766. } else if (ch2 == 'ェ') {
  767. builder.append("gye");
  768. i++;
  769. } else {
  770. builder.append("gi");
  771. }
  772. break;
  773. case 'グ':
  774. switch(ch2) {
  775. case 'ァ':
  776. builder.append("gwa");
  777. i++;
  778. break;
  779. case 'ィ':
  780. builder.append("gwi");
  781. i++;
  782. break;
  783. case 'ェ':
  784. builder.append("gwe");
  785. i++;
  786. break;
  787. case 'ォ':
  788. builder.append("gwo");
  789. i++;
  790. break;
  791. case 'ヮ':
  792. builder.append("gwa");
  793. i++;
  794. break;
  795. default:
  796. builder.append("gu");
  797. break;
  798. }
  799. break;
  800. case 'ゲ':
  801. builder.append("ge");
  802. break;
  803. case 'ゴ':
  804. if (ch2 == 'ウ') {
  805. builder.append("gō");
  806. i++;
  807. } else {
  808. builder.append("go");
  809. }
  810. break;
  811. case 'ザ':
  812. builder.append("za");
  813. break;
  814. case 'ジ':
  815. if (ch2 == 'ョ' && ch3 == 'ウ') {
  816. builder.append("jō");
  817. i += 2;
  818. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  819. builder.append("jū");
  820. i += 2;
  821. } else if (ch2 == 'ャ') {
  822. builder.append("ja");
  823. i++;
  824. } else if (ch2 == 'ョ') {
  825. builder.append("jo");
  826. i++;
  827. } else if (ch2 == 'ュ') {
  828. builder.append("ju");
  829. i++;
  830. } else if (ch2 == 'ェ') {
  831. builder.append("je");
  832. i++;
  833. } else {
  834. builder.append("ji");
  835. }
  836. break;
  837. case 'ズ':
  838. if (ch2 == 'ィ') {
  839. builder.append("zi");
  840. i++;
  841. } else {
  842. builder.append("zu");
  843. }
  844. break;
  845. case 'ゼ':
  846. builder.append("ze");
  847. break;
  848. case 'ゾ':
  849. if (ch2 == 'ウ') {
  850. builder.append("zō");
  851. i++;
  852. } else {
  853. builder.append("zo");
  854. }
  855. break;
  856. case 'ダ':
  857. builder.append("da");
  858. break;
  859. case 'ヂ':
  860. builder.append("ji");
  861. break;
  862. case 'ヅ':
  863. builder.append("zu");
  864. break;
  865. case 'デ':
  866. if (ch2 == 'ィ') {
  867. builder.append("di");
  868. i++;
  869. } else if (ch2 == 'ュ') {
  870. builder.append("dyu");
  871. i++;
  872. } else {
  873. builder.append("de");
  874. }
  875. break;
  876. case 'ド':
  877. if (ch2 == 'ウ') {
  878. builder.append("dō");
  879. i++;
  880. } else if (ch2 == 'ゥ') {
  881. builder.append("du");
  882. i++;
  883. } else {
  884. builder.append("do");
  885. }
  886. break;
  887. case 'バ':
  888. builder.append("ba");
  889. break;
  890. case 'ビ':
  891. if (ch2 == 'ョ' && ch3 == 'ウ') {
  892. builder.append("byō");
  893. i += 2;
  894. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  895. builder.append("byū");
  896. i += 2;
  897. } else if (ch2 == 'ャ') {
  898. builder.append("bya");
  899. i++;
  900. } else if (ch2 == 'ョ') {
  901. builder.append("byo");
  902. i++;
  903. } else if (ch2 == 'ュ') {
  904. builder.append("byu");
  905. i++;
  906. } else if (ch2 == 'ェ') {
  907. builder.append("bye");
  908. i++;
  909. } else {
  910. builder.append("bi");
  911. }
  912. break;
  913. case 'ブ':
  914. builder.append("bu");
  915. break;
  916. case 'ベ':
  917. builder.append("be");
  918. break;
  919. case 'ボ':
  920. if (ch2 == 'ウ') {
  921. builder.append("bō");
  922. i++;
  923. } else {
  924. builder.append("bo");
  925. }
  926. break;
  927. case 'パ':
  928. builder.append("pa");
  929. break;
  930. case 'ピ':
  931. if (ch2 == 'ョ' && ch3 == 'ウ') {
  932. builder.append("pyō");
  933. i += 2;
  934. } else if (ch2 == 'ュ' && ch3 == 'ウ') {
  935. builder.append("pyū");
  936. i += 2;
  937. } else if (ch2 == 'ャ') {
  938. builder.append("pya");
  939. i++;
  940. } else if (ch2 == 'ョ') {
  941. builder.append("pyo");
  942. i++;
  943. } else if (ch2 == 'ュ') {
  944. builder.append("pyu");
  945. i++;
  946. } else if (ch2 == 'ェ') {
  947. builder.append("pye");
  948. i++;
  949. } else {
  950. builder.append("pi");
  951. }
  952. break;
  953. case 'プ':
  954. builder.append("pu");
  955. break;
  956. case 'ペ':
  957. builder.append("pe");
  958. break;
  959. case 'ポ':
  960. if (ch2 == 'ウ') {
  961. builder.append("pō");
  962. i++;
  963. } else {
  964. builder.append("po");
  965. }
  966. break;
  967. case 'ヴ':
  968. if (ch2 == 'ィ' && ch3 == 'ェ') {
  969. builder.append("vye");
  970. i+= 2;
  971. } else {
  972. builder.append('v');
  973. }
  974. break;
  975. case 'ァ':
  976. builder.append('a');
  977. break;
  978. case 'ィ':
  979. builder.append('i');
  980. break;
  981. case 'ゥ':
  982. builder.append('u');
  983. break;
  984. case 'ェ':
  985. builder.append('e');
  986. break;
  987. case 'ォ':
  988. builder.append('o');
  989. break;
  990. case 'ヮ':
  991. builder.append("wa");
  992. break;
  993. case 'ャ':
  994. builder.append("ya");
  995. break;
  996. case 'ュ':
  997. builder.append("yu");
  998. break;
  999. case 'ョ':
  1000. builder.append("yo");
  1001. break;
  1002. case 'ー':
  1003. break;
  1004. default:
  1005. builder.append(ch);
  1006. }
  1007. }
  1008. return builder.toString();
  1009. }
  1010. }