PageRenderTime 46ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/branches/SPHINX3_5_2_RCI_IRII_BRANCH/sphinx3/src/libs3decoder/libdict/ctxt_table.c

#
C | 464 lines | 275 code | 84 blank | 105 comment | 51 complexity | b00812b7ad23615e45e435be738a0ba9 MD5 | raw file
Possible License(s): Apache-2.0, CC-BY-SA-3.0, BSD-3-Clause, LGPL-2.0, BSD-3-Clause-No-Nuclear-License-2014
  1. /* ====================================================================
  2. * Copyright (c) 1995-2004 Carnegie Mellon University. All rights
  3. * reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. *
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. *
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. *
  17. * This work was supported in part by funding from the Defense Advanced
  18. * Research Projects Agency and the National Science Foundation of the
  19. * United States of America, and the CMU Sphinx Speech Consortium.
  20. *
  21. * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
  22. * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  23. * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  24. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
  25. * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. *
  33. * ====================================================================
  34. *
  35. */
  36. /*
  37. * ctxt_table.c -- Building a context table , split from flat_fwd.c (or fwd.c)
  38. *
  39. * **********************************************
  40. * CMU ARPA Speech Project
  41. *
  42. * Copyright (c) 1995 Carnegie Mellon University.
  43. * ALL RIGHTS RESERVED.
  44. * **********************************************
  45. * 14-Jul-05 ARCHAN (archan@cs.cmu.edu) at Carnegie Mellon University
  46. * First created it.
  47. *
  48. * $Log$
  49. * Revision 1.1.2.3 2005/10/09 19:55:33 arthchan2003
  50. * Changed int8 to uint8. This follows Dave's change in the trunk.
  51. *
  52. * Revision 1.1.2.2 2005/09/27 07:39:17 arthchan2003
  53. * Added ctxt_table_free.
  54. *
  55. * Revision 1.1.2.1 2005/09/25 19:08:25 arthchan2003
  56. * Move context table from search to here.
  57. *
  58. * Revision 1.1.2.4 2005/09/07 23:32:03 arthchan2003
  59. * 1, Added get_lcpid in parallel with get_rcpid. 2, Also fixed small mistakes in the macro.
  60. *
  61. * Revision 1.1.2.3 2005/07/24 01:32:54 arthchan2003
  62. * Flush the output of the cross word triphone in ctxt_table.c
  63. *
  64. * Revision 1.1.2.2 2005/07/17 05:42:27 arthchan2003
  65. * Added super-detailed comments ctxt_table.h. Also added dimension to the arrays that stores all context tables.
  66. *
  67. * Revision 1.1.2.1 2005/07/15 07:48:32 arthchan2003
  68. * Split the hmm (whmm_t) and context building process (ctxt_table_t) from flat_fwd.c
  69. *
  70. *
  71. */
  72. #include <ctxt_table.h>
  73. #include <ckd_alloc.h>
  74. static s3pid_t *tmp_xwdpid ; /**< Temporary array used during the creation of lexical triphones lists */
  75. static uint8 *word_start_ci;
  76. static uint8 *word_end_ci;
  77. void dump_xwdpidmap (xwdpid_t **x, mdef_t *mdef)
  78. {
  79. s3cipid_t b, c1, c2;
  80. s3pid_t p;
  81. for (b = 0; b < mdef->n_ciphone; b++) {
  82. if (! x[b])
  83. continue;
  84. for (c1 = 0; c1 < mdef->n_ciphone; c1++) {
  85. if (! x[b][c1].cimap)
  86. continue;
  87. printf ("n_pid(%s, %s) = %d\n",
  88. mdef_ciphone_str(mdef, b), mdef_ciphone_str(mdef, c1),
  89. x[b][c1].n_pid);
  90. for (c2 = 0; c2 < mdef->n_ciphone; c2++) {
  91. p = x[b][c1].pid[x[b][c1].cimap[c2]];
  92. printf (" %10s %5d\n", mdef_ciphone_str(mdef, c2), p);
  93. }
  94. }
  95. }
  96. fflush(stdout);
  97. }
  98. /**
  99. * Utility function for building cross-word pid maps. Compresses cross-word pid list
  100. * to unique ones.
  101. */
  102. int32 xwdpid_compress (s3pid_t p, s3pid_t *pid, s3cipid_t *map, s3cipid_t ctx,
  103. int32 n,
  104. mdef_t* mdef /**<The model definition */
  105. )
  106. {
  107. s3senid_t *senmap, *prevsenmap;
  108. int32 s;
  109. s3cipid_t i;
  110. int32 n_state;
  111. n_state=mdef->n_emit_state +1 ;
  112. senmap = mdef->phone[p].state;
  113. for (i = 0; i < n; i++) {
  114. if (mdef->phone[p].tmat != mdef->phone[pid[i]].tmat)
  115. continue;
  116. prevsenmap = mdef->phone[pid[i]].state;
  117. for (s = 0; (s < n_state-1) && (senmap[s] == prevsenmap[s]); s++);
  118. if (s == n_state-1) {
  119. /* This state sequence same as a previous ones; just map to it */
  120. map[ctx] = i;
  121. return n;
  122. }
  123. }
  124. /* This state sequence different from all previous ones; allocate new entry */
  125. map[ctx] = n;
  126. pid[n] = p;
  127. return (n+1);
  128. }
  129. /**
  130. * Given base b, and right context rc, build left context cross-word triphones map
  131. * for all left context ciphones. Compress map to unique list.
  132. */
  133. void build_lcpid (ctxt_table_t *ct, s3cipid_t b, s3cipid_t rc, mdef_t *mdef)
  134. {
  135. s3cipid_t lc, *map;
  136. s3pid_t p;
  137. int32 n;
  138. map = (s3cipid_t *) ckd_calloc (mdef->n_ciphone, sizeof(s3cipid_t));
  139. n = 0;
  140. for (lc = 0; lc < mdef->n_ciphone; lc++) {
  141. p = mdef_phone_id_nearest (mdef, b, lc, rc, WORD_POSN_BEGIN);
  142. if ((! mdef->ciphone[b].filler) && word_end_ci[lc] &&
  143. mdef_is_ciphone(mdef, p))
  144. ct->n_backoff_ci++;
  145. n = xwdpid_compress (p, tmp_xwdpid, map, lc, n,mdef);
  146. }
  147. /* Copy/Move to lcpid */
  148. ct->lcpid[b][rc].cimap = map;
  149. ct->lcpid[b][rc].n_pid = n;
  150. ct->lcpid[b][rc].pid = (s3pid_t *) ckd_calloc (n, sizeof(s3pid_t));
  151. memcpy (ct->lcpid[b][rc].pid, tmp_xwdpid, n*sizeof(s3pid_t));
  152. }
  153. /**
  154. * Given base b, and left context lc, build right context cross-word triphones map
  155. * for all right context ciphones. Compress map to unique list.
  156. */
  157. void build_rcpid (ctxt_table_t *ct, s3cipid_t b, s3cipid_t lc, mdef_t *mdef)
  158. {
  159. s3cipid_t rc, *map;
  160. s3pid_t p;
  161. int32 n;
  162. map = (s3cipid_t *) ckd_calloc (mdef->n_ciphone, sizeof(s3cipid_t));
  163. n = 0;
  164. for (rc = 0; rc < mdef->n_ciphone; rc++) {
  165. p = mdef_phone_id_nearest (mdef, b, lc, rc, WORD_POSN_END);
  166. if ((! mdef->ciphone[b].filler) && word_start_ci[rc] &&
  167. mdef_is_ciphone(mdef, p))
  168. ct->n_backoff_ci++;
  169. n = xwdpid_compress (p, tmp_xwdpid, map, rc, n,mdef);
  170. }
  171. /* Copy/Move to rcpid */
  172. ct->rcpid[b][lc].cimap = map;
  173. ct->rcpid[b][lc].n_pid = n;
  174. ct->rcpid[b][lc].pid = (s3pid_t *) ckd_calloc (n, sizeof(s3pid_t));
  175. memcpy (ct->rcpid[b][lc].pid, tmp_xwdpid, n*sizeof(s3pid_t));
  176. }
  177. /**
  178. * Given base b for a single-phone word, build context cross-word triphones map
  179. * for all left and right context ciphones.
  180. */
  181. void build_lrcpid (ctxt_table_t *ct, s3cipid_t b, mdef_t *mdef)
  182. {
  183. s3cipid_t rc, lc;
  184. for (lc = 0; lc < mdef->n_ciphone; lc++) {
  185. ct->lrcpid[b][lc].pid = (s3pid_t *) ckd_calloc (mdef->n_ciphone, sizeof(s3pid_t));
  186. ct->lrcpid[b][lc].cimap = (s3cipid_t *) ckd_calloc (mdef->n_ciphone,sizeof(s3cipid_t));
  187. for (rc = 0; rc < mdef->n_ciphone; rc++) {
  188. ct->lrcpid[b][lc].cimap[rc] = rc;
  189. ct->lrcpid[b][lc].pid[rc] = mdef_phone_id_nearest (mdef, b, lc, rc,
  190. WORD_POSN_SINGLE);
  191. if ((! mdef->ciphone[b].filler) &&
  192. word_start_ci[rc] && word_end_ci[lc] &&
  193. mdef_is_ciphone(mdef, ct->lrcpid[b][lc].pid[rc]))
  194. ct->n_backoff_ci++;
  195. }
  196. ct->lrcpid[b][lc].n_pid = mdef->n_ciphone;
  197. }
  198. }
  199. /**
  200. * Build within-word triphones sequence for each word. The extreme ends are not needed
  201. * since cross-word modelling is used for those. (See lcpid, rcpid, lrcpid.)
  202. */
  203. void build_wwpid (ctxt_table_t* ct, dict_t *dict, mdef_t *mdef )
  204. {
  205. s3wid_t w;
  206. int32 pronlen, l;
  207. s3cipid_t b, lc, rc;
  208. E_INFO ("Building within-word triphones\n");
  209. ct->n_backoff_ci=0;
  210. ct->wwpid = (s3pid_t **) ckd_calloc (dict->n_word, sizeof(s3pid_t *));
  211. for (w = 0; w < dict->n_word; w++) {
  212. pronlen = dict->word[w].pronlen;
  213. if (pronlen >= 3)
  214. ct->wwpid[w] = (s3pid_t *) ckd_calloc (pronlen-1, sizeof(s3pid_t));
  215. else
  216. continue;
  217. lc = dict->word[w].ciphone[0];
  218. b = dict->word[w].ciphone[1];
  219. for (l = 1; l < pronlen-1; l++) {
  220. rc = dict->word[w].ciphone[l+1];
  221. ct->wwpid[w][l] = mdef_phone_id_nearest (mdef, b, lc, rc, WORD_POSN_INTERNAL);
  222. if ((! mdef->ciphone[b].filler) && mdef_is_ciphone(mdef, ct->wwpid[w][l]))
  223. ct->n_backoff_ci++;
  224. lc = b;
  225. b = rc;
  226. }
  227. #if 0
  228. printf ("%-25s ", dict->word[w].word);
  229. for (l = 1; l < pronlen-1; l++)
  230. printf (" %5d", wwpid[w][l]);
  231. printf ("\n");
  232. #endif
  233. }
  234. E_INFO("%d within-word triphone instances mapped to CI-phones\n", ct->n_backoff_ci);
  235. }
  236. /**
  237. * Build cross-word triphones map for the entire dictionary.
  238. */
  239. void build_xwdpid_map (ctxt_table_t* ct, dict_t *dict, mdef_t *mdef)
  240. {
  241. s3wid_t w;
  242. int32 pronlen;
  243. s3cipid_t b, lc, rc;
  244. ct->n_backoff_ci = 0;
  245. /* Build cross-word triphone models */
  246. E_INFO ("Building cross-word triphones\n");
  247. word_start_ci = (uint8 *) ckd_calloc (mdef->n_ciphone, sizeof(uint8));
  248. word_end_ci = (uint8 *) ckd_calloc (mdef->n_ciphone, sizeof(uint8));
  249. /* Mark word beginning and ending ciphones that occur in given dictionary */
  250. for (w = 0; w < dict->n_word; w++) {
  251. word_start_ci[dict->word[w].ciphone[0]] = 1;
  252. word_end_ci[dict->word[w].ciphone[dict->word[w].pronlen-1]] = 1;
  253. }
  254. ct->lcpid = (xwdpid_t **) ckd_calloc (mdef->n_ciphone, sizeof(xwdpid_t *));
  255. ct->rcpid = (xwdpid_t **) ckd_calloc (mdef->n_ciphone, sizeof(xwdpid_t *));
  256. ct->lrcpid = (xwdpid_t **) ckd_calloc (mdef->n_ciphone, sizeof(xwdpid_t *));
  257. for (w = 0; w < dict->n_word; w++) {
  258. pronlen = dict->word[w].pronlen;
  259. if (pronlen > 1) {
  260. /* Multi-phone word; build rcmap and lcmap if not already present */
  261. b = dict->word[w].ciphone[pronlen-1];
  262. lc = dict->word[w].ciphone[pronlen-2];
  263. if (! ct->rcpid[b])
  264. ct->rcpid[b] = (xwdpid_t *) ckd_calloc (mdef->n_ciphone, sizeof(xwdpid_t));
  265. if (! ct->rcpid[b][lc].cimap)
  266. build_rcpid (ct, b, lc, mdef);
  267. b = dict->word[w].ciphone[0];
  268. rc = dict->word[w].ciphone[1];
  269. if (! ct->lcpid[b])
  270. ct->lcpid[b] = (xwdpid_t *) ckd_calloc (mdef->n_ciphone, sizeof(xwdpid_t));
  271. if (! ct->lcpid[b][rc].cimap)
  272. build_lcpid (ct, b, rc, mdef);
  273. } else {
  274. /* Single-phone word; build lrcmap if not already present */
  275. b = dict->word[w].ciphone[0];
  276. if (! ct->lrcpid[b]) {
  277. ct->lrcpid[b] = (xwdpid_t *) ckd_calloc (mdef->n_ciphone, sizeof(xwdpid_t));
  278. build_lrcpid (ct, b, mdef);
  279. }
  280. }
  281. }
  282. ckd_free (word_start_ci);
  283. ckd_free (word_end_ci);
  284. E_INFO("%d cross-word triphones mapped to CI-phones\n", ct->n_backoff_ci);
  285. #if 0
  286. E_INFO ("LCXWDPID\n");
  287. dump_xwdpidmap (ct->lcpid,mdef);
  288. E_INFO ("RCXWDPID\n");
  289. dump_xwdpidmap (ct->rcpid,mdef);
  290. E_INFO ("LRCXWDPID\n");
  291. dump_xwdpidmap (ct->lrcpid,mdef);
  292. #endif
  293. }
  294. ctxt_table_t* ctxt_table_init( dict_t *dict,mdef_t *mdef)
  295. {
  296. ctxt_table_t *ct;
  297. ct= (ctxt_table_t*) ckd_calloc(1,sizeof(ctxt_table_t));
  298. tmp_xwdpid = (s3pid_t *) ckd_calloc (mdef->n_ciphone, sizeof(s3pid_t));
  299. build_wwpid(ct, dict, mdef);
  300. build_xwdpid_map (ct, dict, mdef) ;
  301. ckd_free (tmp_xwdpid);
  302. return ct;
  303. }
  304. void ctxt_table_free(ctxt_table_t *ct)
  305. {
  306. if(ct->lcpid)
  307. ckd_free(ct->lcpid);
  308. if(ct->rcpid)
  309. ckd_free(ct->rcpid);
  310. if(ct->lrcpid)
  311. ckd_free(ct->lrcpid);
  312. if(ct->wwpid)
  313. ckd_free(ct->wwpid);
  314. }
  315. s3cipid_t *get_rc_cimap (ctxt_table_t *ct, s3wid_t w,dict_t *dict)
  316. {
  317. int32 pronlen;
  318. s3cipid_t b, lc;
  319. pronlen = dict->word[w].pronlen;
  320. b = dict->word[w].ciphone[pronlen-1];
  321. if (pronlen == 1) {
  322. /* No known left context. But all cimaps (for any l) are identical; pick one */
  323. return (ct->lrcpid[b][0].cimap);
  324. } else {
  325. lc = dict->word[w].ciphone[pronlen-2];
  326. return (ct->rcpid[b][lc].cimap);
  327. }
  328. }
  329. s3cipid_t *get_lc_cimap (ctxt_table_t *ct, s3wid_t w,dict_t *dict)
  330. {
  331. int32 pronlen;
  332. s3cipid_t b, rc;
  333. pronlen = dict->word[w].pronlen;
  334. b = dict->word[w].ciphone[0];
  335. if (pronlen == 1) {
  336. /* No known right context. But all cimaps (for any l) are identical; pick one */
  337. return (ct->lrcpid[b][0].cimap);
  338. } else {
  339. rc = dict->word[w].ciphone[1];
  340. return (ct->lcpid[b][rc].cimap);
  341. }
  342. }
  343. void get_rcpid (ctxt_table_t *ct, s3wid_t w, s3pid_t **pid, int32 *npid,dict_t *dict)
  344. {
  345. int32 pronlen;
  346. s3cipid_t b, lc;
  347. pronlen = dict->word[w].pronlen;
  348. assert (pronlen > 1);
  349. b = dict->word[w].ciphone[pronlen-1];
  350. lc = dict->word[w].ciphone[pronlen-2];
  351. *pid = ct->rcpid[b][lc].pid;
  352. *npid = ct->rcpid[b][lc].n_pid;
  353. }
  354. void get_lcpid (ctxt_table_t *ct, s3wid_t w, s3pid_t **pid, int32 *npid,dict_t *dict)
  355. {
  356. int32 pronlen;
  357. s3cipid_t b, rc;
  358. pronlen = dict->word[w].pronlen;
  359. assert (pronlen > 1);
  360. b = dict->word[w].ciphone[0];
  361. rc = dict->word[w].ciphone[1];
  362. *pid = ct->lcpid[b][rc].pid;
  363. *npid = ct->lcpid[b][rc].n_pid;
  364. }
  365. int32 get_rc_npid (ctxt_table_t *ct, s3wid_t w,dict_t *dict)
  366. {
  367. int32 pronlen;
  368. s3cipid_t b, lc;
  369. pronlen = dict->word[w].pronlen;
  370. b = dict->word[w].ciphone[pronlen-1];
  371. assert(ct);
  372. assert(ct->lrcpid);
  373. if (pronlen == 1) {
  374. /* No known left context. But all cimaps (for any l) are identical; pick one */
  375. return (ct->lrcpid[b][0].n_pid);
  376. } else {
  377. lc = dict->word[w].ciphone[pronlen-2];
  378. return (ct->rcpid[b][lc].n_pid);
  379. }
  380. }