PageRenderTime 54ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 1ms

/tags/BEFORE_MULTI_PRONS_CHECK_IN/SphinxTrain/src/libs/libcommon/quest.c

#
C | 686 lines | 468 code | 106 blank | 112 comment | 163 complexity | f63d6655d8e73559396585a20b06b4ea MD5 | raw file
Possible License(s): Apache-2.0, CC-BY-SA-3.0, BSD-3-Clause, LGPL-2.0, BSD-3-Clause-No-Nuclear-License-2014
  1. /* ====================================================================
  2. * Copyright (c) 1996-2000 Carnegie Mellon University. All rights
  3. * reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. *
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. *
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. *
  17. * 3. The names "Sphinx" and "Carnegie Mellon" must not be used to
  18. * endorse or promote products derived from this software without
  19. * prior written permission. To obtain permission, contact
  20. * sphinx@cs.cmu.edu.
  21. *
  22. * 4. Redistributions of any form whatsoever must retain the following
  23. * acknowledgment:
  24. * "This product includes software developed by Carnegie
  25. * Mellon University (http://www.speech.cs.cmu.edu/)."
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
  28. * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  29. * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  30. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
  31. * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  32. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  33. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  34. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  35. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  36. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  37. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38. *
  39. * ====================================================================
  40. *
  41. */
  42. /*********************************************************************
  43. *
  44. * File: quest.c
  45. *
  46. * Description:
  47. *
  48. * Author:
  49. *
  50. *********************************************************************/
  51. #include <s3/quest.h>
  52. #include <s3/bquest_io.h>
  53. #include <s3/ckd_alloc.h>
  54. #include <s3/s3.h>
  55. #include <string.h>
  56. #include <assert.h>
  57. #include <ctype.h>
  58. char *
  59. s3parse_quest(pset_t *pset, uint32 n_pset, quest_t *q, char *in_str)
  60. {
  61. char *s, *sp;
  62. uint32 i;
  63. s = in_str;
  64. /* skip leading whitespace */
  65. for (; *s != '\0' && isspace((int)*s); s++);
  66. if (*s == '\0') /* Nothing to parse */
  67. return s;
  68. if (*s == '!') {
  69. q->neg = TRUE;
  70. ++s;
  71. if (*s == '\0') {
  72. E_ERROR("question syntax error");
  73. return NULL;
  74. }
  75. }
  76. else
  77. q->neg = FALSE;
  78. sp = strchr(s, ' ');
  79. if (sp == NULL) {
  80. E_ERROR("Expected space after question name\n");
  81. return NULL;
  82. }
  83. *sp = '\0';
  84. for (i = 0; i < n_pset; i++) {
  85. if (strcmp(s, pset[i].name) == 0) {
  86. q->pset = i;
  87. q->member = pset[i].member;
  88. q->posn = pset[i].posn;
  89. break;
  90. }
  91. }
  92. if (i == n_pset) {
  93. E_ERROR("Unknown question %s\n", s);
  94. return NULL;
  95. }
  96. s = sp+1;
  97. *sp = ' '; /* undo set to null */
  98. /* skip whitespace */
  99. for (; *s != '\0' && isspace((int)*s); s++);
  100. if (s[0] == '-') {
  101. if (s[1] == '1') {
  102. q->ctxt = -1;
  103. }
  104. s += 2;
  105. }
  106. else if (s[0] == '0') {
  107. q->ctxt = 0;
  108. s++;
  109. }
  110. else if (s[0] == '1') {
  111. q->ctxt = 1;
  112. s++;
  113. }
  114. /* skip trailing whitespace, if any */
  115. for (; *s != '\0' && isspace((int)*s); s++);
  116. return s;
  117. }
  118. static uint32
  119. count_quest_in_conj(pset_t *pset,
  120. uint32 n_pset,
  121. char *in_str)
  122. {
  123. quest_t tmp;
  124. quest_t *q = &tmp;
  125. char *t;
  126. uint32 n_quest;
  127. n_quest = 0;
  128. t = in_str;
  129. for (; *t != '\0' && isspace((int)*t); t++);
  130. if (*t == ')') {
  131. E_ERROR("Empty conjunction\n");
  132. return 0;
  133. }
  134. while (t && *t != ')' && *t != '\0') {
  135. t = s3parse_quest(pset, n_pset, q, t);
  136. ++n_quest;
  137. for (; t && *t != '\0' && isspace((int)*t); t++);
  138. }
  139. if (t == NULL) {
  140. E_ERROR("Error while parsing conjunction: %s\n", in_str);
  141. return 0;
  142. }
  143. if (*t != ')') {
  144. E_ERROR("Error while parsing conjunction: %s\n", in_str);
  145. return 0;
  146. }
  147. return n_quest;
  148. }
  149. char *
  150. s3parse_conj(pset_t *pset,
  151. uint32 n_pset,
  152. quest_t **term,
  153. uint32 *n_simple_q,
  154. char *in_str)
  155. {
  156. quest_t *termlst;
  157. char *s;
  158. uint32 n_quest;
  159. uint32 i;
  160. s = in_str;
  161. if (*s == '\0') return s;
  162. /* skip leading whitespace */
  163. for (; *s != '\0' && isspace((int)*s); s++);
  164. if (*s == '\0') return s;
  165. if (*s == '(') {
  166. ++s;
  167. }
  168. else {
  169. E_ERROR("Expected '(' before conjunction\n");
  170. return NULL;
  171. }
  172. for (; *s != '\0' && isspace((int)*s); s++);
  173. if (*s == '\0') {
  174. E_ERROR("No terms and close paren in conjunction\n", in_str);
  175. return NULL;
  176. }
  177. n_quest = count_quest_in_conj(pset, n_pset, s);
  178. *n_simple_q = n_quest;
  179. termlst = (quest_t *)ckd_calloc(n_quest, sizeof(quest_t));
  180. *term = termlst;
  181. for (i = 0; i < n_quest; i++) {
  182. s = s3parse_quest(pset, n_pset, &termlst[i], s);
  183. for (; *s != '\0' && isspace((int)*s); s++);
  184. }
  185. assert(*s == ')');
  186. s++;
  187. return s;
  188. }
  /*
   * Count the conjunctions in a composite question string by counting the
   * '(' characters that follow its first non-blank character (which is
   * taken to be the outer opening paren).
   *
   * Returns 0 for an empty or all-whitespace string.
   */
  static uint32
  s3cnt_q_term(char *in_str)
  {
      char *s;
      uint32 n_term;

      s = in_str;

      /* skip any leading whitespace */
      while (*s != '\0' && isspace((unsigned char)*s))
          s++;

      /* nothing left: do not step over the terminator */
      if (*s == '\0')
          return 0;

      /* assume everything is well-formed for the moment.
       * later processing will catch syntax errors
       * which should be unlikely anyway since this stuff
       * is most likely machine generated */
      s++;	/* step over the outer opening paren */

      n_term = 0;
      while ((s = strchr(s, '(')) != NULL) {
          n_term++;
          s++;
      }

      return n_term;
  }
  204. int
  205. s3parse_comp_quest(pset_t *pset,
  206. uint32 n_pset,
  207. comp_quest_t *q,
  208. char *in_str)
  209. {
  210. char *s;
  211. uint32 i;
  212. s = in_str;
  213. for (; *s != '\0' && isspace((int)*s); s++);
  214. if (*s == '\0') {
  215. E_ERROR("Empty string seen for composite question\n");
  216. return S3_ERROR;
  217. }
  218. if (*s != '(') {
  219. E_ERROR("Composite question does not begin with '(' : %s\n",
  220. in_str);
  221. return S3_ERROR;
  222. }
  223. q->sum_len = s3cnt_q_term(in_str);
  224. q->conj_q = (quest_t **)ckd_calloc(q->sum_len, sizeof(quest_t *));
  225. q->prod_len = (uint32 *)ckd_calloc(q->sum_len, sizeof(uint32));
  226. ++s; /* skip the open paren */
  227. i = 0;
  228. do {
  229. s = s3parse_conj(pset,
  230. n_pset,
  231. &q->conj_q[i],
  232. &q->prod_len[i],
  233. s);
  234. ++i;
  235. } while (s && *s && *s == '(');
  236. if (s == NULL) {
  237. E_ERROR("Error while parsing %s\n", in_str);
  238. return S3_ERROR;
  239. }
  240. return S3_SUCCESS;
  241. }
  242. static void
  243. parse_simple_q(quest_t *q,
  244. char *q_str)
  245. {
  246. int i;
  247. int len;
  248. uint32 pset;
  249. assert(q != NULL);
  250. assert(q_str != NULL);
  251. len = strlen(q_str);
  252. /* skip leading whitespace */
  253. for (i = 0; i < len && isspace((int)q_str[i]); i++);
  254. if (i == len)
  255. return;
  256. if (q_str[i] == '~') {
  257. q->neg = TRUE;
  258. i++;
  259. }
  260. else {
  261. q->neg = FALSE;
  262. }
  263. pset = atoi(&q_str[i]);
  264. if (pset >= 400) {
  265. q->ctxt = 1;
  266. pset -= 400;
  267. }
  268. else if (pset < 400) {
  269. q->ctxt = -1;
  270. }
  271. q->pset = pset;
  272. /* HACK to get around WDBNDRY question context */
  273. if (pset < 3)
  274. q->ctxt = 0;
  275. }
  276. char *
  277. parse_conj(quest_t **term,
  278. uint32 *n_simple_q,
  279. char *q_str)
  280. {
  281. char *t, *eot;
  282. int n_q;
  283. char t_str[64];
  284. char *simp_q_str;
  285. quest_t *out;
  286. int i;
  287. /* copy the next product into t_str */
  288. eot = strchr(q_str, '|');
  289. if (eot) {
  290. strncpy(t_str, q_str, (eot - q_str));
  291. t_str[(eot - q_str)] = '\0';
  292. }
  293. else {
  294. strcpy(t_str, q_str);
  295. }
  296. /* count the # of terms in the product */
  297. t = t_str-1;
  298. n_q = 1;
  299. do {
  300. t = strchr(t+1, '&');
  301. if (t) {
  302. n_q++;
  303. }
  304. } while (t);
  305. /* allocate a simple question for each term in product */
  306. out = ckd_calloc(n_q, sizeof(quest_t));
  307. *term = out;
  308. *n_simple_q = n_q;
  309. /* parse each simple question */
  310. simp_q_str = strtok(t_str, "&");
  311. i = 0;
  312. do {
  313. parse_simple_q(&out[i], simp_q_str);
  314. simp_q_str = strtok(NULL, "&");
  315. i++;
  316. } while (simp_q_str);
  317. return eot;
  318. }
  /*
   * Count the '|'-separated terms of a question string: the number of '|'
   * characters in q_str plus one (so an empty string counts as one term).
   */
  uint32
  cnt_q_term(char *q_str)
  {
      char *bar;
      uint32 n_term;

      n_term = 1;

      /* start the scan at q_str itself; a pointer to q_str-1 must never
       * be formed */
      for (bar = strchr(q_str, '|'); bar != NULL; bar = strchr(bar + 1, '|'))
          ++n_term;

      return n_term;
  }
  332. void
  333. parse_compound_q(comp_quest_t *q,
  334. char *q_str)
  335. {
  336. char *rem_q_str;
  337. uint32 i;
  338. q->sum_len = cnt_q_term(q_str);
  339. q->conj_q = ckd_calloc(q->sum_len, sizeof(quest_t *));
  340. q->prod_len = ckd_calloc(q->sum_len, sizeof(uint32));
  341. i = 0;
  342. rem_q_str = q_str-1;
  343. do {
  344. rem_q_str = parse_conj(&q->conj_q[i],
  345. &q->prod_len[i],
  346. rem_q_str+1);
  347. ++i;
  348. } while (rem_q_str);
  349. }
  350. void
  351. print_quest(FILE *fp,
  352. pset_t *pset,
  353. quest_t *q)
  354. {
  355. if (pset == NULL) {
  356. fprintf(fp, "%s%d %d",
  357. (q->neg ? "!" : ""),
  358. q->pset,
  359. q->ctxt);
  360. }
  361. else {
  362. fprintf(fp, "%s%s %d",
  363. (q->neg ? "!" : ""),
  364. pset[q->pset].name,
  365. q->ctxt);
  366. }
  367. }
  368. int
  369. eval_quest(quest_t *q,
  370. uint32 *feat,
  371. uint32 n_feat)
  372. {
  373. uint32 ctxt;
  374. int ret = FALSE;
  375. ctxt = q->ctxt + 1;
  376. if (q->member)
  377. ret = q->member[feat[ctxt]];
  378. else if (q->posn)
  379. ret = q->posn[feat[n_feat-1]];
  380. else {
  381. E_FATAL("Ill-formed question\n");
  382. }
  383. if (q->neg) ret = !ret;
  384. #if 0
  385. E_INFO("eval: (%s%u %d) %u -> %u\n",
  386. (q->neg ? "!" : ""),
  387. q->pset,
  388. q->ctxt,
  389. (q->member ? q->member[feat[ctxt]] :
  390. q->posn[feat[n_feat-1]]),
  391. ret);
  392. #endif
  393. return ret;
  394. }
  395. int
  396. eval_comp_quest(comp_quest_t *q,
  397. uint32 *feat,
  398. uint32 n_feat)
  399. {
  400. int i, j;
  401. for (i = 0; i < q->sum_len; i++) {
  402. for (j = 0; j < q->prod_len[i]; j++) {
  403. if (!eval_quest(&q->conj_q[i][j], feat, n_feat))
  404. break;
  405. }
  406. /* One of the terms in the disjunction
  407. * is satisfied; so the whole is satisfied */
  408. if (j == q->prod_len[i])
  409. return TRUE;
  410. }
  411. /* visited all terms in the disjunction and none
  412. * were satisified; so neither is the disjunction */
  413. return FALSE;
  414. }
  415. void
  416. print_comp_quest(FILE *fp,
  417. pset_t *pset,
  418. comp_quest_t *q)
  419. {
  420. int i, j;
  421. fprintf(fp, "(");
  422. for (i = 0; i < q->sum_len; i++) {
  423. fprintf(fp, "(");
  424. print_quest(fp, pset, &q->conj_q[i][0]);
  425. for (j = 1; j < q->prod_len[i]; j++) {
  426. fprintf(fp, " ");
  427. print_quest(fp, pset, &q->conj_q[i][j]);
  428. }
  429. fprintf(fp, ")");
  430. }
  431. fprintf(fp, ")");
  432. }
  433. int
  434. is_subset(quest_t *a,
  435. quest_t *b,
  436. uint32 n_phone)
  437. {
  438. uint32 p;
  439. int f_a, f_b;
  440. if (a->member && b->member) {
  441. if (a->ctxt != b->ctxt)
  442. return FALSE;
  443. for (p = 0; p < n_phone; p++) {
  444. if (a->neg)
  445. f_a = !a->member[p];
  446. else
  447. f_a = a->member[p];
  448. if (b->neg)
  449. f_b = !b->member[p];
  450. else
  451. f_b = b->member[p];
  452. if (f_a && (f_a != f_b)) {
  453. break;
  454. }
  455. }
  456. if (p != n_phone)
  457. return FALSE;
  458. else
  459. return TRUE;
  460. }
  461. else if ((a->member && b->posn) ||
  462. (a->posn && b->member)) {
  463. /* one question about word boundary
  464. * and the other is about phone context
  465. * so not a subset */
  466. return FALSE;
  467. }
  468. else if (a->posn && b->posn) {
  469. /* Not handled at the moment */
  470. return FALSE;
  471. }
  472. return FALSE;
  473. }
  474. int
  475. simplify_conj(quest_t *conj,
  476. uint32 n_term,
  477. uint32 n_phone)
  478. {
  479. uint32 i, j;
  480. int *del, exist_del = FALSE;
  481. assert(n_term != 0);
  482. if (n_term == 1) /* Only one term; nothing to do */
  483. return 1;
  484. del = ckd_calloc(n_term, sizeof(int));
  485. /* Search for all pairs (i,j) where
  486. * term_i is a subset of term_j. Mark
  487. * all such term_j's for deletion since
  488. * term_i && term_j == term_i */
  489. for (i = 0; i < n_term; i++) {
  490. for (j = 0; j < n_term; j++) {
  491. if ((i != j) && (!del[i] || !del[j])) {
  492. if (is_subset(&conj[i], &conj[j], n_phone)) {
  493. /* mark the superset for deletion */
  494. del[j] = TRUE;
  495. exist_del = TRUE;
  496. }
  497. }
  498. }
  499. }
  500. /* compact the conjunction by removing
  501. * term_j's that are marked for deletion.
  502. */
  503. for (i = 0, j = 0; j < n_term; i++, j++) {
  504. if (del[j]) {
  505. /* move j to the next
  506. * non-deleted term (if any) */
  507. for (j++; del[j] && (j < n_term); j++);
  508. if (j == n_term)
  509. break;
  510. }
  511. if (i != j) {
  512. conj[i] = conj[j];
  513. }
  514. }
  515. ckd_free(del);
  516. return i; /* return new n_term */
  517. }
  518. int
  519. simplify_comp_quest(comp_quest_t *q,
  520. uint32 n_phone)
  521. {
  522. int i;
  523. int ret = FALSE;
  524. int prod_len;
  525. for (i = 0; i < q->sum_len; i++) {
  526. prod_len = simplify_conj(q->conj_q[i], q->prod_len[i], n_phone);
  527. if (prod_len < q->prod_len[i]) {
  528. assert(!(prod_len > q->prod_len[i]));
  529. q->prod_len[i] = prod_len;
  530. ret = TRUE;
  531. }
  532. }
  533. /* TRUE if there is at least one term in the composite
  534. * question that was simplified */
  535. return ret;
  536. }
  537. /*
  538. * Log record. Maintained by RCS.
  539. *
  540. * $Log$
  541. * Revision 1.4 2003/11/18 21:07:25 egouvea
  542. * Got rid of warning casting the argument to "isspace".
  543. *
  544. * Revision 1.3 2001/04/05 20:02:30 awb
  545. * *** empty log message ***
  546. *
  547. * Revision 1.2 2000/09/29 22:35:13 awb
  548. * *** empty log message ***
  549. *
  550. * Revision 1.1 2000/09/24 21:38:31 awb
  551. * *** empty log message ***
  552. *
  553. * Revision 1.3 97/07/16 11:36:22 eht
  554. * *** empty log message ***
  555. *
  556. * Revision 1.2 96/06/17 14:40:41 eht
  557. * *** empty log message ***
  558. *
  559. * Revision 1.1 1996/03/25 15:31:39 eht
  560. * Initial revision
  561. *
  562. *
  563. */