/src/italianverbs/morphology/espanol.clj

https://github.com/franksfo/italianquiz · Clojure · 1124 lines · 863 code · 205 blank · 56 comment · 429 complexity · 9a0201b4723881c29f3391587f4029ee MD5 · raw file

  1. (ns italianverbs.morphology.espanol
  2. (:refer-clojure :exclude [get-in merge resolve]))
  3. (require '[clojure.core :as core])
  4. (require '[clojure.string :as string])
  5. (require '[clojure.string :refer (trim)])
  6. (require '[clojure.tools.logging :as log])
  7. (require '[italianverbs.stringutils :refer :all])
  8. (require '[italianverbs.unify :refer (copy dissoc-paths fail? get-in merge ref? strip-refs unifyc)])
  (declare get-string)

  ;; Endings for regular verbs, indexed tense -> verb class -> slot.
  ;; Slots run 1st/2nd/3rd person singular, then 1st/2nd/3rd person plural.
  ;; The 2nd-person-plural slot holds the "vosotros" form; the formal second
  ;; person (usted/ustedes) reuses the 3rd-person slots.
  (def ^:private espanol-regular-endings
    {:present     {:ar ["o" "as" "a" "amos" "áis" "an"]
                   :er ["o" "es" "e" "emos" "éis" "en"]
                   :ir ["o" "es" "e" "imos" "ís" "en"]}
     :imperfetto  {:ar ["aba" "abas" "aba" "ábamos" "abais" "aban"]
                   :er ["ía" "ías" "ía" "íamos" "íais" "ían"]
                   :ir ["ía" "ías" "ía" "íamos" "íais" "ían"]}
     :futuro      {:ar ["aré" "arás" "ará" "aremos" "aréis" "arán"]
                   :er ["eré" "erás" "erá" "eremos" "eréis" "erán"]
                   :ir ["iré" "irás" "irá" "iremos" "iréis" "irán"]}
     :conditional {:ar ["aría" "arías" "aría" "aríamos" "aríais" "arían"]
                   :er ["ería" "erías" "ería" "eríamos" "eríais" "erían"]
                   :ir ["iría" "irías" "iría" "iríamos" "iríais" "irían"]}
     :preterito   {:ar ["é" "aste" "ó" "amos" "asteis" "aron"]
                   :er ["í" "iste" "ió" "imos" "isteis" "ieron"]
                   :ir ["í" "iste" "ió" "imos" "isteis" "ieron"]}})

  ;; Position of each [person number] pair inside an ending vector.
  (def ^:private espanol-slot
    {[:1st :sing] 0, [:2nd :sing] 1, [:3rd :sing] 2,
     [:1st :plur] 3, [:2nd :plur] 4, [:3rd :plur] 5})

  ;; Tense name as it appears in the "don't know what to do" error messages.
  (def ^:private espanol-tense-label
    {:present "present"
     :imperfetto "imperfecto"
     :futuro "futuro"
     :conditional "conditional"
     :preterito "preterito"})

  (defn- conjugate-regular-espanol
    "Conjugate the infinitive found at [:espanol] of `word` for `tense`, using
    the person and number found under [:agr]. `usted`, `vosotros` and `ustedes`
    are the caller's raw (possibly nil) flags. Throws Exception when no form
    can be chosen."
    [word tense usted vosotros ustedes]
    (let [infinitive (get-in word [:espanol])
          verb-class (case (re-find #"[aei]r$" infinitive)
                       "ar" :ar
                       "er" :er
                       "ir" :ir
                       nil)
          stem (string/replace infinitive #"[iae]r$" "")
          person (get-in word [:agr :person])
          number (get-in word [:agr :number])
          ;; usted/ustedes take the third-person ending. "vosotros" stays the
          ;; default plural; "ustedes" only wins when vosotros was not asked for.
          formal? (and (= person :2nd)
                       (cond (= number :sing) usted
                             (= number :plur) (and ustedes (not vosotros))
                             :else false))
          slot (get espanol-slot [(if formal? :3rd person) number])
          ending (-> espanol-regular-endings (get tense) (get verb-class) (get slot))]
      (cond
        ending
        (str stem ending)

        ;; First person singular present is stem + "o" even when the
        ;; infinitive does not end in -ar/-er/-ir (stem is then the whole word).
        (and (= tense :present) (= slot 0))
        (str stem "o")

        ;; Agreement is underspecified, but an infinitive exists: return it.
        (and (= tense :present) (= (get-in word [:agr]) :top))
        infinitive

        :else
        (throw (Exception. (str "get-string-1: " (get espanol-tense-label tense)
                                " regular inflection: don't know what to do with input argument: "
                                (strip-refs word)))))))

  (defn get-string-1
    "Render `word` as a surface string.

    `word` may be:
      - a string: returned unchanged;
      - a seq: every element is rendered and the results are joined with
        single spaces;
      - :top, {:initial true} or {:initial false}: rendered as \"..\";
      - a ref: dereferenced and rendered;
      - a map with both :a and :b: delegated to `get-string` on the two halves;
      - a map whose [:exception] is true: its [:espanol] value is returned;
      - a map with a string at [:espanol] and an [:infl] of :present,
        :imperfetto, :futuro, :conditional or :preterito: conjugated as a
        regular -ar/-er/-ir verb according to [:agr :person] and
        [:agr :number];
      - any other map with a string at [:espanol]: that string is returned.

    Optional keyword arguments:
      :usted     truthy => 2nd person singular uses the formal (3rd person) ending
      :ustedes   truthy, with :vosotros not truthy => 2nd person plural uses the
                 3rd person plural ending; otherwise the vosotros ending is used
      :vosotros  see :ustedes
      :tu        accepted for compatibility; not consulted

    Throws Exception when none of the cases above applies."
    [word & {:keys [usted tu vosotros ustedes]}]
    (cond
      (string? word)
      word

      (seq? word)
      (string/join " "
                   (map #(get-string-1 % :usted usted :tu tu
                                       :vosotros vosotros :ustedes ustedes)
                        word))

      :else
      (do
        (log/debug "get-string-1: input word: " word)
        (log/debug (str "get-string-1: word: " word))
        (log/debug (str "get-string-1: word (stripped-refs): " (strip-refs word)))
        (log/debug (str "word's a is a string? " (get-in word '(:a)) " => " (string? (get-in word '(:a)))))
        (log/debug (str "word's b is a map? " (get-in word '(:b)) " => " (map? (get-in word '(:b)))))
        (log/debug (str "word's a espanol is a string? " (get-in word '(:a :espanol)) " => " (string? (get-in word '(:a :espanol)))))
        (cond
          (= word :top)
          ".."

          (ref? word)
          (get-string-1 @word :usted usted :tu tu
                        :vosotros vosotros :ustedes ustedes)

          ;; TODO: this is a special case that should be handled below instead
          ;; of forcing every input to go through this check.
          (or (= word {:initial false})
              (= word {:initial true}))
          ".."

          ;; Both halves are present. This single rule covers every
          ;; string/map combination of :a and :b.
          (and (not (= :none (get-in word [:a] :none)))
               (not (= :none (get-in word [:b] :none))))
          (get-string (get-in word [:a])
                      (get-in word [:b]))

          ;; Only reachable when :b is absent (otherwise the rule above fires).
          (and (string? (get-in word [:a :espanol]))
               (or (= :none (get-in word [:b :agr :number] :none))
                   (= :top (get-in word [:b :agr :number] :none)))
               (= (get-in word [:a :infl]) :top))
          (string/trim (str (get-in word [:a :espanol])
                            " " (get-string-1 (get-in word [:b]))))

          (= true (get-in word [:exception]))
          (get-in word [:espanol])

          (and (contains? espanol-regular-endings (get-in word [:infl]))
               (string? (get-in word [:espanol])))
          (conjugate-regular-espanol word (get-in word [:infl]) usted vosotros ustedes)

          (string? (get-in word [:espanol]))
          (get-in word [:espanol])

          :else
          (throw (Exception. (str "get-string-1: don't know what to do with input argument: " word)))))))
  ;; Render one or two words via get-string-1 and trim the result.
  ;; With a single seq argument the rendering is returned as-is unless it is a
  ;; string, in which case it is trimmed. Otherwise the renderings of `a` and
  ;; (when truthy) `b` are joined by a single space and trimmed.
  (defn get-string [a & [b]]
    (if (and (seq? a) (nil? b))
      (let [rendered (get-string-1 a)]
        (if (string? rendered)
          (trim rendered)
          rendered))
      (let [left (get-string-1 a)
            right (if b (get-string-1 b) "")]
        (trim (string/join " " (list left right))))))
  ;; Kept so that any remaining reference to fo-ps-it still resolves.
  ;; NOTE(review): nothing visible in this file defines fo-ps-it — confirm it
  ;; can be removed.
  (declare fo-ps-it)

  (defn fo-ps
    "Show the phrase-structure of a phrase structure tree, e.g
    [hh21 'mangiare (to eat)' [cc10 'il (the)' 'pane(bread)']].

    - an empty set/seq/vector gives \"\";
    - a non-empty set/seq/vector gives a seq of each element's rendering;
    - a map with a :rule (or, failing that, a :comment) gives
      \"[label left right]\"; the complement is shown first when
      [:espanol :a] equals [:comp :espanol], otherwise the head is first;
    - any other map with an :espanol value gives (get-string-1 of that value);
    - anything else is returned unchanged."
    [expr]
    (let [label (and (map? expr)
                     (or (:rule expr) (:comment expr)))]
      (cond
        (or (set? expr) (seq? expr) (vector? expr))
        ;; Assume each element is a phrase structure tree and show each.
        (if (empty? expr)
          ""
          (map fo-ps expr))

        label
        (let [comp-first? (= (get-in expr [:espanol :a])
                             (get-in expr [:comp :espanol]))
              [left right] (if comp-first? [:comp :head] [:head :comp])]
          (str "[" label " "
               (fo-ps (get-in expr [left]))
               " "
               (fo-ps (get-in expr [right]))
               "]"))

        ;; Lexical leaf. An earlier duplicate of this test used to call the
        ;; never-defined fo-ps-it and made the :rule/:comment cases unreachable.
        (and (map? expr) (:espanol expr))
        (get-string-1 (get-in expr [:espanol]))

        :else
        expr)))
  (defn stem-per-passato-prossimo
    "_infinitive_ should be a string (italian verb infinitive form).
    Returns it with a trailing -are/-ere/-ire removed; a string that does not
    end that way is returned unchanged."
    [infinitive]
    ;; "$1" keeps only the part captured before the thematic vowel.
    (string/replace infinitive #"^(.*)[aei]re$" "$1"))
  ;; Append "ato" to the stem that stem-per-passato-prossimo returns for
  ;; `infinitive`.
  (defn passato-prossimo [infinitive]
    (-> infinitive
        stem-per-passato-prossimo
        (str "ato")))
  ;; Allows reconstruction of the infinitive form from the inflected form.
  ;; Each key is a suffix pattern; :replace-with is substituted for the match
  ;; and :unify-with carries the features implied by that suffix.
  ;; NOTE(review): these suffixes look Italian rather than Spanish although the
  ;; features are filed under :espanol — confirm.
  (def future-to-infinitive
    {#"ò$"    {:replace-with "e"
               :unify-with {:espanol {:infl :futuro :agr {:number :sing :person :1st}}}}
     #"ai$"   {:replace-with "e"
               :unify-with {:espanol {:infl :futuro :agr {:number :sing :person :2nd}}}}
     #"à$"    {:replace-with "e"
               :unify-with {:espanol {:infl :futuro :agr {:number :sing :person :3rd}}}}
     #"emo$"  {:replace-with "e"
               :unify-with {:espanol {:infl :futuro :agr {:number :plur :person :1st}}}}
     #"ete$"  {:replace-with "e"
               :unify-with {:espanol {:infl :futuro :agr {:number :plur :person :2nd}}}}
     #"anno$" {:replace-with "e"
               :unify-with {:espanol {:infl :futuro :agr {:number :plur :person :3rd}}}}})
  ;; Present-tense suffixes mapped back to an "-ire" infinitive ending, with
  ;; the person/number features each suffix implies.
  (def present-to-infinitive-ire
    {#"o$"    {:replace-with "ire"
               :unify-with {:espanol {:infl :present :agr {:number :sing :person :1st}}}}
     #"i$"    {:replace-with "ire"
               :unify-with {:espanol {:infl :present :agr {:number :sing :person :2nd}}}}
     #"e$"    {:replace-with "ire"
               :unify-with {:espanol {:infl :present :agr {:number :sing :person :3rd}}}}
     #"iamo$" {:replace-with "ire"
               :unify-with {:espanol {:infl :present :agr {:number :plur :person :1st}}}}
     #"ete$"  {:replace-with "ire"
               :unify-with {:espanol {:infl :present :agr {:number :plur :person :2nd}}}}
     #"ono$"  {:replace-with "ire"
               :unify-with {:espanol {:infl :present :agr {:number :plur :person :3rd}}}}})
  ;; Present-tense suffixes mapped back to an "-ere" infinitive ending, with
  ;; the person/number features each suffix implies.
  (def present-to-infinitive-ere
    {#"o$"    {:replace-with "ere"
               :unify-with {:espanol {:infl :present :agr {:number :sing :person :1st}}}}
     #"i$"    {:replace-with "ere"
               :unify-with {:espanol {:infl :present :agr {:number :sing :person :2nd}}}}
     #"e$"    {:replace-with "ere"
               :unify-with {:espanol {:infl :present :agr {:number :sing :person :3rd}}}}
     #"iamo$" {:replace-with "ere"
               :unify-with {:espanol {:infl :present :agr {:number :plur :person :1st}}}}
     #"ete$"  {:replace-with "ere"
               :unify-with {:espanol {:infl :present :agr {:number :plur :person :2nd}}}}
     #"ano$"  {:replace-with "ere"
               :unify-with {:espanol {:infl :present :agr {:number :plur :person :3rd}}}}})
  ;; Present-tense suffixes mapped back to an "-are" infinitive ending, with
  ;; the person/number features each suffix implies.
  (def present-to-infinitive-are
    {#"o$"    {:replace-with "are"
               :unify-with {:espanol {:infl :present :agr {:number :sing :person :1st}}}}
     #"i$"    {:replace-with "are"
               :unify-with {:espanol {:infl :present :agr {:number :sing :person :2nd}}}}
     #"e$"    {:replace-with "are"
               :unify-with {:espanol {:infl :present :agr {:number :sing :person :3rd}}}}
     #"iamo$" {:replace-with "are"
               :unify-with {:espanol {:infl :present :agr {:number :plur :person :1st}}}}
     #"ete$"  {:replace-with "are"
               :unify-with {:espanol {:infl :present :agr {:number :plur :person :2nd}}}}
     #"ano$"  {:replace-with "are"
               :unify-with {:espanol {:infl :present :agr {:number :plur :person :3rd}}}}})
  ;; Irregular imperfect forms whose infinitive drops the "ve",
  ;; e.g.: "bevevo/bevevi/..etc" => "bere". Singular persons only.
  (def imperfect-to-infinitive-irreg1
    {#"vevo$" {:replace-with "re"
               :unify-with {:espanol {:infl :imperfetto :agr {:number :sing :person :1st}}}}
     #"vevi$" {:replace-with "re"
               :unify-with {:espanol {:infl :imperfetto :agr {:number :sing :person :2nd}}}}
     #"veva$" {:replace-with "re"
               :unify-with {:espanol {:infl :imperfetto :agr {:number :sing :person :3rd}}}}})
  ;; Past-participle suffixes mapped back to the infinitive ending they come
  ;; from; every entry marks the form as {:infl :past}.
  (def past-to-infinitive
    {#"ato$" {:replace-with "are" :unify-with {:espanol {:infl :past}}}
     #"ito$" {:replace-with "ire" :unify-with {:espanol {:infl :past}}}
     #"uto$" {:replace-with "ere" :unify-with {:espanol {:infl :past}}}})
  ;; Feminine plural noun in "-e" => singular in "-a".
  (def plural-to-singular-noun-fem-1
    {#"e$" {:replace-with "a"
            :unify-with {:synsem {:cat :noun :agr {:gender :fem :number :plur}}}}})
  ;; Plural noun in "-i" => singular in "-o". No gender is asserted.
  (def plural-to-singular-noun-masc-1
    {#"i$" {:replace-with "o"
            :unify-with {:synsem {:cat :noun :agr {:number :plur}}}}})
  ;; Plural noun in "-i" => singular in "-e", e.g. "cani" -> "cane".
  (def plural-to-singular-noun-masc-2
    {#"i$" {:replace-with "e"
            :unify-with {:synsem {:cat :noun :agr {:number :plur}}}}})
  ;; Masculine plural adjective in "-i" => lexical form in "-o".
  (def plural-to-singular-adj-masc
    {#"i$" {:replace-with "o"
            :unify-with {:synsem {:cat :adjective :agr {:gender :masc :number :plur}}}}})
  ;; Feminine singular adjective in "-a" => lexical form in "-o".
  ;; Despite the "plural-to-singular" prefix, this rule marks :number :sing.
  (def plural-to-singular-adj-fem-sing
    {#"a$" {:replace-with "o"
            :unify-with {:synsem {:cat :adjective :agr {:gender :fem :number :sing}}}}})
  ;; Feminine plural adjective in "-e" => lexical form in "-o".
  (def plural-to-singular-adj-fem-plur
    {#"e$" {:replace-with "o"
            :unify-with {:synsem {:cat :adjective :agr {:gender :fem :number :plur}}}}})
  ;; Identity rule: the surface form is looked up unchanged and unified with
  ;; verb / infinitive features.
  (def infinitive-to-infinitive
    {:identity {:unify-with {:synsem {:cat :verb :infl :infinitive}}}})
  ;; Identity rule: the surface form is looked up unchanged and unified with
  ;; noun / singular features.
  (def lexical-noun-to-singular
    {:identity {:unify-with {:synsem {:cat :noun :agr {:number :sing}}}}})
  (defn analyze
    "Return the map incorporating the lexical information about a surface form.

    `surface-form` is the string to analyze; `lookup-fn` is called with a
    candidate lexical form and returns the lexical entries found for it.

    Every suffix rule whose pattern matches `surface-form` rewrites it to a
    candidate lexical form, looks that up, and unifies each entry with the
    rule's :unify-with; failed unifications are dropped. The result is those
    analyses followed by either the :identity analyses of the surface form
    itself or, when there are none, the plain lookup of the surface form."
    [surface-form lookup-fn]
    (let [rules
          ;; Even though it's possible for more than one KV pair to have the same key:
          ;; e.g. plural-to-singular-noun-masc-1 and plural-to-singular-noun-masc-2 both have
          ;; #"i$", they are distinct as separate keys in this hash, as they should be.
          ;; NOTE(review): infinitive-to-infinitive and lexical-noun-to-singular both use the
          ;; key :identity, so only one :identity entry survives the merge — confirm how
          ;; italianverbs.unify/merge combines the two values.
          (merge
           future-to-infinitive
           imperfect-to-infinitive-irreg1
           infinitive-to-infinitive ;; simply turns :top into :infl
           lexical-noun-to-singular ;; turns :number :top to :number :sing
           past-to-infinitive
           present-to-infinitive-ire
           present-to-infinitive-ere
           present-to-infinitive-are
           plural-to-singular-noun-fem-1
           plural-to-singular-noun-masc-1
           plural-to-singular-noun-masc-2
           plural-to-singular-adj-masc
           plural-to-singular-adj-fem-plur
           plural-to-singular-adj-fem-sing)

          ;; Analyses obtained by undoing a suffix.
          analyzed
          (->> (keys rules)
               (filter #(and (not (keyword? %)) (re-find % surface-form)))
               (mapcat (fn [pattern]
                         (let [rule (get rules pattern)
                               lexical-form (string/replace surface-form pattern
                                                            (:replace-with rule))]
                           (map #(unifyc % (:unify-with rule))
                                (lookup-fn lexical-form)))))
               (remove fail?))

          ;; Analyzed-via-identity is used to handle infinitive verbs: converts them from
          ;; unspecified inflection to {:infl :infinitive}.
          ;; Might also be used in the future to convert nouns from unspecified number to
          ;; singular number.
          analyzed-via-identity
          (->> (keys rules)
               (filter #(= % :identity))
               (mapcat (fn [k]
                         (map #(unifyc % (:unify-with (get rules k)))
                              (lookup-fn surface-form))))
               (remove fail?))]
      (concat
       analyzed
       ;; also lookup the surface form itself, which
       ;; might be either the canonical form of a word, or an irregular conjugation of a word.
       (if (seq analyzed-via-identity)
         analyzed-via-identity
         (lookup-fn surface-form)))))
  (defn exception-generator
    "Build lexicon entries for irregular (\"exception\") forms.

    `lexicon` is a collection of [surface-form lexemes] pairs, where lexemes is
    a list of lexemes for that surface form. For every lexeme and every
    exception path below at which the lexeme holds a value, one single-entry
    map is produced: its key is the value found at the path and its value is
    the lexeme merged with the features for that form plus
    {:espanol {:exception true}}.

    Returns a seq of those maps, or nil when there are none."
    [lexicon]
    (let [present-exception
          ;; One rule per present-tense person/number combination.
          (fn [form-key number person]
            {:path [:espanol :present form-key]
             :merge-fn
             (fn [val]
               {:espanol {:infl :present
                          :espanol (get-in val [:espanol :present form-key] :nothing)
                          :agr {:number number
                                :person person}}})})
          exception-rules
          [;; 1. past-tense exceptions
           {:path [:espanol :passato]
            :merge-fn
            (fn [val]
              {:espanol {:infl :past
                         :espanol (get-in val [:espanol :passato] :nothing)}})}
           ;; 2. present-tense exceptions
           (present-exception :1sing :sing :1st)
           (present-exception :2sing :sing :2nd)
           (present-exception :3sing :sing :3rd)
           (present-exception :1plur :plur :1st)
           (present-exception :2plur :plur :2nd)
           (present-exception :3plur :plur :3rd)
           ;; adjectives
           {:path [:espanol :masc :plur]
            :merge-fn
            (fn [val]
              {:espanol {:agr {:gender :masc
                               :number :plur}}})}
           {:path [:espanol :fem :plur]
            :merge-fn
            (fn [val]
              {:espanol {:agr {:gender :fem
                               :number :plur}}})}]

          exceptions-for
          (fn [[surface lexemes]]
            (mapcat
             (fn [{:keys [path merge-fn]}]
               (log/debug (str surface "looking at path: " path))
               (mapcat
                (fn [lexeme]
                  ;; :fail and :top are keywords, not maps, so they must not
                  ;; go through copy/dissoc (that would raise a
                  ;; ClassCastException).
                  (let [lexeme (if (or (= lexeme :fail) (= lexeme :top))
                                 lexeme
                                 (dissoc (copy lexeme) :serialized))
                        exceptional-form (get-in lexeme path :none)]
                    (when-not (= :none exceptional-form)
                      (list {exceptional-form
                             (merge
                              lexeme
                              (unifyc (merge-fn lexeme)
                                      {:espanol {:exception true}}))}))))
                lexemes))
             exception-rules))]
      ;; mapcat instead of self-recursion: the stack no longer grows with the
      ;; size of the lexicon. take-while keeps the former behaviour of stopping
      ;; at the first nil/false entry; seq keeps the nil-when-empty result.
      (seq (mapcat exceptions-for (take-while identity lexicon)))))
  ;; Attach the surface string `a-string` (under [:espanol :espanol]) and
  ;; {:phrasal false} to `a-map` by unification. A vector or seq is processed
  ;; element by element. The order of the unifyc arguments depends on whether
  ;; `a-map` is a map that already has an :espanol value.
  (defn phonize [a-map a-string]
    (let [surface {:espanol {:espanol a-string}}
          common {:phrasal false}]
      (cond
        (or (vector? a-map) (seq? a-map))
        (map #(phonize % a-string) a-map)

        (and (map? a-map)
             (not (= :no-espanol (get-in a-map [:espanol] :no-espanol))))
        (unifyc surface common a-map)

        :else
        (unifyc a-map surface common))))
  ;; Tie features of :espanol and :synsem together through shared refs:
  ;; :cat and :infl for verbs, :agr and :cat for nouns. Entries of any other
  ;; category are returned unchanged.
  (defn agreement [lexical-entry]
    (let [share (fn [first-feature second-feature]
                  (let [first-ref (ref :top)
                        second-ref (ref :top)
                        shared {first-feature first-ref
                                second-feature second-ref}]
                    (unifyc lexical-entry
                            {:espanol shared
                             :synsem shared})))]
      (case (get-in lexical-entry [:synsem :cat])
        :verb (share :cat :infl)
        :noun (share :agr :cat)
        lexical-entry)))
  ;; The language-specific rule functions defined in this namespace;
  ;; currently only `agreement`.
  (def espanol-specific-rules
    (list agreement))