/tools/old-patches/2008-11-11/replaceHyphens.php

https://github.com/clyfe/DEXonline · PHP · 132 lines · 109 code · 9 blank · 14 comment · 34 complexity · da31098b2c15c339e897e2685075a528 MD5 · raw file

  1. <?php
  2. require_once '../../phplib/util.php';
  // Hyphens found past this character offset in a definition are copied
  // through untouched.
  define("MAX_LEN", 40);

  // For each lexem model type, the inflection IDs whose word forms are tried
  // when expanding an abbreviated "-suffix" into a full form. Model types that
  // are missing from this map get no candidate forms.
  $inflectionsToUse = array(
    'V' => array(54, 52),
    'VT' => array(54, 52),
    'F' => array(11),
    'M' => array(3),
    'N' => array(19),
    'A' => array(27, 33, 35),
    'MF' => array(27, 33, 35),
  );

  $dbResult = mysql_query('select * from Definition where SourceId in (10, 12) and status = ' . ST_ACTIVE . ' order by id desc');
  if (!$dbResult) {
    // mysql_query() returns false on failure; stop loudly instead of letting
    // mysql_fetch_assoc() emit a warning and the script end as if it had
    // nothing to do.
    print 'Cannot load definitions: ' . mysql_error() . "\n";
    exit(1);
  }

  while ($row = mysql_fetch_assoc($dbResult)) {
    $def = Definition::createFromDbRow($row);

    // Choose a lexem to inflect. We have four cases
    // - No inflected lexems
    // - Two or more inflected lexems
    // - We don't know what inflections to use for that model type
    // - All good
    // Only the case of exactly one non-'T' lexem yields a lexem to work with.
    $lexems = Lexem::loadByDefinitionId($def->id);
    $lexemsWithInflections = array();
    foreach ($lexems as $l) {
      if ($l->modelType != 'T') {
        $lexemsWithInflections[] = $l;
      }
    }

    $lexem = null;
    $inflections = array();
    if (count($lexemsWithInflections) == 1) {
      $lexem = $lexemsWithInflections[0];
      if (array_key_exists($lexem->modelType, $inflectionsToUse)) {
        $inflections = $inflectionsToUse[$lexem->modelType];
      }
    }

    // Rebuild the internal representation character by character, replacing
    // every "-suffix" chunk that can be matched to an inflected form.
    $rep = $def->internalRep;
    $len = mb_strlen($rep);
    $newRep = '';
    $prevC = '';
    for ($i = 0; $i < $len; $i++) {
      $c = text_getCharAt($rep, $i);

      // A hyphen starts a suffix chunk only when it does not follow a letter
      // (otherwise it is a hyphen inside a word) and sits within MAX_LEN.
      if (text_isUnicodeLetter($prevC) || $c != '-' || $i > MAX_LEN) {
        $newRep .= $c;
        $prevC = $c;
        continue;
      }

      // The chunk is the hyphen plus the run of letters right after it. The
      // scan is bounded by $len so it never depends on how the text helpers
      // behave past the end of the string.
      $j = $i + 1;
      while ($j < $len && text_isUnicodeLetter(text_getCharAt($rep, $j))) {
        $j++;
      }
      $chunk = mb_substr($rep, $i, $j - $i);

      // By default the chunk is copied verbatim: a lone hyphen, or no single
      // lexem to inflect.
      $replacement = $chunk;
      if ($chunk != '-' && $lexem) {
        $suffix = mb_substr($chunk, 1);

        // Look through all candidate forms; when several match, the last one
        // examined is kept.
        $matchingForm = null;
        foreach ($inflections as $inflId) {
          $wls = WordList::loadByLexemIdInflectionId($lexem->id, $inflId);
          foreach ($wls as $wl) {
            if (matchesWithAccent($wl->form, $suffix)) {
              $matchingForm = $wl->form;
            }
          }
        }

        if ($matchingForm !== null) {
          $replacement = str_replace($GLOBALS['text_explicitAccent'], $GLOBALS['text_accented'], $matchingForm);
          // Convert to uppercase when the suffix itself is uppercase
          if ($suffix == text_unicodeToUpper($suffix)) {
            $replacement = text_unicodeToUpper($replacement);
          }
        } else {
          // No form fits: keep the chunk and report it for manual review.
          print "***** http://dexonline.ro/search.php?cuv={$lexem->unaccented} *****\n";
          print "{$rep}\n";
          print " * Nu știu ce să fac cu [{$chunk}] la poziția {$i}, lexem {$lexem->form}, model {$lexem->modelType}{$lexem->modelNumber}{$lexem->restriction}\n";
        }
      }
      $newRep .= $replacement;

      // Resume right after the chunk. The previous character is the last one
      // consumed (the final letter of the chunk, or the lone hyphen), not the
      // hyphen that opened the chunk; otherwise a hyphen directly following a
      // suffix would wrongly be treated as the start of another suffix.
      $i = $j - 1;
      $prevC = text_getCharAt($rep, $i);
    }

    // Persist only definitions whose text actually changed.
    if ($newRep != $rep) {
      $def->internalRep = $newRep;
      $def->htmlRep = text_htmlize($newRep);
      $def->save();
    }
  }
  101. /********************************************************/
  /**
   * Tells whether an inflected form ends with the given suffix, tolerating a
   * stress accent that is marked on only one of the two strings.
   *
   * The suffix is lowercased first. An accent counts as present when the
   * string contains an apostrophe: directly for $form, and for $suffix after
   * mapping $GLOBALS['text_accented'] to $GLOBALS['text_explicitAccent']
   * (presumably parallel lists of accented characters and their
   * apostrophe-prefixed spellings -- confirm in phplib/util.php).
   *
   * - Accent on both: the form is converted to accented characters and must
   *   end with the (accented) suffix.
   * - Accent on one side only: the apostrophes are dropped from that side
   *   before comparing.
   * - No accents: plain comparison.
   *
   * @param string $form   Candidate word form, possibly with explicit accents.
   * @param string $suffix Suffix text found after the hyphen (without it).
   * @return mixed Whatever text_endsWith() returns for the normalized pair.
   */
  function matchesWithAccent($form, $suffix) {
    $suffix = text_unicodeToLower($suffix);
    $explicitSuffix = str_replace($GLOBALS['text_accented'], $GLOBALS['text_explicitAccent'], $suffix);

    $accentInForm = strpos($form, "'") !== false;
    $accentInSuffix = strpos($explicitSuffix, "'") !== false;

    // Normalize both sides to a common notation, then compare once.
    $haystack = $form;
    $needle = $suffix;
    if ($accentInForm) {
      if ($accentInSuffix) {
        // Both accented: compare with accented characters on both sides.
        $haystack = str_replace($GLOBALS['text_explicitAccent'], $GLOBALS['text_accented'], $form);
      } else {
        // Only the form is accented: ignore its accent marks.
        $haystack = str_replace("'", "", $form);
      }
    } elseif ($accentInSuffix) {
      // Only the suffix is accented: ignore its accent marks.
      $needle = str_replace("'", "", $explicitSuffix);
    }

    return text_endsWith($haystack, $needle);
  }
  123. ?>