/lib/core/Multilingual/Aligner/UpdatePages.php

https://gitlab.com/ElvisAns/tiki · PHP · 203 lines · 130 code · 23 blank · 50 comment · 50 complexity · f012755075c21e4eadb80847c84a7045 MD5 · raw file

  1. <?php
  2. // (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
  3. //
  4. // All Rights Reserved. See copyright.txt for details and a complete list of authors.
  5. // Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
  6. // $Id$
  7. include_once 'UpdateSentences.php';
  8. include_once 'SentenceSegmentor.php';
  9. include_once 'SentenceAlignments.php';
  10. include_once 'MockMTWrapper.php';
  11. class Multilingual_Aligner_UpdatePages
  12. {
  13. public $alignments = null;
  14. public function setUp()
  15. {
  16. $this->alignments = new Multilingual_Aligner_SentenceAlignments();
  17. $this->translator = new Multilingual_Aligner_MockMTWrapper();
  18. }
  19. public function UpdatingTargetPage($source_outofdate, $source_modified, $target_outofdate, $target_modified, $source_lng, $target_lng)
  20. {
  21. $segmentor = new Multilingual_Aligner_SentenceSegmentor();
  22. $source_outofdate_string = $source_outofdate;
  23. $source_modified_string = $source_modified;
  24. $target_outofdate_string = $target_outofdate;
  25. $target_modified_string = $target_modified;
  26. $source_outofdate_sentences = $segmentor->segment($source_outofdate_string);
  27. $source_modified_sentences = $segmentor->segment($source_modified_string);
  28. $target_outofdate_sentences = $segmentor->segment($target_outofdate_string);
  29. $target_modified_sentences = $segmentor->segment($target_modified_string);
  30. $target_modified_sentences[count($target_modified_sentences)] = 'dummy';
  31. $i = -1;
  32. for ($a = 0, $aCountSourceOutofdateSentences = count($source_outofdate_sentences); $a < $aCountSourceOutofdateSentences; $a++) {
  33. $source_outofdate_sentences[$a] = trim($source_outofdate_sentences[$a]);
  34. }
  35. for ($a = 0, $aCountSourceModifiedSentences = count($source_modified_sentences); $a < $aCountSourceModifiedSentences; $a++) {
  36. $source_modified_sentences[$a] = trim($source_modified_sentences[$a]);
  37. }
  38. for ($a = 0, $aCountTargetOutofdateSentences = count($target_outofdate_sentences); $a < $aCountTargetOutofdateSentences; $a++) {
  39. $target_outofdate_sentences[$a] = trim($target_outofdate_sentences[$a]);
  40. }
  41. for ($a = 0, $aCountTargetModifiedSentences = count($target_modified_sentences); $a < $aCountTargetModifiedSentences; $a++) {
  42. $target_modified_sentences[$a] = trim($target_modified_sentences[$a]);
  43. }
  44. $update = new Multilingual_Aligner_UpdateSentences();
  45. $source_diff = $update->DifferencebetweenOriginalFileandModifiedFile($source_outofdate_sentences, $source_modified_sentences, $this->alignments, $this->translator, 'en', 'fr', 1);
  46. $target_diff = $update->DifferencebetweenOriginalFileandModifiedFile($target_outofdate_sentences, $target_modified_sentences, $this->alignments, $this->translator, 'en', 'fr', 0);
  47. for ($a = 0, $aCountSourceDiff = count($source_diff); $a < $aCountSourceDiff; $a++) {
  48. $source_diff[$a] = trim($source_diff[$a]);
  49. }
  50. for ($a = 0, $aCountTargetDiff = count($target_diff); $a < $aCountTargetDiff; $a++) {
  51. $target_diff[$a] = trim($target_diff[$a]);
  52. if ($target_diff[$a] == '+dummy') {
  53. unset($target_diff[$a]);
  54. }
  55. }
  56. $target_diff = array_values($target_diff);
  57. for ($a = 0, $aCountSourceDiff = count($source_diff); $a < $aCountSourceDiff; $a++) {
  58. $index = 0;
  59. $i = 2;
  60. if ($source_diff[$a] == '' || ($source_diff[$a][0] != '+')) {
  61. $pat = 0;
  62. if ($source_diff[$a] != '' && $source_diff[$a][0] == '<' && is_numeric($source_diff[$a][1])) {
  63. $b = 2;
  64. while (is_numeric($source_diff[$a][$b])) {
  65. $b++;
  66. }
  67. if ($source_diff[$a][$b] == '>') {
  68. $pat = 1;
  69. }
  70. }
  71. if ($source_diff[$a] == '' || $pat == 0) {
  72. $index = array_search($source_diff[$a], $source_diff);
  73. $index++;
  74. $first = 0;
  75. while (($c = $this->array_search_function($source_diff[$a], $source_diff, $index)) != -1) {
  76. $first = 1;
  77. $source_diff[$c] = '<$i>' . $source_diff[$c];
  78. $i++;
  79. $index = $c + 1;
  80. }
  81. $source_diff[$a] = '<1>' . $source_diff[$a];
  82. }
  83. }
  84. }
  85. for ($a = 0, $aCountTargetDiff = count($target_diff); $a < $aCountTargetDiff; $a++) {
  86. $index = 1;
  87. $i = 2;
  88. if ($target_diff[$a] == '' || ($target_diff[$a][0] != '+')) {
  89. $pat = 0;
  90. if ($target_diff[$a] != '' && $target_diff[$a][0] == '<' && is_numeric($target_diff[$a][1])) {
  91. $b = 2;
  92. while (is_numeric($target_diff[$a][$b])) {
  93. $b++;
  94. }
  95. if ($target_diff[$a][$b] == '>') {
  96. $pat = 1;
  97. }
  98. }
  99. if ($target_diff[$a] == '' || $pat == 0) {
  100. $index = array_search($target_diff[$a], $target_diff);
  101. $index++;
  102. $first = 0;
  103. while (($c = $this->array_search_function($target_diff[$a], $target_diff, $index)) != -1) {
  104. $first = 1;
  105. $target_diff[$c] = '<$i>' . $target_diff[$c];
  106. $i++;
  107. $index = $c + 1;
  108. }
  109. $target_diff[$a] = '<1>' . $target_diff[$a];
  110. }
  111. }
  112. }
  113. $final_updated = $update->FinalUpdatedFileinTagetLanguage($source_diff, $target_diff);
  114. /*
  115. echo "final_updated_sentences<br/>";
  116. foreach ($final_updated as $item)
  117. echo "sentence-> ".$item."<br/>";
  118. $content = implode(' ', $final_updated);
  119. echo "$content<br/>";
  120. if (strcmp("Firefox supporte les caract�res internationaux pour des langues tel que lindien. Added_Source This is a test statement. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien. La plupart des sites qui ont besoin de polices suppl�mentaires vont avoir une page qui d�crit o� vous pouvez obtenir la police.", $content)==0)
  121. echo "match<br/>";
  122. else
  123. echo "fail";
  124. */
  125. return $final_updated;
  126. }
  127. public function SetAlignment($source_file, $target_file, $source_lng, $target_lng)
  128. {
  129. $source_array = explode('<br/>', $source_file);
  130. $target_array = explode('<br/>', $target_file);
  131. // for ($i=1; $i<count($target_array); $i++) {
  132. // $target_array[$i]=utf8_decode($target_array[$i]);
  133. // }
  134. for ($i = 0, $iCountSourceArray = count($source_array); $i < $iCountSourceArray; $i++) {
  135. $this->alignments->addSentencePair($source_array[$i], $source_lng, $target_array[$i], $target_lng);
  136. }
  137. }
  138. public function array_search_function($temp, $array, $index)
  139. {
  140. $i = 0;
  141. for ($i = $index, $iCountArray = count($array); $i < $iCountArray; $i++) {
  142. $val = $array[$i];
  143. if (strcmp($temp, $val) == 0) {
  144. return $i;
  145. }
  146. }
  147. return -1;
  148. }
  149. }
  150. // algorithm can be checked by running through these instructions
  151. /*
  152. $source_lng="en";
  153. $target_lng="fr";
  154. $source_outofdate="Firefox supports international characters for languages such as Hindi. You can test your Firefoxs support of Hindi scripts at BBC Hindi.Most sites that require additional fonts will have a page describing where you can get the font.";
  155. $source_modified="Firefox supports international characters for languages such as Hindi. You can test your Firefoxs support of Hindi scripts at BBC Hindi.Most sites that require additional fonts will have a page describing where you can get the font.";
  156. $target_outofdate="Firefox supporte les caractères internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien.La plupart des sites qui ont besoin de polices supplémentaires vont avoir une page qui décrit où vous pouvez obtenir la police.";
  157. $target_modified="Firefox supporte les caractères internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien. C'est une déclaration d'essai.La plupart des sites qui ont besoin de polices supplémentaires vont avoir une page qui décrit où vous pouvez obtenir la police.";
  158. $source_alignment="Firefox supports international characters for languages such as Hindi.<br/>You can test your Firefoxs support of Hindi scripts at BBC Hindi.<br/>Most sites that require additional fonts will have a page describing where you can get the font.";
  159. $target_alignment="Firefox supporte les caractères internationaux pour des langues tel que lindien.<br/>Vous pouvez tester le support Firefox des scripts indiens sur BBC indien.<br/>La plupart des sites qui ont besoin de polices supplémentaires vont avoir une page qui décrit où vous pouvez obtenir la police.";
  160. $source_Mtranslation="This is a test statement.";
  161. $target_Mtranslation="C'est une déclaration d'essai.";
  162. */
  163. // $source_lng="en";
  164. // $target_lng="fr";
  165. // $source_outofdate="Firefox supports international characters for languages such as Hindi. You can test your Firefoxs support of Hindi scripts at BBC Hindi.Most sites that require additional fonts will have a page describing where you can get the font.";
  166. // $source_modified="Firefox supports international characters for languages such as Hindi.This is a test statement. You can test your Firefoxs support of Hindi scripts at BBC Hindi.Most sites that require additional fonts will have a page describing where you can get the font.";
  167. // $target_outofdate="Firefox supporte les caractères internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien.La plupart des sites qui ont besoin de polices supplémentaires vont avoir une page qui décrit où vous pouvez obtenir la police.";
  168. // $target_modified="Firefox supporte les caractères internationaux pour des langues tel que lindien. Vous pouvez tester le support Firefox des scripts indiens sur BBC indien.La plupart des sites qui ont besoin de polices supplémentaires vont avoir une page qui décrit où vous pouvez obtenir la police.";
  169. // $source_alignment="Firefox supports international characters for languages such as Hindi.<br/>You can test your Firefoxs support of Hindi scripts at BBC Hindi.<br/>Most sites that require additional fonts will have a page describing where you can get the font.";
  170. // $target_alignment="Firefox supporte les caractères internationaux pour des langues tel que lindien.<br/>Vous pouvez tester le support Firefox des scripts indiens sur BBC indien.<br/>La plupart des sites qui ont besoin de polices supplémentaires vont avoir une page qui décrit où vous pouvez obtenir la police.";
  171. //
  172. // $source_Mtranslation="This is a test statement.";
  173. // $target_Mtranslation="C'est une déclaration d'essai.";
  174. //
  175. // $test=new Multilingual_Aligner_UpdatePages();
  176. // $test->setUp();
  177. // $test->SetAlignment($source_alignment,$target_alignment,$source_lng,$target_lng);
  178. // $test->translator->SetMT($source_Mtranslation,$source_Mtranslation,$source_lng,$target_lng);
  179. // $test->UpdatingTargetPage($source_outofdate,$source_modified,$target_outofdate,$target_modified,$source_lng,$target_lng);