PageRenderTime 45ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/includes/normal/UtfNormalTest2.php

https://bitbucket.org/ghostfreeman/freeside-wiki
PHP | 255 lines | 160 code | 28 blank | 67 comment | 14 complexity | a2ac7d80edb340711e5f577e142b991c MD5 | raw file
Possible License(s): GPL-2.0, Apache-2.0, LGPL-3.0
  1. #!/usr/bin/php
  2. <?php
  3. /**
  4. * Other tests for the unicode normalization module.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License along
  17. * with this program; if not, write to the Free Software Foundation, Inc.,
  18. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19. * http://www.gnu.org/copyleft/gpl.html
  20. *
  21. * @file
  22. * @ingroup UtfNormal
  23. */
  // This is a command-line test driver; refuse to run under any other SAPI.
  if ( PHP_SAPI !== 'cli' ) {
      exit( "Run me from the command line please.\n" );
  }

  // Markers used by the test data file
  // (from http://unicode.org/Public/UNIDATA/NormalizationTest.txt):
  // semicolons separate the columns, and anything after '#' is a comment.
  define( 'SEPARATOR', ';' );
  define( 'COMMENT', '#' );

  // Test data file (relative path) and the read handle used by the main loop.
  // $f is false when the file cannot be opened.
  $file = 'NormalizationTest.txt';
  $f = fopen( $file, 'r' );
  34. /**
  35. * The following section will be used for testing different normalization methods.
  36. * - Pure PHP
  37. ~ no assertion errors
  38. ~ 6.25 minutes
  39. * - php_utfnormal.so or intl extension: both are wrappers around
  40. libicu so we list the version of libicu when making the
  41. comparison
  42. * - libicu Ubuntu 3.8.1-3ubuntu1.1 php 5.2.6-3ubuntu4.5
  43. ~ 2200 assertion errors
  44. ~ 5 seconds
  45. ~ output: http://paste2.org/p/921566
  46. * - libicu Ubuntu 4.2.1-3 php 5.3.2-1ubuntu4.2
  47. ~ 1384 assertion errors
  48. ~ 15 seconds
  49. ~ output: http://paste2.org/p/921435
  50. * - libicu Debian 4.4.1-5 php 5.3.2-1ubuntu4.2
  51. ~ no assertion errors
  52. ~ 13 seconds
  53. * - Tests comparing pure PHP output with libicu output were added
  54. later and slow down the runtime.
  55. */
  56. require_once("./UtfNormal.php");
  /**
   * Normalize a string to form C by delegating to UtfNormal::toNFC().
   *
   * @param string $c Input string
   * @return string Whatever UtfNormal::toNFC() returns for $c
   */
  function normalize_form_c( $c ) {
      return UtfNormal::toNFC( $c );
  }

  /**
   * Normalize a string to form D by delegating to UtfNormal::toNFD().
   *
   * @param string $c Input string
   * @return string Whatever UtfNormal::toNFD() returns for $c
   */
  function normalize_form_d( $c ) {
      return UtfNormal::toNFD( $c );
  }

  /**
   * Normalize a string to form KC by delegating to UtfNormal::toNFKC().
   *
   * @param string $c Input string
   * @return string Whatever UtfNormal::toNFKC() returns for $c
   */
  function normalize_form_kc( $c ) {
      return UtfNormal::toNFKC( $c );
  }

  /**
   * Normalize a string to form KD by delegating to UtfNormal::toNFKD().
   *
   * @param string $c Input string
   * @return string Whatever UtfNormal::toNFKD() returns for $c
   */
  function normalize_form_kd( $c ) {
      return UtfNormal::toNFKD( $c );
  }
  /**
   * "Pure PHP" variants of the normalize_form_* wrappers.
   *
   * Each one passes "php" as an extra second argument to the matching
   * UtfNormal method. This set of functions is only useful if you've added a
   * parameter to those methods to force pure PHP usage. I decided not to
   * commit that code since it might produce a slowdown in the UTF
   * normalization code just for the sake of these tests. -- hexmode
   *
   * @param string $c Input string
   * @return string
   */
  function normalize_form_c_php( $c ) {
      return UtfNormal::toNFC( $c, "php" );
  }

  /**
   * Form D counterpart of normalize_form_c_php().
   *
   * @param string $c Input string
   * @return string
   */
  function normalize_form_d_php( $c ) {
      return UtfNormal::toNFD( $c, "php" );
  }

  /**
   * Form KC counterpart of normalize_form_c_php().
   *
   * @param string $c Input string
   * @return string
   */
  function normalize_form_kc_php( $c ) {
      return UtfNormal::toNFKC( $c, "php" );
  }

  /**
   * Form KD counterpart of normalize_form_c_php().
   *
   * @param string $c Input string
   * @return string
   */
  function normalize_form_kd_php( $c ) {
      return UtfNormal::toNFKD( $c, "php" );
  }
  /*
   * The checks below used to be written as assert( '...code string...' ) with
   * assert_options() routing failures to my_assert(). String assertions are no
   * longer evaluated as code on PHP 8 (a non-empty string is simply truthy),
   * so every check would silently pass there, and ASSERT_QUIET_EVAL no longer
   * exists. The comparisons are therefore done explicitly and reported through
   * my_assert() with the same message text as before.
   */

  /**
   * Report a failed check for the row currently being processed.
   *
   * Keeps the three-argument shape of an assert() callback. Only $code is
   * used; the data line number and the row's trailing comment are read from
   * the globals $lineNo and $col.
   *
   * @param string $file Script file of the failing check (unused)
   * @param int $line Script line of the failing check (unused)
   * @param string $code Textual description of the comparison that failed
   */
  function my_assert( $file, $line, $code ) {
      global $col, $lineNo;
      echo "Assertion that '$code' failed on line $lineNo ($col[5])\n";
  }

  /**
   * Compare two values with === and report a mismatch through my_assert().
   *
   * @param mixed $left First operand
   * @param mixed $right Second operand
   * @param string $code Description printed when the operands differ
   */
  function check_identical( $left, $right, $code ) {
      if ( $left !== $right ) {
          my_assert( __FILE__, __LINE__, $code );
      }
  }

  // Forms in the order they are cross-checked. Each entry names the default
  // wrapper and the wrapper that asks for the pure PHP implementation.
  $forms = array(
      'NFC'  => array( 'normalize_form_c', 'normalize_form_c_php' ),
      'NFD'  => array( 'normalize_form_d', 'normalize_form_d_php' ),
      'NFKD' => array( 'normalize_form_kd', 'normalize_form_kd_php' ),
      'NFKC' => array( 'normalize_form_kc', 'normalize_form_kc_php' ),
  );

  // Conformance rules as array( expected column, form, source columns ),
  // using 0-based column indexes (c1 is $col[0]):
  $invariants = array(
      // c2 == NFC(c1) == NFC(c2) == NFC(c3)
      array( 1, 'NFC', array( 0, 1, 2 ) ),
      // c4 == NFC(c4) == NFC(c5)
      array( 3, 'NFC', array( 3, 4 ) ),
      // c3 == NFD(c1) == NFD(c2) == NFD(c3)
      array( 2, 'NFD', array( 0, 1, 2 ) ),
      // c5 == NFD(c4) == NFD(c5)
      array( 4, 'NFD', array( 3, 4 ) ),
      // c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
      array( 3, 'NFKC', array( 0, 1, 2, 3, 4 ) ),
      // c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
      array( 4, 'NFKD', array( 0, 1, 2, 3, 4 ) ),
  );

  $count = 0;
  $lineNo = 0;

  // Without the data file there is nothing to test; say so instead of
  // printing "done." as if the run had succeeded.
  if ( $f === false ) {
      fwrite( STDERR, "Unable to open $file\n" );
      exit( 1 );
  }

  while ( ( $col = getRow( $f ) ) !== false ) {
      $lineNo++;

      // Only rows with five data columns plus the trailing comment are tests.
      if ( count( $col ) !== 6 ) {
          continue;
      }
      $count++;
      if ( $count % 100 === 0 ) {
          echo "Count: $count\n";
      }

      // Verify that the pure PHP version agrees with the default one, and
      // keep the default results for the conformance checks below.
      $normalized = array();
      foreach ( $forms as $form => $wrappers ) {
          list( $default, $purePhp ) = $wrappers;
          for ( $i = 0; $i < 5; $i++ ) {
              // Same label the old string assertion carried, e.g. '$NFCc1'.
              $name = '$' . $form . 'c' . ( $i + 1 );
              $normalized[$form][$i] = call_user_func( $default, $col[$i] );
              check_identical(
                  $normalized[$form][$i],
                  call_user_func( $purePhp, $col[$i] ),
                  $name . ' === ' . $name . 'p'
              );
          }
      }

      // Verify the results against the expected columns of the data file.
      foreach ( $invariants as $rule ) {
          list( $expected, $form, $sources ) = $rule;
          foreach ( $sources as $i ) {
              check_identical(
                  $col[$expected],
                  $normalized[$form][$i],
                  '$col[' . $expected . '] === $' . $form . 'c' . ( $i + 1 )
              );
          }
      }
  }

  fclose( $f );
  echo "done.\n";
  /**
   * Encode a single code point as a UTF-8 byte string.
   *
   * Compare against http://en.wikipedia.org/wiki/UTF-8#Description
   *
   * @param int $c Code point
   * @return string|bool One to four bytes, or false if $c is above 0x10FFFF
   */
  function unichr($c) {
      // One byte: 0xxxxxxx
      if ($c <= 0x7F) {
          return chr($c);
      }

      // The lowest six bits always end up in the final continuation byte.
      $last = chr(0x80 | ($c & 0x3F));

      // Two bytes: 110xxxxx 10xxxxxx
      if ($c <= 0x7FF) {
          $lead = chr(0xC0 | ($c >> 6));
          return $lead . $last;
      }

      $secondToLast = chr(0x80 | (($c >> 6) & 0x3F));

      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      if ($c <= 0xFFFF) {
          $lead = chr(0xE0 | ($c >> 12));
          return $lead . $secondToLast . $last;
      }

      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      if ($c <= 0x10FFFF) {
          $lead = chr(0xF0 | ($c >> 18));
          $afterLead = chr(0x80 | (($c >> 12) & 0x3F));
          return $lead . $afterLead . $secondToLast . $last;
      }

      return false;
  }
  /**
   * Convert a whitespace-separated list of hexadecimal code points
   * (e.g. "0044 0307") into the corresponding UTF-8 string.
   *
   * Empty tokens are dropped. Splitting on a single space would turn leading,
   * trailing or doubled spaces into empty strings, which hexdec() maps to 0
   * and unichr() then emits as a NUL byte. As a consequence an empty or
   * all-whitespace input yields "".
   *
   * @param string $c Hexadecimal code points separated by whitespace
   * @return string Concatenation of unichr() applied to each code point
   */
  function unistr($c) {
      $tokens = preg_split( '/\s+/', $c, -1, PREG_SPLIT_NO_EMPTY );
      return implode( "", array_map( "unichr", array_map( "hexdec", $tokens ) ) );
  }
  /**
   * Read and parse the next line of the test data file.
   *
   * - At end of file (fgets() returns false): returns false.
   * - A line that starts with the COMMENT marker: returns array( $line ).
   * - Any other line: the text before the COMMENT marker is split on
   *   SEPARATOR, each non-blank entry is decoded with unistr(), and the
   *   comment text is appended as the last element. A data row with five
   *   columns therefore yields six elements.
   *
   * The comment of a data row looks like "# (samples; ...; ) NAME". Only the
   * part after the closing parenthesis is kept. The LAST ")" of the comment
   * is used, because a sample character may itself be ")" and would otherwise
   * cut the comment short. A comment without ")" is kept whole, including the
   * marker.
   *
   * @param resource $f Open handle on the test data file
   * @return array|bool Parsed row as described above, or false at end of file
   */
  function getRow( $f ) {
      $row = fgets( $f );
      if ( $row === false ) {
          return false;
      }
      $row = rtrim( $row );

      $pos = strpos( $row, COMMENT );
      if ( $pos === 0 ) {
          return array( $row );
      }

      $c = "";
      if ( $pos !== false ) {
          $comment = substr( $row, $pos );
          $close = strrpos( $comment, ")" );
          if ( $close !== false ) {
              // Skip the ")" and the space after it. The cast covers substr()
              // returning false on older PHP when nothing follows.
              $c = (string)substr( $comment, $close + 2 );
          } else {
              $c = $comment;
          }
          $row = substr( $row, 0, $pos );
      }

      $ret = array();
      foreach ( explode( SEPARATOR, $row ) as $ent ) {
          // Trim before decoding so whitespace around a separator is not
          // passed on as an empty code point.
          $ent = trim( $ent );
          if ( $ent !== "" ) {
              $ret[] = unistr( $ent );
          }
      }
      $ret[] = $c;
      return $ret;
  }