/core/src/test/php/net/xp_framework/unittest/text/AbstractTokenizerTest.class.php

https://github.com/ghiata/xp-framework · PHP · 293 lines · 166 code · 23 blank · 104 comment · 8 complexity · 28682ff9fc17695b7c46ac2d6f00e643 MD5 · raw file

  1. <?php namespace net\xp_framework\unittest\text;
  2. use unittest\TestCase;
  3. use text\Tokenizer;
  /**
   * Abstract base class for different tokenizer tests. Concrete subclasses
   * supply the tokenizer implementation via tokenizerInstance(); every test
   * in this class then runs against that implementation.
   *
   * @see xp://text.Tokenizer
   * @see xp://net.xp_framework.unittest.text.StringTokenizerTest
   * @see xp://net.xp_framework.unittest.text.StreamTokenizerTest
   */
  abstract class AbstractTokenizerTest extends TestCase {

    /**
     * Retrieve a tokenizer instance
     *
     * @param string source
     * @param string delimiters default ' '
     * @param bool returnDelims default FALSE
     * @return text.Tokenizer
     */
    protected abstract function tokenizerInstance($source, $delimiters= ' ', $returnDelims= false);

    /**
     * Splitting on space and newline yields the words only; the
     * delimiters themselves are not returned.
     *
     */
    #[@test]
    public function testSimpleString() {
      $t= $this->tokenizerInstance("Hello World!\nThis is an example", " \n");
      $this->assertEquals('Hello', $t->nextToken());
      $this->assertEquals('World!', $t->nextToken());
      $this->assertEquals('This', $t->nextToken());
      $this->assertEquals('is', $t->nextToken());
      $this->assertEquals('an', $t->nextToken());
      $this->assertEquals('example', $t->nextToken());
      $this->assertFalse($t->hasMoreTokens());
    }

    /**
     * With returnDelims set to TRUE, every delimiter is returned as a
     * token of its own in between the words.
     *
     */
    #[@test]
    public function testSimpleStringWithDelims() {
      $t= $this->tokenizerInstance("Hello World!\nThis is an example", " \n", true);
      $this->assertEquals('Hello', $t->nextToken());
      $this->assertEquals(' ', $t->nextToken());
      $this->assertEquals('World!', $t->nextToken());
      $this->assertEquals("\n", $t->nextToken());
      $this->assertEquals('This', $t->nextToken());
      $this->assertEquals(' ', $t->nextToken());
      $this->assertEquals('is', $t->nextToken());
      $this->assertEquals(' ', $t->nextToken());
      $this->assertEquals('an', $t->nextToken());
      $this->assertEquals(' ', $t->nextToken());
      $this->assertEquals('example', $t->nextToken());
      $this->assertFalse($t->hasMoreTokens());
    }

    /**
     * Two adjacent delimiters produce an empty token between them when
     * delimiters are not returned.
     *
     */
    #[@test]
    public function repetetiveDelimiters() {
      $t= $this->tokenizerInstance("Hello \nWorld!", " \n");
      $this->assertEquals('Hello', $t->nextToken());
      $this->assertEquals('', $t->nextToken());
      $this->assertEquals('World!', $t->nextToken());
      $this->assertFalse($t->hasMoreTokens());
    }

    /**
     * Two adjacent delimiters are returned one after the other, without
     * an empty token in between, when delimiters are returned.
     *
     */
    #[@test]
    public function repetetiveDelimitersWithDelims() {
      $t= $this->tokenizerInstance("Hello \nWorld!", " \n", true);
      $this->assertEquals('Hello', $t->nextToken());
      $this->assertEquals(' ', $t->nextToken());
      $this->assertEquals("\n", $t->nextToken());
      $this->assertEquals('World!', $t->nextToken());
      $this->assertFalse($t->hasMoreTokens());
    }

    /**
     * Test for loop iteration
     *
     */
    #[@test]
    public function forIteration() {
      $r= array();
      for ($t= $this->tokenizerInstance('A B C', ' '); $t->hasMoreTokens(); ) {
        $r[]= $t->nextToken();
      }
      $this->assertEquals(range('A', 'C'), $r);
    }

    /**
     * Test while loop iteration
     *
     */
    #[@test]
    public function whileIteration() {
      $r= array();
      $t= $this->tokenizerInstance('A B C', ' ');
      while ($t->hasMoreTokens()) {
        $r[]= $t->nextToken();
      }
      $this->assertEquals(range('A', 'C'), $r);
    }

    /**
     * Test foreach() overloading
     *
     */
    #[@test]
    public function foreachIteration() {
      $r= array();
      foreach ($this->tokenizerInstance('A B C', ' ') as $token) {
        $r[]= $token;
      }
      $this->assertEquals(range('A', 'C'), $r);
    }

    /**
     * After reset(), the tokenizer starts over and yields the first
     * token again.
     *
     */
    #[@test]
    public function reset() {
      $t= $this->tokenizerInstance('A B C', ' ');
      $this->assertTrue($t->hasMoreTokens());
      $this->assertEquals('A', $t->nextToken());
      $t->reset();
      $this->assertTrue($t->hasMoreTokens());
      $this->assertEquals('A', $t->nextToken());
    }

    /**
     * A pushed back string containing delimiters is tokenized again and
     * read before the remainder of the original input.
     *
     */
    #[@test]
    public function pushBackTokens() {
      $t= $this->tokenizerInstance('1,2,5', ',');
      $this->assertEquals('1', $t->nextToken());
      $this->assertEquals('2', $t->nextToken());
      $t->pushBack('3,4,');
      $this->assertEquals('3', $t->nextToken());
      $this->assertEquals('4', $t->nextToken());
      $this->assertEquals('5', $t->nextToken());
    }

    /**
     * Test pushBack() order: The string pushed back last is the one
     * read first.
     *
     */
    #[@test]
    public function pushBackOrder() {
      $t= $this->tokenizerInstance('1,2,5', ',');
      $this->assertEquals('1', $t->nextToken());
      $this->assertEquals('2', $t->nextToken());
      $t->pushBack('4,');
      $t->pushBack('3,');
      $this->assertEquals('3', $t->nextToken());
      $this->assertEquals('4', $t->nextToken());
      $this->assertEquals('5', $t->nextToken());
    }

    /**
     * Pushing back a string ending with a delimiter after the last
     * token has been read yields the pushed back token again.
     *
     */
    #[@test]
    public function pushBackDelimiterAtEnd() {
      $t= $this->tokenizerInstance("One\nTwo", "\n");
      $this->assertEquals('One', $t->nextToken());
      $this->assertEquals('Two', $t->nextToken());
      $t->pushBack("Two\n");
      $this->assertEquals('Two', $t->nextToken());
    }

    /**
     * Returns all tokens, including the delimiters. Uses one token of
     * lookahead after a "/": two slashes in a row are merged with
     * everything up to the next newline into a single token, anything
     * else is pushed back so it is tokenized regularly.
     *
     * @param string input
     * @param string delim
     * @return string[] tokens
     */
    protected function allTokens($input, $delim) {
      $t= $this->tokenizerInstance($input, $delim, true);
      $tokens= array();
      while ($t->hasMoreTokens()) {
        $token= $t->nextToken();
        if ('/' === $token) {
          $next= $t->nextToken();
          if ('/' === $next) {

            // Read on using only the newline as delimiter
            $token.= $next.$t->nextToken("\n");
          } else {

            // A single slash - hand the lookahead back to the tokenizer
            $t->pushBack($next);
          }
        }
        $tokens[]= $token;
      }
      return $tokens;
    }

    /**
     * Test pushing back a delimiter: The space following the single
     * slash in "b / c" is read as lookahead, pushed back and must then
     * be returned as a token of its own.
     *
     */
    #[@test]
    public function pushBackDelimiter() {
      $this->assertEquals(
        array('// This is a one-line comment', "\n", 'a', '=', ' ', 'b', ' ', '/', ' ', 'c', ';'),
        $this->allTokens("// This is a one-line comment\na= b / c;", "/\n =;")
      );
    }

    /**
     * Test pushing back a longer string part which is a regex
     *
     */
    #[@test]
    public function pushBackRegex() {
      $this->assertEquals(
        array('var', ' ', 'pattern', ' ', '=', ' ', '/', '0?([0-9]+)\.0?([0-9]+)(\.0?([0-9]+))?', '/', ';'),
        $this->allTokens('var pattern = /0?([0-9]+)\.0?([0-9]+)(\.0?([0-9]+))?/;', "/\n =;")
      );
    }

    /**
     * Test pushBack() once the input is exhausted: The tokenizer must
     * report more tokens again and yield what was pushed back.
     *
     */
    #[@test]
    public function pushBackAfterHavingReadUntilEnd() {
      $t= $this->tokenizerInstance('1,2,', ',');
      $this->assertEquals('1', $t->nextToken());
      $this->assertEquals('2', $t->nextToken());
      $this->assertFalse($t->hasMoreTokens(), 'Should be at end');
      $t->pushBack('6,7');
      $this->assertTrue($t->hasMoreTokens(), 'Should have tokens after pushing back');
      $this->assertEquals('6', $t->nextToken(), 'Should yield token pushed back');
      $this->assertEquals('7', $t->nextToken(), 'Should yield token pushed back');
      $this->assertFalse($t->hasMoreTokens(), 'Should be at end again');
    }

    /**
     * Test pushBack() once the input is exhausted, with delimiters
     * being returned as tokens.
     *
     */
    #[@test]
    public function pushBackWithDelimitersAfterHavingReadUntilEnd() {
      $t= $this->tokenizerInstance('1,2,', ',', true);
      $this->assertEquals('1', $t->nextToken());
      $this->assertEquals(',', $t->nextToken());
      $this->assertEquals('2', $t->nextToken());
      $this->assertEquals(',', $t->nextToken());
      $this->assertFalse($t->hasMoreTokens(), 'Should be at end');
      $t->pushBack('6,7');
      $this->assertTrue($t->hasMoreTokens(), 'Should have tokens after pushing back');
      $this->assertEquals('6', $t->nextToken(), 'Should yield token pushed back');
      $this->assertEquals(',', $t->nextToken());
      $this->assertEquals('7', $t->nextToken(), 'Should yield token pushed back');
      $this->assertFalse($t->hasMoreTokens(), 'Should be at end again');
    }

    /**
     * Test performance. Does not assert anything, it only tokenizes a
     * large input completely.
     *
     */
    #[@test, @ignore('Remove ignore annotation to test performance')]
    public function performance() {

      // Create a string with 10000 tokens
      $input= '';
      for ($i= 0; $i < 10000; $i++) {
        $input.= str_repeat('*', rand(0, 76))."\n";
      }

      // Tokenize it, discarding the tokens
      $t= $this->tokenizerInstance($input, "\n", false);
      while ($t->hasMoreTokens()) {
        $t->nextToken();
      }
    }
  }