/arena/sieve/peer/sieve/Lexer.class.php

https://github.com/thekid/xp-experiments · PHP · 188 lines · 152 code · 12 blank · 24 comment · 34 complexity · c6838069d21895bfdb2b8824351fa60d MD5 · raw file

  1. <?php
  2. /* This class is part of the XP framework's experiments
  3. *
  4. * $Id$
  5. */
  6. uses('text.Tokenizer', 'peer.sieve.SieveParser', 'text.parser.generic.AbstractLexer');
  7. $package= 'peer.sieve';
  /**
   * Lexer for Sieve
   *
   * Turns the pieces handed out by a text.Tokenizer into the tokens
   * declared in SieveParser: keywords, double-quoted strings, multi-line
   * "text:" strings, numbers (optionally suffixed with a K, M or G
   * quantifier), single-character delimiters and plain words. Whitespace,
   * "#" line comments and bracketed comments are skipped.
   *
   * @see      xp://text.parser.generic.AbstractLexer
   * @purpose  Lexer
   */
  class peer·sieve·Lexer extends AbstractLexer {
    protected static
      $keywords = array(
        'require'    => SieveParser::T_REQUIRE,
        'if'         => SieveParser::T_IF,
        'else'       => SieveParser::T_ELSE,
        'elsif'      => SieveParser::T_ELSEIF,
        'allof'      => SieveParser::T_ALLOF,
        'anyof'      => SieveParser::T_ANYOF,
        'not'        => SieveParser::T_NOT,
        'header'     => SieveParser::T_HEADER,
        'size'       => SieveParser::T_SIZE,
        'address'    => SieveParser::T_ADDRESS,
        'true'       => SieveParser::T_TRUE,
        'false'      => SieveParser::T_FALSE,
        'comparator' => SieveParser::T_COMPARATOR,
        'envelope'   => SieveParser::T_ENVELOPE,
        'is'         => SieveParser::T_IS,
        'exists'     => SieveParser::T_EXISTS,
        'contains'   => SieveParser::T_CONTAINS,
        'matches'    => SieveParser::T_MATCHES,
        'regex'      => SieveParser::T_REGEX,
        'count'      => SieveParser::T_COUNT,
        'value'      => SieveParser::T_VALUE,
        'all'        => SieveParser::T_ALL,
        'domain'     => SieveParser::T_DOMAIN,
        'localpart'  => SieveParser::T_LOCALPART,
        'user'       => SieveParser::T_USER,
        'detail'     => SieveParser::T_DETAIL,
      );

    const
      DELIMITERS = " |&?!.:;,@%~=<>(){}[]#+-*/\"'\r\n\t";

    private
      $ahead = NULL;    // Token read ahead of time, handed out by the next advance()

    /**
     * Constructor
     *
     * Configures the tokenizer to split on DELIMITERS and to hand the
     * delimiters out as tokens of their own, and starts position
     * tracking at line 1, column 1.
     *
     * @param   text.Tokenizer tokenizer
     * @param   string source stored as the file name of this lexer
     */
    public function __construct(Tokenizer $tokenizer, $source) {
      $this->tokenizer= $tokenizer;
      $this->tokenizer->delimiters= self::DELIMITERS;
      $this->tokenizer->returnDelims= TRUE;
      $this->fileName= $source;
      $this->position= array(1, 1);   // Y, X
    }

    /**
     * Advance this
     *
     * Reads the next significant token and publishes it via the token
     * and value members. Whitespace and comments are consumed silently.
     *
     * @return  bool TRUE if input was available for this token, FALSE otherwise
     * @throws  IllegalStateException for a bracketed comment that is never closed
     * @throws  FormatException for a number with an unknown quantifier
     */
    public function advance() {
      static $quantifiers= array(
        'K' => 1024,
        'M' => 1048576,
        'G' => 1073741824
      );

      do {

        // A buffered look-ahead token counts as available input even if
        // the tokenizer is exhausted. Test it explicitly rather than by
        // truthiness, otherwise the token "0" would be dropped.
        $pending= is_string($this->ahead) && '' !== $this->ahead;
        $done= $pending || $this->tokenizer->hasMoreTokens();
        if ($pending) {
          $token= $this->ahead;
        } else {
          $token= $this->tokenizer->nextToken(self::DELIMITERS);
        }
        $this->ahead= NULL;

        // Nothing left to read. Report the state this method has always
        // ended up in at this point (a word without a value) instead of
        // running an empty token through the checks below, where an empty
        // strpos() needle matches on PHP 8 and would loop forever.
        if (!is_string($token) || '' === $token) {
          $this->token= SieveParser::T_WORD;
          $this->value= $token;
          break;
        }

        // Check for whitespace
        if (FALSE !== strpos(" \n\r\t", $token)) {
          $l= substr_count($token, "\n");
          $this->position[1]= strlen($token) + ($l ? 1 : $this->position[1]);
          $this->position[0]+= $l;
          continue;
        }

        // Move the column past the previously returned value
        $this->position[1]+= strlen((string)$this->value);

        if ('"' === $token[0]) {

          // Quoted string: collect everything up to the closing quote
          $quote= $token[0];
          $this->token= SieveParser::T_STRING;
          $this->value= '';
          do {
            if ($quote === ($t= $this->tokenizer->nextToken($quote))) {
              break;    // Closing quote came first, e.g. ""
            }
            $this->value.= $t;

            // A trailing backslash escapes the quote: replace it with the
            // quote character itself and keep reading
            if ('\\' === substr($this->value, -1)) {
              $this->value= substr($this->value, 0, -1).$this->tokenizer->nextToken($quote);
              continue;
            }

            $this->tokenizer->nextToken($quote);    // Swallow closing quote
            break;
          } while ($this->tokenizer->hasMoreTokens());
        } else if ('/' === $token[0]) {
          $ahead= $this->tokenizer->nextToken(self::DELIMITERS);
          if ('*' === $ahead) {

            // Multi-line comment: read "/"-separated chunks until one of
            // them ends with "*", then swallow the terminating "/"
            do {
              if (!$this->tokenizer->hasMoreTokens()) {
                throw new IllegalStateException('Unclosed multi-line comment');
              }
              $t= (string)$this->tokenizer->nextToken('/');
              $l= substr_count($t, "\n");
              $this->position[1]= strlen($t) + ($l ? 1 : $this->position[1]);
              $this->position[0]+= $l;
            } while ('*' !== substr($t, -1));
            $this->tokenizer->nextToken('/');
            continue;
          }

          // A lone slash: return it and keep what followed for the next call
          $this->token= ord($token);
          $this->value= $token;
          $this->ahead= $ahead;
        } else if ('text' === $token) {
          $ahead= $this->tokenizer->nextToken(self::DELIMITERS);
          if (':' !== substr((string)$ahead, 0, 1)) {

            // Plain word "text": the token read ahead belongs to the
            // following input and must not be lost
            $this->token= SieveParser::T_WORD;
            $this->value= $token;
            $this->ahead= $ahead;
          } else {

            // Multi-line string ("text:"): read "."-separated chunks until
            // the collected value ends with a newline, then swallow the
            // token that follows (the terminating dot)
            $this->token= SieveParser::T_STRING;
            $this->value= ltrim((string)substr($ahead, 1), "\r\n\t ");
            do {
              $this->value.= $this->tokenizer->nextToken('.');
              if ("\n" !== substr($this->value, -1)) {
                continue;
              }
              $this->tokenizer->nextToken('.');
              break;
            } while ($this->tokenizer->hasMoreTokens());
          }
        } else if (isset(self::$keywords[$token])) {
          $this->token= self::$keywords[$token];
          $this->value= $token;
        } else if ('#' === $token[0]) {

          // Line comment: skip up to the end of the line.
          // NOTE(review): if the tokenizer hands out the newline as a
          // separate token afterwards (as it does for the quote and slash
          // delimiters above), the whitespace branch counts the same line
          // break a second time - confirm against text.Tokenizer.
          $this->tokenizer->nextToken("\n");
          $this->position[1]= 1;
          $this->position[0]++;
          continue;
        } else if (1 === strlen($token) && FALSE !== strpos(self::DELIMITERS, $token)) {

          // Single delimiter character: its byte value is the token id
          $this->token= ord($token);
          $this->value= $token;
        } else if (ctype_digit($token)) {
          $this->token= SieveParser::T_NUMBER;
          $this->value= $token;
        } else if (ctype_digit($n= substr($token, 0, -1))) {

          // Digits followed by one character: number with a size quantifier
          $quantifier= strtoupper(substr($token, -1));
          if (!isset($quantifiers[$quantifier])) {
            throw new FormatException(sprintf(
              'Unknown quantifier "%s", expected one of %s',
              $quantifier,
              implode(', ', array_keys($quantifiers))
            ));
          }
          $this->token= SieveParser::T_NUMBER;
          $this->value= intval($n) * $quantifiers[$quantifier];
        } else {
          $this->token= SieveParser::T_WORD;
          $this->value= $token;
        }

        break;
      } while (1);

      return $done;
    }
  }
  176. ?>