PageRenderTime 42ms CodeModel.GetById 25ms app.highlight 13ms RepoModel.GetById 1ms app.codeStats 0ms

/framework/vendor/swift/lib/classes/Swift/CharacterReader/Utf8Reader.php

http://zoop.googlecode.com/
PHP | 183 lines | 127 code | 9 blank | 47 comment | 10 complexity | fa6b3429d9e99eb269bcc119ef1f9e0d MD5 | raw file
  1<?php
  2
  3/*
  4 * This file is part of SwiftMailer.
  5 * (c) 2004-2009 Chris Corbyn
  6 *
  7 * For the full copyright and license information, please view the LICENSE
  8 * file that was distributed with this source code.
  9 */
 10
 11//@require 'Swift/CharacterReader.php';
 12
 13/**
 14 * Analyzes UTF-8 characters.
 15 * @package Swift
 16 * @subpackage Encoder
 17 * @author Chris Corbyn
 18 * @author Xavier De Cock <xdecock@gmail.com>
 19 */
 20class Swift_CharacterReader_Utf8Reader
 21  implements Swift_CharacterReader
 22{
 23
 24  /** Pre-computed for optimization */
 25  private static $length_map=array(
 26//N=0,1,2,3,4,5,6,7,8,9,A,B,C,D,E,F,
 27    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x0N
 28    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x1N
 29    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x2N
 30    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x3N
 31    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x4N
 32    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x5N
 33    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x6N
 34    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x7N
 35    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //0x8N
 36    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //0x9N
 37    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //0xAN
 38    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //0xBN
 39    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, //0xCN
 40    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, //0xDN
 41    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, //0xEN
 42    4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0  //0xFN
 43 );
 44  private static $s_length_map=array(
 45  "\x00"=>1, "\x01"=>1, "\x02"=>1, "\x03"=>1, "\x04"=>1, "\x05"=>1, "\x06"=>1, "\x07"=>1,
 46  "\x08"=>1, "\x09"=>1, "\x0a"=>1, "\x0b"=>1, "\x0c"=>1, "\x0d"=>1, "\x0e"=>1, "\x0f"=>1,
 47  "\x10"=>1, "\x11"=>1, "\x12"=>1, "\x13"=>1, "\x14"=>1, "\x15"=>1, "\x16"=>1, "\x17"=>1,
 48  "\x18"=>1, "\x19"=>1, "\x1a"=>1, "\x1b"=>1, "\x1c"=>1, "\x1d"=>1, "\x1e"=>1, "\x1f"=>1,
 49  "\x20"=>1, "\x21"=>1, "\x22"=>1, "\x23"=>1, "\x24"=>1, "\x25"=>1, "\x26"=>1, "\x27"=>1,
 50  "\x28"=>1, "\x29"=>1, "\x2a"=>1, "\x2b"=>1, "\x2c"=>1, "\x2d"=>1, "\x2e"=>1, "\x2f"=>1,
 51  "\x30"=>1, "\x31"=>1, "\x32"=>1, "\x33"=>1, "\x34"=>1, "\x35"=>1, "\x36"=>1, "\x37"=>1,
 52  "\x38"=>1, "\x39"=>1, "\x3a"=>1, "\x3b"=>1, "\x3c"=>1, "\x3d"=>1, "\x3e"=>1, "\x3f"=>1,
 53  "\x40"=>1, "\x41"=>1, "\x42"=>1, "\x43"=>1, "\x44"=>1, "\x45"=>1, "\x46"=>1, "\x47"=>1,
 54  "\x48"=>1, "\x49"=>1, "\x4a"=>1, "\x4b"=>1, "\x4c"=>1, "\x4d"=>1, "\x4e"=>1, "\x4f"=>1,
 55  "\x50"=>1, "\x51"=>1, "\x52"=>1, "\x53"=>1, "\x54"=>1, "\x55"=>1, "\x56"=>1, "\x57"=>1,
 56  "\x58"=>1, "\x59"=>1, "\x5a"=>1, "\x5b"=>1, "\x5c"=>1, "\x5d"=>1, "\x5e"=>1, "\x5f"=>1,
 57  "\x60"=>1, "\x61"=>1, "\x62"=>1, "\x63"=>1, "\x64"=>1, "\x65"=>1, "\x66"=>1, "\x67"=>1,
 58  "\x68"=>1, "\x69"=>1, "\x6a"=>1, "\x6b"=>1, "\x6c"=>1, "\x6d"=>1, "\x6e"=>1, "\x6f"=>1,
 59  "\x70"=>1, "\x71"=>1, "\x72"=>1, "\x73"=>1, "\x74"=>1, "\x75"=>1, "\x76"=>1, "\x77"=>1,
 60  "\x78"=>1, "\x79"=>1, "\x7a"=>1, "\x7b"=>1, "\x7c"=>1, "\x7d"=>1, "\x7e"=>1, "\x7f"=>1,
 61  "\x80"=>0, "\x81"=>0, "\x82"=>0, "\x83"=>0, "\x84"=>0, "\x85"=>0, "\x86"=>0, "\x87"=>0,
 62  "\x88"=>0, "\x89"=>0, "\x8a"=>0, "\x8b"=>0, "\x8c"=>0, "\x8d"=>0, "\x8e"=>0, "\x8f"=>0,
 63  "\x90"=>0, "\x91"=>0, "\x92"=>0, "\x93"=>0, "\x94"=>0, "\x95"=>0, "\x96"=>0, "\x97"=>0,
 64  "\x98"=>0, "\x99"=>0, "\x9a"=>0, "\x9b"=>0, "\x9c"=>0, "\x9d"=>0, "\x9e"=>0, "\x9f"=>0,
 65  "\xa0"=>0, "\xa1"=>0, "\xa2"=>0, "\xa3"=>0, "\xa4"=>0, "\xa5"=>0, "\xa6"=>0, "\xa7"=>0,
 66  "\xa8"=>0, "\xa9"=>0, "\xaa"=>0, "\xab"=>0, "\xac"=>0, "\xad"=>0, "\xae"=>0, "\xaf"=>0,
 67  "\xb0"=>0, "\xb1"=>0, "\xb2"=>0, "\xb3"=>0, "\xb4"=>0, "\xb5"=>0, "\xb6"=>0, "\xb7"=>0,
 68  "\xb8"=>0, "\xb9"=>0, "\xba"=>0, "\xbb"=>0, "\xbc"=>0, "\xbd"=>0, "\xbe"=>0, "\xbf"=>0,
 69  "\xc0"=>2, "\xc1"=>2, "\xc2"=>2, "\xc3"=>2, "\xc4"=>2, "\xc5"=>2, "\xc6"=>2, "\xc7"=>2,
 70  "\xc8"=>2, "\xc9"=>2, "\xca"=>2, "\xcb"=>2, "\xcc"=>2, "\xcd"=>2, "\xce"=>2, "\xcf"=>2,
 71  "\xd0"=>2, "\xd1"=>2, "\xd2"=>2, "\xd3"=>2, "\xd4"=>2, "\xd5"=>2, "\xd6"=>2, "\xd7"=>2,
 72  "\xd8"=>2, "\xd9"=>2, "\xda"=>2, "\xdb"=>2, "\xdc"=>2, "\xdd"=>2, "\xde"=>2, "\xdf"=>2,
 73  "\xe0"=>3, "\xe1"=>3, "\xe2"=>3, "\xe3"=>3, "\xe4"=>3, "\xe5"=>3, "\xe6"=>3, "\xe7"=>3,
 74  "\xe8"=>3, "\xe9"=>3, "\xea"=>3, "\xeb"=>3, "\xec"=>3, "\xed"=>3, "\xee"=>3, "\xef"=>3,
 75  "\xf0"=>4, "\xf1"=>4, "\xf2"=>4, "\xf3"=>4, "\xf4"=>4, "\xf5"=>4, "\xf6"=>4, "\xf7"=>4,
 76  "\xf8"=>5, "\xf9"=>5, "\xfa"=>5, "\xfb"=>5, "\xfc"=>6, "\xfd"=>6, "\xfe"=>0, "\xff"=>0,
 77 );
 78
 79  /**
 80   * Returns the complete charactermap
 81   *
 82   * @param string $string
 83   * @param int $startOffset
 84   * @param array $currentMap
 85   * @param mixed $ignoredChars
 86   */
 87  public function getCharPositions($string, $startOffset, &$currentMap, &$ignoredChars)
 88  {
 89  	if (!isset($currentMap['i']) || !isset($currentMap['p']))
 90  	{
 91  	  $currentMap['p'] = $currentMap['i'] = array();
 92   	}
 93  	$strlen=strlen($string);
 94  	$charPos=count($currentMap['p']);
 95  	$foundChars=0;
 96  	$invalid=false;
 97  	for ($i=0; $i<$strlen; ++$i)
 98  	{
 99  	  $char=$string[$i];
100  	  $size=self::$s_length_map[$char];
101  	  if ($size==0)
102  	  {
103  	    /* char is invalid, we must wait for a resync */
104  	  	$invalid=true;
105  	  	continue;
106   	  }
107   	  else
108   	  {
109   	  	if ($invalid==true)
110   	  	{
111   	  	  /* We mark the chars as invalid and start a new char */
112   	  	  $currentMap['p'][$charPos+$foundChars]=$startOffset+$i;
113   	      $currentMap['i'][$charPos+$foundChars]=true;
114   	      ++$foundChars;
115   	      $invalid=false;
116   	  	}
117   	  	if (($i+$size) > $strlen){
118   	  		$ignoredChars=substr($string, $i);
119   	  		break;
120   	  	}
121   	  	for ($j=1; $j<$size; ++$j)
122   	  	{
123          $char=$string[$i+$j];
124          if ($char>"\x7F" && $char<"\xC0")
125          {
126            // Valid - continue parsing
127          }
128          else
129          {
130            /* char is invalid, we must wait for a resync */
131            $invalid=true;
132            continue 2;
133          }
134   	  	}
135   	  	/* Ok we got a complete char here */
136   	  	$lastChar=$currentMap['p'][$charPos+$foundChars]=$startOffset+$i+$size;
137   	  	$i+=$j-1;
138   	    ++$foundChars;
139   	  }
140  	}
141  	return $foundChars;
142  }
143  
144  /**
145   * Returns mapType
146   * @int mapType
147   */
148  public function getMapType()
149  {
150  	return self::MAP_TYPE_POSITIONS;
151  }
152 
153  /**
154   * Returns an integer which specifies how many more bytes to read.
155   * A positive integer indicates the number of more bytes to fetch before invoking
156   * this method again.
157   * A value of zero means this is already a valid character.
158   * A value of -1 means this cannot possibly be a valid character.
159   * @param string $bytes
160   * @return int
161   */
162  public function validateByteSequence($bytes, $size)
163  {
164    if ($size<1){
165      return -1;
166    }
167    $needed = self::$length_map[$bytes[0]] - $size;
168    return ($needed > -1)
169      ? $needed
170      : -1
171      ;
172  }
173
174  /**
175   * Returns the number of bytes which should be read to start each character.
176   * @return int
177   */
178  public function getInitialByteSize()
179  {
180    return 1;
181  }
182
183}