PageRenderTime 55ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/vendor/nette/nette/Nette/Utils/Strings.php

https://bitbucket.org/iiic/iszp
PHP | 580 lines | 292 code | 103 blank | 185 comment | 28 complexity | 73ced03e5476e555495e81052aa1444d MD5 | raw file
  1. <?php
  2. /**
  3. * This file is part of the Nette Framework (http://nette.org)
  4. *
  5. * Copyright (c) 2004 David Grudl (http://davidgrudl.com)
  6. *
  7. * For the full copyright and license information, please view
  8. * the file license.txt that was distributed with this source code.
  9. */
  10. namespace Nette\Utils;
  11. use Nette,
  12. Nette\Diagnostics\Debugger;
  13. /**
  14. * String tools library.
  15. *
  16. * @author David Grudl
  17. */
  18. class Strings
  19. {
  20. /**
  21. * Static class - cannot be instantiated.
  22. */
  23. final public function __construct()
  24. {
  25. throw new Nette\StaticClassException;
  26. }
  27. /**
  28. * Checks if the string is valid for the specified encoding.
  29. * @param string byte stream to check
  30. * @param string expected encoding
  31. * @return bool
  32. */
  33. public static function checkEncoding($s, $encoding = 'UTF-8')
  34. {
  35. return $s === self::fixEncoding($s, $encoding);
  36. }
  37. /**
  38. * Returns correctly encoded string.
  39. * @param string byte stream to fix
  40. * @param string encoding
  41. * @return string
  42. */
  43. public static function fixEncoding($s, $encoding = 'UTF-8')
  44. {
  45. // removes xD800-xDFFF, xFEFF, x110000 and higher
  46. if (strcasecmp($encoding, 'UTF-8') === 0) {
  47. $s = str_replace("\xEF\xBB\xBF", '', $s); // remove UTF-8 BOM
  48. }
  49. if (PHP_VERSION_ID >= 50400) {
  50. ini_set('mbstring.substitute_character', 'none');
  51. return mb_convert_encoding($s, $encoding, $encoding);
  52. }
  53. return @iconv('UTF-16', $encoding . '//IGNORE', iconv($encoding, 'UTF-16//IGNORE', $s)); // intentionally @
  54. }
  55. /**
  56. * Returns a specific character.
  57. * @param int codepoint
  58. * @param string encoding
  59. * @return string
  60. */
  61. public static function chr($code, $encoding = 'UTF-8')
  62. {
  63. return iconv('UTF-32BE', $encoding . '//IGNORE', pack('N', $code));
  64. }
  65. /**
  66. * Starts the $haystack string with the prefix $needle?
  67. * @param string
  68. * @param string
  69. * @return bool
  70. */
  71. public static function startsWith($haystack, $needle)
  72. {
  73. return strncmp($haystack, $needle, strlen($needle)) === 0;
  74. }
  75. /**
  76. * Ends the $haystack string with the suffix $needle?
  77. * @param string
  78. * @param string
  79. * @return bool
  80. */
  81. public static function endsWith($haystack, $needle)
  82. {
  83. return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
  84. }
  85. /**
  86. * Does $haystack contain $needle?
  87. * @param string
  88. * @param string
  89. * @return bool
  90. */
  91. public static function contains($haystack, $needle)
  92. {
  93. return strpos($haystack, $needle) !== FALSE;
  94. }
  95. /**
  96. * Returns a part of UTF-8 string.
  97. * @param string
  98. * @param int
  99. * @param int
  100. * @return string
  101. */
  102. public static function substring($s, $start, $length = NULL)
  103. {
  104. if ($length === NULL) {
  105. $length = self::length($s);
  106. }
  107. return function_exists('mb_substr') ? mb_substr($s, $start, $length, 'UTF-8') : iconv_substr($s, $start, $length, 'UTF-8'); // MB is much faster
  108. }
  109. /**
  110. * Removes special controls characters and normalizes line endings and spaces.
  111. * @param string UTF-8 encoding or 8-bit
  112. * @return string
  113. */
  114. public static function normalize($s)
  115. {
  116. // standardize line endings to unix-like
  117. $s = str_replace("\r\n", "\n", $s); // DOS
  118. $s = strtr($s, "\r", "\n"); // Mac
  119. // remove control characters; leave \t + \n
  120. $s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F]+#', '', $s);
  121. // right trim
  122. $s = preg_replace('#[\t ]+$#m', '', $s);
  123. // leading and trailing blank lines
  124. $s = trim($s, "\n");
  125. return $s;
  126. }
  127. /**
  128. * Converts to ASCII.
  129. * @param string UTF-8 encoding
  130. * @return string ASCII
  131. */
  132. public static function toAscii($s)
  133. {
  134. $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
  135. $s = strtr($s, '`\'"^~', "\x01\x02\x03\x04\x05");
  136. if (ICONV_IMPL === 'glibc') {
  137. $s = @iconv('UTF-8', 'WINDOWS-1250//TRANSLIT', $s); // intentionally @
  138. $s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
  139. . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
  140. . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
  141. . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x96",
  142. "ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt-");
  143. } else {
  144. $s = @iconv('UTF-8', 'ASCII//TRANSLIT', $s); // intentionally @
  145. }
  146. $s = str_replace(array('`', "'", '"', '^', '~'), '', $s);
  147. return strtr($s, "\x01\x02\x03\x04\x05", '`\'"^~');
  148. }
  149. /**
  150. * Converts to web safe characters [a-z0-9-] text.
  151. * @param string UTF-8 encoding
  152. * @param string allowed characters
  153. * @param bool
  154. * @return string
  155. */
  156. public static function webalize($s, $charlist = NULL, $lower = TRUE)
  157. {
  158. $s = self::toAscii($s);
  159. if ($lower) {
  160. $s = strtolower($s);
  161. }
  162. $s = preg_replace('#[^a-z0-9' . preg_quote($charlist, '#') . ']+#i', '-', $s);
  163. $s = trim($s, '-');
  164. return $s;
  165. }
  166. /**
  167. * Truncates string to maximal length.
  168. * @param string UTF-8 encoding
  169. * @param int
  170. * @param string UTF-8 encoding
  171. * @return string
  172. */
  173. public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
  174. {
  175. if (self::length($s) > $maxLen) {
  176. $maxLen = $maxLen - self::length($append);
  177. if ($maxLen < 1) {
  178. return $append;
  179. } elseif ($matches = self::match($s, '#^.{1,'.$maxLen.'}(?=[\s\x00-/:-@\[-`{-~])#us')) {
  180. return $matches[0] . $append;
  181. } else {
  182. return self::substring($s, 0, $maxLen) . $append;
  183. }
  184. }
  185. return $s;
  186. }
  187. /**
  188. * Indents the content from the left.
  189. * @param string UTF-8 encoding or 8-bit
  190. * @param int
  191. * @param string
  192. * @return string
  193. */
  194. public static function indent($s, $level = 1, $chars = "\t")
  195. {
  196. return $level < 1 ? $s : self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level));
  197. }
  198. /**
  199. * Convert to lower case.
  200. * @param string UTF-8 encoding
  201. * @return string
  202. */
  203. public static function lower($s)
  204. {
  205. return mb_strtolower($s, 'UTF-8');
  206. }
  207. /**
  208. * Convert to upper case.
  209. * @param string UTF-8 encoding
  210. * @return string
  211. */
  212. public static function upper($s)
  213. {
  214. return mb_strtoupper($s, 'UTF-8');
  215. }
  216. /**
  217. * Convert first character to upper case.
  218. * @param string UTF-8 encoding
  219. * @return string
  220. */
  221. public static function firstUpper($s)
  222. {
  223. return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1);
  224. }
  225. /**
  226. * Capitalize string.
  227. * @param string UTF-8 encoding
  228. * @return string
  229. */
  230. public static function capitalize($s)
  231. {
  232. return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
  233. }
  234. /**
  235. * Case-insensitive compares UTF-8 strings.
  236. * @param string
  237. * @param string
  238. * @param int
  239. * @return bool
  240. */
  241. public static function compare($left, $right, $len = NULL)
  242. {
  243. if ($len < 0) {
  244. $left = self::substring($left, $len, -$len);
  245. $right = self::substring($right, $len, -$len);
  246. } elseif ($len !== NULL) {
  247. $left = self::substring($left, 0, $len);
  248. $right = self::substring($right, 0, $len);
  249. }
  250. return self::lower($left) === self::lower($right);
  251. }
  252. /**
  253. * Returns UTF-8 string length.
  254. * @param string
  255. * @return int
  256. */
  257. public static function length($s)
  258. {
  259. return strlen(utf8_decode($s)); // fastest way
  260. }
  261. /**
  262. * Strips whitespace.
  263. * @param string UTF-8 encoding
  264. * @param string
  265. * @return string
  266. */
  267. public static function trim($s, $charlist = " \t\n\r\0\x0B\xC2\xA0")
  268. {
  269. $charlist = preg_quote($charlist, '#');
  270. return self::replace($s, '#^['.$charlist.']+|['.$charlist.']+\z#u', '');
  271. }
  272. /**
  273. * Pad a string to a certain length with another string.
  274. * @param string UTF-8 encoding
  275. * @param int
  276. * @param string
  277. * @return string
  278. */
  279. public static function padLeft($s, $length, $pad = ' ')
  280. {
  281. $length = max(0, $length - self::length($s));
  282. $padLen = self::length($pad);
  283. return str_repeat($pad, $length / $padLen) . self::substring($pad, 0, $length % $padLen) . $s;
  284. }
  285. /**
  286. * Pad a string to a certain length with another string.
  287. * @param string UTF-8 encoding
  288. * @param int
  289. * @param string
  290. * @return string
  291. */
  292. public static function padRight($s, $length, $pad = ' ')
  293. {
  294. $length = max(0, $length - self::length($s));
  295. $padLen = self::length($pad);
  296. return $s . str_repeat($pad, $length / $padLen) . self::substring($pad, 0, $length % $padLen);
  297. }
  298. /**
  299. * Reverse string.
  300. * @param string UTF-8 encoding
  301. * @return string
  302. */
  303. public static function reverse($s)
  304. {
  305. return @iconv('UTF-32LE', 'UTF-8', strrev(@iconv('UTF-8', 'UTF-32BE', $s)));
  306. }
  307. /**
  308. * Generate random string.
  309. * @param int
  310. * @param string
  311. * @return string
  312. */
  313. public static function random($length = 10, $charlist = '0-9a-z')
  314. {
  315. $charlist = str_shuffle(preg_replace_callback('#.-.#', function($m) {
  316. return implode('', range($m[0][0], $m[0][2]));
  317. }, $charlist));
  318. $chLen = strlen($charlist);
  319. static $rand3;
  320. if (!$rand3) {
  321. $rand3 = md5(serialize($_SERVER), TRUE);
  322. }
  323. $s = '';
  324. for ($i = 0; $i < $length; $i++) {
  325. if ($i % 5 === 0) {
  326. list($rand, $rand2) = explode(' ', microtime());
  327. $rand += lcg_value();
  328. }
  329. $rand *= $chLen;
  330. $s .= $charlist[($rand + $rand2 + ord($rand3[$i % strlen($rand3)])) % $chLen];
  331. $rand -= (int) $rand;
  332. }
  333. return $s;
  334. }
  335. /**
  336. * Splits string by a regular expression.
  337. * @param string
  338. * @param string
  339. * @param int
  340. * @return array
  341. */
  342. public static function split($subject, $pattern, $flags = 0)
  343. {
  344. set_error_handler(function($severity, $message) use ($pattern) { // preg_last_error does not return compile errors
  345. restore_error_handler();
  346. throw new RegexpException("$message in pattern: $pattern");
  347. });
  348. $res = preg_split($pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE);
  349. restore_error_handler();
  350. if (preg_last_error()) { // run-time error
  351. throw new RegexpException(NULL, preg_last_error(), $pattern);
  352. }
  353. return $res;
  354. }
  355. /**
  356. * Performs a regular expression match.
  357. * @param string
  358. * @param string
  359. * @param int can be PREG_OFFSET_CAPTURE (returned in bytes)
  360. * @param int offset in bytes
  361. * @return mixed
  362. */
  363. public static function match($subject, $pattern, $flags = 0, $offset = 0)
  364. {
  365. if ($offset > strlen($subject)) {
  366. return NULL;
  367. }
  368. set_error_handler(function($severity, $message) use ($pattern) { // preg_last_error does not return compile errors
  369. restore_error_handler();
  370. throw new RegexpException("$message in pattern: $pattern");
  371. });
  372. $res = preg_match($pattern, $subject, $m, $flags, $offset);
  373. restore_error_handler();
  374. if (preg_last_error()) { // run-time error
  375. throw new RegexpException(NULL, preg_last_error(), $pattern);
  376. }
  377. if ($res) {
  378. return $m;
  379. }
  380. }
  381. /**
  382. * Performs a global regular expression match.
  383. * @param string
  384. * @param string
  385. * @param int can be PREG_OFFSET_CAPTURE (returned in bytes); PREG_SET_ORDER is default
  386. * @param int offset in bytes
  387. * @return array
  388. */
  389. public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
  390. {
  391. if ($offset > strlen($subject)) {
  392. return array();
  393. }
  394. set_error_handler(function($severity, $message) use ($pattern) { // preg_last_error does not return compile errors
  395. restore_error_handler();
  396. throw new RegexpException("$message in pattern: $pattern");
  397. });
  398. $res = preg_match_all(
  399. $pattern, $subject, $m,
  400. ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
  401. $offset
  402. );
  403. restore_error_handler();
  404. if (preg_last_error()) { // run-time error
  405. throw new RegexpException(NULL, preg_last_error(), $pattern);
  406. }
  407. return $m;
  408. }
  409. /**
  410. * Perform a regular expression search and replace.
  411. * @param string
  412. * @param string|array
  413. * @param string|callable
  414. * @param int
  415. * @return string
  416. */
  417. public static function replace($subject, $pattern, $replacement = NULL, $limit = -1)
  418. {
  419. if (is_object($replacement) || is_array($replacement)) {
  420. if ($replacement instanceof Nette\Callback) {
  421. $replacement = $replacement->getNative();
  422. }
  423. if (!is_callable($replacement, FALSE, $textual)) {
  424. throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
  425. }
  426. set_error_handler(function($severity, $message) use (& $tmp) { // preg_last_error does not return compile errors
  427. restore_error_handler();
  428. throw new RegexpException("$message in pattern: $tmp");
  429. });
  430. foreach ((array) $pattern as $tmp) {
  431. preg_match($tmp, '');
  432. }
  433. restore_error_handler();
  434. $res = preg_replace_callback($pattern, $replacement, $subject, $limit);
  435. if ($res === NULL && preg_last_error()) { // run-time error
  436. throw new RegexpException(NULL, preg_last_error(), $pattern);
  437. }
  438. return $res;
  439. } elseif ($replacement === NULL && is_array($pattern)) {
  440. $replacement = array_values($pattern);
  441. $pattern = array_keys($pattern);
  442. }
  443. set_error_handler(function($severity, $message) use ($pattern) { // preg_last_error does not return compile errors
  444. restore_error_handler();
  445. throw new RegexpException("$message in pattern: " . implode(' or ', (array) $pattern));
  446. });
  447. $res = preg_replace($pattern, $replacement, $subject, $limit);
  448. restore_error_handler();
  449. if (preg_last_error()) { // run-time error
  450. throw new RegexpException(NULL, preg_last_error(), implode(' or ', (array) $pattern));
  451. }
  452. return $res;
  453. }
  454. }
  455. /**
  456. * The exception that indicates error of the last Regexp execution.
  457. */
  458. class RegexpException extends \Exception
  459. {
  460. static public $messages = array(
  461. PREG_INTERNAL_ERROR => 'Internal error',
  462. PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
  463. PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
  464. PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
  465. 5 => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', // PREG_BAD_UTF8_OFFSET_ERROR
  466. );
  467. public function __construct($message, $code = NULL, $pattern = NULL)
  468. {
  469. if (!$message) {
  470. $message = (isset(self::$messages[$code]) ? self::$messages[$code] : 'Unknown error') . ($pattern ? " (pattern: $pattern)" : '');
  471. }
  472. parent::__construct($message, $code);
  473. }
  474. }