PageRenderTime 50ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/inc/bx/helpers/string.php

https://github.com/chregu/fluxcms
PHP | 439 lines | 323 code | 46 blank | 70 comment | 50 complexity | 27f15f9943d19340331a9a91254ac6b5 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, Apache-2.0, LGPL-2.1
  1. <?php
  2. class bx_helpers_string {
  /**
   * Shortens a string to about $length bytes (as counted by strlen()) and
   * appends a blank followed by $etc.
   *
   * A string that is not longer than $length is returned untouched. For a
   * longer one the length of $etc is subtracted from $length first; unless
   * $breakWords is set, the trailing whitespace-plus-word fragment at the cut
   * is removed so that the text does not end inside a word.
   *
   * @param string  $inStr      text to shorten
   * @param int     $length     length limit; 0 always yields ''
   * @param boolean $breakWords true to cut in the middle of a word
   * @param string  $etc        marker appended to a shortened text
   * @return string the original or the shortened text
   */
  static function truncate($inStr, $length = 100, $breakWords = false, $etc = '...') {
      if ($length == 0) {
          return '';
      }
      if (strlen($inStr) <= $length) {
          return $inStr;
      }

      // Reserve room for the marker.
      $keep = $length - strlen($etc);
      $head = $inStr;
      if (!$breakWords) {
          // Look one byte past the cut so a word ending exactly there survives.
          $head = preg_replace('/\s+?(\S+)?$/', '', substr($inStr, 0, $keep + 1));
      }

      return substr($head, 0, $keep) . " $etc";
  }
  /**
   * Removes tags with strip_tags() and then shortens the result with
   * bx_helpers_string::truncate().
   *
   * @param string  $inStr      text that may contain markup
   * @param int     $length     passed through to truncate()
   * @param boolean $breakWords passed through to truncate()
   * @param string  $etc        passed through to truncate()
   * @return string
   */
  static function truncate_strip($inStr, $length = 100, $breakWords = false, $etc = '...') {
      return bx_helpers_string::truncate(strip_tags($inStr), $length, $breakWords, $etc);
  }
  /**
   * Serialises the first node of $text, turns every "key:value" line into
   * <name>/<value> elements and returns the result as a new DOM document.
   *
   * The <meta_other> start and end tags are removed from the serialised
   * markup, each line break becomes a paragraph boundary and the whole text
   * is wrapped in <div><p>...</p></div> before it is parsed again.
   *
   * @param array $text list whose first entry is a DOM node
   * @return DomDocument document parsed from the generated markup
   */
  static function nl2property_hegu($text) {
      $node = $text[0];
      $markup = $node->ownerDocument->saveXML($node);

      $patterns = array(
          '#<[\/]*meta_other>#',
          "/^([^:\n]*):(.*)$/m",
      );
      $replacements = array(
          '',
          "<name>\$1</name><value>\$2</value>",
      );
      $markup = preg_replace($patterns, $replacements, $markup);

      // Every remaining newline closes one paragraph and opens the next.
      $paragraphs = preg_replace("#\n#", "</p>\n<p>", $markup);

      $xml = new DomDocument();
      $xml->loadXML("<div><p>" . $paragraphs . "</p>\n</div>");
      return $xml;
  }
  /**
   * Splits $string at $separator and returns a DOM document holding one
   * child element per piece below a single root element.
   *
   * When $string is empty() only the root element is created.
   *
   * @param string $separator     delimiter handed to explode()
   * @param string $string        text to split
   * @param string $childNodeName element name used for every piece
   * @param string $rootNodeName  element name of the document root
   * @return DOMDocument
   */
  static function explodeToNode($separator, $string, $childNodeName = 'child', $rootNodeName = 'root') {
      $doc = new DOMDocument();
      $root = $doc->appendChild($doc->createElement($rootNodeName));

      if (empty($string)) {
          return $doc;
      }

      foreach (explode($separator, $string) as $piece) {
          $item = $doc->createElement($childNodeName);
          $item->appendChild($doc->createTextNode($piece));
          $root->appendChild($item);
      }

      return $doc;
  }
  /**
   * Converts a UTF-8 encoded string into ASCII plus numeric character
   * references of the form &#nnnnn; (decimal code point).
   *
   * Unless $force is set, the string is returned unchanged when
   * $GLOBALS['POOL']->config->dbIsUtf8 is true.
   *
   * Bytes below 128 are copied as they are. Multi-byte sequences are decoded
   * and emitted as an entity; code points from 65529 upwards are dropped, as
   * in the previous implementation. Bytes that cannot start a sequence
   * (stray continuation bytes) and sequences cut off by the end of the
   * string are skipped - previously a stray continuation byte made the loop
   * spin forever because the read position was never advanced.
   *
   * @param string  $source UTF-8 encoded input
   * @param boolean $force  convert even when the pool reports a UTF-8 database
   * @return string input with non-ASCII characters replaced by entities
   * @access public
   */
  static function utf2entities($source, $force = false) {
      if (!$force && $GLOBALS['POOL']->config->dbIsUtf8) {
          return $source;
      }

      $source = (string) $source;
      $len = strlen($source);
      $encodedString = '';
      $pos = 0;

      while ($pos < $len) {
          $lead = ord($source[$pos]);

          // Plain ASCII is passed through untouched.
          if ($lead < 128) {
              $encodedString .= $source[$pos];
              $pos++;
              continue;
          }

          // The high nibble of the lead byte tells how long the sequence is;
          // the subtracted constant removes the length marker bits.
          switch ($lead >> 4) {
              case 12:
              case 13:
                  $seqLen = 2;
                  $decimalCode = $lead - 192;
                  break;
              case 14:
                  $seqLen = 3;
                  $decimalCode = $lead - 224;
                  break;
              case 15:
                  $seqLen = 4;
                  $decimalCode = $lead - 240;
                  break;
              default:
                  // 0x80-0xBF: continuation byte without a lead byte.
                  $seqLen = 0;
                  $decimalCode = 0;
          }

          if ($seqLen === 0 || $pos + $seqLen > $len) {
              // Invalid or truncated sequence: drop this byte and move on so
              // the loop is guaranteed to terminate.
              $pos++;
              continue;
          }

          // Each continuation byte contributes six more bits.
          for ($i = 1; $i < $seqLen; $i++) {
              $decimalCode = ($decimalCode << 6) + (ord($source[$pos + $i]) - 128);
          }
          $pos += $seqLen;

          if ($decimalCode < 65529) {
              $encodedString .= "&#" . $decimalCode . ';';
          }
      }

      return $encodedString;
  }
  /**
   * Builds a query string ("k1=v1&k2=v2") from an array.
   *
   * Values are passed through rawurlencode(); keys are used as given.
   *
   * @param array $params key => value pairs
   * @return string query string without a leading "?"; '' for empty input
   */
  static function array2query($params) {
      if (empty($params)) {
          return '';
      }

      $pairs = array();
      foreach ($params as $name => $value) {
          $pairs[] = $name . '=' . rawurlencode($value);
      }

      return implode('&', $pairs);
  }
  /**
   * Derives a lower-case, URI-friendly name from a title.
   *
   * HTML entities are decoded, a fixed set of accented letters is
   * transliterated, "@" becomes "-at-", and every character outside
   * a-z, 0-9, ".", "-", "_" and "/" is turned into "-". Runs of dashes are
   * collapsed and leading/trailing dashes removed. An empty title or an
   * empty result yields "none".
   *
   * @param string  $title           text to convert
   * @param boolean $preserveDots    keep "." instead of replacing it by "-"
   * @param boolean $preserveSlashes keep "/" instead of replacing it by "-"
   * @return string
   */
  static function makeUri($title, $preserveDots = false, $preserveSlashes = false) {
      $title = trim(html_entity_decode($title, ENT_QUOTES, 'UTF-8'));
      $uri = $title ? $title : "none";

      if (!$preserveDots) {
          $uri = str_replace(".", "-", $uri);
      }
      $uri = str_replace("@", "-at-", $uri);

      // Applied one after the other, in this order.
      $transliterations = array(
          "/[öÖ]/u" => "oe",
          "/[üÜ]/u" => "ue",
          "/[äÄ]/u" => "ae",
          "/[éèê]/u" => "e",
          "/[Ïïíì]/u" => "i",
          "/[ñ]/u" => "n",
          "/[àåáâ]/u" => "a",
          "/[ùú]/u" => "u",
          "/[òó]/u" => "o",
          "/[ß]/u" => "ss",
          "/[\n\r]+/u" => "",
      );
      foreach ($transliterations as $pattern => $replacement) {
          $uri = preg_replace($pattern, $replacement, $uri);
      }

      // Whatever is still not URI-safe becomes a dash.
      $uri = strtolower($uri);
      $uri = preg_replace("/[^a-z0-9\.\-\_\/]/", "-", $uri);

      // A trailing "_<digits>" (before the dot when dots are kept) is
      // rewritten to "-<digits>".
      if ($preserveDots) {
          $uri = preg_replace("/_([0-9]+)\./u", "-$1.", $uri);
      } else {
          $uri = preg_replace("/_([0-9]+)$/u", "-$1", $uri);
      }

      if (!$preserveSlashes) {
          $uri = preg_replace("/\//u", "-$1", $uri);
      }

      $uri = trim(preg_replace("/-{2,}/u", "-", $uri), "-");

      return $uri ? $uri : "none";
  }
  187. /* urify is a simple version of the above.
  188. The reason for having 2 versions is, that the above is not easy to do in XSLT only and
  189. I don't want to change makeUri 'cause of BC.
  190. in XSLT, the function is the following:
  191. <func:function name="bxf:urify">
  192. <xsl:param name="text"/>
  193. <func:result select="translate($text,'ABCDEFGHIJKLMNOPQRSTUVWXYZ&#x20;&#x9;&#xA;&#xD;ÄäÜüÖöÏïçÊèÉéÊêÀàÂâÔô_;:\.!,?+$£*ç%&amp;/()=','abcdefghijklmnopqrstuvwxyz----aauuooiiceeeeeeaaaaoo')"/>
  194. </func:function>
  195. */
  /**
   * Simplified URI conversion: strips punctuation, lower-cases ASCII letters,
   * maps a small set of accented letters to their base letter and turns
   * blanks into dashes.
   *
   * Fixes over the previous version:
   *  - the punctuation pattern now carries the /u modifier. Without it the
   *    multi-byte members of the character class were matched byte by byte,
   *    which tore the lead byte out of every accented letter; the following
   *    /u replacements then failed on the broken string and the function
   *    returned '' for input such as "Zürich".
   *  - the pattern is single-quoted, so "$£" is no longer parsed by PHP as a
   *    variable; "$" and "£" are now really stripped.
   *  - "ç" is no longer stripped before it can be mapped to "c".
   *  - "ê" and "Â" are mapped as well, as in the XSLT translate() table this
   *    function mirrors.
   *
   * @param string $text UTF-8 text to convert
   * @return string
   */
  static function urify($text) {
      $newValue = strtolower(preg_replace('/[_;:\.!,?+$£*%&\/\(\)=]/u', '', $text));
      $newValue = preg_replace('/[öÖÔô]/u', 'o', $newValue);
      $newValue = preg_replace('/[üÜ]/u', 'u', $newValue);
      $newValue = preg_replace('/[äÄàÀâÂ]/u', 'a', $newValue);
      $newValue = preg_replace('/[ÊêèÉé]/u', 'e', $newValue);
      $newValue = preg_replace('/[Ïï]/u', 'i', $newValue);
      $newValue = preg_replace('/[ç]/u', 'c', $newValue);
      return str_replace(' ', '-', $newValue);
  }
  /**
   * Replaces every "{key}" placeholder in $subject by the matching value of
   * $textfields.
   *
   * Keys and values are treated as literal text. The previous implementation
   * built regular expressions from the keys without quoting them (so keys
   * containing characters such as "+" or "(" did not match or broke the
   * pattern), interpreted "$1" / "\1" inside values as back references, and
   * handed undefined variables to preg_replace() when $textfields was empty.
   * Replacements are still applied one field after the other, in array
   * order.
   *
   * The method is now declared static like the other helpers of this class;
   * it can still be invoked on an instance.
   *
   * @param string $subject    string containing field names surrounded by {}
   * @param array  $textfields key => value pairs holding the field values
   * @return string string with replaced fields
   * @access public
   */
  static function replaceTextFields($subject, $textfields) {
      if (empty($textfields)) {
          return $subject;
      }

      $search = array();
      $replace = array();
      foreach ($textfields as $field => $value) {
          $search[] = '{' . $field . '}';
          $replace[] = $value;
      }

      return str_replace($search, $replace, $subject);
  }
  /**
   * Renders key/value pairs as plain text, one field per line.
   *
   * With $printKey the key is printed first, left-aligned and padded to 20
   * characters, followed by ": ". A value containing a newline is printed as
   * a block: it starts on a fresh line after an empty line, every line of it
   * is indented by one blank and the block is closed by a "****" line.
   *
   * @param array   $fields     key => value pairs to print
   * @param boolean $printKey   when TRUE the key is printed as well
   * @param array   $hideFields accepted but not evaluated by this implementation
   * @return string formatted fields
   * @access public
   */
  static function formatTextFields($fields, $printKey = TRUE, $hideFields = array()) {
      $lines = array();

      foreach ($fields as $name => $content) {
          $label = $printKey ? sprintf('%-20s: ', $name) : '';

          if (strpos($content, "\n") !== false) {
              $content = "\n\n " . preg_replace("#([\r\n]+)#", "$1 ", $content) . "\n****";
          }

          $lines[] = $label . $content . "\n";
      }

      return implode('', $lines);
  }
  /**
   * Renders rows of data as a simple ASCII table.
   *
   * Each entry of $tableDef describes one column: index 0 is the heading,
   * index 1 the width and the optional index 2 the alignment ("r" for
   * right-aligned cells, anything else left-aligned). A column is widened
   * when its heading is longer than the requested width; cell content longer
   * than the column is cut off. Headings are always left-aligned.
   *
   * @param array $data     array of rows, each row an array of cell values
   *                        keyed like $tableDef
   * @param array $tableDef array of column definitions
   * @return string formatted table, '' when $data is empty or not an array
   * @access public
   */
  static function asciiTable($data, $tableDef) {
      if (!is_array($data) || empty($data)) {
          return '';
      }

      $formats = array();
      $headerLine = '';
      $rule = '';

      foreach ($tableDef as $idx => $def) {
          $title = $def[0];
          // Never make a column narrower than its heading.
          $width = ($def[1] < strlen($title)) ? strlen($title) : $def[1];
          // sprintf pads on the right with "-", on the left without it.
          $align = (isset($def[2]) && strtolower($def[2]) === 'r') ? '' : '-';

          $headerLine .= '|' . sprintf(" %-{$width}.{$width}s ", $title);
          $rule .= '+' . str_repeat('-', $width + 2);
          $formats[$idx] = " %{$align}{$width}.{$width}s ";
      }
      $headerLine .= '|';
      $rule .= '+';

      $body = '';
      foreach ($data as $cells) {
          foreach ($cells as $idx => $cell) {
              $body .= '|' . sprintf($formats[$idx], $cell);
          }
          $body .= "|\n";
      }

      return $rule . "\n" . $headerLine . "\n" . $rule . "\n" . $body . $rule;
  }
  /**
   * Normalises whitespace in a UTF-8 string.
   *
   * Leading and trailing whitespace is removed with PHP's trim(), every run
   * of two or more whitespace characters is replaced by a single blank, and
   * finally all remaining \r and \n characters are deleted.
   *
   * @param string $in string to clean up
   * @return string cleaned string
   * @access public
   */
  static function trim($in) {
      $collapsed = preg_replace('/[\s]{2,}/u', ' ', trim($in));
      return preg_replace('/[\r\n]*/u', '', $collapsed);
  }
  /**
   * Runs a markup fragment through the tidy extension and returns the
   * repaired fragment as a string.
   *
   * The input is returned unchanged when the tidy class does not exist. The
   * fragment is parsed as "utf8" with the options listed below.
   *
   * @param string $string markup to clean
   * @return string repaired markup, or $string when tidy is unavailable
   */
  static function tidyfy($string) {
      if (!class_exists("tidy")) {
          return $string;
      }

      $tidy = new tidy();
      if (!$tidy) {
          return $string;
      }

      $tidyOptions = array(
          "output-xhtml" => true,
          "show-body-only" => true,
          "clean" => false,
          "wrap" => "0",
          "indent" => false,
          "indent-spaces" => 1,
          "ascii-chars" => false,
          "wrap-attributes" => false,
          "alt-text" => "",
          "doctype" => "loose",
          "numeric-entities" => true,
          "drop-proprietary-attributes" => true,
      );

      $tidy->parseString($string, $tidyOptions, "utf8");
      $tidy->cleanRepair();

      return (string) $tidy;
  }
  /**
   * Wraps http:// and https:// URLs found in $text in <a href="..."> tags.
   *
   * A URL is only picked up at the start of the text or after whitespace,
   * "(", "." or ":". A trailing "." or "," is left outside the link.
   *
   * @param string $text text that may contain URLs
   * @return string text with the URLs turned into links
   */
  static function makeLinksClickable($text) {
      $urlPattern = "#([\s\(\.\:]|\A)(http[s]?:\/\/[^\s^>^<^\)]*[^\s^>^<^\)^\.^\,])#m";
      $anchor = "$1<a href=\"$2\">$2</a>";

      return preg_replace($urlPattern, $anchor, $text);
  }
  /**
   * Collapses every run of two or more "/" into a single "/".
   *
   * This applies to the whole string, so the "//" after a URL scheme is
   * collapsed as well.
   *
   * @param string $str path or URL
   * @return string
   */
  static function removeDoubleSlashes($str) {
      $slashRun = "#\/{2,}#";
      return preg_replace($slashRun, "/", $str);
  }
  /**
   * Replaces every blank in $str by "+".
   *
   * @param string $str input text
   * @return string
   */
  static function spacesToPlus($str) {
      $blank = ' ';
      $plus = '+';
      return str_replace($blank, $plus, $str);
  }
  /**
   * Escapes a value for use inside a single-quoted JavaScript string
   * literal.
   *
   * Backslashes, single quotes, line feeds and carriage returns are
   * escaped. The previous version left backslashes alone, so an input
   * ending in "\" (or containing "\'") produced an escaped backslash
   * followed by a bare quote and terminated the literal early; carriage
   * returns were not escaped either.
   *
   * The backslash is handled first so that the backslashes inserted by the
   * later replacements are not doubled. Double quotes and markup such as
   * "</script>" are not touched.
   * NOTE(review): callers that pass already escaped text will now see their
   * backslashes doubled - verify call sites.
   *
   * @param string $str raw value
   * @return string escaped value
   */
  static function escapeJSValue($str) {
      return str_replace(
          array("\\", "'", "\n", "\r"),
          array("\\\\", "\\'", "\\n", "\\r"),
          $str
      );
  }
  /**
   * Checks whether $string consists only of well-formed UTF-8 sequences.
   *
   * The pattern comes from
   * http://w3.org/International/questions/qa-forms-utf-8.html. Among the
   * single-byte characters it accepts tab, LF, CR and 0x20-0x7E only.
   *
   * @param string $string bytes to test
   * @return int|false result of preg_match(): 1 on match, 0 otherwise,
   *                   false on a PCRE error
   */
  public static function isUtf8($string) {
      $wellFormedUtf8 = '%^(?:
          [\x09\x0A\x0D\x20-\x7E] # ASCII
          | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
          | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
          | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
          | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
          | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
          | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
          | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
          )*$%xs';

      return preg_match($wellFormedUtf8, $string);
  }
  /**
   * Checks whether $str contains only tab, LF, CR, the bytes 0x20-0x7E and
   * the bytes 0xA0-0xFF.
   *
   * @param string $str bytes to test
   * @return int|false result of preg_match(): 1 on match, 0 otherwise,
   *                   false on a PCRE error
   */
  public static function isISO88591($str) {
      $latin1Only = '/^([\x09\x0A\x0D\x20-\x7E\xA0-\xFF])*$/';
      return preg_match($latin1Only, $str);
  }
  /**
   * Converts $str to UTF-8 according to the charset announced in
   * $_SERVER['CONTENT_TYPE'].
   *
   * The string is returned as it came when no Content-Type or no charset
   * parameter is present, when the charset already is UTF-8, or when iconv()
   * cannot perform the conversion.
   *
   * Fixes over the previous version:
   *  - the charset was captured with the character class [^/s^;], a typo
   *    for "not whitespace, not semicolon" that stopped at the first "s",
   *    "/" or "^" - "charset=iso-8859-1" was read as "i".
   *  - charset names are compared case-insensitively and surrounding quotes
   *    (charset="utf-8") are ignored.
   *  - a failed iconv() call no longer turns the text into false.
   *  - ISO-8859-1 goes through iconv() as well instead of utf8_encode().
   *
   * @param string $str text in the charset named by the request
   * @return string text converted to UTF-8, or $str unchanged
   */
  static function transformFromContentTypeToUTF8($str) {
      if (!isset($_SERVER['CONTENT_TYPE'])) {
          return $str;
      }
      if (!preg_match('#charset\s*=\s*["\']?([^\s;"\']+)#i', $_SERVER['CONTENT_TYPE'], $matches)) {
          // no charset given: return the text as it came
          return $str;
      }

      $charset = strtoupper($matches[1]);
      if ($charset === 'UTF-8' || $charset === 'UTF8') {
          return $str;
      }

      $converted = iconv($charset, 'UTF-8', $str);
      return ($converted === false) ? $str : $converted;
  }
  /**
   * Converts an XML string to UTF-8 using the HTTP Content-Type charset,
   * unless the document carries its own encoding in an XML declaration.
   *
   * The previous pattern "<?xml" left the "?" unescaped, which made the "<"
   * optional instead of matching the literal "<?"; any tag holding an
   * attribute such as xml:lang followed by an encoding= attribute was
   * therefore mistaken for an XML declaration and the conversion skipped.
   * Whitespace before "=" is tolerated as well.
   *
   * @param string $xml XML document as received
   * @return string $xml itself when it declares an encoding, otherwise the
   *                result of transformFromContentTypeToUTF8()
   */
  static function fixXMLEncodingFromHTTP($xml) {
      if (preg_match('#<\?xml[^>]+encoding\s*=#', $xml)) {
          return $xml;
      }
      return bx_helpers_string::transformFromContentTypeToUTF8($xml);
  }
  387. }