PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/core/helper/String.php

https://gitlab.com/michield/phpList
PHP | 276 lines | 224 code | 17 blank | 35 comment | 9 complexity | 345572b2d4843e152deabb6997c13406 MD5 | raw file
  1. <?php
  2. namespace phpList\helper;
  3. use phpList\phpList;
  4. /**
  5. * Class String
  6. * Class containing string helper functions
  7. * @package phpList
  8. */
  9. class String
  10. {
  11. /**
  12. * Normalize text
  13. * @param string $var
  14. * @return string normalized var
  15. */
  16. public static function normalize($var)
  17. {
  18. $var = str_replace(" ", "_", $var);
  19. $var = str_replace(";", "", $var);
  20. return $var;
  21. }
  22. /**
  23. * Clean the input string
  24. * @param string $value
  25. * @return string
  26. */
  27. public static function clean($value)
  28. {
  29. $value = trim($value);
  30. $value = preg_replace("/\r/", "", $value);
  31. $value = preg_replace("/\n/", "", $value);
  32. $value = str_replace('"', "&quot;", $value);
  33. $value = str_replace("'", "&rsquo;", $value);
  34. $value = str_replace("`", "&lsquo;", $value);
  35. $value = stripslashes($value);
  36. return $value;
  37. }
  38. /**
  39. * Clean out chars that make preg choke
  40. * primarily used for parsing the placeholders in emails.
  41. * @param string $name
  42. * @return string
  43. */
  44. public static function cleanAttributeName($name)
  45. {
  46. return str_replace(array('(', ')', '/', '\\', '*', '.'), '', $name);
  47. }
  48. /**
  49. * Remove extra spaces
  50. * @param string $string
  51. * @return string
  52. */
  53. public static function removeDoubleSpaces($string)
  54. {
  55. while (strpos($string, ' ')) {
  56. $string = str_replace(' ', ' ', $string);
  57. }
  58. return $string;
  59. }
  60. public static function HTML2Text($text)
  61. {
  62. # strip HTML, and turn links into the full URL
  63. $text = preg_replace("/\r/", "", $text);
  64. #$text = preg_replace("/\n/","###NL###",$text);
  65. $text = preg_replace("/<script[^>]*>(.*?)<\/script\s*>/is", "", $text);
  66. $text = preg_replace("/<style[^>]*>(.*?)<\/style\s*>/is", "", $text);
  67. # would prefer to use < and > but the strip tags below would erase that.
  68. # $text = preg_replace("/<a href=\"(.*?)\"[^>]*>(.*?)<\/a>/is","\\2\n{\\1}",$text,100);
  69. # $text = preg_replace("/<a href=\"(.*?)\"[^>]*>(.*?)<\/a>/is","[URLTEXT]\\2[/URLTEXT][LINK]\\1[/LINK]",$text,100);
  70. $text = preg_replace(
  71. "/<a[^>]*href=[\"\'](.*)[\"\'][^>]*>(.*)<\/a>/Umis",
  72. "[URLTEXT]\\2[ENDURLTEXT][LINK]\\1[ENDLINK]\n",
  73. $text
  74. );
  75. $text = preg_replace("/<b>(.*?)<\/b\s*>/is", "*\\1*", $text);
  76. $text = preg_replace("/<h[\d]>(.*?)<\/h[\d]\s*>/is", "**\\1**\n", $text);
  77. # $text = preg_replace("/\s+/"," ",$text);
  78. $text = preg_replace("/<i>(.*?)<\/i\s*>/is", "/\\1/", $text);
  79. $text = preg_replace("/<\/tr\s*?>/i", "<\/tr>\n\n", $text);
  80. $text = preg_replace("/<\/p\s*?>/i", "<\/p>\n\n", $text);
  81. $text = preg_replace("/<br[^>]*?>/i", "<br>\n", $text);
  82. $text = preg_replace("/<br[^>]*?\/>/i", "<br\/>\n", $text);
  83. $text = preg_replace("/<table/i", "\n\n<table", $text);
  84. $text = strip_tags($text);
  85. # find all URLs and replace them back
  86. preg_match_all('~\[URLTEXT\](.*)\[ENDURLTEXT\]\[LINK\](.*)\[ENDLINK\]~Umis', $text, $links);
  87. foreach ($links[0] as $matchindex => $fullmatch) {
  88. $linktext = $links[1][$matchindex];
  89. $linkurl = $links[2][$matchindex];
  90. # check if the text linked is a repetition of the URL
  91. if (trim($linktext) == trim($linkurl) ||
  92. 'http://' . trim($linktext) == trim($linkurl)
  93. ) {
  94. $linkreplace = $linkurl;
  95. } else {
  96. ## if link is an anchor only, take it out
  97. if (strpos($linkurl, '#') !== false) {
  98. $linkreplace = $linktext;
  99. } else {
  100. $linkreplace = $linktext . ' <' . $linkurl . '>';
  101. }
  102. }
  103. # $text = preg_replace('~'.preg_quote($fullmatch).'~',$linkreplace,$text);
  104. $text = str_replace($fullmatch, $linkreplace, $text);
  105. }
  106. $text = preg_replace(
  107. "/<a href=[\"\'](.*?)[\"\'][^>]*>(.*?)<\/a>/is",
  108. "[URLTEXT]\\2[ENDURLTEXT][LINK]\\1[ENDLINK]",
  109. $text,
  110. 500
  111. );
  112. $text = String::replaceChars($text);
  113. $text = preg_replace("/###NL###/", "\n", $text);
  114. $text = preg_replace("/\n /", "\n", $text);
  115. $text = preg_replace("/\t/", " ", $text);
  116. # reduce whitespace
  117. while (preg_match("/ /", $text)) {
  118. $text = preg_replace("/ /", " ", $text);
  119. }
  120. while (preg_match("/\n\s*\n\s*\n/", $text)) {
  121. $text = preg_replace("/\n\s*\n\s*\n/", "\n\n", $text);
  122. }
  123. $text = wordwrap($text, 70);
  124. return $text;
  125. }
  126. public static function replaceChars($text)
  127. {
  128. // $document should contain an HTML document.
  129. // This will remove HTML tags, javascript sections
  130. // and white space. It will also convert some
  131. // common HTML entities to their text equivalent.
  132. $search = array(
  133. "'&(quot|#34);'i", // Replace html entities
  134. "'&(amp|#38);'i",
  135. "'&(lt|#60);'i",
  136. "'&(gt|#62);'i",
  137. "'&(nbsp|#160);'i",
  138. "'&(iexcl|#161);'i",
  139. "'&(cent|#162);'i",
  140. "'&(pound|#163);'i",
  141. "'&(copy|#169);'i",
  142. "'&rsquo;'i",
  143. "'&ndash;'i",
  144. "'&#(\d+);'e"
  145. ); // evaluate as php
  146. $replace = array(
  147. "\"",
  148. "&",
  149. "<",
  150. ">",
  151. " ",
  152. chr(161),
  153. chr(162),
  154. chr(163),
  155. chr(169),
  156. "'",
  157. "-",
  158. "chr(\\1)"
  159. );
  160. $text = preg_replace($search, $replace, $text);
  161. # eze
  162. # $text = html_entity_decode ( $text , ENT_QUOTES , $GLOBALS['strCharSet'] );
  163. $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
  164. return $text;
  165. }
  166. public static function addAbsoluteResources($text, $url)
  167. {
  168. $parts = parse_url($url);
  169. $tags = array(
  170. 'src\s*=\s*',
  171. 'href\s*=\s*',
  172. 'action\s*=\s*',
  173. 'background\s*=\s*',
  174. '@import\s+',
  175. '@import\s+url\('
  176. );
  177. foreach ($tags as $tag) {
  178. # preg_match_all('/'.preg_quote($tag).'"([^"|\#]*)"/Uim', $text, $foundtags);
  179. # we're only handling nicely formatted src="something" and not src=something, ie quotes are required
  180. # bit of a nightmare to not handle it with quotes.
  181. preg_match_all('/(' . $tag . ')"([^"|\#]*)"/Uim', $text, $foundtags);
  182. for ($i = 0; $i < count($foundtags[0]); $i++) {
  183. $match = $foundtags[2][$i];
  184. $tagmatch = $foundtags[1][$i];
  185. # print "$match<br/>";
  186. if (preg_match("#^(http|javascript|https|ftp|mailto):#i", $match)) {
  187. # scheme exists, leave it alone
  188. } elseif (preg_match("#\[.*\]#U", $match)) {
  189. # placeholders used, leave alone as well
  190. } elseif (preg_match("/^\//", $match)) {
  191. # starts with /
  192. $text = preg_replace(
  193. '#' . preg_quote($foundtags[0][$i]) . '#im',
  194. $tagmatch . '"' . $parts['scheme'] . '://' . $parts['host'] . $match . '"',
  195. $text,
  196. 1
  197. );
  198. } else {
  199. $path = '';
  200. if (isset($parts['path'])) {
  201. $path = $parts['path'];
  202. }
  203. if (!preg_match('#/$#', $path)) {
  204. $pathparts = explode('/', $path);
  205. array_pop($pathparts);
  206. $path = join('/', $pathparts);
  207. $path .= '/';
  208. }
  209. $text = preg_replace(
  210. '#' . preg_quote($foundtags[0][$i]) . '#im',
  211. $tagmatch . '"' . $parts['scheme'] . '://' . $parts['host'] . $path . $match . '"',
  212. $text,
  213. 1
  214. );
  215. }
  216. }
  217. }
  218. # $text = preg_replace('#PHPSESSID=[^\s]+
  219. return $text;
  220. }
  221. public static function removeJavascript($content) {
  222. $content = preg_replace('/<script[^>]*>(.*?)<\/script\s*>/mis','',$content);
  223. return $content;
  224. }
  225. public static function stripComments($content) {
  226. $content = preg_replace('/<!--(.*?)-->/mis','',$content);
  227. return $content;
  228. }
  229. public static function compressContent($content) {
  230. ## this needs loads more testing across systems to be sure
  231. return $content;
  232. /*
  233. $content = preg_replace("/\n/",' ',$content);
  234. $content = preg_replace("/\r/",'',$content);
  235. $content = removeJavascript($content);
  236. $content = stripComments($content);
  237. ## find some clean way to remove double spacing
  238. $content = preg_replace("/\t/",' ',$content);
  239. while (preg_match("/ /",$content)) {
  240. $content = preg_replace("/ /",' ',$content);
  241. }
  242. return $content;
  243. */
  244. }
  245. }