PageRenderTime 48ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/src/Scraper.php

https://gitlab.com/boptom/scraper
PHP | 413 lines | 260 code | 60 blank | 93 comment | 9 complexity | 8c63bd700ba9768794431eebe709fed5 MD5 | raw file
  1. <?php
  2. namespace Scraper;
  3. use Scraper\ScraperHelpers;
  4. class Scraper
  5. {
  6. protected $text;
  7. public function __construct($text = '')
  8. {
  9. self::setText($text);
  10. }
  11. /**
  12. * Sets $this->text
  13. *
  14. * @return $this
  15. **/
  16. public function setText($text)
  17. {
  18. $this->text = $text;
  19. return $this;
  20. }
  21. /**
  22. * Returns $this->text
  23. *
  24. * @return array|string
  25. **/
  26. public function getText()
  27. {
  28. return $this->text;
  29. }
  30. /**
  31. * Returns $this->text if numeric, otherwise zero
  32. *
  33. * @return int|float
  34. **/
  35. public function asNumeric()
  36. {
  37. self::trim();
  38. return is_numeric($this->text) ? $this->text : 0;
  39. }
  40. /**
  41. * Returns type of $this->text
  42. *
  43. * @return string
  44. **/
  45. public function getType()
  46. {
  47. return gettype($this->text);
  48. }
  49. /**
  50. * Determines if $this->text is an array
  51. *
  52. * @return bool
  53. **/
  54. public function isArray()
  55. {
  56. return self::getType() == 'array';
  57. }
  58. /**
  59. * Determines if $this->text is a string
  60. *
  61. * @return bool
  62. **/
  63. public function isString()
  64. {
  65. return self::getType() == 'string';
  66. }
  67. public function toString($glue = null)
  68. {
  69. if ($glue === null) {
  70. return self::isArray() ? json_encode($this->text) : $this->text;
  71. }
  72. return self::isArray() ? implode($glue, $this->text) : $this->text;
  73. }
  74. /**
  75. * Applies json_encode to $this->text
  76. *
  77. * @return $this
  78. **/
  79. public function jsonEncode()
  80. {
  81. $this->text = json_encode($this->text);
  82. return $this;
  83. }
  84. /**
  85. * Implodes $this->text
  86. *
  87. * @param string $glue
  88. * @return $this
  89. **/
  90. public function implode($glue = ',')
  91. {
  92. $this->text = implode($glue, (array) $this->text);
  93. return $this;
  94. }
  95. /**
  96. * Explodes $this->text
  97. *
  98. * @param string $glue
  99. * @return $this
  100. **/
  101. public function explode($glue)
  102. {
  103. $this->text = explode($glue, $this->text);
  104. return $this;
  105. }
  106. /**
  107. * Applies str_replace to $this->text
  108. *
  109. * @param string $search The string to search for
  110. * @param string $replace The replacement string
  111. * @return $this
  112. **/
  113. public function replace(string $search, string $replace)
  114. {
  115. if (self::isArray()) {
  116. $this->text = array_map(function ($subject) use ($search, $replace) {
  117. return str_replace($search, $replace, $subject);
  118. }, $this->text);
  119. }
  120. if (self::isString()) {
  121. $this->text = str_replace($search, $replace, $this->text);
  122. }
  123. return $this;
  124. }
  125. /**
  126. * Keeps only unique items in $this->text
  127. *
  128. * @return $this
  129. **/
  130. public function unique()
  131. {
  132. if (self::isArray()) {
  133. $this->text = array_unique($this->text);
  134. }
  135. return $this;
  136. }
  137. /**
  138. * Run a filter over $this->text
  139. *
  140. * @param callable|null $callback
  141. * @return $this
  142. */
  143. public function filter(callable $callback = null)
  144. {
  145. $filtered = ($callback) ? array_filter((array) $this->text, $callback) : array_filter((array) $this->text);
  146. $filtered = array_values($filtered);
  147. $filtered = self::isArray() ? $filtered : ScraperHelpers::firstOf($filtered);
  148. $this->text = $filtered === false ? '' : $filtered;
  149. return $this;
  150. }
  151. /**
  152. * Removes empty items from $this->text
  153. *
  154. * @return $this
  155. **/
  156. public function removeEmpty()
  157. {
  158. return self::filter();
  159. }
  160. /**
  161. * Keeps all items except those in $list
  162. *
  163. * @param array|string $list
  164. * @return $this
  165. **/
  166. public function except($list)
  167. {
  168. return self::filter(function ($subject) use ($list) {
  169. return !in_array($subject, (array) $list);
  170. });
  171. }
  172. /**
  173. * Sorts $this->text
  174. *
  175. * @param int $sortFlags
  176. * @return $this
  177. **/
  178. public function sort($sortFlags = SORT_REGULAR)
  179. {
  180. if (self::isArray()) {
  181. sort($this->text, $sortFlags);
  182. }
  183. return $this;
  184. }
  185. /**
  186. * Sorts $this->text in reverse order
  187. *
  188. * @param int $sortFlags
  189. * @return $this
  190. **/
  191. public function reverseSort($sortFlags = SORT_REGULAR)
  192. {
  193. if (self::isArray()) {
  194. rsort($this->text, $sortFlags);
  195. }
  196. return $this;
  197. }
  198. /**
  199. * Removes non-numeric items from $this->text
  200. *
  201. * @return $this
  202. **/
  203. public function removeNonNumericItems()
  204. {
  205. return self::filter('is_numeric');
  206. }
  207. public function getPattern(array $pattern, $start = 0)
  208. {
  209. $this->text = ScraperHelpers::getPattern($pattern, $this->text, $start);
  210. return $this;
  211. }
  212. public function getPatternRepeat(array $pattern, $start = 0)
  213. {
  214. $this->text = ScraperHelpers::getPatternRepeat($pattern, $this->text, $start);
  215. return $this;
  216. }
  217. public function removeBetween(string $from, string $to)
  218. {
  219. $this->text = ScraperHelpers::removeBetween($from, $to, $this->text);
  220. return $this;
  221. }
  222. public function removeNonNumericCharacters($keepDecimalPoint = false)
  223. {
  224. $this->text = ScraperHelpers::removeNonNumericCharacters($this->text, $keepDecimalPoint);
  225. return $this;
  226. }
  227. public function trimStripTags()
  228. {
  229. $this->text = ScraperHelpers::trimStripTags($this->text);
  230. return $this;
  231. }
  232. public function trim()
  233. {
  234. $this->text = ScraperHelpers::trim($this->text);
  235. return $this;
  236. }
  237. public function stripTags()
  238. {
  239. $this->text = ScraperHelpers::stripTags($this->text);
  240. return $this;
  241. }
  242. public function urlDecode()
  243. {
  244. $this->text = ScraperHelpers::urlDecode($this->text);
  245. return $this;
  246. }
  247. public function firstOf()
  248. {
  249. $this->text = ScraperHelpers::firstOf($this->text);
  250. return $this;
  251. }
  252. public function firstNonBlank()
  253. {
  254. $this->text = ScraperHelpers::firstNonBlank($this->text);
  255. return $this;
  256. }
  257. public function first()
  258. {
  259. $array = (array) $this->text;
  260. $this->text = reset($array);
  261. return $this;
  262. }
  263. public function last()
  264. {
  265. $array = (array) $this->text;
  266. $this->text = end($array);
  267. return $this;
  268. }
  269. public function arrayMerge(...$arrays)
  270. {
  271. foreach ($arrays as $array) {
  272. $this->text = array_merge((array) $this->text, (array) $array);
  273. }
  274. return $this;
  275. }
  276. public function firstNumeric()
  277. {
  278. $this->text = ScraperHelpers::firstNumeric($this->text);
  279. return $this;
  280. }
  281. public function getFirstNumberPosition($start = 0)
  282. {
  283. $this->text = ScraperHelpers::getFirstNumberPosition($this->text, $start);
  284. return $this;
  285. }
  286. public function getValue(string $name)
  287. {
  288. $this->text = ScraperHelpers::getValue($name, $this->text);
  289. return $this;
  290. }
  291. public function getDiv($seed, $start = 0, $inclusive = false)
  292. {
  293. $this->text = ScraperHelpers::getDiv($seed, $this->text, $start, $inclusive);
  294. return $this;
  295. }
  296. public function getDivRepeat(string $seed, $start = 0, $inclusive = false)
  297. {
  298. $this->text = ScraperHelpers::getDivRepeat($seed, $this->text, $start, $inclusive);
  299. return $this;
  300. }
  301. public function removeInlineCss()
  302. {
  303. $this->text = ScraperHelpers::removeInlineCss($this->text);
  304. return $this;
  305. }
  306. public function removeInlineJavascript()
  307. {
  308. $this->text = ScraperHelpers::removeInlineJavascript($this->text);
  309. return $this;
  310. }
  311. public function removeHtmlComments()
  312. {
  313. $this->text = ScraperHelpers::removeHtmlComments($this->text);
  314. return $this;
  315. }
  316. public function shorterOf()
  317. {
  318. $this->text = ScraperHelpers::shorterOf($this->text);
  319. return $this;
  320. }
  321. public function shortest()
  322. {
  323. return self::shorterOf();
  324. }
  325. public function append($suffix)
  326. {
  327. $this->text = ScraperHelpers::append($suffix, $this->text);
  328. return $this;
  329. }
  330. public function prepend($prefix)
  331. {
  332. $this->text = ScraperHelpers::prepend($prefix, $this->text);
  333. return $this;
  334. }
  335. public function compressHtml()
  336. {
  337. $this->text = ScraperHelpers::compressHtml($this->text);
  338. return $this;
  339. }
  340. public function removeBlankStrings()
  341. {
  342. if (!is_array($this->text)) {
  343. return $this;
  344. }
  345. $this->text = ScraperHelpers::removeBlankStrings($this->text);
  346. return $this;
  347. }
  348. public function getPostFields($includeCheckboxes = false)
  349. {
  350. $this->text = ScraperHelpers::getPostFields($this->text, $includeCheckboxes);
  351. return $this;
  352. }
  353. }