/src/Scraper.php
https://gitlab.com/boptom/scraper · PHP · 413 lines · 260 code · 60 blank · 93 comment · 9 complexity · 8c63bd700ba9768794431eebe709fed5 MD5 · raw file
- <?php
- namespace Scraper;
- use Scraper\ScraperHelpers;
/**
 * Fluent wrapper around a piece of scraped text.
 *
 * The wrapped value ($this->text) is either a string or an array of items.
 * Most methods replace it with a transformed value and return $this so that
 * calls can be chained; the final value is read back with getText(),
 * toString() or asNumeric().
 *
 * Instance methods are always invoked through $this-> (not self::) so that
 * subclasses overriding a method are honoured by the methods that build on it.
 **/
class Scraper
{
    /**
     * The value currently being processed (whatever was last stored by
     * setText() or by one of the transforming methods).
     *
     * @var array|string
     **/
    protected $text;

    /**
     * Creates a scraper wrapping $text.
     *
     * @param array|string $text Initial value, stored through setText()
     **/
    public function __construct($text = '')
    {
        $this->setText($text);
    }

    /**
     * Sets $this->text
     *
     * @param array|string $text
     * @return $this
     **/
    public function setText($text)
    {
        $this->text = $text;

        return $this;
    }

    /**
     * Returns $this->text
     *
     * @return array|string
     **/
    public function getText()
    {
        return $this->text;
    }

    /**
     * Trims $this->text, then returns it if it is numeric, otherwise zero.
     *
     * Note that this modifies the stored value (it is replaced by its trimmed
     * form) and that a numeric value is returned exactly as stored, i.e. a
     * numeric string is NOT cast to int/float.
     *
     * @return int|float|string The numeric value as stored, or int 0
     **/
    public function asNumeric()
    {
        $this->trim();

        return is_numeric($this->text) ? $this->text : 0;
    }

    /**
     * Returns type of $this->text, as reported by gettype()
     *
     * @return string
     **/
    public function getType()
    {
        return gettype($this->text);
    }

    /**
     * Determines if $this->text is an array
     *
     * @return bool
     **/
    public function isArray()
    {
        return $this->getType() === 'array';
    }

    /**
     * Determines if $this->text is a string
     *
     * @return bool
     **/
    public function isString()
    {
        return $this->getType() === 'string';
    }

    /**
     * Returns $this->text as a single value without modifying it.
     *
     * An array is JSON-encoded when no glue is given, or imploded with $glue
     * otherwise. A non-array value is returned as-is.
     *
     * @param string|null $glue Separator used to join array items; null for JSON
     * @return mixed
     **/
    public function toString($glue = null)
    {
        if (!$this->isArray()) {
            return $this->text;
        }

        return $glue === null
            ? json_encode($this->text)
            : implode($glue, $this->text);
    }

    /**
     * Applies json_encode to $this->text
     *
     * @return $this
     **/
    public function jsonEncode()
    {
        $this->text = json_encode($this->text);

        return $this;
    }

    /**
     * Implodes $this->text (a non-array value is cast to an array first)
     *
     * @param string $glue
     * @return $this
     **/
    public function implode($glue = ',')
    {
        $this->text = implode($glue, (array) $this->text);

        return $this;
    }

    /**
     * Explodes $this->text
     *
     * The value is handed to explode() unchanged, so it is expected to be a
     * string at this point.
     *
     * @param string $glue
     * @return $this
     **/
    public function explode($glue)
    {
        $this->text = explode($glue, $this->text);

        return $this;
    }

    /**
     * Applies str_replace to $this->text
     *
     * For an array the replacement is applied to every item; for a string it
     * is applied directly. Any other type is left untouched.
     *
     * @param string $search The string to search for
     * @param string $replace The replacement string
     * @return $this
     **/
    public function replace(string $search, string $replace)
    {
        if ($this->isArray()) {
            $this->text = array_map(static function ($subject) use ($search, $replace) {
                return str_replace($search, $replace, $subject);
            }, $this->text);
        } elseif ($this->isString()) {
            $this->text = str_replace($search, $replace, $this->text);
        }

        return $this;
    }

    /**
     * Keeps only unique items in $this->text (no-op unless it is an array).
     *
     * Uses array_unique(), so the keys of the surviving items are preserved.
     *
     * @return $this
     **/
    public function unique()
    {
        if ($this->isArray()) {
            $this->text = array_unique($this->text);
        }

        return $this;
    }

    /**
     * Run a filter over $this->text
     *
     * The value is cast to an array, filtered with array_filter() and
     * re-indexed. If the original value was an array the filtered list is
     * stored; otherwise the list is reduced back to a single value through
     * ScraperHelpers::firstOf(), with a `false` result stored as ''.
     *
     * @param callable|null $callback Predicate; null removes empty items
     * @return $this
     **/
    public function filter(?callable $callback = null)
    {
        $items = (array) $this->text;

        $kept = $callback === null
            ? array_filter($items)
            : array_filter($items, $callback);
        $kept = array_values($kept);

        $result = $this->isArray() ? $kept : ScraperHelpers::firstOf($kept);
        $this->text = $result === false ? '' : $result;

        return $this;
    }

    /**
     * Removes empty items from $this->text
     *
     * "Empty" follows array_filter()'s default rule, i.e. every falsy item
     * (including '0' and 0) is dropped.
     *
     * @return $this
     **/
    public function removeEmpty()
    {
        return $this->filter();
    }

    /**
     * Keeps all items except those in $list
     *
     * @param array|string $list Item or items to exclude
     * @return $this
     **/
    public function except($list)
    {
        $excluded = (array) $list;

        return $this->filter(static function ($subject) use ($excluded) {
            // Loose comparison is kept on purpose: existing callers may rely
            // on e.g. '1' matching 1.
            return !in_array($subject, $excluded);
        });
    }

    /**
     * Sorts $this->text (no-op unless it is an array)
     *
     * @param int $sortFlags One of the SORT_* flags accepted by sort()
     * @return $this
     **/
    public function sort($sortFlags = SORT_REGULAR)
    {
        if ($this->isArray()) {
            sort($this->text, $sortFlags);
        }

        return $this;
    }

    /**
     * Sorts $this->text in reverse order (no-op unless it is an array)
     *
     * @param int $sortFlags One of the SORT_* flags accepted by rsort()
     * @return $this
     **/
    public function reverseSort($sortFlags = SORT_REGULAR)
    {
        if ($this->isArray()) {
            rsort($this->text, $sortFlags);
        }

        return $this;
    }

    /**
     * Removes non-numeric items from $this->text
     *
     * @return $this
     **/
    public function removeNonNumericItems()
    {
        return $this->filter('is_numeric');
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::getPattern($pattern, $this->text, $start).
     *
     * @param array $pattern
     * @param int $start
     * @return $this
     **/
    public function getPattern(array $pattern, $start = 0)
    {
        $this->text = ScraperHelpers::getPattern($pattern, $this->text, $start);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::getPatternRepeat($pattern, $this->text, $start).
     *
     * @param array $pattern
     * @param int $start
     * @return $this
     **/
    public function getPatternRepeat(array $pattern, $start = 0)
    {
        $this->text = ScraperHelpers::getPatternRepeat($pattern, $this->text, $start);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::removeBetween($from, $to, $this->text).
     *
     * @param string $from
     * @param string $to
     * @return $this
     **/
    public function removeBetween(string $from, string $to)
    {
        $this->text = ScraperHelpers::removeBetween($from, $to, $this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::removeNonNumericCharacters($this->text, $keepDecimalPoint).
     *
     * @param bool $keepDecimalPoint
     * @return $this
     **/
    public function removeNonNumericCharacters($keepDecimalPoint = false)
    {
        $this->text = ScraperHelpers::removeNonNumericCharacters($this->text, $keepDecimalPoint);

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::trimStripTags().
     *
     * @return $this
     **/
    public function trimStripTags()
    {
        $this->text = ScraperHelpers::trimStripTags($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::trim().
     *
     * @return $this
     **/
    public function trim()
    {
        $this->text = ScraperHelpers::trim($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::stripTags().
     *
     * @return $this
     **/
    public function stripTags()
    {
        $this->text = ScraperHelpers::stripTags($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::urlDecode().
     *
     * @return $this
     **/
    public function urlDecode()
    {
        $this->text = ScraperHelpers::urlDecode($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::firstOf().
     *
     * @return $this
     **/
    public function firstOf()
    {
        $this->text = ScraperHelpers::firstOf($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::firstNonBlank().
     *
     * @return $this
     **/
    public function firstNonBlank()
    {
        $this->text = ScraperHelpers::firstNonBlank($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with its first item.
     *
     * A non-array value is cast to an array first. Because reset() is used,
     * an empty array results in `false` being stored.
     *
     * @return $this
     **/
    public function first()
    {
        $items = (array) $this->text;
        $this->text = reset($items);

        return $this;
    }

    /**
     * Replaces $this->text with its last item.
     *
     * A non-array value is cast to an array first. Because end() is used,
     * an empty array results in `false` being stored.
     *
     * @return $this
     **/
    public function last()
    {
        $items = (array) $this->text;
        $this->text = end($items);

        return $this;
    }

    /**
     * Merges each argument into $this->text using array_merge().
     *
     * Both the current value and every argument are cast to arrays before
     * merging. When called without arguments $this->text is left untouched
     * (in particular it is not cast to an array).
     *
     * @param mixed ...$arrays Values to merge, in order
     * @return $this
     **/
    public function arrayMerge(...$arrays)
    {
        foreach ($arrays as $array) {
            $this->text = array_merge((array) $this->text, (array) $array);
        }

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::firstNumeric().
     *
     * @return $this
     **/
    public function firstNumeric()
    {
        $this->text = ScraperHelpers::firstNumeric($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::getFirstNumberPosition($this->text, $start).
     *
     * @param int $start
     * @return $this
     **/
    public function getFirstNumberPosition($start = 0)
    {
        $this->text = ScraperHelpers::getFirstNumberPosition($this->text, $start);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::getValue($name, $this->text).
     *
     * @param string $name
     * @return $this
     **/
    public function getValue(string $name)
    {
        $this->text = ScraperHelpers::getValue($name, $this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::getDiv($seed, $this->text, $start, $inclusive).
     *
     * @param mixed $seed
     * @param int $start
     * @param bool $inclusive
     * @return $this
     **/
    public function getDiv($seed, $start = 0, $inclusive = false)
    {
        $this->text = ScraperHelpers::getDiv($seed, $this->text, $start, $inclusive);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::getDivRepeat($seed, $this->text, $start, $inclusive).
     *
     * @param string $seed
     * @param int $start
     * @param bool $inclusive
     * @return $this
     **/
    public function getDivRepeat(string $seed, $start = 0, $inclusive = false)
    {
        $this->text = ScraperHelpers::getDivRepeat($seed, $this->text, $start, $inclusive);

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::removeInlineCss().
     *
     * @return $this
     **/
    public function removeInlineCss()
    {
        $this->text = ScraperHelpers::removeInlineCss($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::removeInlineJavascript().
     *
     * @return $this
     **/
    public function removeInlineJavascript()
    {
        $this->text = ScraperHelpers::removeInlineJavascript($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::removeHtmlComments().
     *
     * @return $this
     **/
    public function removeHtmlComments()
    {
        $this->text = ScraperHelpers::removeHtmlComments($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::shorterOf().
     *
     * @return $this
     **/
    public function shorterOf()
    {
        $this->text = ScraperHelpers::shorterOf($this->text);

        return $this;
    }

    /**
     * Alias of shorterOf().
     *
     * @return $this
     **/
    public function shortest()
    {
        return $this->shorterOf();
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::append($suffix, $this->text).
     *
     * @param mixed $suffix
     * @return $this
     **/
    public function append($suffix)
    {
        $this->text = ScraperHelpers::append($suffix, $this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::prepend($prefix, $this->text).
     *
     * @param mixed $prefix
     * @return $this
     **/
    public function prepend($prefix)
    {
        $this->text = ScraperHelpers::prepend($prefix, $this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of ScraperHelpers::compressHtml().
     *
     * @return $this
     **/
    public function compressHtml()
    {
        $this->text = ScraperHelpers::compressHtml($this->text);

        return $this;
    }

    /**
     * Replaces an array $this->text with the result of
     * ScraperHelpers::removeBlankStrings(); a non-array value is left as-is.
     *
     * @return $this
     **/
    public function removeBlankStrings()
    {
        if (!is_array($this->text)) {
            return $this;
        }

        $this->text = ScraperHelpers::removeBlankStrings($this->text);

        return $this;
    }

    /**
     * Replaces $this->text with the result of
     * ScraperHelpers::getPostFields($this->text, $includeCheckboxes).
     *
     * @param bool $includeCheckboxes
     * @return $this
     **/
    public function getPostFields($includeCheckboxes = false)
    {
        $this->text = ScraperHelpers::getPostFields($this->text, $includeCheckboxes);

        return $this;
    }
}