PageRenderTime 27ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/myth/Docs/Search.php

https://gitlab.com/digitalpoetry/exceptionally-timed
PHP | 462 lines | 190 code | 81 blank | 191 comment | 32 complexity | 61a2c064d358ccfa76044ee213721c43 MD5 | raw file
  1. <?php namespace Myth\Docs;
  2. /**
  3. * Sprint
  4. *
  5. * A set of power tools to enhance the CodeIgniter framework and provide consistent workflow.
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy
  8. * of this software and associated documentation files (the "Software"), to deal
  9. * in the Software without restriction, including without limitation the rights
  10. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. * copies of the Software, and to permit persons to whom the Software is
  12. * furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in
  15. * all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. * THE SOFTWARE.
  24. *
  25. * @package Sprint
  26. * @author Lonnie Ezell
  27. * @copyright Copyright 2014-2015, New Myth Media, LLC (http://newmythmedia.com)
  28. * @license http://opensource.org/licenses/MIT (MIT)
  29. * @link http://sprintphp.com
  30. * @since Version 1.0
  31. */
  32. use Myth\Docs\DocSearchInterface;
  33. /**
  34. * Class Search
  35. *
  36. * Implements basic search capabilities for Bonfire docs. Includes application,
  37. * core bonfire, and module docs.
  38. *
  39. * @package Myth\Docs
  40. */
  41. class Search implements DocSearchInterface
  42. {
  43. /**
  44. * Minimum characters that can be submitted for a search.
  45. *
  46. * @var int
  47. */
  48. protected $min_chars = 3;
  49. /**
  50. * Maximum characters that can be submitted for a search.
  51. *
  52. * @var int
  53. */
  54. protected $max_chars = 30;
  55. /**
  56. * Valid file extensions we can search in.
  57. *
  58. * @var string
  59. */
  60. protected $allowed_file_types = 'html|htm|php|php4|php5|txt|md';
  61. /**
  62. * Which files should we skip over during our search?
  63. *
  64. * @var array
  65. */
  66. protected $skip_files = ['.', '..', '_404.md', '_toc.ini'];
  67. /**
  68. * How much of each file should we read.
  69. * Use lower values for faster searches.
  70. *
  71. * @var int
  72. */
  73. protected $byte_size = 51200;
  74. /**
  75. * Number of words long (approximately)
  76. * the result excerpt should be.
  77. *
  78. * @var int
  79. */
  80. protected $excerpt_length = 60;
  81. /**
  82. * The maximum number of results allowed from a single file.
  83. *
  84. * @var int
  85. */
  86. protected $max_per_file = 1;
  87. protected $doc_folders = array();
  88. protected $formatters = array();
  89. //--------------------------------------------------------------------
  90. /**
  91. * The entry point for performing a search of the documentation.
  92. *
  93. * @param null $terms
  94. * @param array $folders
  95. *
  96. * @return array|null
  97. */
  98. public function search($terms = null, $folders = [])
  99. {
  100. if (empty($terms) || empty($folders)) {
  101. return null;
  102. }
  103. $results = [];
  104. $this->doc_folders = $folders;
  105. foreach ($folders as $group => $folder) {
  106. $results = array_merge($results, $this->searchFolder($terms, $folder, $group));
  107. }
  108. return $results;
  109. }
  110. //--------------------------------------------------------------------
  111. //--------------------------------------------------------------------
  112. // Private Methods
  113. //--------------------------------------------------------------------
  114. /**
  115. * Searches a single directory worth of files.
  116. *
  117. * @param $term
  118. * @param $folder
  119. * @param $group_name
  120. *
  121. * @return array The results.
  122. */
  123. protected function searchFolder($term, $folder, $group_name)
  124. {
  125. $results = [];
  126. $map = $this->directory_map($folder, 2);
  127. $map = $this->flattenMap($map);
  128. // Make sure we have something to work with.
  129. if (! is_array($map) || (is_array($map) && ! count($map))) {
  130. return [];
  131. }
  132. // Loop over each file and search the contents for our term.
  133. foreach ($map as $dir => $file) {
  134. $file_count = 0;
  135. if (in_array($file, $this->skip_files)) {
  136. continue;
  137. }
  138. // Is it a folder?
  139. if (is_array($file) && count($file)) {
  140. $results = array_merge($results, $this->searchFolder($term, $folder . '/' . $dir, $group_name));
  141. continue;
  142. }
  143. // Make sure it's the right file type...
  144. if (! preg_match("/({$this->allowed_file_types})/i", $file)) {
  145. continue;
  146. }
  147. $path = is_string($dir) ? $folder . '/' . $dir . '/' . $file : $folder . '/' . $file;
  148. $term_html = htmlentities($term);
  149. // Read in the file text
  150. $handle = fopen($path, 'r');
  151. $text = fread($handle, $this->byte_size);
  152. // Do we have a match in here somewhere?
  153. $found = stristr($text, $term) || stristr($text, $term_html);
  154. if (! $found) {
  155. continue;
  156. }
  157. // Escape our terms to safely use in a preg_match
  158. $excerpt = strip_tags($text);
  159. $term = preg_quote($term);
  160. $term = str_replace("/", "\/", "{$term}");
  161. $term_html = preg_quote($term_html);
  162. $term_html = str_replace("/", "\/", "{$term_html}");
  163. // Add the item to our results with extracts.
  164. if (preg_match_all(
  165. "/((\s\S*){0,3})($term|$term_html)((\s?\S*){0,3})/i",
  166. $excerpt,
  167. $matches,
  168. PREG_OFFSET_CAPTURE | PREG_SET_ORDER
  169. )) {
  170. foreach ($matches as $match) {
  171. if ($file_count >= $this->max_per_file) {
  172. continue;
  173. }
  174. $result_url = '/docs/' . $group_name . '/' . str_replace('.md', '', $file);
  175. foreach ($this->doc_folders as $alias => $folder) {
  176. $result_url = str_replace($folder, $alias, $result_url);
  177. }
  178. $results[] = [
  179. 'title' => $this->extractTitle($excerpt, $file),
  180. 'file' => $folder . '/' . $file,
  181. 'url' => $result_url,
  182. 'extract' => $this->buildExtract($excerpt, $term, $match[0][0])
  183. ];
  184. $file_count++;
  185. }
  186. }
  187. }
  188. return $results;
  189. }
  190. //--------------------------------------------------------------------
  191. /**
  192. * Stores the name of the callback method to run to convert the source
  193. * files to viewable files. By default, this should be used to register
  194. * a Mardown Extended formatter with the system, but could be used to
  195. * extend the
  196. *
  197. * @param string $callback_name
  198. * @param bool $cascade // If FALSE the formatting of a component ends here. If TRUE, will be passed to next formatter.
  199. * @return $this
  200. */
  201. public function registerFormatter($callback_name = '', $cascade = false)
  202. {
  203. if (empty($callback_name)) return;
  204. $this->formatters[] = array($callback_name => $cascade);
  205. return $this;
  206. }
  207. //--------------------------------------------------------------------
  208. /**
  209. * Runs the text through the registered formatters.
  210. *
  211. * @param $str
  212. * @return mixed
  213. */
  214. public function format($str)
  215. {
  216. if (! is_array($this->formatters)) return $str;
  217. foreach ($this->formatters as $formatter) {
  218. $method = key($formatter);
  219. $cascade = $formatter[$method];
  220. $str = call_user_func($method, $str);
  221. if (! $cascade) return $str;
  222. }
  223. return $str;
  224. }
  225. //--------------------------------------------------------------------
  226. //--------------------------------------------------------------------
  227. // Protected Methods
  228. //--------------------------------------------------------------------
  229. /**
  230. * Converts an array generated by directory_map into a flat array of
  231. * folders, removing any nested folders and adding them to the path.
  232. *
  233. * @param $map
  234. * @param $prefix Used to recursively add the folder name...
  235. * @return mixed
  236. */
  237. protected function flattenMap($map, $prefix = '')
  238. {
  239. if (! is_array($map) || ! count($map)) {
  240. return $map;
  241. }
  242. $return = [];
  243. foreach ($map as $folder => $files) {
  244. // If it's a folder name and an array of files
  245. // then call this method recursively to flatten it out.
  246. if (is_array($files)) {
  247. $return = array_merge($return, $this->flattenMap($files, $prefix . $folder));
  248. continue;
  249. }
  250. // Else, add our prefix (if any) to the filename...
  251. $return[] = $prefix . $files;
  252. }
  253. return $return;
  254. }
  255. //--------------------------------------------------------------------
  256. /**
  257. * Handles extracting the text surrounding our match and basic match formatting.
  258. *
  259. * @param $excerpt
  260. * @param $term
  261. * @param $match_string
  262. *
  263. * @return string
  264. */
  265. protected function buildExtract($excerpt, $term, $match_string)
  266. {
  267. // Find the character positions within the string that our match was found at.
  268. // That way we'll know from what positions before and after this we want to grab it in.
  269. $start_offset = stripos($excerpt, $match_string);
  270. // Modify the start and end positions based on $this->excerpt_length / 2.
  271. $buffer = floor($this->excerpt_length / 2);
  272. // Adjust our start position
  273. $start_offset = $start_offset - $buffer;
  274. if ($start_offset < 0) {
  275. $start_offset = 0;
  276. }
  277. $extract = substr($excerpt, $start_offset);
  278. $extract = strip_tags($this->format($extract));
  279. $extract = $this->firstXWords($extract, $this->excerpt_length);
  280. // Wrap the search term in a span we can style.
  281. $extract = str_ireplace($term, '<span class="term-hilight">' . $term . '</span>', $extract);
  282. return $extract;
  283. }
  284. //--------------------------------------------------------------------
  285. /**
  286. * Extracts the title from a bit of markdown formatted text. If it doesn't
  287. * have an h1 or h2, then it uses the filename.
  288. *
  289. * @param $excerpt
  290. * @param $file
  291. * @return string
  292. */
  293. protected function extractTitle($excerpt, $file)
  294. {
  295. $title = '';
  296. // Easiest to work if this is split into lines.
  297. $lines = explode("\n", $excerpt);
  298. if (is_array($lines) && count($lines)) {
  299. foreach ($lines as $line) {
  300. if (strpos($line, '# ') === 0 || strpos($line, '## ') === 0) {
  301. $title = trim(str_replace('#', '', $line));
  302. break;
  303. }
  304. }
  305. }
  306. // If it's empty, we'll use the filename.
  307. if (empty($title)) {
  308. $title = str_replace('_', ' ', $file);
  309. $title = str_replace('.md', ' ', $title);
  310. $title = ucwords($title);
  311. }
  312. return $title;
  313. }
  314. //--------------------------------------------------------------------
  315. /**
  316. * Create a Directory Map
  317. *
  318. * Reads the specified directory and builds an array
  319. * representation of it. Sub-folders contained with the
  320. * directory will be mapped as well.
  321. *
  322. * @param string $source_dir Path to source
  323. * @param int $directory_depth Depth of directories to traverse
  324. * (0 = fully recursive, 1 = current dir, etc)
  325. * @param bool $hidden Whether to show hidden files
  326. * @return array
  327. */
  328. protected function directory_map($source_dir, $directory_depth = 0, $hidden = FALSE)
  329. {
  330. if ($fp = @opendir($source_dir)) {
  331. $filedata = array();
  332. $new_depth = $directory_depth - 1;
  333. $source_dir = rtrim($source_dir, DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
  334. while (FALSE !== ($file = readdir($fp))) {
  335. // Remove '.', '..', and hidden files [optional]
  336. if ($file === '.' OR $file === '..' OR ($hidden === FALSE && $file[0] === '.')) {
  337. continue;
  338. }
  339. is_dir($source_dir . $file) && $file .= DIRECTORY_SEPARATOR;
  340. if (($directory_depth < 1 OR $new_depth > 0) && is_dir($source_dir . $file))
  341. {
  342. $filedata[$file] = $this->directory_map($source_dir . $file, $new_depth, $hidden);
  343. } else
  344. {
  345. // Replace the directory separator here with a forward slash since
  346. // Windows uses backward slashes and not all browsers will auto-replace
  347. // those slashes in URLs.
  348. $filedata[] = str_replace(DIRECTORY_SEPARATOR, '/', $file);
  349. }
  350. }
  351. closedir($fp);
  352. return $filedata;
  353. }
  354. return FALSE;
  355. }
  356. //--------------------------------------------------------------------
  357. /**
  358. * Gets the first 'X' words of a string.
  359. *
  360. * @param $str
  361. * @param int $wordCount
  362. * @return string
  363. */
  364. protected function firstXWords($str, $wordCount = 10)
  365. {
  366. return implode(
  367. '',
  368. array_slice(
  369. preg_split(
  370. '/([\s,\.;\?\!]+)/',
  371. $str,
  372. $wordCount * 2 + 1,
  373. PREG_SPLIT_DELIM_CAPTURE
  374. ),
  375. 0,
  376. $wordCount * 2 - 1
  377. )
  378. );
  379. }
  380. //--------------------------------------------------------------------
  381. }