PageRenderTime 46ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/bonfire/modules/docs/libraries/docsearch.php

http://github.com/ci-bonfire/Bonfire
PHP | 291 lines | 148 code | 44 blank | 99 comment | 27 complexity | 728409a1f5a8f782b2aee67e5fc757d6 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php defined('BASEPATH') || exit('No direct script access allowed');
  2. /**
  3. * Bonfire
  4. *
  5. * An open source project to allow developers to jumpstart their development of
  6. * CodeIgniter applications.
  7. *
  8. * @package Bonfire
  9. * @author Bonfire Dev Team
  10. * @copyright Copyright (c) 2011 - 2018, Bonfire Dev Team
  11. * @license http://opensource.org/licenses/MIT The MIT License
  12. * @link http://cibonfire.com
  13. * @since Version 1.0
  14. * @filesource
  15. */
  16. /**
  17. * Class DocSearch
  18. *
  19. * Implements basic search capabilities for Bonfire docs. Includes application,
  20. * core bonfire, and module docs.
  21. */
  class DocSearch
  {
      /** @var object The CodeIgniter instance. */
      protected $ci;

      /** @var integer Minimum characters that can be submitted for a search. */
      protected $min_chars = 3;

      /** @var integer Maximum characters that can be submitted for a search. */
      protected $max_chars = 30;

      /** @var string Pipe-separated list of file extensions we can search in. */
      protected $allowed_file_types = 'html|htm|php|php4|php5|txt|md';

      /** @var array Which files should we skip over during our search? */
      protected $skip_files = array('.', '..', '_404.md', '_toc.ini');

      /**
       * How much of each file should we read. Use lower values for faster searches.
       * @var integer
       */
      protected $byte_size = 51200;

      /**
       * Number of characters long (approximately) the result excerpt should be.
       * @var integer
       */
      protected $excerpt_length = 250;

      /** @var integer The maximum number of results allowed from a single file. */
      protected $max_per_file = 1;

      //--------------------------------------------------------------------------

      /**
       * Constructor loads the text helper and CommonMark library.
       */
      public function __construct()
      {
          $this->ci =& get_instance();
          $this->ci->load->helper('text');
          $this->ci->load->library('CommonMark');
      }

      /**
       * The entry point for performing a search of the documentation.
       *
       * @param string       $terms   The text to look for.
       * @param array|string $folders One or more directories to search.
       *
       * @return array|null Null when either argument is empty, otherwise a list
       * of result arrays with the keys 'title', 'file', 'url' and 'extract'.
       */
      public function search($terms = null, $folders = array())
      {
          if (empty($terms) || empty($folders)) {
              return null;
          }

          $this->ci->load->helper('directory');

          $results = array();
          // The cast lets a single folder be passed as a plain string.
          foreach ((array) $folders as $folder) {
              $results = array_merge($results, $this->searchFolder($terms, $folder));
          }

          return $results;
      }

      /**
       * Searches a single directory worth of files, recursing into any
       * non-empty sub-directory reported by bcDirectoryMap().
       *
       * @param string $term   The raw (unescaped) search term.
       * @param string $folder The directory to search, without trailing slash.
       *
       * @return array The results.
       */
      private function searchFolder($term, $folder)
      {
          $results = array();

          $variants = $this->termVariants($term);
          if (empty($variants)) {
              return $results;
          }

          $map = bcDirectoryMap($folder, 2);

          // Make sure we have something to work with.
          if (empty($map) || ! is_array($map)) {
              return $results;
          }

          // Everything below is loop-invariant, so build it once. The term is
          // escaped into separate variables: $term itself must stay raw because
          // it is reused for every file and for the recursive calls.
          $quoted = array();
          foreach ($variants as $variant) {
              $quoted[] = preg_quote($variant, '/');
          }
          $match_pattern     = '/((\s\S*){0,3})(' . implode('|', $quoted) . ')((\s?\S*){0,3})/i';
          $extension_pattern = '/\.(' . $this->allowed_file_types . ')$/i';
          $limit             = max(0, (int) $this->max_per_file);

          // Loop over each entry and search the file contents for our term.
          foreach ($map as $dir => $file) {
              // Is it a folder? Empty folders are skipped instead of being
              // treated as a file name.
              if (is_array($file)) {
                  if (count($file)) {
                      $results = array_merge($results, $this->searchFolder($term, "{$folder}/{$dir}"));
                  }
                  continue;
              }

              if (in_array($file, $this->skip_files, true)) {
                  continue;
              }

              // Make sure it's the right file type (a real trailing extension).
              if (! preg_match($extension_pattern, $file)) {
                  continue;
              }

              $path = $folder . '/' . $file;
              $text = $this->readFileHead($path);
              if ($text === '') {
                  continue;
              }

              // Do we have a match in here somewhere?
              if (! $this->containsAny($text, $variants)) {
                  continue;
              }

              $excerpt = strip_tags($text);

              if (! preg_match_all(
                  $match_pattern,
                  $excerpt,
                  $matches,
                  PREG_OFFSET_CAPTURE | PREG_SET_ORDER
              )) {
                  continue;
              }

              // Title and URL are the same for every match in this file.
              $title = $this->extractTitle($excerpt, $file);
              $url   = $this->buildResultUrl($folder, str_replace('.md', '', $file));

              // Add the item(s) to our results with extracts.
              foreach (array_slice($matches, 0, $limit) as $match) {
                  $results[] = array(
                      'title'   => $title,
                      'file'    => $path,
                      'url'     => $url,
                      'extract' => $this->buildExtract($excerpt, $term, $match[0][0], $match[0][1]),
                  );
              }
          }

          return $results;
      }

      /**
       * Returns the distinct, non-empty forms of the term to look for: the
       * HTML-entity encoded form first, then the raw form.
       *
       * The entity form comes first so that, when used in a regex alternation,
       * an encoded occurrence is matched as a whole rather than partially.
       *
       * @param string $term
       *
       * @return array
       */
      private function termVariants($term)
      {
          $term     = (string) $term;
          $variants = array();

          foreach (array(htmlentities($term), $term) as $candidate) {
              if ($candidate !== '' && ! in_array($candidate, $variants, true)) {
                  $variants[] = $candidate;
              }
          }

          return $variants;
      }

      /**
       * Case-insensitively checks whether any of the needles occurs in the text.
       *
       * @param string $text
       * @param array  $needles Non-empty strings.
       *
       * @return boolean
       */
      private function containsAny($text, array $needles)
      {
          foreach ($needles as $needle) {
              if (stripos($text, $needle) !== false) {
                  return true;
              }
          }

          return false;
      }

      /**
       * Reads up to $this->byte_size bytes from the start of a file.
       *
       * @param string $path
       *
       * @return string The text read, or an empty string when the path is not a
       * readable file or nothing could be read.
       */
      private function readFileHead($path)
      {
          if (! is_file($path) || ! is_readable($path)) {
              return '';
          }

          $handle = fopen($path, 'r');
          if ($handle === false) {
              return '';
          }

          $text = fread($handle, $this->byte_size);
          fclose($handle);

          return $text === false ? '' : $text;
      }

      /**
       * Maps the folder a file was found in to the URL of its documentation page.
       *
       * Relies on the BFPATH and APPPATH constants, which are defined outside
       * this file.
       *
       * @param string $folder   The directory containing the matched file.
       * @param string $filename The file name with '.md' removed.
       *
       * @return string The URL, or an empty string when $folder is not inside a
       * known documentation location.
       */
      private function buildResultUrl($folder, $filename)
      {
          // Remove trailing directory separator.
          $apppath = rtrim(APPPATH, DIRECTORY_SEPARATOR);

          // Does $folder contain BFPATH?
          if (strpos($folder, BFPATH) !== false) {
              if (strpos($folder, BFPATH . 'docs') !== false) {
                  return '/docs/developer' . str_replace(BFPATH . 'docs', '', $folder) . '/' . $filename;
              }

              if (strpos($folder, BFPATH . 'modules/') !== false) {
                  return $this->buildModuleUrl($folder, BFPATH . 'modules/', $filename);
              }

              return '';
          }

          if (strpos($folder, APPPATH . 'docs') !== false) {
              return '/docs/application' . str_replace(APPPATH . 'docs', '', $folder) . '/' . $filename;
          }

          if (strpos($folder, $apppath . '/modules/') !== false) {
              return $this->buildModuleUrl($folder, $apppath . '/modules/', $filename);
          }

          return '';
      }

      /**
       * Builds the URL for a file that lives inside a module's docs folder.
       *
       * @param string $folder       The directory containing the matched file.
       * @param string $modules_path The modules root (with trailing slash) that
       * $folder is known to contain.
       * @param string $filename     The file name with '.md' removed.
       *
       * @return string
       */
      private function buildModuleUrl($folder, $modules_path, $filename)
      {
          $developer_suffix = '/docs/developer';

          // Does $folder end with '/docs/developer'?
          if (substr($folder, -strlen($developer_suffix)) === $developer_suffix) {
              $url = str_replace($developer_suffix, '', $folder);
              $url = str_replace($modules_path, '/docs/developer/', $url);

              return $url . '/' . $filename;
          }

          $url = str_replace($modules_path, '/docs/application/', $folder);

          // Drop a trailing 'docs' segment but keep the slash before it. rtrim()
          // is not used for this because its second argument is a character
          // mask, not a suffix, and would eat unrelated trailing letters.
          if (substr($url, -strlen('/docs')) === '/docs') {
              $url = substr($url, 0, -strlen('docs'));
          } else {
              // NOTE(review): folders nested below a module's docs directory
              // keep their full path here - confirm against the docs routes.
              $url = rtrim($url, '/') . '/';
          }

          return $url . $filename;
      }

      /**
       * Handles extracting the text surrounding our match and basic match formatting.
       *
       * @param string       $excerpt      The tag-stripped file text.
       * @param string       $term         The raw (unescaped) search term.
       * @param string       $match_string The full text of the regex match.
       * @param integer|null $match_offset Byte offset of the match in $excerpt.
       * When null, the first occurrence of $match_string is located instead.
       *
       * @return string The extract with each occurrence of the term wrapped in
       * a span with the class "term-hilight".
       */
      private function buildExtract($excerpt, $term, $match_string, $match_offset = null)
      {
          // Find the position the match was found at, so we know where to
          // start grabbing the surrounding text.
          if ($match_offset === null) {
              $match_offset = $match_string === '' ? 0 : stripos($excerpt, $match_string);
          }

          // Start half an excerpt before the match, clamped to the beginning.
          $buffer       = (int) floor($this->excerpt_length / 2);
          $start_offset = max(0, (int) $match_offset - $buffer);

          $extract = substr($excerpt, $start_offset);
          $extract = strip_tags($this->ci->commonmark->convert($extract));
          $extract = character_limiter($extract, $this->excerpt_length);

          $quoted = array();
          foreach ($this->termVariants($term) as $variant) {
              $quoted[] = preg_quote($variant, '/');
          }
          if (empty($quoted)) {
              return $extract;
          }

          // Wrap the search term in a span we can style. '$0' re-inserts the
          // text exactly as it appears in the document, preserving its case.
          $highlighted = preg_replace(
              '/' . implode('|', $quoted) . '/i',
              '<span class="term-hilight">$0</span>',
              $extract
          );

          return $highlighted === null ? $extract : $highlighted;
      }

      /**
       * Extracts the title from a bit of markdown formatted text. If it doesn't
       * have an h1 or h2, then it uses the filename.
       *
       * @param string $excerpt
       * @param string $file
       *
       * @return string
       */
      private function extractTitle($excerpt, $file)
      {
          $title = '';

          // Easiest to work if this is split into lines.
          foreach (explode("\n", (string) $excerpt) as $line) {
              if (strpos($line, '# ') === 0 || strpos($line, '## ') === 0) {
                  // Strip only the leading marker and an optional closing run
                  // of hashes, so a '#' inside the title (e.g. "C#") survives.
                  $heading = ltrim($line, '#');
                  $heading = preg_replace('/\s+#+\s*$/', '', $heading);
                  $title   = trim((string) $heading);
                  break;
              }
          }

          // If it's empty, we'll use the filename.
          if (empty($title)) {
              $title = str_replace('_', ' ', $file);
              $title = str_replace('.md', ' ', $title);
              $title = trim(ucwords($title));
          }

          return $title;
      }
  }