PageRenderTime 72ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 1ms

/application/controllers/ScrapeController.php

https://github.com/jverkoey/snaapilookup
PHP | 2654 lines | 2298 code | 304 blank | 52 comment | 301 complexity | f30d8d6021e73b335616cb04c791d2d4 MD5 | raw file
  1. <?php
  2. include_once APPLICATION_PATH . '/controllers/SnaapiController.php';
  3. class ScrapeController extends SnaapiController {
  4. private $_pages_scraped;
  5. const MAX_PAGES_TO_SCRAPE = 1;
  /**
   * Controller initialisation hook.
   *
   * Runs the parent controller's init() first. When the 'env' invoke
   * argument equals 'development', the view renderer is told to render the
   * 'index' script.
   */
  public function init() {
    parent::init();

    $env = $this->getInvokeArg('env');
    if( $env == 'development' ) {
      $this->_helper->viewRenderer->setRender('index');
    }
  }
  /**
   * Entry point for the PHP scrape.
   *
   * Only runs when the 'env' invoke argument is 'development'; any other
   * environment is handed to _forward('error', 'error'). On the development
   * path the results buffer and the page counter are reset before the PHP
   * hierarchies and then the PHP functions are scraped.
   */
  public function phpAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;

    /* Disabled one-off clean-up, kept for reference: rewrites " ," to ","
       in the `data` column of every matching `functions` row.
    $model = $this->getFunctionsModel();
    $db = $model->getTable()->getAdapter();
    $sql = "SELECT * FROM `functions` WHERE `data` LIKE '% ,%'";
    foreach( $db->query($sql)->fetchAll() as $result ) {
      $result['data'] = str_replace(" ,", ',', $result['data']);
      $this->getFunctionsModel()->setData(array(
        'category' => $result['category'],
        'id' => $result['id'],
        'data' => $result['data']
      ));
    }*/

    $this->scrapePHPHierarchies();
    $this->scrapePHPFunctions();
  }
  /**
   * Entry point for the Python scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapePythonModules(true) is run.
   */
  public function pythonAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;
    $this->scrapePythonModules(true);
  }
  /**
   * Entry point for the CSS scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapeCSSFunctions() is run.
   */
  public function cssAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;
    $this->scrapeCSSFunctions();
  }
  /**
   * Entry point for the Zend scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapeZend() is run.
   */
  public function zendAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;
    $this->scrapeZend();
  }
  /**
   * Entry point for the Facebook scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and only scrapeFacebookFbmlPhase2() is run; the other
   * two scrape calls are left commented out.
   */
  public function fbAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;

    // Currently disabled passes:
    //$this->scrapeFacebook();
    //$this->scrapeFacebookFbml();
    $this->scrapeFacebookFbmlPhase2();
  }
  /**
   * Entry point for the Django scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapeDjango2() is run; scrapeDjango1() is left
   * commented out.
   */
  public function djangoAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;

    // Currently disabled pass:
    //$this->scrapeDjango1();
    $this->scrapeDjango2();
  }
  /**
   * Entry point for the iPhone scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapeiPhone() is run; scrapeiPhoneDir() is left
   * commented out.
   */
  public function iphoneAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;

    $this->scrapeiPhone();
    // Currently disabled pass:
    //$this->scrapeiPhoneDir();
  }
  /**
   * Entry point for the Javascript scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapeJavascript() is run.
   */
  public function jsAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;
    $this->scrapeJavascript();
  }
  /**
   * Entry point for the jQuery scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapejQuery2() is run; scrapejQuery() is left
   * commented out.
   */
  public function jqueryAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;

    // Currently disabled pass:
    //$this->scrapejQuery();
    $this->scrapejQuery2();
  }
  /**
   * Entry point for the Android scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapeAndroidFunctions() is run; the package-list
   * and package passes are left commented out.
   */
  public function androidAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;

    // Currently disabled passes:
    //$this->scrapeAndroidPackageList();
    //$this->scrapeAndroidPackages(2);
    $this->scrapeAndroidFunctions();
  }
  /**
   * Entry point for the mootools scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapeMootoolsFunctions() is run.
   */
  public function mootoolsAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;
    $this->scrapeMootoolsFunctions();
  }
  /**
   * Entry point for the Clojure scrape.
   *
   * Outside the 'development' environment the request is handed to
   * _forward('error', 'error'). Otherwise the results buffer and page
   * counter are reset and scrapeClojureFunctions() is run;
   * scrapeClojureHierarchies() is left commented out.
   */
  public function clojureAction() {
    if( 'development' != $this->getInvokeArg('env') ) {
      $this->_forward('error', 'error');
      return;
    }

    $this->view->results = '';
    $this->_pages_scraped = 0;

    // Currently disabled pass:
    //$this->scrapeClojureHierarchies();
    $this->scrapeClojureFunctions();
  }
  /**
   * Imports Clojure functions from the local API snapshot.
   *
   * Reads APPLICATION_PATH/scraper/clojure/api.html and splits it on
   * '<h2 id="' into one chunk per hierarchy. Each chunk is split on '<hr>'
   * into entries, and every '<h3 id="...">' heading of an entry is stored
   * through the functions model. All headings of one entry share that
   * entry's description. Progress and diagnostics are appended to
   * $this->view->results.
   *
   * An unreadable snapshot, or a hierarchy that fetchByName() cannot
   * resolve, is reported and skipped instead of being processed with
   * empty data.
   */
  private function scrapeClojureFunctions() {
    $category = 'Clojure';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $snapshot = APPLICATION_PATH . '/scraper/clojure/api.html';
    $contents = file_get_contents($snapshot);
    if( false === $contents ) {
      // Previously a missing snapshot fell through and imported nothing
      // without leaving any trace in the results.
      $this->view->results .= 'Couldn\'t read ' . $snapshot . ', skipping...' . "\n";
      return;
    }

    // The text before the first '<h2 id="' is not a hierarchy; drop it.
    $hierarchies = array_slice(explode('<h2 id="', $contents), 1);
    foreach( $hierarchies as $hierarchy ) {
      if( !preg_match('/(.+?)">(.+?)<\/h2>/', $hierarchy, $heading) ) {
        $this->view->results .= 'No name found, skipping...' . "\n";
        continue;
      }

      $hierarchy_name = trim($heading[2]);
      // NOTE(review): the literal 1 is presumably the parent hierarchy id
      // under which the Clojure hierarchies were inserted; confirm.
      $sub_id = $this->getHierarchiesModel()->fetchByName($category_id, 1, $hierarchy_name);
      if( !$sub_id ) {
        // Same guard process_section() applies to the same lookup: do not
        // store functions under an unresolved hierarchy id.
        $this->view->results .= $hierarchy_name . "\n";
        $this->view->results .= 'Couldn\'t find any parent hierarchy, skipping...' . "\n";
        continue;
      }

      // The text before the first '<hr>' is the hierarchy heading itself.
      $functions = array_slice(explode('<hr>', $hierarchy), 1);
      foreach( $functions as $function ) {
        if( !preg_match_all('/<h3 id="(.+?)">(.+?)<\/h3>/', $function, $matches) ) {
          $this->view->results .= 'No function info found, skipping...' . "\n";
          $this->view->results .= $function . "\n\n";
          continue;
        }
        // Newlines are flattened so the description can be matched as the
        // text between the last '</h3>' and the next '<br>'.
        if( !preg_match('/(?:.+<\/h3>) (.+?)<br>/', str_replace("\n", ' ', $function), $desc_matches) ) {
          $this->view->results .= 'No desc found, skipping...' . "\n";
          $this->view->results .= $function . "\n\n";
          continue;
        }
        $desc = trim(strip_tags($desc_matches[1]));

        $total = count($matches[0]);
        for( $index = 0; $index < $total; ++$index ) {
          $url = 'http://clojure.org/api#'.$matches[1][$index];
          $function_name = trim(str_replace('&amp;', '&', strip_tags($matches[2][$index])));

          $this->view->results .= $sub_id ."\n";
          $this->view->results .= $function_name ."\n";
          $this->view->results .= $url ."\n";
          $this->view->results .= $desc ."\n\n";

          $this->getFunctionsModel()->insertOrUpdateFunction(array(
            'category' => $category_id,
            'hierarchy' => $sub_id,
            'name' => $function_name,
            'url' => $url,
            'short_description' => $desc,
            'scrapeable' => 0
          ));
        }
      }
    }
  }
  /**
   * Imports the Clojure hierarchies from the local API snapshot.
   *
   * Reads APPLICATION_PATH/scraper/clojure/api.html and splits it on
   * '<h2 id="'. For every chunk whose heading can be parsed, a hierarchy is
   * inserted with the heading text as its name and
   * 'http://clojure.org/api#<id>' as its URL. Whatever insert() returns is
   * appended to $this->view->results.
   *
   * An unreadable snapshot is reported instead of silently importing
   * nothing.
   */
  private function scrapeClojureHierarchies() {
    $category = 'Clojure';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $snapshot = APPLICATION_PATH . '/scraper/clojure/api.html';
    $contents = file_get_contents($snapshot);
    if( false === $contents ) {
      $this->view->results .= 'Couldn\'t read ' . $snapshot . ', skipping...' . "\n";
      return;
    }

    // The text before the first '<h2 id="' is not a hierarchy; drop it.
    $hierarchies = array_slice(explode('<h2 id="', $contents), 1);
    foreach( $hierarchies as $hierarchy ) {
      if( !preg_match('/(.+?)">(.+?)<\/h2>/', $hierarchy, $matches) ) {
        $this->view->results .= 'No name found, skipping...' . "\n";
        continue;
      }
      $name = trim($matches[2]);
      $url = 'http://clojure.org/api#'.$matches[1];
      // NOTE(review): the arguments 1 and 0 are presumably the parent
      // hierarchy id and a "scrapeable" flag; confirm against insert().
      $this->view->results .= $this->getHierarchiesModel()->insert($category_id, 1, $name, $url, 0)."\n";
    }
  }
  /**
   * Scrapes every scrapeable mootools hierarchy page.
   *
   * Each page is downloaded from the hierarchy's source_url. If it contains
   * '<h2 id=...>' headings labelled Function, Method, Property, Selector or
   * Event, one function is stored per heading. Names are prefixed with the
   * hierarchy name unless the hierarchy is 'Core' or the name starts with
   * '$'. Otherwise the page is searched for
   * '<li>name - (<em>type</em>) text</li>' feature items and those are
   * stored instead. A page that yields neither is skipped; a page that
   * yields either is passed to touch() on the hierarchies model. Progress
   * is appended to $this->view->results.
   */
  private function scrapeMootoolsFunctions() {
    $category = 'mootools';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    foreach( $scrapeable as $hierarchy ) {
      $this->view->results .= $hierarchy['name'] . "\n";

      $source_url = $hierarchy['source_url'];
      if( !$source_url ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);
      // Everything from the first function heading onwards.
      $data = substr($contents, strpos($contents, '<h2 id="'));

      if( preg_match_all('/<h2 id=".+?"(?: class="description")?><a href="(.+?)">(?:(?:.+? )?(?:Function|Method|Property|Selector|Event)): (.+?)<\/a><\/h2>/', $data, $headings ) ) {
        // One chunk per heading; the text before the first heading is dropped.
        foreach( array_slice(explode('<h2 id="', $data), 1) as $chunk ) {
          $flattened = str_replace("\n", ' ', $chunk);
          $desc = preg_match('/<p class="description">(.+?)<\/p>/', $flattened, $desc_match)
            ? $desc_match[1]
            : '';

          if( !preg_match('/.+?"(?: class="description")?><a href="(.+?)">(?:(?:.+? )?(?:Function|Method|Property|Selector|Event)): (.+?)<\/a><\/h2>/', $chunk, $heading ) ) {
            $this->view->results .= 'Couldn\'t find the function name, skipping...' . "\n";
            continue;
          }

          $url = $source_url . $heading[1];
          $name = trim($heading[2]);
          // Core members and '$' helpers keep their bare name.
          if( !($hierarchy['name'] == 'Core' || $name[0] == '$') ) {
            $name = $hierarchy['name'].'.'.$name;
          }
          $desc = trim(strip_tags($desc));

          $this->view->results .= $name."\n";
          $this->view->results .= $url."\n";
          $this->view->results .= $desc."\n\n";

          $this->getFunctionsModel()->insertOrUpdateFunction(array(
            'category' => $category_id,
            'hierarchy' => $hierarchy['id'],
            'name' => $name,
            'url' => $url,
            'short_description' => $desc,
            'scrapeable' => 0
          ));
        }
      } else {
        $this->view->results .= 'No functions found, checking for features...' . "\n";
        if( !preg_match_all('/<li>(.+?) - \(<em>(.+?)<\/em>\) (.+?)<\/li>/', $contents, $features) ) {
          $this->view->results .= 'No features found, skipping...' . "\n";
          continue;
        }

        foreach( array_keys($features[0]) as $i ) {
          $url = $source_url;
          $name = trim($features[1][$i]);
          $desc = trim(strip_tags($features[3][$i]));

          $this->view->results .= $name."\n";
          $this->view->results .= $url."\n";
          $this->view->results .= $desc."\n\n";

          $this->getFunctionsModel()->insertOrUpdateFunction(array(
            'category' => $category_id,
            'hierarchy' => $hierarchy['id'],
            'name' => $name,
            'url' => $url,
            'short_description' => $desc,
            'scrapeable' => 0
          ));
        }
      }

      $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
    }
  }
  /**
   * Scrapes every scrapeable Android class page.
   *
   * Each page is downloaded from the hierarchy's source_url. The class name
   * is taken from the 'jd-inheritance-class-cell' table cell. The
   * description is the text between '<h2>Class Overview</h2>' and the next
   * '</p>', with tags and newlines removed; it stays empty when either
   * marker is missing. The class is stored through the functions model and
   * the hierarchy is passed to touch(). Progress is appended to
   * $this->view->results.
   *
   * A page without a class name is skipped and the loop moves on to the
   * next hierarchy. The original used `break` here, which contradicted its
   * own "skipping..." message and abandoned every remaining page.
   */
  private function scrapeAndroidFunctions() {
    $category = 'android';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }
    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    $OVERVIEW_TXT = '<h2>Class Overview</h2>';

    foreach( $scrapeable as $hierarchy ) {
      $this->view->results .= $hierarchy['name'] . "\n";
      if( !$hierarchy['source_url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }
      $source_url = $hierarchy['source_url'];
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);
      if( !preg_match('/<td colspan="1" class="jd-inheritance-class-cell">(.+?)<\/td>/', $contents, $matches) ) {
        $this->view->results .= 'No name found, skipping...' . "\n";
        // Skip just this page, like every other scraper in this class.
        continue;
      }
      $name = $matches[1];
      $this->view->results .= $name ."\n";

      $desc = '';
      $desc_start = strpos($contents, $OVERVIEW_TXT);
      if( false !== $desc_start ) {
        $desc_start += strlen($OVERVIEW_TXT);
        $desc_end = strpos($contents, '</p>', $desc_start);
        if( false !== $desc_end ) {
          $desc = trim(strip_tags(str_replace("\n", ' ', substr($contents, $desc_start, $desc_end - $desc_start))));
        }
      }
      if( $desc == '' ) {
        $this->view->results .= 'No description found...'."\n";
      } else {
        $this->view->results .= $desc ."\n";
      }

      $this->getFunctionsModel()->insertOrUpdateFunction(array(
        'category' => $category_id,
        'hierarchy' => $hierarchy['id'],
        'name' => $name,
        'url' => $source_url,
        'short_description' => $desc,
        'scrapeable' => 1
      ));
      $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
    }
  }
  /**
   * Processes one named section ('<li><h2>NAME</h2>' ...) of an Android
   * package page.
   *
   * Mode 1 inserts the section itself as a hierarchy below $hierarchy,
   * using $source_url. Mode 2 looks the section hierarchy up by name and
   * inserts every '<li><a href=...>' member of the section below it. Any
   * other mode does nothing. Output is appended to $this->view->results.
   *
   * @param string $links       Page HTML to search.
   * @param string $name        Section heading text, e.g. 'Classes'.
   * @param int    $mode        1 or 2, see above.
   * @param mixed  $category_id Category id passed through to the model.
   * @param mixed  $hierarchy   Id of the package hierarchy being processed.
   * @param string $source_url  URL of the package page.
   * @return bool False when a mode-2 section is present but cannot be
   *              processed (no closing marker, unknown hierarchy, no
   *              members); true otherwise, including when the section is
   *              absent from the page.
   */
  private function process_section($links, $name, $mode, $category_id, $hierarchy, $source_url) {
    $interface_start = strpos($links, '<li><h2>'.$name.'</h2>');
    if( $interface_start === false ) {
      // The page has no such section; that is not a failure.
      return true;
    }

    if( $mode == 1 ) {
      // NOTE(review): the trailing 0 is presumably a "scrapeable" flag; confirm.
      $this->view->results .= $this->getHierarchiesModel()->insert($category_id, $hierarchy, $name, $source_url, 0)."\n";
      return true;
    }
    if( $mode != 2 ) {
      return true;
    }

    $interface_end = strpos($links, ' </li>', $interface_start);
    if( $interface_end === false ) {
      // Without this check the false end offset produced a negative substr
      // length, so an arbitrary slice of the page was parsed as the section.
      $this->view->results .= 'Couldn\'t find the end of the '.$name.' section, skipping...' . "\n";
      return false;
    }
    $data = substr($links, $interface_start, $interface_end - $interface_start);

    $sub_id = $this->getHierarchiesModel()->fetchByName($category_id, $hierarchy, $name);
    if( !$sub_id ) {
      $this->view->results .= $hierarchy."\n";
      $this->view->results .= $name."\n";
      $this->view->results .= 'Couldn\'t find any parent hierarchy, skipping...' . "\n";
      return false;
    }

    if( !preg_match_all('/<li><a href="(.+?)">(.+?)<\/a>(?:&lt;T&gt;)?<\/li>/', $data, $matches) ) {
      $this->view->results .= 'Couldn\'t find any members name, skipping...' . "\n";
      return false;
    }

    $total = count($matches[0]);
    for( $index = 0; $index < $total; ++$index ) {
      $member_name = $matches[2][$index];
      $url = 'http://developer.android.com'.$matches[1][$index];
      $this->view->results .= $this->getHierarchiesModel()->insert($category_id, $sub_id, $member_name, $url, 1)."\n";
    }
    return true;
  }
  /**
   * Scrapes every scrapeable Android package page.
   *
   * Each page is downloaded and the part from the
   * '</div> <!-- end resize-packages -->' marker onwards is handed to
   * process_section() for 'Interfaces', 'Classes', 'Exceptions' and
   * 'Enums', in that order, stopping at the first section that fails. In
   * mode 2 the hierarchy is passed to touch() only when every section
   * succeeded.
   *
   * A page that cannot be downloaded, or comes back empty, is reported and
   * skipped. Previously such a page contained no sections, so every
   * process_section() call returned true and the hierarchy was touch()ed
   * although nothing had been scraped.
   *
   * @param int $mode Passed through to process_section(); 2 also enables
   *                  the touch() call.
   */
  private function scrapeAndroidPackages($mode) {
    $category = 'android';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }
    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    $sections = array('Interfaces', 'Classes', 'Exceptions', 'Enums');

    foreach( $scrapeable as $hierarchy ) {
      if( !$hierarchy['source_url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }
      $source_url = $hierarchy['source_url'];

      $contents = file_get_contents($source_url);
      if( false === $contents || '' === $contents ) {
        $this->view->results .= 'Couldn\'t fetch ' . $source_url . ', skipping...' . "\n";
        continue;
      }

      // When the marker is missing the whole page is searched, as before.
      $start_index = strpos($contents, '</div> <!-- end resize-packages -->');
      $links = ( false === $start_index ) ? $contents : substr($contents, $start_index);

      $succeeded = true;
      foreach( $sections as $section ) {
        if( !$this->process_section($links, $section, $mode, $category_id, $hierarchy['id'], $source_url) ) {
          // Later sections are not attempted once one has failed.
          $succeeded = false;
          break;
        }
      }

      if( $mode == 2 && $succeeded ) {
        $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
      }
    }
  }
  /**
   * Imports the list of Android packages.
   *
   * Downloads http://developer.android.com/reference/packages.html and
   * takes everything from '<div id="packages-nav">' onwards. One hierarchy
   * is inserted per '<a href=...>...</a></li>' link, with the link text as
   * its name and the href prefixed with 'http://developer.android.com' as
   * its URL. Whatever insert() returns is appended to $this->view->results.
   */
  private function scrapeAndroidPackageList() {
    $category = 'android';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $page = file_get_contents('http://developer.android.com/reference/packages.html');
    $nav_offset = strpos($page, '<div id="packages-nav">');
    if( false === $nav_offset ) {
      $this->view->results .= 'Couldn\'t find the packages navigation, skipping...' . "\n";
      return;
    }

    $nav_html = substr($page, $nav_offset);
    if( !preg_match_all('/<a href="(.+?)">(.+?)<\/a><\/li>/', $nav_html, $found) ) {
      $this->view->results .= 'Couldn\'t find any links, skipping...' . "\n";
      return;
    }

    foreach( array_keys($found[0]) as $i ) {
      $package_name = $found[2][$i];
      $package_url = 'http://developer.android.com'.$found[1][$i];
      $this->view->results .= $this->getHierarchiesModel()->insert($category_id, 1, $package_name, $package_url, 1)."\n";
    }
  }
  /**
   * Scrapes the options, events and methods of every scrapeable jQuery
   * hierarchy page.
   *
   * Each page is downloaded from the hierarchy's source_url and its
   * 'options', 'events' and 'methods' blocks are processed in that order by
   * scrapejQuery2Section(). As in the original implementation, a block that
   * cannot be located ends the processing of that page, so later blocks of
   * the same page are not attempted. Progress is appended to
   * $this->view->results.
   *
   * The three formerly copy-pasted blocks now share one helper; behaviour
   * is unchanged.
   */
  private function scrapejQuery2() {
    $category = 'jQuery';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }
    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    // Set to false for a dry run that only reports what would be stored.
    $is_saving = true;

    // div id, item class, and the text put before/after the item label
    // when building the stored name.
    $sections = array(
      array('options', 'option', ' ', ''),
      array('events', 'event', ' ', ''),
      array('methods', 'method', '(\'', '\')')
    );

    foreach( $scrapeable as $hierarchy ) {
      $this->view->results .= $hierarchy['name'] . "\n";
      if( !$hierarchy['source_url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }
      $source_url = $hierarchy['source_url'];
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";
      $contents = file_get_contents($source_url);

      foreach( $sections as $section ) {
        $found = $this->scrapejQuery2Section(
          $contents, $section[0], $section[1], $section[2], $section[3],
          $category_id, $hierarchy, $source_url, $is_saving
        );
        if( !$found ) {
          break;
        }
      }
    }
  }

  /**
   * Scrapes one '<div id="SECTION">' block of a jQuery page.
   *
   * The block runs from its opening div to the next '<div id="'. It is
   * split on '<li class="ITEM"', and every piece that carries an
   * '<h3 class="ITEM-name">' link followed by a '<p>' description is
   * reported and, when $is_saving is true, stored and followed by a touch()
   * of the hierarchy.
   *
   * @param string|false $contents    Page HTML as returned by file_get_contents().
   * @param string       $section     Id of the block's div, e.g. 'options'.
   * @param string       $item        Class of its items, e.g. 'option'.
   * @param string       $name_prefix Text between the lower-cased hierarchy
   *                                  name and the item label.
   * @param string       $name_suffix Text appended after the item label.
   * @param mixed        $category_id Category id passed to the models.
   * @param array        $hierarchy   Hierarchy row; 'id' and 'name' are read.
   * @param string       $source_url  Page URL; item hrefs are appended to it.
   * @param bool         $is_saving   Whether matches are written to the models.
   * @return bool False when the block or its end cannot be located.
   */
  private function scrapejQuery2Section($contents, $section, $item, $name_prefix, $name_suffix, $category_id, $hierarchy, $source_url, $is_saving) {
    $start_index = strpos($contents, '<div id="'.$section.'">');
    if( $start_index === false ) {
      $this->view->results .= 'Couldn\'t find the '.$section.', skipping...' . "\n";
      return false;
    }
    // +1 so the search does not match the block's own opening div.
    $end_index = strpos($contents, '<div id="', $start_index+1);
    if( $end_index === false ) {
      $this->view->results .= 'Couldn\'t find the end of the '.$section.', skipping...' . "\n";
      return false;
    }

    $source_name = strtolower($hierarchy['name']);
    // Newlines are removed so the '.+?' in the pattern can span what were
    // separate lines.
    $data = str_replace("\n", '', substr($contents, $start_index, $end_index - $start_index));
    $pattern = '/<h3 class="'.$item.'-name"><a href="(.+?)">(.+?)<\/a><\/h3>.+?<p>(.+?)<\/p>/';

    foreach( explode('<li class="'.$item.'"', $data) as $element ) {
      if( !preg_match($pattern, $element, $matches) ) {
        continue;
      }
      $link = $source_url.$matches[1];
      $label = trim(str_replace(' )', ')', str_replace('&nbsp;', '', strip_tags($matches[2]))));
      $name = $source_name.$name_prefix.$label.$name_suffix;
      $desc = trim(strip_tags($matches[3], '<b>'));

      $this->view->results .= $link.' - '.$name."\n";
      $this->view->results .= $desc."\n\n";

      if( $is_saving ) {
        $this->getFunctionsModel()->insertOrUpdateFunction(array(
          'category' => $category_id,
          'hierarchy' => $hierarchy['id'],
          'name' => $name,
          'url' => $link,
          'short_description' => $desc
        ));
        $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
      }
    }
    return true;
  }
  /**
   * Scrapes the options list of every scrapeable jQuery hierarchy page.
   *
   * Each page is downloaded from the hierarchy's source_url. The region
   * between '<div class="options list">' and '<div class="printfooter">' is
   * split on 'tr class="option"'. Every piece that has a titled link
   * followed by a 'desc' cell is reported in $this->view->results and
   * stored through the functions model, after which touch() is called on
   * the hierarchy. Pages missing either marker are skipped.
   */
  private function scrapejQuery() {
    $category = 'jQuery';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    foreach( $scrapeable as $hierarchy ) {
      $this->view->results .= $hierarchy['name'] . "\n";

      $source_url = $hierarchy['source_url'];
      if( !$source_url ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $page = file_get_contents($source_url);

      $list_from = strpos($page, '<div class="options list">');
      if( false === $list_from ) {
        $this->view->results .= 'Couldn\'t find the options list, skipping...' . "\n";
        continue;
      }
      $list_to = strpos($page, '<div class="printfooter">', $list_from);
      if( false === $list_to ) {
        $this->view->results .= 'Couldn\'t find the end of the options list, skipping...' . "\n";
        continue;
      }

      $rows = explode('tr class="option"', substr($page, $list_from, $list_to - $list_from));
      foreach( $rows as $row ) {
        if( !preg_match('/<a href="(.+?)" title=".+?">(.+?)<\/a><\/b>.+?<td colspan="2" class="desc">(.+?)<\/td>/', $row, $cells) ) {
          // Uncomment to inspect pieces that did not match:
          //$this->view->results .= htmlentities($row)."\n\n";
          continue;
        }

        $link = 'http://docs.jquery.com'.$cells[1];
        $label = str_replace('&nbsp;', '', strip_tags($cells[2]));
        $name = trim(str_replace(' )', ')', $label));
        $desc = trim(strip_tags($cells[3], '<b>'));

        $this->view->results .= $link.' - '.$name."\n";
        $this->view->results .= $desc."\n\n";

        $this->getFunctionsModel()->insertOrUpdateFunction(array(
          'category' => $category_id,
          'hierarchy' => $hierarchy['id'],
          'name' => $name,
          'url' => $link,
          'short_description' => $desc
        ));
        $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
      }
    }
  }
  602. private function scrapeJavascript() {
  603. $category = 'Javascript';
  604. $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
  605. if( !$category_id ) {
  606. $this->invalid_category($category);
  607. return;
  608. }
  609. $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
  610. if( empty($scrapeable) ) {
  611. $this->nothing_to_scrape($category);
  612. return;
  613. }
  614. foreach( $scrapeable as $hierarchy ) {
  615. $this->view->results .= $hierarchy['name'] . "\n";
  616. if( !$hierarchy['source_url'] ) {
  617. $this->view->results .= 'No source URL specified, skipping...' . "\n";
  618. continue;
  619. }
  620. $source_url = $hierarchy['source_url'];
  621. $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";
  622. $contents = file_get_contents(APPLICATION_PATH . '/scraper/js/'.$hierarchy['id'].'.html');
  623. if( preg_match("/<h2>The (.+?) Object<\/h2>\n<p>(.+)?<\/p>/", $contents, $matches) ) {
  624. $object_name = $matches[1];
  625. $description = $matches[2];
  626. $this->view->results .= $object_name."\n";
  627. $this->view->results .= $description."\n";
  628. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  629. 'category' => $category_id,
  630. 'hierarchy' => $hierarchy['id'],
  631. 'name' => $object_name,
  632. 'url' => $source_url,
  633. 'short_description' => $description
  634. ));
  635. } else {
  636. $this->view->results .= 'We couldn\'t find the description...' . "\n";
  637. }
  638. $is_dom = strpos($contents, 'HTML DOM <span class="color_h1">') !== false;
  639. if( $is_dom ) {
  640. $object_name = strtolower(str_replace(' ', '', $hierarchy['name']));
  641. }
  642. $properties_index = strpos($contents, 'Object Collections</h');
  643. $end_index = strpos($contents, '</table>', $properties_index);
  644. if( $properties_index !== FALSE && $end_index !== FALSE ) {
  645. $properties = array_slice(
  646. explode(
  647. '<tr>',
  648. substr($contents, $properties_index, $end_index - $properties_index)
  649. ),
  650. 2
  651. );
  652. foreach( $properties as $property ) {
  653. $elements = explode('<td', $property);
  654. foreach( $elements as &$element ) {
  655. $element = trim(
  656. str_replace(
  657. '&nbsp;',
  658. '',
  659. preg_replace(
  660. '/^.+?>/',
  661. '',
  662. str_replace(
  663. "\n",
  664. '',
  665. strip_tags(
  666. $element,
  667. '<a>'
  668. )
  669. )
  670. )
  671. )
  672. );
  673. }
  674. if( count($elements) <= 1 ) {
  675. $this->view->results .= 'Invalid element list.'."\n";
  676. $this->view->results .= print_r($property, true);
  677. break;
  678. }
  679. $link = $elements[1];
  680. $desc = $elements[2];
  681. $ff = $elements[3];
  682. if( count($elements) >= 6 ) {
  683. $ns = $elements[4];
  684. $ie = $elements[5];
  685. } else {
  686. $ie = $elements[4];
  687. }
  688. $name = '';
  689. if( $link ) {
  690. if( preg_match('/<a href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  691. $link = $matches[1];
  692. $name = $matches[2];
  693. } else {
  694. $name = $link;
  695. $link = '';
  696. }
  697. }
  698. $name = str_replace('[]', '', $name);
  699. $this->view->results .= $object_name.'.'.$name ." - ";
  700. $this->view->results .= $link ." - ".$is_dom.' - ';
  701. $this->view->results .= $desc ."\n";
  702. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  703. 'category' => $category_id,
  704. 'hierarchy' => $hierarchy['id'],
  705. 'name' => $object_name.'.'.$name,
  706. 'url' => $link,
  707. 'short_description' => $desc
  708. ));
  709. }
  710. }
  711. $properties_index = strpos($contents, 'Object Properties</h');
  712. $end_index = strpos($contents, '</table>', $properties_index);
  713. if( $properties_index !== FALSE && $end_index !== FALSE ) {
  714. $properties = array_slice(
  715. explode(
  716. '<tr>',
  717. substr($contents, $properties_index, $end_index - $properties_index)
  718. ),
  719. 2
  720. );
  721. foreach( $properties as $property ) {
  722. $elements = explode('<td', $property);
  723. foreach( $elements as &$element ) {
  724. $element = trim(
  725. str_replace(
  726. '&nbsp;',
  727. '',
  728. preg_replace(
  729. '/^.+?>/',
  730. '',
  731. str_replace(
  732. "\n",
  733. '',
  734. strip_tags(
  735. $element,
  736. '<a>'
  737. )
  738. )
  739. )
  740. )
  741. );
  742. }
  743. if( count($elements) <= 1 ) {
  744. $this->view->results .= 'Invalid element list.'."\n";
  745. $this->view->results .= print_r($property, true);
  746. break;
  747. }
  748. $link = $elements[1];
  749. $desc = $elements[2];
  750. $ff = $elements[3];
  751. if( count($elements) >= 6 ) {
  752. $ns = $elements[4];
  753. $ie = $elements[5];
  754. } else {
  755. $ie = $elements[4];
  756. }
  757. $name = '';
  758. if( $link ) {
  759. if( preg_match('/<a href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  760. $link = $matches[1];
  761. $name = $matches[2];
  762. } else {
  763. $name = $link;
  764. $link = '';
  765. }
  766. }
  767. $this->view->results .= $object_name.'.'.$name ." - ";
  768. $this->view->results .= $link ." - ".$is_dom.' - ';
  769. $this->view->results .= $desc ."\n";
  770. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  771. 'category' => $category_id,
  772. 'hierarchy' => $hierarchy['id'],
  773. 'name' => $object_name.'.'.$name,
  774. 'url' => $link,
  775. 'short_description' => $desc
  776. ));
  777. }
  778. }
  779. $methods_index = strpos($contents, 'Object Methods</h');
  780. $end_index = strpos($contents, '</table>', $methods_index);
  781. if( $methods_index !== FALSE && $end_index !== FALSE ) {
  782. $methods = array_slice(
  783. explode(
  784. '<tr>',
  785. substr($contents, $methods_index, $end_index - $methods_index)
  786. ),
  787. 2
  788. );
  789. foreach( $methods as $method ) {
  790. $elements = explode('<td valign="top">', $method);
  791. foreach( $elements as &$element ) {
  792. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  793. }
  794. $link = $elements[1];
  795. $desc = $elements[2];
  796. $ff = $elements[3];
  797. if( count($elements) >= 6 ) {
  798. $ns = $elements[4];
  799. $ie = $elements[5];
  800. } else {
  801. $ie = $elements[4];
  802. }
  803. $name = '';
  804. if( $link ) {
  805. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  806. $link = $matches[1];
  807. $name = $matches[2];
  808. } else {
  809. $name = $link;
  810. $link = '';
  811. }
  812. }
  813. $name = preg_replace('/(\(.*?\))/', '', $name);
  814. $this->view->results .= $object_name.'.'.$name ." - ";
  815. $this->view->results .= $link ." - ";
  816. $this->view->results .= $desc ."\n";
  817. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  818. 'category' => $category_id,
  819. 'hierarchy' => $hierarchy['id'],
  820. 'name' => $object_name.'.'.$name,
  821. 'url' => $link,
  822. 'short_description' => $desc,
  823. 'scrapeable' => 1
  824. ));
  825. }
  826. continue;
  827. }
  828. $start_index = strpos($contents, 'Top-level Functions</h2>');
  829. $end_index = strpos($contents, '</table>', $start_index);
  830. $start_prop_index = strpos($contents, 'Top-level Properties</h2>');
  831. $end_prop_index = strpos($contents, '</table>', $start_prop_index);
  832. if( $start_index !== false && $end_index !== false &&
  833. $start_prop_index !== false && $end_prop_index !== false ) {
  834. $functions = array_slice(
  835. explode(
  836. '<tr>',
  837. substr($contents, $start_index, $end_index - $start_index)
  838. ),
  839. 2
  840. );
  841. foreach( $functions as $function ) {
  842. $elements = explode('<td valign="top">', $function);
  843. foreach( $elements as &$element ) {
  844. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  845. }
  846. $link = $elements[1];
  847. $desc = $elements[2];
  848. $ff = $elements[3];
  849. if( count($elements) >= 6 ) {
  850. $ns = $elements[4];
  851. $ie = $elements[5];
  852. } else {
  853. $ie = $elements[4];
  854. }
  855. $name = '';
  856. if( $link ) {
  857. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  858. $link = $matches[1];
  859. $name = $matches[2];
  860. } else {
  861. $name = $link;
  862. $link = '';
  863. }
  864. }
  865. $name = preg_replace('/(\(.*?\))/', '', $name);
  866. $this->view->results .= $name ." - ";
  867. $this->view->results .= $link ." - ";
  868. $this->view->results .= $desc ."\n";
  869. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  870. 'category' => $category_id,
  871. 'hierarchy' => $hierarchy['id'],
  872. 'name' => $name,
  873. 'url' => $link,
  874. 'short_description' => $desc,
  875. 'scrapeable' => 1
  876. ));
  877. }
  878. $properties = array_slice(
  879. explode(
  880. '<tr>',
  881. substr($contents, $start_prop_index, $end_prop_index - $start_prop_index)
  882. ),
  883. 2
  884. );
  885. foreach( $properties as $property ) {
  886. $elements = explode('<td valign="top">', $property);
  887. foreach( $elements as &$element ) {
  888. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  889. }
  890. $link = $elements[1];
  891. $desc = $elements[2];
  892. $ff = $elements[3];
  893. if( count($elements) >= 6 ) {
  894. $ns = $elements[4];
  895. $ie = $elements[5];
  896. } else {
  897. $ie = $elements[4];
  898. }
  899. $name = '';
  900. if( $link ) {
  901. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  902. $link = $matches[1];
  903. $name = $matches[2];
  904. } else {
  905. $name = $link;
  906. $link = '';
  907. }
  908. }
  909. $name = preg_replace('/(\(.*?\))/', '', $name);
  910. $this->view->results .= $name ." - ";
  911. $this->view->results .= $link ." - ";
  912. $this->view->results .= $desc ."\n";
  913. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  914. 'category' => $category_id,
  915. 'hierarchy' => $hierarchy['id'],
  916. 'name' => $name,
  917. 'url' => $link,
  918. 'short_description' => $desc,
  919. 'scrapeable' => 1
  920. ));
  921. }
  922. continue;
  923. }
  924. $start_index = strpos($contents, '<h2>Event Handlers</h2>');
  925. $end_index = strpos($contents, '</table>', $start_index);
  926. if( $start_index !== false && $end_index !== false ) {
  927. $events = array_slice(
  928. explode(
  929. '<tr>',
  930. substr($contents, $start_index, $end_index - $start_index)
  931. ),
  932. 2
  933. );
  934. foreach( $events as $event ) {
  935. $elements = explode('<td valign="top">', $event);
  936. foreach( $elements as &$element ) {
  937. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  938. }
  939. $link = $elements[1];
  940. $desc = $elements[2];
  941. $ff = $elements[3];
  942. if( count($elements) >= 6 ) {
  943. $ns = $elements[4];
  944. $ie = $elements[5];
  945. } else {
  946. $ie = $elements[4];
  947. }
  948. $name = '';
  949. if( $link ) {
  950. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  951. $link = $matches[1];
  952. $name = $matches[2];
  953. } else {
  954. $name = $link;
  955. $link = '';
  956. }
  957. }
  958. $name = preg_replace('/(\(.*?\))/', '', $name);
  959. if( $is_dom ) {
  960. $name = 'event.'.$name;
  961. }
  962. $this->view->results .= $name ." - ";
  963. $this->view->results .= $link ." - " . $is_dom.' - ';
  964. $this->view->results .= $desc ."\n";
  965. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  966. 'category' => $category_id,
  967. 'hierarchy' => $hierarchy['id'],
  968. 'name' => $name,
  969. 'url' => $link,
  970. 'short_description' => $desc,
  971. 'scrapeable' => 1
  972. ));
  973. }
  974. if( !$is_dom ) {
  975. continue;
  976. }
  977. }
  978. $start_index = strpos($contents, 'Keyboard Attributes</h');
  979. $end_index = strpos($contents, '</table>', $start_index);
  980. if( $start_index !== false && $end_index !== false ) {
  981. $events = array_slice(
  982. explode(
  983. '<tr>',
  984. substr($contents, $start_index, $end_index - $start_index)
  985. ),
  986. 2
  987. );
  988. foreach( $events as $event ) {
  989. $elements = explode('<td valign="top">', $event);
  990. foreach( $elements as &$element ) {
  991. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  992. }
  993. $link = $elements[1];
  994. $desc = $elements[2];
  995. $ff = $elements[3];
  996. if( count($elements) >= 6 ) {
  997. $ns = $elements[4];
  998. $ie = $elements[5];
  999. } else {
  1000. $ie = $elements[4];
  1001. }
  1002. $name = '';
  1003. if( $link ) {
  1004. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1005. $link = $matches[1];
  1006. $name = $matches[2];
  1007. } else {
  1008. $name = $link;
  1009. $link = '';
  1010. }
  1011. }
  1012. $name = preg_replace('/(\(.*?\))/', '', $name);
  1013. if( $is_dom ) {
  1014. $name = 'event.'.$name;
  1015. }
  1016. $this->view->results .= $name ." - ";
  1017. $this->view->results .= $link ." - " . $is_dom.' - ';
  1018. $this->view->results .= $desc ."\n";
  1019. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1020. 'category' => $category_id,
  1021. 'hierarchy' => $hierarchy['id'],
  1022. 'name' => $name,
  1023. 'url' => $link,
  1024. 'short_description' => $desc,
  1025. 'scrapeable' => 1
  1026. ));
  1027. }
  1028. if( !$is_dom ) {
  1029. continue;
  1030. }
  1031. }
  1032. $start_index = strpos($contents, 'Event Attributes</h');
  1033. $end_index = strpos($contents, '</table>', $start_index);
  1034. if( $start_index !== false && $end_index !== false ) {
  1035. $events = array_slice(
  1036. explode(
  1037. '<tr>',
  1038. substr($contents, $start_index, $end_index - $start_index)
  1039. ),
  1040. 2
  1041. );
  1042. foreach( $events as $event ) {
  1043. $elements = explode('<td valign="top">', $event);
  1044. foreach( $elements as &$element ) {
  1045. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  1046. }
  1047. $link = $elements[1];
  1048. $desc = $elements[2];
  1049. $ff = $elements[3];
  1050. if( count($elements) >= 6 ) {
  1051. $ns = $elements[4];
  1052. $ie = $elements[5];
  1053. } else {
  1054. $ie = $elements[4];
  1055. }
  1056. $name = '';
  1057. if( $link ) {
  1058. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1059. $link = $matches[1];
  1060. $name = $matches[2];
  1061. } else {
  1062. $name = $link;
  1063. $link = '';
  1064. }
  1065. }
  1066. $name = preg_replace('/(\(.*?\))/', '', $name);
  1067. if( $is_dom ) {
  1068. $name = 'event.'.$name;
  1069. }
  1070. $this->view->results .= $name ." - ";
  1071. $this->view->results .= $link ." - " . $is_dom.' - ';
  1072. $this->view->results .= $desc ."\n";
  1073. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1074. 'category' => $category_id,
  1075. 'hierarchy' => $hierarchy['id'],
  1076. 'name' => $name,
  1077. 'url' => $link,
  1078. 'short_description' => $desc,
  1079. 'scrapeable' => 1
  1080. ));
  1081. }
  1082. if( !$is_dom ) {
  1083. continue;
  1084. }
  1085. }
  1086. $start_index = strpos($contents, '<h3>Properties</h3>');
  1087. $end_index = strpos($contents, '</table>', $start_index);
  1088. if( $start_index !== false && $end_index !== false ) {
  1089. $events = array_slice(
  1090. explode(
  1091. '<tr>',
  1092. substr($contents, $start_index, $end_index - $start_index)
  1093. ),
  1094. 2
  1095. );
  1096. foreach( $events as $event ) {
  1097. $elements = explode('<td valign="top">', $event);
  1098. foreach( $elements as &$element ) {
  1099. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  1100. }
  1101. if( count($elements) < 2 ) {
  1102. $this->view->results .= 'Missing '.print_r($event);
  1103. continue;
  1104. }
  1105. $link = $elements[1];
  1106. $desc = $elements[2];
  1107. $ff = $elements[3];
  1108. if( count($elements) >= 6 ) {
  1109. $ns = $elements[4];
  1110. $ie = $elements[5];
  1111. } else {
  1112. $ie = $elements[4];
  1113. }
  1114. $name = '';
  1115. if( $link ) {
  1116. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1117. $link = $matches[1];
  1118. $name = $matches[2];
  1119. } else {
  1120. $name = $link;
  1121. $link = '';
  1122. }
  1123. }
  1124. $name = preg_replace('/(\(.*?\))/', '', $name);
  1125. $this->view->results .= $object_name.'.'.$name ." - ";
  1126. $this->view->results .= $link ." - " . $is_dom.' - ';
  1127. $this->view->results .= $desc ."\n";
  1128. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1129. 'category' => $category_id,
  1130. 'hierarchy' => $hierarchy['id'],
  1131. 'name' => $object_name.'.'.$name,
  1132. 'url' => $link,
  1133. 'short_description' => $desc,
  1134. 'scrapeable' => 1
  1135. ));
  1136. }
  1137. if( !$is_dom ) {
  1138. continue;
  1139. }
  1140. }
  1141. $start_index = 0;
  1142. do {
  1143. $start_index = strpos($contents, 'properties</a></h3>', $start_index);
  1144. $end_index = strpos($contents, '</table>', $start_index);
  1145. if( $start_index !== false && $end_index !== false ) {
  1146. $events = array_slice(
  1147. explode(
  1148. '<tr>',
  1149. substr($contents, $start_index, $end_index - $start_index)
  1150. ),
  1151. 2
  1152. );
  1153. foreach( $events as $event ) {
  1154. $elements = explode('<td valign="top">', $event);
  1155. foreach( $elements as &$element ) {
  1156. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  1157. }
  1158. $link = $elements[1];
  1159. $desc = $elements[2];
  1160. $ff = $elements[3];
  1161. if( count($elements) >= 6 ) {
  1162. $ns = $elements[4];
  1163. $ie = $elements[5];
  1164. } else {
  1165. $ie = $elements[4];
  1166. }
  1167. $name = '';
  1168. if( $link ) {
  1169. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1170. $link = $matches[1];
  1171. $name = $matches[2];
  1172. } else {
  1173. $name = $link;
  1174. $link = '';
  1175. }
  1176. }
  1177. $name = preg_replace('/(\(.*?\))/', '', $name);
  1178. $this->view->results .= $object_name.'.'.$name ." - ";
  1179. $this->view->results .= $link ." - " . $is_dom.' - ';
  1180. $this->view->results .= $desc ."\n";
  1181. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1182. 'category' => $category_id,
  1183. 'hierarchy' => $hierarchy['id'],
  1184. 'name' => $object_name.'.'.$name,
  1185. 'url' => $link,
  1186. 'short_description' => $desc,
  1187. 'scrapeable' => 1
  1188. ));
  1189. }
  1190. if( !$is_dom ) {
  1191. continue;
  1192. }
  1193. }
  1194. $start_index++;
  1195. } while( $start_index !== false );
  1196. $start_index = strpos($contents, 'Standard Properties</h3>');
  1197. $end_index = strpos($contents, '</table>', $start_index);
  1198. if( $start_index !== false && $end_index !== false ) {
  1199. $events = array_slice(
  1200. explode(
  1201. '<tr>',
  1202. substr($contents, $start_index, $end_index - $start_index)
  1203. ),
  1204. 2
  1205. );
  1206. foreach( $events as $event ) {
  1207. $elements = explode('<td valign="top">', $event);
  1208. foreach( $elements as &$element ) {
  1209. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  1210. }
  1211. $link = $elements[1];
  1212. $desc = $elements[2];
  1213. $ff = $elements[3];
  1214. if( count($elements) >= 6 ) {
  1215. $ns = $elements[4];
  1216. $ie = $elements[5];
  1217. } else {
  1218. $ie = $elements[4];
  1219. }
  1220. $name = '';
  1221. if( $link ) {
  1222. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1223. $link = $matches[1];
  1224. $name = $matches[2];
  1225. } else {
  1226. $name = $link;
  1227. $link = '';
  1228. }
  1229. }
  1230. $name = preg_replace('/(\(.*?\))/', '', $name);
  1231. $this->view->results .= $object_name.'.'.$name ." - ";
  1232. $this->view->results .= $link ." - " . $is_dom.' - ';
  1233. $this->view->results .= $desc ."\n";
  1234. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1235. 'category' => $category_id,
  1236. 'hierarchy' => $hierarchy['id'],
  1237. 'name' => $object_name.'.'.$name,
  1238. 'url' => $link,
  1239. 'short_description' => $desc,
  1240. 'scrapeable' => 1
  1241. ));
  1242. }
  1243. if( !$is_dom ) {
  1244. continue;
  1245. }
  1246. }
  1247. $this->view->results .= 'We couldn\'t find the properties or methods...' . "\n";
  1248. }
  1249. }
  /**
   * Registers iPhone class and protocol reference pages as hierarchies.
   *
   * For every framework navigation URL listed below this downloads the
   * framework's docdata.js index, keeps the entries whose title contains
   * "Class Reference" or "Protocol Reference", resolves each entry's
   * installPath against the framework URL, follows the META refresh redirect
   * found on that page and inserts a hierarchy row under the framework's
   * parent id unless one with the same name already exists.
   *
   * Progress and skip messages are appended to $this->view->results. A
   * framework whose index cannot be fetched or decoded is reported and
   * skipped so that the remaining frameworks are still processed.
   *
   * @return void
   */
  private function scrapeiPhoneDir() {
    $category = 'iPhone';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    // Parent hierarchy id => framework navigation URL.
    // NOTE(review): the commented-out entries are presumably frameworks that
    // were already imported -- confirm before re-enabling them.
    $hierarchies = array(
      /*'6' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CocoaTouch/AddressBookUI',
      '7' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CocoaTouch/UIKit',
      '81' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/AudioToolbox',
      '82' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/AudioUnit',
      '83' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/AVFoundation',
      '84' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/CoreAudio',
      '85' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/CoreGraphics',
      '86' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/MediaPlayer',
      '87' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/OpenGLES',
      '88' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/QuartzCore',*/
      '110' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/AddressBook',
      '111' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/CoreFoundation',
      '112' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/CoreLocation',
      '113' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/Foundation',
      '114' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/SystemConfiguration',
      '115' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreOS/CFNetwork',
      '116' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreOS/Security',
      '117' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreOS/System',
    );

    foreach( $hierarchies as $parent_id => $base_url ) {
      $index_url = $base_url.'/docdata.js';
      $contents = file_get_contents($index_url);
      if( $contents === false ) {
        // file_get_contents() returns false on failure; without this check
        // the decoder below would be fed an empty document.
        $this->view->results .= 'Unable to fetch '.$index_url.', skipping...'."\n";
        continue;
      }

      // Escape the double quotes already present, then turn every single
      // quote into a double quote before handing the text to the JSON decoder
      // (presumably because docdata.js is a single-quoted JS literal).
      $contents = str_replace('"', '\"', $contents);
      $contents = str_replace("'", '"', $contents);

      // NOTE(review): depending on the Zend Framework version a malformed
      // document either throws or yields a non-array; both cases are skipped.
      try {
        $data = Zend_Json::decode($contents);
      } catch( Zend_Json_Exception $e ) {
        $data = null;
      }
      if( !is_array($data) ) {
        $this->view->results .= 'Unable to decode '.$index_url.', skipping...'."\n";
        continue;
      }

      foreach( $data as $item ) {
        if( !is_array($item) || !isset($item['title'], $item['installPath']) ) {
          continue;
        }

        $title = $item['title'];
        if( strpos($title, 'Class Reference') === false &&
            strpos($title, 'Protocol Reference') === false ) {
          continue;
        }
        $name = str_replace(' Reference', '', $title);

        // Resolve the relative installPath against the framework URL: every
        // '..' segment drops the last path component, anything else is appended.
        $ref_url = explode('/', $base_url);
        foreach( explode('/', $item['installPath']) as $dir ) {
          if( $dir === '..' ) {
            array_pop($ref_url);
          } else {
            $ref_url []= $dir;
          }
        }
        $ref_url = implode('/', $ref_url);

        // The resolved page is expected to be a META refresh stub pointing at
        // the real reference document.
        $subdata = file_get_contents($ref_url);
        if( $subdata === false ||
            !preg_match('/<META ID="refresh" HTTP-EQUIV=refresh CONTENT="0; URL=(.+?)">/', $subdata, $matches) ) {
          $this->view->results .= 'Unable to get redirected link, skipping...'."\n";
          continue;
        }
        $ref_url = str_replace('index.html', '', $ref_url).$matches[1];

        // Only insert hierarchies that are not known yet; the insert result is
        // echoed into the results log.
        $id = $this->getHierarchiesModel()->fetchByName($category_id, $parent_id, $name);
        if( !$id ) {
          $this->view->results .= $this->getHierarchiesModel()->insert($category_id, $parent_id, $name, $ref_url, 1)."\n";
        }
      }
    }
  }
  /**
   * Scrapes every scrapeable iPhone hierarchy page into the functions model.
   *
   * For each hierarchy returned by fetchAllScrapeable() this downloads the
   * page at source_url, stores the page itself (name taken from the title
   * attribute of the first anchor, description taken from the first paragraph
   * after the "Overview" heading) and then stores one entry per item found in
   * the "Properties", "Class Methods" and "Instance Methods" sections. Each
   * item's data is a small object literal with keys i (declaration text) and
   * t (section type, see $type_map). The hierarchy is touched only when its
   * page was processed to the end.
   *
   * Progress and skip messages are appended to $this->view->results.
   *
   * @return void
   */
  private function scrapeiPhone() {
    $category = 'iPhone';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    // Section heading => numeric type stored under the "t" key of the data.
    $type_map = array(
      'Properties' => 1,
      'Class Methods' => 2,
      'Instance Methods' => 3
    );
    $overview_marker = '<h2>Overview</h2>';

    foreach( $scrapeable as $hierarchy ) {
      $this->view->results .= "\n".$hierarchy['name'] . "\n";
      if( !$hierarchy['source_url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }
      $source_url = $hierarchy['source_url'];
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);
      if( $contents === false ) {
        // Report the download failure as such instead of letting it surface
        // as a misleading "didn't find the name" message below.
        $this->view->results .= 'Unable to fetch the source URL, skipping...' . "\n";
        continue;
      }

      if( !preg_match('/<BODY bgcolor="#ffffff" onload="initialize_page\(\);"><a name=".+?" title="(.+?)"><\/a>/', $contents, $matches) ) {
        $this->view->results .= 'We didn\'t find the name, skipping...' . "\n";
        continue;
      }
      $name = $matches[1];
      $this->view->results .= ' name: '.$name."\n";
      $this->view->results .= ' link: '.$source_url."\n";

      // The overview is everything between the Overview heading and the first
      // closing paragraph tag that follows it.
      $start_index = strpos($contents, $overview_marker);
      if( $start_index === false ) {
        $this->view->results .= 'We didn\'t find an overview, skipping...' . "\n";
        continue;
      }
      $start_index += strlen($overview_marker);
      $end_index = strpos($contents, '</p>', $start_index);
      if( $end_index === false ) {
        $this->view->results .= 'We couldn\'t find the end of the overview, skipping...' . "\n";
        continue;
      }
      $overview = str_replace("\n", ' ', substr($contents, $start_index, $end_index - $start_index));
      $overview = strip_tags($overview, '<b><code>');
      $this->view->results .= ' desc: '.$overview."\n";

      $this->getFunctionsModel()->insertOrUpdateFunction(array(
        'category' => $category_id,
        'hierarchy' => $hierarchy['id'],
        'name' => $name,
        'url' => $source_url,
        'short_description' => $overview,
        'data' => '{}'
      ));

      // Everything after the overview is split on the section headings; the
      // chunk before the first heading is dropped.
      $rest = substr($contents, $end_index);
      $subsections = array_slice(explode('></a><h2>', $rest), 1);
      foreach( $subsections as $subsection ) {
        $sub_name = substr($subsection, 0, strpos($subsection, '</h2>'));
        $this->view->results .= '<b>'.$sub_name."</b>\n";
        if( !isset($type_map[$sub_name]) ) {
          $this->view->results .= 'Invalid type of section, skipping...'."\n";
          continue;
        }
        $type = $type_map[$sub_name];
        $this->view->results .= $type."\n";

        // Each item starts at an <h3 class="verytight"> heading; the anchor
        // that links to it lives at the end of the preceding chunk.
        $items = explode('<h3 class="verytight">', $subsection);
        $item_count = count($items);
        for( $index = 1; $index < $item_count; ++$index ) {
          $item = $items[$index];
          $prev_item = $items[$index - 1];
          $prev_length = strlen($prev_item);

          // Step backwards over at most four '<a name=' anchors and keep the
          // earliest one reached. The search is only attempted while there is
          // text left of the current position, which keeps the negative
          // offset inside the string (an out-of-range offset is a warning on
          // PHP < 8 and a ValueError on PHP 8).
          $anchor_index = $prev_length;
          for( $iteration = 0; $iteration < 4 && $anchor_index > 0; ++$iteration ) {
            $new_index = strrpos($prev_item, '<a name=', $anchor_index - $prev_length - 1);
            if( $new_index === false ) {
              break;
            }
            $anchor_index = $new_index;
          }

          // Cut the anchor tag out of the previous chunk; when no closing tag
          // follows, use the remainder instead of letting "false" act as 0 in
          // the length arithmetic.
          $anchor_end = strpos($prev_item, '</a>', $anchor_index);
          if( $anchor_end === false ) {
            $anchor = substr($prev_item, $anchor_index);
          } else {
            $anchor = substr($prev_item, $anchor_index, $anchor_end - $anchor_index);
          }

          $heading_end = strpos($item, '</h3>');
          $item_name = ($heading_end === false) ? '' : substr($item, 0, $heading_end);

          if( !preg_match('/<a name="(.+?)"/', $anchor, $matches) ) {
            $this->view->results .= 'Couldn\'t find anchor, skipping...'."\n\n";
            continue;
          }
          $anchor = $source_url.'#'.trim($matches[1]);
          $this->view->results .= ' Name: '.$item_name."\n";
          $this->view->results .= ' Link: '.$anchor."\n";

          if( !preg_match('/<p class="spaceabove">(.+?)<\/p>/', str_replace("\n", ' ', $item), $matches) ) {
            $this->view->results .= 'Couldn\'t find item summary, skipping...'."\n\n";
            continue;
          }
          $summary = trim(strip_tags($matches[1]));
          $this->view->results .= ' Desc: '.$summary."\n";

          // The declaration is either a "spaceabovemethod" paragraph or, failing
          // that, a <pre><code> block; a missing declaration is not fatal.
          if( preg_match('/<p class="spaceabovemethod">(.+?)<\/p>/', $item, $matches) ||
              preg_match('/<pre><code>(.+?)<\/code><br><\/pre>/', $item, $matches) ) {
            $method_info = trim(strip_tags($matches[1]));
          } else {
            $this->view->results .= 'Couldn\'t find method info, filling with empty string...'."\n\n";
            $method_info = '';
          }

          $data = Zend_Json::encode(array(
            'i' => $method_info,
            't' => $type
          ));
          // Drop the quotes around the one-letter keys only. The lookahead for
          // the colon keeps a one-letter string *value* quoted, which the
          // previous pattern would have turned into a bare identifier.
          $data = preg_replace('/"([a-z])"(?=\s*:)/', '$1', $data);
          $this->view->results .= ' data: '.$data."\n\n";

          $this->getFunctionsModel()->insertOrUpdateFunction(array(
            'category' => $category_id,
            'hierarchy' => $hierarchy['id'],
            'name' => $name.' '.$item_name,
            'url' => $anchor,
            'short_description' => $summary,
            'data' => $data
          ));
        }
      }

      $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
    }
  }
  /**
   * Seeds the functions model with a fixed list of django ModelAdmin entries.
   *
   * Nothing is downloaded here: every entry below is a hand-written tuple of
   * (documentation URL, qualified name[, signature]). Each one is passed to
   * insertOrUpdateFunction() with an empty short description, the signature
   * (or an empty string when the tuple has none) as its data, and the
   * hard-coded hierarchy id 2.
   * NOTE(review): hierarchy 2 is presumably the admin hierarchy -- confirm.
   *
   * @return void
   */
  private function scrapeDjango2() {
    $category = 'django';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    // (url, name) or (url, name, signature)
    $entries = array(
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#date-hierarchy", "admin.ModelAdmin.date_hierarchy"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#form", "admin.ModelAdmin.form"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#fieldsets", "admin.ModelAdmin.fieldsets"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#fields", "admin.ModelAdmin.fields"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#exclude", "admin.ModelAdmin.exclude"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#filter-horizontal", "admin.ModelAdmin.filter_horizontal"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#filter-vertical", "admin.ModelAdmin.filter_vertical"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#list-display", "admin.ModelAdmin.list_display"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#list-display-links", "admin.ModelAdmin.list_display_links"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#list-filter", "admin.ModelAdmin.list_filter"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#list-per-page", "admin.ModelAdmin.list_per_page"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#list-select-related", "admin.ModelAdmin.list_select_related"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#inlines", "admin.ModelAdmin.inlines"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#ordering", "admin.ModelAdmin.ordering"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#prepopulated-fields", "admin.ModelAdmin.prepopulated_fields"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#radio-fields", "admin.ModelAdmin.radio_fields"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#raw-id-fields", "admin.ModelAdmin.raw_id_fields"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#save-as", "admin.ModelAdmin.save_as"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#save-on-top", "admin.ModelAdmin.save_on_top"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#search-fields", "admin.ModelAdmin.search_fields"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#formfield-overrides", "admin.ModelAdmin.formfield_overrides"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#save-model-self-request-obj-form-change", "admin.ModelAdmin.save_model", "save_model(self, request, obj, form, change)"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#save-formset-self-request-form-formset-change", "admin.ModelAdmin.save_formset", "save_formset(self, request, form, formset, change)"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#get-urls-self", "admin.ModelAdmin.get_urls", "get_urls(self)"),
      array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#formfield-for-foreignkey-self-db-field-request-kwargs", "admin.ModelAdmin.formfield_for_foreignkey", "formfield_for_foreignkey(self, db_field, request, **kwargs)")
    );

    $model = $this->getFunctionsModel();
    foreach( $entries as $entry ) {
      // The signature is optional; entries without one store an empty string.
      $signature = isset($entry[2]) ? $entry[2] : '';

      $record = array(
        'category' => $category_id,
        'hierarchy' => 2,
        'name' => $entry[1],
        'url' => $entry[0],
        'short_description' => "",
        'data' => $signature
      );
      $model->insertOrUpdateFunction($record);
    }
  }
  1487. private function scrapeDjango1() {
  1488. $category = 'django';
  1489. $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
  1490. if( !$category_id ) {
  1491. $this->invalid_category($category);
  1492. return;
  1493. }
  1494. $categories = array(
  1495. array("http://docs.djangoproject.com/en/dev/ref/contrib/admin/#module-django.contrib.admin", "django.contrib.admin",
  1496. "Django's admin site."),
  1497. array("http://docs.djangoproject.com/en/dev/topics/auth/#module-django.contrib.auth", "django.contrib.auth",
  1498. "Django's authentication framework."),
  1499. array("http://docs.djangoproject.com/en/dev/topics/auth/#module-django.contrib.auth.forms", "django.contrib.auth.forms",
  1500. ""),
  1501. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.contrib.auth.middleware", "django.contrib.auth.middleware",
  1502. "Authentication middleware."),
  1503. array("http://docs.djangoproject.com/en/dev/ref/contrib/comments/#module-django.contrib.comments", "django.contrib.comments",
  1504. "Django's comment framework"),
  1505. array("http://docs.djangoproject.com/en/dev/ref/contrib/comments/signals/#module-django.contrib.comments.signals", "django.contrib.comments.signals",
  1506. "Signals sent by the comment module."),
  1507. array("http://docs.djangoproject.com/en/dev/ref/contrib/contenttypes/#module-django.contrib.contenttypes", "django.contrib.contenttypes",
  1508. "Provides generic interface to installed models."),
  1509. array("http://docs.djangoproject.com/en/dev/ref/contrib/csrf/#module-django.contrib.csrf", "django.contrib.csrf",
  1510. "Protects against Cross Site Request Forgeries"),
  1511. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.contrib.csrf.middleware", "django.contrib.csrf.middleware",
  1512. "Middleware adding protection against Cross Site Request Forgeries."),
  1513. array("http://docs.djangoproject.com/en/dev/ref/contrib/databrowse/#module-django.contrib.databrowse", "django.contrib.databrowse",
  1514. "Databrowse is a Django application that lets you browse your data."),
  1515. array("http://docs.djangoproject.com/en/dev/ref/contrib/flatpages/#module-django.contrib.flatpages", "django.contrib.flatpages",
  1516. "A framework for managing simple ?flat? HTML content in a database."),
  1517. array("http://docs.djangoproject.com/en/dev/ref/contrib/formtools/form-preview/#module-django.contrib.formtools", "django.contrib.formtools",
  1518. "Displays an HTML form, forces a preview, then does something with the submission."),
  1519. array("http://docs.djangoproject.com/en/dev/ref/contrib/formtools/form-wizard/#module-django.contrib.formtools.wizard", "django.contrib.formtools.wizard",
  1520. "Splits forms across multiple Web pages."),
  1521. array("http://docs.djangoproject.com/en/dev/ref/contrib/humanize/#module-django.contrib.humanize", "django.contrib.humanize",
  1522. "A set of Django template filters useful for adding a \"human touch\" to data."),
  1523. array("http://docs.djangoproject.com/en/dev/ref/contrib/localflavor/#module-django.contrib.localflavor", "django.contrib.localflavor",
  1524. "A collection of various Django snippets that are useful only for a particular country or culture."),
  1525. array("http://docs.djangoproject.com/en/dev/ref/contrib/redirects/#module-django.contrib.redirects", "django.contrib.redirects",
  1526. "A framework for managing redirects."),
  1527. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.contrib.sessions.middleware", "django.contrib.sessions.middleware",
  1528. "Session middleware."),
  1529. array("http://docs.djangoproject.com/en/dev/ref/contrib/sitemaps/#module-django.contrib.sitemaps", "django.contrib.sitemaps",
  1530. "A framework for generating Google sitemap XML files."),
  1531. array("http://docs.djangoproject.com/en/dev/ref/contrib/sites/#module-django.contrib.sites", "django.contrib.sites",
  1532. "Lets you operate multiple web sites from the same database and Django project"),
  1533. array("http://docs.djangoproject.com/en/dev/ref/contrib/syndication/#module-django.contrib.syndication", "django.contrib.syndication",
  1534. "A framework for generating syndication feeds, in RSS and Atom, quite easily."),
  1535. array("http://docs.djangoproject.com/en/dev/ref/contrib/webdesign/#module-django.contrib.webdesign", "django.contrib.webdesign",
  1536. "Helpers and utilities targeted primarily at Web *designers* rather than Web *developers*."),
  1537. array("http://docs.djangoproject.com/en/dev/ref/files/#module-django.core.files", "django.core.files",
  1538. "File handling and storage"),
  1539. array("http://docs.djangoproject.com/en/dev/topics/email/#module-django.core.mail", "django.core.mail",
  1540. "Helpers to easily send e-mail."),
  1541. array("http://docs.djangoproject.com/en/dev/topics/pagination/#module-django.core.paginator", "django.core.paginator",
  1542. "Classes to help you easily manage paginated data."),
  1543. array("http://docs.djangoproject.com/en/dev/ref/signals/#module-django.core.signals", "django.core.signals",
  1544. "Core signals sent by the request/response system."),
  1545. array("http://docs.djangoproject.com/en/dev/topics/db/models/#module-django.db.models", "django.db.models",
  1546. ""),
  1547. array("http://docs.djangoproject.com/en/dev/ref/models/fields/#module-django.db.models.fields", "django.db.models.fields",
  1548. "Built-in field types."),
  1549. array("http://docs.djangoproject.com/en/dev/ref/models/fields/#module-django.db.models.fields.related", "django.db.models.fields.related",
  1550. "Related field types"),
  1551. array("http://docs.djangoproject.com/en/dev/ref/signals/#module-django.db.models.signals", "django.db.models.signals",
  1552. "Signals sent by the model system."),
  1553. array("http://docs.djangoproject.com/en/dev/topics/signals/#module-django.dispatch", "django.dispatch",
  1554. "Signal dispatch"),
  1555. array("http://docs.djangoproject.com/en/dev/ref/forms/fields/#module-django.forms.fields", "django.forms.fields",
  1556. "Django's built-in form fields."),
  1557. array("http://docs.djangoproject.com/en/dev/ref/forms/widgets/#module-django.forms.widgets", "django.forms.widgets",
  1558. "Django's built-in form widgets."),
  1559. array("http://docs.djangoproject.com/en/dev/ref/request-response/#module-django.http", "django.http",
  1560. "Classes dealing with HTTP requests and responses."),
  1561. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.middleware", "django.middleware",
  1562. "Django's built-in middleware classes."),
  1563. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.middleware.cache", "django.middleware.cache",
  1564. "Middleware for the site-wide cache."),
  1565. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.middleware.common", "django.middleware.common",
  1566. "Middleware adding \"common\" conveniences for perfectionists."),
  1567. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.middleware.doc", "django.middleware.doc",
  1568. "Middleware to help your app self-document."),
  1569. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.middleware.gzip", "django.middleware.gzip",
  1570. "Middleware to serve gziped content for performance."),
  1571. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.middleware.http", "django.middleware.http",
  1572. "Middleware handling advanced HTTP features."),
  1573. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.middleware.locale", "django.middleware.locale",
  1574. "Middleware to enable language selection based on the request."),
  1575. array("http://docs.djangoproject.com/en/dev/ref/middleware/#module-django.middleware.transaction", "django.middleware.transaction",
  1576. "Middleware binding a database transaction to each web request."),
  1577. array("http://docs.djangoproject.com/en/dev/topics/testing/#module-django.test", "django.test",
  1578. "Testing tools for Django applications."),
  1579. array("http://docs.djangoproject.com/en/dev/topics/testing/#module-django.test.client", "django.test.client",
  1580. "Django's test client."),
  1581. array("http://docs.djangoproject.com/en/dev/ref/signals/#module-django.test.signals", "django.test.signals",
  1582. "Signals sent during testing."),
  1583. array("http://docs.djangoproject.com/en/dev/topics/testing/#module-django.test.utils", "django.test.utils",
  1584. "Helpers to write custom test runners."),
  1585. array("http://docs.djangoproject.com/en/dev/howto/static-files/#module-django.views.static", "django.views.static",
  1586. "Serving of static files during development.")
  1587. );
  1588. for( $index = 0; $index < count($categories); ++$index ) {
  1589. $this->view->results .= $this->getHierarchiesModel()->insert(
  1590. $category_id,
  1591. 1,
  1592. $categories[$index][1],
  1593. $categories[$index][0])."\n";
  1594. }
  1595. }
  /**
   * Second FBML pass: fills in the short description of each scrapeable
   * "Facebook API" function by downloading the page stored in its url.
   *
   * For every row returned by fetchAllScrapeable() whose hierarchy id lies in
   * 5..20 and which has a source URL, the page is fetched and the first
   * <p>...</p> following the '<a name="Description">' anchor is saved as the
   * function's short_description. Progress and the reason for every skipped
   * function are appended to $this->view->results.
   *
   * NOTE(review): the 5..20 window presumably selects the FBML hierarchies
   * that scrapeFacebookFbml() attaches functions to -- confirm against the
   * hierarchies table before changing it.
   */
  private function scrapeFacebookFbmlPhase2() {
    $category = 'Facebook API';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getFunctionsModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    foreach( $scrapeable as $function ) {
      // Only functions attached to hierarchies 5 through 20 are handled here.
      if( $function['hierarchy'] < 5 || $function['hierarchy'] > 20 ) {
        continue;
      }

      $this->view->results .= $function['name'] . "\n";

      if( !$function['url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }

      $source_url = $function['url'];
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      // file_get_contents() returns FALSE when the download fails. Report that
      // explicitly instead of letting the searches below run on FALSE and
      // mislabel the failure as a missing description.
      $contents = file_get_contents($source_url);
      if( $contents === FALSE ) {
        $this->view->results .= 'We couldn\'t fetch the page, skipping...' . "\n";
        continue;
      }

      // Locate the "Description" anchor, then the first paragraph after it.
      $start_index = strpos($contents, '<a name="Description">');
      if( $start_index === FALSE ) {
        $this->view->results .= 'We didn\'t find a description, skipping...' . "\n";
        continue;
      }

      $start_index = strpos($contents, '<p>', $start_index);
      if( $start_index === FALSE ) {
        $this->view->results .= 'We couldn\'t find the beginning of the description, skipping...' . "\n";
        continue;
      }

      $end_index = strpos($contents, '</p>', $start_index);
      if( $end_index === FALSE ) {
        $this->view->results .= 'We couldn\'t find the end of the description, skipping...' . "\n";
        continue;
      }

      // The slice still starts with the opening <p>; strip_tags() removes it
      // along with every other tag except <b> and <code>.
      $line = str_replace("\n", '', substr($contents, $start_index, $end_index - $start_index));
      $line = strip_tags($line, '<b><code>');

      // NOTE(review): the name is re-read through the model rather than taken
      // from $function['name']; presumably the two can differ -- confirm.
      $name = $this->getFunctionsModel()->fetchName($category_id, $function['id']);

      $this->getFunctionsModel()->insertOrUpdateFunction(array(
        'category' => $category_id,
        'hierarchy' => $function['hierarchy'],
        'name' => $name,
        'short_description' => $line
      ));

      // Disabled alternative that stored the text as the function's data:
      /*
      $this->getFunctionsModel()->setData(array(
        'category' => $category_id,
        'id' => $function['id'],
        'data' => $line
      ));*/
    }
  }
  /**
   * Scrapes the FBML index page of the Facebook developer wiki and registers
   * every linked entry as a function of the "Facebook API" category.
   *
   * The markup between '<div class="fbml_section">' and
   * '<p><br clear="all"/>' is split on '<a name="' into sections. Each
   * section's mw-headline text is used to look up an existing hierarchy, and
   * every titled link in the section is inserted (or updated) as a function
   * under it with an absolute wiki URL. Progress and skip reasons are
   * appended to $this->view->results.
   */
  private function scrapeFacebookFbml() {
    $category = 'Facebook API';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    // file_get_contents() returns FALSE on failure; report that as a fetch
    // problem rather than as a missing fbml section.
    $contents = file_get_contents('http://wiki.developers.facebook.com/index.php/FBML');
    if( $contents === FALSE ) {
      $this->view->results .= 'We couldn\'t fetch the FBML index page, skipping...' . "\n";
      return;
    }

    $start_index = strpos($contents, '<div class="fbml_section">');
    if( $start_index === FALSE ) {
      $this->view->results .= 'We didn\'t find an fbml section, skipping...' . "\n";
      return;
    }

    $end_index = strpos($contents, '<p><br clear="all"/>', $start_index);
    if( $end_index === FALSE ) {
      $this->view->results .= 'We couldn\'t find the end of the list, skipping...' . "\n";
      return;
    }

    $list_data = substr($contents, $start_index, $end_index - $start_index);

    // Everything before the first named anchor is preamble, so drop it.
    $list = array_slice(explode('<a name="', $list_data), 1);

    foreach( $list as $item ) {
      // Anchor name of this section. Only the disabled hierarchy-seeding step
      // below uses it.
      $link = substr($item, 0, strpos($item, '"'));

      if( !preg_match('/<span class="mw-headline">(.+?)<\/span>/', $item, $headline) ) {
        $this->view->results .= 'We couldn\'t find the headline, skipping...' . "\n";
        continue;
      }
      $title = trim($headline[1]);

      // Newlines are removed so a link broken across lines still matches.
      $item = str_replace("\n", '', $item);

      // Attribute values are matched with [^"]+ so a capture can never run
      // past its closing quote into a following attribute or anchor.
      if( !preg_match_all('/<a href="([^"]+)" title="[^"]+">(.+?)<\/a>/', $item, $links) ) {
        $this->view->results .= 'We couldn\'t find links, skipping...' . "\n";
        continue;
      }

      // Scrape hierarchies.
      /*$this->view->results .= $this->getHierarchiesModel()->insert(
        $category_id,
        4,
        $title,
        'http://wiki.developers.facebook.com/index.php/FBML#'.$link
      )."\n";*/

      // Scrape functions.
      $hierarchy = $this->getHierarchiesModel()->fetchByName($category_id, 4, $title);

      // NOTE(review): assumes fetchByName() yields a falsy value when no
      // hierarchy matches -- confirm. Without this guard the functions would
      // be stored with an empty hierarchy.
      if( !$hierarchy ) {
        $this->view->results .= 'We couldn\'t find a hierarchy named "'.$title.'", skipping...' . "\n";
        continue;
      }

      // $links[1] holds the hrefs and $links[2] the link texts, index-aligned.
      foreach( $links[2] as $index => $name ) {
        // Logged through the view like the other scrapers, not echoed.
        $this->view->results .= $name . "\n";

        $this->getFunctionsModel()->insertOrUpdateFunction(array(
          'category' => $category_id,
          'hierarchy' => $hierarchy,
          'name' => $name,
          'url' => 'http://wiki.developers.facebook.com'.$links[1][$index]
        ));
      }
    }
  }
  /**
   * Seeds the "Facebook API" category with a hand-maintained table of REST
   * API methods.
   *
   * Each table row is array(url, name, short description). A row with an
   * empty url is still stored, with an empty url. Every row is passed to
   * insertOrUpdateFunction() with hierarchy 2.
   *
   * NOTE(review): hierarchy 2 is presumably the id of the API-methods
   * hierarchy -- confirm against the hierarchies table.
   */
  private function scrapeFacebook() {
    $category = 'Facebook API';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    // url, name, short description
    $api_methods = array(
      array("http://wiki.developers.facebook.com/index.php/Admin.getAllocation", "admin.getAllocation",
        "Returns the current allocation limit for your application for the specified integration point."),
      array("http://wiki.developers.facebook.com/index.php/Admin.getAppProperties", "admin.getAppProperties",
        "Returns values of properties for your applications from the Facebook Developer application."),
      array("", "admin.getDailyMetrics",
        "This method is deprecated. Please use Admin.getMetrics instead."),
      array("http://wiki.developers.facebook.com/index.php/Admin.getMetrics", "admin.getMetrics",
        "Returns specified metrics for your application, given a time period."),
      array("http://wiki.developers.facebook.com/index.php/Admin.getRestrictionInfo", "admin.getRestrictionInfo",
        "Returns the demographic restrictions for the application."),
      array("http://wiki.developers.facebook.com/index.php/Admin.setAppProperties", "admin.setAppProperties",
        "Sets values for properties for your applications in the Facebook Developer application."),
      array("http://wiki.developers.facebook.com/index.php/Admin.setRestrictionInfo", "admin.setRestrictionInfo",
        "Sets the demographic restrictions for the application."),
      array("http://wiki.developers.facebook.com/index.php/Application.getPublicInfo", "application.getPublicInfo",
        "Returns public information about a given application (not necessarily your own)."),
      array("http://wiki.developers.facebook.com/index.php/Auth.createToken", "auth.createToken",
        "Creates an auth_token to be passed in as a parameter to login.php and then to auth.getSession after the user has logged in."),
      array("http://wiki.developers.facebook.com/index.php/Auth.expireSession", "auth.expireSession",
        "Expires the session indicated in the API call, for your application."),
      array("http://wiki.developers.facebook.com/index.php/Auth.getSession", "auth.getSession",
        "Returns the session key bound to an auth_token, as returned by auth.createToken or in the callback URL."),
      array("http://wiki.developers.facebook.com/index.php/Auth.promoteSession", "auth.promoteSession",
        "Returns a temporary session secret associated to the current existing session, for use in a client-side component to an application."),
      array("http://wiki.developers.facebook.com/index.php/Auth.revokeAuthorization", "auth.revokeAuthorization",
        "If this method is called for the logged in user, then no further API calls can be made on that user's behalf until the user decides to authorize the application again."),
      array("http://wiki.developers.facebook.com/index.php/Auth.revokeExtendedPermission", "auth.revokeExtendedPermission",
        "Removes a specific extended permission that a user explicitly granted to your application."),
      array("http://wiki.developers.facebook.com/index.php/Batch.run", "batch.run",
        "Execute a list of individual API calls in a single batch."),
      array("http://wiki.developers.facebook.com/index.php/Comments.get", "comments.get",
        "Returns all comments for a given xid posted through fb:comments. This method is a wrapper for the FQL query on the comment FQL table."),
      array("http://wiki.developers.facebook.com/index.php/Data.getCookies", "data.getCookies",
        "Returns all cookies for a given user and application."),
      array("http://wiki.developers.facebook.com/index.php/Data.setCookie", "data.setCookie",
        "Sets a cookie for a given user and application."),
      array("http://wiki.developers.facebook.com/index.php/Events.cancel", "events.cancel",
        "Cancels an event. The application must be an admin of the event."),
      array("http://wiki.developers.facebook.com/index.php/Events.create", "events.create",
        "Creates an event on behalf of the user if the application has an active session; otherwise it creates an event on behalf of the application."),
      array("http://wiki.developers.facebook.com/index.php/Events.edit", "events.edit",
        "Edits an existing event. The application must be an admin of the event."),
      array("http://wiki.developers.facebook.com/index.php/Events.get", "events.get",
        "Returns all visible events according to the filters specified."),
      array("http://wiki.developers.facebook.com/index.php/Events.getMembers", "events.getMembers",
        "Returns membership list data associated with an event."),
      array("http://wiki.developers.facebook.com/index.php/Events.rsvp", "events.rsvp",
        "Sets the attendance option for the current user."),
      array("http://wiki.developers.facebook.com/index.php/Fbml.deleteCustomTags", "fbml.deleteCustomTags",
        "Deletes one or more custom tags you previously registered for the calling application with fbml.registerCustomTags"),
      array("http://wiki.developers.facebook.com/index.php/Fbml.getCustomTags", "fbml.getCustomTags",
        "Returns the custom tag definitions for tags that were previously defined using fbml.registerCustomTags"),
      array("http://wiki.developers.facebook.com/index.php/Fbml.refreshImgSrc", "fbml.refreshImgSrc",
        "Fetches and re-caches the image stored at the given URL."),
      array("http://wiki.developers.facebook.com/index.php/Fbml.refreshRefUrl", "fbml.refreshRefUrl",
        "Fetches and re-caches the content stored at the given URL."),
      array("http://wiki.developers.facebook.com/index.php/Fbml.registerCustomTags", "fbml.registerCustomTags",
        "Registers custom tags you can include in your that applications' FBML markup. Custom tags consist of FBML snippets that are rendered during parse time on the containing page that references the custom tag."),
      array("http://wiki.developers.facebook.com/index.php/Fbml.setRefHandle", "fbml.setRefHandle",
        "Associates a given \"handle\" with FBML markup so that the handle can be used within the fb:ref FBML tag."),
      array("http://wiki.developers.facebook.com/index.php/Fbml.uploadNativeStrings", "fbml.uploadNativeStrings",
        "Lets you insert text strings into the Facebook Translations database so they can be translated."),
      array("http://wiki.developers.facebook.com/index.php/Feed.deactivateTemplateBundleByID", "feed.deactivateTemplateBundleByID",
        "Deactivates a previously registered template bundle."),
      array("http://wiki.developers.facebook.com/index.php/Feed.getRegisteredTemplateBundleByID", "feed.getRegisteredTemplateBundleByID",
        "Retrieves information about a specified template bundle previously registered by the requesting application."),
      array("http://wiki.developers.facebook.com/index.php/Feed.getRegisteredTemplateBundles", "feed.getRegisteredTemplateBundles",
        "Retrieves the full list of all the template bundles registered by the requesting application."),
      array("", "feed.publishActionOfUser",
        "This method is deprecated. Please use feed.publishUserAction instead."),
      array("", "feed.publishStoryToUser",
        "This method is deprecated. Please use feed.publishUserAction instead."),
      array("http://wiki.developers.facebook.com/index.php/Feed.publishTemplatizedAction", "feed.publishTemplatizedAction",
        "Publishes a Mini-Feed story to the Facebook Page corresponding to the page_actor_id parameter. Note: This method is deprecated for actions taken by users only; it still works for actions taken by Facebook Pages."),
      array("http://wiki.developers.facebook.com/index.php/Feed.publishUserAction", "feed.publishUserAction",
        "Publishes a story on behalf of the user owning the session, using the specified template bundle."),
      array("http://wiki.developers.facebook.com/index.php/Feed.registerTemplateBundle", "feed.registerTemplateBundle",
        "Builds a template bundle around the specified templates, registers them on Facebook, and responds with a template bundle ID that can be used to identify your template bundle to other Feed-related API calls."),
      array("http://wiki.developers.facebook.com/index.php/Fql.query", "fql.query",
        "Evaluates an FQL (Facebook Query Language) query."),
      array("http://wiki.developers.facebook.com/index.php/Friends.areFriends", "friends.areFriends",
        "Returns whether or not each pair of specified users is friends with each other."),
      array("http://wiki.developers.facebook.com/index.php/Friends.get", "friends.get",
        "Returns the identifiers for the current user's Facebook friends."),
      array("http://wiki.developers.facebook.com/index.php/Friends.getAppUsers", "friends.getAppUsers",
        "Returns the identifiers for the current user's Facebook friends who have authorized the specific calling application."),
      array("http://wiki.developers.facebook.com/index.php/Friends.getLists", "friends.getLists",
        "Returns the identifiers for the current user's Facebook friend lists."),
      array("http://wiki.developers.facebook.com/index.php/Groups.get", "groups.get",
        "Returns all visible groups according to the filters specified."),
      array("http://wiki.developers.facebook.com/index.php/Groups.getMembers", "groups.getMembers",
        "Returns membership list data associated with a group."),
      array("http://wiki.developers.facebook.com/index.php/Links.get", "links.get",
        "Returns all links the user has posted on their profile through your application."),
      array("http://wiki.developers.facebook.com/index.php/Links.post", "links.post",
        "Lets a user post a link on their Wall through your application."),
      array("http://wiki.developers.facebook.com/index.php/LiveMessage.send", "liveMessage.send",
        "Sends a \"message\" directly to a user's browser, which can be handled in FBJS."),
      array("http://wiki.developers.facebook.com/index.php/Marketplace.createListing", "marketplace.createListing",
        "Create or modify a listing in Marketplace."),
      array("http://wiki.developers.facebook.com/index.php/Marketplace.getCategories", "marketplace.getCategories",
        "Returns all the Marketplace categories."),
      array("http://wiki.developers.facebook.com/index.php/Marketplace.getListings", "marketplace.getListings",
        "Return all Marketplace listings either by listing ID or by user."),
      array("http://wiki.developers.facebook.com/index.php/Marketplace.getSubCategories", "marketplace.getSubCategories",
        "Returns the Marketplace subcategories for a particular category."),
      array("http://wiki.developers.facebook.com/index.php/Marketplace.removeListing", "marketplace.removeListing",
        "Remove a listing from Marketplace."),
      array("http://wiki.developers.facebook.com/index.php/Marketplace.search", "marketplace.search",
        "Search Marketplace for listings filtering by category, subcategory and a query string."),
      array("http://wiki.developers.facebook.com/index.php/Notes.create", "notes.create",
        "Lets a user write a Facebook note through your application."),
      array("http://wiki.developers.facebook.com/index.php/Notes.delete", "notes.delete",
        "Lets a user delete a Facebook note that was written through your application."),
      array("http://wiki.developers.facebook.com/index.php/Notes.edit", "notes.edit",
        "Lets a user edit a Facebook note through your application."),
      array("http://wiki.developers.facebook.com/index.php/Notes.get", "notes.get",
        "Returns a list of all of the visible notes written by the specified user."),
      array("http://wiki.developers.facebook.com/index.php/Notifications.get", "notifications.get",
        "Returns information on outstanding Facebook notifications for current session user."),
      array("http://wiki.developers.facebook.com/index.php/Notifications.send", "notifications.send",
        "Sends a notification to a set of users."),
      array("http://wiki.developers.facebook.com/index.php/Notifications.sendEmail", "notifications.sendEmail",
        "Sends an email to the specified users who have the application."),
      array("http://wiki.developers.facebook.com/index.php/Pages.getInfo", "pages.getInfo",
        "Returns all visible pages to the filters specified."),
      array("http://wiki.developers.facebook.com/index.php/Pages.isAdmin", "pages.isAdmin",
        "Checks whether the logged-in user is the admin for a given Page."),
      array("http://wiki.developers.facebook.com/index.php/Pages.isAppAdded", "pages.isAppAdded",
        "Checks whether the Page has added the application."),
      array("http://wiki.developers.facebook.com/index.php/Pages.isFan", "pages.isFan",
        "Checks whether a user is a fan of a given Page."),
      array("http://wiki.developers.facebook.com/index.php/Photos.addTag", "photos.addTag",
        "Adds a tag with the given information to a photo."),
      array("http://wiki.developers.facebook.com/index.php/Photos.createAlbum", "photos.createAlbum",
        "Creates and returns a new album owned by the current session user."),
      array("http://wiki.developers.facebook.com/index.php/Photos.get", "photos.get",
        "Returns all visible photos according to the filters specified."),
      array("http://wiki.developers.facebook.com/index.php/Photos.getAlbums", "photos.getAlbums",
        "Returns metadata about all of the photo albums uploaded by the specified user."),
      array("http://wiki.developers.facebook.com/index.php/Photos.getTags", "photos.getTags",
        "Returns the set of user tags on all photos specified."),
      array("http://wiki.developers.facebook.com/index.php/Photos.upload", "photos.upload",
        "Uploads a photo owned by the current session user and returns the new photo."),
      array("http://wiki.developers.facebook.com/index.php/Profile.getFBML", "profile.getFBML",
        "Gets the FBML that is currently set for a user's profile."),
      array("http://wiki.developers.facebook.com/index.php/Profile.getInfo", "profile.getInfo",
        "Returns the specified user's application info section for the calling application."),
      array("http://wiki.developers.facebook.com/index.php/Profile.getInfoOptions", "profile.getInfoOptions",
        "Returns the options associated with the specified field for an application info section."),
      array("http://wiki.developers.facebook.com/index.php/Profile.setFBML", "profile.setFBML",
        "Sets the FBML for a user's profile, including the content for both the profile box and the profile actions."),
      array("http://wiki.developers.facebook.com/index.php/Profile.setInfo", "profile.setInfo",
        "Configures an application info section that the specified user can install on the Info tab of her profile."),
      array("http://wiki.developers.facebook.com/index.php/Profile.setInfoOptions", "profile.setInfoOptions",
        "Specifies the objects for a field for an application info section."),
      array("http://wiki.developers.facebook.com/index.php/Status.get", "status.get",
        "Returns the user's current and most recent statuses. This is a streamlined version of users.setStatus."),
      array("http://wiki.developers.facebook.com/index.php/Status.set", "status.set",
        "Updates a user's Facebook status through your application."),
      array("http://wiki.developers.facebook.com/index.php/Users.getInfo", "users.getInfo",
        "Returns a wide array of user-specific information for each user identifier passed, limited by the view of the current user."),
      array("http://wiki.developers.facebook.com/index.php/Users.getLoggedInUser", "users.getLoggedInUser",
        "Gets the user ID (uid) associated with the current session."),
      array("", "users.getStandardInfo",
        "Returns an array of user-specific information for use by the application itself."),
      array("http://wiki.developers.facebook.com/index.php/Users.hasAppPermission", "users.hasAppPermission",
        "Checks whether the user has opted in to an extended application permission."),
      array("", "users.isAppAdded",
        "This method is deprecated. Please use users.isAppUser instead."),
      array("http://wiki.developers.facebook.com/index.php/Users.isAppUser", "users.isAppUser",
        "Returns whether the user (either the session user or user specified by UID) has authorized the calling application."),
      array("http://wiki.developers.facebook.com/index.php/Users.isVerified", "users.isVerified",
        "Returns whether the user is a verified Facebook user."),
      array("http://wiki.developers.facebook.com/index.php/Users.setStatus", "users.setStatus",
        "Updates a user's Facebook status."),
      array("http://wiki.developers.facebook.com/index.php/Video.getUploadLimits", "video.getUploadLimits",
        "Returns the file size and length limits for a video that the current user can upload through your application."),
      array("http://wiki.developers.facebook.com/index.php/Video.upload", "video.upload",
        "Uploads a video owned by the current session user and returns the video.")
    );

    // Store the rows in table order.
    foreach( $api_methods as $api_method ) {
      list($url, $name, $short_description) = $api_method;

      $this->getFunctionsModel()->insertOrUpdateFunction(array(
        'category' => $category_id,
        'hierarchy' => 2,
        'name' => $name,
        'url' => $url,
        'short_description' => $short_description
      ));
    }
  }
  /**
   * Seeds the "Zend" category with one hierarchy per Zend Framework
   * component.
   *
   * The table maps each component name to its manual page. Every pair is
   * handed to the hierarchies model's insert() in table order, with 1 as the
   * second argument, and whatever insert() returns is appended to
   * $this->view->results followed by a newline.
   *
   * NOTE(review): the second argument to insert() is presumably the parent
   * hierarchy id -- confirm against the hierarchies model.
   */
  private function scrapeZend() {
    $category = 'Zend';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    // component name => manual URL
    $components = array(
      "Zend_Acl" => "http://framework.zend.com/manual/en/zend.acl.html",
      "Zend_Amf" => "http://framework.zend.com/manual/en/zend.amf.html",
      "Zend_Auth" => "http://framework.zend.com/manual/en/zend.auth.html",
      "Zend_Cache" => "http://framework.zend.com/manual/en/zend.cache.html",
      "Zend_Captcha" => "http://framework.zend.com/manual/en/zend.captcha.html",
      "Zend_Config" => "http://framework.zend.com/manual/en/zend.config.html",
      "Zend_Config_Writer" => "http://framework.zend.com/manual/en/zend.config.writer.html",
      "Zend_Console_Getopt" => "http://framework.zend.com/manual/en/zend.console.getopt.html",
      "Zend_Controller" => "http://framework.zend.com/manual/en/zend.controller.html",
      "Zend_Currency" => "http://framework.zend.com/manual/en/zend.currency.html",
      "Zend_Date" => "http://framework.zend.com/manual/en/zend.date.html",
      "Zend_Db" => "http://framework.zend.com/manual/en/zend.db.html",
      "Zend_Debug" => "http://framework.zend.com/manual/en/zend.debug.html",
      "Zend_Dojo" => "http://framework.zend.com/manual/en/zend.dojo.html",
      "Zend_Dom" => "http://framework.zend.com/manual/en/zend.dom.html",
      "Zend_Exception" => "http://framework.zend.com/manual/en/zend.exception.html",
      "Zend_Feed" => "http://framework.zend.com/manual/en/zend.feed.html",
      "Zend_File" => "http://framework.zend.com/manual/en/zend.file.html",
      "Zend_Filter" => "http://framework.zend.com/manual/en/zend.filter.html",
      "Zend_Filter_Input" => "http://framework.zend.com/manual/en/zend.filter.input.html",
      "Zend_Form" => "http://framework.zend.com/manual/en/zend.form.html",
      "Zend_Gdata" => "http://framework.zend.com/manual/en/zend.gdata.html",
      "Zend_Http" => "http://framework.zend.com/manual/en/zend.http.html",
      "Zend_Infocard" => "http://framework.zend.com/manual/en/zend.infocard.html",
      "Zend_Json" => "http://framework.zend.com/manual/en/zend.json.html",
      "Zend_Layout" => "http://framework.zend.com/manual/en/zend.layout.html",
      "Zend_Ldap" => "http://framework.zend.com/manual/en/zend.ldap.html",
      "Zend_Loader" => "http://framework.zend.com/manual/en/zend.loader.html",
      "Zend_Locale" => "http://framework.zend.com/manual/en/zend.locale.html",
      "Zend_Log" => "http://framework.zend.com/manual/en/zend.log.html",
      "Zend_Mail" => "http://framework.zend.com/manual/en/zend.mail.html",
      "Zend_Measure" => "http://framework.zend.com/manual/en/zend.measure.html",
      "Zend_Memory" => "http://framework.zend.com/manual/en/zend.memory.html",
      "Zend_Mime" => "http://framework.zend.com/manual/en/zend.mime.html",
      "Zend_OpenId" => "http://framework.zend.com/manual/en/zend.openid.html",
      "Zend_Paginator" => "http://framework.zend.com/manual/en/zend.paginator.html",
      "Zend_Pdf" => "http://framework.zend.com/manual/en/zend.pdf.html",
      "Zend_ProgressBar" => "http://framework.zend.com/manual/en/zend.progressbar.html",
      "Zend_Registry" => "http://framework.zend.com/manual/en/zend.registry.html",
      "Zend_Rest" => "http://framework.zend.com/manual/en/zend.rest.html",
      "Zend_Search_Lucene" => "http://framework.zend.com/manual/en/zend.search.lucene.html",
      "Zend_Server_Reflection" => "http://framework.zend.com/manual/en/zend.server.reflection.html",
      "Zend_Service_Akismet" => "http://framework.zend.com/manual/en/zend.service.akismet.html",
      "Zend_Service_Amazon" => "http://framework.zend.com/manual/en/zend.service.amazon.html",
      "Zend_Service_Audioscrobbler" => "http://framework.zend.com/manual/en/zend.service.audioscrobbler.html",
      "Zend_Service_Delicious" => "http://framework.zend.com/manual/en/zend.service.delicious.html",
      "Zend_Service_Flickr" => "http://framework.zend.com/manual/en/zend.service.flickr.html",
      "Zend_Service_Nirvanix" => "http://framework.zend.com/manual/en/zend.service.nirvanix.html",
      "Zend_Service_ReCaptcha" => "http://framework.zend.com/manual/en/zend.service.recaptcha.html",
      "Zend_Service_Simpy" => "http://framework.zend.com/manual/en/zend.service.simpy.html",
      "Zend_Service_SlideShare" => "http://framework.zend.com/manual/en/zend.service.slideshare.html",
      "Zend_Service_StrikeIron" => "http://framework.zend.com/manual/en/zend.service.strikeiron.html",
      "Zend_Service_Technorati" => "http://framework.zend.com/manual/en/zend.service.technorati.html",
      "Zend_Service_Twitter" => "http://framework.zend.com/manual/en/zend.service.twitter.html",
      "Zend_Service_Yahoo" => "http://framework.zend.com/manual/en/zend.service.yahoo.html",
      "Zend_Session" => "http://framework.zend.com/manual/en/zend.session.html",
      "Zend_Soap" => "http://framework.zend.com/manual/en/zend.soap.html",
      "Zend_Test" => "http://framework.zend.com/manual/en/zend.test.html",
      "Zend_Text" => "http://framework.zend.com/manual/en/zend.text.html",
      "Zend_Timesync" => "http://framework.zend.com/manual/en/zend.timesync.html",
      "Zend_Translate" => "http://framework.zend.com/manual/en/zend.translate.html",
      "Zend_Uri" => "http://framework.zend.com/manual/en/zend.uri.html",
      "Zend_Validate" => "http://framework.zend.com/manual/en/zend.validate.html",
      "Zend_Version" => "http://framework.zend.com/manual/en/zend.version.html",
      "Zend_View" => "http://framework.zend.com/manual/en/zend.view.html",
      "Zend_Wildfire" => "http://framework.zend.com/manual/en/zend.wildfire.html",
      "Zend_XmlRpc" => "http://framework.zend.com/manual/en/zend.xmlrpc.html",
      "ZendX_Console_Process_Unix" => "http://framework.zend.com/manual/en/zendx.console.process.unix.html",
      "ZendX_JQuery" => "http://framework.zend.com/manual/en/zendx.jquery.html"
    );

    // Insert in table order, logging each result on its own line.
    foreach( $components as $component_name => $manual_url ) {
      $inserted = $this->getHierarchiesModel()->insert($category_id, 1, $component_name, $manual_url);
      $this->view->results .= $inserted."\n";
    }
  }
  /**
   * Scrapes the "Possible Values" section of every scrapeable CSS function page.
   *
   * For each row returned by the functions model, the page at the row's 'url'
   * is downloaded and the markup between the "<h2>Possible Values</h2>"
   * heading and the next "</table>" is appended to the view results. The
   * extracted markup is only displayed for now: the setData() call that would
   * store it is still commented out.
   *
   * Progress and skip reasons are appended to $this->view->results. Scraping
   * stops once $this->_pages_scraped exceeds MAX_PAGES_TO_SCRAPE.
   */
  private function scrapeCSSFunctions() {
    $category = 'CSS';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getFunctionsModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    $heading = '<h2>Possible Values</h2>';

    foreach( $scrapeable as $function ) {
      $this->_pages_scraped++;

      // Pause briefly every 100 pages.
      if( $this->_pages_scraped % 100 == 0 ) {
        sleep(1);
      }

      if( $this->_pages_scraped > ScrapeController::MAX_PAGES_TO_SCRAPE ) {
        $this->view->results .= 'Hit max page count for scraping.' . "\n";
        return;
      }

      $this->view->results .= $function['name'] . "\n";

      if( !$function['url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }

      $source_url = $function['url'];
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);
      if( $contents === FALSE ) {
        // Report a failed download as such instead of as a missing section.
        $this->view->results .= 'We couldn\'t fetch the page, skipping...' . "\n";
        continue;
      }

      $start_index = strpos($contents, $heading);
      if( $start_index === FALSE ) {
        $this->view->results .= 'We didn\'t find possible values, skipping...' . "\n";
        continue;
      }

      // Begin just after the heading so it is not part of the extracted data.
      $start_index += strlen($heading);

      $end_index = strpos($contents, '</table>', $start_index);
      if( $end_index === FALSE ) {
        $this->view->results .= 'We couldn\'t find the end of the possible values, skipping...' . "\n";
        continue;
      }

      // Slice with the offsets computed above. The previous code referenced
      // the undefined variables $start and $end here.
      $data = substr($contents, $start_index, $end_index - $start_index);
      $this->view->results .= $data."\n";

      /*
      Persisting the scraped markup is still disabled:
      $this->getFunctionsModel()->setData(array(
        'category' => $category_id,
        'id' => $function['id'],
        'data' => $data
      ));*/
    }
  }
  /**
   * Scrapes the module index of each supported Python documentation version.
   *
   * Only Python 2.6.1 is scraped at the moment.
   *
   * @param mixed $createFunctions Forwarded unchanged to scrapePythonModuleVersion().
   */
  private function scrapePythonModules($createFunctions) {
    // Disabled for now:
    // $this->scrapePythonModuleVersion($createFunctions, 'Python 3.0.1', 'http://docs.python.org/3.0/');

    $version = 'Python 2.6.1';
    $docs_root = 'http://docs.python.org/';

    $this->scrapePythonModuleVersion($createFunctions, $version, $docs_root);
  }
  /**
   * Scrapes the module index page of one Python documentation version.
   *
   * Downloads "<url_base>modindex.html", extracts module names, links and
   * descriptions with regular expressions, and then either:
   *  - when $createFunctions is truthy, passes each linked module to the
   *    functions model, provided a hierarchy named after the module's first
   *    dotted segment is found; or
   *  - otherwise, inserts every module without a dot in its name through the
   *    hierarchies model.
   *
   * @param mixed  $createFunctions Truthy to record functions, falsy to record hierarchies.
   * @param string $version         Category name to look up, e.g. 'Python 2.6.1'.
   * @param string $url_base        Base URL of the documentation; links are appended to it.
   */
  private function scrapePythonModuleVersion($createFunctions, $version, $url_base) {
    $category = $version;
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    // Flatten the page to one line so the patterns can span source line breaks.
    $contents = str_replace("\n", ' ', file_get_contents($url_base.'modindex.html'));

    // Groups 1 and 2 hold link and name for linked modules; group 3 holds the
    // name for entries that have no link.
    $module_pattern = '/(?:<td>(?:&nbsp;&nbsp;&nbsp;)? <a href="(.+?)"><tt class="xref">(.+?)<\/tt><\/a>(?: <em>.+?<\/em>)?<\/td><td>)|(?: <tt class="xref">(.+?)<\/tt><\/td><td>)/';
    if( !preg_match_all($module_pattern, $contents, $matches) ) {
      return;
    }

    $description_pattern = '/<em>(.*?)<\/em><\/td><\/tr>/';
    if( !preg_match_all($description_pattern, $contents, $desc_matches) ) {
      return;
    }

    $total = count($matches[1]);
    for( $position = 0; $position < $total; ++$position ) {
      $link = $url_base.$matches[1][$position];
      $name = $matches[2][$position];
      if( !$name ) {
        // Entry without a link: only the bare name was captured.
        $link = '';
        $name = $matches[3][$position];
      }

      // Descriptions are paired with modules purely by position.
      $desc = $desc_matches[1][$position];

      if( !$createFunctions ) {
        // Hierarchy mode: only modules without a dot in the name are inserted.
        if( strpos($name, '.') === false ) {
          $inserted = $this->getHierarchiesModel()->insert($category_id, 1, $name, $link, 1);
          $this->view->results .= $inserted."\n";
        }
        continue;
      }

      if( !$link ) {
        continue;
      }

      // The hierarchy is looked up by the first dotted segment of the name.
      $segments = explode('.', $name);
      $hierarchy_name = $segments[0];
      $hierarchy = $this->getHierarchiesModel()->fetchByName($category_id, 1, $hierarchy_name);
      if( !$hierarchy ) {
        continue;
      }

      $this->view->results .= 'Adding '.$name."\n";
      $this->view->results .= $link."\n";
      $this->view->results .= $desc."\n\n";

      $this->getFunctionsModel()->insertOrUpdateFunction(array(
        'category' => $category_id,
        'hierarchy' => $hierarchy,
        'name' => $name,
        'url' => $link,
        'short_description' => $desc,
        'scrapeable' => 1
      ));
    }
  }
  /**
   * Scrapes the "Table of Contents" list of every scrapeable PHP hierarchy.
   *
   * For each hierarchy row, the page at 'source_url' is downloaded and the
   * markup between "<h2>Table of Contents</h2>" and the next "</ul>" is parsed
   * into (link, name, description) triples. Each triple is handed to the
   * functions model and the hierarchy is then touch()ed. A list is skipped
   * entirely unless every "<li>" in it was parsed.
   *
   * Progress and skip reasons are appended to $this->view->results. Scraping
   * stops once $this->_pages_scraped exceeds MAX_PAGES_TO_SCRAPE.
   */
  private function scrapePHPHierarchies() {
    $category = 'PHP';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    // One list entry: relative link, function name, short description.
    $item_pattern = '/<li><a href="([a-zA-Z0-9_\-.]+)">([a-zA-Z0-9_:.\->]+)<\/a> — ([a-zA-Z0-9 \-_,.+;\[:\]<>=\/\'\(\)"#\\\\]+)<\/li>/';

    foreach( $scrapeable as $hierarchy ) {
      $this->_pages_scraped++;
      if( $this->_pages_scraped > ScrapeController::MAX_PAGES_TO_SCRAPE ) {
        $this->view->results .= 'Hit max page count for scraping.' . "\n";
        return;
      }

      $this->view->results .= $hierarchy['name'] . "\n";

      if( !$hierarchy['source_url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }

      $source_url = $hierarchy['source_url'];
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);

      $start_index = strpos($contents, '<h2>Table of Contents</h2>');
      if( $start_index === FALSE ) {
        $this->view->results .= 'We didn\'t find a Table of Contents, skipping...' . "\n";
        continue;
      }

      $end_index = strpos($contents, '</ul>', $start_index);
      if( $end_index === FALSE ) {
        $this->view->results .= 'We couldn\'t find the end of the list, skipping...' . "\n";
        continue;
      }

      // Work on the list as a single line so entries cannot span line breaks.
      $list_markup = substr($contents, $start_index, $end_index - $start_index);
      $line = str_replace("\n", '', $list_markup);

      if( !preg_match_all($item_pattern, $line, $matches) ) {
        $this->view->results .= 'We coulnd\'t find any functions in this list, skipping...' . "\n";
        $this->view->results .= $line . "\n";
        $this->view->results .= $start_index.'-'.$end_index . "\n";
        continue;
      }

      // Every <li> must have been parsed, otherwise the whole list is skipped.
      $parsed_count = count($matches[1]);
      $list_item_count = preg_match_all('/<li>/', $line, $nothing);
      if( $list_item_count != $parsed_count ) {
        $this->view->results .= 'We missed some items ('.($list_item_count - $parsed_count).') in the list, skipping...' . "\n";
        $this->view->results .= print_r($matches[2], TRUE);
        continue;
      }

      // Links in the list are relative to the directory of the source page.
      $dirname = dirname($source_url).'/';

      foreach( $matches[1] as $position => $relative_link ) {
        $this->getFunctionsModel()->insertOrUpdateFunction(array(
          'category' => $category_id,
          'hierarchy' => $hierarchy['id'],
          'name' => $matches[2][$position],
          'url' => $dirname . $relative_link,
          'short_description' => $matches[3][$position]
        ));
      }

      $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
    }
  }
  /**
   * Scrapes the "Description" synopsis of every scrapeable PHP function page.
   *
   * For each row returned by the functions model, the page at the row's 'url'
   * is downloaded. Pages containing 'classsynopsis' or a simpara span are
   * skipped. Pages containing the alias notice are touch()ed on the functions
   * model and skipped. Otherwise the markup from the first "<span" after the
   * Description heading up to the next "</div>" is rewritten with short tag
   * names and stored through the functions model's setData().
   *
   * Progress and skip reasons are appended to $this->view->results. Scraping
   * stops once $this->_pages_scraped exceeds MAX_PAGES_TO_SCRAPE.
   */
  private function scrapePHPFunctions() {
    $category = 'PHP';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getFunctionsModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    $heading = '<h3 class="title">Description</h3>';

    foreach( $scrapeable as $function ) {
      $this->_pages_scraped++;

      // Pause briefly every 100 pages.
      if( $this->_pages_scraped % 100 == 0 ) {
        sleep(1);
      }

      if( $this->_pages_scraped > ScrapeController::MAX_PAGES_TO_SCRAPE ) {
        $this->view->results .= 'Hit max page count for scraping.' . "\n";
        return;
      }

      $this->view->results .= $function['name'] . "\n";

      if( !$function['url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }

      $source_url = $function['url'];
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);
      if( $contents === FALSE ) {
        // Report a failed download as such instead of as a missing section.
        $this->view->results .= 'We couldn\'t fetch the page, skipping...' . "\n";
        continue;
      }

      if( strpos($contents, 'classsynopsis') !== FALSE ) {
        $this->view->results .= 'This is a class definition, skipping...' . "\n";
        continue;
      }

      if( strpos($contents, '<span class="simpara">') !== FALSE ) {
        $this->view->results .= 'This is not a function, skipping...' . "\n";
        continue;
      }

      $start_index = strpos($contents, $heading);
      if( $start_index === FALSE ) {
        $this->view->results .= 'We didn\'t find a Description, skipping...' . "\n";
        continue;
      }

      $start_index += strlen($heading);

      $end_index = strpos($contents, '</div>', $start_index);
      if( $end_index === FALSE ) {
        $this->view->results .= 'We couldn\'t find the end of the description, skipping...' . "\n";
        continue;
      }

      if( strpos($contents, 'This function is an alias of:') !== FALSE ) {
        $this->view->results .= 'This function appears to be an alias, skipping...' . "\n";
        $this->getFunctionsModel()->touch($category_id, $function['id']);
        continue;
      }

      // The synopsis starts at the first span that follows the heading.
      $start_index = strpos($contents, '<span', $start_index);
      if( $start_index === FALSE ) {
        $this->view->results .= 'We couldn\'t find any description tags, skipping...' . "\n";
        continue;
      }

      $line = str_replace("\n", '', substr($contents, $start_index, $end_index - $start_index));

      // Type spans may carry extra class names, so this needs a pattern. It was
      // previously passed to str_replace(), which only matches literal text and
      // therefore never replaced anything. [^"]* cannot run past the closing
      // quote of the class attribute.
      $line = preg_replace('/<span class="type[^"]*">/', '<st>', $line);

      // Drop bold markers and shorten the remaining opening span tags.
      // str_replace() applies array entries in order, like chained calls.
      $line = str_replace(
        array(
          "<b>",
          "</b>",
          "<span class=\"modifier\">",
          "<span class=\"methodname\">",
          "<span class=\"methodparam\">",
          "<span class=\"initializer\">"
        ),
        array(
          '',
          '',
          '<st>',
          '<sm>',
          '<smp>',
          '<si>'
        ),
        $line
      );

      $line = preg_replace("/<tt.+?>/", '<sp>', $line);

      // All shortened tags share one closing tag.
      $line = str_replace("</tt>", '</s>', $line);
      $line = str_replace("</span>", '</s>', $line);

      // Strip links but keep their text.
      $line = str_replace("</a>", '', $line);
      $line = preg_replace("/<a.+?>/", '', $line);

      // Collapse runs of spaces.
      $line = preg_replace('/( ){2,}/', ' ', $line);

      if( strlen($line) < 5 ) {
        $this->view->results .= 'Line is unreasonably small, skipping...' . "\n";
        continue;
      }

      $this->getFunctionsModel()->setData(array(
        'category' => $category_id,
        'id' => $function['id'],
        'data' => $line
      ));
    }
  }
  /**
   * Appends a "category not found" message to the view results.
   *
   * @param string $name Name of the category that could not be found.
   */
  private function invalid_category($name) {
    $message = 'We can\'t find the category you requested: ' . $name . "\n";
    $this->view->results .= $message;
  }
  /**
   * Appends a "nothing to scrape" message to the view results.
   *
   * @param string $name Name of the category that has nothing to scrape.
   */
  private function nothing_to_scrape($name) {
    $message = 'We can\'t find anything to scrape in the ' . $name . ' category.' . "\n";
    $this->view->results .= $message;
  }
  2316. }