PageRenderTime 58ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/application/controllers/ScrapeController.php

https://github.com/jverkoey/snaapilookup
PHP | 2654 lines | 2298 code | 304 blank | 52 comment | 301 complexity | f30d8d6021e73b335616cb04c791d2d4 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. include_once APPLICATION_PATH . '/controllers/SnaapiController.php';
  3. class ScrapeController extends SnaapiController {
  4. private $_pages_scraped;
  5. const MAX_PAGES_TO_SCRAPE = 1;
  /**
   * Controller initialisation hook.
   *
   * Runs the parent SnaapiController initialisation first. When the 'env'
   * invoke argument is 'development', the view renderer is told to render
   * 'index'.
   */
  public function init() {
    parent::init();

    $is_development = ( $this->getInvokeArg('env') == 'development' );
    if( $is_development ) {
      $this->_helper->viewRenderer->setRender('index');
    }
  }
  /**
   * Runs the PHP scrapers: hierarchies first, then functions.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function phpAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    // Disabled one-off cleanup that rewrote " ," to "," in stored function
    // data; kept for reference.
    /*$model = $this->getFunctionsModel();
    $db = $model->getTable()->getAdapter();
    $sql = "SELECT * FROM `functions` WHERE `data` LIKE '% ,%'";
    foreach( $db->query($sql)->fetchAll() as $result ) {
      $result['data'] = str_replace(" ,", ',', $result['data']);
      $this->getFunctionsModel()->setData(array(
        'category' => $result['category'],
        'id' => $result['id'],
        'data' => $result['data']
      ));
    }*/

    $this->scrapePHPHierarchies();
    $this->scrapePHPFunctions();
  }
  /**
   * Runs the Python module scraper (called with `true`).
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function pythonAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    $this->scrapePythonModules(true);
  }
  /**
   * Runs the CSS scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function cssAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    $this->scrapeCSSFunctions();
  }
  /**
   * Runs the Zend scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function zendAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    $this->scrapeZend();
  }
  /**
   * Runs the Facebook FBML phase-2 scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function fbAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    // Other Facebook scrapers, currently disabled:
    //$this->scrapeFacebook();
    //$this->scrapeFacebookFbml();
    $this->scrapeFacebookFbmlPhase2();
  }
  /**
   * Runs the second Django scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function djangoAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    // First Django scraper, currently disabled:
    //$this->scrapeDjango1();
    $this->scrapeDjango2();
  }
  /**
   * Runs the iPhone scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function iphoneAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    $this->scrapeiPhone();
    // Directory scraper, currently disabled:
    //$this->scrapeiPhoneDir();
  }
  /**
   * Runs the Javascript scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function jsAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    $this->scrapeJavascript();
  }
  /**
   * Runs the second jQuery scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function jqueryAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    // First jQuery scraper, currently disabled:
    //$this->scrapejQuery();
    $this->scrapejQuery2();
  }
  /**
   * Runs the Android function scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function androidAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    // Package list and package scrapers, currently disabled:
    //$this->scrapeAndroidPackageList();
    //$this->scrapeAndroidPackages(2);
    $this->scrapeAndroidFunctions();
  }
  /**
   * Runs the mootools function scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function mootoolsAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    $this->scrapeMootoolsFunctions();
  }
  /**
   * Runs the Clojure function scraper.
   *
   * Only available when the 'env' invoke argument is 'development'; any other
   * environment is forwarded via _forward('error', 'error').
   */
  public function clojureAction() {
    if( $this->getInvokeArg('env') != 'development' ) {
      $this->_forward('error', 'error');
      return;
    }

    // Start each run with empty output and a zeroed page counter.
    $this->view->results = '';
    $this->_pages_scraped = 0;

    // Hierarchy scraper, currently disabled:
    //$this->scrapeClojureHierarchies();
    $this->scrapeClojureFunctions();
  }
  /**
   * Imports Clojure functions from the locally saved api.html.
   *
   * The page is split on '<h2 id="' into sections, and each section is split
   * on '<hr>' into entries. Every <h3 id="..."> heading inside an entry is
   * stored as one function; all headings of an entry share its description.
   * Progress and skip messages are appended to $this->view->results.
   *
   * Sections whose hierarchy cannot be looked up are skipped rather than
   * stored under an invalid hierarchy id.
   */
  private function scrapeClojureFunctions() {
    $category = 'Clojure';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $contents = file_get_contents(APPLICATION_PATH . '/scraper/clojure/api.html');
    if( $contents === false ) {
      // file_get_contents returns false on failure; there is nothing to parse.
      $this->view->results .= 'Couldn\'t read the Clojure API page, skipping...' . "\n";
      return;
    }

    // Drop everything before the first '<h2 id="' heading.
    $hierarchies = array_slice(explode('<h2 id="', $contents), 1);
    foreach( $hierarchies as $hierarchy ) {
      if( !preg_match('/(.+?)">(.+?)<\/h2>/', $hierarchy, $heading) ) {
        $this->view->results .= 'No name found, skipping...' . "\n";
        continue;
      }

      $hierarchy_name = trim($heading[2]);
      // The second argument is 1 as in the original call -- presumably the
      // parent hierarchy id; confirm against the hierarchies model.
      $sub_id = $this->getHierarchiesModel()->fetchByName($category_id, 1, $hierarchy_name);
      if( !$sub_id ) {
        $this->view->results .= $hierarchy_name . "\n";
        $this->view->results .= 'Couldn\'t find the hierarchy, skipping...' . "\n";
        continue;
      }

      // The chunk before the first '<hr>' is the section heading, not an entry.
      $functions = array_slice(explode('<hr>', $hierarchy), 1);
      foreach( $functions as $function ) {
        if( !preg_match_all('/<h3 id="(.+?)">(.+?)<\/h3>/', $function, $matches) ) {
          $this->view->results .= 'No function info found, skipping...' . "\n";
          $this->view->results .= $function . "\n\n";
          continue;
        }

        // Newlines are flattened so the description can be matched on one line.
        if( !preg_match('/(?:.+<\/h3>) (.+?)<br>/', str_replace("\n", ' ', $function), $desc_matches) ) {
          $this->view->results .= 'No desc found, skipping...' . "\n";
          $this->view->results .= $function . "\n\n";
          continue;
        }

        $desc = trim(strip_tags($desc_matches[1]));
        $heading_count = count($matches[0]);
        for( $index = 0; $index < $heading_count; ++$index ) {
          $url = 'http://clojure.org/api#'.$matches[1][$index];
          $name = trim(str_replace('&amp;', '&', strip_tags($matches[2][$index])));

          $this->view->results .= $sub_id ."\n";
          $this->view->results .= $name ."\n";
          $this->view->results .= $url ."\n";
          $this->view->results .= $desc ."\n\n";

          $this->getFunctionsModel()->insertOrUpdateFunction(array(
            'category' => $category_id,
            'hierarchy' => $sub_id,
            'name' => $name,
            'url' => $url,
            'short_description' => $desc,
            'scrapeable' => 0
          ));
        }
      }
    }
  }
  /**
   * Creates one hierarchy per '<h2 id="...">' section of the locally saved
   * Clojure api.html.
   *
   * The result of each insert is appended to $this->view->results.
   */
  private function scrapeClojureHierarchies() {
    $category = 'Clojure';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $contents = file_get_contents(APPLICATION_PATH . '/scraper/clojure/api.html');

    // Everything before the first '<h2 id="' heading is discarded.
    $sections = array_slice(explode('<h2 id="', $contents), 1);
    foreach( $sections as $section ) {
      $found = preg_match('/(.+?)">(.+?)<\/h2>/', $section, $heading);
      if( !$found ) {
        $this->view->results .= 'No name found, skipping...' . "\n";
        continue;
      }

      $anchor = $heading[1];
      $name = trim($heading[2]);
      $url = 'http://clojure.org/api#'.$anchor;

      // Arguments 1 and 0 are passed through unchanged -- presumably the
      // parent hierarchy id and a "scrapeable" flag; confirm against the model.
      $inserted = $this->getHierarchiesModel()->insert($category_id, 1, $name, $url, 0);
      $this->view->results .= $inserted."\n";
    }
  }
  /**
   * Scrapes every scrapeable 'mootools' hierarchy page.
   *
   * A page is first searched for <h2> headings of the form
   * "... Function|Method|Property|Selector|Event: name". When any are found,
   * each '<h2 id="' chunk is stored as one function. Otherwise the page is
   * searched for "<li>name - (<em>type</em>) description</li>" feature items.
   * A hierarchy is touched after either kind of import, but not when neither
   * headings nor features were found.
   */
  private function scrapeMootoolsFunctions() {
    $category = 'mootools';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    // Whole-page pattern, and the same pattern for a chunk that already had
    // its leading '<h2 id="' removed by explode().
    $page_pattern = '/<h2 id=".+?"(?: class="description")?><a href="(.+?)">(?:(?:.+? )?(?:Function|Method|Property|Selector|Event)): (.+?)<\/a><\/h2>/';
    $chunk_pattern = '/.+?"(?: class="description")?><a href="(.+?)">(?:(?:.+? )?(?:Function|Method|Property|Selector|Event)): (.+?)<\/a><\/h2>/';
    $feature_pattern = '/<li>(.+?) - \(<em>(.+?)<\/em>\) (.+?)<\/li>/';

    foreach( $scrapeable as $hierarchy ) {
      $this->view->results .= $hierarchy['name'] . "\n";

      $source_url = $hierarchy['source_url'];
      if( !$source_url ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);
      $data = substr($contents, strpos($contents, '<h2 id="'));

      if( preg_match_all($page_pattern, $data, $matches) ) {
        $chunks = array_slice(explode('<h2 id="', $data), 1);
        foreach( $chunks as $chunk ) {
          // The description is optional; newlines are flattened to match it.
          $desc = '';
          if( preg_match('/<p class="description">(.+?)<\/p>/', str_replace("\n", ' ', $chunk), $desc_match) ) {
            $desc = $desc_match[1];
          }

          if( !preg_match($chunk_pattern, $chunk, $entry) ) {
            $this->view->results .= 'Couldn\'t find the function name, skipping...' . "\n";
            continue;
          }

          $url = $source_url . $entry[1];
          $name = trim($entry[2]);
          // Names outside 'Core' get the hierarchy name as a prefix, unless
          // they start with '$'.
          if( $hierarchy['name'] != 'Core' && $name[0] != '$' ) {
            $name = $hierarchy['name'].'.'.$name;
          }
          $desc = trim(strip_tags($desc));

          $this->view->results .= $name."\n";
          $this->view->results .= $url."\n";
          $this->view->results .= $desc."\n\n";

          $this->getFunctionsModel()->insertOrUpdateFunction(array(
            'category' => $category_id,
            'hierarchy' => $hierarchy['id'],
            'name' => $name,
            'url' => $url,
            'short_description' => $desc,
            'scrapeable' => 0
          ));
        }
      } else {
        $this->view->results .= 'No functions found, checking for features...' . "\n";
        // Features are searched in the whole page, not just from the first <h2>.
        if( !preg_match_all($feature_pattern, $contents, $matches) ) {
          $this->view->results .= 'No features found, skipping...' . "\n";
          continue;
        }

        foreach( $matches[1] as $index => $raw_name ) {
          $url = $source_url;
          $name = trim($raw_name);
          $desc = trim(strip_tags($matches[3][$index]));

          $this->view->results .= $name."\n";
          $this->view->results .= $url."\n";
          $this->view->results .= $desc."\n\n";

          $this->getFunctionsModel()->insertOrUpdateFunction(array(
            'category' => $category_id,
            'hierarchy' => $hierarchy['id'],
            'name' => $name,
            'url' => $url,
            'short_description' => $desc,
            'scrapeable' => 0
          ));
        }
      }

      $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
    }
  }
  /**
   * Scrapes every scrapeable 'android' hierarchy page into one function row.
   *
   * For each page the name is taken from the "jd-inheritance-class-cell"
   * table cell and the description from the first paragraph after the
   * "Class Overview" heading. The row is stored with 'scrapeable' => 1 and
   * the hierarchy is touched.
   *
   * A page that cannot be fetched, or that has no name, is skipped. The run
   * then continues with the next hierarchy; it is not aborted.
   */
  private function scrapeAndroidFunctions() {
    $category = 'android';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    $OVERVIEW_TXT = '<h2>Class Overview</h2>';

    foreach( $scrapeable as $hierarchy ) {
      $this->view->results .= $hierarchy['name'] . "\n";
      if( !$hierarchy['source_url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }

      $source_url = $hierarchy['source_url'];
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);
      if( $contents === false ) {
        // file_get_contents returns false on failure; nothing to parse.
        $this->view->results .= 'Couldn\'t fetch the page, skipping...' . "\n";
        continue;
      }

      if( !preg_match('/<td colspan="1" class="jd-inheritance-class-cell">(.+?)<\/td>/', $contents, $matches) ) {
        // Skip only this page; the original `break` abandoned all remaining
        // hierarchies, which contradicts the "skipping" message.
        $this->view->results .= 'No name found, skipping...' . "\n";
        continue;
      }
      $name = $matches[1];
      $this->view->results .= $name ."\n";

      // Description: text between the overview heading and the next '</p>'.
      $desc = '';
      $desc_start = strpos($contents, $OVERVIEW_TXT);
      if( false !== $desc_start ) {
        $desc_start += strlen($OVERVIEW_TXT);
        $desc_end = strpos($contents, '</p>', $desc_start);
        if( false !== $desc_end ) {
          $raw_desc = substr($contents, $desc_start, $desc_end - $desc_start);
          $desc = trim(strip_tags(str_replace("\n", ' ', $raw_desc)));
        }
      }

      if( $desc == '' ) {
        $this->view->results .= 'No description found...'."\n";
      } else {
        $this->view->results .= $desc ."\n";
      }

      $this->getFunctionsModel()->insertOrUpdateFunction(array(
        'category' => $category_id,
        'hierarchy' => $hierarchy['id'],
        'name' => $name,
        'url' => $source_url,
        'short_description' => $desc,
        'scrapeable' => 1
      ));
      $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
    }
  }
  /**
   * Processes one "<li><h2>NAME</h2>" section of an Android package page.
   *
   * Mode 1 inserts the section itself as a child hierarchy of $hierarchy,
   * using $source_url. Mode 2 looks up that child hierarchy by name and
   * inserts every linked member of the section underneath it.
   *
   * @param string $links       Page HTML to search.
   * @param string $name        Section heading text, e.g. 'Classes'.
   * @param int    $mode        1 or 2 as described above; other values do nothing.
   * @param mixed  $category_id Category the hierarchies belong to.
   * @param mixed  $hierarchy   Id of the parent (package) hierarchy.
   * @param string $source_url  URL of the page being processed.
   * @return bool false when mode 2 could not process a section that is present
   *              (no section end, no parent hierarchy, or no members);
   *              true otherwise, including when the section is absent.
   */
  private function process_section($links, $name, $mode, $category_id, $hierarchy, $source_url) {
    $section_start = strpos($links, '<li><h2>'.$name.'</h2>');
    if( $section_start === false ) {
      // A page without this section is not an error.
      return true;
    }

    if( $mode == 1 ) {
      $this->view->results .= $this->getHierarchiesModel()->insert($category_id, $hierarchy, $name, $source_url, 0)."\n";
      return true;
    }
    if( $mode != 2 ) {
      return true;
    }

    $section_end = strpos($links, ' </li>', $section_start);
    if( $section_end === false ) {
      // Without an end marker the substr() length would be negative and the
      // extracted data meaningless, so report the section as failed instead.
      $this->view->results .= 'Couldn\'t find the end of the '.$name.' section, skipping...' . "\n";
      return false;
    }
    $data = substr($links, $section_start, $section_end - $section_start);

    $sub_id = $this->getHierarchiesModel()->fetchByName($category_id, $hierarchy, $name);
    if( !$sub_id ) {
      $this->view->results .= $hierarchy."\n";
      $this->view->results .= $name."\n";
      $this->view->results .= 'Couldn\'t find any parent hierarchy, skipping...' . "\n";
      return false;
    }

    if( !preg_match_all('/<li><a href="(.+?)">(.+?)<\/a>(?:&lt;T&gt;)?<\/li>/', $data, $matches) ) {
      $this->view->results .= 'Couldn\'t find any members name, skipping...' . "\n";
      return false;
    }

    $member_count = count($matches[0]);
    for( $index = 0; $index < $member_count; ++$index ) {
      // Use dedicated locals so the $name parameter is not overwritten.
      $member_name = $matches[2][$index];
      $member_url = 'http://developer.android.com'.$matches[1][$index];
      $this->view->results .= $this->getHierarchiesModel()->insert($category_id, $sub_id, $member_name, $member_url, 1)."\n";
    }

    return true;
  }
  /**
   * Runs process_section() over the Interfaces, Classes, Exceptions and Enums
   * sections of every scrapeable 'android' package page.
   *
   * Sections are processed in that order and processing of a page stops at
   * the first section that reports failure. In mode 2 the hierarchy is
   * touched only when every section succeeded.
   *
   * @param int $mode Passed through to process_section() (1 or 2).
   */
  private function scrapeAndroidPackages($mode) {
    $category = 'android';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    $section_names = array('Interfaces', 'Classes', 'Exceptions', 'Enums');

    foreach( $scrapeable as $hierarchy ) {
      $source_url = $hierarchy['source_url'];
      if( !$source_url ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }

      $contents = file_get_contents($source_url);
      // Only the part of the page from the end of the package navigation
      // onwards is searched.
      $links = substr($contents, strpos($contents, '</div> <!-- end resize-packages -->'));

      $succeeded = true;
      foreach( $section_names as $section_name ) {
        if( !$this->process_section($links, $section_name, $mode, $category_id, $hierarchy['id'], $source_url) ) {
          // Later sections are not attempted after a failure.
          $succeeded = false;
          break;
        }
      }

      if( $succeeded && $mode == 2 ) {
        $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
      }
    }
  }
  /**
   * Reads the Android reference package index and inserts one hierarchy per
   * package link found from the "packages-nav" div onwards.
   *
   * The result of each insert is appended to $this->view->results.
   */
  private function scrapeAndroidPackageList() {
    $category = 'android';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $contents = file_get_contents('http://developer.android.com/reference/packages.html');

    $nav_offset = strpos($contents, '<div id="packages-nav">');
    if( false === $nav_offset ) {
      $this->view->results .= 'Couldn\'t find the packages navigation, skipping...' . "\n";
      return;
    }

    $links = substr($contents, $nav_offset);
    $link_count = preg_match_all('/<a href="(.+?)">(.+?)<\/a><\/li>/', $links, $matches);
    if( !$link_count ) {
      $this->view->results .= 'Couldn\'t find any links, skipping...' . "\n";
      return;
    }

    foreach( $matches[1] as $index => $path ) {
      $name = $matches[2][$index];
      $url = 'http://developer.android.com'.$path;
      // Arguments 1 and 1 are passed through unchanged -- presumably the
      // parent hierarchy id and a "scrapeable" flag; confirm against the model.
      $inserted = $this->getHierarchiesModel()->insert($category_id, 1, $name, $url, 1);
      $this->view->results .= $inserted."\n";
    }
  }
  /**
   * Scrapes the options, events and methods of every scrapeable 'jQuery'
   * hierarchy page.
   *
   * The three sections are handled in that order by scrapejQuery2Section().
   * As before, a section that cannot be located ends processing of that page,
   * so later sections of the same page are not attempted.
   *
   * Stored names are the lower-cased hierarchy name followed by " <name>" for
   * options and events, and by "('<name>')" for methods.
   */
  private function scrapejQuery2() {
    $category = 'jQuery';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    // Set to false for a dry run that only reports what would be stored.
    $is_saving = true;

    // Each row: plural section id, singular item class, name prefix, name suffix.
    $sections = array(
      array('options', 'option', ' ', ''),
      array('events', 'event', ' ', ''),
      array('methods', 'method', '(\'', '\')')
    );

    foreach( $scrapeable as $hierarchy ) {
      $this->view->results .= $hierarchy['name'] . "\n";
      if( !$hierarchy['source_url'] ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }

      $source_url = $hierarchy['source_url'];
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);
      if( $contents === false ) {
        // file_get_contents returns false on failure; nothing to parse.
        $this->view->results .= 'Couldn\'t fetch the page, skipping...' . "\n";
        continue;
      }

      foreach( $sections as $section ) {
        $found = $this->scrapejQuery2Section(
          $contents, $source_url, $category_id, $hierarchy,
          $section[0], $section[1], $section[2], $section[3], $is_saving
        );
        if( !$found ) {
          // Same as the original `continue`: give up on this page.
          break;
        }
      }
    }
  }

  /**
   * Scrapes one '<div id="PLURAL">' section of a jQuery page.
   *
   * @param string $contents    Full page HTML.
   * @param string $source_url  Page URL; item hrefs are appended to it.
   * @param mixed  $category_id Category the functions are stored under.
   * @param array  $hierarchy   Hierarchy row; 'id' and 'name' are read.
   * @param string $plural      Section id, e.g. 'options'.
   * @param string $singular    Item class, e.g. 'option'.
   * @param string $name_prefix Text between the hierarchy name and the item name.
   * @param string $name_suffix Text appended after the item name.
   * @param bool   $is_saving   Whether matched items are written to the models.
   * @return bool false when the section or its end could not be located.
   */
  private function scrapejQuery2Section($contents, $source_url, $category_id, $hierarchy,
                                        $plural, $singular, $name_prefix, $name_suffix, $is_saving) {
    $start_index = strpos($contents, '<div id="'.$plural.'">');
    if( $start_index === false ) {
      $this->view->results .= 'Couldn\'t find the '.$plural.', skipping...' . "\n";
      return false;
    }

    // The section runs up to the next '<div id="' after its own opening tag.
    $end_index = strpos($contents, '<div id="', $start_index+1);
    if( $end_index === false ) {
      $this->view->results .= 'Couldn\'t find the end of the '.$plural.', skipping...' . "\n";
      return false;
    }

    $source_name = strtolower($hierarchy['name']);
    $data = str_replace("\n", '', substr($contents, $start_index, $end_index - $start_index));
    $pattern = '/<h3 class="'.$singular.'-name"><a href="(.+?)">(.+?)<\/a><\/h3>.+?<p>(.+?)<\/p>/';

    $elements = explode('<li class="'.$singular.'"', $data);
    foreach( $elements as $element ) {
      if( !preg_match($pattern, $element, $matches) ) {
        //$this->view->results .= htmlentities($element)."\n\n";
        continue;
      }

      $link = $source_url.$matches[1];
      $item_name = trim(str_replace(' )', ')', str_replace('&nbsp;', '', strip_tags($matches[2]))));
      $name = $source_name.$name_prefix.$item_name.$name_suffix;
      $desc = trim(strip_tags($matches[3], '<b>'));

      $this->view->results .= $link.' - '.$name."\n";
      $this->view->results .= $desc."\n\n";

      if( $is_saving ) {
        $this->getFunctionsModel()->insertOrUpdateFunction(array(
          'category' => $category_id,
          'hierarchy' => $hierarchy['id'],
          'name' => $name,
          'url' => $link,
          'short_description' => $desc
        ));
        $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
      }
    }

    return true;
  }
  /**
   * Scrapes the "options list" table of every scrapeable 'jQuery' hierarchy
   * page (docs.jquery.com layout).
   *
   * The region between '<div class="options list">' and
   * '<div class="printfooter">' is split on 'tr class="option"'. Each chunk
   * that matches the link/description pattern is stored as a function, and
   * the hierarchy is touched after each stored function.
   */
  private function scrapejQuery() {
    $category = 'jQuery';
    $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
    if( !$category_id ) {
      $this->invalid_category($category);
      return;
    }

    $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
    if( empty($scrapeable) ) {
      $this->nothing_to_scrape($category);
      return;
    }

    $row_pattern = '/<a href="(.+?)" title=".+?">(.+?)<\/a><\/b>.+?<td colspan="2" class="desc">(.+?)<\/td>/';

    foreach( $scrapeable as $hierarchy ) {
      $this->view->results .= $hierarchy['name'] . "\n";

      $source_url = $hierarchy['source_url'];
      if( !$source_url ) {
        $this->view->results .= 'No source URL specified, skipping...' . "\n";
        continue;
      }
      $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";

      $contents = file_get_contents($source_url);

      $list_start = strpos($contents, '<div class="options list">');
      if( false === $list_start ) {
        $this->view->results .= 'Couldn\'t find the options list, skipping...' . "\n";
        continue;
      }

      $list_end = strpos($contents, '<div class="printfooter">', $list_start);
      if( false === $list_end ) {
        $this->view->results .= 'Couldn\'t find the end of the options list, skipping...' . "\n";
        continue;
      }

      $rows = explode('tr class="option"', substr($contents, $list_start, $list_end - $list_start));
      foreach( $rows as $row ) {
        if( !preg_match($row_pattern, $row, $cells) ) {
          //$this->view->results .= htmlentities($row)."\n\n";
          continue;
        }

        $link = 'http://docs.jquery.com'.$cells[1];
        $plain_name = str_replace('&nbsp;', '', strip_tags($cells[2]));
        $name = trim(str_replace(' )', ')', $plain_name));
        $desc = trim(strip_tags($cells[3], '<b>'));

        $this->view->results .= $link.' - '.$name."\n";
        $this->view->results .= $desc."\n\n";

        $this->getFunctionsModel()->insertOrUpdateFunction(array(
          'category' => $category_id,
          'hierarchy' => $hierarchy['id'],
          'name' => $name,
          'url' => $link,
          'short_description' => $desc
        ));
        $this->getHierarchiesModel()->touch($category_id, $hierarchy['id']);
      }
    }
  }
  602. private function scrapeJavascript() {
  603. $category = 'Javascript';
  604. $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
  605. if( !$category_id ) {
  606. $this->invalid_category($category);
  607. return;
  608. }
  609. $scrapeable = $this->getHierarchiesModel()->fetchAllScrapeable($category_id);
  610. if( empty($scrapeable) ) {
  611. $this->nothing_to_scrape($category);
  612. return;
  613. }
  614. foreach( $scrapeable as $hierarchy ) {
  615. $this->view->results .= $hierarchy['name'] . "\n";
  616. if( !$hierarchy['source_url'] ) {
  617. $this->view->results .= 'No source URL specified, skipping...' . "\n";
  618. continue;
  619. }
  620. $source_url = $hierarchy['source_url'];
  621. $this->view->results .= '<a href="'.$source_url.'">'.$source_url."</a>\n";
  622. $contents = file_get_contents(APPLICATION_PATH . '/scraper/js/'.$hierarchy['id'].'.html');
  623. if( preg_match("/<h2>The (.+?) Object<\/h2>\n<p>(.+)?<\/p>/", $contents, $matches) ) {
  624. $object_name = $matches[1];
  625. $description = $matches[2];
  626. $this->view->results .= $object_name."\n";
  627. $this->view->results .= $description."\n";
  628. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  629. 'category' => $category_id,
  630. 'hierarchy' => $hierarchy['id'],
  631. 'name' => $object_name,
  632. 'url' => $source_url,
  633. 'short_description' => $description
  634. ));
  635. } else {
  636. $this->view->results .= 'We couldn\'t find the description...' . "\n";
  637. }
  638. $is_dom = strpos($contents, 'HTML DOM <span class="color_h1">') !== false;
  639. if( $is_dom ) {
  640. $object_name = strtolower(str_replace(' ', '', $hierarchy['name']));
  641. }
  642. $properties_index = strpos($contents, 'Object Collections</h');
  643. $end_index = strpos($contents, '</table>', $properties_index);
  644. if( $properties_index !== FALSE && $end_index !== FALSE ) {
  645. $properties = array_slice(
  646. explode(
  647. '<tr>',
  648. substr($contents, $properties_index, $end_index - $properties_index)
  649. ),
  650. 2
  651. );
  652. foreach( $properties as $property ) {
  653. $elements = explode('<td', $property);
  654. foreach( $elements as &$element ) {
  655. $element = trim(
  656. str_replace(
  657. '&nbsp;',
  658. '',
  659. preg_replace(
  660. '/^.+?>/',
  661. '',
  662. str_replace(
  663. "\n",
  664. '',
  665. strip_tags(
  666. $element,
  667. '<a>'
  668. )
  669. )
  670. )
  671. )
  672. );
  673. }
  674. if( count($elements) <= 1 ) {
  675. $this->view->results .= 'Invalid element list.'."\n";
  676. $this->view->results .= print_r($property, true);
  677. break;
  678. }
  679. $link = $elements[1];
  680. $desc = $elements[2];
  681. $ff = $elements[3];
  682. if( count($elements) >= 6 ) {
  683. $ns = $elements[4];
  684. $ie = $elements[5];
  685. } else {
  686. $ie = $elements[4];
  687. }
  688. $name = '';
  689. if( $link ) {
  690. if( preg_match('/<a href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  691. $link = $matches[1];
  692. $name = $matches[2];
  693. } else {
  694. $name = $link;
  695. $link = '';
  696. }
  697. }
  698. $name = str_replace('[]', '', $name);
  699. $this->view->results .= $object_name.'.'.$name ." - ";
  700. $this->view->results .= $link ." - ".$is_dom.' - ';
  701. $this->view->results .= $desc ."\n";
  702. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  703. 'category' => $category_id,
  704. 'hierarchy' => $hierarchy['id'],
  705. 'name' => $object_name.'.'.$name,
  706. 'url' => $link,
  707. 'short_description' => $desc
  708. ));
  709. }
  710. }
  711. $properties_index = strpos($contents, 'Object Properties</h');
  712. $end_index = strpos($contents, '</table>', $properties_index);
  713. if( $properties_index !== FALSE && $end_index !== FALSE ) {
  714. $properties = array_slice(
  715. explode(
  716. '<tr>',
  717. substr($contents, $properties_index, $end_index - $properties_index)
  718. ),
  719. 2
  720. );
  721. foreach( $properties as $property ) {
  722. $elements = explode('<td', $property);
  723. foreach( $elements as &$element ) {
  724. $element = trim(
  725. str_replace(
  726. '&nbsp;',
  727. '',
  728. preg_replace(
  729. '/^.+?>/',
  730. '',
  731. str_replace(
  732. "\n",
  733. '',
  734. strip_tags(
  735. $element,
  736. '<a>'
  737. )
  738. )
  739. )
  740. )
  741. );
  742. }
  743. if( count($elements) <= 1 ) {
  744. $this->view->results .= 'Invalid element list.'."\n";
  745. $this->view->results .= print_r($property, true);
  746. break;
  747. }
  748. $link = $elements[1];
  749. $desc = $elements[2];
  750. $ff = $elements[3];
  751. if( count($elements) >= 6 ) {
  752. $ns = $elements[4];
  753. $ie = $elements[5];
  754. } else {
  755. $ie = $elements[4];
  756. }
  757. $name = '';
  758. if( $link ) {
  759. if( preg_match('/<a href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  760. $link = $matches[1];
  761. $name = $matches[2];
  762. } else {
  763. $name = $link;
  764. $link = '';
  765. }
  766. }
  767. $this->view->results .= $object_name.'.'.$name ." - ";
  768. $this->view->results .= $link ." - ".$is_dom.' - ';
  769. $this->view->results .= $desc ."\n";
  770. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  771. 'category' => $category_id,
  772. 'hierarchy' => $hierarchy['id'],
  773. 'name' => $object_name.'.'.$name,
  774. 'url' => $link,
  775. 'short_description' => $desc
  776. ));
  777. }
  778. }
  779. $methods_index = strpos($contents, 'Object Methods</h');
  780. $end_index = strpos($contents, '</table>', $methods_index);
  781. if( $methods_index !== FALSE && $end_index !== FALSE ) {
  782. $methods = array_slice(
  783. explode(
  784. '<tr>',
  785. substr($contents, $methods_index, $end_index - $methods_index)
  786. ),
  787. 2
  788. );
  789. foreach( $methods as $method ) {
  790. $elements = explode('<td valign="top">', $method);
  791. foreach( $elements as &$element ) {
  792. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  793. }
  794. $link = $elements[1];
  795. $desc = $elements[2];
  796. $ff = $elements[3];
  797. if( count($elements) >= 6 ) {
  798. $ns = $elements[4];
  799. $ie = $elements[5];
  800. } else {
  801. $ie = $elements[4];
  802. }
  803. $name = '';
  804. if( $link ) {
  805. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  806. $link = $matches[1];
  807. $name = $matches[2];
  808. } else {
  809. $name = $link;
  810. $link = '';
  811. }
  812. }
  813. $name = preg_replace('/(\(.*?\))/', '', $name);
  814. $this->view->results .= $object_name.'.'.$name ." - ";
  815. $this->view->results .= $link ." - ";
  816. $this->view->results .= $desc ."\n";
  817. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  818. 'category' => $category_id,
  819. 'hierarchy' => $hierarchy['id'],
  820. 'name' => $object_name.'.'.$name,
  821. 'url' => $link,
  822. 'short_description' => $desc,
  823. 'scrapeable' => 1
  824. ));
  825. }
  826. continue;
  827. }
  828. $start_index = strpos($contents, 'Top-level Functions</h2>');
  829. $end_index = strpos($contents, '</table>', $start_index);
  830. $start_prop_index = strpos($contents, 'Top-level Properties</h2>');
  831. $end_prop_index = strpos($contents, '</table>', $start_prop_index);
  832. if( $start_index !== false && $end_index !== false &&
  833. $start_prop_index !== false && $end_prop_index !== false ) {
  834. $functions = array_slice(
  835. explode(
  836. '<tr>',
  837. substr($contents, $start_index, $end_index - $start_index)
  838. ),
  839. 2
  840. );
  841. foreach( $functions as $function ) {
  842. $elements = explode('<td valign="top">', $function);
  843. foreach( $elements as &$element ) {
  844. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  845. }
  846. $link = $elements[1];
  847. $desc = $elements[2];
  848. $ff = $elements[3];
  849. if( count($elements) >= 6 ) {
  850. $ns = $elements[4];
  851. $ie = $elements[5];
  852. } else {
  853. $ie = $elements[4];
  854. }
  855. $name = '';
  856. if( $link ) {
  857. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  858. $link = $matches[1];
  859. $name = $matches[2];
  860. } else {
  861. $name = $link;
  862. $link = '';
  863. }
  864. }
  865. $name = preg_replace('/(\(.*?\))/', '', $name);
  866. $this->view->results .= $name ." - ";
  867. $this->view->results .= $link ." - ";
  868. $this->view->results .= $desc ."\n";
  869. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  870. 'category' => $category_id,
  871. 'hierarchy' => $hierarchy['id'],
  872. 'name' => $name,
  873. 'url' => $link,
  874. 'short_description' => $desc,
  875. 'scrapeable' => 1
  876. ));
  877. }
  878. $properties = array_slice(
  879. explode(
  880. '<tr>',
  881. substr($contents, $start_prop_index, $end_prop_index - $start_prop_index)
  882. ),
  883. 2
  884. );
  885. foreach( $properties as $property ) {
  886. $elements = explode('<td valign="top">', $property);
  887. foreach( $elements as &$element ) {
  888. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  889. }
  890. $link = $elements[1];
  891. $desc = $elements[2];
  892. $ff = $elements[3];
  893. if( count($elements) >= 6 ) {
  894. $ns = $elements[4];
  895. $ie = $elements[5];
  896. } else {
  897. $ie = $elements[4];
  898. }
  899. $name = '';
  900. if( $link ) {
  901. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  902. $link = $matches[1];
  903. $name = $matches[2];
  904. } else {
  905. $name = $link;
  906. $link = '';
  907. }
  908. }
  909. $name = preg_replace('/(\(.*?\))/', '', $name);
  910. $this->view->results .= $name ." - ";
  911. $this->view->results .= $link ." - ";
  912. $this->view->results .= $desc ."\n";
  913. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  914. 'category' => $category_id,
  915. 'hierarchy' => $hierarchy['id'],
  916. 'name' => $name,
  917. 'url' => $link,
  918. 'short_description' => $desc,
  919. 'scrapeable' => 1
  920. ));
  921. }
  922. continue;
  923. }
  924. $start_index = strpos($contents, '<h2>Event Handlers</h2>');
  925. $end_index = strpos($contents, '</table>', $start_index);
  926. if( $start_index !== false && $end_index !== false ) {
  927. $events = array_slice(
  928. explode(
  929. '<tr>',
  930. substr($contents, $start_index, $end_index - $start_index)
  931. ),
  932. 2
  933. );
  934. foreach( $events as $event ) {
  935. $elements = explode('<td valign="top">', $event);
  936. foreach( $elements as &$element ) {
  937. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  938. }
  939. $link = $elements[1];
  940. $desc = $elements[2];
  941. $ff = $elements[3];
  942. if( count($elements) >= 6 ) {
  943. $ns = $elements[4];
  944. $ie = $elements[5];
  945. } else {
  946. $ie = $elements[4];
  947. }
  948. $name = '';
  949. if( $link ) {
  950. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  951. $link = $matches[1];
  952. $name = $matches[2];
  953. } else {
  954. $name = $link;
  955. $link = '';
  956. }
  957. }
  958. $name = preg_replace('/(\(.*?\))/', '', $name);
  959. if( $is_dom ) {
  960. $name = 'event.'.$name;
  961. }
  962. $this->view->results .= $name ." - ";
  963. $this->view->results .= $link ." - " . $is_dom.' - ';
  964. $this->view->results .= $desc ."\n";
  965. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  966. 'category' => $category_id,
  967. 'hierarchy' => $hierarchy['id'],
  968. 'name' => $name,
  969. 'url' => $link,
  970. 'short_description' => $desc,
  971. 'scrapeable' => 1
  972. ));
  973. }
  974. if( !$is_dom ) {
  975. continue;
  976. }
  977. }
  978. $start_index = strpos($contents, 'Keyboard Attributes</h');
  979. $end_index = strpos($contents, '</table>', $start_index);
  980. if( $start_index !== false && $end_index !== false ) {
  981. $events = array_slice(
  982. explode(
  983. '<tr>',
  984. substr($contents, $start_index, $end_index - $start_index)
  985. ),
  986. 2
  987. );
  988. foreach( $events as $event ) {
  989. $elements = explode('<td valign="top">', $event);
  990. foreach( $elements as &$element ) {
  991. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  992. }
  993. $link = $elements[1];
  994. $desc = $elements[2];
  995. $ff = $elements[3];
  996. if( count($elements) >= 6 ) {
  997. $ns = $elements[4];
  998. $ie = $elements[5];
  999. } else {
  1000. $ie = $elements[4];
  1001. }
  1002. $name = '';
  1003. if( $link ) {
  1004. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1005. $link = $matches[1];
  1006. $name = $matches[2];
  1007. } else {
  1008. $name = $link;
  1009. $link = '';
  1010. }
  1011. }
  1012. $name = preg_replace('/(\(.*?\))/', '', $name);
  1013. if( $is_dom ) {
  1014. $name = 'event.'.$name;
  1015. }
  1016. $this->view->results .= $name ." - ";
  1017. $this->view->results .= $link ." - " . $is_dom.' - ';
  1018. $this->view->results .= $desc ."\n";
  1019. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1020. 'category' => $category_id,
  1021. 'hierarchy' => $hierarchy['id'],
  1022. 'name' => $name,
  1023. 'url' => $link,
  1024. 'short_description' => $desc,
  1025. 'scrapeable' => 1
  1026. ));
  1027. }
  1028. if( !$is_dom ) {
  1029. continue;
  1030. }
  1031. }
  1032. $start_index = strpos($contents, 'Event Attributes</h');
  1033. $end_index = strpos($contents, '</table>', $start_index);
  1034. if( $start_index !== false && $end_index !== false ) {
  1035. $events = array_slice(
  1036. explode(
  1037. '<tr>',
  1038. substr($contents, $start_index, $end_index - $start_index)
  1039. ),
  1040. 2
  1041. );
  1042. foreach( $events as $event ) {
  1043. $elements = explode('<td valign="top">', $event);
  1044. foreach( $elements as &$element ) {
  1045. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  1046. }
  1047. $link = $elements[1];
  1048. $desc = $elements[2];
  1049. $ff = $elements[3];
  1050. if( count($elements) >= 6 ) {
  1051. $ns = $elements[4];
  1052. $ie = $elements[5];
  1053. } else {
  1054. $ie = $elements[4];
  1055. }
  1056. $name = '';
  1057. if( $link ) {
  1058. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1059. $link = $matches[1];
  1060. $name = $matches[2];
  1061. } else {
  1062. $name = $link;
  1063. $link = '';
  1064. }
  1065. }
  1066. $name = preg_replace('/(\(.*?\))/', '', $name);
  1067. if( $is_dom ) {
  1068. $name = 'event.'.$name;
  1069. }
  1070. $this->view->results .= $name ." - ";
  1071. $this->view->results .= $link ." - " . $is_dom.' - ';
  1072. $this->view->results .= $desc ."\n";
  1073. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1074. 'category' => $category_id,
  1075. 'hierarchy' => $hierarchy['id'],
  1076. 'name' => $name,
  1077. 'url' => $link,
  1078. 'short_description' => $desc,
  1079. 'scrapeable' => 1
  1080. ));
  1081. }
  1082. if( !$is_dom ) {
  1083. continue;
  1084. }
  1085. }
  1086. $start_index = strpos($contents, '<h3>Properties</h3>');
  1087. $end_index = strpos($contents, '</table>', $start_index);
  1088. if( $start_index !== false && $end_index !== false ) {
  1089. $events = array_slice(
  1090. explode(
  1091. '<tr>',
  1092. substr($contents, $start_index, $end_index - $start_index)
  1093. ),
  1094. 2
  1095. );
  1096. foreach( $events as $event ) {
  1097. $elements = explode('<td valign="top">', $event);
  1098. foreach( $elements as &$element ) {
  1099. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  1100. }
  1101. if( count($elements) < 2 ) {
  1102. $this->view->results .= 'Missing '.print_r($event);
  1103. continue;
  1104. }
  1105. $link = $elements[1];
  1106. $desc = $elements[2];
  1107. $ff = $elements[3];
  1108. if( count($elements) >= 6 ) {
  1109. $ns = $elements[4];
  1110. $ie = $elements[5];
  1111. } else {
  1112. $ie = $elements[4];
  1113. }
  1114. $name = '';
  1115. if( $link ) {
  1116. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1117. $link = $matches[1];
  1118. $name = $matches[2];
  1119. } else {
  1120. $name = $link;
  1121. $link = '';
  1122. }
  1123. }
  1124. $name = preg_replace('/(\(.*?\))/', '', $name);
  1125. $this->view->results .= $object_name.'.'.$name ." - ";
  1126. $this->view->results .= $link ." - " . $is_dom.' - ';
  1127. $this->view->results .= $desc ."\n";
  1128. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1129. 'category' => $category_id,
  1130. 'hierarchy' => $hierarchy['id'],
  1131. 'name' => $object_name.'.'.$name,
  1132. 'url' => $link,
  1133. 'short_description' => $desc,
  1134. 'scrapeable' => 1
  1135. ));
  1136. }
  1137. if( !$is_dom ) {
  1138. continue;
  1139. }
  1140. }
  1141. $start_index = 0;
  1142. do {
  1143. $start_index = strpos($contents, 'properties</a></h3>', $start_index);
  1144. $end_index = strpos($contents, '</table>', $start_index);
  1145. if( $start_index !== false && $end_index !== false ) {
  1146. $events = array_slice(
  1147. explode(
  1148. '<tr>',
  1149. substr($contents, $start_index, $end_index - $start_index)
  1150. ),
  1151. 2
  1152. );
  1153. foreach( $events as $event ) {
  1154. $elements = explode('<td valign="top">', $event);
  1155. foreach( $elements as &$element ) {
  1156. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  1157. }
  1158. $link = $elements[1];
  1159. $desc = $elements[2];
  1160. $ff = $elements[3];
  1161. if( count($elements) >= 6 ) {
  1162. $ns = $elements[4];
  1163. $ie = $elements[5];
  1164. } else {
  1165. $ie = $elements[4];
  1166. }
  1167. $name = '';
  1168. if( $link ) {
  1169. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1170. $link = $matches[1];
  1171. $name = $matches[2];
  1172. } else {
  1173. $name = $link;
  1174. $link = '';
  1175. }
  1176. }
  1177. $name = preg_replace('/(\(.*?\))/', '', $name);
  1178. $this->view->results .= $object_name.'.'.$name ." - ";
  1179. $this->view->results .= $link ." - " . $is_dom.' - ';
  1180. $this->view->results .= $desc ."\n";
  1181. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1182. 'category' => $category_id,
  1183. 'hierarchy' => $hierarchy['id'],
  1184. 'name' => $object_name.'.'.$name,
  1185. 'url' => $link,
  1186. 'short_description' => $desc,
  1187. 'scrapeable' => 1
  1188. ));
  1189. }
  1190. if( !$is_dom ) {
  1191. continue;
  1192. }
  1193. }
  1194. $start_index++;
  1195. } while( $start_index !== false );
  1196. $start_index = strpos($contents, 'Standard Properties</h3>');
  1197. $end_index = strpos($contents, '</table>', $start_index);
  1198. if( $start_index !== false && $end_index !== false ) {
  1199. $events = array_slice(
  1200. explode(
  1201. '<tr>',
  1202. substr($contents, $start_index, $end_index - $start_index)
  1203. ),
  1204. 2
  1205. );
  1206. foreach( $events as $event ) {
  1207. $elements = explode('<td valign="top">', $event);
  1208. foreach( $elements as &$element ) {
  1209. $element = trim(str_replace('&nbsp;', '', str_replace("\n", '', strip_tags($element, '<a>'))));
  1210. }
  1211. $link = $elements[1];
  1212. $desc = $elements[2];
  1213. $ff = $elements[3];
  1214. if( count($elements) >= 6 ) {
  1215. $ns = $elements[4];
  1216. $ie = $elements[5];
  1217. } else {
  1218. $ie = $elements[4];
  1219. }
  1220. $name = '';
  1221. if( $link ) {
  1222. if( preg_match('/<a(?: target="_top")? href="(.+?)">(.+)?<\/a>/', $link, $matches) ) {
  1223. $link = $matches[1];
  1224. $name = $matches[2];
  1225. } else {
  1226. $name = $link;
  1227. $link = '';
  1228. }
  1229. }
  1230. $name = preg_replace('/(\(.*?\))/', '', $name);
  1231. $this->view->results .= $object_name.'.'.$name ." - ";
  1232. $this->view->results .= $link ." - " . $is_dom.' - ';
  1233. $this->view->results .= $desc ."\n";
  1234. $this->getFunctionsModel()->insertOrUpdateFunction(array(
  1235. 'category' => $category_id,
  1236. 'hierarchy' => $hierarchy['id'],
  1237. 'name' => $object_name.'.'.$name,
  1238. 'url' => $link,
  1239. 'short_description' => $desc,
  1240. 'scrapeable' => 1
  1241. ));
  1242. }
  1243. if( !$is_dom ) {
  1244. continue;
  1245. }
  1246. }
  1247. $this->view->results .= 'We couldn\'t find the properties or methods...' . "\n";
  1248. }
  1249. }
  1250. private function scrapeiPhoneDir() {
  1251. $category = 'iPhone';
  1252. $category_id = $this->getCategoriesModel()->fetchCategoryByName($category);
  1253. if( !$category_id ) {
  1254. $this->invalid_category($category);
  1255. return;
  1256. }
  1257. $hierarchies = array(
  1258. /*'6' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CocoaTouch/AddressBookUI',
  1259. '7' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CocoaTouch/UIKit',
  1260. '81' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/AudioToolbox',
  1261. '82' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/AudioUnit',
  1262. '83' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/AVFoundation',
  1263. '84' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/CoreAudio',
  1264. '85' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/CoreGraphics',
  1265. '86' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/MediaPlayer',
  1266. '87' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/OpenGLES',
  1267. '88' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/Media/QuartzCore',*/
  1268. '110' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/AddressBook',
  1269. '111' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/CoreFoundation',
  1270. '112' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/CoreLocation',
  1271. '113' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/Foundation',
  1272. '114' => 'http://developer.apple.com/iphone/library/navigation/Frameworks/CoreServices/SystemConfiguration',
  1273. '115' => 'http://developer.apple.com/iph…

Large files are truncated, but you can click here to view the full file