PageRenderTime 70ms CodeModel.GetById 32ms RepoModel.GetById 1ms app.codeStats 0ms

/links/check.php

https://github.com/rair/yacs
PHP | 468 lines | 234 code | 111 blank | 123 comment | 62 complexity | 4b3eb1bbb1e4f7f44206d593799f8015 MD5 | raw file
  1. <?php
  2. /**
  3. * check the integrity of the database for links
  4. *
  5. * This script first displays available options, then it performs selected action:
  6. * - check links
  7. * - check referrals
  8. * - normalize referrals for search engines, and extract keywords
  9. * - look for orphans
  10. *
  11. * Links should be checked quite often, once a week or once a month, to maintain an accurate database.
  12. *
  13. * Check links
  14. *
  15. * During this operation the script validates each link of the database, starting with more recent links,
  16. * and reports on broken links. It stops once ERRORS_THRESHOLD broken links (20 by default) have been found, once MAXIMUM_SIZE links (500 by default) have been checked, or when the whole database has been browsed.
  17. *
  18. * If valid [code]Last-Modified[/code] headers are returned during the check, related links
  19. * are stamped with edit_action code '[code]link:stamp[/code]'.
  20. *
  21. *
  22. * Look for orphans
  23. *
  24. * Orphans are records attached to an anchor that cannot be found anymore.
  25. *
  26. *
  27. * Of course, access to this page is restricted to associates.
  28. *
  29. * @author Bernard Paques
  30. * @author GnapZ
  31. * @reference
  32. * @license http://www.gnu.org/copyleft/lesser.txt GNU Lesser General Public License
  33. */
  // bootstrap: shared framework definitions first, then the two link libraries of this module
  include_once '../shared/global.php';
  include_once 'links.php';
  include_once 'link.php';

  // select the skin variant used for pages about links
  load_skin('links');

  // tunable limits; each default applies only if the constant has not been defined beforehand
  //   MAXIMUM_SIZE     - bound used by the scanning loops below
  //   CHUNK_SIZE       - number of records requested per database call
  //   ERRORS_THRESHOLD - number of bad records after which a scan gives up
  defined('MAXIMUM_SIZE') || define('MAXIMUM_SIZE', 500);
  defined('CHUNK_SIZE') || define('CHUNK_SIZE', 50);
  defined('ERRORS_THRESHOLD') || define('ERRORS_THRESHOLD', 20);

  // breadcrumbs leading to this page
  $context['path_bar'] = array('links/' => i18n::s('Links'));

  // page title
  $context['page_title'] = i18n::s('Maintenance');
  53. // the user has to be an associate
  54. if(!Surfer::is_associate()) {
  // answer with an explicit HTTP status and log the refusal
  Safe::header('Status: 401 Unauthorized', true, 401);
  Logger::error(i18n::s('You are not allowed to perform this operation.'));

  // the only thing left to offer is a way back to the index of links
  $menu = array();
  $menu['links/'] = i18n::s('Links');
  $context['text'] .= Skin::build_list($menu, 'menu_bar');
  60. // check links through the Internet
  61. } elseif(isset($_REQUEST['action']) && ($_REQUEST['action'] == 'check')) {
  // Validate every link of the database through the network, chunk by chunk.
  // Valid links that report a date get stamped in the database; broken links
  // are collected and listed at the end of the page.

  // announce the table being scanned
  $context['text'] .= '<p>'.sprintf(i18n::s('Analyzing table %s...'), SQL::table_name('links'))."</p>\n";

  // broken links found so far, keyed by view url, ready for Skin::build_list()
  // must exist before the loop: count() on an undefined variable is a TypeError on PHP 8
  $broken = array();

  // process links by chunks of CHUNK_SIZE, up to MAXIMUM_SIZE records
  $links_offset = 0;
  while($links_offset < MAXIMUM_SIZE) {

      // seek the database
      $rows = Links::list_by_date($links_offset, CHUNK_SIZE, 'review');

      // no more records
      if(!$rows || !count($rows)) {

          // the table is empty
          if($links_offset == 0)
              $context['text'] .= '<p>'.i18n::s('No link to check.')."</p>\n";

          // always leave the loop here; an empty chunk following a full one
          // would otherwise repeat the same query forever
          break;
      }

      // analyze each link
      foreach($rows as $view_url => $label) {

          // items are expected as array(prefix, label, suffix, variant, actual url)
          $prefix = $suffix = $variant = $actual_url = '';
          if(is_array($label)) {
              $prefix = $label[0];
              $suffix = $label[2];
              $variant = $label[3];
              $actual_url = $label[4];
              $label = $label[1];
          }

          // the url is valid
          if($stamp = Link::validate($actual_url)) {
              $context['text'] .= '.';

              // remember Last-Modified data, if any
              if(is_string($stamp) && preg_match('/\d\d\d\d-\d\d-\d\d/', $stamp)) {

                  // no comma before WHERE, values are escaped, and the column is not
                  // qualified because the actual table name may carry a prefix
                  $query = "UPDATE ".SQL::table_name('links')." SET "
                      ."edit_action='link:stamp', "
                      ."edit_date='".SQL::escape($stamp)."'"
                      ." WHERE link_url = '".SQL::escape($actual_url)."'";
                  SQL::query($query);
              }

          // remember broken links
          } else {
              $context['text'] .= '!';
              $broken[$view_url] = array($prefix, $label, $suffix, $variant, NULL);
          }

          // ensure enough execution time
          Safe::set_time_limit(30);
      }

      // one chunk has been processed
      $context['text'] .= BR."\n";
      $links_offset += count($rows);

      // detect the end of the list
      if(count($rows) < CHUNK_SIZE)
          break;

      // stop if too many errors
      if(count($broken) >= ERRORS_THRESHOLD)
          break;
  }

  // process end
  if($links_offset > 1)
      $context['text'] .= sprintf(i18n::s('%d links have been processed.'), $links_offset).BR."\n";

  // display broken links
  if(count($broken)) {
      $context['text'] .= Skin::build_block(i18n::s('Broken links to review'), 'title');
      $context['text'] .= Skin::build_list($broken, 'decorated');

  // woaouh, a clean server
  } elseif($links_offset)
      $context['text'] .= '<p>'.i18n::s('No broken link has been found.').'</p>';

  // display the execution time
  $time = round(get_micro_time() - $context['start_time'], 2);
  $context['text'] .= '<p>'.sprintf(i18n::s('Script terminated in %.2f seconds.'), $time).'</p>';

  // forward to the index page
  $menu = array('links/' => i18n::s('Links'));
  $context['text'] .= Skin::build_list($menu, 'menu_bar');
  129. // check referrals through the Internet
  130. } elseif(isset($_REQUEST['action']) && ($_REQUEST['action'] == 'referrals')) {
  // Check referrals through the network, chunk by chunk. Referrals coming from
  // banned sources, and referrals that cannot be validated, are deleted from
  // the database and listed at the end of the page.

  // announce the table being scanned
  $context['text'] .= '<p>'.sprintf(i18n::s('Analyzing table %s...'), SQL::table_name('referrals'))."</p>\n";

  // load libraries once, ahead of the loop
  include_once $context['path_to_root'].'servers/servers.php';
  include_once '../agents/referrals.php';

  // avoid banned sources
  $banned_pattern = Servers::get_banned_pattern();

  // deleted referrals, by reason
  // must exist before the loop: count() on an undefined variable is a TypeError
  // on PHP 8, and the @ operator does not silence it
  $broken = array();
  $banned = array();

  // $links_offset is the position in the table, $processed the number of records checked;
  // they differ as soon as some records have been deleted
  $links_offset = 0;
  $processed = 0;
  while($processed < MAXIMUM_SIZE) {

      // seek the database and check newest referrals
      $result = Referrals::list_by_dates($links_offset, CHUNK_SIZE);

      // NOTE(review): this script consumes list_by_dates() both as an array and as
      // a SQL result set; accept either form here -- confirm against agents/referrals.php
      $rows = array();
      if(is_array($result))
          $rows = $result;
      elseif($result) {
          while($item = SQL::fetch($result))
              $rows[] = $item;
      }

      // no more records
      if(!count($rows)) {

          // the table is empty
          if($processed == 0)
              $context['text'] .= '<p>'.i18n::s('No link to check.')."</p>\n";

          // always leave the loop here; an empty chunk following a full one
          // would otherwise repeat the same query forever
          break;
      }

      // analyze each referral
      $deleted = 0;
      foreach($rows as $item) {
          $url = $item['referer'];

          // avoid banned sources
          if($banned_pattern && preg_match($banned_pattern, $url)) {
              $context['text'] .= 'x';
              $banned[] = $url;

              // delete the referral from the database
              Referrals::delete($url);
              $deleted++;

          // the url is valid
          } elseif(Link::validate($url)) {
              $context['text'] .= '.';

          // remember broken links
          } else {
              $context['text'] .= '!';
              $broken[] = $url;

              // delete the referral from the database
              Referrals::delete($url);
              $deleted++;
          }

          // ensure enough execution time
          Safe::set_time_limit(30);
      }

      // one chunk has been processed
      $context['text'] .= BR."\n";
      $processed += count($rows);

      // deleted records have shifted the remaining ones towards the start of the list,
      // so move forward only past the records that are still there
      $links_offset += count($rows) - $deleted;

      // detect the end of the list
      if(count($rows) < CHUNK_SIZE)
          break;

      // stop if too many errors
      if((count($broken) + count($banned)) >= ERRORS_THRESHOLD)
          break;
  }

  // process end
  if($processed > 1)
      $context['text'] .= sprintf(i18n::s('%d links have been processed.'), $processed).BR."\n";

  // list processed links
  if(count($broken) + count($banned)) {
      $context['text'] .= Skin::build_block(i18n::s('Deleted referrals'), 'title');

      // referrers are provided by remote parties, so escape them before display
      if(count($broken)) {
          $context['text'] .= i18n::s('Following referrals have been deleted:').BR."\n";
          $context['text'] .= '<ul>';
          foreach($broken as $url)
              $context['text'] .= '<li>'.htmlspecialchars($url)."</li>\n";
          $context['text'] .= '</ul>';
      }

      if(count($banned)) {
          $context['text'] .= i18n::s('Following referrals have been banned:').BR."\n";
          $context['text'] .= '<ul>';
          foreach($banned as $url)
              $context['text'] .= '<li>'.htmlspecialchars($url)."</li>\n";
          $context['text'] .= '</ul>';
      }

  // woaouh, a clean server
  } elseif($processed)
      $context['text'] .= '<p>'.i18n::s('No broken referral has been found.').'</p>';

  // display the execution time
  $time = round(get_micro_time() - $context['start_time'], 2);
  $context['text'] .= '<p>'.sprintf(i18n::s('Script terminated in %.2f seconds.'), $time).'</p>';

  // forward to the index page
  $menu = array('links/' => i18n::s('Links'));
  $context['text'] .= Skin::build_list($menu, 'menu_bar');
  208. // normalize referrals
  209. } elseif(isset($_REQUEST['action']) && ($_REQUEST['action'] == 'normalize')) {
  // Normalize referrals, chunk by chunk: each referer is passed to
  // Referrals::normalize(), and the record is updated when the link, the
  // domain or the keywords differ from what has been stored.

  // announce the table being scanned
  $context['text'] .= '<p>'.sprintf(i18n::s('Analyzing table %s...'), SQL::table_name('referrals'))."</p>\n";

  // load the library once, ahead of the loop
  include_once '../agents/referrals.php';

  // process all referrals, but stop after MAXIMUM_SIZE updates
  $links_offset = 0;
  $changes = 0;
  while($changes < MAXIMUM_SIZE) {

      // seek the database and check newest referrals
      $result = Referrals::list_by_dates($links_offset, CHUNK_SIZE);

      // NOTE(review): this script consumes list_by_dates() both as an array and as
      // a SQL result set; accept either form here -- confirm against agents/referrals.php
      $rows = array();
      if(is_array($result))
          $rows = $result;
      elseif($result) {
          while($item = SQL::fetch($result))
              $rows[] = $item;
      }

      // no more records
      if(!count($rows)) {

          // the table is empty
          if($links_offset == 0)
              $context['text'] .= '<p>'.i18n::s('No link to check.')."</p>\n";

          // always leave the loop here, else the same query could be repeated forever
          break;
      }

      // analyze each referral
      foreach($rows as $item) {
          list($link, $domain, $keywords) = Referrals::normalize($item['referer']);

          // we suppose the referral is already ok
          $ok = TRUE;

          // link has been changed
          if($item['referer'] != $link) {
              $context['text'] .= BR.'< '.htmlspecialchars($item['referer']).BR.'> '.htmlspecialchars($link).BR;
              $item['referer'] = $link;
              $ok = FALSE;
          }

          // domain has been changed
          if(!isset($item['domain']) || ($item['domain'] != $domain)) {
              if(isset($item['domain']) && $item['domain'])
                  $context['text'] .= BR.'< '.htmlspecialchars($item['domain']).BR.'> '.htmlspecialchars($domain).BR;
              else
                  $context['text'] .= BR.'d '.htmlspecialchars($domain).BR;
              $item['domain'] = $domain;
              $ok = FALSE;
          }

          // keywords have been found
          // they come from remote referrers, so escape them like every other value shown here
          if($keywords && (!isset($item['keywords']) || ($item['keywords'] != $keywords))) {
              if(isset($item['keywords']) && $item['keywords'])
                  $context['text'] .= BR.'< '.htmlspecialchars($item['keywords']).BR.'> '.htmlspecialchars($keywords).BR;
              else
                  $context['text'] .= BR.'k '.htmlspecialchars($keywords).BR;
              $item['keywords'] = $keywords;
              $ok = FALSE;
          }

          // the link is ok
          if($ok) {
              $context['text'] .= '.';
              continue;
          }

          // save updated referrals; every value, the id included, is escaped
          $query = "UPDATE ".SQL::table_name('referrals')." SET"
              ." referer='".SQL::escape($item['referer'])."',"
              ." domain='".SQL::escape($item['domain'])."',"
              ." keywords='".SQL::escape(isset($item['keywords']) ? $item['keywords'] : '')."'"
              ." WHERE id = '".SQL::escape($item['id'])."'";
          SQL::query($query);

          // update statistics
          $changes += 1;
      }

      // we have processed one chunk
      $links_offset += count($rows);
      $context['text'] .= BR."\n";

      // ensure enough execution time
      Safe::set_time_limit(30);

      // detect the end of the list
      if(count($rows) < CHUNK_SIZE)
          break;
  }

  // process end
  if($links_offset > 1)
      $context['text'] .= sprintf(i18n::s('%d links have been processed.'), $links_offset).BR."\n";

  // report on changes
  if($changes)
      $context['text'] .= sprintf(i18n::s('%d referrals have been normalized.'), $changes).BR."\n";

  // woaouh, a clean server
  elseif($links_offset)
      $context['text'] .= '<p>'.i18n::s('All referrals are looking ok.').'</p>';

  // display the execution time
  $time = round(get_micro_time() - $context['start_time'], 2);
  $context['text'] .= '<p>'.sprintf(i18n::s('Script terminated in %.2f seconds.'), $time).'</p>';

  // forward to the index page
  $menu = array('links/' => i18n::s('Links'));
  $context['text'] .= Skin::build_list($menu, 'menu_bar');
  292. // look for orphans
  293. } elseif(isset($_REQUEST['action']) && ($_REQUEST['action'] == 'orphans')) {
  // Look for orphans: links that refer to an anchor that Anchors::get() cannot
  // find. The scan is aborted after 5 orphans in a row.

  // announce the table being scanned
  $context['text'] .= Skin::build_block(sprintf(i18n::s('Analyzing table %s...'), SQL::table_name('links')), 'title');

  // scan many items
  $count = 0;
  $query = "SELECT id, anchor, link_url, title FROM ".SQL::table_name('links')
      ." ORDER BY anchor LIMIT 0, 20000";
  $result = SQL::query($query);

  // report the failure, and carry on down to render_skin();
  // a 'return' here would end the script before the page is rendered, and hide the error
  if(!$result) {
      $context['text'] .= Logger::error_pop().BR."\n";

  // parse the whole list
  } else {

      // number of orphans met in a row
      $errors_count = 0;

      // fetch one link and check its anchor
      while($row = SQL::fetch($result)) {

          // animate user screen and take care of time
          $count++;
          if(!($count%100)) {
              $context['text'] .= sprintf(i18n::s('%d records have been processed.'), $count).BR."\n";

              // ensure enough execution time
              Safe::set_time_limit(30);
          }

          // this record is fine: no anchor at all, or an anchor that exists
          if(!$row['anchor'] || Anchors::get($row['anchor'])) {
              $errors_count = 0;
              continue;
          }

          // the anchor cannot be found
          $context['text'] .= sprintf(i18n::s('Orphan: %s'), 'link '.Skin::build_link($row['link_url'], $row['id'].' '.$row['link_url'])).BR."\n";
          if(++$errors_count >= 5) {
              $context['text'] .= i18n::s('Too many successive errors. Aborted').BR."\n";
              break;
          }
      }

      // ending message
      $context['text'] .= sprintf(i18n::s('%d records have been processed'), $count).BR."\n";
  }

  // display the execution time
  $time = round(get_micro_time() - $context['start_time'], 2);
  $context['text'] .= '<p>'.sprintf(i18n::s('Script terminated in %.2f seconds.'), $time).'</p>';

  // forward to the index page
  $menu = array('links/' => i18n::s('Links'));
  $context['text'] .= Skin::build_list($menu, 'menu_bar');
  334. // which check?
  335. } else {
  // no action has been selected yet: invite the surfer to pick one
  // the page is made of a splash message, then of a form with one radio button
  // per action and a submit button
  $context['text'] .=
      // the splash message
      '<p>'.i18n::s('Please select the action to perform.')."</p>\n"

      // the form posts back to this script
      .'<form method="post" action="'.$context['script_url'].'" id="main_form">'

      // check links; this is the only button with an id, used to set the focus
      .'<p><input type="radio" name="action" id="action" value="check" /> '
      .i18n::s('Check links through the Internet. If you click on the button above, the database will be scanned, and the server will attempt to open each link through the network. Broken URLS will be reported to you for further troubleshoting. Note that the program will stop automatically if too many broken links are found.')
      .'</p>'

      // check referrals
      .'<p><input type="radio" name="action" value="referrals" /> '
      .i18n::s('Check referrals through the Internet. If you click on the button above, referrals will be checked through the network. Note that the program will stop automatically if too many broken links are found.')
      .'</p>'

      // normalize referrals
      .'<p><input type="radio" name="action" value="normalize" /> '
      .i18n::s('Normalize referrals. Referrals from search engines will be simplified as much as possible. Also, keywords are extracted for further use.')
      .'</p>'

      // look for orphan records
      .'<p><input type="radio" name="action" value="orphans" /> '
      .i18n::s('Look for orphan records')
      .'</p>'

      // the submit button
      .'<p>'.Skin::build_submit_button(i18n::s('Start'))."</p>\n"

      // end of the form
      .'</form>';

  // put the focus on the first radio button
  Page::insert_script('$("#action").focus();');
  354. }
  // render the skin, using what has been accumulated in $context
  // no closing PHP tag on purpose: nothing can be sent by accident after the page
  render_skin();