PageRenderTime 21ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/import/import_sitemap.php

http://showslow.googlecode.com/
PHP | 118 lines | 84 code | 27 blank | 7 comment | 13 complexity | 58ddc2e0afd1121016636023bfa8a479 MD5 | raw file
  1. <?php
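// This tool reads a list of user IDs and sitemap URLs from standard input and
// imports the URLs found in each sitemap into that user's account for monitoring.
// Each input line holds a user ID and a sitemap URL separated by a tab character
// ([TAB] below stands for a literal tab):
//
// 1[TAB]http://www.showslow.com/sitemap.xml
// 1[TAB]http://www.sergeychernyshev.com/sitemap.xml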
  7. require_once(dirname(dirname(__FILE__)).'/global.php');
  8. $user_id = null;
  9. $temp_path = '/tmp/';
  10. $depth = array();
  11. $inLocTag = false;
  12. $buffer = '';
  13. function startElement($parser, $name, $attrs)
  14. {
  15. global $inLocTag, $buffer;
  16. if (strtolower($name) == 'loc') {
  17. $buffer = '';
  18. $inLocTag = true;
  19. }
  20. }
  21. function endElement($parser, $name)
  22. {
  23. global $inLocTag, $buffer, $user_id;
  24. if (strtolower($name) == 'loc') {
  25. $inLocTag = false;
  26. // Now, let's process the contents
  27. $url = $buffer;
  28. $buffer = '';
  29. $url_id = getUrlId(resolveRedirects($url), false);
  30. if (is_null($url_id)) {
  31. error_log("Troubles getting / creating a URL for $url. Skipping.");
  32. return;
  33. }
  34. $query = sprintf("INSERT IGNORE INTO user_urls (user_id, url_id) VALUES (%d, %d)",
  35. $user_id,
  36. $url_id
  37. );
  38. $result = mysql_query($query);
  39. if (!$result) {
  40. error_log(mysql_error());
  41. }
  42. }
  43. }
  44. function charData($xml_parser, $data)
  45. {
  46. global $buffer;
  47. $buffer .= $data;
  48. }
  49. if ($list_fp = fopen('php://stdin', 'r')) {
  50. while ($line = fgets($list_fp)) {
  51. $params = explode("\t", $line);
  52. $user_id = trim($params[0]);
  53. $url = trim($params[1]);
  54. $tempfile = $temp_path . 'showslow_import_sitemap.xml.'.getmypid().'.'.time();
  55. $temp_fp = fopen($tempfile, 'w');
  56. // Now, let's download the sitemap
  57. $ch = curl_init($url);
  58. curl_setopt_array($ch, array(
  59. CURLOPT_FILE => $temp_fp,
  60. CURLOPT_FOLLOWLOCATION => TRUE,
  61. CURLOPT_MAXREDIRS => 10
  62. ));
  63. $curl_success = curl_exec($ch);
  64. curl_close($ch);
  65. fclose($temp_fp);
  66. if (!$curl_success) {
  67. error_log("Can't download the the sitemap: $url");
  68. continue;
  69. }
  70. // Now, let's open and parse the file
  71. if (!($fp = fopen($tempfile, "r"))) {
  72. error_log("Could not open XML input: $tempfile");
  73. continue;
  74. }
  75. $xml_parser = xml_parser_create();
  76. xml_set_element_handler($xml_parser, "startElement", "endElement");
  77. xml_set_character_data_handler($xml_parser, "charData");
  78. while ($data = fread($fp, 4096)) {
  79. if (!xml_parse($xml_parser, $data, feof($fp))) {
  80. die(sprintf("XML error: %s at line %d",
  81. xml_error_string(xml_get_error_code($xml_parser)),
  82. xml_get_current_line_number($xml_parser)));
  83. }
  84. }
  85. fclose($fp);
  86. unlink($tempfile);
  87. xml_parser_free($xml_parser);
  88. }
  89. }
  90. fclose($list_fp);