PageRenderTime 82ms CodeModel.GetById 40ms app.highlight 5ms RepoModel.GetById 35ms app.codeStats 1ms

/import/import_sitemap.php

http://showslow.googlecode.com/
PHP | 118 lines | 84 code | 27 blank | 7 comment | 13 complexity | 58ddc2e0afd1121016636023bfa8a479 MD5 | raw file
  1<?php
// This tool accepts a list of user IDs and sitemap URLs to import into user accounts for monitoring.
// The list is tab-separated, like so:
//
//	1	http://www.showslow.com/sitemap.xml
//	1	http://www.sergeychernyshev.com/sitemap.xml
  7
  8require_once(dirname(dirname(__FILE__)).'/global.php');
  9
 10$user_id = null; 
 11$temp_path = '/tmp/';
 12$depth = array();
 13
 14$inLocTag = false;
 15$buffer = '';
 16
 17function startElement($parser, $name, $attrs) 
 18{
 19	global $inLocTag, $buffer;
 20
 21	if (strtolower($name) == 'loc') {
 22		$buffer = '';
 23		$inLocTag = true;
 24	}
 25}
 26
 27function endElement($parser, $name) 
 28{
 29	global $inLocTag, $buffer, $user_id;
 30
 31	if (strtolower($name) == 'loc') {
 32		$inLocTag = false;
 33
 34		// Now, let's process the contents
 35		$url = $buffer;
 36		$buffer = '';
 37
 38		$url_id = getUrlId(resolveRedirects($url), false);
 39
 40		if (is_null($url_id)) {
 41			error_log("Troubles getting / creating a URL for $url. Skipping.");
 42			return;
 43		}
 44
 45		$query = sprintf("INSERT IGNORE INTO user_urls (user_id, url_id) VALUES (%d, %d)",
 46			$user_id,
 47			$url_id
 48		);
 49
 50		$result = mysql_query($query);
 51
 52		if (!$result) {
 53			error_log(mysql_error());
 54		}
 55	}
 56}
 57
 58function charData($xml_parser, $data)
 59{
 60	global $buffer;
 61	$buffer .= $data;
 62}
 63
 64
 65if ($list_fp = fopen('php://stdin', 'r')) {
 66	while ($line = fgets($list_fp)) {
 67		$params = explode("\t", $line);
 68
 69		$user_id = trim($params[0]);
 70		$url = trim($params[1]);
 71
 72		$tempfile = $temp_path . 'showslow_import_sitemap.xml.'.getmypid().'.'.time();
 73		$temp_fp = fopen($tempfile, 'w');
 74
 75		// Now, let's download the sitemap
 76		$ch = curl_init($url);
 77
 78		curl_setopt_array($ch, array(
 79			CURLOPT_FILE => $temp_fp,
 80			CURLOPT_FOLLOWLOCATION => TRUE,
 81			CURLOPT_MAXREDIRS => 10
 82		));
 83
 84		$curl_success = curl_exec($ch);
 85
 86		curl_close($ch);
 87		fclose($temp_fp);
 88
 89		if (!$curl_success) {
 90			error_log("Can't download the the sitemap: $url");
 91			continue;
 92		}
 93
 94		// Now, let's open and parse the file
 95		if (!($fp = fopen($tempfile, "r"))) {
 96			error_log("Could not open XML input: $tempfile");
 97			continue;
 98		}
 99
100		$xml_parser = xml_parser_create();
101
102		xml_set_element_handler($xml_parser, "startElement", "endElement");
103		xml_set_character_data_handler($xml_parser, "charData");
104
105		while ($data = fread($fp, 4096)) {
106		    if (!xml_parse($xml_parser, $data, feof($fp))) {
107			die(sprintf("XML error: %s at line %d",
108				    xml_error_string(xml_get_error_code($xml_parser)),
109				    xml_get_current_line_number($xml_parser)));
110		    }
111		}
112		fclose($fp);
113		unlink($tempfile);
114
115		xml_parser_free($xml_parser);
116	}
117}
118fclose($list_fp);