/pi.xmlgrab.php
PHP | 1195 lines | 834 code | 227 blank | 134 comment | 211 complexity | 2364c4ca22dc74d46adee8705e859c5c MD5 | raw file
- <?php
- /* VERSIONS
- NOTES: changed is_unique fn return value, added else to unique test, did update code, changed category creation sql, fixed mb encoding bug
- 0.8 Add support to update entries
- 0.7.4 Fixed blank date parsing bug
- 0.7.3 MSM support
- 0.7.2 Now supports xml attributes, repeated elements, uk date stamps, merge multiple categories
- Based on codebase of FeedGrab 0.7.1
- */
// Plugin manifest: name, version, author details, description and usage text.
$plugin_info = array();
$plugin_info['pi_name']        = 'XMLGrab';
$plugin_info['pi_version']     = '0.8';
$plugin_info['pi_author']      = 'Andrew Weaver';
$plugin_info['pi_author_url']  = 'http://www.brandnewbox.co.uk/docs/';
$plugin_info['pi_description'] = 'Allows you to extract data from an XML feed and insert it into a weblog';
// The usage text is produced by the plugin class itself.
$plugin_info['pi_usage']       = XMLGrab::usage();
- class XMLGrab {
- var $debug = true;
- var $runsql = true;
- var $uk_dates = true; // convert MM/DD/YYYY to DD/MM/YYYY
- var $page_url = '';
- var $title = 'title';
- var $publish_date = 'dc:date';
- var $author_id = 1;
- var $feed_weblog_id = '';
- var $feed_fields = '';
- var $weblog_fields = '';
- var $cats = '';
- var $cat_group = 0;
- var $cat_field = '';
- var $cat_delimiter = ',';
- var $cat_group_2 = 0;
- var $cat_field_2 = '';
- var $cat_delimiter_2 = ',';
- var $unique = 'title,date';
- var $itempath = '';
- var $site_id = '';
- var $do_update = false;
- var $status = '';
- var $return_data = '';
- var $time_offset = 0;
/**
 * Plugin entry point: imports the items of an XML feed into a weblog.
 *
 * Reads the tag parameters, maps feed elements onto weblog fields, downloads
 * and parses the feed, then walks every item found under `itempath`. Items
 * that is_entry_unique() does not recognise are inserted; known items are
 * refreshed when the `update` parameter is set.
 *
 * Behavioural fixes compared with the previous revision:
 *  - the `debug` parameter now turns diagnostics ON (the test was inverted)
 *    and every diagnostic print honours it;
 *  - template parameters are escaped before they are placed in SQL;
 *  - the weblog row is loaded once, so the update path no longer reads an
 *    undefined/stale `$query` for the default category group;
 *  - category group 2 uses its own delimiter on update;
 *  - empty category names are skipped instead of being created;
 *  - mapped fields that do not exist in the weblog's field group are skipped
 *    instead of being written to an undefined column with a stale timestamp;
 *  - category assignment is idempotent, so repeated updates do not insert
 *    the same (entry, category) pair twice;
 *  - the `runsql` dry-run switch is honoured by every write.
 *
 * @return string|null $this->return_data when the import cannot start
 */
function XMLGrab() {
    global $DB, $FNS, $PREFS;

    $this->_read_params();

    // A feed URL, a target weblog and both halves of the field mapping are mandatory.
    if ( $this->page_url == '' || $this->feed_weblog_id == ''
        || $this->feed_fields == '' || $this->weblog_fields == '' ) {
        return $this->return_data;
    }

    $fieldsArray = explode( "|", $this->feed_fields );
    $weblogArray = explode( "|", $this->weblog_fields );

    // Check number of input and weblog fields match
    if ( count( $fieldsArray ) != count( $weblogArray ) ) {
        return $this->return_data;
    }

    $weblog_to_feed = $this->_build_field_map( $fieldsArray, $weblogArray );

    // Weblog defaults (status, comment settings, category group) are the same
    // for every item, so they are fetched once instead of once per new entry.
    $weblog = $DB->query( "SELECT blog_title, blog_url, rss_url,
            ping_return_url, deft_comments, deft_trackbacks,
            deft_status, cat_group, field_group
        FROM exp_weblogs
        WHERE weblog_id = '" . $DB->escape_str( $this->feed_weblog_id ) . "'" );
    if ( $weblog->num_rows == 0 ) {
        return $this->return_data;
    }
    $weblog_row = $weblog->row;

    $entries_added = 0;

    // Loop over all feed items
    foreach ( $this->_load_items() as $item ) {
        $this->post = $this->_collect_post( $item, $weblog_to_feed );

        if ( $this->debug ) { print " Checking: " . $this->post['title'] . "<br />\n"; }

        // Look for duplicate entries
        $entry_id = $this->is_entry_unique( $this->post, $this->unique, $weblog_to_feed );

        if ( $entry_id == 0 ) {
            if ( $this->debug ) { print " Found new entry: " . $this->post['title'] . "<br/>\n"; }
            $this->_insert_entry( $item, $weblog_row, $weblog_to_feed );
            $entries_added++;
        } else {
            if ( $this->debug ) { print " Found existing entry: " . $this->post['title'] . "<br/>\n"; }
            if ( $this->do_update ) {
                $this->_update_entry( $item, $entry_id, $weblog_row, $weblog_to_feed );
            }
        }
    }

    if ( $entries_added > 0 ) {
        if ( $this->debug ) {
            print "<br/>\nNew entries: " . $entries_added . "<br/>\n";
        }

        if ( $PREFS->ini('new_posts_clear_caches') == 'y' ) {
            $FNS->clear_caching('all');
        } else {
            $FNS->clear_caching('sql_cache');
        }
    }
}

/**
 * Copies the template tag parameters into the object's properties.
 */
function _read_params() {
    $this->page_url        = trim( $this->_param( 'url', '', true ) );
    $this->feed_weblog_id  = $this->_param( 'weblog' );
    $this->author_id       = $this->_param( 'author', 1 );
    $this->title           = $this->_param( 'title', '', true );
    $this->publish_date    = $this->_param( 'date', '', true );
    $this->feed_fields     = $this->_param( 'use', '', true );
    $this->weblog_fields   = $this->_param( 'fields', '', true );
    $this->time_offset     = (int) $this->_param( 'offset', 0 );
    $this->cats            = $this->_param( 'category' );
    $this->cat_group       = $this->_param( 'category_group', 0 );
    $this->cat_field       = $this->_param( 'category_field', '', true );
    $this->cat_delimiter   = $this->_delimiter( $this->_param( 'category_delimiter', ',' ) );
    $this->cat_group_2     = $this->_param( 'category_group_2', 0 );
    $this->cat_field_2     = $this->_param( 'category_field_2', '', true );
    $this->cat_delimiter_2 = $this->_delimiter( $this->_param( 'category_delimiter_2', ',' ) );
    $this->unique          = $this->_param( 'unique', 'title,date' );
    $this->itempath        = $this->_param( 'itempath', '/xml/item', true );
    $this->site_id         = $this->_param( 'site_id' );
    $this->status          = $this->_param( 'status', '', true );
    $this->do_update       = $this->_flag( 'update' );
    $this->debug           = $this->_flag( 'debug' );
}

/**
 * Fetches one tag parameter, falling back to $default when it is missing or empty.
 *
 * @param string $name    parameter name
 * @param mixed  $default value used when the parameter is absent
 * @param bool   $is_path when true, slash entities (SLASH) are turned back into "/"
 * @return mixed
 */
function _param( $name, $default = '', $is_path = false ) {
    global $TMPL;

    $value = $TMPL->fetch_param( $name );
    if ( ! $value ) {
        return $default;
    }
    // EE replaces slashes with entities earlier in the process, convert back here
    return $is_path ? str_replace( SLASH, '/', $value ) : $value;
}

/**
 * Reads an on/off tag parameter: set and not "no"/"false" means on.
 *
 * @param string $name parameter name
 * @return bool
 */
function _flag( $name ) {
    global $TMPL;

    $value = $TMPL->fetch_param( $name );
    if ( ! $value ) {
        return false;
    }
    return ! in_array( strtolower( $value ), array( 'no', 'false' ), true );
}

/**
 * Translates the TAB / SPACE delimiter keywords into the real characters.
 *
 * @param string $delimiter
 * @return string
 */
function _delimiter( $delimiter ) {
    if ( $delimiter == 'TAB' ) {
        return "\t";
    }
    if ( $delimiter == 'SPACE' ) {
        return " ";
    }
    return $delimiter;
}

/**
 * Builds the weblog-field => feed-field map and decorates it with the field
 * id, format and type of every custom field of the target weblog.
 *
 * @param array $fieldsArray feed element paths (from the `use` parameter)
 * @param array $weblogArray weblog field names (from the `fields` parameter)
 * @return array map keyed by weblog field name
 */
function _build_field_map( $fieldsArray, $weblogArray ) {
    global $DB, $TMPL;

    $map = array(
        'title' => array( 'is_custom' => 0, 'field' => $this->title ),
        'date'  => array( 'is_custom' => 0, 'field' => $this->publish_date ),
    );

    // Use fields array to map feed fields to weblog fields
    foreach ( $weblogArray as $i => $weblog_field ) {
        $map[ $weblog_field ] = array( 'is_custom' => 1, 'field' => $fieldsArray[ $i ] );
    }

    // Get custom fields from database
    $custom_fields = $DB->query( "SELECT exp_weblog_fields.field_id,
            exp_weblog_fields.field_name,
            exp_weblog_fields.field_label, exp_weblog_fields.field_fmt,
            exp_weblog_fields.field_type, exp_weblog_fields.field_related_id
        FROM exp_weblogs, exp_weblog_fields
        WHERE exp_weblogs.field_group = exp_weblog_fields.group_id
        AND exp_weblogs.weblog_id = '" . $DB->escape_str( $this->feed_weblog_id ) . "'" );

    // Map weblog custom fields to id's
    foreach ( $custom_fields->result as $row ) {
        $name = $row['field_name'];
        $map[ $name ]['id']     = $row['field_id'];
        $map[ $name ]['format'] = $row['field_fmt'];
        $map[ $name ]['type']   = $row['field_type'];
        if ( $row['field_type'] == 'rel' ) {
            $map[ $name ]['related_id'] = $row['field_related_id'];
        }

        if ( substr( $row['field_type'], 0, 8 ) != 'ftype_id' ) {
            continue;
        }

        // FF field type: the id follows the "ftype_id" prefix and one separator character
        $ftype_id = substr( $row['field_type'], 9 );
        $ftype = $DB->query( "SELECT class FROM exp_ff_fieldtypes WHERE fieldtype_id = '" . $DB->escape_str( $ftype_id ) . "'" );
        if ( $ftype->num_rows == 0 ) {
            continue;
        }
        $map[ $name ]['type'] = $ftype->row['class'];

        // If it is matrix, look for a parameter named after the field and store it
        if ( $ftype->row['class'] == 'matrix' ) {
            $columns = $TMPL->fetch_param( $name );
            $map[ $name ]['matrix'] = $columns ? str_replace( SLASH, '/', $columns ) : $columns;
        }

        // Store any FF settings for this type (serialized by the CMS, read from its own table)
        $settings = $DB->query( "SELECT ff_settings FROM exp_weblog_fields WHERE field_id = '" . $DB->escape_str( $row['field_id'] ) . "'" );
        $map[ $name ]['field_settings'] = unserialize( $settings->row['ff_settings'] );
    }

    return $map;
}

/**
 * Downloads the feed, parses it and returns the items found under itempath.
 *
 * @return array list of items (possibly empty); each item is a flat array
 *               keyed by the element path relative to the item
 */
function _load_items() {
    if ( $this->debug ) { print "Fetching: " . $this->page_url . "<br/>\n"; }

    if ( function_exists('curl_init') ) {
        $xml = $this->_curl_fetch( $this->page_url );
    } else {
        $xml = $this->_fsockopen_fetch( $this->page_url );
    }
    if ( ! is_string( $xml ) || trim( $xml ) == '' ) {
        return array();
    }

    // Parse XML using EE's builtin functions
    include_once( PATH_CORE . "core.xmlparser.php" );
    $XML = new EE_XMLparser;
    $xml_obj = $XML->parse_xml( $xml );
    if ( ! is_object( $xml_obj ) ) {
        return array();
    }

    // Recurse through XML structure looking for nodes that match $this->itempath
    $items = null;
    $this->fetch_xml( $xml_obj, $this->itempath, $items );
    if ( ! is_array( $items ) ) {
        return array();
    }
    // Real items are numbered from 1; index 0 can only hold stray counters.
    unset( $items[0] );

    return $items;
}

/**
 * Collects title, url_title, date and every mapped custom field of one item.
 *
 * @param array $item           flat item array produced by fetch_xml()
 * @param array $weblog_to_feed field map from _build_field_map()
 * @return array post data keyed by weblog field name
 */
function _collect_post( $item, $weblog_to_feed ) {
    global $REGX;

    $post = array();

    // Get title using title parameter (checked for XSS), generate url
    $post['title'] = $REGX->xss_clean(
        $this->get_namespaced_field( $item, $weblog_to_feed['title']['field'] )
    );
    $post['url_title'] = $this->_unique_url_title( $post['title'] );

    // Get date field using date parameter; default to "now"
    $post['date'] = time();
    $date_field = $weblog_to_feed['date']['field'];
    if ( $date_field != '' && isset( $item[ $date_field ] ) ) {
        $post['date'] = $this->parse_date( $item[ $date_field ] );
    }

    foreach ( $weblog_to_feed as $field => $data ) {
        if ( ! isset( $data['is_custom'] ) || ! $data['is_custom'] ) {
            continue;
        }
        if ( ! isset( $data['type'] ) || $data['type'] != 'matrix' ) {
            $post[ $field ] = $this->get_namespaced_field( $item, $data['field'] );
            continue;
        }

        // Handle Pixel & Tonic Matrix field: one array per repeated sub-row
        $no_subrows = (int) $this->get_namespaced_field( $item, $data['field'] . '#' );
        $columns = ( isset( $data['matrix'] ) && $data['matrix'] ) ? explode( "|", $data['matrix'] ) : array();
        $col_ids = isset( $data['field_settings']['col_ids'] ) ? $data['field_settings']['col_ids'] : array();

        $matrix_data = array();
        for ( $i = 0; $i < $no_subrows; $i++ ) {
            $matrix_data[ $i ] = array();
            foreach ( $columns as $j => $column ) {
                if ( ! isset( $col_ids[ $j ] ) ) {
                    continue;
                }
                // Repeated elements are stored as "path", "path#2", "path#3", ...
                $matrix_data[ $i ][ 'col_id_' . $col_ids[ $j ] ] = $this->get_namespaced_field(
                    $item,
                    $data['field'] . '/' . $column . ( $i > 0 ? '#' . ( $i + 1 ) : '' )
                );
            }
        }
        // Store array of data to insert into exp_matrix_data
        $post[ $field ] = $matrix_data;
    }

    return $post;
}

/**
 * Turns a title into a url_title that is not yet used in the target weblog.
 *
 * @param string $title
 * @return string
 */
function _unique_url_title( $title ) {
    global $DB, $REGX;

    $title = (string) $title;
    // NOTE(review): $this->RSS is never set in this class, so this resolves to UTF-8.
    $source_encoding = ( isset( $this->RSS->source_encoding ) ? $this->RSS->source_encoding : 'UTF-8' );

    // Clean title
    if ( function_exists('mb_convert_encoding') ) {
        $encoded_title = mb_convert_encoding( $title, strtoupper('UTF-8'), $source_encoding );
    } elseif ( function_exists('iconv') AND ( $iconvstr = @iconv( $source_encoding, 'UTF-8', $title ) ) !== FALSE ) {
        $encoded_title = $iconvstr;
    } else {
        $encoded_title = utf8_encode( $title );
    }

    $url_title = $REGX->create_url_title( $encoded_title );
    $weblog_id = $DB->escape_str( $this->feed_weblog_id );

    $results = $DB->query( "SELECT count(*) AS count FROM exp_weblog_titles WHERE url_title = '" . $DB->escape_str( $url_title ) . "' AND weblog_id = '" . $weblog_id . "'" );
    if ( $results->row['count'] > 0 ) {
        // Duplicate exists, add a counter on the end
        $results = $DB->query( "SELECT count(*) AS count FROM exp_weblog_titles WHERE url_title LIKE '" . $DB->escape_str( $url_title ) . "%' AND weblog_id = '" . $weblog_id . "'" );
        $url_title .= $results->row['count'] + 1;
    }

    return $url_title;
}

/**
 * Runs a write query, or only prints it (in debug mode) during a dry run.
 *
 * @param string $sql
 * @return bool true when the query was executed
 */
function _write( $sql ) {
    global $DB;

    if ( $this->runsql ) {
        $DB->query( $sql );
        return true;
    }
    if ( $this->debug ) { print $sql . "<br/>\n"; }
    return false;
}

/**
 * Inserts the current post ($this->post) as a new entry with its custom
 * fields and categories.
 *
 * @param array $item           feed item the post was built from
 * @param array $weblog_row     row of exp_weblogs for the target weblog
 * @param array $weblog_to_feed field map
 * @return int id of the new entry (99999 during a dry run)
 */
function _insert_entry( $item, $weblog_row, $weblog_to_feed ) {
    global $DB;

    // Status: weblog default unless the feed supplies a non-empty one
    $status = $weblog_row['deft_status'];
    if ( $this->status != '' ) {
        $feed_status = $this->get_namespaced_field( $item, $this->status );
        if ( $feed_status !== null && $feed_status !== '' ) {
            $status = $feed_status;
        }
    }

    $entry_date = $this->post['date'] - $this->time_offset;

    // Insert into weblog_titles
    $data = array(
        'entry_id'         => '',
        'weblog_id'        => $this->feed_weblog_id,
        'author_id'        => $this->author_id,
        'title'            => $this->post['title'],
        'url_title'        => $this->post['url_title'],
        'ip_address'       => '127.0.0.1',
        'entry_date'       => $entry_date,
        'year'             => gmdate( 'Y', $entry_date ),
        'month'            => gmdate( 'm', $entry_date ),
        'day'              => gmdate( 'd', $entry_date ),
        'sticky'           => 'n',
        'status'           => $status,
        'allow_comments'   => $weblog_row['deft_comments'],
        'allow_trackbacks' => $weblog_row['deft_trackbacks']
    );
    if ( $this->site_id != '' ) {
        $data['site_id'] = $this->site_id;
    }
    $sql = $DB->insert_string( 'exp_weblog_titles', $data );
    if ( $this->runsql ) {
        $DB->query( $sql );
        $entry_id = $DB->insert_id;
    } else {
        $entry_id = 99999;
        if ( $this->debug ) { print $sql . "<br/>\n"; }
    }

    // Insert into weblog_data
    $data = array(
        'entry_id'  => $entry_id,
        'weblog_id' => $this->feed_weblog_id
    );
    if ( $this->site_id != '' ) {
        $data['site_id'] = $this->site_id;
    }
    foreach ( $this->_custom_field_data( $entry_id, $weblog_to_feed, false ) as $column => $value ) {
        $data[ $column ] = $value;
    }
    $this->_write( $DB->insert_string( 'exp_weblog_data', $data ) );

    $this->_sync_categories( $item, $entry_id, $weblog_row['cat_group'], false );

    return $entry_id;
}

/**
 * Refreshes title, date, custom fields and categories of an existing entry
 * from the current post ($this->post).
 *
 * @param array $item           feed item the post was built from
 * @param int   $entry_id       entry to update
 * @param array $weblog_row     row of exp_weblogs for the target weblog
 * @param array $weblog_to_feed field map
 */
function _update_entry( $item, $entry_id, $weblog_row, $weblog_to_feed ) {
    global $DB;

    $entry_date = $this->post['date'] - $this->time_offset;
    $where = "entry_id = '" . $DB->escape_str( $entry_id ) . "'";

    // Update weblog_titles
    $titles = array(
        'title'      => $this->post['title'],
        'entry_date' => $entry_date,
        'year'       => gmdate( 'Y', $entry_date ),
        'month'      => gmdate( 'm', $entry_date ),
        'day'        => gmdate( 'd', $entry_date )
    );
    $this->_write( $DB->update_string( 'exp_weblog_titles', $titles, $where ) );

    // Update custom fields (an empty SET list would be invalid SQL)
    $fields = $this->_custom_field_data( $entry_id, $weblog_to_feed, true );
    if ( count( $fields ) > 0 ) {
        $this->_write( $DB->update_string( 'exp_weblog_data', $fields, $where ) );
    }

    $this->_sync_categories( $item, $entry_id, $weblog_row['cat_group'], true );
}

/**
 * Converts the custom fields of $this->post into exp_weblog_data columns and
 * writes Matrix rows to exp_matrix_data.
 *
 * @param int   $entry_id       entry the data belongs to
 * @param array $weblog_to_feed field map
 * @param bool  $replace_matrix delete the entry's existing Matrix rows first
 * @return array column name => value (field_id_N / field_ft_N)
 */
function _custom_field_data( $entry_id, $weblog_to_feed, $replace_matrix ) {
    global $DB, $LOC, $REGX;

    $data = array();

    foreach ( $this->post as $custom_field => $custom_data ) {
        // title, url_title and date live in exp_weblog_titles
        if ( in_array( $custom_field, array( 'title', 'url_title', 'date' ), true ) ) {
            continue;
        }
        // A mapped field that is not part of the weblog's field group has no column
        if ( ! isset( $weblog_to_feed[ $custom_field ]['id'] ) ) {
            if ( $this->debug ) { print " Skipping unknown field: " . $custom_field . "<br/>\n"; }
            continue;
        }

        $field_id = $weblog_to_feed[ $custom_field ]['id'];
        $type = isset( $weblog_to_feed[ $custom_field ]['type'] ) ? $weblog_to_feed[ $custom_field ]['type'] : '';

        if ( $type == 'date' ) {

            // Date field: YYYY-MM-DD anywhere in the value, midnight GMT plus the
            // localisation offset; 0 when the value holds no such date
            $epoch = 0;
            $date = $REGX->xss_clean( trim( (string) $custom_data ) );
            if ( preg_match( "/(\d{4})-(\d{1,2})-(\d{1,2})/", $date, $match ) ) {
                $epoch = gmmktime( 0, 0, 0, (int) $match[2], (int) $match[3], (int) $match[1] );
                $epoch += $LOC->set_localized_offset();
            }
            $data[ 'field_id_' . $field_id ] = $epoch;

        } elseif ( $type == 'matrix' ) {

            if ( $replace_matrix ) {
                $this->_write( "DELETE FROM exp_matrix_data WHERE entry_id = '" . $DB->escape_str( $entry_id ) . "' AND field_id = '" . $DB->escape_str( $field_id ) . "'" );
            }

            // Loop through data rows and insert record into exp_matrix_data
            $row_order = 0;
            foreach ( ( is_array( $custom_data ) ? $custom_data : array() ) as $matrix_row ) {
                if ( $this->site_id != '' ) {
                    $matrix_row['site_id'] = $this->site_id;
                }
                $matrix_row['entry_id']  = $entry_id;
                $matrix_row['field_id']  = $field_id;
                $matrix_row['row_order'] = $row_order++;
                $this->_write( $DB->insert_string( 'exp_matrix_data', $matrix_row ) );
            }
            // The field column itself is set to 1 to indicate matrix data exists
            $data[ 'field_id_' . $field_id ] = 1;

        } else {

            $data[ 'field_id_' . $field_id ] = $REGX->xss_clean( trim( (string) $custom_data ) );

        }

        $data[ 'field_ft_' . $field_id ] = $weblog_to_feed[ $custom_field ]['format'];
    }

    return $data;
}

/**
 * Assigns categories to an entry: the per-feed categories of both category
 * groups, then the default categories from the `category` parameter.
 *
 * @param array  $item             feed item
 * @param int    $entry_id         entry to categorise
 * @param string $weblog_cat_group category group(s) of the weblog, used for the defaults
 * @param bool   $is_update        when true, a feed-driven group is emptied for
 *                                 this entry before it is re-filled
 */
function _sync_categories( $item, $entry_id, $weblog_cat_group, $is_update ) {
    global $DB;

    $groups = array(
        array( $this->cat_field,   $this->cat_delimiter,   $this->cat_group ),
        array( $this->cat_field_2, $this->cat_delimiter_2, $this->cat_group_2 )
    );

    // Specific category by feed field
    foreach ( $groups as $group ) {
        list( $field, $delimiter, $group_id ) = $group;
        if ( $field == '' ) {
            continue;
        }
        if ( $is_update ) {
            $this->_write( "DELETE p FROM exp_category_posts p LEFT JOIN exp_categories c USING (cat_id) WHERE p.entry_id = '" . $DB->escape_str( $entry_id ) . "' AND c.group_id = '" . $DB->escape_str( $group_id ) . "'" );
        }
        $cat_ids = $this->_feed_category_ids( $item, $field, $delimiter, $group_id );
        $this->_assign_categories( $entry_id, $cat_ids, $group_id );
    }

    // Default category for all entries; applied last so a group refresh cannot remove it
    if ( isset( $this->cats ) && $this->cats != '' ) {
        $this->_assign_categories( $entry_id, explode( ',', $this->cats ), $weblog_cat_group );
    }
}

/**
 * Resolves the category names found in one feed field to category ids,
 * creating top-level categories that do not exist yet.
 *
 * @param array  $item      feed item
 * @param string $field     feed path holding the category name(s)
 * @param string $delimiter separator between several names in one value
 * @param mixed  $group_id  category group the names belong to
 * @return array category ids
 */
function _feed_category_ids( $item, $field, $delimiter, $group_id ) {
    // Find category item
    $nameList = (string) $this->get_namespaced_field( $item, $field );

    // Does entry have multiple category items? Merge them into one list.
    $num_cats = $this->get_namespaced_field( $item, $field . "#" );
    if ( $num_cats > 1 ) {
        for ( $i = 2; $i <= $num_cats; $i++ ) {
            $nameList .= $delimiter . " " . $this->get_namespaced_field( $item, $field . "#" . $i );
        }
    }

    $cat_ids = array();
    foreach ( explode( $delimiter, $nameList ) as $name ) {
        $name = trim( $name );
        if ( $name === '' ) {
            // a missing field or a doubled delimiter must not create a nameless category
            continue;
        }
        $cat_ids[] = $this->_find_or_create_category( $name, $group_id );
    }

    return $cat_ids;
}

/**
 * Returns the id of the top-level category $name in $group_id, creating it
 * (plus its exp_category_field_data row) when it does not exist.
 *
 * @param string $name
 * @param mixed  $group_id
 * @return int category id (0 when a dry run prevented the insert)
 */
function _find_or_create_category( $name, $group_id ) {
    global $DB, $REGX;

    $existing = $DB->query( "SELECT cat_id
        FROM exp_categories
        WHERE exp_categories.cat_name = '" . $DB->escape_str( $name ) . "'
        AND exp_categories.parent_id = '0'
        AND exp_categories.group_id = '" . $DB->escape_str( $group_id ) . "'" );

    if ( $existing->num_rows > 0 ) {
        if ( $this->debug ) { print "<p>Category " . $name . " exists</p>"; }
        return $existing->row['cat_id'];
    }

    // Create primary category
    $site_id = ( $this->site_id != '' ? $this->site_id : 1 );
    $category = array(
        'group_id'      => $group_id,
        'site_id'       => $site_id,
        'cat_name'      => $name,
        'cat_url_title' => $REGX->create_url_title( $name ),
        'cat_image'     => '',
        'parent_id'     => '0'
    );

    if ( $this->debug ) { print "<p>Try to add category " . $name . "</p>"; }

    if ( ! $this->_write( $DB->insert_string( 'exp_categories', $category ) ) ) {
        return 0;
    }
    $cat_id = $DB->insert_id;

    $this->_write( $DB->insert_string( 'exp_category_field_data', array(
        'cat_id'   => $cat_id,
        'site_id'  => $site_id,
        'group_id' => $group_id
    ) ) );

    return $cat_id;
}

/**
 * Links an entry to the given categories (matched by id or by name within
 * the group) and, when the site preference asks for it, to their parents.
 * Existing links for the same pairs are replaced, so the call is repeatable.
 *
 * @param int    $entry_id
 * @param array  $cats     category ids and/or names
 * @param string $group_id category group id, or several separated by "|"
 */
function _assign_categories( $entry_id, $cats, $group_id ) {
    global $DB, $PREFS;

    $wanted = array();
    foreach ( $cats as $cat ) {
        $cat = trim( (string) $cat );
        if ( $cat !== '' ) {
            $wanted[ $cat ] = "'" . $DB->escape_str( $cat ) . "'";
        }
    }
    if ( count( $wanted ) == 0 ) {
        return;
    }

    $group_ids = array();
    foreach ( explode( '|', (string) $group_id ) as $id ) {
        $group_ids[] = "'" . $DB->escape_str( trim( $id ) ) . "'";
    }

    $in = implode( ',', $wanted );
    $found = $DB->query( "SELECT cat_id, parent_id FROM exp_categories
        WHERE (cat_id IN (" . $in . ") OR cat_name IN (" . $in . "))
        AND group_id IN (" . implode( ',', $group_ids ) . ")" );
    if ( $found->num_rows == 0 ) {
        return;
    }

    // Collect each category once, together with its parent when auto-assignment is on
    $assign = array();
    foreach ( $found->result as $row ) {
        if ( $PREFS->ini('auto_assign_cat_parents') == 'y' && $row['parent_id'] != '0' ) {
            $assign[ $row['parent_id'] ] = true;
        }
        $assign[ $row['cat_id'] ] = true;
    }

    $entry = $DB->escape_str( $entry_id );
    $cat_ids = array_keys( $assign );

    $this->_write( "DELETE FROM exp_category_posts WHERE entry_id = '" . $entry . "' AND cat_id IN ('" . implode( "','", $cat_ids ) . "')" );
    foreach ( $cat_ids as $cat_id ) {
        $this->_write( "INSERT INTO exp_category_posts (entry_id, cat_id) VALUES ('" . $entry . "', '" . $cat_id . "')" );
    }
}
/**
 * Returns the plugin's usage text.
 *
 * The text is written as inline template output and captured with output
 * buffering, so nothing is sent to the browser.
 *
 * @return string
 */
public static function usage() {
    ob_start();
?>
XMLGrab allows you to extract data from an XML feed and insert it into a weblog.
See: http://www.brandnewbox.co.uk/docs/
<?php
    $usage_text = ob_get_clean();

    return $usage_text;
}
/**
 * Looks up a feed field in an item array.
 *
 * A name of the form "ns:name" is first tried as a nested lookup
 * ($item['ns']['name']). Items built by fetch_xml() are flat, with the
 * element path as the key, so the literal key ($item['ns:name']) is tried next.
 *
 * @param array  $item  item data
 * @param string $field field name or path, optionally namespaced
 * @return mixed the stored value, or null when the field is not present
 */
function get_namespaced_field( $item, $field ) {
    $field = (string) $field;

    // A colon at position 0 is not a namespace separator (same as before).
    if ( strpos( $field, ':' ) > 0 ) {
        $parts = explode( ":", $field );
        // is_array() keeps a string value from being indexed character-wise
        if ( isset( $item[ $parts[0] ] ) && is_array( $item[ $parts[0] ] )
            && isset( $item[ $parts[0] ][ $parts[1] ] ) ) {
            return $item[ $parts[0] ][ $parts[1] ];
        }
    }

    if ( isset( $item[ $field ] ) ) {
        return $item[ $field ];
    }

    return null;
}
/**
 * Converts a feed date string into a Unix timestamp.
 *
 * Parsing is delegated to strtotime(). When the string is empty or cannot be
 * parsed, the current time is returned so the entry still gets a date.
 * The method no longer prints the raw date string: that output was
 * unconditional and unescaped.
 *
 * @param string $datestr date as found in the feed
 * @return int Unix timestamp
 */
function parse_date( $datestr ) {
    $datestr = trim( (string) $datestr );
    if ( $datestr === '' ) {
        return time();
    }

    $date = strtotime( $datestr );

    // strtotime() reports failure as false (-1 before PHP 5.1); 0 was also
    // treated as "no date" by the loose comparison used previously.
    if ( $date === false || $date == -1 || $date == 0 ) {
        return time();
    }

    return $date;
}
/**
 * Parses a Twitter "created_at" date.
 *
 * Expected format: "Wed Apr 18 13:17:34 +0000 2007". The parts are reordered
 * to "18 Apr 2007 13:17:34 +0000" before being handed to strtotime().
 *
 * @param string $datestr
 * @return int Unix timestamp, or -1 when the string does not have six parts
 *             or cannot be parsed (same failure value as parse_w3cdtf())
 */
function parse_twitter_created_at( $datestr ) {
    // Split on any run of whitespace so padded day numbers ("Apr  8") still work.
    $parts = preg_split( '/\s+/', trim( (string) $datestr ) );
    if ( count( $parts ) < 6 ) {
        return -1;
    }

    // [0] weekday, [1] month, [2] day, [3] time, [4] zone, [5] year
    $newdatestr = $parts[2] . " " . $parts[1] . " " . $parts[5] . " " . $parts[3] . " " . $parts[4];
    $stamp = strtotime( $newdatestr );

    return ( $stamp === false ) ? -1 : $stamp;
}
/**
 * Swaps the first two numeric groups of a leading date, turning
 * MM/DD/YYYY into DD/MM/YYYY.
 *
 * The groups may be separated by "/", ".", "-" or spaces; the result always
 * uses "/". A string that does not start with such a date is returned unchanged.
 *
 * @param string $datestr
 * @return string
 */
function reformat_to_dd_mm_yyyy( $datestr ) {
    $separator = '[\/\. -]+';
    $number    = '([0-9]{1,2})';
    $year      = '([0-9]{2,4})';

    $pattern = '/^\s*' . $number . $separator . $number . $separator . $year . '/';

    return preg_replace( $pattern, '\\2/\\1/\\3', $datestr );
}
/**
 * Parses a W3C date-time (YYYY-MM-DDThh:mm[:ss[.fraction]][Z|+hh:mm|-hh:mm]).
 *
 * Fixes over the previous version: seconds are optional without requiring a
 * trailing colon, fractional seconds are accepted (and ignored), and the
 * zone is read from the correct capture groups, so a missing zone no longer
 * touches undefined offsets.
 *
 * @param string $date_str
 * @return int Unix timestamp, or -1 when the string holds no W3C date-time
 */
function parse_w3cdtf ( $date_str ) {
    # groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 seconds,
    #         7 zone sign, 8 zone hours, 9 zone minutes, 10 "Z"
    $pat = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(?::(\d{2})(?:\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';

    if ( ! preg_match( $pat, (string) $date_str, $match ) ) {
        return -1;
    }

    $seconds = ( isset( $match[6] ) && $match[6] !== '' ) ? (int) $match[6] : 0;

    # calc epoch for the given date assuming GMT
    $epoch = gmmktime(
        (int) $match[4], (int) $match[5], $seconds,
        (int) $match[2], (int) $match[3], (int) $match[1]
    );

    # "Z" or no zone at all means GMT: nothing to adjust
    $offset = 0;
    if ( isset( $match[7] ) && $match[7] !== '' ) {
        $offset_secs = ( ( (int) $match[8] * 60 ) + (int) $match[9] ) * 60;
        # is timezone ahead of GMT? then subtract offset
        $offset = ( $match[7] == '+' ) ? -$offset_secs : $offset_secs;
    }

    return $epoch + $offset;
}
/**
 * Looks for an existing entry that matches the post on the `unique` criteria.
 *
 * Despite the name, the return value is the id of the matching entry
 * (0 when there is none), so callers can update the duplicate.
 *
 * Fixes over the previous version:
 *  - the search is limited to the target weblog, so an entry of another
 *    weblog can no longer be reported (and then updated) as a duplicate;
 *  - the date is compared after subtracting time_offset, which is how the
 *    constructor stores entry_date;
 *  - unknown or unusable criteria are ignored, and when none is usable the
 *    default title + date test is applied instead of matching every entry.
 *
 * @param array  $post           post data (title, date, custom fields)
 * @param string $unique         comma separated list: "title", "date" and/or weblog field names
 * @param array  $weblog_to_feed field map holding the id of each custom field
 * @return int id of the existing entry, or 0 when the post is new
 */
function is_entry_unique( $post, $unique, $weblog_to_feed ) {
    global $DB, $REGX;

    $title = isset( $post['title'] ) ? $post['title'] : '';
    $entry_date = ( isset( $post['date'] ) ? $post['date'] : 0 ) - $this->time_offset;

    $criteria = array();
    $custom = false;

    $unique = trim( (string) $unique );
    if ( $unique != "title,date" && $unique != "" ) {
        /* Build custom query */
        foreach ( explode( ",", $unique ) as $value ) {
            $value = trim( $value );
            switch ( $value ) {
                case 'title':
                    $criteria[] = "t.title = '" . $DB->escape_str( $title ) . "'";
                    break;
                case 'date':
                    $criteria[] = "t.entry_date = '" . $DB->escape_str( $entry_date ) . "'";
                    break;
                default:
                    // Only mapped custom fields with a known column and a scalar value can be compared
                    if ( ! empty( $weblog_to_feed[ $value ]['is_custom'] )
                        && isset( $weblog_to_feed[ $value ]['id'] )
                        && isset( $post[ $value ] ) && is_scalar( $post[ $value ] ) ) {
                        // Compare against the value the way it is stored (trimmed, XSS-cleaned)
                        $stored = $REGX->xss_clean( trim( (string) $post[ $value ] ) );
                        $criteria[] = "d.field_id_" . $weblog_to_feed[ $value ]['id'] . " = '" . $DB->escape_str( $stored ) . "'";
                        $custom = true;
                    }
            }
        }
    }

    if ( count( $criteria ) == 0 ) {
        // Default for backwards compatibility
        $criteria[] = "LEFT(t.title,100) = LEFT('" . $DB->escape_str( $title ) . "',100)";
        $criteria[] = "t.entry_date = '" . $DB->escape_str( $entry_date ) . "'";
    }

    if ( $this->feed_weblog_id != '' ) {
        $criteria[] = "t.weblog_id = '" . $DB->escape_str( $this->feed_weblog_id ) . "'";
    }

    $sql = "SELECT t.entry_id FROM exp_weblog_titles t";
    if ( $custom ) {
        // Custom field values live in exp_weblog_data
        $sql .= ", exp_weblog_data d WHERE t.entry_id = d.entry_id AND ";
    } else {
        $sql .= " WHERE ";
    }
    $sql .= implode( " AND ", $criteria );

    $query = $DB->query( $sql );

    if ( $query->num_rows > 0 ) {
        return $query->row['entry_id'];
    }
    return 0;
}
/**
 * Walks a parsed XML tree and collects every element whose path equals
 * $search into $items, flattening each one.
 *
 * Each match becomes $items[n] (n starts at 1), an array keyed by the path
 * relative to the matched element:
 *   "title"        text of the first <title> leaf
 *   "title#"       number of <title> elements seen
 *   "title#2"      text of the second <title>, and so on
 *   "title@lang"   attribute "lang" of <title>
 *   ""             text of the matched element itself when it is a leaf
 *
 * Fix: an element only counts as part of an item when its path starts with
 * "$search/". The previous substring test also accepted siblings such as
 * "/xml/items" for "/xml/item" and created a stray $items[0].
 *
 * @param object $x          node with ->tag, ->value, ->children and ->attributes
 * @param string $search     absolute path of the item element, e.g. "/xml/item"
 * @param array  $items      collected items (by reference)
 * @param string $path       path of the parent node (recursion state)
 * @param int    $element    index of the current item (recursion state)
 * @param bool   $in_element whether the parent is inside an item (recursion state)
 * @param string $subpath    path of the parent relative to the item (recursion state)
 * @return int index of the last item started
 */
function fetch_xml( $x, $search, &$items, $path="", $element=0, $in_element=false, $subpath="" ) {
    $path = $path . "/" . $x->tag;
    $item_prefix = $search . "/";

    if ( $path == $search ) {
        // Path matches exactly our search element - we are in a new item
        $element++;
        $items[ $element ] = array();
        $subpath = "";
        $in_element = true;
    } elseif ( $in_element && strncmp( $path, $item_prefix, strlen( $item_prefix ) ) == 0 ) {
        // We are within an existing item - get xpath of subcomponent
        $subpath = substr( $path, strlen( $item_prefix ) );
        if ( ! isset( $items[ $element ][ $subpath . "#" ] ) ) {
            $items[ $element ][ $subpath . "#" ] = 0;
        }
        // Count this occurrence; repeats of a stored leaf get a "#<n>" suffix
        $count = $items[ $element ][ $subpath . "#" ]++;
        if ( isset( $items[ $element ][ $subpath ] ) ) {
            $subpath .= "#" . ( $count + 1 );
        }
    } else {
        $in_element = false;
    }

    if ( empty( $x->children ) ) {
        // Element has no children, ie. it is a leaf that carries a value
        if ( $in_element ) {
            // If within an item, add to its array
            $items[ $element ][ $subpath ] = $x->value;
        }
    } else {
        // Loop over all child elements...
        foreach ( $x->children as $child ) {
            // ...and recurse through xml structure
            $element = $this->fetch_xml( $child, $search, $items, $path, $element, $in_element, $subpath );
        }
    }

    // Add attributes
    if ( $in_element && is_array( $x->attributes ) ) {
        foreach ( $x->attributes as $attr_key => $attr_value ) {
            $items[ $element ][ $subpath . "@" . $attr_key ] = $attr_value;
        }
    }

    return $element;
}
- // --------------------------------------------------------------------
/**
 * curl Fetch
 *
 * From pi.twitter_timeline.php
 *
 * Downloads $url with cURL over a fresh connection. Failures are reported
 * as an empty string, the same way _fsockopen_fetch() reports them, and the
 * connection attempt is limited to the 8 seconds that method uses.
 *
 * @access	public
 * @param	string	URL to download
 * @return	string	response body, '' on failure
 */
function _curl_fetch($url)
{
	$ch = curl_init();
	if ($ch === FALSE)
	{
		return '';
	}

	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	curl_setopt($ch, CURLOPT_FRESH_CONNECT, 1);
	curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 8);
	// curl_setopt($ch, CURLOPT_USERPWD, "{$this->user}:{$this->password}");

	$data = curl_exec($ch);
	curl_close($ch);

	// curl_exec() returns FALSE on failure; callers expect a string
	return ($data === FALSE) ? '' : $data;
}
- // --------------------------------------------------------------------
/**
 * fsockopen Fetch
 *
 * From pi.twitter_timeline.php
 *
 * Downloads $url with a hand-written HTTP/1.0 GET request and returns the
 * response body (everything after the first blank line).
 *
 * Fixes over the previous version: the port given in the URL is used
 * (default 80, or 443 for https, which is opened through the ssl://
 * transport), the request line carries the path and query instead of the
 * absolute URL, and a URL without a host no longer reaches fsockopen().
 *
 * @access	public
 * @param	string	URL to download
 * @return	string	response body, '' on failure
 */
function _fsockopen_fetch($url)
{
	$data = '';

	$target = parse_url($url);
	if ( ! is_array($target) OR empty($target['host']))
	{
		return $data;
	}

	$secure = (isset($target['scheme']) AND strtolower($target['scheme']) == 'https');
	$default_port = $secure ? 443 : 80;
	$port = isset($target['port']) ? (int) $target['port'] : $default_port;

	$request = (isset($target['path']) AND $target['path'] != '') ? $target['path'] : '/';
	if (isset($target['query']) AND $target['query'] != '')
	{
		$request .= '?' . $target['query'];
	}

	// The Host header only names the port when it is not the scheme's default
	$host_header = $target['host'] . (($port != $default_port) ? ':' . $port : '');

	$fp = fsockopen(($secure ? 'ssl://' : '') . $target['host'], $port, $error_num, $error_str, 8);

	if (is_resource($fp))
	{
		fputs($fp, "GET {$request} HTTP/1.0\r\n");
		fputs($fp, "Host: {$host_header}\r\n");
		// fputs($fp, "Authorization: Basic ".base64_encode("$this->user:$this->password")."\r\n");
		fputs($fp, "User-Agent: EE/xmlgrab PHP/" . phpversion() . "\r\n\r\n");

		// Skip the response headers: the body starts after the first blank line
		$headers = TRUE;
		while( ! feof($fp))
		{
			$line = fgets($fp, 4096);
			if ($headers === FALSE)
			{
				$data .= $line;
			}
			elseif (trim($line) == '')
			{
				$headers = FALSE;
			}
		}
		fclose($fp);
	}

	return $data;
}
- } // end class XMLGrab
- ?>