PageRenderTime 103ms CodeModel.GetById 37ms RepoModel.GetById 0ms app.codeStats 0ms

/pi.xmlgrab.php

https://bitbucket.org/ajweaver/xmlgrab
PHP | 1195 lines | 834 code | 227 blank | 134 comment | 211 complexity | 2364c4ca22dc74d46adee8705e859c5c MD5 | raw file
  1. <?php
  2. /* VERSIONS
  3. NOTES: changed is_unique fn return value, added else to unique test, did update code, changed category creation sql, fixed mb_encoding bug
  4. 0.8 Add support to update entries
  5. 0.7.4 Fixed blank date parsing bug
  6. 0.7.3 MSM support
  7. 0.7.2 Now supports xml attributes, repeated elements, uk date stamps, merge multiple categories
  8. Based on codebase of FeedGrab 0.7.1
  9. */
  // Plugin metadata: name, version, author details, description and usage text.
  $plugin_info = array();
  $plugin_info['pi_name']        = 'XMLGrab';
  $plugin_info['pi_version']     = '0.8';
  $plugin_info['pi_author']      = 'Andrew Weaver';
  $plugin_info['pi_author_url']  = 'http://www.brandnewbox.co.uk/docs/';
  $plugin_info['pi_description'] = 'Allows you to extract data from an XML feed and insert it into a weblog';
  // The usage text is produced by the class itself.
  $plugin_info['pi_usage']       = XMLGrab::usage();
  18. class XMLGrab {
  19. var $debug = true;
  20. var $runsql = true;
  21. var $uk_dates = true; // convert MM/DD/YYYY to DD/MM/YYYY
  22. var $page_url = '';
  23. var $title = 'title';
  24. var $publish_date = 'dc:date';
  25. var $author_id = 1;
  26. var $feed_weblog_id = '';
  27. var $feed_fields = '';
  28. var $weblog_fields = '';
  29. var $cats = '';
  30. var $cat_group = 0;
  31. var $cat_field = '';
  32. var $cat_delimiter = ',';
  33. var $cat_group_2 = 0;
  34. var $cat_field_2 = '';
  35. var $cat_delimiter_2 = ',';
  36. var $unique = 'title,date';
  37. var $itempath = '';
  38. var $site_id = '';
  39. var $do_update = false;
  40. var $status = '';
  41. var $return_data = '';
  42. var $time_offset = 0;
  /**
   * Runs the import (PHP4-style constructor).
   *
   * Reads the tag parameters from $TMPL, downloads the XML document at `url`,
   * collects the nodes matching `itempath` and, for each node, either inserts
   * a new weblog entry or (when the `update` parameter is set) refreshes the
   * existing entry whose id is_entry_unique() reports.
   *
   * Nothing is imported when `url`, `weblog`, `use` or `fields` is missing, or
   * when `use` and `fields` do not contain the same number of "|"-separated
   * names.
   *
   * All database writes go through _execute_write(), so switching $runsql off
   * gives a dry run that performs no writes at all.
   *
   * @return mixed $this->return_data on a parameter error, false when the
   *               target weblog does not exist, otherwise nothing.
   */
  function XMLGrab() {
      global $TMPL, $LOC, $FNS, $PREFS, $SESS, $REGX, $DB;

      // Check parameters (a parameter counts as missing whenever fetch_param() is falsy)
      $this->page_url = ( ! $TMPL->fetch_param('url') ) ? '' : str_replace('&#47;', '/', trim($TMPL->fetch_param('url')));
      $this->feed_weblog_id = ( ! $TMPL->fetch_param('weblog') ) ? '' : $TMPL->fetch_param('weblog');
      $this->author_id = ( ! $TMPL->fetch_param('author') ) ? 1 : $TMPL->fetch_param('author');
      $this->title = ( ! $TMPL->fetch_param('title') ) ? '' : $TMPL->fetch_param('title');
      $this->publish_date = ( ! $TMPL->fetch_param('date') ) ? '' : $TMPL->fetch_param('date');
      $this->feed_fields = ( ! $TMPL->fetch_param('use') ) ? '' : $TMPL->fetch_param('use');
      $this->weblog_fields = ( ! $TMPL->fetch_param('fields') ) ? '' : $TMPL->fetch_param('fields');
      $this->time_offset = ( ! $TMPL->fetch_param('offset') ) ? 0 : $TMPL->fetch_param('offset');
      $this->cats = ( ! $TMPL->fetch_param('category') ) ? '' : $TMPL->fetch_param('category');
      $this->cat_group = ( ! $TMPL->fetch_param('category_group') ) ? 0 : $TMPL->fetch_param('category_group');
      $this->cat_field = ( ! $TMPL->fetch_param('category_field') ) ? '' : $TMPL->fetch_param('category_field');
      $this->cat_delimiter = ( ! $TMPL->fetch_param('category_delimiter') ) ? ',' : $TMPL->fetch_param('category_delimiter');
      $this->cat_group_2 = ( ! $TMPL->fetch_param('category_group_2') ) ? 0 : $TMPL->fetch_param('category_group_2');
      $this->cat_field_2 = ( ! $TMPL->fetch_param('category_field_2') ) ? '' : $TMPL->fetch_param('category_field_2');
      $this->cat_delimiter_2 = ( ! $TMPL->fetch_param('category_delimiter_2') ) ? ',' : $TMPL->fetch_param('category_delimiter_2');
      $this->unique = ( ! $TMPL->fetch_param('unique') ) ? 'title,date' : $TMPL->fetch_param('unique');
      $this->itempath = ( ! $TMPL->fetch_param('itempath') ) ? '/xml/item' : $TMPL->fetch_param('itempath');
      $this->site_id = ( ! $TMPL->fetch_param('site_id') ) ? '' : $TMPL->fetch_param('site_id');
      $this->do_update = ( $TMPL->fetch_param('update') ) ? true : false;
      $this->status = ( ! $TMPL->fetch_param('status') ) ? '' : $TMPL->fetch_param('status');
      // NOTE(review): debug output is ON by default and passing debug="..." turns
      // it OFF. This looks inverted, but is kept as-is for backward compatibility.
      $this->debug = ( ! $TMPL->fetch_param('debug') ) ? true : false;

      // The source URL and the target weblog are both mandatory.
      if ($this->page_url == '' || $this->feed_weblog_id == '') {
          return $this->return_data;
      }

      // Keywords standing in for delimiters that are awkward to type in a tag.
      if ($this->cat_delimiter == 'TAB') {
          $this->cat_delimiter = "\t";
      }
      if ($this->cat_delimiter == 'SPACE') {
          $this->cat_delimiter = " ";
      }
      if ($this->cat_delimiter_2 == 'TAB') {
          $this->cat_delimiter_2 = "\t";
      }
      if ($this->cat_delimiter_2 == 'SPACE') {
          $this->cat_delimiter_2 = " ";
      }

      if ($this->feed_fields == '' || $this->weblog_fields == '') {
          return $this->return_data;
      }

      // EE replaces slashes with entities earlier in process, convert back here
      $this->feed_fields   = str_replace(SLASH, '/', $this->feed_fields);
      $this->weblog_fields = str_replace(SLASH, '/', $this->weblog_fields);
      $this->itempath      = str_replace(SLASH, '/', $this->itempath);
      $this->cat_field     = str_replace(SLASH, '/', $this->cat_field);
      $this->cat_field_2   = str_replace(SLASH, '/', $this->cat_field_2);

      $fieldsArray = explode('|', $this->feed_fields);
      $weblogArray = explode('|', $this->weblog_fields);

      // Check number of input and weblog fields match
      if (count($fieldsArray) != count($weblogArray)) {
          return $this->return_data;
      }

      // Map each weblog field name to the feed field that supplies its value.
      $weblog_to_feed = array(
          'title' => array('is_custom' => 0, 'field' => $this->title),
          'date'  => array('is_custom' => 0, 'field' => $this->publish_date),
      );
      foreach ($weblogArray as $i => $weblog_field) {
          $weblog_to_feed[$weblog_field] = array('is_custom' => 1, 'field' => $fieldsArray[$i]);
      }

      // The weblog id comes from a tag parameter; escape it once for every query below.
      $weblog_id_sql = $DB->escape_str($this->feed_weblog_id);

      // Get custom fields from database
      $customfieldsArray = $DB->query(
          "SELECT exp_weblog_fields.field_id, exp_weblog_fields.field_name, "
          . "exp_weblog_fields.field_label, exp_weblog_fields.field_fmt, "
          . "exp_weblog_fields.field_type, exp_weblog_fields.field_related_id "
          . "FROM exp_weblogs, exp_weblog_fields "
          . "WHERE exp_weblogs.field_group = exp_weblog_fields.group_id "
          . "AND exp_weblogs.weblog_id = '" . $weblog_id_sql . "'"
      );

      // Map weblog custom fields to id's
      foreach ($customfieldsArray->result as $row) {
          $field_name = $row['field_name'];
          $weblog_to_feed[$field_name]['id']     = $row['field_id'];
          $weblog_to_feed[$field_name]['format'] = $row['field_fmt'];
          $weblog_to_feed[$field_name]['type']   = $row['field_type'];
          if ($row['field_type'] == 'rel') {
              $weblog_to_feed[$field_name]['related_id'] = $row['field_related_id'];
          }
          if (substr($row['field_type'], 0, 8) == 'ftype_id') {
              // Is the field a FF type? Find out what type.
              $ftype_id = (int) substr($row['field_type'], 9);
              $results = $DB->query("SELECT class FROM exp_ff_fieldtypes WHERE fieldtype_id = " . $ftype_id);
              $weblog_to_feed[$field_name]['type'] = $results->row['class'];
              // If it is matrix, look for a matching parameter and store it
              if ($results->row['class'] == 'matrix') {
                  $weblog_to_feed[$field_name]['matrix'] = $TMPL->fetch_param($field_name);
              }
              // Store any FF settings for this type. The serialized value is read
              // from this site's own database, not from the feed.
              $field_settings = $DB->query("SELECT ff_settings FROM exp_weblog_fields WHERE field_id = " . (int) $row['field_id']);
              $weblog_to_feed[$field_name]['field_settings'] = unserialize($field_settings->row['ff_settings']);
          }
      }

      // Retrieve xml
      if ($this->debug) { print "Fetching: " . $this->page_url . "<br/>\n"; }
      if (function_exists('curl_init')) {
          $xml = $this->_curl_fetch($this->page_url);
      } else {
          $xml = $this->_fsockopen_fetch($this->page_url);
      }

      // Parse XML using EE's builtin functions
      include_once( PATH_CORE . "core.xmlparser.php" );
      $XML = new EE_XMLparser;
      $xml_obj = $XML->parse_xml($xml);

      // Recurse through XML structure looking for nodes that match $this->itempath
      $items = null;
      $this->fetch_xml($xml_obj, $this->itempath, $items);
      if ( ! is_array($items)) {
          // No node matched: avoid iterating over a non-array below.
          $items = array();
      }

      // Weblog defaults are the same for every item, so load them once. Both the
      // insert and the update path need cat_group for the default categories.
      $weblog_query = $DB->query(
          "SELECT blog_title, blog_url, rss_url, ping_return_url, deft_comments, "
          . "deft_trackbacks, deft_status, cat_group, field_group "
          . "FROM exp_weblogs WHERE weblog_id = '" . $weblog_id_sql . "'"
      );
      if ($weblog_query->num_rows == 0) {
          return false;
      }
      $weblog_defaults = $weblog_query->row;

      $entries_added = 0;

      // Loop over all feed items
      foreach ($items as $item) {
          $this->post = array();

          // Get title using title parameter and check for XSS
          $this->post['title'] = $this->get_namespaced_field($item, $weblog_to_feed['title']['field']);
          $this->post['title'] = $REGX->xss_clean($this->post['title']);

          // Convert the title to UTF-8 before deriving the url title from it
          $source_encoding = ( isset( $this->RSS->source_encoding ) ? $this->RSS->source_encoding : 'UTF-8' );
          if (function_exists('mb_convert_encoding')) {
              $encoded_title = mb_convert_encoding($this->post['title'], strtoupper('UTF-8'), $source_encoding);
          } elseif (function_exists('iconv') AND ($iconvstr = @iconv($source_encoding, 'UTF-8', $this->post['title'])) !== FALSE) {
              $encoded_title = $iconvstr;
          } else {
              $encoded_title = utf8_encode($this->post['title']);
          }
          $this->post['url_title'] = $REGX->create_url_title($encoded_title);

          // If the url title is taken in this weblog, add a counter on the end
          $url_title_sql = $DB->escape_str($this->post['url_title']);
          $results = $DB->query("SELECT count(*) AS count FROM exp_weblog_titles WHERE url_title = '" . $url_title_sql . "' AND weblog_id = '" . $weblog_id_sql . "'");
          if ($results->row['count'] > 0) {
              $results = $DB->query("SELECT count(*) AS count FROM exp_weblog_titles WHERE url_title LIKE '" . $url_title_sql . "%' AND weblog_id = '" . $weblog_id_sql . "'");
              $this->post['url_title'] .= $results->row['count'] + 1;
          }

          // Get date field using date parameter; fall back to the current time
          $this->post['date'] = time();
          if (isset($item[ $weblog_to_feed['date']['field'] ])) {
              $this->post['date'] = $this->parse_date($item[ $weblog_to_feed['date']['field'] ]);
          }

          // Load this->post array with data
          foreach ($weblog_to_feed as $field => $data) {
              if ( ! isset($data['is_custom']) || ! $data['is_custom']) {
                  continue;
              }
              if ( ! isset($data['type']) || $data['type'] != 'matrix') {
                  $this->post[$field] = $this->get_namespaced_field($item, $data['field']);
                  continue;
              }
              // Handle Pixel & Tonic Matrix field: "<field>#" holds the row count
              $no_subrows = $this->get_namespaced_field($item, $data['field'] . '#');
              $matrix_data = array();
              $matrix_fields = explode('|', $data['matrix']);
              for ($i = 0; $i < $no_subrows; $i++) {
                  $matrix_data[$i] = array();
                  foreach ($matrix_fields as $j => $matrix_field) {
                      // The first row has no suffix; later rows are addressed as "#2", "#3", ...
                      $matrix_data[$i][ 'col_id_' . $data['field_settings']['col_ids'][$j] ] = $this->get_namespaced_field(
                          $item,
                          $data['field'] . '/' . $matrix_field . ( $i > 0 ? '#' . ($i + 1) : '' )
                      );
                  }
              }
              // Store array of data to insert into exp_matrix_data
              $this->post[$field] = $matrix_data;
          }

          if ($this->debug) { print " Checking: " . $this->post['title'] . "<br />\n"; }

          // Look for duplicate entries
          $entry_id = $this->is_entry_unique($this->post, $this->unique, $weblog_to_feed);
          $entry_time = $this->post['date'] - $this->time_offset;

          if ($entry_id == 0) {
              if ($this->debug) {
                  print " Found new entry: " . $this->post['title'] . "<br/>\n";
              }

              // Check status: a value from the feed wins, but only when the item has one
              $status = $weblog_defaults['deft_status'];
              if ($this->status != '') {
                  $feed_status = $this->get_namespaced_field($item, $this->status);
                  if ($feed_status !== null && $feed_status !== '') {
                      $status = $feed_status;
                  }
              }

              // Insert into weblog_titles
              $data = array(
                  'entry_id' => '',
                  'weblog_id' => $this->feed_weblog_id,
                  'author_id' => $this->author_id,
                  'title' => $this->post['title'],
                  'url_title' => $this->post['url_title'],
                  'ip_address' => '127.0.0.1',
                  'entry_date' => $entry_time,
                  'year' => gmdate('Y', $entry_time),
                  'month' => gmdate('m', $entry_time),
                  'day' => gmdate('d', $entry_time),
                  'sticky' => 'n',
                  'status' => $status,
                  'allow_comments' => $weblog_defaults['deft_comments'],
                  'allow_trackbacks' => $weblog_defaults['deft_trackbacks']
              );
              if ($this->site_id != '') {
                  $data['site_id'] = $this->site_id;
              }
              if ($this->_execute_write($DB->insert_string('exp_weblog_titles', $data))) {
                  $entry_id = $DB->insert_id;
              } else {
                  $entry_id = 99999; // placeholder id for a dry run
              }

              // Insert into weblog_data
              unset($this->post['title'], $this->post['url_title'], $this->post['date']);
              $data = array(
                  'entry_id' => $entry_id,
                  'weblog_id' => $this->feed_weblog_id
              );
              if ($this->site_id != '') {
                  $data['site_id'] = $this->site_id;
              }
              $data = array_merge($data, $this->_collect_custom_field_data($entry_id, $weblog_to_feed, false));
              $this->_execute_write($DB->insert_string('exp_weblog_data', $data));

              // Default categories for all entries, then categories named by the feed
              if ($this->cats != '') {
                  $this->_assign_categories($entry_id, explode(',', $this->cats), $weblog_defaults['cat_group'], true);
              }
              $this->_assign_feed_categories($entry_id, $item, $this->cat_field, $this->cat_delimiter, $this->cat_group);
              $this->_assign_feed_categories($entry_id, $item, $this->cat_field_2, $this->cat_delimiter_2, $this->cat_group_2);

              $entries_added++;
          } else {
              // DUPLICATE ENTRY
              if ($this->debug) {
                  print " Found existing entry: " . $this->post['title'] . "<br/>\n";
              }
              if ( ! $this->do_update) {
                  continue;
              }

              $entry_where = "entry_id = '" . $DB->escape_str($entry_id) . "'";

              // Update weblog_titles
              $data = array(
                  'title' => $this->post['title'],
                  'entry_date' => $entry_time,
                  'year' => gmdate('Y', $entry_time),
                  'month' => gmdate('m', $entry_time),
                  'day' => gmdate('d', $entry_time),
              );
              $this->_execute_write($DB->update_string('exp_weblog_titles', $data, $entry_where));

              // Update custom fields
              unset($this->post['title'], $this->post['url_title'], $this->post['date']);
              $data = $this->_collect_custom_field_data($entry_id, $weblog_to_feed, true);
              if (count($data) > 0) {
                  $this->_execute_write($DB->update_string('exp_weblog_data', $data, $entry_where));
              }

              // Update categories. Clear both feed-driven groups BEFORE assigning
              // anything, so that neither the default categories nor the other
              // group's categories are wiped straight after being inserted.
              $this->_execute_write("DELETE p FROM exp_category_posts p LEFT JOIN exp_categories c USING (cat_id) WHERE entry_id = " . (int) $entry_id . " AND c.group_id = " . (int) $this->cat_group);
              $this->_execute_write("DELETE p FROM exp_category_posts p LEFT JOIN exp_categories c USING (cat_id) WHERE entry_id = " . (int) $entry_id . " AND c.group_id = " . (int) $this->cat_group_2);
              if ($this->cats != '') {
                  $this->_assign_categories($entry_id, explode(',', $this->cats), $weblog_defaults['cat_group'], true);
              }
              $this->_assign_feed_categories($entry_id, $item, $this->cat_field, $this->cat_delimiter, $this->cat_group);
              $this->_assign_feed_categories($entry_id, $item, $this->cat_field_2, $this->cat_delimiter_2, $this->cat_group_2);
          }
      }

      if ($entries_added > 0) {
          if ($this->debug) {
              print "<br/>\nNew entries: " . $entries_added . "<br/>\n";
          }
          if ($PREFS->ini('new_posts_clear_caches') == 'y') {
              $FNS->clear_caching('all');
          } else {
              $FNS->clear_caching('sql_cache');
          }
      }
  }

  /**
   * Executes a write query, or only reports it during a dry run.
   *
   * @param string $sql Complete INSERT/UPDATE/DELETE statement.
   * @return bool true when the query was executed, false when $runsql is off
   *              (the SQL is then printed if debug output is enabled).
   */
  private function _execute_write($sql) {
      global $DB;
      if ($this->runsql) {
          $DB->query($sql);
          return true;
      }
      if ($this->debug) { print $sql . "<br/>\n"; }
      return false;
  }

  /**
   * Builds the exp_weblog_data columns for the custom fields left in $this->post.
   *
   * Expects title, url_title and date to have been removed from $this->post.
   * Matrix rows are written to exp_matrix_data as a side effect.
   *
   * @param mixed $entry_id       Entry the values belong to.
   * @param array $weblog_to_feed Field map built by the constructor (id, type, format per field).
   * @param bool  $replace_matrix Delete the entry's existing matrix rows first (update path).
   * @return array field_id_N / field_ft_N => value
   */
  private function _collect_custom_field_data($entry_id, $weblog_to_feed, $replace_matrix) {
      global $DB, $LOC, $REGX;

      $data = array();
      foreach ($this->post as $custom_field => $custom_data) {
          $meta = isset($weblog_to_feed[$custom_field]) ? $weblog_to_feed[$custom_field] : array();
          if ( ! isset($meta['id'])) {
              // The field is not in the weblog's field group, so there is no
              // field_id_N column to write to.
              if ($this->debug) { print " Skipping unknown weblog field: " . $custom_field . "<br/>\n"; }
              continue;
          }
          $column = 'field_id_' . $meta['id'];

          if ($meta['type'] == 'date') {
              // Date field: expects YYYY-MM-DD somewhere in the value. Anything
              // else is stored as 0 rather than reusing the previous field's value.
              $date = $REGX->xss_clean(trim($custom_data));
              $epoch = 0;
              if (preg_match("/(\d{4})-(\d{1,2})-(\d{1,2})/", $date, $match)) {
                  $epoch = gmmktime(0, 0, 0, (int) $match[2], (int) $match[3], (int) $match[1]);
                  $epoch += $LOC->set_localized_offset();
              }
              $data[$column] = $epoch;
          } elseif ($meta['type'] == 'matrix') {
              if ($replace_matrix) {
                  $this->_execute_write("DELETE FROM exp_matrix_data WHERE entry_id = " . (int) $entry_id . " AND field_id = " . (int) $meta['id']);
              }
              // Loop through data rows and insert record into exp_matrix_data
              $row_order = 0;
              foreach ($custom_data as $matrix_row) {
                  if ($this->site_id != '') {
                      $matrix_row['site_id'] = $this->site_id;
                  }
                  $matrix_row['entry_id'] = $entry_id;
                  $matrix_row['field_id'] = $meta['id'];
                  $matrix_row['row_order'] = $row_order++;
                  $this->_execute_write($DB->insert_string('exp_matrix_data', $matrix_row));
              }
              // Set value to 1 to indicate matrix data exists
              $data[$column] = 1;
          } else {
              $data[$column] = $REGX->xss_clean(trim($custom_data));
          }
          $data['field_ft_' . $meta['id']] = $meta['format'];
      }
      return $data;
  }

  /**
   * Resolves the category names an item carries in $cat_field (creating any
   * top-level category that does not exist yet in $group_id) and assigns them
   * to the entry.
   *
   * @param mixed  $entry_id  Entry to categorise.
   * @param array  $item      Parsed feed item.
   * @param string $cat_field Feed field holding the category name(s); '' disables the step.
   * @param string $delimiter Separator between several names inside one value.
   * @param mixed  $group_id  Category group to look in / create in.
   * @return void
   */
  private function _assign_feed_categories($entry_id, $item, $cat_field, $delimiter, $group_id) {
      global $DB, $REGX;

      if ($cat_field == '') {
          return;
      }

      // Find category item; "<field>#" holds the number of repeated elements
      $nameList = $this->get_namespaced_field($item, $cat_field);
      $num_cats = $this->get_namespaced_field($item, $cat_field . '#');
      if ($num_cats > 1) {
          for ($i = 2; $i <= $num_cats; $i++) {
              $nameList .= $delimiter . ' ' . $this->get_namespaced_field($item, $cat_field . '#' . $i);
          }
      }

      $site_id = ( $this->site_id != '' ? $this->site_id : 1 );
      $cat_ids = array();
      foreach (explode($delimiter, $nameList) as $name) {
          $name = trim($name);
          if ($name == '') {
              // An item without categories must not create a blank category.
              continue;
          }
          $query = $DB->query(
              "SELECT cat_id FROM exp_categories "
              . "WHERE exp_categories.cat_name = '" . $DB->escape_str($name) . "' "
              . "AND exp_categories.parent_id = '0' "
              . "AND exp_categories.group_id = '" . $DB->escape_str($group_id) . "'"
          );
          if ($query->num_rows > 0) {
              if ($this->debug) { print "<p>Category " . $name . " exists</p>"; }
              $cat_ids[] = $query->row['cat_id'];
              continue;
          }

          // Create primary category
          $insert_array = array(
              'group_id' => $group_id,
              'site_id' => $site_id,
              'cat_name' => $name,
              'cat_url_title' => $REGX->create_url_title($name),
              'cat_image' => '',
              'parent_id' => '0'
          );
          if ($this->debug) { print "<p>Try to add category " . $name . "</p>"; }
          if ($this->_execute_write($DB->insert_string('exp_categories', $insert_array))) {
              // Capture the id before the next insert changes $DB->insert_id.
              $new_cat_id = $DB->insert_id;
              $cat_ids[] = $new_cat_id;
              $this->_execute_write($DB->insert_string('exp_category_field_data', array(
                  'cat_id' => $new_cat_id,
                  'site_id' => $site_id,
                  'group_id' => $group_id
              )));
          }
      }

      $this->_assign_categories($entry_id, $cat_ids, $group_id, false);
  }

  /**
   * Links an entry to the given categories of one group in exp_category_posts.
   *
   * Parent categories are added as well when the auto_assign_cat_parents
   * preference is 'y'. Each (entry, category) pair is removed before being
   * inserted, so the same link is never stored twice.
   *
   * @param mixed $entry_id    Entry to categorise.
   * @param array $cat_refs    Category ids (and names, when $match_names is true).
   * @param mixed $group_id    Category group the categories must belong to.
   * @param bool  $match_names Also match $cat_refs against cat_name.
   * @return void
   */
  private function _assign_categories($entry_id, $cat_refs, $group_id, $match_names = false) {
      global $DB, $PREFS;

      $escaped = array();
      foreach (array_unique($cat_refs) as $ref) {
          $escaped[] = $DB->escape_str($ref);
      }
      if (count($escaped) == 0) {
          return;
      }

      $in_list = "'" . implode("','", $escaped) . "'";
      $where = "cat_id IN (" . $in_list . ")";
      if ($match_names) {
          $where = "(" . $where . " OR cat_name IN (" . $in_list . "))";
      }
      $results = $DB->query(
          "SELECT cat_id, parent_id FROM exp_categories WHERE " . $where
          . " AND group_id = '" . $DB->escape_str($group_id) . "'"
      );
      if ($results->num_rows == 0) {
          return;
      }

      // Collect ids as array keys so each category is linked only once.
      $with_parents = ( $PREFS->ini('auto_assign_cat_parents') == 'y' );
      $assign = array();
      foreach ($results->result as $row) {
          if ($with_parents && $row['parent_id'] != '0') {
              $assign[ (int) $row['parent_id'] ] = true;
          }
          $assign[ (int) $row['cat_id'] ] = true;
      }
      $cat_ids = array_keys($assign);
      $entry_sql = $DB->escape_str($entry_id);

      $this->_execute_write("DELETE FROM exp_category_posts WHERE entry_id = '" . $entry_sql . "' AND cat_id IN (" . implode(',', $cat_ids) . ")");
      foreach ($cat_ids as $cat_id) {
          $this->_execute_write("INSERT INTO exp_category_posts (entry_id, cat_id) VALUES ('" . $entry_sql . "', '" . $cat_id . "')");
      }
  }
  /**
   * Return the plugin's usage text (referenced by the 'pi_usage' entry of
   * $plugin_info at the top of the file).
   *
   * The text is written as an inline template section and captured with
   * output buffering, so nothing is sent to the browser.
   *
   * @return string
   */
  public static function usage() {
      ob_start();
?>
XMLGrab allows you to extract data from an XML feed and insert it into a weblog.
See: http://www.brandnewbox.co.uk/docs/
<?php
      // Fetch the captured text and drop the buffer in a single call.
      return ob_get_clean();
  }
  /**
   * Look up a field of a parsed feed item by name.
   *
   * A name of the form "prefix:name" is first tried as the nested entry
   * $item[prefix][name]. When that is not set (or the name has no prefix)
   * the complete name is tried as a plain key of $item. The plain-key
   * fallback matters because fetch_xml() stores every value under a flat
   * key, so names such as "dc:date" or "link@xlink:href" would otherwise
   * never be found.
   *
   * @param array  $item  one item as built by fetch_xml()
   * @param string $field field name, optionally "prefix:name"
   * @return mixed the stored value, or null when nothing is set
   */
  function get_namespaced_field( $item, $field ) {
      $colon = strpos( $field, ':' );

      // Nested lookup only makes sense when there is a non-empty prefix.
      if ( $colon !== false && $colon > 0 ) {
          $parts = explode( ':', $field );
          if ( isset( $item[ $parts[0] ][ $parts[1] ] ) ) {
              return $item[ $parts[0] ][ $parts[1] ];
          }
      }

      // Plain key (also the fallback for names that merely contain a colon).
      if ( isset( $item[ $field ] ) ) {
          return $item[ $field ];
      }

      return null;
  }
  /**
   * Convert a date string taken from the feed into a Unix timestamp.
   *
   * Only strtotime() is consulted; the other parsers in this class
   * (reformat_to_dd_mm_yyyy, parse_w3cdtf, parse_twitter_created_at) are
   * not used here. A blank or unparseable value yields the current time.
   *
   * @param string $datestr raw date text from the feed
   * @return int Unix timestamp
   */
  function parse_date( $datestr ) {
      $datestr = trim( (string) $datestr );

      if ( $this->debug ) {
          // Feed content is untrusted: escape it before echoing into the page.
          print "<p>" . htmlspecialchars( $datestr ) . "</p>";
      }

      // Blank input never reaches strtotime(); it is treated as a failure.
      $date = ( $datestr === '' ) ? false : strtotime( $datestr );

      // strtotime() signals failure with false (PHP >= 5.1) or -1 (older PHP).
      // Strict comparison keeps a genuine timestamp of 0 intact.
      if ( $date === false || $date === -1 ) {
          $date = time();
      }

      return $date;
  }
  /**
   * Parse a Twitter "created_at" value into a Unix timestamp.
   *
   * Expected layout: "Wed Apr 18 13:17:34 +0000 2007". The pieces are
   * reordered to "18 Apr 2007 13:17:34 +0000" so strtotime() can read them.
   *
   * @param string $datestr
   * @return int Unix timestamp, or -1 when the value does not have the
   *             expected six parts or cannot be parsed (same failure value
   *             as parse_w3cdtf)
   */
  function parse_twitter_created_at( $datestr ) {
      // Split on any run of whitespace so doubled spaces do not shift the parts.
      $parts = preg_split( '/\s+/', trim( (string) $datestr ) );
      if ( ! is_array( $parts ) || count( $parts ) < 6 ) {
          return -1;
      }

      // day month year time zone
      $newdatestr = $parts[2] . " " . $parts[1] . " " . $parts[5] . " " . $parts[3] . " " . $parts[4];
      $timestamp = strtotime( $newdatestr );

      return ( $timestamp === false ) ? -1 : $timestamp;
  }
  /**
   * Swap the first two numeric groups of a leading N/N/Y date.
   *
   * Turns MM/DD/YYYY into DD/MM/YYYY (and vice versa); the separators
   * "/", ".", " " and "-" are accepted and rewritten as "/". Leading
   * whitespace in front of a matching date is dropped. A string that does
   * not start with such a date is returned unchanged.
   *
   * @param string $datestr
   * @return string
   */
  function reformat_to_dd_mm_yyyy( $datestr ) {
      $pattern = "/^\s*([0-9]{1,2})[\/\. -]+([0-9]{1,2})[\/\. -]+([0-9]{2,4})/";
      $swapped = "\\2/\\1/\\3";

      return preg_replace( $pattern, $swapped, $datestr );
  }
  /**
   * Parse a W3CDTF / ISO 8601 style timestamp into a Unix timestamp.
   *
   * Accepted: YYYY-MM-DDThh:mm[:ss[.fraction]][Z|+hh:mm|-hh:mm|+hhmm|-hhmm].
   * Seconds are optional, a fractional part is ignored, and a value without
   * a zone designator is taken as GMT. The pattern is not anchored, so the
   * timestamp may be embedded in a longer string.
   *
   * @param string $date_str
   * @return int Unix timestamp, or -1 when the string does not match
   */
  function parse_w3cdtf ( $date_str ) {
      # groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 seconds,
      #         7 zone sign, 8 zone hours, 9 zone minutes, 10 "Z"
      $pat = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(?::(\d{2})(?:\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';

      if ( ! preg_match( $pat, $date_str, $match ) ) {
          return -1;
      }

      # trailing groups that did not take part in the match are absent
      $seconds = ( isset( $match[6] ) && $match[6] !== '' ) ? (int) $match[6] : 0;

      # calc epoch for the given date assuming GMT
      $epoch = gmmktime(
          (int) $match[4], (int) $match[5], $seconds,
          (int) $match[2], (int) $match[3], (int) $match[1]
      );

      # "Z" or no designator at all: already GMT, nothing to adjust
      $offset = 0;
      if ( isset( $match[7] ) && $match[7] !== '' ) {
          $offset = ( ( (int) $match[8] * 60 ) + (int) $match[9] ) * 60;
          # is timezone ahead of GMT? then subtract offset
          if ( $match[7] == '+' ) {
              $offset = $offset * -1;
          }
      }

      return $epoch + $offset;
  }
  /**
   * Look for an existing weblog entry that matches the given post.
   *
   * Despite the name, the return value is the entry_id of the first
   * matching entry, or 0 when no entry matches (i.e. the post is unique).
   *
   * @param array  $post           post data; keys used here are "title",
   *                               "date" and any name listed in $unique
   * @param string $unique         comma separated list of criteria. "" and
   *                               "title,date" select the default test
   *                               (first 100 characters of the title plus
   *                               entry date). Whitespace around the names
   *                               is ignored.
   * @param array  $weblog_to_feed per-field info; for names other than
   *                               title/date the "is_custom" and "id" keys
   *                               are read to build the field_id_N column
   * @return int entry_id of the match, or 0
   */
  function is_entry_unique( $post, $unique, $weblog_to_feed ) {
      global $DB;

      // Only entry_id is read from the result, so only that column is selected.
      if ( $unique == "title,date" || $unique == "" ) {
          // Default for backwards compatibility
          $title = isset( $post[ "title" ] ) ? $post[ "title" ] : '';
          $date = isset( $post[ "date" ] ) ? $post[ "date" ] : '';
          $sql = "SELECT entry_id FROM exp_weblog_titles WHERE LEFT(title,100) = LEFT('".$DB->escape_str( $title )."',100) AND entry_date = '".$DB->escape_str( $date )."'";
      } else {
          /* Build custom query */
          $sql = "SELECT t.entry_id FROM exp_weblog_titles t, exp_weblog_data d WHERE t.entry_id = d.entry_id";
          foreach ( explode( ",", $unique ) as $value ) {
              // "title, date" must behave like "title,date"
              $value = trim( $value );
              if ( $value == '' ) {
                  continue;
              }
              // A criterion missing from the post is compared against ''.
              $wanted = isset( $post[ $value ] ) ? $post[ $value ] : '';

              if ( $value == 'title' ) {
                  $sql .= " AND title='" . $DB->escape_str( $wanted ) . "'";
              } elseif ( $value == 'date' ) {
                  $sql .= " AND entry_date='" . $DB->escape_str( $wanted ) . "'";
              } elseif ( isset( $weblog_to_feed[ $value ] ) && ! empty( $weblog_to_feed[ $value ][ "is_custom" ] ) ) {
                  $sql .= " AND field_id_" . $weblog_to_feed[ $value ][ "id" ] . "='" . $DB->escape_str( $wanted ) . "'";
              }
              // Names that are neither title/date nor a custom field are ignored.
              // NOTE(review): if every name is ignored the query matches any
              // entry - confirm whether that should be rejected instead.
          }
      }

      $query = $DB->query( $sql );
      if ( $query->num_rows > 0 ) {
          return $query->row['entry_id'];
      }
      return 0;
  }
  /**
   * Walk a parsed XML tree and collect every element found at $search into
   * $items as a flat array of its descendants.
   *
   * Keys written for each item (index starts at 1):
   *   "sub/path"        value of a leaf element below the item
   *   "sub/path#"       number of times that sub path was seen
   *   "sub/path#N"      value of the N-th repeat (N >= 2) of a sub path
   *   "sub/path@attr"   attribute of an element ("@attr" for the item itself)
   *
   * @param object $x          node; its ->tag, ->value, ->children and
   *                           ->attributes members are read
   * @param string $search     path of the item element, e.g. "/rss/channel/item"
   * @param array  $items      (by reference) receives the collected items
   * @param string $path       path of the parent node
   * @param int    $element    index of the most recent item
   * @param bool   $in_element true while inside an item
   * @param string $subpath    sub path inherited from the parent node
   * @return int index of the most recent item after visiting this node
   */
  function fetch_xml( $x, $search, &$items, $path="", $element=0, $in_element=false, $subpath="" ) {
      $path = $path . "/" . $x->tag;

      if ( $path == $search ) {
          // Path matches exactly our search element - we are in a new item
          $element++;
          $items[ $element ] = array();
          $subpath = "";
          $in_element = true;
      } elseif ( $search != "" && strpos( $path, $search . "/" ) === 0 ) {
          // We are below an item. The test is anchored at the start and
          // requires a "/" boundary, so a sibling such as ".../itemlist"
          // no longer counts as being inside ".../item".
          $subpath = substr( $path, strlen( $search ) + 1 );
          $counter = $subpath . "#";
          if ( ! isset( $items[ $element ][ $counter ] ) ) {
              $items[ $element ][ $counter ] = 0;
          }
          $count = $items[ $element ][ $counter ]++;
          if ( isset( $items[ $element ][ $subpath ] ) ) {
              // Repeated element: store it under "name#2", "name#3", ...
              $subpath .= "#" . ( $count + 1 );
          }
      } else {
          $in_element = false;
      }

      if ( count( $x->children ) == 0 ) {
          // Element has no children, i.e. it is a leaf
          if ( $in_element ) {
              // If within an item, add to its array
              $items[ $element ][ $subpath ] = $x->value;
          }
      } else {
          // Loop over all child elements and recurse through the xml structure
          foreach ( $x->children as $child ) {
              $element = $this->fetch_xml( $child, $search, $items, $path, $element, $in_element, $subpath );
          }
      }

      // Add attributes
      if ( $in_element && is_array( $x->attributes ) ) {
          foreach ( $x->attributes as $attr_key => $attr_value ) {
              $items[ $element ][ $subpath . "@" . $attr_key ] = $attr_value;
          }
      }

      return $element;
  }
  903. // --------------------------------------------------------------------
  904. /**
  905. * curl Fetch
  906. *
  907. * From pi.twitter_timeline.php
  908. *
  909. * @access public
  910. * @param string
  911. * @return string
  912. */
  function _curl_fetch($url)
  {
      $ch = curl_init();
      if ($ch === FALSE)
      {
          // Same failure value curl_exec() would give the caller.
          return FALSE;
      }

      curl_setopt($ch, CURLOPT_URL, $url);
      // Return the body from curl_exec() instead of printing it.
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
      curl_setopt($ch, CURLOPT_FRESH_CONNECT, 1);
      // Give up on an unreachable host after 8 seconds, the same limit
      // _fsockopen_fetch() uses for opening its connection.
      curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 8);
      // curl_setopt($ch, CURLOPT_USERPWD, "{$this->user}:{$this->password}");

      // Response body on success, FALSE on failure.
      $data = curl_exec($ch);
      curl_close($ch);

      return $data;
  }
  924. // --------------------------------------------------------------------
  925. /**
  926. * fsockopen Fetch
  927. *
  928. * From pi.twitter_timeline.php
  929. *
  930. * @access public
  931. * @param string
  932. * @return string
  933. */
  function _fsockopen_fetch($url)
  {
      $target = parse_url($url);
      $data = '';

      // Without a host there is nothing to connect to.
      if ( ! is_array($target) OR empty($target['host']))
      {
          return $data;
      }

      // Honour https and an explicit port instead of always using plain port 80.
      $secure = (isset($target['scheme']) && strtolower($target['scheme']) == 'https');
      $port = isset($target['port']) ? (int) $target['port'] : ($secure ? 443 : 80);
      $transport = $secure ? 'ssl://' : '';
      $host_header = $target['host'] . (isset($target['port']) ? ':' . $port : '');

      $fp = fsockopen($transport . $target['host'], $port, $error_num, $error_str, 8);

      if (is_resource($fp))
      {
          fputs($fp, "GET {$url} HTTP/1.0\r\n");
          fputs($fp, "Host: {$host_header}\r\n");
          // fputs($fp, "Authorization: Basic ".base64_encode("$this->user:$this->password")."\r\n");
          fputs($fp, "User-Agent: EE/xmlgrab PHP/" . phpversion() . "\r\n\r\n");

          // Everything up to the first blank line is response headers; only
          // the lines after it are collected.
          $headers = TRUE;

          while( ! feof($fp))
          {
              $line = fgets($fp, 4096);

              if ($line === FALSE)
              {
                  // Read error or timeout: stop rather than loop forever.
                  break;
              }

              if ($headers === FALSE)
              {
                  $data .= $line;
              }
              elseif (trim($line) == '')
              {
                  $headers = FALSE;
              }
          }

          fclose($fp);
      }

      return $data;
  }
  962. } // end class XMLGrab
  963. ?>