PageRenderTime 44ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/_plugins_/importer_blog/action/importer_blog.php

https://bitbucket.org/pombredanne/spip-zone-treemap
PHP | 433 lines | 305 code | 101 blank | 27 comment | 45 complexity | 5d98c6fd24f7c5dc599cee77c3b47813 MD5 | raw file
  1. <?php
  /**
   * SPIP action: import an uploaded blog export file.
   *
   * Only users passing autoriser('webmestre') may run it. The last entry of
   * $_FILES is read, handed to importer_blogspot(), and the outcome is
   * rendered inside the "minipres" page chrome.
   *
   * Defines _MODE_MOTS / _MODE_AUTEURS / _MODE_FORUM (1 or 0) depending on
   * whether the spip_mots_liens / spip_auteurs_liens tables exist; the
   * importer functions branch on these constants.
   *
   * @return void
   */
  function action_importer_blog() {
      header('Content-Type: text/html; charset=utf-8');
      include_spip('inc/minipres');

      if (!autoriser('webmestre')) {
          echo minipres();
          exit;
      }

      // Take the last uploaded file, whatever its form field name.
      $f = $_FILES ? array_pop($_FILES) : null;
      $content = false;
      if ($f && !$f['error']) {
          $content = file_get_contents($f['tmp_name']);
      }
      // An unreadable or empty upload is a file error, not something to parse.
      if ($content === false || $content === '') {
          echo minipres('Erreur de fichier');
          exit;
      }

      // Storage mode of the keyword<->article and author<->article links
      $trouver_table = charger_fonction('trouver_table', 'base');
      if (!defined('_MODE_MOTS')) {
          define('_MODE_MOTS', ($trouver_table('spip_mots_liens') ? 1 : 0));
      }
      if (!defined('_MODE_AUTEURS')) {
          define('_MODE_AUTEURS', ($trouver_table('spip_auteurs_liens') ? 1 : 0));
      }
      if (!defined('_MODE_FORUM')) {
          // The forum layout is simply assumed to follow the keyword layout.
          define('_MODE_FORUM', _MODE_MOTS);
      }

      // The client-supplied filename goes into HTML: escape it.
      echo install_debut_html('Import de '.htmlspecialchars($f['name'], ENT_QUOTES, 'UTF-8'));

      include_spip('iterateur/data');

      // A parser exception must end up as an error message, not a broken page.
      try {
          $r = importer_blogspot($content);
      } catch (Exception $e) {
          $r = $e->getMessage();
      }

      if (!$r) {
          echo "<h1>Import terminé avec succès</h1>";
      } else {
          echo "<h1>Erreur : ".htmlspecialchars((string) $r, ENT_QUOTES, 'UTF-8')."</h1>\n";
      }

      echo install_fin_html();
  }
  /**
   * Clean-up hook applied to titles and contents before import.
   *
   * Currently a pass-through: the <br>/<div> conversions below are disabled,
   * so the text is returned exactly as received.
   *
   * @param string $texte raw text or HTML fragment
   * @return string the same text, unchanged
   */
  function nettoyer_html($texte) {
      // Disabled conversions, kept for reference:
      // $texte = preg_replace(',<br ?/?><br ?/?>,i', "\n\n", $texte);
      // $texte = preg_replace(',<br ?/?>,i', "\n_ ", $texte);
      // $texte = preg_replace(',</div>,i', "\0\n\n", $texte);
      $resultat = $texte;

      return $resultat;
  }
  /**
   * Create or update the SPIP article matching one imported blog post.
   *
   * The post is identified by the reference "*<id>" stored in the nom_site
   * column: an existing article carrying that reference is updated, otherwise
   * a new published article is inserted first. Author and keyword links of
   * the article are then deleted and rebuilt from the post data.
   *
   * @param array $a   post data; reads 'id', 'title', 'content', 'date',
   *                   'author', 'email' and, when present, 'terms' (list of
   *                   keyword titles)
   * @param int   $rub rubrique id, written to both id_rubrique and id_secteur
   * @return void
   */
  function importer_post($a, $rub =1) {
      $ref = "*".$a['id'];

      $s = sql_query("SELECT id_article FROM spip_articles WHERE nom_site=".sql_quote((string)$ref));
      if ($t = sql_fetch($s)) {
          $id = $t['id_article'];
      } else {
          $id = sql_insertq('spip_articles', array(
              'nom_site' => $ref,
              'statut' => 'publie'
          ));
      }

      if (!$id) {
          echo "erreur sur ".htmlspecialchars($ref, ENT_QUOTES, 'UTF-8');
          return;
      }
      // The id is concatenated into WHERE clauses below: force an integer.
      $id = intval($id);

      $texte = importer_texte($a['content']);

      sql_updateq('spip_articles',
          array(
              'titre' => $a['title'],
              'texte' => $texte,
              'date' => $a['date'],
              'id_rubrique' => $rub,
              'id_secteur' => $rub,
              'lang' => 'fr',
          ),
          'id_article='.$id
      );

      // Author link: drop the existing ones, then attach the post's author.
      if (_MODE_AUTEURS) { /* SPIP 3 */
          sql_delete('spip_auteurs_liens', 'id_objet='.$id.' AND objet='.sql_quote('article'));
      } else { /* spip 2.1 */
          sql_delete('spip_auteurs_articles', 'id_article='.$id);
      }
      if ($id_auteur = get_id_auteur($a['author'], $a['email'])) {
          if (_MODE_AUTEURS) {
              sql_insertq('spip_auteurs_liens', array('id_objet'=>$id, 'id_auteur' => $id_auteur, 'objet' => 'article'));
          } else {
              sql_insertq('spip_auteurs_articles', array('id_article'=>$id, 'id_auteur' => $id_auteur));
          }
      }

      // Keyword links: 'terms' is only set when the post has labels.
      $terms = (isset($a['terms']) && is_array($a['terms'])) ? $a['terms'] : array();
      if (_MODE_MOTS) { /* SPIP 3 */
          sql_delete('spip_mots_liens', 'id_objet='.$id.' AND objet='.sql_quote('article'));
      } else { /* spip 2.1 */
          sql_delete('spip_mots_articles', 'id_article='.$id);
      }
      foreach ($terms as $term) {
          if ($id_mot = get_id_mot($term)) {
              if (_MODE_MOTS) {
                  sql_insertq('spip_mots_liens', array('id_objet'=>$id, 'id_mot' => $id_mot, 'objet' => 'article'));
              } else {
                  sql_insertq('spip_mots_articles', array('id_article'=>$id, 'id_mot' => $id_mot));
              }
          }
      }

      echo "<dd><a href='"._DIR_RESTREINT."?exec=articles&amp;id_article=$id'>article $id</a></dd>\n";
  }
  /**
   * Create or update the SPIP forum message matching one imported comment.
   *
   * The parent article is looked up through its "*<parent>" reference in
   * spip_articles.nom_site; when it does not exist the comment is skipped.
   * The comment itself is identified by "*<id>" in spip_forum.nom_site: an
   * existing message is updated, otherwise a published one is inserted first.
   *
   * @param array $a comment data; reads 'id', 'content', 'date', 'author',
   *                 'email' and, when present, 'parent' (id of the post)
   * @return false|null false when the parent article is missing or the
   *                    message could not be created, null otherwise
   */
  function importer_comment($a) {
      // 'parent' may be absent when the caller could not extract it.
      $parent = isset($a['parent']) ? $a['parent'] : '';
      $ref_article = "*".$parent;

      $s = sql_query('SELECT id_article AS id FROM spip_articles WHERE nom_site='._q($ref_article));
      if (!$t = sql_fetch($s)) {
          echo "l'article ".htmlspecialchars($ref_article, ENT_QUOTES, 'UTF-8')." n'existe pas (encore?), on passe.\n";
          return false;
      }
      $id_objet = intval($t['id']);

      $ref = '*'.$a['id'];
      $s = sql_query("SELECT id_forum FROM spip_forum WHERE nom_site=".sql_quote((string)$ref));
      if ($t = sql_fetch($s)) {
          $id = $t['id_forum'];
      } else {
          $id = sql_insertq('spip_forum', array(
              'nom_site' => $ref,
              'statut' => 'publie'
          ));
      }

      if (!$id) {
          echo "erreur sur ".htmlspecialchars($ref, ENT_QUOTES, 'UTF-8');
          return false;
      }
      // The id is concatenated into the WHERE clause below: force an integer.
      $id = intval($id);

      $texte = importer_texte($a['content']);

      $f = array(
          // On blogspot the title is only the beginning of the content,
          // so it is deliberately left empty.
          'titre' => '',
          'texte' => $texte,
          'date_heure' => $a['date'],
          'date_thread' => $a['date'],
          'auteur' => $a['author'],
          'email_auteur' => $a['email'],
      );

      // Attach to the article using whichever column layout is in use.
      if (_MODE_FORUM) {
          $f['objet'] = 'article';
          $f['id_objet'] = $id_objet;
      } else {
          $f['id_article'] = $id_objet;
      }

      sql_updateq('spip_forum', $f, 'id_forum='.$id);

      echo "<dd><a href='"._DIR_RESTREINT."?exec=articles&amp;id_article=$id_objet'>forum $id</a></dd>\n";
  }
  /**
   * Return the id of the author named $name, creating the author if needed.
   *
   * Results are memoised per name for the duration of the request. A newly
   * created author gets the status '1comite' and the given e-mail.
   *
   * @param string $name  author name, matched against spip_auteurs.nom
   * @param string $email e-mail stored only when the author is created
   * @return mixed the id_auteur found, or whatever sql_insertq() returned
   */
  function get_id_auteur($name, $email='') {
      static $cache = array();

      if (isset($cache[$name])) {
          return $cache[$name];
      }

      $res = sql_query("SELECT id_auteur FROM spip_auteurs WHERE nom="._q($name));
      $row = sql_fetch($res);

      if ($row) {
          $cache[$name] = $row['id_auteur'];
      } else {
          $nouveau = array(
              'nom' => $name,
              'statut' => '1comite',
              'email' => $email
          );
          $cache[$name] = sql_insertq('spip_auteurs', $nouveau);
      }

      return $cache[$name];
  }
  /**
   * Return the id of the root-level rubrique titled $name, creating it if
   * it does not exist yet.
   *
   * Only rubriques with id_parent=0 are considered. Results are memoised per
   * title for the duration of the request. A newly created rubrique is
   * published and uses $desc as its text.
   *
   * @param string $name rubrique title
   * @param string $desc text stored only when the rubrique is created
   * @return mixed the id_rubrique found, or whatever sql_insertq() returned
   */
  function get_id_rubrique($name, $desc='') {
      static $cache = array();

      if (isset($cache[$name])) {
          return $cache[$name];
      }

      $res = sql_query("SELECT id_rubrique FROM spip_rubriques WHERE titre="._q($name)." AND id_parent=0");
      $row = sql_fetch($res);

      if ($row) {
          $cache[$name] = $row['id_rubrique'];
      } else {
          $nouvelle = array(
              'titre' => $name,
              'texte' => $desc,
              'statut' => 'publie',
              'id_parent' => 0
          );
          $cache[$name] = sql_insertq('spip_rubriques', $nouvelle);
      }

      return $cache[$name];
  }
  /**
   * Return the id of the keyword titled $name, creating it if needed.
   *
   * Results are memoised per title for the duration of the request. A newly
   * created keyword is placed in group '1' with type 'tag'.
   *
   * @param string $name keyword title, matched against spip_mots.titre
   * @return mixed the id_mot found, or whatever sql_insertq() returned
   */
  function get_id_mot($name) {
      static $cache = array();

      if (isset($cache[$name])) {
          return $cache[$name];
      }

      $res = sql_query("SELECT id_mot FROM spip_mots WHERE titre="._q($name));
      $row = sql_fetch($res);

      if ($row) {
          $cache[$name] = $row['id_mot'];
      } else {
          $nouveau = array(
              'titre' => $name,
              'id_groupe' => '1',
              'type' => 'tag',
          );
          $cache[$name] = sql_insertq('spip_mots', $nouveau);
      }

      return $cache[$name];
  }
  /**
   * Import every entry of a Blogger (blogspot) Atom export.
   *
   * Entries whose id ends in ".post-<ref>" are dispatched by kind to
   * importer_post() or importer_comment(). Entries whose id ends in
   * ".settings.<NAME>" are collected (lower-cased name => content) and
   * provide the blog name/description used for the target rubrique.
   * Progress is echoed as HTML while importing.
   *
   * @param string $content raw XML of the export (taken by reference, not modified)
   * @return string|null an error message when the XML cannot be read,
   *                     null when the import ran through
   */
  function importer_blogspot(&$content) {
      if (!is_string($content) || trim($content) === '') {
          return 'Fichier vide';
      }

      // Parse quietly: a failure is reported through the return value,
      // which is what the calling action displays.
      $previous = libxml_use_internal_errors(true);
      try {
          // Renaming "xmlns=" removes the default namespace, so the plain
          // xpath('entry') queries below match without namespace prefixes.
          $it = new SimpleXMLIterator(str_replace('xmlns=', 'ns=', $content));
      } catch (Exception $e) {
          libxml_clear_errors();
          libxml_use_internal_errors($previous);
          return 'XML invalide ('.$e->getMessage().')';
      }
      libxml_clear_errors();
      libxml_use_internal_errors($previous);

      $entries = $it->xpath('entry');
      if (!is_array($entries)) {
          return null;
      }

      $settings = array();

      foreach ($entries as $val) {
          $id = (string) $val->id;

          if (!preg_match(',\.post-(.*)$,', $id, $r)) {
              // Not a post or comment: show it struck out, keep it if it is a setting.
              echo "<s>".htmlspecialchars($id, ENT_QUOTES, 'UTF-8')."</s> <br />\n";
              if (preg_match(',\.settings\.(\w+)$,', $id, $r)) {
                  $settings[strtolower($r[1])] = (string) $val->content;
              }
              continue;
          }

          $ref = $r[1];
          $a = array('id' => $ref);
          echo "<dt>".htmlspecialchars($ref, ENT_QUOTES, 'UTF-8')."</dt>\n";
          echo "<dd>".htmlspecialchars((string) $val->title)."</dd>";

          ## determine the kind (post / comment) of the item
          $kind = $val->xpath('category[@scheme=\'http://schemas.google.com/g/2005#kind\']');
          $a['type'] = $kind
              ? preg_replace(',^.*#,', '', (string) $kind[0]->attributes()->term)
              : '';

          ## determine the url (query string stripped)
          $link = $val->xpath('link[@rel=\'alternate\']');
          if ($link) {
              $a['link'] = preg_replace(',[?].*,', '', (string) $link[0]->attributes()->href);
          }

          ## for a comment, find the parent post id in the "self" link
          if ($a['type'] == 'comment') {
              $a['parent'] = '';
              $self = $val->xpath('link[@rel=\'self\']');
              if ($self
                  && preg_match(',(\d+)/comments/,', (string) $self[0]->attributes()->href, $m)
              ) {
                  $a['parent'] = $m[1];
              }
          }

          ## labels
          $terms = array();
          $cats = $val->xpath('category[@scheme="http://www.blogger.com/atom/ns#"]');
          if (is_array($cats)) {
              foreach ($cats as $t) {
                  $terms[] = (string) $t->attributes()->term;
              }
          }
          if ($terms) {
              $a['terms'] = $terms;
          }

          $a['title'] = nettoyer_html((string) $val->title);
          $a['content'] = nettoyer_html((string) $val->content);
          $a['date'] = date('Y-m-d H:i:s', strtotime((string) $val->published));
          $a['author'] = (string) $val->author->name;
          $a['email'] = (string) $val->author->email;
          // The anonymised Blogger address carries no information.
          if ($a['email'] == 'noreply@blogger.com') {
              $a['email'] = '';
          }

          switch ($a['type']) {
              case "post":
                  // Settings may be missing from the export: fall back to empty strings.
                  $rub = get_id_rubrique(
                      isset($settings['blog_name']) ? $settings['blog_name'] : '',
                      isset($settings['blog_description']) ? $settings['blog_description'] : ''
                  );
                  importer_post($a, $rub);
                  break;
              case "comment":
                  importer_comment($a);
                  break;
              default:
                  echo "type inconnu: ".htmlspecialchars($a['type'], ENT_QUOTES, 'UTF-8')."\n";
                  break;
          }
      }

      return null;
  }
  /**
   * Convert the HTML of a Blogger post or comment to SPIP-style markup.
   *
   * Steps, in order (later steps rely on the output of earlier ones):
   *  - italic spans and <i> become {...};
   *  - a link wrapping a .jpg/.gif/.png image is registered as a distant
   *    document through importer_doc() and replaced by <docN|center> (when a
   *    centred div with a caption surrounds it; the caption is stored as the
   *    document's descriptif) or by <imgN|center>;
   *  - any other link with an href becomes [text->href];
   *  - <br /> becomes "\n_ ", and doubled breaks become blank lines;
   *  - YouTube <object> embeds are registered as distant 'swf' documents
   *    and replaced by <embN|center>;
   *  - leftover empty centred divs and the blogger post footer are removed.
   *
   * @param string $t HTML fragment
   * @return string converted text
   */
  function importer_texte($t) {
      # italics
      #<span style="font-style: italic;">...</span>
      $t = preg_replace(',<span style="font-style: italic;">(.*)</span>,Ums', '{\1}', $t);
      $t = preg_replace(',<i>(.*)</i>,Ums', '{\1}', $t);

      # images and links
      foreach (extraire_balises($t, 'a') as $l) {
          if (preg_match(',^<a [^>]*><img [^>]*></a>$,Uims', $l)
              AND $href = extraire_attribut($l, 'href')
              AND $src = extraire_attribut(extraire_balise($l, 'img'), 'src')
              AND preg_match(',\.(jpg|gif|png)$,i', $src, $r)
              AND $extension = $r[1]
          ) {
              $doc = importer_doc(array('fichier' => $src, 'extension' => strtolower($extension), 'distant' => 'oui', 'mode' => 'image'));

              // Image inside a centred div followed by up to five lines of text:
              // that text is taken as the caption.
              if (preg_match('@<div style="text-align: center;">'.preg_quote($l,'@').'((?:.*?\n){0,5})</div>@ms', $t, $r)) {
                  $legende = trim($r[1]);
                  sql_updateq('spip_documents', array('descriptif' => $legende), 'id_document='.intval($doc));
                  $repl = "\n".'<doc'.$doc.'|center>'."\n";
                  $t = str_replace($r[0], $repl, $t);
              } else {
                  $repl = "\n".'<img'.$doc.'|center>'."\n";
                  $t = str_replace($l, $repl, $t);
              }
          } elseif (preg_match(',^<a [^>]*>(.*)</a>$,Uims', $l, $r)
              AND $href = extraire_attribut($l, 'href')
          ) {
              $repl = '['.$r[1].'->'.trim($href).']';
              $t = str_replace($l, $repl, $t);
          }
      }

      # line breaks
      $t = preg_replace(',<br />,', "\n_ ", $t);
      // an italic that only wrapped a line break
      $t = preg_replace(',{\n_ },', "\n_ ", $t);
      $t = preg_replace(',\n_ \n_ ,', "\n\n", $t);

      ## videos
      # youtube
      foreach (extraire_balises($t, 'object') as $l) {
          if (preg_match(',https?://(?:www\.)?youtube\.com/v/[^"\']*,', $l, $r)
              AND $embed = extraire_balise($l, 'embed')
              AND $height = extraire_attribut($embed, 'height')
              AND $width = extraire_attribut($embed, 'width')
          ) {
              $doc = importer_doc(array('fichier' => $r[0], 'hauteur' => $height, 'largeur' => $width, 'extension' => 'swf', 'distant' => 'oui', 'mode' => 'document'));
              $t = str_replace($l, '<emb'.$doc.'|center>', $t);
          }
      }

      # final tidy-up: move breaks out of closing tags, drop empty divs and the footer
      $t = preg_replace(",\n_ </div>,S", "</div>\n_ ", $t);
      $t = preg_replace(',<div style="text-align: center;"></div>,S', '', $t);
      $t = preg_replace(',(\n_ )*<div class="blogger-post-footer">.*$,Sms', '', $t);
      $t = str_replace("\n\n</span>", "</span>\n\n", $t);
      $t = str_replace("\n\n</div>", "</div>\n\n", $t);
      $t = str_replace("\n\n_ ", "\n\n\n", $t);

      return $t;
  }
  /**
   * Register a document in spip_documents, or refresh the existing row.
   *
   * The document is identified by its 'fichier' value: when no row has it,
   * one is inserted with the current date. Every field of $doc is then
   * written to the row.
   *
   * @param array $doc column => value pairs; must contain 'fichier'
   * @return mixed the id_document, or the falsy result of sql_insertq()
   *               when the row could not be created
   */
  function importer_doc($doc) {
      $s = sql_query("SELECT id_document FROM spip_documents WHERE fichier=".sql_quote($doc['fichier']));
      if ($t = sql_fetch($s)) {
          $id = $t['id_document'];
      } else {
          $id = sql_insertq('spip_documents', array(
              'fichier' => $doc['fichier'],
              'date' => date('Y-m-d H:i:s')
          ));
      }

      // Without an id the update below would run with a malformed WHERE clause.
      if (!$id) {
          return $id;
      }

      sql_updateq('spip_documents', $doc, 'id_document='.intval($id));

      return $id;
  }