PageRenderTime 44ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/doku2sites.php

https://github.com/cpierret/doku2sites
PHP | 314 lines | 248 code | 34 blank | 32 comment | 24 complexity | 64ea096b1c26b6602523e4b9e3e0b10a MD5 | raw file
  1. <?php
  2. /*
  3. Copyright (C) 2010 Christophe Pierret.
  4. Doku2Sites is free software; you can redistribute it and/or
  5. modify it under the terms of the Cecill-B license version 1.
  6. Doku2Sites is distributed in the hope that it will be useful,
  7. but the licensor does not warrant that the Software is free
  8. from any error, that it will operate without interruption,
  9. that it will be compatible with the Licensee's own equipment
  10. and software configuration, nor that it will meet the Licensee's
  11. requirements. See the Cecill-B License for more details.
  12. You should have received a copy of the Cecill-B License
  13. along with Doku2Sites; if not, you can get it here:
  14. http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html
  15. The PHP XML RPC library, used by Doku2Sites, is not part of
  16. Doku2Sites and is licensed under its own BSD-style license.
  17. */
  18. require_once 'xmlrpc-2.2.2/lib/xmlrpc.inc';
  19. $opts = getopt('u:p:h:a:m:d:s:?');
  20. if ($opts==FALSE)
  21. {
  22. print_help_message();
  23. exit(1);
  24. }
  25. $rpcurl = "";
  26. $username = '';
  27. $pwd ='';
  28. $default_author = "Doku2Sites";
  29. $default_author_email = "doku2sites@gmail.com";
  30. $host = 'localhost';
  31. $directory = ".";
  32. // Handle command line arguments
  33. foreach ($opts as $opt=>$value) switch ($opt) {
  34. case 'u':
  35. $username = $value;
  36. break;
  37. case 'p':
  38. $pwd = $value;
  39. break;
  40. case 'h':
  41. if (empty($rpcurl))
  42. {
  43. $host = $value;
  44. $rpcurl = 'https://'.$host.'/wiki/lib/exe/xmlrpc.php';
  45. }
  46. break;
  47. case 'd':
  48. if (is_dir($value))
  49. $directory = $value;
  50. else
  51. die("'$value' is not a directory");
  52. break;
  53. case 's':
  54. if (!empty($value) && strlen($value)>4 && substr($value, 0,4)=="http")
  55. $rpcurl = $value;
  56. else
  57. die("'$value' is not an url");
  58. break;
  59. case 'a':
  60. $default_author = $value;
  61. break;
  62. case 'm':
  63. $default_author_email = $value;
  64. break;
  65. case 'p':
  66. print_help_message();
  67. exit(1);
  68. }
  69. if (empty($rpcurl))
  70. $rpcurl = "https://localhost/wiki/lib/exe/xmlrpc.php";
  71. echo "doku2sites will connect to DokuWiki with username: ".$username."\nAnd using URL:\n".$rpcurl."\n";
  72. $tidy_config = array(
  73. 'clean' => true,
  74. 'output-xhtml' => true,
  75. 'show-body-only' => true,
  76. 'wrap' => 0,
  77. 'quote-nbsp' => false,
  78. );
  79. function print_help_message()
  80. {
  81. echo "php doku2sites.php -u DOKU_USERNAME -p DOKU_PASSWORD [-h DOKU_HOSTNAME] [-d DIRECTORY] [-s DOKU_URL] [-a AUTHOR] [-m AUTHOR_EMAIL] [-?]\n";
  82. echo "Options:\n";
  83. echo " -u DOKU_USERNAME a DokuWiki username with access to the DokuWiki XMLRPC API.\n";
  84. echo " -p DOKU_PASSWORD the DokuWiki password.\n";
  85. echo ' -h DOKU_HOSTNAME the DokuWiki host name ( as in https://${DOKU_HOSTNAME}/wiki/lib/exe/xmlrpc.php).'."\n";
  86. echo ' -s DOKU_URL the DokuWiki XML RPC URL, for example https://myserver.com/wiki/lib/exe/xmlrpc.php'."\n";
  87. echo ' -d DIRECTORY an empty directory in which files will be generated (ensure you rince it before use)'."\n";
  88. echo ' -a AUTHOR the author name that will appear in the footer of all documents.'."\n";
  89. echo ' -m AUTHOR_EMAIL the author email that will appear in the footer of all documents.'."\n";
  90. echo ' -? show this help message'."\n";
  91. echo "Purpose:\n";
  92. echo " doku2google is a migration tool to export the pages of a DokuWiki for import into Google Sites\n";
  93. echo "\n";
  94. echo " You should ensure that the XML RPC api is enabled in DokuWiki before use.\n";
  95. echo " And download the Google Sites Liberation tool to import the result into Google Sites.\n";
  96. echo " You can get it here: http://code.google.com/p/google-sites-liberation/\n";
  97. }
  98. function normalize_page_title($title)
  99. {
  100. return preg_replace('/[^a-zA-Z0-9_]/','_',$title);
  101. }
  102. function get_footer($author,$email)
  103. {
  104. return '<small>Updated on <abbr class="updated" title="'
  105. . gmdate('Y-m-d\TH:i:s.000\Z')
  106. . '">'
  107. . gmdate('M. j, Y')
  108. . '</abbr> by <span class="author"><span class="vcard"><a class="fn" href="mailto:'
  109. . htmlspecialchars($email,ENT_QUOTES,'UTF-8')
  110. . '">'
  111. . htmlspecialchars($author,ENT_QUOTES,'UTF-8')
  112. . '</a></span></span> (Version <span class="sites:revision">4</span>)</small>';
  113. }
  114. function build_sites_page($subdir,$title,$html_content,$author,$email)
  115. {
  116. $title = normalize_page_title($title);
  117. $full_page = "<html>\n\t<head>\n\t\t<title>".$title."</title>\n\t</head>\n\t<body>\n\t\t";
  118. $full_page .= '<div class="hentry webpage"';
  119. $full_page .= '><span class="entry-title">'.$title.'</span><div><div class="entry-content">';
  120. $full_page .= $html_content;
  121. $full_page .= '</div></div>';
  122. $full_page .= get_footer($author,$email);
  123. $full_page .= "\n\t\t</div>\n\t</body>\n</html>";
  124. file_put_contents($subdir .'/index.html',$full_page);
  125. }
  126. function new_rpc_client($rpcurl,$username,$pwd)
  127. {
  128. $client = new xmlrpc_client($rpcurl);
  129. $client->setCredentials($username,$pwd,CURLAUTH_BASIC);
  130. $client->return_type = "phpvals";
  131. return $client;
  132. }
  133. function get_all_pages($client)
  134. {
  135. $message = new xmlrpcmsg("wiki.getAllPages", array());
  136. $resp = $client->send($message);
  137. if ($resp->faultCode()) {
  138. echo "Error talking to dokuwiki: ".$resp->faultString()."\n";
  139. echo "Check that the xmlrpc API access is enabled in the admin interface of DokuWiki.\n";
  140. exit(1);
  141. }
  142. return $resp->value();
  143. }
  144. function get_page_html($client,$id)
  145. {
  146. $message = new xmlrpcmsg("wiki.getPageHTML", array(new xmlrpcval($id, 'string')));
  147. $resp = $client->send($message);
  148. if ($resp->faultCode()) {
  149. echo "Error talking to dokuwiki: ".$resp->faultString()."\n";
  150. exit(1);
  151. }
  152. return $resp->value();
  153. }
  154. function preprocess_page_html($page)
  155. {
  156. global $tidy_config;
  157. // transform doku hrefs
  158. $result = preg_replace('#href="/wiki/doku.php\?id=([^"&]+)(&[^"]+)?"#','href="\1/index.html"',$page);
  159. // removes : from hrefs
  160. $matches = array();
  161. preg_match_all('#href="[^"]+"#',$result,$matches,PREG_OFFSET_CAPTURE);
  162. foreach($matches[0] as $key=>$m)
  163. {
  164. $l = strlen($m[0]);
  165. $repl = str_replace(':','/',$m[0]);
  166. $offset = $m[1];
  167. $result = substr_replace($result,$repl,$offset,$l);
  168. }
  169. $fragment = html_entity_decode(utf8_encode($result));
  170. $tidy = tidy_parse_string($fragment, $tidy_config, 'UTF8');
  171. $tidy->cleanRepair();
  172. $result = tidy_get_output($tidy);
  173. // fix quirk in Tidy (preserves PHP code)
  174. $result = str_replace('<?php','&lt;?php',$result);
  175. $result = str_replace('?>','?&gt;',$result);
  176. $result = str_replace('>&<','>&amp;<',$result);
  177. return $result;
  178. }
  179. /**
  180. *
  181. * Make Google Liberation tool directories
  182. * @param string $basedir
  183. * @param string $id
  184. * @return default title
  185. */
  186. function make_dirs($basedir,$id)
  187. {
  188. $dirs= preg_split("/:/",$id);
  189. $curdir = $basedir;
  190. $jdir = $dirs[0];
  191. foreach($dirs as $d)
  192. {
  193. if ($d != $jdir)
  194. $jdir .= ':' . $d;
  195. $curdir .= '/';
  196. $curdir .= normalize_page_title($d);
  197. if (!is_dir($curdir))
  198. {
  199. mkdir($curdir);
  200. }
  201. }
  202. return end($dirs);
  203. }
  204. function get_subpages($path, $exclude = ".|..")
  205. {
  206. $path = rtrim($path, "/") . "/";
  207. $folder_handle = opendir($path);
  208. $exclude_array = explode("|", $exclude);
  209. $result = array();
  210. while(false !== ($filename = readdir($folder_handle))) {
  211. if(!in_array(strtolower($filename), $exclude_array)) {
  212. if(file_exists($path . $filename . "/index.html")) {
  213. $result[] = $filename;
  214. }
  215. }
  216. }
  217. return $result;
  218. }
  219. function make_indexes($basedir, $subdir, $author, $email, $exclude = array(".",".."))
  220. {
  221. if (empty($subdir))
  222. $curdir = $basedir . '/';
  223. else
  224. $curdir = $basedir . '/' . $subdir . '/';
  225. $folder_handle = opendir($curdir);
  226. $result = array();
  227. while(false !== ($filename = readdir($folder_handle))) {
  228. if(!in_array($filename, $exclude)) {
  229. if (empty($subdir))
  230. $new_subdir = $filename;
  231. else
  232. $new_subdir = $subdir . '/'. $filename;
  233. if(is_dir($basedir.'/'.$new_subdir)) {
  234. make_indexes($basedir, $new_subdir,$author, $email, $exclude);
  235. if (!file_exists($curdir . $filename . "/index.html"))
  236. {
  237. make_index($basedir,$subdir,$filename,$author,$email);
  238. }
  239. }
  240. }
  241. }
  242. return $result;
  243. }
  244. function make_index($basedir,$subdir,$filename,$author,$email)
  245. {
  246. $curdir = $basedir . '/' . $subdir . '/'. $filename;
  247. if (!file_exists($curdir .'/index.html'))
  248. {
  249. $subpages = get_subpages($curdir);
  250. $content = "<ul>\n";
  251. foreach($subpages as $subpage)
  252. {
  253. $content .= '<li><a href="'.$filename.'/'.$subpage.'">'.normalize_page_title($subpage)."</a></li>\n";
  254. }
  255. $content .= "</ul>\n";
  256. build_sites_page($curdir,$filename,$content,$author,$email);
  257. }
  258. }
  259. $client = new_rpc_client($rpcurl,$username,$pwd);
  260. $all_pages = get_all_pages($client);
  261. $ids = array();
  262. foreach($all_pages as $key=>$data)
  263. {
  264. //echo $data['id']." - " .$data['perms']." - " .$data['size']." - " .$data['lastModified']."\n";
  265. $id = $data['id'];
  266. $page = get_page_html($client,$id);
  267. $content = preprocess_page_html($page);
  268. $title = normalize_page_title(make_dirs($directory,$id));
  269. $ids[$id] = $title;
  270. $fname = str_replace(':','/',$id);
  271. $fname = preg_replace('#[^a-zA-Z0-9_/]#','_',$fname);
  272. $subdir = $directory.'/'. $fname;
  273. build_sites_page($subdir, $title, $content, $default_author, $default_author_email);
  274. }
  275. make_indexes($directory, "", $default_author, $default_author_email);
  276. ?>