/mod/lesson/importppt.php

https://bitbucket.org/ceu/moodle_demo · PHP · 588 lines · 374 code · 99 blank · 115 comment · 65 complexity · 1f0dcea7b064357fe3fa3793c13232b9 MD5 · raw file

  1. <?php // $Id: importppt.php,v 1.22.2.3 2009/05/06 16:10:15 skodak Exp $
  2. /**
  3. * This is a very rough importer for powerpoint slides
  4. * Export a powerpoint presentation with powerpoint as html pages
  5. * Do it with office 2002 (I think?) and no special settings
  6. * Then zip the directory with all of the html pages
  7. * and the zip file is what you want to upload
  8. *
  9. * The script supports book and lesson.
  10. *
  11. * @version $Id: importppt.php,v 1.22.2.3 2009/05/06 16:10:15 skodak Exp $
  12. * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
  13. * @package lesson
  14. **/
  // Bootstrap Moodle and the lesson helper library.
  require_once("../../config.php");
  require_once("locallib.php");

  $id     = required_param('id', PARAM_INT);        // Course Module ID
  $pageid = optional_param('pageid', 0, PARAM_INT); // Page ID to import after; 0 means "start of the module"

  global $matches; // slide matches shared between extract_data() and build_list()

  // Resolve the course module, its course, the module type name and the module instance.
  if (! $cm = get_coursemodule_from_id('lesson', $id)) {
      error("Course Module ID was incorrect");
  }
  if (! $course = get_record("course", "id", $cm->course)) {
      error("Course is misconfigured");
  }
  // allows for adaption for multiple modules
  if (! $modname = get_field('modules', 'name', 'id', $cm->module)) {
      error("Could not find module name");
  }
  if (! $mod = get_record($modname, "id", $cm->instance)) {
      error("Course module is incorrect");
  }

  // Only logged-in users who may edit the lesson get any further.
  require_login($course->id, false, $cm);
  $context = get_context_instance(CONTEXT_MODULE, $cm->id);
  require_capability('mod/lesson:edit', $context);

  $strimportppt = get_string("importppt", "lesson");
  $strlessons = get_string("modulenameplural", "lesson");

  $navigation = build_navigation($strimportppt, $cm);
  print_header_simple("$strimportppt", " $strimportppt", $navigation);

  if ($form = data_submitted()) {   /// Filename
      if (empty($_FILES['newfile'])) {
          // The form was posted without the upload field: report it and fall through
          // to the upload form instead of dereferencing the missing entry below.
          notify(get_string("uploadproblem") );
      } else if ((!is_uploaded_file($_FILES['newfile']['tmp_name']) or $_FILES['newfile']['size'] == 0)) {
          notify(get_string("uploadnofilefound") );
      } else {  // Valid file is found
          if ($rawpages = readdata($_FILES, $course->id, $modname)) {  // first try to read all of the data in
              $pageobjects = extract_data($rawpages, $course->id, $mod->name, $modname); // parse all the html files into objects
              clean_temp(); // all done with files so dump em

              // The per-module helpers are looked up by name: <modname>_create_objects / <modname>_save_objects.
              $mod_create_objects = $modname.'_create_objects';
              $mod_save_objects = $modname.'_save_objects';

              $objects = $mod_create_objects($pageobjects, $mod->id); // function to preps the data to be sent to DB
              if (! $mod_save_objects($objects, $mod->id, $pageid)) {  // sends it to DB
                  error("could not save");
              }
          } else {
              error('could not get data');
          }

          echo "<hr>";
          print_continue("$CFG->wwwroot/mod/$modname/view.php?id=$cm->id");
          print_footer($course);
          exit;
      }
  }

  /// Print upload form
  print_heading_with_help($strimportppt, "importppt", "lesson");
  print_simple_box_start("center");
  echo "<form id=\"theform\" enctype=\"multipart/form-data\" method=\"post\">";
  echo "<input type=\"hidden\" name=\"id\" value=\"$cm->id\" />\n";
  echo "<input type=\"hidden\" name=\"pageid\" value=\"$pageid\" />\n";
  echo "<table cellpadding=\"5\">";
  echo "<tr><td align=\"right\">";
  print_string("upload");
  echo ":</td><td>";
  echo "<input name=\"newfile\" type=\"file\" size=\"50\" />";
  echo "</td></tr><tr><td>&nbsp;</td><td>";
  echo "<input type=\"submit\" name=\"save\" value=\"".get_string("uploadthisfile")."\" />";
  echo "</td></tr>";
  echo "</table>";
  echo "</form>";
  print_simple_box_end();
  print_footer($course);
  83. // START OF FUNCTIONS
  /**
   * Unpacks the uploaded zip into the temp area and reads every slide file into memory.
   *
   * The zip is expected to contain a directory named like the zip file (without its
   * extension). If that directory holds a readable outline.htm, the slide file names
   * and their order are taken from its GoToSld('...') links; otherwise every file
   * whose name starts with "slide" is read and the result is sorted by path.
   *
   * @param array  $file     unused; the upload is read from $_FILES['newfile']
   * @param int    $courseid unused in this function
   * @param string $modname  module name, used as the sub-directory of dataroot/temp
   * @return array|false     slide file path => file contents, or false when the upload
   *                         cannot be stored, a slide is unreadable or no slide is found
   */
  function readdata($file, $courseid, $modname) {
      global $CFG;

      // create an upload directory in temp
      make_upload_directory('temp/'.$modname);

      $base = $CFG->dataroot."/temp/$modname/";

      // The file name comes from the client: keep only its last path component so it
      // cannot point outside of the temp directory.
      $zipfile = basename(str_replace('\\', '/', $_FILES["newfile"]["name"]));
      $tempzipfile = $_FILES["newfile"]["tmp_name"];

      // create our directory, named after the zip without its (last) extension
      $dotpos = strrpos($zipfile, '.');
      $dirname = ($dotpos === false) ? '' : substr($zipfile, 0, $dotpos);
      if (!file_exists($base.$dirname)) {
          mkdir($base.$dirname, $CFG->directorypermissions);
      }

      // move our uploaded file to temp/<modname>
      if (!move_uploaded_file($tempzipfile, $base.$zipfile)) {
          return false;
      }

      // unzip it!
      unzip_file($base.$zipfile, $base, false);

      $base = $base.$dirname;  // update the base

      // this is the file where we get the names of the files for the slides (in the correct order too)
      $outline = $base.'/outline.htm';

      $pages = array();

      if (file_exists($outline) and is_readable($outline)) {
          $outlinecontents = file_get_contents($outline);
          $filenames = array();
          preg_match_all("/javascript:GoToSld\('(.*)'\)/", $outlinecontents, $filenames);  // this gets all of our files names

          // fill $pages with the contents of all of the slides
          foreach ($filenames[1] as $slidefile) {
              $path = $base.'/'.$slidefile;
              if (!is_readable($path)) {
                  return false;
              }
              $pages[$path] = file_get_contents($path);
          }
      } else {
          // cannot find the outline, so grab all files that start with slide
          $dh = is_dir($base) ? opendir($base) : false;
          if ($dh === false) {
              // nothing was extracted where we expected it
              return false;
          }
          while (false !== ($slidefile = readdir($dh))) {  // read through the directory
              if ('slide' == substr($slidefile, 0, 5)) {  // check for name (may want to check extension later)
                  $path = $base.'/'.$slidefile;
                  if (!is_readable($path)) {
                      closedir($dh);
                      return false;
                  }
                  $pages[$path] = file_get_contents($path);
              }
          }
          closedir($dh);

          ksort($pages);  // order them by file name
      }

      if (empty($pages)) {
          return false;
      }

      return $pages;
  }
  /**
   * Extracts title, text blocks, bullet lists and images from the raw slide HTML.
   *
   * Images referenced by a slide are copied into a new sub-directory of
   * <dataroot>/<courseid>/moddata/<modname>, named after the lesson plus the first
   * free numeric suffix, and the generated <img> tags point at that copy.
   *
   * @param array  $pages      slide file path => raw HTML, as returned by readdata()
   * @param int    $courseid   course whose moddata area receives the images
   * @param string $lessonname module instance name, used to name the image directory
   * @param string $modname    module name
   * @return array list of objects with ->title (string), ->contents (array of strings),
   *               ->images (array of <img> tags) and ->source (slide file base name)
   */
  function extract_data($pages, $courseid, $lessonname, $modname) {
      // the slides are ugly broken xml. and the xml is broken... yeah...

      global $CFG;
      global $matches; // build_list() reads the current slide's matches through this global

      $extratedpages = array();

      // directory for images
      make_mod_upload_directory($courseid); // make sure moddata is made
      make_upload_directory($courseid.'/moddata/'.$modname, false);  // we store our images in a subfolder in here

      $imagedir = $CFG->dataroot.'/'.$courseid.'/moddata/'.$modname;

      require_once($CFG->libdir .'/filelib.php');
      $imagelink = get_file_url($courseid.'/moddata/'.$modname);

      // try to make a unique subfolder to store the images
      $lessonname = str_replace(' ', '_', $lessonname); // get rid of spaces
      $suffix = 0;
      while (file_exists($imagedir.'/'.$lessonname.$suffix)) {
          $suffix++;
      }
      // first free name found, so make the directory and update our paths
      mkdir($imagedir.'/'.$lessonname.$suffix, $CFG->directorypermissions);
      $imagedir = $imagedir.'/'.$lessonname.$suffix;
      $imagelink = $imagelink.'/'.$lessonname.$suffix;

      foreach ($pages as $file => $content) {
          // to make life easier on our preg_match_alls, we strip out all tags except
          // for div and img (where our content is). We want div because sometimes we
          // can identify the content in the div based on the div's class

          $tags = '<div><img>'; // should also allow <b><i>
          $string = strip_tags($content, $tags);

          $matches = array();
          // this will look for a non nested tag that is closed
          // want to allow <b><i>(maybe more) tags but when we do that
          // the preg_match messes up.
          // $matches[1] = opening tag, [2] = tag name, [3] = inner text, [4] = closing tag
          preg_match_all("/(<([\w]+)[^>]*>)([^<\\2>]*)(<\/\\2>)/", $string, $matches);

          $path_parts = pathinfo($file);
          $file = substr($path_parts['basename'], 0, strpos($path_parts['basename'], '.')); // get rid of the extension

          $imgs = array();
          // this preg matches all images that belong to this slide; the slide name is
          // quoted so that any regex meta characters in it are matched literally
          preg_match_all("/<img[^>]*(src\=\"(".preg_quote($file, '/')."\_image[^>^\"]*)\"[^>]*)>/i", $string, $imgs);

          // start building our page
          $page = new stdClass;
          $page->title = '';
          $page->contents = array();
          $page->images = array();
          $page->source = $path_parts['basename']; // need for book only

          // this foreach keeps the style intact.  Found it doesn't help much.  But if you want back uncomment
          // this foreach and uncomment the line with the comment imgstyle in it.  Also need to comment out
          // the $page->images[]... line in the next foreach
          /*foreach ($imgs[1] as $img) {
              $page->images[] = '<img '.str_replace('src="', "src=\"$imagelink/", $img).' />';
          }*/
          foreach ($imgs[2] as $img) {
              // The image name comes from the uploaded HTML. Slide images sit next to the
              // slide file, so skip anything carrying a directory component rather than
              // copying from (or to) a path outside of the expected directories.
              if ($img !== basename($img)) {
                  continue;
              }
              copy($path_parts['dirname'].'/'.$img, $imagedir.'/'.$img);
              $page->images[] = "<img src=\"$imagelink/$img\" title=\"$img\" />";  // comment out this line if you are using the above foreach loop
          }

          for ($i = 0; $i < count($matches[1]); $i++) { // go through all of our div matches

              $class = isolate_class($matches[1][$i]); // first step in isolating the class

              // check for any static classes
              switch ($class) {
                  case 'T':  // class T is used for Titles
                      $page->title = $matches[3][$i];
                      break;
                  case 'B':  // I would guess that all bullet lists would start with B then go to B1, B2, etc
                  case 'B1': // B1-B4 are just insurance, should just hit B and all be taken care of
                  case 'B2':
                  case 'B3':
                  case 'B4':
                      $liststart = $i;
                      $page->contents[] = build_list('<ul>', $i, 0);  // this is a recursive function that will grab all the bullets and rebuild the list in html
                      // build_list() leaves $i on the first element after the list. Step back
                      // one so that the loop's own increment does not skip that element.
                      if ($i > $liststart) {
                          $i--;
                      }
                      break;
                  default:
                      if ($matches[3][$i] != '&#13;') {  // odd crap generated... sigh
                          if (substr($matches[3][$i], 0, 1) == ':') {  // check for leading :    ... hate MS ...
                              $page->contents[] = substr($matches[3][$i], 1);  // get rid of :
                          } else {
                              $page->contents[] = $matches[3][$i];
                          }
                      }
                      break;
              }
          }

          // add the page to the array;
          $extratedpages[] = $page;

      } // end $pages foreach loop

      return $extratedpages;
  }
  /**
   * A recursive function to build a html list from consecutive bullet elements.
   *
   * Starting at index $i of the global $matches, consumes elements whose class
   * contains "B" (B = depth 0, B1 = depth 1, ...). A deeper element opens a nested
   * <ul> through recursion; a shallower one, or a non-bullet element, ends the
   * current list.
   *
   * @param string $list  HTML built so far, including the opening <ul> of this level
   * @param int    $i     by reference: index into $matches; on return it points at the
   *                      first element that was not consumed
   * @param int    $depth nesting depth of the list being built
   * @return string $list with the items of this level appended and its <ul> closed
   */
  function build_list($list, &$i, $depth) {
      global $matches; // filled by extract_data() for the slide being processed

      while ($i < count($matches[1])) {

          $class = isolate_class($matches[1][$i]);

          if (!strstr($class, 'B')) {
              // not a B class, so get out of here...
              break;
          }

          // make sure we are still working with bullet classes
          if ($class == 'B') {
              $this_depth = 0;  // calling class B depth 0
          } else {
              // set the depth number.  So B1 is depth 1 and B2 is depth 2 and so on
              $this_depth = substr($class, 1);
              if (!is_numeric($this_depth)) {
                  error("Depth not parsed!");
              }
              $this_depth = (int) $this_depth; // compare depths as numbers from here on
          }

          if ($this_depth < $depth) {
              // we are moving back a level in the nesting
              break;
          }
          if ($this_depth > $depth) {
              // we are moving in a lvl in nesting
              $list .= '<ul>';
              $list = build_list($list, $i, $this_depth);
              // once we return back, should go to the start of the while
              continue;
          }

          // no depth changes, so add the match to our list.
          // Compare explicitly so that a bullet whose text is "0" is not dropped.
          $cleanstring = ppt_clean_text($matches[3][$i]);
          if ($cleanstring !== false and $cleanstring !== '') {
              $list .= '<li>'.$cleanstring.'</li>';
          }
          $i++;
      }

      // end the list and return it
      $list .= '</ul>';
      return $list;
  }
  /**
   * Given an html opening tag, this function will return the value of its class attribute.
   *
   * PowerPoint writes the attribute without quotes (<div class=B1>), so the value runs
   * up to the next space or the closing ">". If the value is quoted, the quotes are
   * stripped. A value with several space separated names yields the first name only.
   *
   * @param string $string the opening tag, e.g. '<div class=T>'
   * @return string the class name, or '' when the tag has no class attribute
   */
  function isolate_class($string) {
      $attribute = strstr($string, 'class=');
      if ($attribute === false) {
          // no class defined in the tag
          return '';
      }

      // drop everything up to and including "class="
      $value = (string) substr($attribute, strlen('class='));

      // the value ends at the first space (more attributes follow) or at the closing ">"
      $value = (string) substr($value, 0, strcspn($value, ' >'));

      // tolerate class="B" / class='B'
      return trim($value, "\"'");
  }
  /**
   * Strips the leading bullet marker that ppt puts in front of list items.
   *
   * Normally the marker is a single character; when the text starts with the
   * entity "&lt;" the whole four character entity is removed instead.
   *
   * @param string $string raw text of a bullet element
   * @return string|false the text without its marker, or false when all that is
   *                      left is the "&#13;" filler
   */
  function ppt_clean_text($string) {
      // may need to recognise more markers later....
      $markerlength = (strpos($string, '&lt;') === 0) ? 4 : 1;

      $remainder = substr($string, $markerlength);

      if ($remainder == '&#13;') {
          return false;
      }
      return $remainder;
  }
  /**
   * Placeholder for cleaning up the temp directory after an import.
   *
   * Currently does nothing: the clean-up call below is disabled.
   */
  function clean_temp() {
      global $CFG;
      // TODO: the clean-up below is broken, so it stays switched off for now.
      // When it is fixed it should only remove what this import created, because
      // someone else could be importing a ppt at the same time.
      //delDirContents($CFG->dataroot.'/temp/lesson');
  }
  /**
   * Creates, for every imported slide, an object holding the lesson page and its answers
   * that are to be inserted into the database.
   *
   * Every slide becomes a branch table page with two answers, "Next" and "Previous".
   *
   * @param array $pageobjects slide objects as produced by extract_data()
   * @param int   $lessonid    id of the lesson the pages belong to
   * @return array list of objects with ->page (page record) and ->answers (array of answer records)
   */
  function lesson_create_objects($pageobjects, $lessonid) {

      $branchtables = array();
      $branchtable = new stdClass;

      // all pages have this info
      $page = new stdClass; // create the record explicitly instead of relying on implicit object creation
      $page->lessonid = $lessonid;
      $page->prevpageid = 0;
      $page->nextpageid = 0;
      $page->qtype = LESSON_BRANCHTABLE;
      $page->qoption = 0;
      $page->layout = 1;
      $page->display = 1;
      $page->timecreated = time();
      $page->timemodified = 0;

      // all answers are the same
      $answer = new stdClass;
      $answer->lessonid = $lessonid;
      $answer->jumpto = LESSON_NEXTPAGE;
      $answer->grade = 0;
      $answer->score = 0;
      $answer->flags = 0;
      $answer->timecreated = time();
      $answer->timemodified = 0;
      $answer->answer = "Next";
      $answer->response = "";

      $answers = array();
      $answers[] = clone($answer);

      // the second answer only differs in its jump target and label
      $answer->jumpto = LESSON_PREVIOUSPAGE;
      $answer->answer = "Previous";

      $answers[] = clone($answer);

      $branchtable->answers = $answers;

      $i = 1;

      foreach ($pageobjects as $pageobject) {
          $temp = prep_page($pageobject, $i);  // makes our title and contents
          $page->title = $temp->title;
          $page->contents = $temp->contents;
          $branchtable->page = clone($page);  // add the page
          $branchtables[] = clone($branchtable);  // add it all to our array
          $i++;
      }

      return $branchtables;
  }
  /**
   * Creates one chapter object per imported slide, to be inserted into the database.
   *
   * Page numbers continue after the chapters the book already has.
   *
   * @param array $pageobjects slide objects as produced by extract_data()
   * @param int   $bookid      id of the book the chapters belong to
   * @return array list of chapter records
   */
  function book_create_objects($pageobjects, $bookid) {

      $chapters = array();
      $chapter = new stdClass;

      // same for all chapters
      $chapter->bookid = $bookid;
      $chapter->pagenum = count_records('book_chapters', 'bookid', $bookid)+1;
      $chapter->timecreated = time();
      $chapter->timemodified = time();
      $chapter->subchapter = 0;

      $i = 1;
      foreach ($pageobjects as $pageobject) {
          $page = prep_page($pageobject, $i);  // get title and contents
          $chapter->importsrc = addslashes($pageobject->source); // add the source
          $chapter->title = $page->title;
          $chapter->content = $page->contents;
          // Store a copy: objects are handles, so appending $chapter itself would make
          // every entry of $chapters refer to the same (last) chapter.
          $chapters[] = clone($chapter);

          // increment our page number and our counter
          $chapter->pagenum = $chapter->pagenum + 1;
          $i++;
      }

      return $chapters;
  }
  /**
   * Builds the title and content strings from a slide object.
   *
   * The content is all images first, followed by every text block wrapped in <p> tags.
   * Line breaks and "&#13;" fillers are removed, and every piece is passed through
   * addslashes().
   *
   * @param object $pageobject slide object with ->title, ->images and ->contents
   * @param int    $count      1-based slide number, used for the fallback title "Page N"
   * @return object object with ->title and ->contents (both strings)
   */
  function prep_page($pageobject, $count) {
      $page = new stdClass; // create the result explicitly instead of relying on implicit object creation

      if ($pageobject->title == '') {
          $page->title = "Page $count";  // no title set so make a generic one
      } else {
          $page->title = addslashes($pageobject->title);
      }

      $page->contents = '';

      // nab all the images first
      foreach ($pageobject->images as $image) {
          $image = str_replace("\n", '', $image);
          $image = str_replace("\r", '', $image);
          $image = str_replace("'", '"', $image);  // imgstyle

          $page->contents .= addslashes($image);
      }

      // go through the contents array and put <p> tags around each element and strip out \n which I have found to be unnecessary
      foreach ($pageobject->contents as $content) {
          $content = str_replace("\n", '', $content);
          $content = str_replace("\r", '', $content);
          $content = str_replace('&#13;', '', $content);  // puts in returns?
          $content = '<p>'.$content.'</p>';

          $page->contents .= addslashes($content);
      }

      return $page;
  }
  /**
   * Saves the branchtable objects to the DB.
   *
   * The new pages are linked, in order, into the lesson's doubly linked page list:
   * directly after page $after, or at the very top of the lesson when $after is 0.
   *
   * @param array $branchtables objects with ->page and ->answers, see lesson_create_objects()
   * @param int   $lessonid     id of the lesson that receives the pages
   * @param int   $after        id of the page to insert after, 0 for the top of the lesson
   * @return bool true; failures are reported through error()
   */
  function lesson_save_objects($branchtables, $lessonid, $after) {
      if (empty($branchtables)) {
          // nothing to insert, and the existing page links must stay untouched
          return true;
      }

      // first set up the prevpageid and nextpageid
      if (empty($after)) { // adding it to the top of the lesson
          $prevpageid = 0;
          // get the id of the first page.  If not found, then no pages in the lesson
          if (!$nextpageid = get_field('lesson_pages', 'id', 'prevpageid', 0, 'lessonid', $lessonid)) {
              $nextpageid = 0;
          }
      } else {
          // going after an actual page
          $prevpageid = $after;
          $nextpageid = get_field('lesson_pages', 'nextpageid', 'id', $after);
      }

      foreach ($branchtables as $branchtable) {

          // set the doubly linked list
          $branchtable->page->nextpageid = $nextpageid;
          $branchtable->page->prevpageid = $prevpageid;

          // insert the page
          if (!$id = insert_record('lesson_pages', $branchtable->page)) {
              error("insert page");
          }

          // update the link of the page previous to the one we just updated
          if ($prevpageid != 0) {  // if not the first page
              if (!set_field("lesson_pages", "nextpageid", $id, "id", $prevpageid)) {
                  error("Insert page: unable to update next link $prevpageid");
              }
          }

          // insert the answers
          foreach ($branchtable->answers as $answer) {
              $answer->pageid = $id;
              if (!insert_record('lesson_answers', $answer)) {
                  error("insert answer $id");
              }
          }

          // the page just inserted precedes the next one
          $prevpageid = $id;
      }

      // all done with inserts.  Now check to update our last page (this is when we import between two lesson pages)
      if ($nextpageid != 0) {  // if the next page is not the end of lesson
          // $id is the last page inserted above; it becomes the predecessor of the following page
          if (!set_field("lesson_pages", "prevpageid", $id, "id", $nextpageid)) {
              error("Insert page: unable to update previous link $nextpageid");
          }
      }

      return true;
  }
  /**
   * Writes the chapter objects to the book_chapters table, in the order given.
   *
   * @param array  $chapters chapter records, see book_create_objects()
   * @param int    $bookid   not used here
   * @param string $pageid   not used here
   * @return bool true; a failed insert is reported through error()
   */
  function book_save_objects($chapters, $bookid, $pageid='0') {
      // nothing fancy, one insert per chapter
      foreach ($chapters as $record) {
          $record->id = insert_record('book_chapters', $record);
          if (!$record->id) {
              error('Could not update your book');
          }
      }

      return true;
  }
  486. ?>