PageRenderTime 27ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/php/include/tprScriptParser.php

https://github.com/yoyar/tprsp
PHP | 456 lines | 433 code | 17 blank | 6 comment | 2 complexity | 0a95d8c014df6e9e4e1e9bb0348f89cd MD5 | raw file
  1. <?php
  2. class tprScriptParser {
  3. private $script;
  4. private $doc;
  5. private $xmlScript;
  6. private $zip;
  7. private $show;
  8. public function __construct(tprZipEntry $zipEntry) {
  9. $this->zip = $zipEntry;
  10. $this->script = trim($zipEntry->getDocFileAsPlainText());
  11. //drupal_set_message($this->script);
  12. }
  13. public function getMp3ZipEntry() {
  14. $mp3NameIdx = substr($this->zip->getNameIndex(), 0, -3) . 'mp3' ;
  15. $mp3ZipEntry = new tprZipEntry($mp3NameIdx, $this->zip->getZipArchive());
  16. return $mp3ZipEntry;
  17. }
  18. public function parse() {
  19. //drupal_set_message("start to parse");
  20. // clean up the script first
  21. $this->script = str_replace(array('“', '”'), '"', $this->script);
  22. $this->script = str_replace(array('``',"''"), '"', $this->script);
  23. $this->script = str_replace('–', '-', $this->script);
  24. $this->script = str_replace('’', "'", $this->script);
  25. $this->script = str_replace('…', "... ", $this->script);
  26. //$this->script = str_replace('é', 'eeeee', $this->script);
  27. //drupal_set_message($this->script);
  28. $this->script = htmlentities(
  29. $this->script, ENT_NOQUOTES , 'UTF-8'
  30. );
  31. //drupal_set_message('script: '. $this->script);
  32. $tok = trim(strtok($this->script, "\n"));
  33. $this->doc = new DOMDocument();
  34. $this->doc->formatOutput = true;
  35. $this->xmlScript = $this->doc->createElement('script');
  36. $this->doc->appendChild($this->xmlScript);
  37. while( $tok !== false ) {
  38. $tok = trim($tok);
  39. echo "LINE: ", $tok, "\n";
  40. //drupal_set_message("TOK: '" . $tok . "'");
  41. $parseResult = true;
  42. switch( true ) {
  43. case 'WEEK' == substr($tok, 0, 4):
  44. $parseResult = $this->parseWeek($tok);
  45. break;
  46. case 'feature' == strtolower(substr($tok, 0, 7)):
  47. $parseResult = $this->parseFeature($tok);
  48. break;
  49. case 'joanne tease' == strtolower(substr($tok, 0, 12)):
  50. $parseResult = $this->parseTease($tok);
  51. break;
  52. case 'joanne intro:' == strtolower(substr($tok, 0, 13)):
  53. $parseResult = $this->parseIntro($tok);
  54. break;
  55. case 'air date' == strtolower(substr($tok, 0, 8)):
  56. $parseResult = $this->parseAirDate($tok);
  57. break;
  58. case 'categories:' == strtolower(substr($tok, 0, 11)):
  59. $parseResult = $this->parseCategories($tok);
  60. break;
  61. case 'ages:' == strtolower(substr($tok, 0, 5)):
  62. $parseResult = $this->parseAges($tok);
  63. break;
  64. case 'joanne bridge:' == strtolower(substr($tok, 0, 14)):
  65. $parseResult = $this->parseBridge($tok);
  66. break;
  67. case 'clip:' == strtolower(substr($tok, 0, 5)):
  68. $tok = trim(trim(strtok("\n")), '()');
  69. $parseResult = $this->parseClip($tok);
  70. break;
  71. case 'joanne wrap:' == strtolower(substr($tok, 0, 12)):
  72. $parseResult = $this->parseWrap($tok);
  73. break;
  74. } // switch
  75. if( ! $parseResult ) $this->handleParseError($tok);
  76. $tok = strtok("\n");
  77. } // while
  78. $fullText = $this->doc->createElement('fullText', $this->script);
  79. $this->xmlScript->appendChild($fullText);
  80. $xml = $this->doc->saveXml();
  81. try {
  82. $s = new SimpleXmlElement($xml);
  83. $this->val($s->show->title, 'title');
  84. $this->val($s->show->intro, 'intro');
  85. $this->val($s->show->airDate, 'airDate');
  86. $this->val($s->show->tease, 'tease');
  87. $this->val($s->show->clip, 'clip');
  88. $this->val($s->fullText, 'fullText');
  89. $this->val($s->week->title, 'week');
  90. } catch (Exception $e) {
  91. throw new Exception(
  92. "Unable to read xml. An element is empty. "
  93. . $e->getMessage()
  94. );
  95. }
  96. //var_dump( (string)$s->show->airDate . " --- ". (string) $s->show->title . " --- " .(string)$s->show->intro);
  97. return $xml;
  98. }
  99. /**
  100. * check if an xml element is empty, which is not allowed.
  101. */
  102. private function val(SimpleXmlElement $x, $elementName ) {
  103. if( ! trim( (string)$x )) {
  104. throw new tprScriptParseException(
  105. "Xml element empty: &lt;$elementName&gt;. For entry: " . $this->zip
  106. );
  107. }
  108. } //
  109. private function parseWrap($line) {
  110. if( ! preg_match('/Joanne Wrap:\s*(.*)\s*\(:?\d+\.?\d*\)/i', $line, $matches) ) {
  111. return false;
  112. }
  113. $wrap = $this->doc->createElement('wrap', $matches[1]);
  114. $this->show->appendChild($wrap);
  115. return true;
  116. }
  117. private function parseIntro($line) {
  118. if( ! preg_match('/Joanne Intro:\s+.*?\..*?\.\s+(.*)\s*\(:?\s*\d+\.?\d*\)/i', $line, $matches) ) {
  119. return false;
  120. }
  121. $intro = $this->doc->createElement('intro', $matches[1]);
  122. $this->show->appendChild($intro);
  123. return true;
  124. }
  125. private function parseBridge($line) {
  126. if( ! preg_match('/Joanne Bridge:\s*(.*?)\s*\(:?\d+\.?\d*\)/i', $line, $matches) ) {
  127. return false;
  128. }
  129. $bridge = $this->doc->createElement('bridge', $matches[1]);
  130. $this->show->appendChild($bridge);
  131. return true;
  132. }
  133. private function findTease($line) {
  134. // drupal_set_message($line);
  135. if( ! preg_match('/Joanne Tease:\s*(.*?)\(:?\d+\.?\d*( secs)?\)/i', $line, $matches) ) {
  136. return false;
  137. }
  138. // var_dump($matches);
  139. $teaser = trim($matches[1]);
  140. // remove the last period, and any other periods at the end of the string
  141. while( substr($teaser, -1 ) == '.' ) {
  142. $teaser = substr($teaser, 0, -1);
  143. $teaser = trim($teaser);
  144. }
  145. // var_dump($teaser);
  146. if( FALSE !== ($pos = strpos($teaser, '?')) ) {
  147. return substr($teaser, 0, $pos) . '?';
  148. } elseif ( preg_match('/([,])[^,]*?([Pp]arent\s+[Rr]eport)\..*?$/', trim($matches[1]), $m) ) {
  149. // var_dump($m);
  150. $punct = $m[1];
  151. ',' == $punct && $punct = '.';
  152. $teaser = substr(trim($matches[1]), 0, - (strlen($m[0]))) . $punct;
  153. return $teaser;
  154. } elseif( FALSE !== ($pos = strpos(substr($teaser, 0, -1), '.'))) {
  155. $t = substr($teaser, 0, $pos) . '.';
  156. return $t;
  157. }
  158. return false;
  159. }
  160. private function parseTease($line) {
  161. $teaser = $this->findTease($line);
  162. if( ! $teaser ) return false;
  163. $tease = $this->doc->createElement('tease', $teaser);
  164. $this->show->appendChild($tease);
  165. return true;
  166. }
  167. private function parseAges($line) {
  168. if( ! preg_match('/Ages:(.*)/i', $line, $matches) ) {
  169. return false;
  170. }
  171. if( ! isset($matches[1]) ) {
  172. return true; // ignore badly formed Ages line.
  173. }
  174. $letters = array_unique(explode(',', $matches[1]));
  175. if( ! empty($letters) ) {
  176. $ages = $this->doc->createElement( 'ages' );
  177. $this->xmlScript->appendChild($ages);
  178. foreach( $letters as $letter ) {
  179. if( '' == trim($letter) ) continue;
  180. $ages->appendChild(
  181. $this->doc->createElement('age', $this->agesLookup(trim($letter)))
  182. );
  183. }
  184. }
  185. return true;
  186. }
  187. private function parseClip($line) {
  188. $clip = $this->doc->createElement('clip', $line);
  189. $this->show->appendChild($clip);
  190. return true;
  191. }
  192. private function parseCategories($line) {
  193. if( ! preg_match('/Categories:(.*)/i', $line, $matches) ) {
  194. return false;
  195. }
  196. if( ! isset($matches[1]) ) {
  197. return true; // ignore badly formed Categories line.
  198. }
  199. $letters = array_unique(explode(',', $matches[1]));
  200. if( ! empty($letters) ) {
  201. $categories = $this->doc->createElement( 'categories' );
  202. $this->xmlScript->appendChild($categories);
  203. foreach( $letters as $letter ) {
  204. if( '' == trim($letter) ) continue;
  205. $categories->appendChild(
  206. $this->doc->createElement('category', $this->categoriesLookup(trim($letter)))
  207. );
  208. }
  209. }
  210. return true;
  211. }
  212. private function agesLookup($letterCode) {
  213. $hash = $this->agesLookupHash();
  214. if( ! isset($hash[strtoupper($letterCode)])) {
  215. $nameIdx = $this->zip->getNameIndex();
  216. throw new tprScriptParseException(
  217. "An unknown Age code was found: [$letterCode]. Check the Category line in the script. [$nameIdx]"
  218. );
  219. }
  220. return $hash[$letterCode];
  221. }
  222. private function categoriesLookup($letterCode) {
  223. $hash = $this->categoriesLookupHash();
  224. if( ! isset($hash[strtoupper($letterCode)])) {
  225. $nameIdx = $this->zip->getNameIndex();
  226. throw new tprScriptParseException(
  227. "Entry: $nameIdx -- An unknown Category code was found: [$letterCode]. Check the Category line in the script."
  228. );
  229. }
  230. return $hash[$letterCode];
  231. }
  232. private function agesLookupHash() {
  233. return array(
  234. 'N' => 'Newborn',
  235. 'I' => 'Infant',
  236. 'IT' => 'Toddler',
  237. 'PS' => 'Preschool',
  238. 'ES' => 'Early School',
  239. 'PT' => 'Preteen',
  240. 'T' => 'Teen',
  241. );
  242. }
  243. private function categoriesLookupHash() {
  244. return array(
  245. 'B' => 'Behaviour',
  246. 'D' => 'Development',
  247. 'SF' =>'Safety',
  248. 'E' => 'Education',
  249. 'N' => 'Nutrition',
  250. 'F' => 'Family Life',
  251. 'H' => 'Health',
  252. 'LS' => 'Limit Setting',
  253. 'KC' => 'Kids Culture',
  254. 'SL' => 'Sleep',
  255. 'F' => 'Family',
  256. );
  257. }
  258. private function parseAirDate($line) {
  259. if( ! preg_match('/Air Date:\s+(.*)/i', $line, $matches) ) return false;
  260. $date = strtotime($matches[1]);
  261. if( false === $date ) {
  262. throw new tprScriptParseException(
  263. "The air date is not a correctly formatted date: {$matches[1]} for entry: $this->zip"
  264. );
  265. return false;
  266. }
  267. // this is the format that scheduler module is expecting
  268. $airDate = $this->doc->createElement('airDate', date('Y-m-d H:i:s', $date));
  269. $this->show->appendChild($airDate);
  270. return true;
  271. }
  272. private function parseWeek($line) {
  273. //drupal_set_message("FOO");
  274. //fooooo();
  275. if( !preg_match('/^WEEK\s+(\d+)\s+-\s+(.*)$/', $line, $matches) ) {
  276. var_dump($matches);
  277. //drupal_set_message("Unable to parse Week line.");
  278. return false;
  279. }
  280. $weekNumber = $matches[1];
  281. $weekTitle = $matches[2];
  282. $week = $this->doc->createElement('week');
  283. $week->appendChild($this->doc->createElement('title', $weekTitle));
  284. $week->appendChild($this->doc->createElement('number', $weekNumber));
  285. $this->xmlScript->appendChild($week);
  286. return true;
  287. }
  288. private function handleParseError($line) {
  289. $err = "Unable to parse line: `$line' for entry: {$this->zip}\n";
  290. throw new tprScriptParseException($err);
  291. }
  292. private function parseFeature($line) {
  293. // echo " feature LINE: ", $line, "\n";
  294. if( ! preg_match(
  295. //'/(\d+)-(\d+)\s+"([^"]+)",\s+"?([^,]+),\s+.*\s+"([^"]+)"/',
  296. //'/Feature:?\s+(\d+)\s*-?\s*(\d+)\s+-?\s+"([^"]+)"/',
  297. '/Feature:?\s+(\d{4})\s*-\s*(\d)\s*-?\s*"(.*?)".*/i',
  298. $line,
  299. $matches
  300. ) ) return false;
  301. $this->show = $this->doc->createElement('show');
  302. $this->xmlScript->appendChild($this->show);
  303. $this->show->appendChild($this->doc->createElement('title', $matches[3]));
  304. return true;
  305. }
  306. }
  307. class tprScriptParseException extends Exception {
  308. }