/src/org/ishafoundation/archives/transcript/importer/MSWordImporter.as

http://transcriptstudio4isha.googlecode.com/ · ActionScript · 198 lines · 154 code · 16 blank · 28 comment · 24 complexity · 79a8093264da7bc66a4c94d05a8c9a35 MD5 · raw file

  1. /*
  2. Transcript Studio for Isha Foundation: An XML based application that allows users to define
  3. and store contextual metadata for contiguous sections within a text document.
  4. Copyright 2008 Mark Carter, Swami Kevala
  5. This file is part of Transcript Studio for Isha Foundation.
  6. Transcript Studio for Isha Foundation is free software: you can redistribute it and/or modify it
  7. under the terms of the GNU General Public License as published by the Free Software
  8. Foundation, either version 3 of the License, or (at your option) any later version.
  9. Transcript Studio for Isha Foundation is distributed in the hope that it will be useful, but
  10. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License along with
  13. Transcript Studio for Isha Foundation. If not, see http://www.gnu.org/licenses/.
  14. */
  15. package org.ishafoundation.archives.transcript.importer
  16. {
  17. import com.ericfeminella.collections.HashMap;
  18. import com.ericfeminella.collections.IMap;
  19. import mx.rpc.http.HTTPService;
  20. import name.carter.mark.flex.util.Utils;
  21. import name.carter.mark.flex.util.XMLUtils;
  22. import org.ishafoundation.archives.transcript.db.XQueryExecutor;
  23. import org.ishafoundation.archives.transcript.model.MediaMetadata;
  24. import org.ishafoundation.archives.transcript.model.ReferenceManager;
  25. import org.ishafoundation.archives.transcript.model.SessionMetadata;
  26. public class MSWordImporter
  27. {
  28. private static var DATE_FORMAT_STRINGS:Array = ["DD-MMMM-YY", "DD-MMM-YY", "DD-MM-YY"];
  29. private var xqueryExecutor:XQueryExecutor;
  30. private var referenceMgr:ReferenceManager;
  /**
   * Creates an importer bound to the given collaborators.
   *
   * @param xqueryExecutor executor used by importPathsInternal to run the
   *        stored "import-transcript.xql" query
   * @param referenceMgr reference manager handed to every WordMLTransformer
   *        created during an import
   */
  public function MSWordImporter(xqueryExecutor:XQueryExecutor, referenceMgr:ReferenceManager) {
      this.referenceMgr = referenceMgr;
      this.xqueryExecutor = xqueryExecutor;
  }
  /**
   * Imports the named transcripts one after another and reports the outcome
   * through callbacks.
   *
   * @param names names of the transcripts to import; forwarded to
   *        importPathsInternal
   * @param successFunc called with the Array of WordMLTransformer results
   *        once every name has been imported
   * @param failureFunc called with a message String when an import fails
   */
  public function importAudioTranscripts(names:Array, successFunc:Function, failureFunc:Function):void {
      // filled by importPathsInternal, one WordMLTransformer per imported name
      var imported:Array = [];
      // one id generator is shared by all transcripts of this batch
      var nextId:Function = getIdFunc();

      var onAllImported:Function = function():void {
          successFunc(imported);
      };
      var onImportFailed:Function = function (msg:String):void {
          failureFunc(msg);
      };

      importPathsInternal(names, imported, nextId, onAllImported, onImportFailed);
  }
  /**
   * Builds a single <event> element from the imported transcripts.
   *
   * The event type is taken from the first transcript; every transcript must
   * declare that same type. The metadata of each transcript's event element
   * is merged into the new element's <metadata> child via
   * WordMLTransformer.mergeInGuestProperties.
   *
   * @param audioTranscripts non-empty Array of WordMLTransformer
   * @return the new <event type="..."><metadata/></event> element
   * @throws Error if audioTranscripts is null or empty, or if the transcripts
   *         do not all share the same event type
   */
  public static function createEventElement(audioTranscripts:Array):XML {
      // Fail with a clear message instead of a null-reference error on [0] below.
      if (audioTranscripts == null || audioTranscripts.length == 0) {
          throw new Error("At least one transcript is required to create an event element");
      }
      // get the event type from the first source
      var firstSource:WordMLTransformer = audioTranscripts[0];
      var eventType:String = firstSource.eventElement.@type;
      var eventElement:XML = <event type={eventType}><metadata/></event>;
      var metadataElement:XML = eventElement.metadata[0];
      for each (var audioTranscript:WordMLTransformer in audioTranscripts) {
          var audioEventElement:XML = audioTranscript.eventElement;
          if (audioEventElement.@type != eventType) {
              throw new Error("All transcripts must have the same event type");
          }
          WordMLTransformer.mergeInGuestProperties(metadataElement, audioEventElement.metadata[0]);
      }
      return eventElement;
  }
  /**
   * Builds a <session> element from the imported transcripts.
   *
   * The result contains, in order:
   *   - <metadata>: guest properties merged from every transcript's session
   *     element (with any name attribute removed), plus session notes that
   *     list the attributes of each imported file;
   *   - <mediaMetadata> holding one <device> (MediaMetadata.MAIN_AUDIO_DEVICE_CODE)
   *     with each transcript's media element;
   *   - <transcript id="t1"> holding a stripped copy of every segment that
   *     has at least one <content> child, stamped with lastAction,
   *     lastActionAt and lastActionBy.
   *
   * @param audioTranscripts Array of WordMLTransformer
   * @return the assembled <session> element
   */
  public static function createSessionElement(audioTranscripts:Array):XML {
      var sessionElement:XML = <session><metadata/></session>;
      // <metadata/> is the only child at this point
      var metadataElement:XML = sessionElement.*[0];
      for each (var source:WordMLTransformer in audioTranscripts) {
          WordMLTransformer.mergeInGuestProperties(metadataElement, source.sessionElement);
          // remove the name attribute
          delete metadataElement.@name;
      }

      var mediaMetadataElement:XML = <mediaMetadata/>;
      sessionElement.appendChild(mediaMetadataElement);
      var deviceElement:XML = <device code={MediaMetadata.MAIN_AUDIO_DEVICE_CODE}/>;
      mediaMetadataElement.appendChild(deviceElement);

      var transcriptElement:XML = <transcript id="t1"/>;
      sessionElement.appendChild(transcriptElement);

      for each (var transcript:WordMLTransformer in audioTranscripts) {
          deviceElement.appendChild(transcript.mediaElement);

          var importedElement:XML = transcript.audioTranscriptElement;

          // Most advanced action recorded on the file: proofread, then proofed, else modified.
          var action:String = "modified";
          if (importedElement.hasOwnProperty("@proofreadBy")) {
              action = "proofread";
          }
          else if (importedElement.hasOwnProperty("@proofedBy")) {
              action = "proofed";
          }
          var actionAt:Date = XMLUtils.getAttributeAsDate(importedElement, action + "At");
          var actionBy:String = XMLUtils.getAttributeValue(importedElement, action + "By");

          for each (var segment:XML in importedElement.segment) {
              // segments without any <content> child are skipped
              if (segment.content.length() < 1) {
                  continue;
              }
              // work on a copy so the imported element is left untouched
              var segmentCopy:XML = segment.copy();
              // keep only the <content> children
              XMLUtils.removeAllElements(segmentCopy.*.(localName() != "content"));
              // stamp the file-level action onto the segment
              XMLUtils.setAttributeValue(segmentCopy, "lastAction", action);
              XMLUtils.setAttributeAsDate(segmentCopy, "lastActionAt", actionAt);
              XMLUtils.setAttributeValue(segmentCopy, "lastActionBy", actionBy);
              transcriptElement.appendChild(segmentCopy);
          }

          // the session notes can contain information about the imported file(s)
          appendSessionNotesLine("Imported file:", metadataElement);
          appendAttributesToSessionNotes(importedElement, metadataElement);
          appendSessionNotesLine("", metadataElement);
      }
      return sessionElement;
  }
  /**
   * Appends one "name: value" line to the session notes for every attribute
   * of the given element, preceded by an empty line.
   *
   * "filename" and "name" are always listed first (in that order when both
   * are present); all remaining attributes follow in Array.sort() default
   * order. Names are partitioned explicitly rather than being prefixed with
   * "_" for sorting, so attribute names that really begin with "_" are left
   * intact and names starting with an uppercase letter can no longer sort
   * ahead of "filename"/"name".
   *
   * @param audioTranscriptElement element whose attributes are listed
   * @param metadataElement metadata element that receives the notes lines
   */
  private static function appendAttributesToSessionNotes(audioTranscriptElement:XML, metadataElement:XML):void {
      appendSessionNotesLine("", metadataElement);
      var leadingNames:Array = [];
      var otherNames:Array = [];
      for each (var attr:XML in audioTranscriptElement.@*) {
          var attrName:String = attr.localName();
          // "filename" and "name" go to the front of the list
          if (attrName == "filename" || attrName == "name") {
              leadingNames.push(attrName);
          }
          else {
              otherNames.push(attrName);
          }
      }
      // "filename" sorts before "name", matching the previous relative order
      leadingNames.sort();
      otherNames.sort();
      for each (attrName in leadingNames.concat(otherNames)) {
          appendSessionNotesLine(attrName + ": " + audioTranscriptElement.attribute(attrName), metadataElement);
      }
  }
  /**
   * Appends a line of text to the session notes element of the metadata.
   *
   * The text is prefixed with a carriage return ("\r") before being handed
   * to XMLUtils.appendChildElementText.
   *
   * @param text line to append (may be empty to produce a blank line)
   * @param metadataElement metadata element holding the notes child named
   *        SessionMetadata.NOTES_ELEMENT_NAME
   */
  private static function appendSessionNotesLine(text:String, metadataElement:XML):void {
      var line:String = "\r" + text;
      // NOTE(review): meaning of the trailing "false" flag is defined by XMLUtils - confirm there
      XMLUtils.appendChildElementText(metadataElement, SessionMetadata.NOTES_ELEMENT_NAME, line, false);
  }
  /**
   * Imports the first name in the list and then continues with the remaining
   * names from the success callback, until the list is empty.
   *
   * For each name the stored query "import-transcript.xql" is executed with
   * the URI-encoded name; the resulting WordML is wrapped in a
   * WordMLTransformer which is pushed onto audioTranscripts.
   *
   * XML.ignoreWhitespace is a global setting. It is switched off for the
   * duration of the request and restored in both callbacks, and also when
   * executeStoredXQuery throws synchronously, so a failed call cannot leave
   * the global flag changed.
   *
   * @param names names still to import; the caller's array is not modified
   * @param audioTranscripts accumulator receiving one WordMLTransformer per name
   * @param idFunc id generator passed to each WordMLTransformer
   * @param successFunc called with no arguments once all names are imported
   * @param failureFunc called with a message String on the first failure;
   *        remaining names are then not imported
   */
  private function importPathsInternal(names:Array, audioTranscripts:Array, idFunc:Function, successFunc:Function, failureFunc:Function):void {
      if (names.length == 0) {
          successFunc();
          return;
      }
      // work on a copy so shift() does not alter the caller's array
      names = Utils.copyArray(names);
      var nextName:String = names.shift();
      var encodedNextName:String = encodeURIComponent(nextName);
      // We don't want to ignore whitespace when parsing the WordML
      // TODO - is there a less hacky way to do this?
      var oldWhitespace:Boolean = XML.ignoreWhitespace;
      XML.ignoreWhitespace = false;
      try {
          xqueryExecutor.executeStoredXQuery("import-transcript.xql", {transcriptName:encodedNextName}, function(wordXML:XML):void {
              XML.ignoreWhitespace = oldWhitespace;
              var audioTranscript:WordMLTransformer;
              try {
                  audioTranscript = new WordMLTransformer(nextName, wordXML, referenceMgr, idFunc);
              }
              catch (transformError:Error) {
                  failureFunc(transformError.message);
                  return;
              }
              audioTranscripts.push(audioTranscript);
              importPathsInternal(names, audioTranscripts, idFunc, successFunc, failureFunc);
          }, function(msg:String):void {
              XML.ignoreWhitespace = oldWhitespace;
              failureFunc(msg);
          }, HTTPService.RESULT_FORMAT_E4X);
      }
      catch (requestError:Error) {
          // Neither callback will run to restore the flag, so restore it here.
          // A "finally" is not used: presumably the response is parsed after this
          // call returns, and the flag must still be off at that time - confirm
          // against XQueryExecutor.
          XML.ignoreWhitespace = oldWhitespace;
          throw requestError;
      }
  }
  /**
   * Creates an id generator with its own private counters.
   *
   * The returned function takes a prefix and returns the prefix followed by
   * a number that starts at 1 and increases by one on every call with that
   * same prefix. Counters are kept per prefix and per generator.
   *
   * @return function(prefix:String):String producing the next id for a prefix
   */
  private static function getIdFunc():Function {
      // last number handed out for each prefix
      var counters:IMap = new HashMap();
      return function(prefix:String):String {
          var nextId:int = counters.containsKey(prefix) ? counters.getValue(prefix) + 1 : 1;
          counters.put(prefix, nextId);
          return prefix + nextId;
      };
  }
  176. }
  177. }