PageRenderTime 36ms CodeModel.GetById 10ms app.highlight 19ms RepoModel.GetById 1ms app.codeStats 1ms

/src/org/ishafoundation/archives/transcript/importer/MSWordImporter.as

http://transcriptstudio4isha.googlecode.com/
ActionScript | 198 lines | 154 code | 16 blank | 28 comment | 24 complexity | 79a8093264da7bc66a4c94d05a8c9a35 MD5 | raw file
  1/*
  2   Transcript Studio for Isha Foundation: An XML based application that allows users to define 
  3   and store contextual metadata for contiguous sections within a text document. 
  4
  5   Copyright 2008 Mark Carter, Swami Kevala
  6
  7   This file is part of Transcript Studio for Isha Foundation.
  8
  9   Transcript Studio for Isha Foundation is free software: you can redistribute it and/or modify it 
 10   under the terms of the GNU General Public License as published by the Free Software 
 11   Foundation, either version 3 of the License, or (at your option) any later version.
 12
 13   Transcript Studio for Isha Foundation is distributed in the hope that it will be useful, but 
 14   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
 15   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 16
 17   You should have received a copy of the GNU General Public License along with 
 18   Transcript Studio for Isha Foundation. If not, see http://www.gnu.org/licenses/.
 19*/
 20
 21package org.ishafoundation.archives.transcript.importer
 22{
 23	import com.ericfeminella.collections.HashMap;
 24	import com.ericfeminella.collections.IMap;
 25	
 26	import mx.rpc.http.HTTPService;
 27	
 28	import name.carter.mark.flex.util.Utils;
 29	import name.carter.mark.flex.util.XMLUtils;
 30	
 31	import org.ishafoundation.archives.transcript.db.XQueryExecutor;
 32	import org.ishafoundation.archives.transcript.model.MediaMetadata;
 33	import org.ishafoundation.archives.transcript.model.ReferenceManager;
 34	import org.ishafoundation.archives.transcript.model.SessionMetadata;
 35	
	/**
	 * Imports transcripts by running the stored XQuery "import-transcript.xql"
	 * for each requested name, wrapping every result in a WordMLTransformer, and
	 * offers static helpers that assemble <event> and <session> elements from
	 * those transformers.
	 */
	public class MSWordImporter
	{
		// NOTE(review): not referenced anywhere inside this class; kept as-is.
		private static var DATE_FORMAT_STRINGS:Array = ["DD-MMMM-YY", "DD-MMM-YY", "DD-MM-YY"];
		
		private var xqueryExecutor:XQueryExecutor;
		private var referenceMgr:ReferenceManager;
		
		/**
		 * @param xqueryExecutor used to run the stored import query
		 * @param referenceMgr handed to every WordMLTransformer that gets created
		 */
		public function MSWordImporter(xqueryExecutor:XQueryExecutor, referenceMgr:ReferenceManager) {
			this.xqueryExecutor = xqueryExecutor;
			this.referenceMgr = referenceMgr;
		}
		
		/**
		 * Imports the named transcripts one after another.
		 *
		 * @param names transcript names to import, processed in array order
		 * @param successFunc called with the Array of WordMLTransformer instances once every name has been imported
		 * @param failureFunc called with a message String as soon as any single import fails
		 */
		public function importAudioTranscripts(names:Array, successFunc:Function, failureFunc:Function):void {
			var audioTranscripts:Array = [];
			// a single id generator is shared by all transcripts of this import run
			var idFunc:Function = getIdFunc();
			importPathsInternal(names, audioTranscripts, idFunc, function():void {
				successFunc(audioTranscripts);
			}, function (msg:String):void {
				failureFunc(msg);
			});
		}
		
		/**
		 * Builds an <event> element whose type is taken from the first transcript
		 * and whose <metadata> is merged from the event metadata of every transcript.
		 *
		 * @param audioTranscripts non-empty Array of WordMLTransformer
		 * @return the newly created event element
		 * @throws ArgumentError if audioTranscripts is null or empty
		 * @throws Error if the transcripts do not all share the same event type
		 */
		public static function createEventElement(audioTranscripts:Array):XML {
			if (audioTranscripts == null || audioTranscripts.length == 0) {
				// fail with a clear message instead of a null-reference error further down
				throw new ArgumentError("At least one transcript is required to create an event element");
			}
			// get the event type from the first source
			var firstSource:WordMLTransformer = audioTranscripts[0];
			var eventType:String = firstSource.eventElement.@type;
			var eventElement:XML = <event type={eventType}><metadata/></event>;
			var metadataElement:XML = eventElement.metadata[0];
			for each (var audioTranscript:WordMLTransformer in audioTranscripts) {
				var audioEventElement:XML = audioTranscript.eventElement;
				if (audioEventElement.@type != eventType) {
					throw new Error("All transcripts must have the same event type");
				}
				WordMLTransformer.mergeInGuestProperties(metadataElement, audioEventElement.metadata[0]);
			}
			return eventElement;
		}
		
		/**
		 * Builds a <session> element containing merged <metadata>, a <mediaMetadata>
		 * with one <device> holding every transcript's media element, and a single
		 * <transcript id="t1"> holding copies of all segments that have content.
		 * For each transcript, its attributes are also written into the session notes.
		 *
		 * @param audioTranscripts Array of WordMLTransformer
		 * @return the newly created session element
		 */
		public static function createSessionElement(audioTranscripts:Array):XML {
			var sessionElement:XML = <session><metadata/></session>;
			var metadataElement:XML = sessionElement.*[0];
			for each (var at1:WordMLTransformer in audioTranscripts) {
				WordMLTransformer.mergeInGuestProperties(metadataElement, at1.sessionElement);
				// remove the name attribute
				delete metadataElement.@name;
			}
			var mediaMetadataElement:XML = <mediaMetadata/>;
			sessionElement.appendChild(mediaMetadataElement);
			var deviceElement:XML = <device code={MediaMetadata.MAIN_AUDIO_DEVICE_CODE}/>;
			mediaMetadataElement.appendChild(deviceElement);
			var transcriptElement:XML = <transcript id="t1"/>;
			sessionElement.appendChild(transcriptElement);
			for each (var audioTranscript:WordMLTransformer in audioTranscripts) {
				deviceElement.appendChild(audioTranscript.mediaElement);
				appendContentSegments(audioTranscript.audioTranscriptElement, transcriptElement);
				// the session notes can contain information about the imported file(s)
				appendSessionNotesLine("Imported file:", metadataElement);
				appendAttributesToSessionNotes(audioTranscript.audioTranscriptElement, metadataElement);
				appendSessionNotesLine("", metadataElement);
			}
			return sessionElement;
		}
		
		/**
		 * Picks the action name used to stamp imported segments: "proofread" when
		 * a proofreadBy attribute exists, otherwise "proofed" when a proofedBy
		 * attribute exists, otherwise "modified".
		 */
		private static function resolveLastAction(audioTranscriptElement:XML):String {
			if (audioTranscriptElement.hasOwnProperty("@proofreadBy")) {
				return "proofread";
			}
			if (audioTranscriptElement.hasOwnProperty("@proofedBy")) {
				return "proofed";
			}
			return "modified";
		}
		
		/**
		 * Copies every segment of audioTranscriptElement that has at least one
		 * <content> child into transcriptElement, keeping only the content children
		 * and stamping the copy with lastAction / lastActionAt / lastActionBy.
		 */
		private static function appendContentSegments(audioTranscriptElement:XML, transcriptElement:XML):void {
			var lastAction:String = resolveLastAction(audioTranscriptElement);
			// e.g. "proofreadAt" / "proofreadBy" - read once, applied to every segment
			var lastActionAt:Date = XMLUtils.getAttributeAsDate(audioTranscriptElement, lastAction + "At");
			var lastActionBy:String = XMLUtils.getAttributeValue(audioTranscriptElement, lastAction + "By");
			for each (var segmentElement:XML in audioTranscriptElement.segment) {
				if (segmentElement.content.length() > 0) {
					// work on a copy
					var segmentCopy:XML = segmentElement.copy();
					// remove all extracted content
					XMLUtils.removeAllElements(segmentCopy.*.(localName() != "content"));
					// apply actions
					XMLUtils.setAttributeValue(segmentCopy, "lastAction", lastAction);
					XMLUtils.setAttributeAsDate(segmentCopy, "lastActionAt", lastActionAt);
					XMLUtils.setAttributeValue(segmentCopy, "lastActionBy", lastActionBy);
					transcriptElement.appendChild(segmentCopy);
				}
			}
		}
		
		/**
		 * Appends a blank notes line followed by one "name: value" line per
		 * attribute of audioTranscriptElement. "filename" and "name" are always
		 * listed first; all remaining attributes follow in sorted order.
		 */
		private static function appendAttributesToSessionNotes(audioTranscriptElement:XML, metadataElement:XML):void {
			appendSessionNotesLine("", metadataElement);
			// Two lists instead of a "_" sort prefix: the prefix trick let names
			// starting with an uppercase letter sort ahead of it, and it also
			// stripped the first character of attributes really starting with "_".
			var leadingNames:Array = [];
			var remainingNames:Array = [];
			for each (var attr:XML in audioTranscriptElement.@*) {
				var attrName:String = attr.localName();
				if (attrName == "filename" || attrName == "name") {
					leadingNames.push(attrName);
				}
				else {
					remainingNames.push(attrName);
				}
			}
			leadingNames.sort();
			remainingNames.sort();
			
			for each (var orderedName:String in leadingNames.concat(remainingNames)) {
				appendSessionNotesLine(orderedName + ": " + audioTranscriptElement.attribute(orderedName), metadataElement);
			}
		}
		
		/**
		 * Appends text, preceded by a carriage return, to the session notes
		 * element of metadataElement.
		 */
		private static function appendSessionNotesLine(text:String, metadataElement:XML):void {
			text = "\r" + text;
			XMLUtils.appendChildElementText(metadataElement, SessionMetadata.NOTES_ELEMENT_NAME, text, false);
		}
		
		/**
		 * Imports the first entry of names, pushes the resulting WordMLTransformer
		 * onto audioTranscripts and then recurses on the remaining names. The
		 * caller's names array is never modified.
		 *
		 * @param names names still to be imported
		 * @param audioTranscripts accumulator receiving one WordMLTransformer per imported name
		 * @param idFunc id generator passed to every WordMLTransformer
		 * @param successFunc called without arguments once no names are left
		 * @param failureFunc called with a message String on the first failure
		 */
		private function importPathsInternal(names:Array, audioTranscripts:Array, idFunc:Function, successFunc:Function, failureFunc:Function):void {
			if (names.length == 0) {
				successFunc();
				return;
			}
			names = Utils.copyArray(names);
			var nextName:String = names.shift();
			var encodedNextName:String = encodeURIComponent(nextName);
			// We don't want to ignore whitespace when parsing the WordML
			// TODO - is there a less hacky way to do this?
			// NOTE(review): the global flag stays changed until one of the callbacks
			// runs - presumably because the result is parsed asynchronously; confirm.
			var oldWhitespace:Boolean = XML.ignoreWhitespace;
			XML.ignoreWhitespace = false;
			try {
				xqueryExecutor.executeStoredXQuery("import-transcript.xql", {transcriptName:encodedNextName}, function(wordXML:XML):void {
					XML.ignoreWhitespace = oldWhitespace;
					var audioTranscript:WordMLTransformer;
					try {
						audioTranscript = new WordMLTransformer(nextName, wordXML, referenceMgr, idFunc);
					}
					catch (transformError:Error) {
						failureFunc(transformError.message);
						return;
					}
					audioTranscripts.push(audioTranscript);
					importPathsInternal(names, audioTranscripts, idFunc, successFunc, failureFunc);
				}, function(msg:String):void {
					XML.ignoreWhitespace = oldWhitespace;
					failureFunc(msg);
				}, HTTPService.RESULT_FORMAT_E4X);
			}
			catch (dispatchError:*) {
				// the call itself failed, so neither callback will restore the global flag
				XML.ignoreWhitespace = oldWhitespace;
				throw dispatchError;
			}
		}
		
		/**
		 * Creates an id generator. Each call of the returned function with a
		 * prefix yields prefix + counter, where the counter starts at 1 and is
		 * tracked separately per prefix.
		 */
		private static function getIdFunc():Function {
			var prefixToLargestIdMap:IMap = new HashMap();
			return function(prefix:String):String {
				var newId:int;
				if (!prefixToLargestIdMap.containsKey(prefix)) {
					newId = 1;
				}
				else {
					newId = prefixToLargestIdMap.getValue(prefix) + 1;
				}
				prefixToLargestIdMap.put(prefix, newId);
				return prefix + newId;
			};
		}
		
	}
198}