PageRenderTime 59ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/AODL/Document/Import/OpenDocument/OpenDocumentImporter.cs

https://bitbucket.org/chrisc/aodl
C# | 524 lines | 277 code | 64 blank | 183 comment | 26 complexity | 0f2dee32758eda35a16f9556b3fc4161 MD5 | raw file
  1. /*************************************************************************
  2. *
  3. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
  4. *
  5. * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
  6. *
  7. * Use is subject to license terms.
  8. *
  9. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  10. * use this file except in compliance with the License. You may obtain a copy
  11. * of the License at http://www.apache.org/licenses/LICENSE-2.0. You can also
  12. * obtain a copy of the License at http://odftoolkit.org/docs/license.txt
  13. *
  14. * Unless required by applicable law or agreed to in writing, software
  15. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  16. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17. *
  18. * See the License for the specific language governing permissions and
  19. * limitations under the License.
  20. *
  21. ************************************************************************/
  22. using System;
  23. using System.Collections.Generic;
  24. using System.IO;
  25. using System.Linq;
  26. using System.Xml.Linq;
  27. using AODL.Document.Content.Fields;
  28. using AODL.Document.Custom;
  29. using AODL.Document.Exceptions;
  30. using AODL.Document.Export;
  31. using AODL.Document.Import.OpenDocument.NodeProcessors;
  32. using AODL.Document.SpreadsheetDocuments;
  33. using AODL.Document.Styles.MasterStyles;
  34. using AODL.Document.TextDocuments;
  35. using AODL.IO;
  36. using DocumentManifest=AODL.Document.TextDocuments.DocumentManifest;
  37. using DocumentSetting=AODL.Document.TextDocuments.DocumentSetting;
  38. using DocumentStyles=AODL.Document.TextDocuments.DocumentStyles;
  39. namespace AODL.Document.Import.OpenDocument
  40. {
  /// <summary>
  /// OpenDocumentImporter - Importer for OpenDocuments in different formats.
  /// Reads an OpenDocument package through an <see cref="IPackageReader"/> and
  /// fills a <see cref="TextDocument"/> or <see cref="SpreadsheetDocument"/>
  /// with its resources, styles and content.
  /// </summary>
  public class OpenDocumentImporter : IImporter, IPublisherInfo
  {
      /// <summary>
      /// Package entries that are loaded explicitly by this importer and therefore
      /// are never registered as custom files.
      /// </summary>
      private static readonly string[] KnownFiles = new [] {"content.xml", "styles.xml", "meta.xml", "settings.xml"};

      private readonly IPackageReader _packageReader;
      private readonly IList<AODLWarning> _importError;
      private readonly IList<DocumentSupportInfo> _supportedExtensions;
      private readonly string _author;
      private readonly string _description;
      private readonly string _infoUrl;

      /// <summary>
      /// The document to fill with content.
      /// </summary>
      private IDocument _document;

      /// <summary>
      /// Initializes a new instance of the <see cref="OpenDocumentImporter"/> class.
      /// </summary>
      /// <param name="packageReader">The reader used to access the files of the package.</param>
      public OpenDocumentImporter(IPackageReader packageReader)
      {
          _packageReader = packageReader;
          _importError = new List<AODLWarning>();
          _supportedExtensions = new List<DocumentSupportInfo>
                                     {
                                         new DocumentSupportInfo(".odt", DocumentTypes.TextDocument),
                                         new DocumentSupportInfo(".ods", DocumentTypes.SpreadsheetDocument)
                                     };
          _author = "Lars Behrmann, lb@OpenDocument4all.com";
          _infoUrl = "http://AODL.OpenDocument4all.com";
          _description = "This the standard importer of the OpenDocument library AODL.";
      }

      #region IImporter Member

      /// <summary>
      /// Gets the document support infos.
      /// </summary>
      /// <value>The document support infos.</value>
      public IList<DocumentSupportInfo> DocumentSupportInfos
      {
          get { return _supportedExtensions; }
      }

      /// <summary>
      /// Imports the specified file into the given document.
      /// </summary>
      /// <param name="document">The document to fill.</param>
      /// <param name="filename">The filename passed to the package reader.</param>
      /// <exception cref="ImporterException">
      /// Thrown for any failure during the import; the original exception is
      /// available as the inner exception.
      /// </exception>
      public void Import(IDocument document, string filename)
      {
          try
          {
              // Validated inside the try block on purpose: callers only ever see
              // ImporterException, but the inner exception now names the real cause
              // instead of a NullReferenceException raised while reading resources.
              if (document == null)
              {
                  throw new ArgumentNullException("document");
              }

              _document = document;
              UnpackFiles(filename);
              ReadContent();
          }
          catch (Exception ex)
          {
              throw new ImporterException(string.Format(
                                              "Failed to import document '{0}'", filename), ex);
          }
      }

      /// <summary>
      /// Opens the package entry at the given path via the package reader.
      /// </summary>
      /// <param name="path">The path of the entry.</param>
      /// <returns>The stream returned by the package reader.</returns>
      public Stream Open(string path)
      {
          return _packageReader.Open(path);
      }

      /// <summary>
      /// Gets the package entry at the given path via the package reader.
      /// </summary>
      /// <param name="path">The path of the entry.</param>
      /// <returns>The file returned by the package reader.</returns>
      public IFile GetFile(string path)
      {
          return _packageReader.GetFile(path);
      }

      /// <summary>
      /// Gets the warnings collected while reading the content.
      /// </summary>
      /// <value>The import errors.</value>
      public IList<AODLWarning> ImportError
      {
          get { return _importError; }
      }

      /// <summary>
      /// If the import file format isn't any OpenDocument
      /// format you have to return true and AODL will
      /// create a new one. This importer always returns false.
      /// </summary>
      /// <value>Always <c>false</c>.</value>
      public bool NeedNewOpenDocument
      {
          get { return false; }
      }

      #endregion

      #region IPublisherInfo Member

      /// <summary>
      /// The name of the author.
      /// </summary>
      /// <value>The author.</value>
      public string Author
      {
          get { return _author; }
      }

      /// <summary>
      /// Url to an info site.
      /// </summary>
      /// <value>The info url.</value>
      public string InfoUrl
      {
          get { return _infoUrl; }
      }

      /// <summary>
      /// Description of the importer.
      /// </summary>
      /// <value>The description.</value>
      public string Description
      {
          get { return _description; }
      }

      #endregion

      #region unpacking files and images

      /// <summary>
      /// Initializes the package reader with the given file and reads the
      /// package resources into the document.
      /// </summary>
      /// <param name="file">The file.</param>
      private void UnpackFiles(string file)
      {
          _packageReader.Initialize(file);
          ReadResources();
      }

      /// <summary>
      /// Reads configurations, metadata, settings, manifest, styles, custom files,
      /// pictures and thumbnails from the package into the document.
      /// </summary>
      private void ReadResources()
      {
          _document.DocumentConfigurations2 = new DocumentConfiguration2();
          ReadDocumentConfigurations2();

          _document.DocumentMetadata = new DocumentMetadata(_document);
          // NOTE(review): every package stream below is disposed right after
          // LoadFromFile returns; this assumes LoadFromFile consumes the stream
          // synchronously and does not keep it - confirm against its implementation.
          using (Stream stream = _packageReader.Open(DocumentMetadata.FileName))
          {
              _document.DocumentMetadata.LoadFromFile(stream);
          }

          string manifestPath = Path.Combine(DocumentManifest.FolderName, DocumentManifest.FileName);
          TextDocument textDocument = _document as TextDocument;
          SpreadsheetDocument spreadsheetDocument = _document as SpreadsheetDocument;

          if (textDocument != null)
          {
              textDocument.DocumentSetting = new DocumentSetting();
              using (Stream stream = _packageReader.Open(DocumentSetting.FileName))
              {
                  textDocument.DocumentSetting.LoadFromFile(stream);
              }

              textDocument.DocumentManifest = new DocumentManifest();
              using (Stream stream = _packageReader.Open(manifestPath))
              {
                  textDocument.DocumentManifest.LoadFromFile(stream);
              }

              textDocument.DocumentStyles = new DocumentStyles();
              using (Stream stream = _packageReader.Open(DocumentStyles.FileName))
              {
                  textDocument.DocumentStyles.LoadFromFile(stream);
              }

              ReadCustomFiles(textDocument.DocumentManifest);
          }
          else if (spreadsheetDocument != null)
          {
              spreadsheetDocument.DocumentSetting = new SpreadsheetDocuments.DocumentSetting();
              using (Stream stream = _packageReader.Open(DocumentSetting.FileName))
              {
                  spreadsheetDocument.DocumentSetting.LoadFromFile(stream);
              }

              spreadsheetDocument.DocumentManifest = new SpreadsheetDocuments.DocumentManifest();
              using (Stream stream = _packageReader.Open(manifestPath))
              {
                  spreadsheetDocument.DocumentManifest.LoadFromFile(stream);
              }

              spreadsheetDocument.DocumentStyles = new SpreadsheetDocuments.DocumentStyles();
              using (Stream stream = _packageReader.Open(DocumentStyles.FileName))
              {
                  spreadsheetDocument.DocumentStyles.LoadFromFile(stream);
              }

              ReadCustomFiles(spreadsheetDocument.DocumentManifest);
          }

          _document.DocumentPictures = ReadImageResources("Pictures");
          _document.DocumentThumbnails = ReadImageResources("Thumbnails");
          // There is no real need to read the fonts.
          InitMetaData();
      }

      /// <summary>
      /// Reads the first file of the Configurations2 folder, if that folder
      /// exists, and appends its text to the document's configuration content.
      /// </summary>
      private void ReadDocumentConfigurations2()
      {
          if (!_packageReader.DirectoryExists(DocumentConfiguration2.FolderName))
          {
              return;
          }

          IFile file = _packageReader.GetFiles(DocumentConfiguration2.FolderName).FirstOrDefault();
          if (file == null)
          {
              return;
          }

          _document.DocumentConfigurations2.FileName = file.Name;
          using (TextReader reader = new StreamReader(file.OpenRead()))
          {
              _document.DocumentConfigurations2.Configurations2Content += reader.ReadToEnd();
          }
      }

      /// <summary>
      /// Wraps every file of the given package folder in a <see cref="DocumentPicture"/>.
      /// </summary>
      /// <param name="folder">The folder.</param>
      /// <returns>
      /// The collected pictures; an empty collection if the folder does not exist.
      /// </returns>
      private DocumentPictureCollection ReadImageResources(string folder)
      {
          DocumentPictureCollection pictures = new DocumentPictureCollection();
          // A missing folder simply means there is nothing to collect.
          if (!_packageReader.DirectoryExists(folder))
          {
              return pictures;
          }

          // Every file in the folder is handed to DocumentPicture as-is.
          foreach (IFile file in _packageReader.GetFiles(folder))
          {
              pictures.Add(new DocumentPicture(file));
          }
          return pictures;
      }

      /// <summary>
      /// Registers every manifest file entry that is not handled elsewhere by
      /// this importer as a custom file of the document.
      /// </summary>
      /// <param name="manifest">The manifest whose file entries are inspected.</param>
      private void ReadCustomFiles(DocumentManifest manifest)
      {
          if (manifest == null || manifest.Manifest == null)
          {
              return;
          }

          // A manifest without the expected root element has no entries to
          // register; previously this case raised a NullReferenceException.
          XElement root = manifest.Manifest.Element(Ns.Manifest + "manifest");
          if (root == null)
          {
              return;
          }

          foreach (XElement fileEntry in root.Elements(Ns.Manifest + "file-entry"))
          {
              string fullPath = (string) fileEntry.Attribute(Ns.Manifest + "full-path");
              if (!IsCustomFilePath(fullPath))
              {
                  continue;
              }

              string mediaType = (string) fileEntry.Attribute(Ns.Manifest + "media-type");
              _document.CustomFiles.Add(new PackageCustomFile(mediaType, _packageReader.GetFile(fullPath)));
          }
      }

      /// <summary>
      /// Decides whether a manifest path denotes a custom file, i.e. it is not
      /// empty, not a folder entry, not inside Configurations2/Pictures/Thumbnails
      /// and not one of the <see cref="KnownFiles"/>.
      /// </summary>
      /// <param name="fullPath">The full-path attribute value of the manifest entry.</param>
      /// <returns><c>true</c> if the entry should be registered as a custom file.</returns>
      private static bool IsCustomFilePath(string fullPath)
      {
          if (string.IsNullOrEmpty(fullPath))
          {
              return false;
          }

          // Package paths are identifiers, not linguistic text, hence ordinal comparison.
          if (fullPath.EndsWith("/", StringComparison.Ordinal)
              || fullPath.StartsWith("Configurations2/", StringComparison.Ordinal)
              || fullPath.StartsWith("Pictures/", StringComparison.Ordinal)
              || fullPath.StartsWith("Thumbnails/", StringComparison.Ordinal))
          {
              return false;
          }

          return !KnownFiles.Contains(fullPath);
      }

      #endregion

      /// <summary>
      /// Gets or sets the document this importer works on.
      /// </summary>
      public IDocument Document
      {
          get { return _document; }
          set { _document = value; }
      }

      /// <summary>
      /// Reads the content.
      /// </summary>
      private void ReadContent()
      {
          /*
           * NOTICE:
           * Do not change this order!
           */
          // 1. load content file
          using (TextReader reader = new StreamReader(_packageReader.Open("content.xml")))
          {
              _document.XmlDoc = XDocument.Load(reader);
          }

          // 2. Read local styles
          new LocalStyleProcessor(_document, false).ReadStyles();

          // 3. Import common styles and read common styles
          ImportCommonStyles();
          new LocalStyleProcessor(_document, true).ReadStyles();

          TextDocument textDocument = _document as TextDocument;
          if (textDocument != null)
          {
              new FormsProcessor(_document).ReadFormNodes();
              ReadVariableDeclarations(textDocument);
          }

          // 4. Register warning events
          MainContentProcessor mcp = new MainContentProcessor(_document);
          mcp.Warning += OnContentWarning;

          // 5. Read the content
          mcp.ReadContentNodes();

          // 6.1 load master pages and styles for TextDocument
          if (textDocument != null)
          {
              MasterPageFactory.RenameMasterStyles(
                  textDocument.DocumentStyles.Styles,
                  _document.XmlDoc);
              // Read the moved and renamed styles
              new LocalStyleProcessor(_document, false).ReReadKnownAutomaticStyles();
              new MasterPageFactory().FillFromXMLDocument(textDocument);
          }
      }

      /// <summary>
      /// Replaces the variable declarations of the text document with the ones
      /// found in the content's text:variable-decls element and empties that element.
      /// </summary>
      /// <param name="textDocument">The text document being imported.</param>
      private static void ReadVariableDeclarations(TextDocument textDocument)
      {
          textDocument.VariableDeclarations.Clear();

          XElement nodeText =
              textDocument.XmlDoc.Elements(Ns.Office + "document-content")
                  .Elements(Ns.Office + "body")
                  .Elements(Ns.Office + "text").FirstOrDefault();
          if (nodeText == null)
          {
              return;
          }

          XElement nodeVarDecls = nodeText.Element(Ns.Text + "variable-decls");
          if (nodeVarDecls == null)
          {
              return;
          }

          // Iterate over a deep copy: the elements handed to VariableDecl then
          // belong to the copy and are not removed when the original node is
          // emptied below.
          foreach (XElement vd in new XElement(nodeVarDecls).Elements(Ns.Text + "variable-decl"))
          {
              textDocument.VariableDeclarations.Add(new VariableDecl(textDocument, vd));
          }
          nodeVarDecls.Value = "";
      }

      /// <summary>
      /// If the common styles are placed in the DocumentStyles,
      /// a copy of them is appended to the root element of the content file.
      /// </summary>
      public void ImportCommonStyles()
      {
          XElement nodeStyles = null;
          TextDocument textDocument = _document as TextDocument;
          SpreadsheetDocument spreadsheetDocument = _document as SpreadsheetDocument;

          if (textDocument != null)
          {
              nodeStyles = textDocument.DocumentStyles.Styles
                  .Elements(Ns.Office + "document-styles")
                  .Elements(Ns.Office + "styles")
                  .FirstOrDefault();
          }
          else if (spreadsheetDocument != null)
          {
              nodeStyles = spreadsheetDocument.DocumentStyles.Styles
                  .Elements(Ns.Office + "document-styles")
                  .Elements(Ns.Office + "styles")
                  .FirstOrDefault();
          }

          XElement nodeOfficeDocument = _document.XmlDoc.Element(Ns.Office + "document-content");
          if (nodeOfficeDocument != null && nodeStyles != null)
          {
              // Add a copy so the styles document keeps its own node.
              nodeOfficeDocument.Add(new XElement(nodeStyles));
          }
      }

      /// <summary>
      /// Resets the metadata counters to zero and sets the last-modified
      /// timestamp to the current local time in sortable ("s") format.
      /// </summary>
      private void InitMetaData()
      {
          _document.DocumentMetadata.ImageCount = 0;
          _document.DocumentMetadata.ObjectCount = 0;
          _document.DocumentMetadata.ParagraphCount = 0;
          _document.DocumentMetadata.TableCount = 0;
          _document.DocumentMetadata.WordCount = 0;
          _document.DocumentMetadata.CharacterCount = 0;
          _document.DocumentMetadata.LastModified = DateTime.Now.ToString("s");
      }

      /// <summary>
      /// Collects a warning raised by the main content processor.
      /// </summary>
      /// <param name="warning">The warning.</param>
      private void OnContentWarning(AODLWarning warning)
      {
          _importError.Add(warning);
      }
  }
  391. }
  392. /*
  393. * $Log: OpenDocumentImporter.cs,v $
  394. * Revision 1.10 2008/04/29 15:39:52 mt
  395. * new copyright header
  396. *
  397. * Revision 1.9 2007/08/15 11:53:40 larsbehr
  398. * - Optimized Mono related stuff
  399. *
  400. * Revision 1.8 2007/07/15 09:30:46 yegorov
  401. * Issue number:
  402. * Submitted by:
  403. * Reviewed by:
  404. *
  405. * Revision 1.5 2007/06/20 17:37:19 yegorov
  406. * Issue number:
  407. * Submitted by:
  408. * Reviewed by:
  409. *
  410. * Revision 1.2 2007/04/08 16:51:37 larsbehr
  411. * - finished master pages and styles for text documents
  412. * - several bug fixes
  413. *
  414. * Revision 1.1 2007/02/25 08:58:45 larsbehr
  415. * initial checkin, import from Sourceforge.net to OpenOffice.org
  416. *
  417. * Revision 1.5 2006/05/02 17:37:16 larsbm
  418. * - Flag added graphics with guid
  419. * - Set guid based read and write directories
  420. *
  421. * Revision 1.4 2006/02/05 20:03:32 larsbm
  422. * - Fixed several bugs
  423. * - clean up some messy code
  424. *
  425. * Revision 1.3 2006/02/02 21:55:59 larsbm
  426. * - Added Clone object support for many AODL object types
  427. * - New Importer implementation PlainTextImporter and CsvImporter
  428. * - New tests
  429. *
  430. * Revision 1.2 2006/01/29 18:52:14 larsbm
  431. * - Added support for common styles (style templates in OpenOffice)
  432. * - Draw TextBox import and export
  433. * - DrawTextBox html export
  434. *
  435. * Revision 1.1 2006/01/29 11:28:23 larsbm
  436. * - Changes for the new version. 1.2. see next changelog for details
  437. *
  438. * Revision 1.4 2005/12/18 18:29:46 larsbm
  439. * - AODC Gui redesign
  440. * - AODC HTML exporter refactored
  441. * - Full Meta Data Support
  442. * - Increase textprocessing performance
  443. *
  444. * Revision 1.3 2005/12/12 19:39:17 larsbm
  445. * - Added Paragraph Header
  446. * - Added Table Row Header
  447. * - Fixed some bugs
  448. * - better whitespace handling
  449. * - Implementation of HTML Exporter
  450. *
  451. * Revision 1.2 2005/11/20 17:31:20 larsbm
  452. * - added support for XLinks, TabStopStyles
  453. * - First experimental implementation of loading documents
  454. * - load and save via importer and exporter interfaces
  455. *
  456. * Revision 1.1 2005/11/06 14:55:25 larsbm
  457. * - Interfaces for Import and Export
  458. * - First implementation of IExport OpenDocumentTextExporter
  459. *
  460. */