PageRenderTime 4008ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/Logic/SubtitleFormats/OresmeDocXDocument.cs

https://gitlab.com/minaz922/subtitleedit
C# | 290 lines | 240 code | 27 blank | 23 comment | 21 complexity | 688011c25254ccb323e33f6ddd0e1910 MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. using System.Xml;
  5. namespace Nikse.SubtitleEdit.Logic.SubtitleFormats
  6. {
  7. public class OresmeDocXDocument : SubtitleFormat
  8. {
  /// <summary>
  /// Gets the file extension reported for this format (".xml").
  /// </summary>
  public override string Extension
  {
      get
      {
          return ".xml";
      }
  }
  /// <summary>
  /// Gets the display name of this format ("Oresme Docx document").
  /// </summary>
  public override string Name
  {
      get
      {
          return "Oresme Docx document";
      }
  }
  /// <summary>
  /// Gets a value indicating whether this format is time based; always true here.
  /// </summary>
  public override bool IsTimeBased
  {
      get
      {
          return true;
      }
  }
  /// <summary>
  /// Decides whether the given lines look like a document of this format.
  /// </summary>
  /// <param name="lines">Lines of the candidate file.</param>
  /// <param name="fileName">Name of the candidate file; passed through to <c>LoadSubtitle</c>.</param>
  /// <returns>
  /// True when the text contains a "&lt;w:tc&gt;" tag and loading it yields more paragraphs
  /// than row errors; false otherwise, including when the text is not well-formed XML.
  /// </returns>
  public override bool IsMine(List<string> lines, string fileName)
  {
      var sb = new StringBuilder();
      lines.ForEach(line => sb.AppendLine(line));
      string xmlAsString = sb.ToString().Trim();

      // Cheap pre-check: without a table cell tag there is nothing to load.
      if (!xmlAsString.Contains("<w:tc>"))
      {
          return false;
      }

      var subtitle = new Subtitle();
      try
      {
          LoadSubtitle(subtitle, lines, fileName);
      }
      catch (XmlException)
      {
          // Format detection must not throw: text that merely contains "<w:tc>"
          // but is not well-formed XML is simply not this format.
          return false;
      }

      return subtitle.Paragraphs.Count > _errorCount;
  }
  // WordprocessingML template for the exported document. It is written with single
  // quotes so it fits in a verbatim string; ToText replaces them with double quotes
  // before parsing, then appends one w:tr row per paragraph to the w:tbl element.
  // The two w:gridCol widths (1240 and 5560) match the cell widths that
  // CreateXmlParagraph writes for the time code cell and the text cell.
  private string Layout = @"<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
<w:document xmlns:wpc='http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas' xmlns:mc='http://schemas.openxmlformats.org/markup-compatibility/2006' xmlns:o='urn:schemas-microsoft-com:office:office' xmlns:r='http://schemas.openxmlformats.org/officeDocument/2006/relationships' xmlns:m='http://schemas.openxmlformats.org/officeDocument/2006/math' xmlns:v='urn:schemas-microsoft-com:vml' xmlns:wp14='http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing' xmlns:wp='http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing' xmlns:w10='urn:schemas-microsoft-com:office:word' xmlns:w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' xmlns:w14='http://schemas.microsoft.com/office/word/2010/wordml' xmlns:wpg='http://schemas.microsoft.com/office/word/2010/wordprocessingGroup' xmlns:wpi='http://schemas.microsoft.com/office/word/2010/wordprocessingInk' xmlns:wne='http://schemas.microsoft.com/office/word/2006/wordml' xmlns:wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' mc:Ignorable='w14 wp14'>
<w:body>
<w:tbl>
<w:tblPr>
<w:tblW w:w='0' w:type='auto'/>
<w:tblBorders>
<w:top w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
<w:left w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
<w:bottom w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
<w:right w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
<w:insideH w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
<w:insideV w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
</w:tblBorders>
<w:tblLayout w:type='fixed'/>
<w:tblCellMar>
<w:left w:w='70' w:type='dxa'/>
<w:right w:w='70' w:type='dxa'/>
</w:tblCellMar>
<w:tblLook w:val='0000' w:firstRow='0' w:lastRow='0' w:firstColumn='0' w:lastColumn='0' w:noHBand='0' w:noVBand='0'/>
</w:tblPr>
<w:tblGrid>
<w:gridCol w:w='1240'/>
<w:gridCol w:w='5560'/>
</w:tblGrid>
</w:tbl>
<w:p w:rsidR='00D56C9E' w:rsidRDefault='00D56C9E'/>
<w:sectPr w:rsidR='00D56C9E'>
<w:pgSz w:w='12240' w:h='15840'/>
<w:pgMar w:top='1440' w:right='1440' w:bottom='1440' w:left='1440' w:header='720' w:footer='720' w:gutter='0'/>
<w:cols w:space='720'/>
</w:sectPr>
</w:body>
</w:document>";
  /// <summary>
  /// Serializes the subtitle into the WordprocessingML template, one table row per paragraph.
  /// </summary>
  /// <param name="subtitle">Subtitle whose paragraphs are written as table rows.</param>
  /// <param name="title">Not used by this format.</param>
  /// <returns>The document as text, produced by <c>ToUtf8XmlString</c>.</returns>
  public override string ToText(Subtitle subtitle, string title)
  {
      var document = new XmlDocument();
      document.LoadXml(Layout.Replace("'", "\""));

      var namespaces = new XmlNamespaceManager(document.NameTable);
      namespaces.AddNamespace("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
      XmlNode table = document.DocumentElement.SelectSingleNode("w:body/w:tbl", namespaces);

      int lastIndex = subtitle.Paragraphs.Count - 1;
      for (int index = 0; index <= lastIndex; index++)
      {
          Paragraph current = subtitle.Paragraphs[index];
          table.AppendChild(CreateXmlParagraph(document, current));

          if (index == lastIndex)
          {
              continue; // no following paragraph to measure a gap against
          }

          // A gap of more than 100 ms to the next paragraph gets an empty row
          // stamped with the current end time, marking where the text ends.
          var gap = Math.Abs(current.EndTime.TotalMilliseconds - subtitle.Paragraphs[index + 1].StartTime.TotalMilliseconds);
          if (gap > 100)
          {
              var endMarker = new Paragraph(string.Empty, current.EndTime.TotalMilliseconds, 0);
              table.AppendChild(CreateXmlParagraph(document, endMarker));
          }
      }

      return ToUtf8XmlString(document);
  }
  // Namespace URI bound to the "w:" prefix on every element and attribute created below.
  private const string WordMlNamespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

  /// <summary>Creates an element in the "w" namespace and appends it to <paramref name="parent"/>.</summary>
  private static XmlElement AppendWordElement(XmlDocument xml, XmlNode parent, string qualifiedName)
  {
      XmlElement element = xml.CreateElement(qualifiedName, WordMlNamespace);
      parent.AppendChild(element);
      return element;
  }

  /// <summary>Creates an attribute in the "w" namespace with the given value and appends it to <paramref name="owner"/>.</summary>
  private static void AppendWordAttribute(XmlDocument xml, XmlElement owner, string qualifiedName, string value)
  {
      XmlAttribute attribute = xml.CreateAttribute(qualifiedName, WordMlNamespace);
      attribute.InnerText = value;
      owner.Attributes.Append(attribute);
  }

  /// <summary>Appends a w:tcPr with a w:tcW child of the given width (type "dxa") to a cell and returns the w:tcPr.</summary>
  private static XmlElement AppendCellProperties(XmlDocument xml, XmlElement cell, string width)
  {
      XmlElement properties = AppendWordElement(xml, cell, "w:tcPr");
      XmlElement cellWidth = AppendWordElement(xml, properties, "w:tcW");
      AppendWordAttribute(xml, cellWidth, "w:w", width);
      AppendWordAttribute(xml, cellWidth, "w:type", "dxa");
      return properties;
  }

  /// <summary>Appends a w:p carrying the fixed rsid attributes and a w:pPr/w:pStyle with the given style name.</summary>
  private static XmlElement AppendStyledParagraph(XmlDocument xml, XmlElement cell, string styleName)
  {
      XmlElement wordParagraph = AppendWordElement(xml, cell, "w:p");
      AppendWordAttribute(xml, wordParagraph, "w:rsidR", "00D56C9E");
      AppendWordAttribute(xml, wordParagraph, "w:rsidRDefault", "00D56C9E");
      XmlElement paragraphProperties = AppendWordElement(xml, wordParagraph, "w:pPr");
      XmlElement style = AppendWordElement(xml, paragraphProperties, "w:pStyle");
      AppendWordAttribute(xml, style, "w:val", styleName);
      return wordParagraph;
  }

  /// <summary>
  /// Builds one table row (w:tr) for a paragraph: a 1240-wide cell holding the start
  /// time code and a 5560-wide, bottom-aligned cell holding the text.
  /// </summary>
  /// <param name="xml">Document used as the factory for the new nodes.</param>
  /// <param name="p">Paragraph supplying the start time and the text.</param>
  /// <returns>The new row; it is not attached to the document here.</returns>
  private XmlNode CreateXmlParagraph(XmlDocument xml, Paragraph p)
  {
      XmlNode row = xml.CreateElement("w:tr", WordMlNamespace);

      // First cell: <w:tc><w:tcPr><w:tcW/></w:tcPr><w:p ...><w:pPr><w:pStyle w:val="TimeCode"/></w:pPr><w:r><w:t>time</w:t></w:r></w:p></w:tc>
      XmlElement timeCell = AppendWordElement(xml, row, "w:tc");
      AppendCellProperties(xml, timeCell, "1240");
      XmlElement timeParagraph = AppendStyledParagraph(xml, timeCell, "TimeCode");
      XmlElement timeRun = AppendWordElement(xml, timeParagraph, "w:r");
      AppendWordElement(xml, timeRun, "w:t").InnerText = ToTimeCode(p.StartTime);

      // Second cell: same shape, plus <w:vAlign w:val="bottom"/> and the "PopOn" style.
      XmlElement textCell = AppendWordElement(xml, row, "w:tc");
      XmlElement textCellProperties = AppendCellProperties(xml, textCell, "5560");
      XmlElement verticalAlign = AppendWordElement(xml, textCellProperties, "w:vAlign");
      AppendWordAttribute(xml, verticalAlign, "w:val", "bottom");
      XmlElement textParagraph = AppendStyledParagraph(xml, textCell, "PopOn");

      // One run per text line; every run after the first starts with a w:br line break.
      var textLines = Utilities.RemoveHtmlTags(p.Text, true).Replace(Environment.NewLine, "\n").Split('\n');
      int lineNumber = 0;
      foreach (var textLine in textLines)
      {
          XmlElement run = AppendWordElement(xml, textParagraph, "w:r");
          if (lineNumber > 0)
          {
              AppendWordElement(xml, run, "w:br");
          }
          AppendWordElement(xml, run, "w:t").InnerText = textLine;
          lineNumber++;
      }

      return row;
  }
  /// <summary>
  /// Formats a time code for the first table cell by delegating to <c>ToHHMMSSFF</c>.
  /// </summary>
  /// <param name="timeCode">Time code to format.</param>
  /// <returns>The formatted text, e.g. 10:00:07:27.</returns>
  private string ToTimeCode(TimeCode timeCode)
  {
      var formatted = timeCode.ToHHMMSSFF();
      return formatted;
  }
  /// <summary>
  /// Parses the lines as XML and adds one paragraph to <paramref name="subtitle"/> for every
  /// table row (w:tr) that contains at least one w:t text node.
  /// </summary>
  /// <remarks>
  /// The first w:t of a row is parsed as the start time. Each paragraph ends where the next
  /// one starts; the last one gets a duration of 2500 ms. Empty paragraphs are removed
  /// afterwards, and an end time equal to the following start time is pulled back by 1 ms.
  /// Rows that fail to parse are counted in <c>_errorCount</c> and skipped.
  /// </remarks>
  /// <param name="subtitle">Subtitle that receives the paragraphs.</param>
  /// <param name="lines">Lines of the file to parse.</param>
  /// <param name="fileName">Name of the file; not used here.</param>
  /// <exception cref="XmlException">The text is not well-formed XML.</exception>
  public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
  {
      _errorCount = 0;

      var sb = new StringBuilder();
      lines.ForEach(line => sb.AppendLine(line));

      var xml = new XmlDocument();
      // The file is untrusted input: never resolve external DTDs or entities.
      xml.XmlResolver = null;
      xml.LoadXml(sb.ToString().Trim());

      var nsmgr = new XmlNamespaceManager(xml.NameTable);
      nsmgr.AddNamespace("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");

      foreach (XmlNode node in xml.DocumentElement.SelectNodes("//w:tr", nsmgr))
      {
          try
          {
              XmlNode t = node.SelectSingleNode("w:tc/w:p/w:r/w:t", nsmgr);
              if (t == null)
              {
                  continue; // row without any text node: nothing to read
              }

              var p = new Paragraph();
              p.StartTime = GetTimeCode(t.InnerText);

              var text = new StringBuilder();
              foreach (XmlNode wrNode in node.SelectNodes("w:tc/w:p/w:r", nsmgr))
              {
                  foreach (XmlNode child in wrNode.ChildNodes)
                  {
                      if (child.Name == "w:t")
                      {
                          // Skip text shaped like a time code (11 characters, 3 of them colons),
                          // so the first cell's time code does not end up in the subtitle text.
                          bool isTimeCode = child.InnerText.Length == 11 && child.InnerText.Replace(":", string.Empty).Length == 8;
                          if (!isTimeCode)
                          {
                              text.Append(child.InnerText);
                          }
                      }
                      else if (child.Name == "w:br")
                      {
                          text.AppendLine();
                      }
                  }
              }

              p.Text = text.ToString();
              subtitle.Paragraphs.Add(p);
          }
          catch (Exception ex)
          {
              // Best effort: a bad row is logged and counted, the remaining rows are still read.
              System.Diagnostics.Debug.WriteLine(ex.Message);
              _errorCount++;
          }
      }

      // Without this guard, indexing the last paragraph below throws on an empty list.
      if (subtitle.Paragraphs.Count == 0)
      {
          return;
      }

      for (int i = 0; i < subtitle.Paragraphs.Count - 1; i++)
      {
          subtitle.Paragraphs[i].EndTime.TotalMilliseconds = subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds;
      }

      // The last paragraph has no successor, so give it a fixed 2500 ms duration
      // relative to its own start (an absolute 2500 ms would end before it begins).
      Paragraph last = subtitle.Paragraphs[subtitle.Paragraphs.Count - 1];
      last.EndTime.TotalMilliseconds = last.StartTime.TotalMilliseconds + 2500;

      subtitle.RemoveEmptyLines();

      for (int i = 0; i < subtitle.Paragraphs.Count - 1; i++)
      {
          if (subtitle.Paragraphs[i].EndTime.TotalMilliseconds == subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds)
          {
              // Keep back-to-back paragraphs from sharing the same millisecond.
              subtitle.Paragraphs[i].EndTime.TotalMilliseconds = subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds - 1;
          }
      }

      subtitle.Renumber(1);
  }
  /// <summary>
  /// Parses a colon-separated time code with four integer fields.
  /// </summary>
  /// <param name="s">Text to parse, presumably hours:minutes:seconds:frames (e.g. 10:00:07:27).</param>
  /// <returns>
  /// A <c>TimeCode</c> built from the first three fields and the fourth field passed
  /// through <c>FramesToMillisecondsMax999</c>.
  /// </returns>
  private static TimeCode GetTimeCode(string s)
  {
      string[] fields = s.Trim().Split(':');

      // Parsed left to right, so a malformed field fails in the same order as before.
      int first = int.Parse(fields[0]);
      int second = int.Parse(fields[1]);
      int third = int.Parse(fields[2]);
      int frames = int.Parse(fields[3]);

      return new TimeCode(first, second, third, FramesToMillisecondsMax999(frames));
  }
  262. }
  263. }