// /src/Logic/SubtitleFormats/OresmeDocXDocument.cs
// C# | 290 lines | 240 code | 27 blank | 23 comment | 21 complexity | 688011c25254ccb323e33f6ddd0e1910 MD5 | raw file
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using System.Xml;

namespace Nikse.SubtitleEdit.Logic.SubtitleFormats
{
    /// <summary>
    /// Subtitle format stored as a WordprocessingML document (".xml") whose body
    /// holds a single two-column table: the first cell of every row carries a
    /// HH:MM:SS:FF time code and the second cell carries the subtitle text.
    /// A row with a time code and no text marks the end of the preceding subtitle.
    /// </summary>
    public class OresmeDocXDocument : SubtitleFormat
    {
        /// <summary>Namespace URI of every "w:" element and attribute read or written here.</summary>
        private const string WordNamespace = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";

        /// <summary>Value written to the w:rsidR / w:rsidRDefault attributes of generated paragraphs.</summary>
        private const string RevisionId = "00D56C9E";

        /// <summary>Width written for the time code column (matches the first w:gridCol of the template).</summary>
        private const string TimeCodeColumnWidth = "1240";

        /// <summary>Width written for the text column (matches the second w:gridCol of the template).</summary>
        private const string TextColumnWidth = "5560";

        /// <summary>Duration given to the last subtitle, which has no following row to take its end time from.</summary>
        private const double LastParagraphDurationMilliseconds = 2500;

        /// <summary>Gap to the next subtitle above which an explicit empty end-marker row is written.</summary>
        private const double EndMarkerGapMilliseconds = 100;

        public override string Extension
        {
            get { return ".xml"; }
        }

        public override string Name
        {
            get { return "Oresme Docx document"; }
        }

        public override bool IsTimeBased
        {
            get { return true; }
        }

        /// <summary>
        /// Decides whether the given file content is in this format.
        /// </summary>
        /// <param name="lines">Lines of the candidate file.</param>
        /// <param name="fileName">Name of the candidate file; only passed on to <see cref="LoadSubtitle"/>.</param>
        /// <returns>
        /// True when the content contains a literal "&lt;w:tc&gt;" tag and loading it
        /// yields more paragraphs than errors; otherwise false.
        /// </returns>
        public override bool IsMine(List<string> lines, string fileName)
        {
            // The marker contains no line break, so checking line by line is equivalent
            // to checking the joined text and avoids building one large string.
            bool hasTableCell = lines.Exists(line => line != null && line.Contains("<w:tc>"));
            if (!hasTableCell)
            {
                return false;
            }

            var subtitle = new Subtitle();
            LoadSubtitle(subtitle, lines, fileName);
            return subtitle.Paragraphs.Count > _errorCount;
        }

        /// <summary>
        /// Empty document template: one table with borders and a two-column grid but no rows.
        /// Written with single quotes for readability; <see cref="ToText"/> converts them to
        /// double quotes before parsing.
        /// </summary>
        private const string Layout = @"<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
<w:document xmlns:wpc='http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas' xmlns:mc='http://schemas.openxmlformats.org/markup-compatibility/2006' xmlns:o='urn:schemas-microsoft-com:office:office' xmlns:r='http://schemas.openxmlformats.org/officeDocument/2006/relationships' xmlns:m='http://schemas.openxmlformats.org/officeDocument/2006/math' xmlns:v='urn:schemas-microsoft-com:vml' xmlns:wp14='http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing' xmlns:wp='http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing' xmlns:w10='urn:schemas-microsoft-com:office:word' xmlns:w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' xmlns:w14='http://schemas.microsoft.com/office/word/2010/wordml' xmlns:wpg='http://schemas.microsoft.com/office/word/2010/wordprocessingGroup' xmlns:wpi='http://schemas.microsoft.com/office/word/2010/wordprocessingInk' xmlns:wne='http://schemas.microsoft.com/office/word/2006/wordml' xmlns:wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' mc:Ignorable='w14 wp14'>
  <w:body>
    <w:tbl>
      <w:tblPr>
        <w:tblW w:w='0' w:type='auto'/>
        <w:tblBorders>
          <w:top w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
          <w:left w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
          <w:bottom w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
          <w:right w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
          <w:insideH w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
          <w:insideV w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
        </w:tblBorders>
        <w:tblLayout w:type='fixed'/>
        <w:tblCellMar>
          <w:left w:w='70' w:type='dxa'/>
          <w:right w:w='70' w:type='dxa'/>
        </w:tblCellMar>
        <w:tblLook w:val='0000' w:firstRow='0' w:lastRow='0' w:firstColumn='0' w:lastColumn='0' w:noHBand='0' w:noVBand='0'/>
      </w:tblPr>
      <w:tblGrid>
        <w:gridCol w:w='1240'/>
        <w:gridCol w:w='5560'/>
      </w:tblGrid>
    </w:tbl>
    <w:p w:rsidR='00D56C9E' w:rsidRDefault='00D56C9E'/>
    <w:sectPr w:rsidR='00D56C9E'>
      <w:pgSz w:w='12240' w:h='15840'/>
      <w:pgMar w:top='1440' w:right='1440' w:bottom='1440' w:left='1440' w:header='720' w:footer='720' w:gutter='0'/>
      <w:cols w:space='720'/>
    </w:sectPr>
  </w:body>
</w:document>";

        /// <summary>
        /// Serializes the subtitle into the table of the document template.
        /// </summary>
        /// <param name="subtitle">Subtitle whose paragraphs become table rows.</param>
        /// <param name="title">Not used by this format.</param>
        /// <returns>The document as produced by <c>ToUtf8XmlString</c>.</returns>
        public override string ToText(Subtitle subtitle, string title)
        {
            var xml = new XmlDocument();
            xml.LoadXml(Layout.Replace("'", "\""));
            var nsmgr = new XmlNamespaceManager(xml.NameTable);
            nsmgr.AddNamespace("w", WordNamespace);
            XmlNode table = xml.DocumentElement.SelectSingleNode("w:body/w:tbl", nsmgr);

            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = subtitle.Paragraphs[i];
                table.AppendChild(CreateXmlParagraph(xml, p));

                // The format only stores start times. When the next subtitle does not start
                // (almost) where this one ends, write an empty row at the end time so the
                // gap is not swallowed on load. The last subtitle never gets such a row.
                bool hasNext = i < subtitle.Paragraphs.Count - 1;
                if (hasNext && Math.Abs(p.EndTime.TotalMilliseconds - subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds) > EndMarkerGapMilliseconds)
                {
                    var endMarker = new Paragraph(string.Empty, p.EndTime.TotalMilliseconds, 0);
                    table.AppendChild(CreateXmlParagraph(xml, endMarker));
                }
            }

            return ToUtf8XmlString(xml);
        }

        /// <summary>
        /// Builds one table row (w:tr) for a paragraph: a "TimeCode"-styled cell holding the
        /// start time followed by a "PopOn"-styled, bottom-aligned cell holding the text.
        /// </summary>
        /// <param name="xml">Document that will own the created nodes.</param>
        /// <param name="p">Paragraph supplying the start time and text.</param>
        /// <returns>The new, not yet attached, row element.</returns>
        private XmlNode CreateXmlParagraph(XmlDocument xml, Paragraph p)
        {
            XmlElement row = CreateWordElement(xml, "tr");

            //<w:tc>
            //  <w:tcPr><w:tcW w:w='1240' w:type='dxa'/></w:tcPr>
            //  <w:p w:rsidR='00D56C9E' w:rsidRDefault='00D56C9E'>
            //    <w:pPr><w:pStyle w:val='TimeCode'/></w:pPr>
            //    <w:r><w:t>[TIMECODE]</w:t></w:r>
            //  </w:p>
            //</w:tc>
            XmlElement timeCodeCell = CreateCell(xml, TimeCodeColumnWidth, null);
            XmlElement timeCodeParagraph = CreateStyledParagraph(xml, "TimeCode");
            timeCodeParagraph.AppendChild(CreateRun(xml, ToTimeCode(p.StartTime), false));
            timeCodeCell.AppendChild(timeCodeParagraph);
            row.AppendChild(timeCodeCell);

            //<w:tc>
            //  <w:tcPr><w:tcW w:w='5560' w:type='dxa'/><w:vAlign w:val='bottom'/></w:tcPr>
            //  <w:p w:rsidR='00D56C9E' w:rsidRDefault='00D56C9E'>
            //    <w:pPr><w:pStyle w:val='PopOn'/></w:pPr>
            //    <w:r><w:t>[LINE 1]</w:t></w:r>
            //    <w:r><w:br/><w:t>[LINE 2]</w:t></w:r>
            //  </w:p>
            //</w:tc>
            XmlElement textCell = CreateCell(xml, TextColumnWidth, "bottom");
            XmlElement textParagraph = CreateStyledParagraph(xml, "PopOn");
            string[] lines = Utilities.RemoveHtmlTags(p.Text, true).Replace(Environment.NewLine, "\n").Split('\n');
            for (int i = 0; i < lines.Length; i++)
            {
                // One run per text line; every line after the first starts with a break.
                textParagraph.AppendChild(CreateRun(xml, lines[i], i > 0));
            }
            textCell.AppendChild(textParagraph);
            row.AppendChild(textCell);

            return row;
        }

        /// <summary>Creates a "w:"-prefixed element in the WordprocessingML namespace.</summary>
        private static XmlElement CreateWordElement(XmlDocument xml, string localName)
        {
            return xml.CreateElement("w:" + localName, WordNamespace);
        }

        /// <summary>Appends a "w:"-prefixed attribute in the WordprocessingML namespace to the element.</summary>
        private static void AppendWordAttribute(XmlElement element, string localName, string value)
        {
            XmlAttribute attribute = element.OwnerDocument.CreateAttribute("w:" + localName, WordNamespace);
            attribute.Value = value;
            element.Attributes.Append(attribute);
        }

        /// <summary>
        /// Creates a table cell (w:tc) containing only its properties: the width in "dxa"
        /// units and, when <paramref name="verticalAlignment"/> is not null, a w:vAlign.
        /// </summary>
        private static XmlElement CreateCell(XmlDocument xml, string width, string verticalAlignment)
        {
            XmlElement cell = CreateWordElement(xml, "tc");
            XmlElement properties = CreateWordElement(xml, "tcPr");

            XmlElement cellWidth = CreateWordElement(xml, "tcW");
            AppendWordAttribute(cellWidth, "w", width);
            AppendWordAttribute(cellWidth, "type", "dxa");
            properties.AppendChild(cellWidth);

            if (verticalAlignment != null)
            {
                XmlElement alignment = CreateWordElement(xml, "vAlign");
                AppendWordAttribute(alignment, "val", verticalAlignment);
                properties.AppendChild(alignment);
            }

            cell.AppendChild(properties);
            return cell;
        }

        /// <summary>Creates a paragraph (w:p) carrying the revision ids and the given paragraph style.</summary>
        private static XmlElement CreateStyledParagraph(XmlDocument xml, string styleName)
        {
            XmlElement paragraph = CreateWordElement(xml, "p");
            AppendWordAttribute(paragraph, "rsidR", RevisionId);
            AppendWordAttribute(paragraph, "rsidRDefault", RevisionId);

            XmlElement properties = CreateWordElement(xml, "pPr");
            XmlElement style = CreateWordElement(xml, "pStyle");
            AppendWordAttribute(style, "val", styleName);
            properties.AppendChild(style);
            paragraph.AppendChild(properties);
            return paragraph;
        }

        /// <summary>Creates a run (w:r) holding an optional leading line break (w:br) and a text node (w:t).</summary>
        private static XmlElement CreateRun(XmlDocument xml, string text, bool leadingLineBreak)
        {
            XmlElement run = CreateWordElement(xml, "r");
            if (leadingLineBreak)
            {
                run.AppendChild(CreateWordElement(xml, "br"));
            }

            XmlElement textElement = CreateWordElement(xml, "t");
            textElement.InnerText = text;
            run.AppendChild(textElement);
            return run;
        }

        /// <summary>Formats a time code the way it is stored in the first table column.</summary>
        private string ToTimeCode(TimeCode timeCode)
        {
            return timeCode.ToHHMMSSFF(); //10:00:07:27
        }

        /// <summary>
        /// Parses the document and appends one paragraph per table row that has a time code.
        /// End times are derived from the start time of the following row; rows without text
        /// are then removed, so they act purely as end markers.
        /// </summary>
        /// <param name="subtitle">Subtitle that receives the parsed paragraphs.</param>
        /// <param name="lines">Lines of the file to parse.</param>
        /// <param name="fileName">Not used by this format.</param>
        /// <remarks>
        /// Failures are counted in <c>_errorCount</c> rather than thrown: content that is not
        /// well-formed XML counts as one error and adds nothing, and each row whose time code
        /// cannot be parsed counts as one error and is skipped.
        /// </remarks>
        public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
        {
            _errorCount = 0;
            var sb = new StringBuilder();
            lines.ForEach(line => sb.AppendLine(line));

            // The file comes from outside; do not let the parser fetch external DTDs/entities.
            var xml = new XmlDocument { XmlResolver = null };
            try
            {
                xml.LoadXml(sb.ToString().Trim());
            }
            catch (XmlException ex)
            {
                // IsMine probes arbitrary files through this method, so malformed XML must
                // be reported as "not loadable" instead of escaping as an exception.
                System.Diagnostics.Debug.WriteLine(ex.Message);
                _errorCount = 1;
                return;
            }

            var nsmgr = new XmlNamespaceManager(xml.NameTable);
            nsmgr.AddNamespace("w", WordNamespace);
            foreach (XmlNode row in xml.DocumentElement.SelectNodes("//w:tr", nsmgr))
            {
                try
                {
                    // The first text node of the row is expected to be the time code.
                    XmlNode timeCodeNode = row.SelectSingleNode("w:tc/w:p/w:r/w:t", nsmgr);
                    if (timeCodeNode == null)
                    {
                        continue;
                    }

                    var p = new Paragraph();
                    p.StartTime = GetTimeCode(timeCodeNode.InnerText);
                    p.Text = ReadRowText(row, nsmgr);
                    subtitle.Paragraphs.Add(p);
                }
                catch (Exception ex)
                {
                    System.Diagnostics.Debug.WriteLine(ex.Message);
                    _errorCount++;
                }
            }

            if (subtitle.Paragraphs.Count == 0)
            {
                // Nothing parsed: there is no "last paragraph" to index below.
                return;
            }

            // Each subtitle lasts until the next row starts.
            for (int i = 0; i < subtitle.Paragraphs.Count - 1; i++)
            {
                subtitle.Paragraphs[i].EndTime.TotalMilliseconds = subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds;
            }

            // The last row has no successor, so give it a fixed duration relative to its own start.
            Paragraph last = subtitle.Paragraphs[subtitle.Paragraphs.Count - 1];
            last.EndTime.TotalMilliseconds = last.StartTime.TotalMilliseconds + LastParagraphDurationMilliseconds;

            // Drop the end-marker rows now that their start times have been used.
            subtitle.RemoveEmptyLines();

            // Avoid back-to-back subtitles sharing the exact same millisecond.
            for (int i = 0; i < subtitle.Paragraphs.Count - 1; i++)
            {
                if (subtitle.Paragraphs[i].EndTime.TotalMilliseconds == subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds)
                {
                    subtitle.Paragraphs[i].EndTime.TotalMilliseconds = subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds - 1;
                }
            }

            subtitle.Renumber(1);
        }

        /// <summary>
        /// Concatenates the text of all runs in all cells of a row. w:br becomes a line
        /// break, and text nodes that look like a time code are left out (the first cell
        /// is walked too, so its time code must not end up in the subtitle text).
        /// </summary>
        private static string ReadRowText(XmlNode row, XmlNamespaceManager nsmgr)
        {
            var text = new StringBuilder();
            foreach (XmlNode run in row.SelectNodes("w:tc/w:p/w:r", nsmgr))
            {
                foreach (XmlNode child in run.ChildNodes)
                {
                    if (IsWordElement(child, "t"))
                    {
                        if (!LooksLikeTimeCode(child.InnerText))
                        {
                            text.Append(child.InnerText);
                        }
                    }
                    else if (IsWordElement(child, "br"))
                    {
                        text.AppendLine();
                    }
                }
            }
            return text.ToString();
        }

        /// <summary>Tests a node by local name and namespace, independent of the prefix used in the file.</summary>
        private static bool IsWordElement(XmlNode node, string localName)
        {
            return node.LocalName == localName && node.NamespaceURI == WordNamespace;
        }

        /// <summary>True for 11-character strings containing exactly three colons, e.g. "10:00:07:27".</summary>
        private static bool LooksLikeTimeCode(string s)
        {
            return s.Length == 11 && s.Replace(":", string.Empty).Length == 8;
        }

        /// <summary>
        /// Parses a "HH:MM:SS:FF" string. The frame part is converted with
        /// <c>FramesToMillisecondsMax999</c>.
        /// </summary>
        /// <param name="s">Time code text; surrounding whitespace is ignored.</param>
        /// <returns>The parsed time code.</returns>
        /// <remarks>
        /// Throws (<see cref="FormatException"/>, <see cref="IndexOutOfRangeException"/>, ...)
        /// when the text does not consist of four colon-separated integers; the caller in
        /// <see cref="LoadSubtitle"/> counts that as a row error.
        /// </remarks>
        private static TimeCode GetTimeCode(string s)
        {
            string[] parts = s.Trim().Split(':');
            int hours = int.Parse(parts[0], CultureInfo.InvariantCulture);
            int minutes = int.Parse(parts[1], CultureInfo.InvariantCulture);
            int seconds = int.Parse(parts[2], CultureInfo.InvariantCulture);
            int frames = int.Parse(parts[3], CultureInfo.InvariantCulture);
            return new TimeCode(hours, minutes, seconds, FramesToMillisecondsMax999(frames));
        }
    }
}