PageRenderTime 37ms CodeModel.GetById 20ms app.highlight 14ms RepoModel.GetById 1ms app.codeStats 0ms

/src/Logic/SubtitleFormats/OresmeDocXDocument.cs

https://gitlab.com/minaz922/subtitleedit
C# | 290 lines | 240 code | 27 blank | 23 comment | 21 complexity | 688011c25254ccb323e33f6ddd0e1910 MD5 | raw file
  1using System;
  2using System.Collections.Generic;
  3using System.Text;
  4using System.Xml;
  5
  6namespace Nikse.SubtitleEdit.Logic.SubtitleFormats
  7{
  8    public class OresmeDocXDocument : SubtitleFormat
  9    {
 10        public override string Extension
 11        {
 12            get { return ".xml"; }
 13        }
 14
 15        public override string Name
 16        {
 17            get { return "Oresme Docx document"; }
 18        }
 19
 20        public override bool IsTimeBased
 21        {
 22            get { return true; }
 23        }
 24
 25        public override bool IsMine(List<string> lines, string fileName)
 26        {
 27            var sb = new StringBuilder();
 28            lines.ForEach(line => sb.AppendLine(line));
 29            string xmlAsString = sb.ToString().Trim();
 30            if ((xmlAsString.Contains("<w:tc>")))
 31            {
 32                var subtitle = new Subtitle();
 33                LoadSubtitle(subtitle, lines, fileName);
 34                return subtitle.Paragraphs.Count > _errorCount;
 35            }
 36            return false;
 37        }
 38
 39        private string Layout = @"<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
 40<w:document xmlns:wpc='http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas' xmlns:mc='http://schemas.openxmlformats.org/markup-compatibility/2006' xmlns:o='urn:schemas-microsoft-com:office:office' xmlns:r='http://schemas.openxmlformats.org/officeDocument/2006/relationships' xmlns:m='http://schemas.openxmlformats.org/officeDocument/2006/math' xmlns:v='urn:schemas-microsoft-com:vml' xmlns:wp14='http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing' xmlns:wp='http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing' xmlns:w10='urn:schemas-microsoft-com:office:word' xmlns:w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' xmlns:w14='http://schemas.microsoft.com/office/word/2010/wordml' xmlns:wpg='http://schemas.microsoft.com/office/word/2010/wordprocessingGroup' xmlns:wpi='http://schemas.microsoft.com/office/word/2010/wordprocessingInk' xmlns:wne='http://schemas.microsoft.com/office/word/2006/wordml' xmlns:wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' mc:Ignorable='w14 wp14'>
 41  <w:body>
 42    <w:tbl>
 43      <w:tblPr>
 44        <w:tblW w:w='0' w:type='auto'/>
 45        <w:tblBorders>
 46          <w:top w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
 47          <w:left w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
 48          <w:bottom w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
 49          <w:right w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
 50          <w:insideH w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
 51          <w:insideV w:val='single' w:sz='4' w:space='0' w:color='FFCC00'/>
 52        </w:tblBorders>
 53        <w:tblLayout w:type='fixed'/>
 54        <w:tblCellMar>
 55          <w:left w:w='70' w:type='dxa'/>
 56          <w:right w:w='70' w:type='dxa'/>
 57        </w:tblCellMar>
 58        <w:tblLook w:val='0000' w:firstRow='0' w:lastRow='0' w:firstColumn='0' w:lastColumn='0' w:noHBand='0' w:noVBand='0'/>
 59      </w:tblPr>
 60      <w:tblGrid>
 61        <w:gridCol w:w='1240'/>
 62        <w:gridCol w:w='5560'/>
 63      </w:tblGrid>
 64    </w:tbl>
 65    <w:p w:rsidR='00D56C9E' w:rsidRDefault='00D56C9E'/>
 66    <w:sectPr w:rsidR='00D56C9E'>
 67      <w:pgSz w:w='12240' w:h='15840'/>
 68      <w:pgMar w:top='1440' w:right='1440' w:bottom='1440' w:left='1440' w:header='720' w:footer='720' w:gutter='0'/>
 69      <w:cols w:space='720'/>
 70    </w:sectPr>
 71  </w:body>
 72</w:document>";
 73
 74
 75        public override string ToText(Subtitle subtitle, string title)
 76        {
 77            string xmlStructure = Layout.Replace("'", "\"");
 78
 79            var xml = new XmlDocument();
 80            xml.LoadXml(xmlStructure);
 81            var nsmgr = new XmlNamespaceManager(xml.NameTable);
 82            nsmgr.AddNamespace("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
 83            XmlNode div = xml.DocumentElement.SelectSingleNode("w:body/w:tbl", nsmgr);
 84            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
 85            {
 86                Paragraph p = subtitle.Paragraphs[i];
 87                div.AppendChild(CreateXmlParagraph(xml, p));
 88
 89                if (i < subtitle.Paragraphs.Count - 1 && Math.Abs(p.EndTime.TotalMilliseconds - subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds) > 100)
 90                {
 91                    var endP = new Paragraph(string.Empty, p.EndTime.TotalMilliseconds, 0);
 92                    div.AppendChild(CreateXmlParagraph(xml, endP));
 93                }
 94            }
 95
 96            string s = ToUtf8XmlString(xml);
 97            return s;
 98        }
 99
100        private XmlNode CreateXmlParagraph(XmlDocument xml, Paragraph p)
101        {
102            XmlNode paragraph = xml.CreateElement("w:tr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
103            var tc1 = xml.CreateElement("w:tc", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
104            paragraph.AppendChild(tc1);
105
106            //<w:tcPr>
107            //  <w:tcW w:w='1240' w:type='dxa'/>
108            //</w:tcPr>
109            var n1 = xml.CreateElement("w:tcPr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
110            var n1sub = xml.CreateElement("w:tcW", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"); // <w:tcW w:w='1240' w:type='dxa'/>
111            n1.AppendChild(n1sub);
112            var n1suba1 = xml.CreateAttribute("w:w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
113            n1suba1.InnerText = "1240";
114            n1sub.Attributes.Append(n1suba1);
115            var n1suba2 = xml.CreateAttribute("w:type", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
116            n1suba2.InnerText = "dxa";
117            n1sub.Attributes.Append(n1suba2);
118            tc1.AppendChild(n1);
119
120            //<w:p w:rsidR='00D56C9E' w:rsidRDefault='00D56C9E'>
121            //  <w:pPr>
122            //    <w:pStyle w:val='TimeCode'/>
123            //  </w:pPr>
124            //  <w:r>
125            //    <w:t>[TIMECODE]</w:t>
126            //  </w:r>
127            //</w:p>
128            var n2 = xml.CreateElement("w:p", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
129            var n1a1 = xml.CreateAttribute("w:rsidR", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
130            n1a1.InnerText = "00D56C9E";
131            n2.Attributes.Append(n1a1);
132            var n1a2 = xml.CreateAttribute("w:rsidRDefault", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
133            n1a2.InnerText = "00D56C9E";
134            n2.Attributes.Append(n1a2);
135
136            var n2sub1 = xml.CreateElement("w:pPr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
137            var n2sub1sub = xml.CreateElement("w:pStyle", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
138            n2sub1.AppendChild(n2sub1sub);
139            var n2sub1Suba1 = xml.CreateAttribute("w:val", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
140            n2sub1Suba1.InnerText = "TimeCode";
141            n2sub1sub.Attributes.Append(n2sub1Suba1);
142            n2.AppendChild(n2sub1);
143
144            var n2sub2 = xml.CreateElement("w:r", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
145            var n2sub2sub = xml.CreateElement("w:t", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
146            n2sub2sub.InnerText = ToTimeCode(p.StartTime);
147            n2sub2.AppendChild(n2sub2sub);
148            n2.AppendChild(n2sub2);
149            tc1.AppendChild(n2);
150
151
152            //<w:tc>
153            //  <w:tcPr>
154            //    <w:tcW w:w='5560' w:type='dxa'/>
155            //    <w:vAlign w:val='bottom'/>
156            //  </w:tcPr>
157            //  <w:p w:rsidR='00D56C9E' w:rsidRDefault='00D56C9E'>
158            //    <w:pPr>
159            //      <w:pStyle w:val='PopOn'/>
160            //    </w:pPr>
161            //    <w:proofErr w:type='spellStart'/>
162            //  </w:p>
163            //</w:tc>
164            var tc2 = xml.CreateElement("w:tc", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
165            paragraph.AppendChild(tc2);
166
167            var n3sub1 = xml.CreateElement("w:tcPr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
168            tc2.AppendChild(n3sub1);
169
170            var n3sub1sub1 = xml.CreateElement("w:tcW", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
171            var n3suba1 = xml.CreateAttribute("w:w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
172            n3suba1.InnerText = "5560";
173            n3sub1sub1.Attributes.Append(n3suba1);
174            var n3suba2 = xml.CreateAttribute("w:type", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
175            n3suba2.InnerText = "dxa";
176            n3sub1sub1.Attributes.Append(n3suba2);
177            n3sub1.AppendChild(n3sub1sub1);
178
179            var n3sub1sub2 = xml.CreateElement("w:vAlign", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
180            var n3sub1sub2a1 = xml.CreateAttribute("w:val", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
181            n3sub1sub2a1.InnerText = "bottom";
182            n3sub1sub2.Attributes.Append(n3sub1sub2a1);
183            n3sub1.AppendChild(n3sub1sub2);
184
185            var n3sub1sub3 = xml.CreateElement("w:p", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
186            var n3sub1sub3a1 = xml.CreateAttribute("w:rsidR", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
187            n3sub1sub3a1.InnerText = "00D56C9E";
188            n3sub1sub3.Attributes.Append(n3sub1sub3a1);
189            var n3sub1sub3a2 = xml.CreateAttribute("w:rsidRDefault", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
190            n3sub1sub3a2.InnerText = "00D56C9E";
191            n3sub1sub3.Attributes.Append(n3sub1sub3a2);
192            var n3sub1sub3sub1 = xml.CreateElement("w:pPr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
193            n3sub1sub3.AppendChild(n3sub1sub3sub1);
194            var n3sub1sub3sub1sub = xml.CreateElement("w:pStyle", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
195            var n3sub1sub3sub1suba1 = xml.CreateAttribute("w:val", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
196            n3sub1sub3sub1suba1.InnerText = "PopOn";
197            n3sub1sub3sub1sub.Attributes.Append(n3sub1sub3sub1suba1);
198            n3sub1sub3sub1.AppendChild(n3sub1sub3sub1sub);
199
200            var lines = Utilities.RemoveHtmlTags(p.Text, true).Replace(Environment.NewLine, "\n").Split('\n');
201            for (int i = 0; i < lines.Length; i++)
202            {
203                var n3sub1sub3sub2 = xml.CreateElement("w:r", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
204                n3sub1sub3.AppendChild(n3sub1sub3sub2);
205                if (i > 0)
206                {
207                    var lineBreak = xml.CreateElement("w:br", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
208                    n3sub1sub3sub2.AppendChild(lineBreak);
209                }
210                var text = xml.CreateElement("w:t", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
211                text.InnerText = lines[i];
212                n3sub1sub3sub2.AppendChild(text);
213            }
214            tc2.AppendChild(n3sub1sub3);
215
216            return paragraph;
217        }
218
219        private string ToTimeCode(TimeCode timeCode)
220        {
221            return timeCode.ToHHMMSSFF(); //10:00:07:27
222        }
223
224        public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
225        {
226            _errorCount = 0;
227            var sb = new StringBuilder();
228            lines.ForEach(line => sb.AppendLine(line));
229            var xml = new XmlDocument();
230            xml.LoadXml(sb.ToString().Trim());
231            var nsmgr = new XmlNamespaceManager(xml.NameTable);
232            nsmgr.AddNamespace("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
233            foreach (XmlNode node in xml.DocumentElement.SelectNodes("//w:tr", nsmgr))
234            {
235                try
236                {
237                    Paragraph p = new Paragraph();
238                    XmlNode t = node.SelectSingleNode("w:tc/w:p/w:r/w:t", nsmgr);
239                    if (t != null)
240                    {
241                        p.StartTime = GetTimeCode(t.InnerText);
242                        sb = new StringBuilder();
243                        foreach (XmlNode wrNode in node.SelectNodes("w:tc/w:p/w:r", nsmgr))
244                        {
245                            foreach (XmlNode child in wrNode.ChildNodes)
246                            {
247                                if (child.Name == "w:t")
248                                {
249                                    bool isTimeCode = child.InnerText.Length == 11 && child.InnerText.Replace(":", string.Empty).Length == 8;
250                                    if (!isTimeCode)
251                                        sb.Append(child.InnerText);
252                                }
253                                else if (child.Name == "w:br")
254                                {
255                                    sb.AppendLine();
256                                }
257                            }
258                        }
259                        p.Text = sb.ToString();
260                        subtitle.Paragraphs.Add(p);
261                    }
262                }
263                catch (Exception ex)
264                {
265                    System.Diagnostics.Debug.WriteLine(ex.Message);
266                    _errorCount++;
267                }
268            }
269            for (int i = 0; i < subtitle.Paragraphs.Count-1; i++)
270            {
271                subtitle.Paragraphs[i].EndTime.TotalMilliseconds = subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds;
272            }
273            subtitle.Paragraphs[subtitle.Paragraphs.Count - 1].EndTime.TotalMilliseconds = 2500;
274            subtitle.RemoveEmptyLines();
275            for (int i = 0; i < subtitle.Paragraphs.Count - 1; i++)
276            {
277                if (subtitle.Paragraphs[i].EndTime.TotalMilliseconds == subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds)
278                    subtitle.Paragraphs[i].EndTime.TotalMilliseconds = subtitle.Paragraphs[i + 1].StartTime.TotalMilliseconds - 1;
279            }
280            subtitle.Renumber(1);
281        }
282
283        private static TimeCode GetTimeCode(string s)
284        {
285            var parts = s.Trim().Split(':');
286            return new TimeCode(int.Parse(parts[0]), int.Parse(parts[1]), int.Parse(parts[2]), FramesToMillisecondsMax999(int.Parse(parts[3])));
287        }
288
289    }
290}