PageRenderTime 45ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/LiveWriterBackupWPF/reg2xml/xmlReader.cs

#
C# | 567 lines | 430 code | 107 blank | 30 comment | 100 complexity | 0e83b375e7b32964cd192f20082402e7 MD5 | raw file
  1. using System;
  2. using System.IO;
  3. using System.Windows.Forms;
  4. using System.Web;
  /// <summary>
  /// Kind of item the xmlReader parser reports after a parsing step
  /// (see xmlReader.getNodeType()).
  /// </summary>
  public enum NODETYPE
  {
      /// <summary>The step produced nothing to hand to the caller.</summary>
      NODETYPE_NA = 0,

      /// <summary>An opening tag name was read.</summary>
      NODETYPE_BEGINELEMENT = 1,

      /// <summary>A closing tag, or the '/' of an empty element, was read.</summary>
      NODETYPE_ENDELEMENT = 2,

      /// <summary>A complete attribute name/value pair is available.</summary>
      NODETYPE_ATTRIB = 3,

      /// <summary>Text content located between tags was read.</summary>
      NODETYPE_CONTENT = 4,

      /// <summary>A tag whose name starts with '?' or '!' was read.</summary>
      NODETYPE_PI = 5,

      /// <summary>The parser is positioned inside a comment.</summary>
      NODETYPE_COMMENT = 6,

      /// <summary>The parser is positioned inside a CDATA section.</summary>
      NODETYPE_CDATA = 7
  }
  16. namespace Cll
  17. {
  18. /// <summary>
  19. /// xmlReader reads and parses an XML file
  20. /// </summary>
  21. public class xmlReader
  22. {
  // ---- error descriptions ----
  public const string IDS_EMPTYELEMENTNAME = "Empty element name";
  public const string IDS_BADBEGINNODESYMBOL = "Bad '<' symbol";
  public const string IDS_NOBEGINNODESYMBOLINEOL = "'<' symbol not allowed at the end of a line";
  public const string IDS_GENERICSYNTAXERROR = "Syntax error";
  public const string IDS_MISSINGATTRIBNAME = "Missing attribute name before '='";
  public const string IDS_MISSINGEQUALSYMBOL = "Missing '=' after attribute name";
  public const string IDS_NOEQUALSYMBOLINEOL = "There should not be a '=' symbol at the end of a line";
  public const string IDS_BADATTRIBUTEVALUESYNTAX = "There should not be a quote char at the end of a line";

  // ---- file state ----
  private StreamReader _sr;            // reader over the file being parsed
  private bool _bFileOpen;             // true once the file has been opened for reading
  private string _strFilename;         // path given to open()
  private bool _bShowMsgBoxOnError;    // true if a message box may be displayed on a parse error
  private string _strLastError;        // error description, filled when parseContent() returns false

  // ---- parser buffer and cursors ----
  private string _strContent;          // line currently being parsed
  private int _nCursor;                // index of the next char to parse in _strContent; -1 = read a new line
  private int _nbLines;                // number of lines read from the file so far

  // ---- values reported to the caller ----
  private NODETYPE _nCurNodeType;      // what the parser reported for the last step
  private string _strCurNodeName;      // current node name
  private string _strCurNodeContent;   // current node content
  private string _strCurAttribName;    // current attribute name
  private string _strCurAttribValue;   // current attribute value (built up over several steps)
  private string _strCurPInstruction;  // current processing instruction (for instance ?xml, !DOCTYPE, ...)

  // ---- parser position flags ----
  private bool _bCurInsideComment;     // true if the parser is inside a comment ( <!-- ... --> )
  private bool _bCurInsideCDATA;       // true if the parser is inside a CDATA section ( <![CDATA[ ... ]]> )
  private bool _bCurInsideNode;        // true if the parser is inside a node begin tag
  private bool _bCurInsideAttrib;      // true if _strCurAttribName is valid and _strCurAttribValue is pending
  private bool _bCurInsideContent;     // true if the parser is inside content
  /// <summary>
  /// Creates a reader that is not yet attached to any file; the initial
  /// state is whatever init() establishes.
  /// </summary>
  public xmlReader()
  {
      init();
  }
  /// <summary>
  /// Puts the reader back in its "no file open" state and turns
  /// error message boxes off.
  /// </summary>
  void init()
  {
      _bFileOpen = false;
      _bShowMsgBoxOnError = false; // same effect as showMsgBoxOnError(false)
  }
  /// <summary>
  /// Chooses whether readString() may pop up a message box when a parse error occurs.
  /// </summary>
  /// <param name="bShow">true to allow message boxes, false to stay silent.</param>
  public void showMsgBoxOnError(bool bShow)
  {
      _bShowMsgBoxOnError = bShow;
  }
  /// <summary>
  /// Remembers the file to parse. The file itself is not opened here; that
  /// happens on the first call to readString(). When a file is already open
  /// the new name is ignored.
  /// </summary>
  /// <param name="strFilename">Path of the file to parse.</param>
  /// <returns>Always true.</returns>
  public bool open(string strFilename)
  {
      if (!_bFileOpen)
      {
          _strFilename = strFilename;
      }
      return true;
  }
  /// <summary>
  /// Runs one parsing step. On the first call the file given to open() is
  /// opened and the parser state is reset; a new line is read from the file
  /// whenever the cursor asks for one (cursor == -1); then parseContent()
  /// is invoked on the current line.
  /// </summary>
  /// <returns>
  /// true when the step succeeded; false when the file could not be opened,
  /// when the end of the file was reached, or when parseContent() failed.
  /// On open failure and on parse failure getLastError() holds a description.
  /// </returns>
  public bool readString()
  {
      if (!_bFileOpen) // open file for reading
      {
          try
          {
              _sr = File.OpenText(_strFilename);
              _bFileOpen = _sr != null;
              _nCursor = -1;
              _nbLines = 0;
              _bCurInsideNode = _bCurInsideComment = _bCurInsideAttrib = _bCurInsideContent = _bCurInsideCDATA = false;
              _strLastError = "";
              _strCurNodeName = "";
              _strCurNodeContent = "";
              _strCurAttribName = "";
              _strCurAttribValue = "";
              _strCurPInstruction = "";
          }
          catch (Exception ex)
          {
              // Still reported through the return value (no exception escapes),
              // but keep the reason so the caller can tell an open failure
              // apart from a plain end of file.
              _bFileOpen = false;
              _strLastError = "Cannot open file '" + _strFilename + "' : " + ex.Message;
          }
      }
      if (!_bFileOpen)
      {
          return false;
      }

      if (_nCursor == -1) // the previous line is exhausted: fetch the next one
      {
          _strContent = _sr.ReadLine();
          _nCursor = 0;
          _nbLines++;
      }

      if (_strContent == null) // ReadLine() returned null: end of file
      {
          return false;
      }

      if (parseContent())
      {
          return true;
      }

      // Prefix the parser's message with the line number.
      string s = "Parse error in line " + _nbLines + " : " + _strLastError;
      _strLastError = s;
      if (_bShowMsgBoxOnError)
      {
          MessageBox.Show(s, "Office Recovery Manager",
              MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
      }
      return false;
  }
  /// <summary>
  /// Closes the underlying reader when a file is open, then resets the
  /// reader through init() (which also turns error message boxes off).
  /// </summary>
  /// <returns>Always true.</returns>
  public bool close()
  {
      if (_bFileOpen)
      {
          _sr.Close();
      }
      init();
      return true;
  }
  /// <summary>Gets what the last parsing step reported.</summary>
  /// <returns>The node type set by the most recent parseContent() call.</returns>
  public NODETYPE getNodeType()
  {
      return _nCurNodeType;
  }
  /// <summary>Gets the name of the current node.</summary>
  /// <returns>The node name last stored by the parser.</returns>
  public string getNodeName()
  {
      return _strCurNodeName;
  }
  /// <summary>Gets the name of the current attribute.</summary>
  /// <returns>The attribute name last stored by the parser.</returns>
  public string getAttribName()
  {
      return _strCurAttribName;
  }
  /// <summary>Gets the value of the current attribute.</summary>
  /// <returns>
  /// The attribute value buffer; it is only complete once the parser has
  /// reported NODETYPE_ATTRIB.
  /// </returns>
  public string getAttribValue()
  {
      return _strCurAttribValue;
  }
  /// <summary>
  /// Gets the text content read by the last step that reported
  /// NODETYPE_CONTENT.
  /// </summary>
  /// <remarks>
  /// Declared public like the other accessors: without an access modifier
  /// the method was private and unused, so callers had no way to read the
  /// content the parser extracted.
  /// </remarks>
  /// <returns>The node content last stored by the parser.</returns>
  public string getNodeContent()
  {
      return _strCurNodeContent;
  }
  /// <summary>
  /// Gets the current processing instruction, i.e. the tag name starting
  /// with '?' or '!' read by the last step that reported NODETYPE_PI
  /// (for instance ?xml or !DOCTYPE).
  /// </summary>
  /// <remarks>
  /// Declared public like the other accessors: without an access modifier
  /// the method was private and unused, so callers had no way to read the
  /// instruction the parser extracted.
  /// </remarks>
  /// <returns>The processing instruction last stored by the parser.</returns>
  public string getPInstruction()
  {
      return _strCurPInstruction;
  }
  /// <summary>Gets the number of lines read from the file so far.</summary>
  /// <returns>The line counter maintained by readString().</returns>
  public int getCurrentLine()
  {
      return _nbLines;
  }
  /// <summary>Gets the description of the last error, if any.</summary>
  /// <returns>The last error text stored by the reader.</returns>
  public string getLastError()
  {
      return _strLastError;
  }
  /// <summary>
  /// Performs one parsing step on the current line (_strContent), starting
  /// at _nCursor. Each step consumes at most one lexical item, stores what
  /// it found in the "current" members, sets the node type accordingly and
  /// moves the cursor. A cursor of -1 asks readString() for the next line.
  /// </summary>
  /// <remarks>
  /// Fixes compared with the previous implementation:
  /// - a '&lt;' that is the last character of a line is reported as
  ///   IDS_NOBEGINNODESYMBOLINEOL instead of raising an index exception;
  /// - an attribute value is HTML-decoded once, after its opening quote has
  ///   been removed, so a value starting with an encoded quote keeps it and
  ///   multi-line values are no longer decoded repeatedly.
  /// </remarks>
  /// <returns>
  /// true when the step succeeded (including steps reporting NODETYPE_NA);
  /// false on a syntax error, in which case _strLastError is set.
  /// </returns>
  public bool parseContent()
  {
      _nCurNodeType = NODETYPE.NODETYPE_NA;

      int i = _nCursor;
      int imax = _strContent.Length - 1;
      if (i > imax)
      {
          _nCursor = -1; // force next string to be read from file
          return true;
      }

      // pass spaces if we are inside a <...> and not yet processing an attribute value
      if (_bCurInsideNode && !_bCurInsideAttrib)
      {
          i = skipBlanks(i);
          if (i > imax)
          {
              _nCursor = -1; // force next string to be read from file
              return true;
          }
      }

      if (_bCurInsideComment)
      {
          return parseSectionEnd(i, "-->", NODETYPE.NODETYPE_COMMENT);
      }
      if (_bCurInsideCDATA)
      {
          return parseSectionEnd(i, "]]>", NODETYPE.NODETYPE_CDATA);
      }
      if (_bCurInsideAttrib)
      {
          return parseAttribValue(i);
      }
      if (_bCurInsideContent)
      {
          return parseNodeText(i);
      }
      if (_strContent[i] == '<')
      {
          return parseNodeBegin(i);
      }
      return parseInsideNode(i);
  }

  /// <summary>True for the characters the parser treats as blanks: space, LF, CR.</summary>
  private static bool isSpaceOrEol(char ch)
  {
      return ch == ' ' || ch == '\n' || ch == '\r';
  }

  /// <summary>Returns the index of the first non-blank char of the current line at or after i (or the line length).</summary>
  private int skipBlanks(int i)
  {
      while (i < _strContent.Length && isSpaceOrEol(_strContent[i]))
      {
          i++;
      }
      return i;
  }

  /// <summary>True when the current line contains token exactly at position index.</summary>
  private bool startsWithAt(int index, string token)
  {
      return index + token.Length <= _strContent.Length
          && string.CompareOrdinal(_strContent, index, token, 0, token.Length) == 0;
  }

  /// <summary>
  /// Step taken while inside a comment or CDATA section: looks for the
  /// terminator on the current line. Always succeeds.
  /// </summary>
  private bool parseSectionEnd(int i, string terminator, NODETYPE sectionType)
  {
      int iclose = _strContent.IndexOf(terminator, i, StringComparison.Ordinal);
      if (iclose < 0)
      {
          // still inside the section: report it and consume the rest of the line
          _nCurNodeType = sectionType;
          _nCursor = _strContent.Length; // force next string to be read
          return true;
      }

      _nCurNodeType = NODETYPE.NODETYPE_NA; // tell user we have nothing to provide him with
      _nCursor = iclose + terminator.Length;
      // after the terminator we are automatically within a content
      _bCurInsideNode = _bCurInsideAttrib = _bCurInsideComment = _bCurInsideCDATA = false;
      _bCurInsideContent = true;
      _strCurNodeContent = "";
      return true;
  }

  /// <summary>
  /// Step taken after '=': extracts the attribute value, possibly over
  /// several calls (first the opening quote char, then the text up to the
  /// closing '"', '>' or end of line). Always succeeds.
  /// </summary>
  private bool parseAttribValue(int i)
  {
      int imax = _strContent.Length - 1;

      if (_strCurAttribValue.Length == 0)
      {
          // first pass: swallow blanks, then the opening quote char
          i = skipBlanks(i);
          if (i <= imax)
          {
              // in fact, we don't check the quotechar is an actual quotechar, ie " or '
              _strCurAttribValue += _strContent[i++]; // start with something whatsoever!
          }
          _nCurNodeType = NODETYPE.NODETYPE_NA;
          _nCursor = i;
          return true;
      }

      int ibegin = i;
      // pass until we find the closing double quote, EOL or >
      while (i <= imax
          && _strContent[i] != '\"'
          && _strContent[i] != '\n'
          && _strContent[i] != '\r'
          && _strContent[i] != '>')
      {
          i++;
      }
      _strCurAttribValue += _strContent.Substring(ibegin, i - ibegin);

      if (i > imax)
      {
          // value continues on the next line: don't forget to add the EOL as well
          _strCurAttribValue += "\r\n";
          _nCurNodeType = NODETYPE.NODETYPE_NA;
          _nCursor = i;
          return true;
      }

      // Value complete: remove the prefixed quote char(s) from the raw text
      // first, and only then decode entities, so that a decoded &quot; at the
      // start of the value is not mistaken for the opening quote.
      _strCurAttribValue = _strCurAttribValue.TrimStart('\"', '\'');
      _strCurAttribValue = HttpUtility.HtmlDecode(_strCurAttribValue); // Work Item #1781 http://www.codeplex.com/WLWBackup/WorkItem/View.aspx?WorkItemId=1781

      _nCurNodeType = NODETYPE.NODETYPE_ATTRIB;
      _bCurInsideAttrib = false;
      if (_strContent[i] != '>')
      {
          i++; // pass ending quote char
      }
      _nCursor = i;
      return true;
  }

  /// <summary>
  /// Step taken between tags: reads text up to the next '&lt;' or the end of
  /// the line. Always succeeds.
  /// </summary>
  private bool parseNodeText(int i)
  {
      int imax = _strContent.Length - 1;
      int ibegin = i;
      while (i <= imax
          && _strContent[i] != '\n'
          && _strContent[i] != '\r'
          && _strContent[i] != '<')
      {
          i++;
      }

      if (i <= imax && _strContent[i] == '<')
      {
          _bCurInsideContent = false; // a tag starts here; the next step handles it
      }

      _strCurNodeContent = _strContent.Substring(ibegin, i - ibegin);
      _nCurNodeType = (_strCurNodeContent.Length == 0)
          ? NODETYPE.NODETYPE_NA
          : NODETYPE.NODETYPE_CONTENT;
      _nCursor = i;
      return true;
  }

  /// <summary>
  /// Step taken on a '&lt;' (i is its position): recognises the start of a
  /// comment, of a CDATA section, or reads the tag name.
  /// </summary>
  private bool parseNodeBegin(int i)
  {
      int imax = _strContent.Length - 1;

      if (_bCurInsideNode) // error, we were already inside one
      {
          _strLastError = IDS_BADBEGINNODESYMBOL;
          return false;
      }
      _bCurInsideNode = true;
      _bCurInsideAttrib = _bCurInsideContent = _bCurInsideComment = _bCurInsideCDATA = false;

      // pass '<' and the spaces that follow, never reading past the end of the line
      i = skipBlanks(i + 1);
      if (i > imax)
      {
          _strLastError = IDS_NOBEGINNODESYMBOLINEOL;
          return false;
      }

      // here we have either a node name, a PI, or a begin comment
      if (startsWithAt(i, "!--")) // begin comment ( <!-- )
      {
          _nCurNodeType = NODETYPE.NODETYPE_COMMENT;
          _bCurInsideComment = true;
          _nCursor = i + 3; // go to actual comment content
          return true;
      }
      if (startsWithAt(i, "![CDATA[")) // begin cdata section ( <![CDATA[ )
      {
          _nCurNodeType = NODETYPE.NODETYPE_CDATA;
          _bCurInsideCDATA = true;
          _nCursor = i + 8; // go to actual cdata section content
          return true;
      }

      // the node name begins at position i
      int ibegin = i;
      // pass until we find spaces or EOL or >
      while (i <= imax
          && _strContent[i] != ' '
          && _strContent[i] != '\n'
          && _strContent[i] != '\r'
          && (_strContent[i] != '/' || i == ibegin) // don't forget empty elements (for instance <br/>)
          && _strContent[i] != '>')
      {
          i++;
      }

      _strCurNodeName = _strContent.Substring(ibegin, i - ibegin);
      if (_strCurNodeName.Length == 0)
      {
          _strLastError = IDS_EMPTYELEMENTNAME;
          return false;
      }

      char first = _strCurNodeName[0];
      if (first == '?' || first == '!')
      {
          _nCurNodeType = NODETYPE.NODETYPE_PI;
          _strCurPInstruction = _strCurNodeName;
          _strCurNodeName = ""; // erase the PI instruction so it does not appear as a node name
      }
      else if (first == '/')
      {
          _nCurNodeType = NODETYPE.NODETYPE_ENDELEMENT;
          _strCurNodeName = _strCurNodeName.Substring(1); // remove /
      }
      else
      {
          _nCurNodeType = NODETYPE.NODETYPE_BEGINELEMENT;
      }
      _nCursor = i;
      return true;
  }

  /// <summary>
  /// Step taken on any other character: '?', '/', '>', '=' or the start of
  /// an attribute name. Fails with a generic syntax error when the
  /// character is none of those and the parser is not inside a tag.
  /// </summary>
  private bool parseInsideNode(int i)
  {
      int imax = _strContent.Length - 1;
      char c = _strContent[i];

      if (c == '?')
      {
          _nCurNodeType = NODETYPE.NODETYPE_NA;
          _nCursor = i + 1;
          return true;
      }

      if (c == '/')
      {
          // end of an empty element: pass everything up to spaces, EOL or >
          i++;
          while (i <= imax
              && _strContent[i] != ' '
              && _strContent[i] != '\n'
              && _strContent[i] != '\r'
              && _strContent[i] != '>')
          {
              i++;
          }
          _nCurNodeType = NODETYPE.NODETYPE_ENDELEMENT;
          _nCursor = i;
          return true;
      }

      if (c == '>')
      {
          // the tag is closed: what follows is content
          _bCurInsideNode = _bCurInsideAttrib = false;
          _bCurInsideContent = true;
          _strCurNodeContent = "";
          _nCurNodeType = NODETYPE.NODETYPE_NA;
          _nCursor = i + 1;
          return true;
      }

      // attributes (the attribute-value case has been handled before reaching here)
      if (_bCurInsideNode)
      {
          if (c == '=')
          {
              _nCurNodeType = NODETYPE.NODETYPE_NA;
              _bCurInsideAttrib = true; // enable extraction of the associated attribute value
              _nCursor = i + 1;         // pass '=' symbol
              return true;
          }

          // get attribute name
          int ibegin = i;
          // pass until we find spaces or EOL or = or >
          while (i <= imax
              && _strContent[i] != ' '
              && _strContent[i] != '\n'
              && _strContent[i] != '\r'
              && _strContent[i] != '='
              && _strContent[i] != '>') // check against > is just for safety
          {
              i++;
          }

          _strCurAttribName = _strContent.Substring(ibegin, i - ibegin);
          if (_strCurAttribName.Length == 0)
          {
              _strLastError = IDS_MISSINGATTRIBNAME;
              return false;
          }
          _strCurAttribValue = ""; // make sure the attrib value is empty for the moment
          _nCurNodeType = NODETYPE.NODETYPE_NA;
          _nCursor = i;
          return true;
      }

      // text found outside of any tag and outside of content
      _strLastError = IDS_GENERICSYNTAXERROR;
      return false;
  }
  459. }
  460. }