/LiveWriterBackupWPF/reg2xml/xmlReader.cs
C# | 567 lines | 430 code | 107 blank | 30 comment | 100 complexity | 0e83b375e7b32964cd192f20082402e7 MD5 | raw file
- using System;
- using System.IO;
- using System.Windows.Forms;
- using System.Web;
-
- public enum NODETYPE
- {
- NODETYPE_NA = 0,
- NODETYPE_BEGINELEMENT = 1,
- NODETYPE_ENDELEMENT = 2,
- NODETYPE_ATTRIB = 3,
- NODETYPE_CONTENT = 4,
- NODETYPE_PI = 5,
- NODETYPE_COMMENT = 6,
- NODETYPE_CDATA = 7
- };
-
- namespace Cll
- {
- /// <summary>
- /// xmlReader reads and parses an XML file
- /// </summary>
- public class xmlReader
- {
- public const string IDS_EMPTYELEMENTNAME = "Empty element name";
- public const string IDS_BADBEGINNODESYMBOL = "Bad '<' symbol";
- public const string IDS_NOBEGINNODESYMBOLINEOL = "'<' symbol not allowed at the end of a line";
- public const string IDS_GENERICSYNTAXERROR = "Syntax error";
- public const string IDS_MISSINGATTRIBNAME = "Missing attribute name before '='";
- public const string IDS_MISSINGEQUALSYMBOL = "Missing '=' after attribute name";
- public const string IDS_NOEQUALSYMBOLINEOL = "There should not be a '=' symbol at the end of a line";
- public const string IDS_BADATTRIBUTEVALUESYNTAX = "There should not be a quote char at the end of a line";
-
- StreamReader _sr; // general file members
- bool _bFileOpen;
- string _strFilename;
- bool _bShowMsgBoxOnError; // true if message boxes are allowed to display while parsing
- string _strLastError; // filled with error description if ParseContent() returns false
-
- //
- string _strContent; // internal use : parser buffer
- int _nCursor, _nbLines; // internal cursors (horizontal and vertical directions)
- NODETYPE _nCurNodeType; // returns where is the parser on at the moment
- string _strCurNodeName, _strCurNodeContent; // returns the current node value, and the current node content value
- string _strCurAttribName, _strCurAttribValue; // returns the current attrib name/value pair
- string _strCurPInstruction; // returns the current PInstruction (for instance ?xml, !DOCTYPE, ...)
- bool _bCurInsideComment; // true if the parser is inside a comment ( <!-- ... -->)
- bool _bCurInsideCDATA; // true if hte parser is inside a CDATA secrtion ( <![CDATA[[ ... ]]> )
- bool _bCurInsideNode; // true if the parser is inside a node begin tag
- bool _bCurInsideAttrib; // true if _strCurAttribName is valid and _strCurAttribValue is pending
- bool _bCurInsideContent; // true if the parser is inside content
-
- public xmlReader()
- {
- init();
- }
-
- void init()
- {
- _bFileOpen = false;
- showMsgBoxOnError(false);
- }
-
- public void showMsgBoxOnError(bool bShow)
- {
- _bShowMsgBoxOnError = bShow;
- }
-
- public bool open(string strFilename)
- {
- if (_bFileOpen)
- return true;
-
- _strFilename = strFilename;
-
- return true;
- }
-
- public bool readString()
- {
- if (!_bFileOpen) // open file for reading
- {
- try
- {
- _sr = File.OpenText(_strFilename);
- _bFileOpen = _sr != null;
- _nCursor = -1;
- _nbLines = 0;
- _bCurInsideNode = _bCurInsideComment = _bCurInsideAttrib = _bCurInsideContent = _bCurInsideCDATA = false;
- _strLastError = "";
- _strCurNodeName = "";
- _strCurNodeContent = "";
- _strCurAttribName = "";
- _strCurAttribValue = "";
- _strCurPInstruction = "";
- }
- catch (Exception )
- {
- _bFileOpen = false;
- }
- }
-
- if (!_bFileOpen)
- return false;
-
- bool bResult = true;
-
- if (_nCursor==-1)
- {
- _strContent = _sr.ReadLine();
- _nCursor = 0;
- _nbLines++;
- }
-
- if (_strContent==null)
- {
- bResult = false;
- }
-
- if (_strContent!=null && !parseContent())
- {
- string s;
- s = "Parse error in line "+_nbLines+" : " + _strLastError;
- _strLastError = s;
-
- if (_bShowMsgBoxOnError)
- {
- MessageBox.Show (s, "Office Recovery Manager",
- MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
- }
-
- bResult = false;
- }
-
- return bResult;
- }
-
- public bool close()
- {
- if (_bFileOpen)
- _sr.Close();
-
- init();
-
- return true;
- }
-
- public NODETYPE getNodeType()
- {
- return _nCurNodeType;
- }
-
- public string getNodeName()
- {
- return _strCurNodeName;
- }
-
- public string getAttribName()
- {
- return _strCurAttribName;
- }
-
- public string getAttribValue()
- {
- return _strCurAttribValue;
- }
-
- string getNodeContent()
- {
- return _strCurNodeContent;
- }
-
-
- string getPInstruction()
- {
- return _strCurPInstruction;
- }
-
- public int getCurrentLine()
- {
- return _nbLines;
- }
-
-
- public string getLastError() // if any
- {
- return _strLastError;
- }
-
- public bool parseContent()
- {
- _nCurNodeType = NODETYPE.NODETYPE_NA;
-
- int i = (int)_nCursor;
- int imax = _strContent.Length-1;
-
- if (i>imax)
- {
- _nCursor = -1; // force next string to be read from file
- return true;
- }
-
- string strTemp = _strContent + i;
-
- // pass spaces if we are inside a <...> and not yet processing an attribute value
- while ( (i<=imax) &&
- (_bCurInsideNode && !_bCurInsideAttrib) &&
- (_strContent[i]==' ' || _strContent[i]==0x0A || _strContent[i]==0x0D) )
- i++;
-
- if (i>imax)
- {
- _nCursor = -1; // force next string to be read from file
- return true;
- }
-
- // are we inside a comment ?
- if (_bCurInsideComment)
- {
- while ( (i<=imax-2) &&
- !(_strContent[i]=='-' && _strContent[i+1]=='-' && _strContent[i+2]=='>') )
- i++;
-
- if (i<=imax-2) // found an end-comment
- {
- _nCurNodeType = NODETYPE.NODETYPE_NA; // tell user we have nothing to provide him with
- _nCursor = i+2+1;
-
- // after '-->' we are automatically within a content
- _bCurInsideNode = _bCurInsideAttrib = _bCurInsideComment = _bCurInsideCDATA = false;
- _bCurInsideContent = true;
- _strCurNodeContent = "";
-
- return true;
- }
- else // we still are inside an comment
- {
- _nCurNodeType = NODETYPE.NODETYPE_COMMENT;
- _nCursor = imax+1; // force next string to be read
- return true;
- }
- }
-
-
- // are we inside a CDATA section ?
- if (_bCurInsideCDATA)
- {
- while ( (i<=imax-2) &&
- !(_strContent[i]==']' && _strContent[i+1]==']' && _strContent[i+2]=='>') )
- i++;
-
- if (i<=imax-2) // found an end-comment
- {
- _nCurNodeType = NODETYPE.NODETYPE_NA; // tell user we have nothing to provide him with
- _nCursor = i+2+1;
-
- // after ']]>' we are automatically within a content
- _bCurInsideNode = _bCurInsideAttrib = _bCurInsideComment = _bCurInsideCDATA = false;
- _bCurInsideContent = true;
- _strCurNodeContent = "";
-
- return true;
- }
- else // we still are inside an CDATA section
- {
- _nCurNodeType = NODETYPE.NODETYPE_CDATA;
- _nCursor = imax+1; // force next string to be read
- return true;
- }
- }
-
-
- if (_bCurInsideAttrib) // extracting the attrib value, possibly in multiple passes
- {
- if ( _strCurAttribValue.Length==0 )
- {
- // pass EOL
- while ( (i<=imax) && (_strContent[i]==' ' || _strContent[i]==0x0A || _strContent[i]==0x0D) )
- i++;
-
- if (i>imax)
- {
- _nCurNodeType = NODETYPE.NODETYPE_NA;
- _nCursor = i;
- return true;
- }
-
- char quotechar = _strContent[i++];
-
- _strCurAttribValue += quotechar; // start with something whatsoever!
- // in fact, we don't check the quotechar is an actual quotechar, ie " or '
-
- _nCurNodeType = NODETYPE.NODETYPE_NA;
- _nCursor = i;
- return true;
- }
- else
- {
- long ibegin = i;
-
- // pass until we find spaces or EOL or >
- while ( (i<=imax)
- && _strContent[i]!='\"'
- //&& _strContent[i]!='\''
- && _strContent[i]!=0x0A
- && _strContent[i]!=0x0D
- && _strContent[i]!='>')
- i++;
-
- // TODO : properly manage the case of a multiple-line attrib-value
- // (we should in this case return a N/A nodetype as long as we haven't
- // encountered the ending quotechar, while buffering all the chars in
- // the strAttribValue member).
-
- long iend = i;
-
- _strCurAttribValue += _strContent.Substring((int)ibegin, (int)(iend-ibegin));
- _strCurAttribValue = HttpUtility.HtmlDecode(_strCurAttribValue); // Work Item #1781 http://www.codeplex.com/WLWBackup/WorkItem/View.aspx?WorkItemId=1781
-
- if (i>imax)
- { // don't forget to add the EOL as well
- _strCurAttribValue += "\r\n";
-
- _nCurNodeType = NODETYPE.NODETYPE_NA;
- _nCursor = i;
- return true;
- }
-
- // and remove the prefixed quote char
- while ( _strCurAttribValue.Length!=0 &&
- (_strCurAttribValue[0]=='\"' || _strCurAttribValue[0]=='\'') )
- {
- _strCurAttribValue = _strCurAttribValue.Substring(1);
- }
-
- _nCurNodeType = NODETYPE.NODETYPE_ATTRIB;
- _bCurInsideAttrib = false;
-
- if ( _strContent[i]!='>' )
- i++; // pass ending quote char
-
- _nCursor = i;
- return true;
- }
-
- } // end if _bCurInsideAttrib==true
-
-
- if (_bCurInsideContent)
- {
- long ibegin = i;
-
- // pass until we find spaces or EOL or >
- while ( (i<=imax) && _strContent[i]!=0x0A
- && _strContent[i]!=0x0D
- && _strContent[i]!='<')
- i++;
-
- long iend = i;
-
- if ( (i<=imax) && _strContent[i]=='<')
- _bCurInsideContent = false;
-
- _strCurNodeContent = _strContent.Substring((int)ibegin, (int)(iend-ibegin));
- if (_strCurNodeContent.Length==0)
- _nCurNodeType = NODETYPE.NODETYPE_NA;
- else
- _nCurNodeType = NODETYPE.NODETYPE_CONTENT;
-
- _nCursor = i;
- return true;
- } // end if (_bCurInsideContent)
-
- //
- char c = _strContent[i];
-
- // a node ?
- if (c=='<')
- {
- if (_bCurInsideNode) // error, we were already inside one
- {
- _strLastError = IDS_BADBEGINNODESYMBOL;
- return false;
- }
-
- _bCurInsideNode = true;
- _bCurInsideAttrib = _bCurInsideContent = _bCurInsideComment = _bCurInsideCDATA = false;
-
- i++;
-
- // pass spaces
- while ( _strContent[i]==' ' || _strContent[i]==0x0A || _strContent[i]==0x0D)
- i++;
-
- if (i>imax)
- {
- _strLastError = IDS_NOBEGINNODESYMBOLINEOL;
- return false;
- }
-
- // here we have either a node name, a PI, or a begin comment
- if (imax-i>=2) // is it a begin comment ? ( <!-- )
- {
- if ( _strContent[i+0]=='!' &&
- _strContent[i+1]=='-' &&
- _strContent[i+2]=='-')
- {
- _nCurNodeType = NODETYPE.NODETYPE_COMMENT;
- _bCurInsideComment = true;
-
- i+=3; // go to actual comment content
-
- _nCursor = i;
- return true;
- }
- }
-
- if (imax-i>=7) // is it a begin cdatasection ? ( <![CDATA[ )
- {
- if ( _strContent[i+0]=='!' &&
- _strContent[i+1]=='[' &&
- _strContent[i+2]=='C' &&
- _strContent[i+3]=='D' &&
- _strContent[i+4]=='A' &&
- _strContent[i+5]=='T' &&
- _strContent[i+6]=='A' &&
- _strContent[i+7]=='[')
- {
- _nCurNodeType = NODETYPE.NODETYPE_CDATA;
- _bCurInsideCDATA = true;
-
- i+=8; // go to actual cdata section content
-
- _nCursor = i;
- return true;
- }
- }
-
-
- // the node name begins at position i
- long ibegin = i;
-
- // pass until we find spaces or EOL or >
- while ( (i<=imax) && _strContent[i]!=' '
- && _strContent[i]!=0x0A
- && _strContent[i]!=0x0D
- && (_strContent[i]!='/' || (i==ibegin)) // don't forget empty elements (for instance <br/>)
- && _strContent[i]!='>')
- i++;
-
- long iend = i;
-
- _strCurNodeName = _strContent.Substring((int)ibegin, (int)(iend-ibegin));
- if (_strCurNodeName.Length==0)
- {
- _strLastError = IDS_EMPTYELEMENTNAME;
- return false;
- }
-
-
- if (_strCurNodeName[0]=='?' || _strCurNodeName[0]=='!')
- {
- _nCurNodeType = NODETYPE.NODETYPE_PI;
- _strCurPInstruction = _strCurNodeName;
- _strCurNodeName = ""; // erase the PI instruction so it does not appear as a node name
- }
- else if (_strCurNodeName[0]=='/')
- {
- _nCurNodeType = NODETYPE.NODETYPE_ENDELEMENT;
- _strCurNodeName = _strCurNodeName.Substring(1); // remove /
- }
- else
- _nCurNodeType = NODETYPE.NODETYPE_BEGINELEMENT;
-
- _nCursor = i;
- return true;
- }
- else // >, or ?, or content or attribute
- {
- if (c=='?')
- {
- _nCurNodeType = NODETYPE.NODETYPE_NA;
- _nCursor = i+1;
- return true;
- }
- else if (c=='/')
- {
- i++;
-
- // pass node name
- long ibegin = i;
-
- // pass until we find spaces or EOL or >
- while ( (i<=imax) && _strContent[i]!=' '
- && _strContent[i]!=0x0A
- && _strContent[i]!=0x0D
- && _strContent[i]!='>')
- i++;
-
- long iend = i;
-
- _nCurNodeType = NODETYPE.NODETYPE_ENDELEMENT;
-
- _nCursor = i;
- return true;
- }
- else if (c=='>')
- {
- _bCurInsideNode = _bCurInsideAttrib = false;
- _bCurInsideContent = true;
- _strCurNodeContent = "";
- _nCurNodeType = NODETYPE.NODETYPE_NA;
- _nCursor = i+1;
- return true;
- }
-
- if (_bCurInsideNode) // attributes
- {
-
- if (!_bCurInsideAttrib)
- {
- if (c=='=')
- {
- _nCurNodeType = NODETYPE.NODETYPE_NA;
- _bCurInsideAttrib = true; // enable extraction of the associated attribute value
-
- i++; // pass '=' symbol
-
- _nCursor = i;
- return true;
- }
-
- // get attribute name
- long ibegin = i;
-
- // pass until we find spaces or EOL or >
- while ( (i<=imax) && _strContent[i]!=' '
- && _strContent[i]!=0x0A
- && _strContent[i]!=0x0D
- && _strContent[i]!='='
- && _strContent[i]!='>') // check against > is just for safety
- i++;
-
- long iend = i;
-
- _strCurAttribName = _strContent.Substring((int)ibegin, (int)(iend-ibegin));
- if (_strCurAttribName.Length==0)
- {
- _strLastError = IDS_MISSINGATTRIBNAME;
- return false;
- }
-
- _strCurAttribValue = ""; // make sure the attrib value is empty for the moment
- _nCurNodeType = NODETYPE.NODETYPE_NA;
-
- _nCursor = i;
- return true;
- }
- }
- }
-
- // this code never executes
- _strLastError = IDS_GENERICSYNTAXERROR;
- return false;
- }
- }
- }