/zeroParse/zeroParse/Rule.cs

http://zeroflag.googlecode.com/ · C# · 607 lines · 475 code · 88 blank · 44 comment · 117 complexity · 2cf833e0a21828e79942d23f0892034c MD5 · raw file

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. namespace zeroflag.Parsing
  5. {
  6. [System.ComponentModel.TypeConverter( typeof( System.ComponentModel.ExpandableObjectConverter ) )]
  7. public class Rule : IEnumerable<Rule>
  8. {
  /// <summary>Explicitly assigned name; while null, <see cref="Name"/> falls back to <see cref="DefaultName"/>.</summary>
  private string _Name;

  /// <summary>Backing field of <see cref="Inner"/>.</summary>
  private Rule _Inner;

  /// <summary>
  /// Creates a rule with no explicit name and no inner rule.
  /// </summary>
  public Rule()
  {
  }

  /// <summary>
  /// Creates a rule with the given name.
  /// </summary>
  /// <param name="name">Value assigned to <see cref="Name"/>.</param>
  public Rule( string name )
  {
      Name = name;
  }

  /// <summary>
  /// Creates a rule whose inner rule is built by combining the given rules
  /// from left to right (see the instance Append helper).
  /// </summary>
  /// <param name="rules">Rules to combine, in order.</param>
  public Rule( params Rule[] rules )
  {
      Rule combined = this.Append( null, 0, rules );
      Inner = combined;
  }
  /// <summary>
  /// Folds the entries of <paramref name="rules"/>, beginning at <paramref name="index"/>,
  /// onto <paramref name="a"/>. While nothing has been accumulated yet the next entry
  /// is taken as is; afterwards each entry is joined with the binary ampersand operator.
  /// </summary>
  /// <param name="a">Rule accumulated so far, or null to start from the first entry.</param>
  /// <param name="index">Position of the first entry of <paramref name="rules"/> to use.</param>
  /// <param name="rules">Rules to combine.</param>
  /// <returns>The combined rule, or <paramref name="a"/> when no entries remain.</returns>
  Rule Append( Rule a, int index, params Rule[] rules )
  {
      Rule combined = a;
      for ( int i = index; i < rules.Length; i++ )
      {
          Rule next = rules[ i ];
          if ( combined == null )
              combined = next;
          else
              combined = combined & next;
      }
      return combined;
  }
  /// <summary>
  /// Gets or sets the nested rule of this rule.
  /// </summary>
  public Rule Inner
  {
      get
      {
          return _Inner;
      }
      set
      {
          _Inner = value;
      }
  }

  /// <summary>
  /// Gets the explicitly assigned name, or <see cref="DefaultName"/> when none was assigned.
  /// Setting stores the explicit name.
  /// </summary>
  public string Name
  {
      get
      {
          if ( _Name != null )
              return _Name;
          return this.DefaultName;
      }
      set
      {
          _Name = value;
      }
  }

  /// <summary>
  /// Name reported while no explicit name is set. This base implementation returns null.
  /// </summary>
  protected virtual string DefaultName
  {
      get
      {
          return null;
      }
  }

  // Backing field of Ignore; starts out false.
  private bool _Ignore;

  /// <summary>
  /// Gets or sets the ignore flag of this rule. Within this class it suppresses
  /// debug output in Match and expansion in DescribeStructure.
  /// </summary>
  public bool Ignore
  {
      get
      {
          return _Ignore;
      }
      set
      {
          _Ignore = value;
      }
  }
  #region StructureType
  // Backing field of StructureType.
  private Type _StructureType;

  /// <summary>
  /// What type of structural item this rule defines.
  /// </summary>
  public Type StructureType
  {
      get
      {
          return _StructureType;
      }
      set
      {
          // Storing an equal value changes nothing, so no comparison is needed first.
          _StructureType = value;
      }
  }
  #endregion StructureType
  #region Primitive
  // Backing field of Primitive; starts out false.
  private bool _Primitive;

  /// <summary>
  /// Gets or sets the primitive flag of this rule. Within this class it suppresses
  /// debug output in Match and expansion in DescribeStructure.
  /// </summary>
  public bool Primitive
  {
      get
      {
          return _Primitive;
      }
      set
      {
          // Storing an equal value changes nothing, so no comparison is needed first.
          _Primitive = value;
      }
  }
  #endregion Primitive
  /// <summary>Largest context depth accepted by <see cref="Match"/> before it aborts.</summary>
  const int MaxDepth = 2000;

  /// <summary>Time at which the last throttled "Parsing ..." progress line was written.</summary>
  static DateTime lastOutput;

  /// <summary>Minimum time between two "Parsing ..." progress lines.</summary>
  static TimeSpan outputInterval = TimeSpan.FromSeconds( 1 );

  /// <summary>
  /// Width used to pad console status lines: the console window width minus two,
  /// never negative, and zero when the width cannot be read.
  /// </summary>
  private static int StatusLineWidth()
  {
      try
      {
          // PadRight rejects negative widths, so clamp at zero.
          return Math.Max( 0, Console.WindowWidth - 2 );
      }
      catch ( System.IO.IOException )
      {
          // No usable console (e.g. redirected output): skip the padding.
          return 0;
      }
  }

  /// <summary>Makes NUL, line feed and tab visible as \0, \n and \t in status lines.</summary>
  private static string EscapeForStatus( string text )
  {
      return text.Replace( "\0", @"\0" ).Replace( "\n", @"\n" ).Replace( "\t", @"\t" );
  }

  /// <summary>Makes NUL and line breaks (CR LF or LF) visible as \0 and \n in debug lines.</summary>
  private static string EscapeForDebug( string text )
  {
      return text.Replace( "\0", @"\0" ).Replace( "\r\n", @"\n" ).Replace( "\n", @"\n" );
  }

  /// <summary>
  /// Entry point for matching this rule: registers the rule on the context, enforces
  /// the depth limit and delegates the actual work to <see cref="MatchAll"/>.
  /// </summary>
  /// <remarks>
  /// Unless the VERBOSE symbol is defined, this method also writes debug and progress
  /// output to the console and, when nothing matched, clears <c>context.Success</c>
  /// and may record a ParseFailedException in <c>context.Errors</c>.
  /// </remarks>
  /// <param name="context">Parser context to match against.</param>
  /// <returns>The token returned by <see cref="MatchAll"/>, or null when it returned none.</returns>
  /// <exception cref="DepthMaxException">The context depth exceeds <see cref="MaxDepth"/>.</exception>
  public Token Match( ParserContext context )
  {
      context.Rule = this;

      if ( context.Depth > MaxDepth )
      {
          // Report how far parsing got: the end of the current result if there is one.
          int index = context.Index;
          if ( context.Result != null )
              index = context.Result.Index + context.Result.Length;

          Console.WriteLine( ( "Reached " + ( index ).ToString().PadLeft( 6 ) + " / " + context.Source.Length.ToString().PadRight( 6 ) + ": " + EscapeForStatus( context.ToString() ) ).PadRight( StatusLineWidth() ) );
          Console.WriteLine();

          string message = "Canceling in " + this + " at depth" + context.Depth + ", line" + context.Line;
          Console.WriteLine( message + ":\n\t" + context );
          throw new DepthMaxException( this, context, message, null );
      }

  #if !VERBOSE
      // Exceptions thrown by MatchAll (ParseFailedException included) propagate unchanged.
      Token result = this.MatchAll( context );

      if ( result != null )
      {
          if ( context.Success && context.Debug && !this.Primitive && !this.Ignore && this.Name != null )
          {
              Console.WriteLine( this.Name + ": " + context.Line + "." + context.Index + " := " + EscapeForDebug( result.ToString() ) + " := " + EscapeForDebug( context.ToString() ) );
          }

          // Throttled progress line; the trailing "\r" lets the next one overwrite it.
          if ( DateTime.Now - lastOutput > outputInterval )
          {
              Console.Write( ( "Parsing " + ( result.Index + result.Length ).ToString().PadLeft( 6 ) + " / " + context.Source.Length.ToString().PadRight( 6 ) + ": " + EscapeForStatus( context.ToString() ) ).PadRight( StatusLineWidth() ) + "\r" );
              lastOutput = DateTime.Now;
          }
          return result;
      }

      if ( context.Debug && !this.Primitive && !this.Ignore && this.Name != null )
      {
          Console.Write( ( this.Name.PadRight( 15 ) + " failed" + EscapeForDebug( context.ToString() ) ).PadRight( 50 ) + "\r" );
      }

      context.Success = false;

      // The parentheses spell out how the compiler groups this condition (&& binds tighter than ||).
      // NOTE(review): confirm this grouping is the intended one.
      if ( context.LastError == null || context.LastError.Rule == null || ( context.LastError.Rule.Name == null && this.Name != null ) )
      {
          context.Errors.Add( new ParseFailedException( this, context, this + " could not match.", null ) );
      }
      return null;
  #else
      return this.MatchAll( context );
  #endif
  }
  /// <summary>
  /// Performs the match for this rule. With an inner rule, the inner rule is matched on a
  /// pushed context and its token is appended to the context's current result (a new
  /// zero-length token is created when there is none); without an inner rule the work is
  /// left to <see cref="MatchThis"/>.
  /// </summary>
  /// <param name="context">Parser context to match against.</param>
  /// <returns>The resulting token while <c>context.Success</c> is set, otherwise null.</returns>
  protected virtual Token MatchAll( ParserContext context )
  {
      Token result = context.Result;

      if ( this.Inner == null )
      {
          result = this.MatchThis( context );
      }
      else
      {
          Token inner = this.MatchInner( context.Push() );
          if ( inner != null )
          {
              result = result ?? this.CreateToken( context, 0 );
              result.Append( inner );
          }
          else
          {
              // The inner rule produced nothing: mark the whole match as failed.
              context.Success = false;
          }
      }

      return context.Success ? result : null;
  }
  /// <summary>
  /// Matches this rule itself; called by <see cref="MatchAll"/> when there is no inner rule.
  /// This base implementation matches nothing and returns null.
  /// </summary>
  /// <param name="context">Parser context to match against.</param>
  protected virtual Token MatchThis( ParserContext context )
  {
      Token nothing = null;
      return nothing;
  }

  /// <summary>
  /// Matches the inner rule against the given context.
  /// </summary>
  /// <param name="context">Parser context handed to the inner rule.</param>
  /// <returns>The inner rule's result, or null when there is no inner rule.</returns>
  protected virtual Token MatchInner( ParserContext context )
  {
      if ( this.Inner == null )
          return null;
      return this.Inner.Match( context );
  }
  216. //protected Token MatchWhiteSpace(ParserContext context)
  217. //{
  218. // return context.WhiteSpaces.Match(context);
  219. //}
  /// <summary>
  /// Creates a token for this rule at the context's current index, registers it as the
  /// context's result and marks the context as successful.
  /// </summary>
  /// <param name="context">Context supplying the index, the source text and the outer context.</param>
  /// <param name="length">Number of source characters the token covers.</param>
  /// <returns>The new token, after <see cref="FillToken"/> has been called on it.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
  protected Token CreateToken( ParserContext context, int length )
  {
      // The context is read both before and after the registration step, so a null
      // context can never work; fail with a clear argument error up front.
      if ( context == null )
          throw new ArgumentNullException( "context" );

      Token token = new Token();
      token.Rule = this;
      token.Name = this.Name;
      token.Index = context.Index;
      token.Length = length;
      token.Context = context;

      context.Result = token;
      context.Success = true;

      // Link the token to the enclosing result, when the outer context has one.
      if ( context.Outer != null && context.Outer.Result != null )
          token.Outer = context.Outer.Result;

      token.Value = context.Source.Substring( context.Index, length );
      this.FillToken( token );
      return token;
  }
  /// <summary>
  /// Hook called by CreateToken once the new token's fields have been assigned.
  /// This base implementation does nothing.
  /// </summary>
  /// <param name="token">The freshly created token.</param>
  protected virtual void FillToken( Token token )
  {
      // Intentionally empty.
  }
  /// <summary>
  /// Returns the rule's name; failing that its <see cref="Structure"/>; failing that
  /// the default object text.
  /// </summary>
  public override string ToString()
  {
      string text = this.Name;
      if ( text == null )
          text = this.Structure;
      if ( text == null )
          text = base.ToString();
      return text;
  }

  /// <summary>
  /// Gets the structure description of this rule, produced by
  /// <see cref="DescribeStructure"/> starting from an empty list of visited rules.
  /// </summary>
  public string Structure
  {
      get
      {
          List<Rule> visited = new List<Rule>();
          return this.DescribeStructure( visited );
      }
  }
  /// <summary>
  /// Describes this rule and, recursively, its inner rule.
  /// </summary>
  /// <remarks>
  /// The label is the rule's name, or its type name when it has none. A rule that was
  /// already visited, has no inner rule, or is flagged Ignore or Primitive is rendered
  /// as <c>&lt;label&gt;</c> and not expanded further; any other rule is added to
  /// <paramref name="done"/> and rendered as its label followed by the description of
  /// its inner rule.
  /// </remarks>
  /// <param name="done">Rules already expanded; used to stop on cycles. Extended by this call.</param>
  /// <returns>The textual description.</returns>
  public virtual string DescribeStructure( List<Rule> done )
  {
      bool isLeaf = done.Contains( this ) || this.Inner == null || this.Ignore || this.Primitive;
      string label = this.Name ?? this.GetType().Name;

      if ( isLeaf )
          return "<" + label + ">";

      done.Add( this );
      // Inner is known to be non-null here: the leaf check above covers the null case.
      return label + this.Inner.DescribeStructure( done );
  }
  257. #region operators
  258. #region conversions
  /// <summary>
  /// Converts a string to a rule: a null or empty string becomes a CharTerminal for
  /// '\0', a one-character string a CharTerminal for that character, and any longer
  /// string a StringTerminal.
  /// </summary>
  public static implicit operator Rule( string str )
  {
      if ( string.IsNullOrEmpty( str ) )
          return new CharTerminal( '\0' );

      if ( str.Length == 1 )
          return new CharTerminal( str[ 0 ] );

      return new StringTerminal( str );
  }

  /// <summary>
  /// Converts a character to a CharTerminal rule for that character.
  /// </summary>
  public static implicit operator Rule( char c )
  {
      Rule terminal = new CharTerminal( c );
      return terminal;
  }
  /// <summary>
  /// Converts a character array to a rule that combines the characters, in order,
  /// with the | operator.
  /// </summary>
  /// <param name="c">Characters to combine; may be null or empty.</param>
  /// <returns>The combined rule, or null when <paramref name="c"/> is null or empty.</returns>
  public static implicit operator Rule( char[] c )
  {
      // A null array is treated like an empty one instead of failing on c.Length;
      // this conversion is also reached from the | and * overloads taking char[].
      if ( c == null )
          return null;
      return Append( null, c, 0 );
  }

  /// <summary>
  /// Combines the characters of <paramref name="b"/>, beginning at <paramref name="index"/>,
  /// onto <paramref name="a"/> with the | operator.
  /// </summary>
  /// <param name="a">Rule accumulated so far, or null to start from the first character.</param>
  /// <param name="b">Characters to add; null is treated as empty.</param>
  /// <param name="index">Position of the first character of <paramref name="b"/> to use.</param>
  /// <returns>The combined rule, or <paramref name="a"/> when no characters remain.</returns>
  static Rule Append( Rule a, char[] b, int index )
  {
      if ( b == null )
          return a;

      Rule alternatives = a;
      for ( int i = index; i < b.Length; i++ )
      {
          alternatives = alternatives | b[ i ];
      }
      return alternatives;
  }
  /// <summary>
  /// Converts a regular expression to a RegexTerminal rule wrapping it.
  /// </summary>
  public static implicit operator Rule( System.Text.RegularExpressions.Regex regex )
  {
      Rule terminal = new RegexTerminal( regex );
      return terminal;
  }
  289. #endregion conversions
  #region Or
  /// <summary>
  /// Combines two rules into an Or. When either operand is null the other one is returned as is.
  /// </summary>
  public static Rule operator |( Rule a, Rule b )
  {
      if ( a == null )
          return b;
      if ( b == null )
          return a;
      return new Or( a, b );
  }

  /// <summary>
  /// Combines a rule and a character into an Or; without a left rule the converted character is returned.
  /// </summary>
  public static Rule operator |( Rule a, char b )
  {
      if ( a == null )
          return b;
      return new Or( a, b );
  }

  /// <summary>
  /// Combines a rule and a character array into an Or. Without a left rule the converted
  /// array is returned; without an array the left rule is returned.
  /// </summary>
  public static Rule operator |( Rule a, char[] b )
  {
      if ( a == null )
          return b;
      if ( b == null )
          return a;
      return new Or( a, b );
  }

  /// <summary>
  /// Combines a rule and a string into an Or; without a left rule the converted string is returned.
  /// </summary>
  public static Rule operator |( Rule a, string b )
  {
      if ( a == null )
          return b;
      return new Or( a, b );
  }

  /// <summary>
  /// Combines a rule and a regular expression into an Or; without a left rule the converted
  /// expression is returned.
  /// </summary>
  public static Rule operator |( Rule a, System.Text.RegularExpressions.Regex b )
  {
      if ( a == null )
          return b;
      return new Or( a, b );
  }

  /// <summary>
  /// Converts the character to a rule and combines it with <paramref name="b"/>;
  /// without a right rule the converted character is returned.
  /// </summary>
  public static Rule operator |( char a, Rule b )
  {
      Rule left = a;
      if ( b == null )
          return left;
      return left | b;
  }

  /// <summary>
  /// Converts the string to a rule and combines it with <paramref name="b"/>. A null string
  /// yields <paramref name="b"/>; a null rule yields the converted string.
  /// </summary>
  public static Rule operator |( string a, Rule b )
  {
      if ( a == null )
          return b;
      Rule left = a;
      if ( b == null )
          return left;
      return left | b;
  }

  /// <summary>
  /// Converts the regular expression to a rule and combines it with <paramref name="b"/>.
  /// A null expression yields <paramref name="b"/>; a null rule yields the converted expression.
  /// </summary>
  public static Rule operator |( System.Text.RegularExpressions.Regex a, Rule b )
  {
      if ( a == null )
          return b;
      Rule left = a;
      if ( b == null )
          return left;
      return left | b;
  }
  #endregion Or
  #region And
  /// <summary>
  /// Combines two rules into an And. When either operand is null the other one is returned as is.
  /// </summary>
  public static Rule operator *( Rule a, Rule b )
  {
      if ( a == null )
          return b;
      if ( b == null )
          return a;
      return new And( a, b );
  }

  /// <summary>
  /// Combines a rule and a character into an And; without a left rule the converted character is returned.
  /// </summary>
  public static Rule operator *( Rule a, char b )
  {
      if ( a == null )
          return b;
      return new And( a, b );
  }

  /// <summary>
  /// Combines a rule and a character array into an And. Without a left rule the converted
  /// array is returned; without an array the left rule is returned.
  /// </summary>
  public static Rule operator *( Rule a, char[] b )
  {
      if ( a == null )
          return b;
      if ( b == null )
          return a;
      return new And( a, b );
  }

  /// <summary>
  /// Combines a rule and a string into an And; without a left rule the converted string is returned.
  /// </summary>
  public static Rule operator *( Rule a, string b )
  {
      if ( a == null )
          return b;
      return new And( a, b );
  }

  /// <summary>
  /// Combines a rule and a regular expression into an And; without a left rule the converted
  /// expression is returned.
  /// </summary>
  public static Rule operator *( Rule a, System.Text.RegularExpressions.Regex b )
  {
      if ( a == null )
          return b;
      return new And( a, b );
  }

  /// <summary>
  /// Converts the character to a rule and combines it with <paramref name="b"/>;
  /// without a right rule the converted character is returned.
  /// </summary>
  public static Rule operator *( char a, Rule b )
  {
      Rule left = a;
      if ( b == null )
          return left;
      return left * b;
  }

  /// <summary>
  /// Converts the string to a rule and combines it with <paramref name="b"/>. A null string
  /// yields <paramref name="b"/>; a null rule yields the converted string.
  /// </summary>
  public static Rule operator *( string a, Rule b )
  {
      if ( a == null )
          return b;
      Rule left = a;
      if ( b == null )
          return left;
      return left * b;
  }

  /// <summary>
  /// Converts the regular expression to a rule and combines it with <paramref name="b"/>.
  /// A null expression yields <paramref name="b"/>; a null rule yields the converted expression.
  /// </summary>
  public static Rule operator *( System.Text.RegularExpressions.Regex a, Rule b )
  {
      if ( a == null )
          return b;
      Rule left = a;
      if ( b == null )
          return left;
      return left * b;
  }
  #endregion And
  #region Then
  /// <summary>
  /// Combines two rules into a Chain. When either operand is null the other one is returned as is.
  /// </summary>
  public static Rule operator ^( Rule a, Rule b )
  {
      if ( a == null )
          return b;
      if ( b == null )
          return a;
      return new Chain( a, b );
  }

  /// <summary>Converts the character to a rule, then chains it after <paramref name="a"/>.</summary>
  public static Rule operator ^( Rule a, char b )
  {
      Rule right = b;
      return a ^ right;
  }

  /// <summary>Converts the string to a rule, then chains it after <paramref name="a"/>.</summary>
  public static Rule operator ^( Rule a, string b )
  {
      Rule right = b;
      return a ^ right;
  }

  /// <summary>Converts the regular expression to a rule, then chains it after <paramref name="a"/>.</summary>
  public static Rule operator ^( Rule a, System.Text.RegularExpressions.Regex b )
  {
      Rule right = b;
      return a ^ right;
  }

  /// <summary>Converts the character to a rule, then chains <paramref name="b"/> after it.</summary>
  public static Rule operator ^( char a, Rule b )
  {
      Rule left = a;
      return left ^ b;
  }

  /// <summary>Converts the string to a rule, then chains <paramref name="b"/> after it.</summary>
  public static Rule operator ^( string a, Rule b )
  {
      Rule left = a;
      return left ^ b;
  }

  /// <summary>Converts the regular expression to a rule, then chains <paramref name="b"/> after it.</summary>
  public static Rule operator ^( System.Text.RegularExpressions.Regex a, Rule b )
  {
      Rule left = a;
      return left ^ b;
  }
  #endregion Then
  #region Then
  /// <summary>
  /// Chains two rules, inserting an optional Whitespace rule between them where appropriate.
  /// </summary>
  /// <remarks>
  /// When either operand is null the other one is returned as is. Otherwise:
  /// an Optional right operand is replaced by an optional chain of optional whitespace
  /// and that operand's inner rule; a Repeat right operand, or a Whitespace on either
  /// side, is chained directly; everything else is chained with optional whitespace in between.
  /// </remarks>
  public static Rule operator &( Rule a, Rule b )
  {
      if ( a == null )
          return b;
      if ( b == null )
          return a;

      if ( b is Optional )
      {
          Rule optionalSpace = ~new Whitespace();
          Rule spacedInner = optionalSpace ^ b.Inner;
          return a ^ ~spacedInner;
      }

      // Repeat on the right, or whitespace on either side: no separator is inserted.
      if ( b is Repeat || a is Whitespace || b is Whitespace )
          return a ^ b;

      Rule separator = ~new Whitespace();
      return a ^ separator ^ b;
  }

  /// <summary>Converts the character to a rule, then applies the rule/rule overload.</summary>
  public static Rule operator &( Rule a, char b )
  {
      Rule right = b;
      return a & right;
  }

  /// <summary>Converts the string to a rule, then applies the rule/rule overload.</summary>
  public static Rule operator &( Rule a, string b )
  {
      Rule right = b;
      return a & right;
  }

  /// <summary>Converts the regular expression to a rule, then applies the rule/rule overload.</summary>
  public static Rule operator &( Rule a, System.Text.RegularExpressions.Regex b )
  {
      Rule right = b;
      return a & right;
  }

  /// <summary>Converts the character to a rule, then applies the rule/rule overload.</summary>
  public static Rule operator &( char a, Rule b )
  {
      Rule left = a;
      return left & b;
  }

  /// <summary>Converts the string to a rule, then applies the rule/rule overload.</summary>
  public static Rule operator &( string a, Rule b )
  {
      Rule left = a;
      return left & b;
  }

  /// <summary>Converts the regular expression to a rule, then applies the rule/rule overload.</summary>
  public static Rule operator &( System.Text.RegularExpressions.Regex a, Rule b )
  {
      Rule left = a;
      return left & b;
  }
  #endregion Then
  #region Optional
  /// <summary>Wraps the rule in an Optional.</summary>
  public static Rule operator ~( Rule optional )
  {
      Rule wrapped = new Optional( optional );
      return wrapped;
  }
  #endregion Optional
  #region Exists
  /// <summary>Wraps the rule in an Exists.</summary>
  public static Rule operator -( Rule exists )
  {
      Rule wrapped = new Exists( exists );
      return wrapped;
  }
  #endregion Exists
  #region Repeat
  /// <summary>Wraps the rule in a Repeat.</summary>
  public static Rule operator +( Rule repeat )
  {
      Rule wrapped = new Repeat( repeat );
      return wrapped;
  }
  #endregion
  #region Not
  /// <summary>Wraps the rule in a Not.</summary>
  public static Rule operator !( Rule not )
  {
      Rule wrapped = new Not( not );
      return wrapped;
  }
  #endregion
  #region Name
  /// <summary>
  /// Creates a new rule with the given name and <paramref name="rule"/> as its inner rule.
  /// </summary>
  public static Rule operator %( string name, Rule rule )
  {
      Rule named = new Rule();
      named.Name = name;
      named.Inner = rule;
      return named;
  }
  #endregion
  501. #endregion operators
  502. #region IEnumerable<Rule> Members
  /// <summary>
  /// Returns an enumerator over the sequence produced by <see cref="Iterate"/>.
  /// </summary>
  public IEnumerator<Rule> GetEnumerator()
  {
      IEnumerable<Rule> children = this.Iterate();
      return children.GetEnumerator();
  }
  /// <summary>
  /// Enumerates the rules directly contained in this rule. This base implementation
  /// yields the inner rule when there is one and nothing otherwise, so enumerating a
  /// rule never produces a null element.
  /// </summary>
  protected virtual IEnumerable<Rule> Iterate()
  {
      Rule inner = this.Inner;
      if ( inner != null )
          yield return inner;
  }
  511. #endregion
  512. #region IEnumerable Members
  /// <summary>
  /// Non-generic enumerator; forwards to the generic <see cref="GetEnumerator"/>.
  /// </summary>
  System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
  {
      IEnumerator<Rule> typed = this.GetEnumerator();
      return typed;
  }
  517. #endregion
  518. }
  519. }