/NRefactory/ICSharpCode.NRefactory.CSharp/Parser/mcs/cs-tokenizer.cs

http://github.com/icsharpcode/ILSpy · C# · 4033 lines · 3297 code · 444 blank · 292 comment · 828 complexity · 8bf17919cb14e9bc4341acecc388325c MD5 · raw file

Large files are truncated click here to view the full file

  1. //
  2. // cs-tokenizer.cs: The Tokenizer for the C# compiler
  3. // This also implements the preprocessor
  4. //
  5. // Author: Miguel de Icaza (miguel@gnu.org)
  6. // Marek Safar (marek.safar@gmail.com)
  7. //
  8. // Dual licensed under the terms of the MIT X11 or GNU GPL
  9. //
  10. // Copyright 2001, 2002 Ximian, Inc (http://www.ximian.com)
  11. // Copyright 2004-2008 Novell, Inc
  12. // Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
  13. //
  14. using System;
  15. using System.Text;
  16. using System.Collections.Generic;
  17. using System.Globalization;
  18. using System.Diagnostics;
  19. using System.Collections;
  20. namespace Mono.CSharp
  21. {
  //
  // Token text together with its source position. Intended for use by the
  // parser only: instances are recycled once a file has been parsed, so
  // they must not be retained beyond that point.
  //
  public class LocatedToken
  {
      public int row;
      public int column;
      public string value;
      public SourceFile file;

      // Creates an empty token; callers fill in the public fields directly.
      public LocatedToken ()
      {
      }

      // Creates a token with the given text, positioned at `loc'.
      public LocatedToken (string value, Location loc)
      {
          file = loc.SourceFile;
          row = loc.Row;
          column = loc.Column;
          this.value = value;
      }

      // The token text (may be null for tokens created without a value).
      public string Value {
          get { return value; }
      }

      // Builds a Location from the stored file, row and column.
      public Location Location {
          get { return new Location (file, row, column); }
      }

      public override string ToString ()
      {
          return string.Format ("Token '{0}' at {1},{2}", Value, row, column);
      }
  }
  54. /// <summary>
  55. /// Tokenizer for C# source code.
  56. /// </summary>
  57. public class Tokenizer : yyParser.yyInput
  58. {
  //
  // One node of a singly linked chain of keywords. Each node stores the
  // keyword text as a char array, the token associated with it and a link
  // to the next node in the same chain.
  //
  class KeywordEntry<T>
  {
      public readonly char[] Value;
      public readonly T Token;
      public KeywordEntry<T> Next;

      public KeywordEntry (string value, T token)
      {
          Value = value.ToCharArray ();
          Token = token;
      }
  }
  //
  // Compares and hashes char buffers using only their first `length'
  // characters; anything stored past that prefix is ignored. Neither
  // method validates that the arrays are at least `length' long.
  //
  sealed class IdentifiersComparer : IEqualityComparer<char[]>
  {
      readonly int length;

      public IdentifiersComparer (int length)
      {
          this.length = length;
      }

      // True when the first `length' characters of both buffers match.
      public bool Equals (char[] x, char[] y)
      {
          int index = 0;
          while (index < length) {
              if (x [index] != y [index])
                  return false;
              ++index;
          }

          return true;
      }

      // Hash over the first `length' characters only.
      public int GetHashCode (char[] obj)
      {
          int hash = 0;
          int index = 0;
          while (index < length) {
              hash = (hash << 5) - hash + obj [index];
              ++index;
          }

          return hash;
      }
  }
  //
  // Hands out LocatedToken instances, reusing the slots of a caller
  // supplied array for as long as slots remain and allocating fresh
  // objects afterwards.
  //
  public class LocatedTokenBuffer
  {
      readonly LocatedToken[] buffer;

      // Index of the next buffer slot to hand out.
      public int pos;

      public LocatedTokenBuffer ()
      {
          buffer = new LocatedToken[0];
      }

      // A null `buffer' is treated like an empty one.
      public LocatedTokenBuffer (LocatedToken[] buffer)
      {
          this.buffer = buffer ?? new LocatedToken[0];
      }

      // Same as the four-argument overload with a null token value.
      public LocatedToken Create (SourceFile file, int row, int column)
      {
          return Create (null, file, row, column);
      }

      public LocatedToken Create (string value, SourceFile file, int row, int column)
      {
          //
          // TODO: I am not very happy about the logic but it's the best
          // I could come up with for now.
          // Ideally we should be using just a tiny buffer (256 elements), which
          // is enough to hold all details for the current stack, and recycle
          // elements popped from the stack, but there is a trick needed to
          // recycle them properly.
          //
          LocatedToken entry;
          if (pos < buffer.Length) {
              // Reuse the slot, filling it in on first use.
              entry = buffer [pos];
              if (entry == null) {
                  entry = new LocatedToken ();
                  buffer [pos] = entry;
              }

              ++pos;
          } else {
              // Buffer exhausted: allocate without advancing `pos'.
              entry = new LocatedToken ();
          }

          entry.value = value;
          entry.file = file;
          entry.row = row;
          entry.column = column;
          return entry;
      }

      //
      // Used for tokens not required by the expression evaluator; calls
      // are compiled only when FULL_AST is defined.
      //
      [Conditional ("FULL_AST")]
      public void CreateOptional (SourceFile file, int row, int col, ref object token)
      {
          token = Create (file, row, col);
      }
  }
  //
  // Preprocessor directives. The low bits identify the directive; the two
  // high flag bits are OR-ed into the directives they apply to.
  //
  public enum PreprocessorDirective
  {
      // Flag bits combined with the directive ids below
      CustomArgumentsParsing = 1 << 10,
      RequiresArgument = 1 << 11,

      Invalid = 0,

      Region = 1,
      Endregion = 2,
      If = 3 | RequiresArgument,
      Endif = 4,
      Elif = 5 | RequiresArgument,
      Else = 6,
      Define = 7 | RequiresArgument,
      Undef = 8 | RequiresArgument,
      Error = 9,
      Warning = 10,
      Pragma = 11 | CustomArgumentsParsing,
      Line = 12 | CustomArgumentsParsing
  }
  162. readonly SeekableStreamReader reader;
  163. readonly CompilationSourceFile source_file;
  164. public CompilationSourceFile SourceFile { get { return source_file; } }
  165. readonly CompilerContext context;
  166. readonly Report Report;
  167. SourceFile current_source;
  168. Location hidden_block_start;
  169. int ref_line = 1;
  170. int line = 1;
  171. int col = 0;
  172. int previous_col;
  173. int current_token;
  174. readonly int tab_size;
  175. bool handle_get_set = false;
  176. bool handle_remove_add = false;
  177. bool handle_where;
  178. bool lambda_arguments_parsing;
  179. List<Location> escaped_identifiers;
  180. int parsing_generic_less_than;
  181. readonly bool doc_processing;
  182. readonly LocatedTokenBuffer ltb;
  183. //
  184. // Used mainly for parser optimizations. Some expressions for instance
  185. // can appear only in block (including initializer, base initializer)
  186. // scope only
  187. //
  188. public int parsing_block;
  189. internal bool query_parsing;
  190. //
  191. // When parsing type only, useful for ambiguous nullable types
  192. //
  193. public int parsing_type;
  194. //
  195. // Set when parsing generic declaration (type or method header)
  196. //
  197. public bool parsing_generic_declaration;
  198. public bool parsing_generic_declaration_doc;
  199. //
  200. // The value indicates that we have not reached any declaration or
  201. // namespace yet
  202. //
  203. public int parsing_declaration;
  204. public bool parsing_attribute_section;
  205. public bool parsing_modifiers;
  206. //
  207. // The special characters to inject on streams to run the unit parser
  208. // in the special expression mode. Using private characters from
  209. // Plane Sixteen (U+100000 to U+10FFFD)
  210. //
  211. // This character is only tested just before the tokenizer is about to report
  212. // an error; So on the regular operation mode, this addition will have no
  213. // impact on the tokenizer's performance.
  214. //
  215. public const int EvalStatementParserCharacter = 0x100000;
  216. public const int EvalCompilationUnitParserCharacter = 0x100001;
  217. public const int EvalUsingDeclarationsParserCharacter = 0x100002;
  218. public const int DocumentationXref = 0x100003;
  219. const int UnicodeLS = 0x2028;
  220. const int UnicodePS = 0x2029;
  221. //
  222. // XML documentation buffer. The save point is used to divide
  223. // comments on types and comments on members.
  224. //
  225. StringBuilder xml_comment_buffer;
  226. //
  227. // See comment on XmlCommentState enumeration.
  228. //
  229. XmlCommentState xml_doc_state = XmlCommentState.Allowed;
  230. //
  231. // Whether tokens have been seen on this line
  232. //
  233. bool tokens_seen = false;
  234. //
  235. // Set to true once the GENERATE_COMPLETION token has been
  236. // returned. This helps produce one GENERATE_COMPLETION,
  237. // as many COMPLETE_COMPLETION as necessary to complete the
  238. // AST tree and one final EOF.
  239. //
  240. bool generated;
  241. //
  242. // Whether a token has been seen on the file
  243. // This is needed because `define' is not allowed to be used
  244. // after a token has been seen.
  245. //
  246. bool any_token_seen;
  247. //
  248. // Class variables
  249. //
  250. static readonly KeywordEntry<int>[][] keywords;
  251. static readonly KeywordEntry<PreprocessorDirective>[][] keywords_preprocessor;
  252. static readonly HashSet<string> keyword_strings;
  253. static readonly NumberStyles styles;
  254. static readonly NumberFormatInfo csharp_format_info;
  255. // Pragma arguments
  256. static readonly char[] pragma_warning = "warning".ToCharArray ();
  257. static readonly char[] pragma_warning_disable = "disable".ToCharArray ();
  258. static readonly char[] pragma_warning_restore = "restore".ToCharArray ();
  259. static readonly char[] pragma_checksum = "checksum".ToCharArray ();
  260. static readonly char[] line_hidden = "hidden".ToCharArray ();
  261. static readonly char[] line_default = "default".ToCharArray ();
  262. static readonly char[] simple_whitespaces = new char[] { ' ', '\t' };
  263. bool startsLine = true;
  264. internal SpecialsBag sbag;
  //
  // Gates the `get' and `set' keywords: GetKeyword only reports them as
  // keywords while this is true.
  //
  public bool PropertyParsing {
      get {
          return handle_get_set;
      }
      set {
          handle_get_set = value;
      }
  }
  //
  // Gates the `add' and `remove' keywords: GetKeyword only reports them
  // as keywords while this is true.
  //
  public bool EventParsing {
      get {
          return handle_remove_add;
      }
      set {
          handle_remove_add = value;
      }
  }
  //
  // One of the conditions GetKeyword consults when deciding whether
  // `where' is a keyword.
  //
  public bool ConstraintsParsing {
      get {
          return handle_where;
      }
      set {
          handle_where = value;
      }
  }
  //
  // Current XML documentation comment state. Assigning
  // XmlCommentState.Allowed first runs check_incorrect_doc_comment and
  // reset_doc_comment; any other value is simply stored.
  //
  public XmlCommentState doc_state {
      get {
          return xml_doc_state;
      }
      set {
          bool allowing = value == XmlCommentState.Allowed;
          if (allowing) {
              check_incorrect_doc_comment ();
              reset_doc_comment ();
          }

          xml_doc_state = value;
      }
  }
  287. //
  288. // This is used to trigger completion generation on the parser
  289. public bool CompleteOnEOF;
  //
  // Records `loc' in the list of escaped identifier locations. The list
  // is created on first use.
  //
  void AddEscapedIdentifier (Location loc)
  {
      var locations = escaped_identifiers;
      if (locations == null) {
          locations = new List<Location> ();
          escaped_identifiers = locations;
      }

      locations.Add (loc);
  }
  //
  // True when the location of `name' was previously recorded through
  // AddEscapedIdentifier.
  //
  public bool IsEscapedIdentifier (ATypeNameExpression name)
  {
      // Nothing has been recorded yet
      if (escaped_identifiers == null)
          return false;

      return escaped_identifiers.Contains (name.Location);
  }
  300. //
  301. // Values for the associated token returned
  302. //
  303. internal int putback_char; // Used by repl only
  304. object val;
  305. //
  306. // Pre-processor
  307. //
  308. const int TAKING = 1;
  309. const int ELSE_SEEN = 4;
  310. const int PARENT_TAKING = 8;
  311. const int REGION = 16;
  312. //
  313. // pre-processor if stack state:
  314. //
  315. Stack<int> ifstack;
  316. public const int MaxIdentifierLength = 512;
  317. public const int MaxNumberLength = 512;
  318. readonly char[] id_builder;
  319. readonly Dictionary<char[], string>[] identifiers;
  320. readonly char[] number_builder;
  321. int number_pos;
  322. char[] value_builder = new char[64];
  //
  // Reads or overrides ref_line, the line number used when building the
  // Location property.
  //
  public int Line {
      get { return ref_line; }
      set { ref_line = value; }
  }
  //
  // Reads or overrides col, the column used when building the Location
  // property.
  //
  public int Column {
      get { return col; }
      set { col = value; }
  }
  339. //
  340. // This is used when the tokenizer needs to save
  341. // the current position as it needs to do some parsing
  342. // on its own to disambiguate a token on behalf of the
  343. // parser.
  344. //
  345. Stack<Position> position_stack = new Stack<Position> (2);
  //
  // Snapshot of the tokenizer state captured by PushPosition and written
  // back by PopPosition.
  //
  class Position
  {
      public int position;
      public int line;
      public int ref_line;
      public int col;
      public Location hidden;
      public int putback_char;
      public int previous_col;
      public Stack<int> ifstack;
      public int parsing_generic_less_than;
      public int current_token;
      public object val;

      // Copies the current state of `t' into this snapshot.
      public Position (Tokenizer t)
      {
          position = t.reader.Position;
          line = t.line;
          ref_line = t.ref_line;
          col = t.col;
          hidden = t.hidden_block_start;
          putback_char = t.putback_char;
          previous_col = t.previous_col;
          parsing_generic_less_than = t.parsing_generic_less_than;
          current_token = t.current_token;
          val = t.val;

          //
          // A null or empty stack is recorded as null. Otherwise copy it:
          // ToArray yields the elements top first, so the array is
          // reversed before it is pushed onto the new stack to keep the
          // original order.
          //
          Stack<int> live = t.ifstack;
          if (live != null && live.Count != 0) {
              int[] snapshot = live.ToArray ();
              Array.Reverse (snapshot);
              ifstack = new Stack<int> (snapshot);
          }
      }
  }
  //
  // Creates a tokenizer reading from `input'. The identifier, number and
  // located-token buffers are taken from `session'; the documentation
  // flag and tab size come from the compiler settings reached via `file'.
  //
  public Tokenizer (SeekableStreamReader input, CompilationSourceFile file, ParserSession session, Report report)
  {
      reader = input;
      Report = report;

      source_file = file;
      context = file.Compiler;
      current_source = file.SourceFile;

      // Buffers owned by the parser session
      identifiers = session.Identifiers;
      id_builder = session.IDBuilder;
      number_builder = session.NumberBuilder;
      ltb = new LocatedTokenBuffer (session.LocatedTokens);

      // -1 means no character has been put back
      putback_char = -1;
      xml_comment_buffer = new StringBuilder ();

      doc_processing = context.Settings.DocumentationFile != null;
      tab_size = context.Settings.TabSize;
  }
  //
  // Saves the current tokenizer state on the position stack; pair with
  // PopPosition (restore) or DiscardPosition (drop).
  //
  public void PushPosition ()
  {
      var snapshot = new Position (this);
      position_stack.Push (snapshot);
  }
  //
  // Removes the most recently pushed snapshot and restores every piece
  // of tokenizer state it recorded, including the reader position.
  //
  public void PopPosition ()
  {
      Position saved = position_stack.Pop ();

      reader.Position = saved.position;
      line = saved.line;
      ref_line = saved.ref_line;
      col = saved.col;
      previous_col = saved.previous_col;
      hidden_block_start = saved.hidden;
      putback_char = saved.putback_char;
      ifstack = saved.ifstack;
      parsing_generic_less_than = saved.parsing_generic_less_than;
      current_token = saved.current_token;
      val = saved.val;
  }
  //
  // Drops the most recently pushed snapshot without restoring it; the
  // tokenizer stays where it currently is.
  //
  public void DiscardPosition ()
  {
      position_stack.Pop ();
  }
  //
  // Registers a language keyword in both the lookup table and the
  // keyword_strings set used by IsKeyword.
  //
  static void AddKeyword (string kw, int token)
  {
      keyword_strings.Add (kw);
      AddKeyword<int> (keywords, kw, token);
  }
  //
  // Registers a preprocessor directive name in the preprocessor table.
  //
  static void AddPreprocessorKeyword (string kw, PreprocessorDirective directive)
  {
      AddKeyword<PreprocessorDirective> (keywords_preprocessor, kw, directive);
  }
  //
  // Adds `kw' to a keyword table. The table is indexed first by keyword
  // length and then by the first character (offset from '_'); keywords
  // sharing both are chained through KeywordEntry.Next in insertion order.
  //
  static void AddKeyword<T> (KeywordEntry<T>[][] keywords, string kw, T token)
  {
      KeywordEntry<T>[] bucket = keywords [kw.Length];
      if (bucket == null) {
          // One slot per character in the range '_'..'z'
          bucket = new KeywordEntry<T>['z' - '_' + 1];
          keywords [kw.Length] = bucket;
      }

      int slot = kw [0] - '_';
      KeywordEntry<T> tail = bucket [slot];
      if (tail == null) {
          bucket [slot] = new KeywordEntry<T> (kw, token);
          return;
      }

      // Append at the end of the existing chain
      while (tail.Next != null)
          tail = tail.Next;

      tail.Next = new KeywordEntry<T> (kw, token);
  }
  //
  // Class initializer: builds the keyword and preprocessor directive
  // lookup tables and the number parsing settings.
  //
  static Tokenizer ()
  {
      //
      // The tables are indexed by keyword length, so each needs one more
      // slot than the length of its longest entry ("stackalloc" and
      // "endregion" respectively, for now).
      //
      const int longest_keyword = 10;
      const int longest_directive = 9;

      keyword_strings = new HashSet<string> ();
      keywords = new KeywordEntry<int>[longest_keyword + 1][];

      // Keywords with a double underscore prefix
      AddKeyword ("__arglist", Token.ARGLIST);
      AddKeyword ("__makeref", Token.MAKEREF);
      AddKeyword ("__reftype", Token.REFTYPE);
      AddKeyword ("__refvalue", Token.REFVALUE);

      // Language keywords, including the ones GetKeyword treats contextually
      AddKeyword ("abstract", Token.ABSTRACT);
      AddKeyword ("as", Token.AS);
      AddKeyword ("add", Token.ADD);
      AddKeyword ("base", Token.BASE);
      AddKeyword ("bool", Token.BOOL);
      AddKeyword ("break", Token.BREAK);
      AddKeyword ("byte", Token.BYTE);
      AddKeyword ("case", Token.CASE);
      AddKeyword ("catch", Token.CATCH);
      AddKeyword ("char", Token.CHAR);
      AddKeyword ("checked", Token.CHECKED);
      AddKeyword ("class", Token.CLASS);
      AddKeyword ("const", Token.CONST);
      AddKeyword ("continue", Token.CONTINUE);
      AddKeyword ("decimal", Token.DECIMAL);
      AddKeyword ("default", Token.DEFAULT);
      AddKeyword ("delegate", Token.DELEGATE);
      AddKeyword ("do", Token.DO);
      AddKeyword ("double", Token.DOUBLE);
      AddKeyword ("else", Token.ELSE);
      AddKeyword ("enum", Token.ENUM);
      AddKeyword ("event", Token.EVENT);
      AddKeyword ("explicit", Token.EXPLICIT);
      AddKeyword ("extern", Token.EXTERN);
      AddKeyword ("false", Token.FALSE);
      AddKeyword ("finally", Token.FINALLY);
      AddKeyword ("fixed", Token.FIXED);
      AddKeyword ("float", Token.FLOAT);
      AddKeyword ("for", Token.FOR);
      AddKeyword ("foreach", Token.FOREACH);
      AddKeyword ("goto", Token.GOTO);
      AddKeyword ("get", Token.GET);
      AddKeyword ("if", Token.IF);
      AddKeyword ("implicit", Token.IMPLICIT);
      AddKeyword ("in", Token.IN);
      AddKeyword ("int", Token.INT);
      AddKeyword ("interface", Token.INTERFACE);
      AddKeyword ("internal", Token.INTERNAL);
      AddKeyword ("is", Token.IS);
      AddKeyword ("lock", Token.LOCK);
      AddKeyword ("long", Token.LONG);
      AddKeyword ("namespace", Token.NAMESPACE);
      AddKeyword ("new", Token.NEW);
      AddKeyword ("null", Token.NULL);
      AddKeyword ("object", Token.OBJECT);
      AddKeyword ("operator", Token.OPERATOR);
      AddKeyword ("out", Token.OUT);
      AddKeyword ("override", Token.OVERRIDE);
      AddKeyword ("params", Token.PARAMS);
      AddKeyword ("private", Token.PRIVATE);
      AddKeyword ("protected", Token.PROTECTED);
      AddKeyword ("public", Token.PUBLIC);
      AddKeyword ("readonly", Token.READONLY);
      AddKeyword ("ref", Token.REF);
      AddKeyword ("remove", Token.REMOVE);
      AddKeyword ("return", Token.RETURN);
      AddKeyword ("sbyte", Token.SBYTE);
      AddKeyword ("sealed", Token.SEALED);
      AddKeyword ("set", Token.SET);
      AddKeyword ("short", Token.SHORT);
      AddKeyword ("sizeof", Token.SIZEOF);
      AddKeyword ("stackalloc", Token.STACKALLOC);
      AddKeyword ("static", Token.STATIC);
      AddKeyword ("string", Token.STRING);
      AddKeyword ("struct", Token.STRUCT);
      AddKeyword ("switch", Token.SWITCH);
      AddKeyword ("this", Token.THIS);
      AddKeyword ("throw", Token.THROW);
      AddKeyword ("true", Token.TRUE);
      AddKeyword ("try", Token.TRY);
      AddKeyword ("typeof", Token.TYPEOF);
      AddKeyword ("uint", Token.UINT);
      AddKeyword ("ulong", Token.ULONG);
      AddKeyword ("unchecked", Token.UNCHECKED);
      AddKeyword ("unsafe", Token.UNSAFE);
      AddKeyword ("ushort", Token.USHORT);
      AddKeyword ("using", Token.USING);
      AddKeyword ("virtual", Token.VIRTUAL);
      AddKeyword ("void", Token.VOID);
      AddKeyword ("volatile", Token.VOLATILE);
      AddKeyword ("while", Token.WHILE);
      AddKeyword ("partial", Token.PARTIAL);
      AddKeyword ("where", Token.WHERE);

      // LINQ keywords
      AddKeyword ("from", Token.FROM);
      AddKeyword ("join", Token.JOIN);
      AddKeyword ("on", Token.ON);
      AddKeyword ("equals", Token.EQUALS);
      AddKeyword ("select", Token.SELECT);
      AddKeyword ("group", Token.GROUP);
      AddKeyword ("by", Token.BY);
      AddKeyword ("let", Token.LET);
      AddKeyword ("orderby", Token.ORDERBY);
      AddKeyword ("ascending", Token.ASCENDING);
      AddKeyword ("descending", Token.DESCENDING);
      AddKeyword ("into", Token.INTO);

      // Contextual async keywords
      AddKeyword ("async", Token.ASYNC);
      AddKeyword ("await", Token.AWAIT);

      // Preprocessor directives
      keywords_preprocessor = new KeywordEntry<PreprocessorDirective>[longest_directive + 1][];

      AddPreprocessorKeyword ("region", PreprocessorDirective.Region);
      AddPreprocessorKeyword ("endregion", PreprocessorDirective.Endregion);
      AddPreprocessorKeyword ("if", PreprocessorDirective.If);
      AddPreprocessorKeyword ("endif", PreprocessorDirective.Endif);
      AddPreprocessorKeyword ("elif", PreprocessorDirective.Elif);
      AddPreprocessorKeyword ("else", PreprocessorDirective.Else);
      AddPreprocessorKeyword ("define", PreprocessorDirective.Define);
      AddPreprocessorKeyword ("undef", PreprocessorDirective.Undef);
      AddPreprocessorKeyword ("error", PreprocessorDirective.Error);
      AddPreprocessorKeyword ("warning", PreprocessorDirective.Warning);
      AddPreprocessorKeyword ("pragma", PreprocessorDirective.Pragma);
      AddPreprocessorKeyword ("line", PreprocessorDirective.Line);

      // Number parsing settings: culture-invariant format, float style
      csharp_format_info = NumberFormatInfo.InvariantInfo;
      styles = NumberStyles.Float;
  }
  //
  // Looks up the first `id_len' characters of `id' in the keyword table.
  // Returns the token to use, or -1 when the text has to be treated as an
  // ordinary identifier. Several keywords are contextual and are decided
  // here from the current tokenizer state; some of those decisions read
  // ahead in the input and therefore may overwrite id_builder, which is
  // why a few branches restore its content before returning -1.
  //
  int GetKeyword (char[] id, int id_len)
  {
      //
      // Keywords are stored in an array of arrays grouped by their
      // length and then by the first character
      //
      if (id_len <= 0 || id_len >= keywords.Length || keywords [id_len] == null)
          return -1;

      //
      // Buckets cover only the characters '_'..'z'. Anything below '_'
      // (upper-case letters, digits, ...) would produce a negative index,
      // so reject it here instead of indexing out of range.
      //
      int first_index = id [0] - '_';
      if (first_index < 0 || first_index > 'z' - '_')
          return -1;

      var kwe = keywords [id_len] [first_index];
      if (kwe == null)
          return -1;

      //
      // Walk the chain of keywords sharing length and first character.
      // 0 is used as the `no match yet' marker.
      //
      int res;
      do {
          res = kwe.Token;
          for (int i = 1; i < id_len; ++i) {
              if (id [i] != kwe.Value [i]) {
                  res = 0;
                  kwe = kwe.Next;
                  break;
              }
          }
      } while (res == 0 && kwe != null);

      if (res == 0)
          return -1;

      int next_token;
      switch (res) {
      case Token.GET:
      case Token.SET:
          // Keywords only while PropertyParsing is set
          if (!handle_get_set)
              res = -1;
          break;
      case Token.REMOVE:
      case Token.ADD:
          // Keywords only while EventParsing is set
          if (!handle_remove_add)
              res = -1;
          break;
      case Token.EXTERN:
          // Before any declaration has been parsed, report EXTERN_ALIAS
          if (parsing_declaration == 0)
              res = Token.EXTERN_ALIAS;
          break;
      case Token.DEFAULT:
          // `default' immediately followed by `:' is merged into one token
          if (peek_token () == Token.COLON) {
              token ();
              res = Token.DEFAULT_COLON;
          }
          break;
      case Token.WHERE:
          if (!(handle_where && current_token != Token.COLON) && !query_parsing)
              res = -1;
          break;
      case Token.FROM:
          //
          // A query expression is any expression that starts with `from identifier'
          // followed by any token except ; , =
          //
          if (!query_parsing) {
              if (lambda_arguments_parsing || parsing_block == 0) {
                  res = -1;
                  break;
              }

              PushPosition ();
              // HACK: to disable generics micro-parser, because PushPosition does not
              // store identifiers array
              parsing_generic_less_than = 1;
              switch (xtoken ()) {
              case Token.IDENTIFIER:
              case Token.INT:
              case Token.BOOL:
              case Token.BYTE:
              case Token.CHAR:
              case Token.DECIMAL:
              case Token.DOUBLE:
              case Token.FLOAT:
              case Token.LONG:
              case Token.OBJECT:
              case Token.STRING:
              case Token.UINT:
              case Token.ULONG:
                  next_token = xtoken ();
                  if (next_token == Token.SEMICOLON || next_token == Token.COMMA || next_token == Token.EQUALS || next_token == Token.ASSIGN)
                      goto default;

                  res = Token.FROM_FIRST;
                  query_parsing = true;
                  if (context.Settings.Version <= LanguageVersion.ISO_2)
                      Report.FeatureIsNotAvailable (context, Location, "query expressions");
                  break;
              case Token.VOID:
                  Expression.Error_VoidInvalidInTheContext (Location, Report);
                  break;
              default:
                  PopPosition ();
                  // HACK: A token is not a keyword so we need to restore identifiers buffer
                  // which has been overwritten before we grabbed the identifier
                  id_builder [0] = 'f'; id_builder [1] = 'r'; id_builder [2] = 'o'; id_builder [3] = 'm';
                  return -1;
              }
              PopPosition ();
          }
          break;
      case Token.JOIN:
      case Token.ON:
      case Token.EQUALS:
      case Token.SELECT:
      case Token.GROUP:
      case Token.BY:
      case Token.LET:
      case Token.ORDERBY:
      case Token.ASCENDING:
      case Token.DESCENDING:
      case Token.INTO:
          // Query keywords are identifiers outside of a query expression
          if (!query_parsing)
              res = -1;
          break;
      case Token.USING:
      case Token.NAMESPACE:
          // TODO: some explanation needed
          check_incorrect_doc_comment ();
          parsing_modifiers = false;
          break;
      case Token.PARTIAL:
          if (parsing_block > 0) {
              res = -1;
              break;
          }

          // Save current position and parse next token.
          PushPosition ();

          next_token = token ();
          bool ok = (next_token == Token.CLASS) ||
              (next_token == Token.STRUCT) ||
              (next_token == Token.INTERFACE) ||
              (next_token == Token.VOID);

          PopPosition ();

          if (ok) {
              if (next_token == Token.VOID) {
                  if (context.Settings.Version <= LanguageVersion.ISO_2)
                      Report.FeatureIsNotAvailable (context, Location, "partial methods");
              } else if (context.Settings.Version == LanguageVersion.ISO_1)
                  Report.FeatureIsNotAvailable (context, Location, "partial types");

              return res;
          }

          if (next_token < Token.LAST_KEYWORD) {
              Report.Error (267, Location,
                  "The `partial' modifier can be used only immediately before `class', `struct', `interface', or `void' keyword");
              return token ();
          }

          // HACK: A token is not a keyword so we need to restore identifiers buffer
          // which has been overwritten before we grabbed the identifier
          id_builder [0] = 'p';
          id_builder [1] = 'a';
          id_builder [2] = 'r';
          id_builder [3] = 't';
          id_builder [4] = 'i';
          id_builder [5] = 'a';
          id_builder [6] = 'l';
          res = -1;
          break;
      case Token.ASYNC:
          if (parsing_modifiers) {
              //
              // Skip attributes section or constructor called async
              //
              if (parsing_attribute_section || peek_token () == Token.OPEN_PARENS) {
                  res = -1;
              } else {
                  // async is keyword
              }
          } else if (parsing_block > 0) {
              switch (peek_token ()) {
              case Token.DELEGATE:
              case Token.OPEN_PARENS_LAMBDA:
                  // async is keyword
                  break;
              case Token.IDENTIFIER:
                  // Keyword only when followed by `identifier =>'
                  PushPosition ();
                  xtoken ();
                  if (xtoken () != Token.ARROW) {
                      PopPosition ();
                      goto default;
                  }

                  PopPosition ();
                  break;
              default:
                  // peek_token could overwrite id_buffer
                  id_builder [0] = 'a'; id_builder [1] = 's'; id_builder [2] = 'y'; id_builder [3] = 'n'; id_builder [4] = 'c';
                  res = -1;
                  break;
              }
          } else {
              res = -1;
          }

          if (res == Token.ASYNC && context.Settings.Version <= LanguageVersion.V_4) {
              Report.FeatureIsNotAvailable (context, Location, "asynchronous functions");
          }

          break;
      case Token.AWAIT:
          // Only a keyword inside a block
          if (parsing_block == 0)
              res = -1;
          break;
      }

      return res;
  }
  //
  // Looks up the first `id_len' characters of `id' in the preprocessor
  // directive table. Returns PreprocessorDirective.Invalid when the text
  // is not a known directive name.
  //
  static PreprocessorDirective GetPreprocessorDirective (char[] id, int id_len)
  {
      //
      // Keywords are stored in an array of arrays grouped by their
      // length and then by the first character
      //
      if (id_len <= 0 || id_len >= keywords_preprocessor.Length || keywords_preprocessor [id_len] == null)
          return PreprocessorDirective.Invalid;

      //
      // Buckets cover only the characters '_'..'z'; a first character
      // below '_' would give a negative index, so it is rejected here.
      //
      int first_index = id [0] - '_';
      if (first_index < 0 || first_index > 'z' - '_')
          return PreprocessorDirective.Invalid;

      var kwe = keywords_preprocessor [id_len] [first_index];
      if (kwe == null)
          return PreprocessorDirective.Invalid;

      // Walk the chain of directives sharing length and first character
      PreprocessorDirective res;
      do {
          res = kwe.Token;
          for (int i = 1; i < id_len; ++i) {
              if (id [i] != kwe.Value [i]) {
                  res = PreprocessorDirective.Invalid;
                  kwe = kwe.Next;
                  break;
              }
          }
      } while (res == PreprocessorDirective.Invalid && kwe != null);

      return res;
  }
  //
  // Current position of the tokenizer, built from the current source
  // file, ref_line and col.
  //
  public Location Location {
      get { return new Location (current_source, ref_line, col); }
  }
  //
  // True when the code point `c' can start an identifier: an ASCII letter,
  // an underscore or any other Unicode letter.
  //
  // `c' is an int and may be negative or lie above U+FFFF (the tokenizer
  // itself defines marker values in Plane Sixteen). Such values must not
  // be truncated to a char, otherwise they would be classified by their
  // low 16 bits only.
  //
  static bool is_identifier_start_character (int c)
  {
      if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')
          return true;

      // Remaining ASCII characters and negative values are never letters
      if (c < 0x80)
          return false;

      if (c <= Char.MaxValue)
          return Char.IsLetter ((char) c);

      // Not a Unicode code point at all
      if (c > 0x10FFFF)
          return false;

      // Supplementary planes: classify the real code point
      return Char.IsLetter (Char.ConvertFromUtf32 (c), 0);
  }
  //
  // True when `c' may appear inside an identifier. ASCII is decided
  // inline; everything else is delegated to the slow path.
  //
  static bool is_identifier_part_character (char c)
  {
      if (c < 0x80) {
          bool letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
          bool digit = c >= '0' && c <= '9';
          return letter || digit || c == '_';
      }

      return is_identifier_part_character_slow_part (c);
  }
  //
  // Unicode-aware check for identifier part characters: letters,
  // connector punctuation, combining characters (classes Mn and Mc) and
  // decimal digits (class Nd).
  //
  static bool is_identifier_part_character_slow_part (char c)
  {
      if (Char.IsLetter (c))
          return true;

      UnicodeCategory category = Char.GetUnicodeCategory (c);
      return category == UnicodeCategory.ConnectorPunctuation ||
          category == UnicodeCategory.NonSpacingMark ||
          category == UnicodeCategory.SpacingCombiningMark ||
          category == UnicodeCategory.DecimalDigitNumber;
  }
  //
  // True when `s' is one of the strings registered through AddKeyword.
  // Contextual keywords are included regardless of parsing state.
  //
  public static bool IsKeyword (string s)
  {
      bool registered = keyword_strings.Contains (s);
      return registered;
  }
  //
  // Open parens micro parser. Detects both lambda and cast ambiguity.
  //
  // Reads tokens following an opening parenthesis and returns
  // Token.OPEN_PARENS_LAMBDA, Token.OPEN_PARENS_CAST or Token.OPEN_PARENS.
  // The method itself does not rewind the input.
  //
  int TokenizeOpenParens ()
  {
      int previous;
      current_token = -1;

      int nesting = 0;
      bool definitely_type = false;
      bool maybe_type = false;

      for (;;) {
          previous = current_token;
          token ();

          switch (current_token) {
          case Token.CLOSE_PARENS:
              token ();

              //
              // Expression inside parens is lambda, (int i) =>
              //
              if (current_token == Token.ARROW)
                  return Token.OPEN_PARENS_LAMBDA;

              //
              // Expression inside parens is single type, (int[])
              //
              if (definitely_type)
                  return current_token == Token.SEMICOLON ? Token.OPEN_PARENS : Token.OPEN_PARENS_CAST;

              if (!maybe_type)
                  return Token.OPEN_PARENS;

              //
              // Expression is possible cast, look at next token, (T)null
              //
              switch (current_token) {
              case Token.OPEN_PARENS:
              case Token.BANG:
              case Token.TILDE:
              case Token.IDENTIFIER:
              case Token.LITERAL:
              case Token.BASE:
              case Token.CHECKED:
              case Token.DELEGATE:
              case Token.FALSE:
              case Token.FIXED:
              case Token.NEW:
              case Token.NULL:
              case Token.SIZEOF:
              case Token.THIS:
              case Token.THROW:
              case Token.TRUE:
              case Token.TYPEOF:
              case Token.UNCHECKED:
              case Token.UNSAFE:
              case Token.DEFAULT:
              case Token.AWAIT:

              //
              // These can be part of a member access
              //
              case Token.INT:
              case Token.UINT:
              case Token.SHORT:
              case Token.USHORT:
              case Token.LONG:
              case Token.ULONG:
              case Token.DOUBLE:
              case Token.FLOAT:
              case Token.CHAR:
              case Token.BYTE:
              case Token.DECIMAL:
              case Token.BOOL:
                  return Token.OPEN_PARENS_CAST;
              }

              return Token.OPEN_PARENS;

          case Token.DOT:
          case Token.DOUBLE_COLON:
              // Only valid right after an identifier or a closed generic list
              if (previous == Token.IDENTIFIER || previous == Token.OP_GENERICS_GT)
                  continue;

              return Token.OPEN_PARENS;

          case Token.IDENTIFIER:
          case Token.AWAIT:
              if (previous == Token.DOT) {
                  if (nesting == 0) {
                      definitely_type = false;
                      maybe_type = true;
                  }
              } else if (previous == Token.OP_GENERICS_LT || previous == Token.COMMA ||
                  previous == Token.DOUBLE_COLON || previous == -1) {
                  if (nesting == 0)
                      maybe_type = true;
              } else {
                  maybe_type = definitely_type = false;
              }

              continue;

          // Built-in type names, plus `?' and `*' type suffixes
          case Token.OBJECT:
          case Token.STRING:
          case Token.BOOL:
          case Token.DECIMAL:
          case Token.FLOAT:
          case Token.DOUBLE:
          case Token.SBYTE:
          case Token.BYTE:
          case Token.SHORT:
          case Token.USHORT:
          case Token.INT:
          case Token.UINT:
          case Token.LONG:
          case Token.ULONG:
          case Token.CHAR:
          case Token.VOID:
          case Token.INTERR_NULLABLE:
          case Token.STAR:
              if (nesting == 0)
                  definitely_type = true;

              continue;

          case Token.COMMA:
              // A top-level comma rules out a type; 100 keeps the level non-zero
              if (nesting == 0) {
                  nesting = 100;
                  maybe_type = definitely_type = false;
              }

              continue;

          case Token.OP_GENERICS_LT:
          case Token.OPEN_BRACKET:
              if (nesting == 0)
                  definitely_type = true;

              ++nesting;
              continue;

          case Token.OP_GENERICS_GT:
          case Token.CLOSE_BRACKET:
              --nesting;
              continue;

          case Token.REF:
          case Token.OUT:
              maybe_type = definitely_type = false;
              continue;

          default:
              return Token.OPEN_PARENS;
          }
      }
  }
  /// <summary>
  /// Returns true when <paramref name="s"/> is non-empty, its first character is
  /// accepted by is_identifier_start_character and every following character is
  /// accepted by is_identifier_part_character.
  /// </summary>
  public static bool IsValidIdentifier (string s)
  {
      if (string.IsNullOrEmpty (s))
          return false;

      if (!is_identifier_start_character (s [0]))
          return false;

      int index = 1;
      while (index < s.Length) {
          if (!is_identifier_part_character (s [index]))
              return false;
          index++;
      }

      return true;
  }
  // One list of locations per `<' examined by parse_less_than.
  Stack<List<Location>> genericDimensionLocations = new Stack<List<Location>> ();

  /// <summary>
  /// Pops and returns the most recently pushed location list, or null when
  /// nothing is pending. Reading this property removes the list from the stack.
  /// </summary>
  public List<Location> GenericDimensionLocations {
      get {
          return genericDimensionLocations.Count > 0
              ? genericDimensionLocations.Pop ()
              : null;
      }
  }
  //
  // Scans the tokens following a `<' and reports whether they look like
  // a type argument / type parameter list. A list of locations is pushed
  // on genericDimensionLocations on entry; it is popped again on every
  // `return false' path (and on the stray in/out path).
  // genericDimension is set/incremented for the unbound forms
  // `<>' and `<,,>'.
  //
  bool parse_less_than (ref int genericDimension)
  {
      var opened = new List<Location> ();
      opened.Add (Location);
      genericDimensionLocations.Push (opened);

      int tok;

  start:
      tok = token ();
      if (tok == Token.OPEN_BRACKET) {
          // Skip everything up to the matching `]' (EOF counts as success).
          do {
              tok = token ();
              if (tok == Token.EOF)
                  return true;
          } while (tok != Token.CLOSE_BRACKET);
          tok = token ();
      } else if (tok == Token.IN || tok == Token.OUT) {
          tok = token ();
      }

      switch (tok) {
      case Token.IDENTIFIER:
      case Token.OBJECT:
      case Token.STRING:
      case Token.BOOL:
      case Token.DECIMAL:
      case Token.FLOAT:
      case Token.DOUBLE:
      case Token.SBYTE:
      case Token.BYTE:
      case Token.SHORT:
      case Token.USHORT:
      case Token.INT:
      case Token.UINT:
      case Token.LONG:
      case Token.ULONG:
      case Token.CHAR:
      case Token.VOID:
          // A type name: carry on with whatever follows it.
          break;

      case Token.OP_GENERICS_GT:
          // `<>'
          genericDimension = 1;
          genericDimensionLocations.Peek ().Add (Location);
          return true;

      case Token.IN:
      case Token.OUT:
          genericDimensionLocations.Pop ();
          return true;

      case Token.COMMA:
          // `<,,,>'
          do {
              ++genericDimension;
              if (genericDimensionLocations.Count > 0)
                  genericDimensionLocations.Peek ().Add (Location);
              tok = token ();
          } while (tok == Token.COMMA);

          if (tok == Token.OP_GENERICS_GT) {
              ++genericDimension;
              if (genericDimensionLocations.Count > 0)
                  genericDimensionLocations.Peek ().Add (Location);
              return true;
          }

          genericDimensionLocations.Pop ();
          return false;

      default:
          genericDimensionLocations.Pop ();
          return false;
      }

  again:
      tok = token ();
      switch (tok) {
      case Token.OP_GENERICS_GT:
          genericDimensionLocations.Peek ().Add (Location);
          return true;

      case Token.COMMA:
      case Token.DOT:
      case Token.DOUBLE_COLON:
          goto start;

      case Token.INTERR_NULLABLE:
      case Token.STAR:
          goto again;

      case Token.OP_GENERICS_LT:
          // Nested type argument list.
          if (parse_less_than (ref genericDimension))
              goto again;
          break;

      case Token.OPEN_BRACKET:
          // Array rank specifier: `[', any number of commas, `]'.
          do {
              tok = token ();
          } while (tok == Token.COMMA);

          if (tok == Token.CLOSE_BRACKET)
              goto again;
          break;
      }

      genericDimensionLocations.Pop ();
      return false;
  }
  //
  // Reads one token between PushPosition/PopPosition and returns it.
  // sbag.Suppress is switched on for the duration of the read.
  // (Presumably PopPosition rewinds the tokenizer so the token is not
  // consumed - confirm against PushPosition/PopPosition.)
  //
  public int peek_token ()
  {
      PushPosition ();

      sbag.Suppress = true;
      int upcoming = token ();
      sbag.Suppress = false;

      PopPosition ();
      return upcoming;
  }
  //
  // Tokenizes `?' using custom disambiguation rules and returns one
  // of the following tokens: INTERR_NULLABLE, OP_COALESCING,
  // INTERR_OPERATOR or INTERR
  //
  // A tricky expression looks like:
  //
  // Foo ? a = x ? b : c;
  //
  int TokenizePossibleNullableType ()
  {
      if (parsing_block == 0 || parsing_type > 0)
          return Token.INTERR_NULLABLE;

      // Single character look-ahead settles the easy cases.
      int ahead = peek_char ();
      switch (ahead) {
      case '?':
          get_char ();
          return Token.OP_COALESCING;
      case '.':
          return Token.INTERR_OPERATOR;
      case ',':
      case ';':
      case '>':
          return Token.INTERR_NULLABLE;
      case '*':
          return Token.INTERR;
      }

      if (ahead >= '0' && ahead <= '9')
          return Token.INTERR;

      // From here on whole tokens are scanned; the position saved here is
      // handed back to PopPosition before every return.
      PushPosition ();
      current_token = Token.NONE;

      const int undecided = -1;
      int result;
      int parens = 0;
      int generics = 0;

      switch (xtoken ()) {
      case Token.DOT:
      case Token.OPEN_BRACKET_EXPR:
          result = Token.INTERR_OPERATOR;
          break;

      case Token.LITERAL:
      case Token.TRUE:
      case Token.FALSE:
      case Token.NULL:
      case Token.THIS:
      case Token.NEW:
          result = Token.INTERR;
          break;

      case Token.SEMICOLON:
      case Token.COMMA:
      case Token.CLOSE_PARENS:
      case Token.OPEN_BRACKET:
      case Token.OP_GENERICS_GT:
      case Token.INTERR:
      case Token.OP_COALESCING:
      case Token.COLON:
          result = Token.INTERR_NULLABLE;
          break;

      case Token.OPEN_PARENS:
      case Token.OPEN_PARENS_CAST:
      case Token.OPEN_PARENS_LAMBDA:
          result = undecided;
          ++parens;
          break;

      case Token.OP_GENERICS_LT:
      case Token.OP_GENERICS_LT_DECL:
      case Token.GENERIC_DIMENSION:
          result = undecided;
          ++generics;
          break;

      default:
          result = undecided;
          break;
      }

      if (result == undecided) {
          switch (xtoken ()) {
          case Token.COMMA:
          case Token.SEMICOLON:
          case Token.OPEN_BRACE:
          case Token.IN:
              result = Token.INTERR_NULLABLE;
              break;

          case Token.COLON:
              result = Token.INTERR;
              break;

          case Token.OPEN_PARENS:
          case Token.OPEN_PARENS_CAST:
          case Token.OPEN_PARENS_LAMBDA:
              ++parens;
              goto default;

          case Token.CLOSE_PARENS:
              --parens;
              goto default;

          case Token.OP_GENERICS_LT:
          case Token.OP_GENERICS_LT_DECL:
          case Token.GENERIC_DIMENSION:
              ++generics;
              goto default;

          default:
              int tok;
              int interrs = 1;
              int colons = 0;
              int braces = 0;
              int brackets = 0;

              //
              // All shortcuts failed, do it the hard way: scan ahead
              // keeping track of nesting and of `?' / `:' pairs
              //
              while ((tok = xtoken ()) != Token.EOF) {
                  switch (tok) {
                  case Token.OPEN_BRACE:
                      ++braces;
                      continue;

                  case Token.CLOSE_BRACE:
                      --braces;
                      continue;

                  case Token.OPEN_PARENS:
                  case Token.OPEN_PARENS_CAST:
                  case Token.OPEN_PARENS_LAMBDA:
                      ++parens;
                      continue;

                  case Token.OP_GENERICS_LT:
                  case Token.OP_GENERICS_LT_DECL:
                  case Token.GENERIC_DIMENSION:
                      ++generics;
                      continue;

                  case Token.OPEN_BRACKET:
                  case Token.OPEN_BRACKET_EXPR:
                      ++brackets;
                      continue;

                  case Token.CLOSE_BRACKET:
                      --brackets;
                      continue;

                  case Token.CLOSE_PARENS:
                      if (parens > 0) {
                          --parens;
                          continue;
                      }

                      // Unmatched `)'
                      PopPosition ();
                      return Token.INTERR_NULLABLE;

                  case Token.OP_GENERICS_GT:
                      if (generics > 0) {
                          --generics;
                          continue;
                      }

                      // Unmatched `>'
                      PopPosition ();
                      return Token.INTERR_NULLABLE;
                  }

                  if (braces != 0)
                      continue;

                  if (tok == Token.SEMICOLON)
                      break;

                  if (parens != 0)
                      continue;

                  if (tok == Token.COMMA) {
                      if (generics == 0 && brackets == 0) {
                          PopPosition ();
                          return Token.INTERR_NULLABLE;
                      }
                  } else if (tok == Token.COLON) {
                      ++colons;
                      if (colons == interrs)
                          break;
                  } else if (tok == Token.INTERR) {
                      ++interrs;
                  }
              }

              bool conditional = colons == interrs || braces != 0;
              result = conditional ? Token.INTERR : Token.INTERR_NULLABLE;
              break;
          }
      }

      PopPosition ();
      return result;
  }
  //
  // Appends `c' (unless it is -1) and then every following decimal
  // digit to number_builder. Returns true only when at least one
  // digit was consumed from the input; `c' itself does not count.
  //
  bool decimal_digits (int c)
  {
      if (c != -1) {
          if (number_pos == MaxNumberLength)
              Error_NumericConstantTooLong ();
          number_builder [number_pos++] = (char) c;
      }

      //
      // peek_char2 is used because a 2-character look-ahead is
      // needed here (5.ToString for example).
      //
      bool any_digit = false;
      for (int next = peek_char2 (); next >= '0' && next <= '9'; next = peek_char2 ()) {
          if (number_pos == MaxNumberLength)
              Error_NumericConstantTooLong ();
          number_builder [number_pos++] = (char) next;
          get_char ();
          any_digit = true;
      }

      return any_digit;
  }
  // True for the ASCII hexadecimal digits 0-9, A-F and a-f.
  static bool is_hex (int e)
  {
      if (e >= '0' && e <= '9')
          return true;
      if (e >= 'A' && e <= 'F')
          return true;
      return e >= 'a' && e <= 'f';
  }
  //
  // Maps a real literal suffix character to its TypeCode:
  // f/F -> Single, d/D -> Double, m/M -> Decimal.
  // Anything else yields TypeCode.Empty.
  //
  static TypeCode real_type_suffix (int c)
  {
      if (c == 'F' || c == 'f')
          return TypeCode.Single;

      if (c == 'D' || c == 'd')
          return TypeCode.Double;

      if (c == 'M' || c == 'm')
          return TypeCode.Decimal;

      return TypeCode.Empty;
  }
  //
  // Consumes the U/u and L/l suffix characters starting at `c' (the
  // character the caller peeked, or -1 for none) and wraps `ul' in the
  // literal type selected by the suffixes and by the magnitude of the value.
  //
  ILiteralConstant integer_type_suffix (ulong ul, int c, Location loc)
  {
      bool is_unsigned = false;
      bool is_long = false;

      if (c != -1) {
          // The next character is re-peeked after every round, including the last one.
          for (bool scanning = true; scanning; c = peek_char ()) {
              switch (c) {
              case 'U':
              case 'u':
                  // A repeated suffix ends the scan but is still consumed.
                  if (is_unsigned)
                      scanning = false;
                  is_unsigned = true;
                  get_char ();
                  break;

              case 'l':
                  if (!is_unsigned) {
                      //
                      // if we have not seen anything in between
                      // report this error
                      //
                      Report.Warning (78, 4, Location, "The `l' suffix is easily confused with the digit `1' (use `L' for clarity)");
                  }
                  goto case 'L';

              case 'L':
                  if (is_long)
                      scanning = false;
                  is_long = true;
                  get_char ();
                  break;

              default:
                  scanning = false;
                  break;
              }
          }
      }

      bool fits_in_32_bits = (ul & 0xffffffff00000000) == 0;
      bool fits_in_long = (ul & 0x8000000000000000) == 0;

      if (is_unsigned) {
          // ulong when asked for or needed, uint otherwise
          if (is_long || !fits_in_32_bits)
              return new ULongLiteral (context.BuiltinTypes, ul, loc);

          return new UIntLiteral (context.BuiltinTypes, (uint) ul, loc);
      }

      if (is_long || !fits_in_32_bits) {
          // long if possible, ulong otherwise
          if (fits_in_long)
              return new LongLiteral (context.BuiltinTypes, (long) ul, loc);

          return new ULongLiteral (context.BuiltinTypes, ul, loc);
      }

      // No suffix and a 32-bit value: int if possible, uint otherwise
      uint ui = (uint) ul;
      if ((ui & 0x80000000) != 0)
          return new UIntLiteral (context.BuiltinTypes, ui, loc);

      return new IntLiteral (context.BuiltinTypes, (int) ui, loc);
  }
  //
  // Converts the decimal digits held in number_builder into an integer
  // literal. `c' is the next char in the input; it is handed to
  // integer_type_suffix, which decides whether a special type is
  // requested and picks the best representation for the integer.
  //
  ILiteralConstant adjust_int (int c, Location loc)
  {
      try {
          // Up to nine digits always fit in 32 bits; longer numbers are accumulated as ulong.
          if (number_pos <= 9) {
              uint narrow = (uint) (number_builder [0] - '0');
              int n = 1;
              while (n < number_pos) {
                  narrow = checked ((narrow * 10) + ((uint) (number_builder [n] - '0')));
                  n++;
              }

              return integer_type_suffix (narrow, c, loc);
          }

          ulong wide = (uint) (number_builder [0] - '0');
          int w = 1;
          while (w < number_pos) {
              wide = checked ((wide * 10) + ((uint) (number_builder [w] - '0')));
              w++;
          }

          return integer_type_suffix (wide, c, loc);
      } catch (OverflowException) {
          Error_NumericConstantTooLong ();
          return new IntLiteral (context.BuiltinTypes, 0, loc);
      } catch (FormatException) {
          Report.Error (1013, Location, "Invalid number");
          return new IntLiteral (context.BuiltinTypes, 0, loc);
      }
  }
  //
  // Parses the text collected in number_builder as decimal, float or
  // double (the default) depending on `t'. When parsing overflows,
  // error 594 is reported and a zero literal of the same kind is returned.
  //
  // NOTE(review): the decimal and float branches report at Location while
  // the double branch reports at loc; this is kept exactly as it was.
  //
  ILiteralConstant adjust_real (TypeCode t, Location loc)
  {
      const string error_details = "Floating-point constant is outside the range of type `{0}'";
      string text = new string (number_builder, 0, number_pos);

      if (t == TypeCode.Decimal) {
          try {
              return new DecimalLiteral (context.BuiltinTypes, decimal.Parse (text, styles, csharp_format_info), loc);
          } catch (OverflowException) {
              Report.Error (594, Location, error_details, "decimal");
              return new DecimalLiteral (context.BuiltinTypes, 0, loc);
          }
      }

      if (t == TypeCode.Single) {
          try {
              return new FloatLiteral (context.BuiltinTypes, float.Parse (text, styles, csharp_format_info), loc);
          } catch (OverflowException) {
              Report.Error (594, Location, error_details, "float");
              return new FloatLiteral (context.BuiltinTypes, 0, loc);
          }
      }

      try {
          return new DoubleLiteral (context.BuiltinTypes, double.Parse (text, styles, csharp_format_info), loc);
      } catch (OverflowException) {
          Report.Error (594, loc, error_details, "double");
          return new DoubleLiteral (context.BuiltinTypes, 0, loc);
      }
  }
  //
  // Reads a hexadecimal literal. One character is consumed up front
  // (is_number calls this after peeking the x/X marker), then every
  // following hex digit is collected and converted; the suffix is
  // handled by integer_type_suffix.
  //
  ILiteralConstant handle_hex (Location loc)
  {
      get_char ();

      for (int digit = peek_char (); digit != -1 && is_hex (digit); digit = peek_char ()) {
          number_builder [number_pos++] = (char) digit;
          get_char ();
      }

      string text = new String (number_builder, 0, number_pos);

      try {
          // Up to eight hex digits fit in 32 bits.
          ulong parsed;
          if (number_pos > 8)
              parsed = System.UInt64.Parse (text, NumberStyles.HexNumber);
          else
              parsed = System.UInt32.Parse (text, NumberStyles.HexNumber);

          return integer_type_suffix (parsed, peek_char (), loc);
      } catch (OverflowException) {
          Error_NumericConstantTooLong ();
          return new IntLiteral (context.BuiltinTypes, 0, loc);
      } catch (FormatException) {
          Report.Error (1013, Location, "Invalid number");
          return new IntLiteral (context.BuiltinTypes, 0, loc);
      }
  }
  //
  // Invoked if we know we have .digits or digits
  //
  // `c' is the first character of the literal and dotLead tells whether
  // the literal starts with the decimal point. The resulting literal is
  // stored in val and Token.LITERAL is returned.
  //
  int is_number (int c, bool dotLead)
  {
      ILiteralConstant res;

#if FULL_AST
      int read_start = reader.Position - 1;
      if (dotLead) {
          //
          // Caller did peek_char
          //
          --read_start;
      }
#endif
      number_pos = 0;
      var loc = Location;

      if (!dotLead) {
          if (c == '0') {
              int marker = peek_char ();
              if (marker == 'x' || marker == 'X') {
                  val = res = handle_hex (loc);
#if FULL_AST
                  res.ParsedValue = reader.ReadChars (read_start, reader.Position - 1);
#endif
                  return Token.LITERAL;
              }
          }

          decimal_digits (c);
          c = peek_char ();
      }

      //
      // We need to handle the case of
      // "1.1" vs "1.string" (LITERAL_FLOAT vs NUMBER DOT IDENTIFIER)
      //
      bool is_real = false;
      if (c == '.') {
          if (!dotLead)
              get_char ();

          if (!decimal_digits ('.')) {
              // No digit after the dot: give the dot back and finish as an integer.
              putback ('.');
              number_pos--;
              val = res = adjust_int (-1, loc);
#if FULL_AST
              res.ParsedValue = reader.ReadChars (read_start, reader.Position - 1);
#endif
              return Token.LITERAL;
          }

          is_real = true;
          c = peek_char ();
      }

      if (c == 'e' || c == 'E') {
          is_real = true;
          get_char ();

          if (number_pos == MaxNumberLength)
              Error_NumericConstantTooLong ();
          number_builder [number_pos++] = (char) c;

          // The exponent always gets a sign in number_builder: `+' is
          // stored when the source has none, and in that case the
          // character just read is passed on to decimal_digits.
          int sign = get_char ();
          bool explicit_sign = sign == '+' || sign == '-';

          if (number_pos == MaxNumberLength)
              Error_NumericConstantTooLong ();
          number_builder [number_pos++] = explicit_sign ? (char) sign : '+';

          c = explicit_sign ? -1 : sign;
          decimal_digits (c);
          c = peek_char ();
      }

      var type = real_type_suffix (c);
      if (type == TypeCode.Empty && !is_real) {
          res = adjust_int (c, loc);
      } else {
          is_real = true;

          // Consume the f/d/m suffix when there is one.
          if (type != TypeCode.Empty) {
              get_char ();
          }

          res = adjust_real (type, loc);
      }

      val = res;

#if FULL_AST
      var chars = reader.ReadChars (read_start, reader.Position - (type == TypeCode.Empty && c > 0 ? 1 : 0));
      if (chars[chars.Length - 1] == '\r')
          Array.Resize (ref chars, chars.Length - 1);
      res.ParsedValue = chars;
#endif

      return Token.LITERAL;
  }
  //
  // Reads the hex digits of an escape sequence. The character at the
  // current position is consumed first (escape and EscapeUnicode call
  // this while the x/u/U introducer is still pending).
  //
  // count == 4 or 8: exactly that many hex digits are required
  // count == -1:     one to four hex digits are accepted
  //
  // Returns the value read. With count == 8 a value above 0xFFFF is
  // split into a surrogate pair: the high surrogate is returned and the
  // low one is stored in `surrogate' (0 otherwise). `error' is set, and
  // 0 returned, on a non-hex character or on a value above 0x10FFFF.
  //
  int getHex (int count, out int surrogate, out bool error)
  {
      int top = count != -1 ? count : 4;

      // Unsigned accumulator: eight hex digits can reach 0xFFFFFFFF, which
      // wraps to a negative number in an int and would slip past the
      // range check below.
      uint total = 0;

      get_char ();
      error = false;
      surrogate = 0;

      for (int i = 0; i < top; i++) {
          int c = get_char ();
          uint digit;

          if (c >= '0' && c <= '9')
              digit = (uint) (c - '0');
          else if (c >= 'A' && c <= 'F')
              digit = (uint) (c - 'A' + 10);
          else if (c >= 'a' && c <= 'f')
              digit = (uint) (c - 'a' + 10);
          else {
              error = true;
              return 0;
          }

          total = (total * 16) + digit;

          if (count == -1) {
              // Variable length form: stop at the first non-hex character.
              int p = peek_char ();
              if (p == -1)
                  break;
              if (!is_hex ((char)p))
                  break;
          }
      }

      if (top == 8) {
          if (total > 0x0010FFFF) {
              error = true;
              return 0;
          }

          if (total >= 0x00010000) {
              surrogate = (int) ((total - 0x00010000) % 0x0400 + 0xDC00);
              total = (total - 0x00010000) / 0x0400 + 0xD800;
          }
      }

      return (int) total;
  }
  //
  // Translates an escape sequence. `c' is the character already read;
  // when it is not a backslash it is returned unchanged. Otherwise the
  // pending character selects the escape. `surrogate' is non-zero only
  // when getHex produced a surrogate pair.
  //
  int escape (int c, out int surrogate)
  {
      // The look-ahead happens before the backslash test, as it always did.
      int d = peek_char ();

      if (c != '\\') {
          surrogate = 0;
          return c;
      }

      if (d == 'u' || d == 'U')
          return EscapeUnicode (d, out surrogate);

      int v;
      bool error;

      switch (d) {
      case 'a':
          v = '\a';
          break;
      case 'b':
          v = '\b';
          break;
      case 'f':
          v = '\f';
          break;
      case 'n':
          v = '\n';
          break;
      case 'r':
          v = '\r';
          break;
      case 't':
          v = '\t';
          break;
      case 'v':
          v = '\v';
          break;
      case '0':
          v = 0;
          break;
      case '\\':
          v = '\\';
          break;
      case '"':
          v = '"';
          break;
      case '\'':
          v = '\'';
          break;
      case 'x':
          // getHex consumes the `x' and the digits itself.
          v = getHex (-1, out surrogate, out error);
          if (error)
              goto default;
          return v;
      default:
          surrogate = 0;
          Report.Error (1009, Location, "Unrecognized escape sequence `\\{0}'", ((char)d).ToString ());
          return d;
      }

      // Simple one-character escape: consume the character after the backslash.
      get_char ();
      surrogate = 0;
      return v;
  }
  //
  // Reads a unicode escape through getHex: eight hex digits when `ch'
  // is `U', four otherwise. Error 1009 is reported when getHex flags
  // an error; the value returned by getHex is passed back either way.
  //
  int EscapeUnicode (int ch, out int surrogate)
  {
      bool error;
      int digits = ch == 'U' ? 8 : 4;

      ch = getHex (digits, out surrogate, out error);
      if (error)
          Report.Error (1009, Location, "Unrecognized escape sequence");

      return ch;
  }
  //
  // Returns the next character, taking the put-back character first
  // when one is pending, and keeps line/col up to date. "\r" and
  // "\r\n" are both returned as a single '\n'.
  //
  int get_char ()
  {
      int x;
      if (putback_char == -1) {
          x = reader.Read ();
      } else {
          x = putback_char;
          putback_char = -1;
      }

      if (x > 13) {
          if (x >= UnicodeLS && x <= UnicodePS)
              advance_line (SpecialsBag.NewLine.Unix);
          else
              col++;

          return x;
      }

      switch (x) {
      case '\r':
          if (peek_char () == '\n') {
              // Swallow the '\n' that peek_char has just buffered.
              putback_char = -1;
              advance_line (SpecialsBag.NewLine.Windows);
          } else {
              advance_line (SpecialsBag.NewLine.Unix);
          }
          x = '\n';
          break;

      case '\n':
          advance_line (SpecialsBag.NewLine.Unix);
          break;

      default:
          col++;
          break;
      }

      return x;
  }
  // When false, advance_line does not report new lines to sbag.
  bool recordNewLine = true;

  //
  // Moves the position to the start of the next line. The current
  // column is remembered in previous_col, which putback uses when it
  // has to step back across the line break.
  //
  void advance_line (SpecialsBag.NewLine newLine)
  {
      if (recordNewLine)
          sbag.AddNewLine (line, col, newLine);

      previous_col = col;
      col = 0;

      ++line;
      ++ref_line;
      startsLine = true;
  }
  //
  // Returns the next character without consuming it. The character
  // is read from the reader once and parked in putback_char.
  //
  int peek_char ()
  {
      if (putback_char != -1)
          return putback_char;

      putback_char = reader.Read ();
      return putback_char;
  }
  //
  // Like peek_char, but leaves putback_char untouched: when nothing
  // is pending the reader's own Peek is used.
  //
  int peek_char2 ()
  {
      return putback_char != -1 ? putback_char : reader.Peek ();
  }
  //
  // Pushes `c' back so that it is the next character returned, and
  // steps the column (or line) position back accordingly. Only one
  // character can be pending: a second putback throws
  // InternalErrorException.
  //
  public void putback (int c)
  {
      if (putback_char != -1) {
          throw new InternalErrorException (string.Format ("Secondary putback [{0}] putting back [{1}] is not allowed", (char)putback_char, (char) c), Location);
      }

      bool crosses_line = c == '\n' || col == 0 || (c >= UnicodeLS && c <= UnicodePS);
      if (crosses_line) {
          // It won't happen though.
          line--;
          ref_line--;
          col = previous_col;
      } else {
          col--;
      }

      putback_char = c;
  }
  // True while a character is still pending, or when CompleteOnEOF is set.
  public bool advance ()
  {
      if (peek_char () != -1)
          return true;

      return CompleteOnEOF;
  }
  // The current content of val (is_number, for one, stores its literal there).
  public Object Value {
      get { return val; }
  }
  // Method form of the Value property: returns val.
  public Object value ()
  {
      Object current = val;
      return current;
  }
  // Reads the next token with xtoken, records it in current_token and returns it.
  public int token ()
  {
      return current_token = xtoken ();
  }
  //
  // Convenience overload for callers that do not need the position
  // information: forwards to the full version and drops the three
  // position results.
  //
  int TokenizePreprocessorIdentifier (out int c)
  {
      int unusedStartCol;
      int unusedEndLine;
      int unusedEndCol;

      return TokenizePreprocessorIdentifier (out c, out unusedStartCol, out unusedEndLine, out unusedEndCol);
  }
  //
  // Skips blanks and tabs, then collects a run of lower-case ASCII
  // letters into id_builder.
  //
  // Returns the number of characters stored in id_builder.
  // c        - the character that ended the scan
  // startCol - col after the first non-blank character was read
  // endLine  - line before the first non-blank character was read
  // endCol   - col + 1 at the time the last letter was stored, or the
  //            col before the first non-blank character when no
  //            letter was stored
  //
  int TokenizePreprocessorIdentifier (out int c, out int startCol, out int endLine, out int endCol)
  {
      // skip over white space
      do {
          endLine = line;
          endCol = col;

          c = get_char ();
      } while (c == ' ' || c == '\t');

      startCol = col;
      int pos = 0;

      while (c != -1 && c >= 'a' && c <= 'z') {
          id_builder[pos++] = (char) c;
          endCol = col + 1;
          c = get_char ();

          if (c == '\\') {
              int peek = peek_char ();
              if (peek == 'U' || peek == 'u') {
                  int surrogate;

                  // Pass the introducer, not the backslash: EscapeUnicode
                  // picks 8 digits for `U' and 4 for `u' from this argument.
                  c = EscapeUnicode (peek, out surrogate);
                  if (surrogate != 0) {
                      if (is_identifier_part_character ((char) c)) {
                          id_builder[pos++] = (char) c;
                      }
                      c = surrogate;
                  }
              }
          }
      }

      return pos;
  }
  1806. Prepro