PageRenderTime 93ms CodeModel.GetById 16ms app.highlight 72ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/dscribe/lexer.d

http://github.com/wilkie/djehuty
D | 691 lines | 652 code | 26 blank | 13 comment | 338 complexity | 4a0ac3461825a1fae5fe6d1b3b37d7a0 MD5 | raw file
  1/*
  2 * lexer.d
  3 *
  4 * This module implements the D lexicon.
  5 *
  6 */
  7
  8module tools.dscribe.lexer;
  9
 10import parsing.lexer;
 11
 12import core.event;
 13import core.string;
 14import core.regex;
 15
 16import tools.dscribe.tokens;
 17
 18import console.main;
 19
 20class LexerD : Lexer {
 21	this() {
 22		// D Lexicon
 23
 24		addRule(Lex.Whitespace, `\s*`);
 25
 26		addRule(Lex.Operator, `[/=+*^%\-<>!~&\|\.]+`);
 27
 28		// Other Lexicon
 29		addRule(Lex.Delimiter, `[()\][{}?,;:$]`);
 30
 31		// Wysiwyg String Literal
 32
 33		addRule(Lex.WysiwygString, "`([^`]*)`");
 34		addRule(Lex.WysiwygString, `r"([^"]*)"`);
 35
 36		// String Literal
 37
 38		addRule(Lex.DoubleQuotedString, `"((?:[^\\"](?:\\.)?)*)"`);
 39
 40		// Comment Line
 41		addRule(Lex.CommentLine, `//([^\n\r]*)`);
 42
 43		// Comment Blocks
 44		addRule(Lex.CommentBlock, `/\*([^\*](?:\*[^/])?)*\*/`);
 45
 46		// Nested Comment Blocks
 47		addRule(Lex.CommentNestedStart, `/\+`);
 48
 49		nestedCommentState = newState(); // For within comment blocks
 50
 51		// /+ afsdfasdf /+ afsdasdf +/ asdfasdfsda +/
 52
 53		addRule(Lex.CommentNestedStart, `/\+`);
 54		addRule(Lex.CommentNestedEnd, `\+/`);
 55		addRule(Lex.Comment, `([^\+/](?:\+[^/])?)*`);
 56
 57		setState(normalState);
 58
 59		// These rules slow down the lexer with their complexity
 60		addRule(Lex.DecimalFloat, `([0-9][_0-9]*|0)\.(?:[_0-9]+(?:[eE](?:\+|-)?[_0-9]+)?)?([fF]i?|Li?|i)?`);
 61		addRule(Lex.DecimalFloat, `\.[0-9][_0-9]*(?:[eE](?:\+|-)?[_0-9]+)?([fF]i?|Li?|i)?`);
 62		addRule(Lex.DecimalFloat, `[0-9][_0-9]*[eE][+-][_0-9]+([fF]i?|Li?|i)?`);
 63
 64		addRule(Lex.HexFloat, `0[xX](?:[0-9a-fA-F_]+)?\.?[0-9a-fA-F_]+[pP][+-]?[_0-9]+`);
 65
 66		addRule(Lex.IntegerFloat, `([1-9][_0-9]*|0)([fF]|L)?i`);
 67
 68		// Identifier
 69
 70		addRule(Lex.Identifier, `[_a-zA-Z][_a-zA-Z0-9]*`);
 71		addRule(Lex.HexLiteral, `0[xX][0-9a-fA-F_]+([uUL]*)`);
 72		addRule(Lex.BinaryLiteral, `0[bB][01_]+([uUL]*)`);
 73		addRule(Lex.OctalLiteral, `0[_0-7]+([uUL]*)`);
 74		addRule(Lex.IntegerLiteral, `([1-9][_0-9]*|0)([uUL]*)`);
 75
 76		// Special Tokens
 77
 78		addRule(Lex.SpecialLine,
 79			`#line\s+(0x[0-9a-fA-F_]+|0b[01_]+|0[_0-7]+|(?:[1-9][_0-9]*|0))(?:\s+("[^"]*"))?`);
 80	}
 81
 82	override bool raiseSignal(uint signal) {
 83		switch(signal) {
 84			case Lex.Delimiter:
 85				switch(token.getString()[0]) {
 86					case '(':
 87						token = new Token(Lex.LeftParen);
 88						break;
 89					case ')':
 90						token = new Token(Lex.RightParen);
 91						break;
 92					case '{':
 93						token = new Token(Lex.LeftCurly);
 94						break;
 95					case '}':
 96						token = new Token(Lex.RightCurly);
 97						break;
 98					case '[':
 99						token = new Token(Lex.LeftBrace);
100						break;
101					case ']':
102						token = new Token(Lex.RightBrace);
103						break;
104					case '?':
105						token = new Token(Lex.QuestionMark);
106						break;
107					case ',':
108						token = new Token(Lex.Comma);
109						break;
110					case ';':
111						token = new Token(Lex.Semicolon);
112						break;
113					case ':':
114						token = new Token(Lex.Colon);
115						break;
116					case '$':
117						token = new Token(Lex.Dollar);
118						break;
119					default:
120						// unknown block delimiter
121						return true;
122					}
123				break;
124			case Lex.Operator:
125				switch(token.getString().toString) {
126					case `==`:
127						token = new Token(Lex.Equal);
128						break;
129					case `=`:
130						token = new Token(Lex.Assign);
131						break;
132					case `>>`:
133						token = new Token(Lex.RightShift);
134						break;
135					case `>>=`:
136						token = new Token(Lex.RightShiftAssign);
137						break;
138					case `>>>`:
139						token = new Token(Lex.RightShiftUnsigned);
140						break;
141					case `>>>=`:
142						token = new Token(Lex.RightShiftUnsignedAssign);
143						break;
144					case `>`:
145						token = new Token(Lex.GreaterThan);
146						break;
147					case `>=`:
148						token = new Token(Lex.GreaterOrEqual);
149						break;
150					case `<<`:
151						token = new Token(Lex.LeftShift);
152						break;
153					case `<<=`:
154						token = new Token(Lex.LeftShiftAssign);
155						break;
156					case `<`:
157						token = new Token(Lex.LessThan);
158						break;
159					case `<=`:
160						token = new Token(Lex.LessOrEqual);
161						break;
162					case `<>`:
163						token = new Token(Lex.NotEqual);
164						break;
165					case `!=`:
166						token = new Token(Lex.UnorderedNotEqual);
167						break;
168					case `!<>`:
169						token = new Token(Lex.UnorderedEqual);
170						break;
171					case `!>=`:
172						token = new Token(Lex.UnorderedLessThan);
173						break;
174					case `!<=`:
175						token = new Token(Lex.UnorderedGreaterThan);
176						break;
177					case `!<`:
178						token = new Token(Lex.UnorderedGreaterOrEqual);
179						break;
180					case `!>`:
181						token = new Token(Lex.UnorderedLessOrEqual);
182						break;
183					case `<>=`:
184						token = new Token(Lex.Tautology);
185						break;
186					case `!<>=`:
187						token = new Token(Lex.UnorderedContradiction);
188						break;
189					case `.`:
190						token = new Token(Lex.Dot);
191						break;
192					case `..`:
193						token = new Token(Lex.DotDot);
194						break;
195					case `...`:
196						token = new Token(Lex.DotDotDot);
197						break;
198					case `&&`:
199						token = new Token(Lex.AndAnd);
200						break;
201					case `&=`:
202						token = new Token(Lex.AndAssign);
203						break;
204					case `&`:
205						token = new Token(Lex.And);
206						break;
207					case `||`:
208						token = new Token(Lex.OrOr);
209						break;
210					case `|=`:
211						token = new Token(Lex.OrAssign);
212						break;
213					case `|`:
214						token = new Token(Lex.Or);
215						break;
216					case `-=`:
217						token = new Token(Lex.SubAssign);
218						break;
219					case `-`:
220						token = new Token(Lex.Sub);
221						break;
222					case `+=`:
223						token = new Token(Lex.AddAssign);
224						break;
225					case `+`:
226						token = new Token(Lex.Add);
227						break;
228					case `*=`:
229						token = new Token(Lex.MulAssign);
230						break;
231					case `*`:
232						token = new Token(Lex.Mul);
233						break;
234					case `/=`:
235						token = new Token(Lex.DivAssign);
236						break;
237					case `/`:
238						token = new Token(Lex.Div);
239						break;
240					case `~=`:
241						token = new Token(Lex.CatAssign);
242						break;
243					case `~`:
244						token = new Token(Lex.Cat);
245						break;
246					case `!`:
247						token = new Token(Lex.Bang);
248						break;
249					default:
250						// unknown operator
251						return true;
252				}
253				break;
254			case Lex.CommentLine:
255				token = new Token(Lex.Comment, _1);
256				break;
257			case Lex.Whitespace:
258				return true;
259			case Lex.DoubleQuotedString:
260			case Lex.WysiwygString:
261				token = new Token(Lex.StringLiteral, _1);
262				break;
263			case Lex.CommentBlock:
264				// The grouping is the actual comment data
265				token = new Token(Lex.Comment, _1);
266				break;
267			case Lex.CommentNestedStart:
268				if (getState() == nestedCommentState) {
269					nestedCommentDepth++;
270					comment ~= token.getString();
271					return true;
272				}
273				else {
274					comment = new String("");
275					nestedCommentDepth = 0;
276					setState(nestedCommentState);
277					return true;
278				}
279			case Lex.CommentNestedEnd:
280				if (nestedCommentDepth == 0) {
281					setState(normalState);
282					return true;
283				}
284				else {
285					comment ~= token.getString();
286					nestedCommentDepth--;
287					return true;
288				}
289			case Lex.HexLiteral:
290				ulong value = 0;
291
292				foreach(chr; token.getString()[2..token.getString().length]) {
293					if (chr != '_') {
294						value *= 16;
295						if (chr >= 'a' && chr <= 'f') {
296							value += 10 + (chr - 'a');
297						}
298						else if (chr >= 'A' && chr <= 'F') {
299							value += 10 + (chr - 'A');
300						}
301						else {
302							value += chr - '0';
303						}
304					}
305				}
306				
307				token = new Token(Lex.IntegerLiteral, value);
308				break;
309			case Lex.OctalLiteral:
310				ulong value = 0;
311
312				foreach(chr; token.getString()[1..token.getString().length]) {
313					if (chr != '_') {
314						value *= 8;
315						value += chr - '0';
316					}
317				}
318
319				token = new Token(Lex.IntegerLiteral, value);
320				break;
321			case Lex.BinaryLiteral:
322				ulong value = 0;
323
324				foreach(chr; token.getString()[2..token.getString().length]) {
325					if (chr != '_') {
326						value *= 2;
327						value += chr - '0';
328					}
329				}
330				
331				token = new Token(Lex.IntegerLiteral, value);
332				break;
333			case Lex.IntegerLiteral:
334				ulong value = 0;
335
336				foreach(chr; token.getString()) {
337					if (chr != '_') {
338						value *= 10;
339						value += chr - '0';
340					}
341				}
342				
343				token = new Token(Lex.IntegerLiteral, value);
344				break;
345			case Lex.Identifier:
346				if (token.getString() == `abstract`) {
347					token = new Token(Lex.Abstract);
348				}
349				else if (token.getString() == `alias`) {
350					token = new Token(Lex.Alias);
351				}
352				else if (token.getString() == `align`) {
353					token = new Token(Lex.Align);
354				}
355				else if (token.getString() == `asm`) {
356					token = new Token(Lex.Asm);
357				}
358				else if (token.getString() == `assert`) {
359					token = new Token(Lex.Assert);
360				}
361				else if (token.getString() == `auto`) {
362					token = new Token(Lex.Auto);
363				}
364				else if (token.getString() == `body`) {
365					token = new Token(Lex.Body);
366				}
367				else if (token.getString() == `bool`) {
368					token = new Token(Lex.Bool);
369				}
370				else if (token.getString() == `break`) {
371					token = new Token(Lex.Break);
372				}
373				else if (token.getString() == `byte`) {
374					token = new Token(Lex.Byte);
375				}
376				else if (token.getString() == `case`) {
377					token = new Token(Lex.Case);
378				}
379				else if (token.getString() == `cast`) {
380					token = new Token(Lex.Cast);
381				}
382				else if (token.getString() == `catch`) {
383					token = new Token(Lex.Catch);
384				}
385				else if (token.getString() == `cdouble`) {
386					token = new Token(Lex.Cdouble);
387				}
388				else if (token.getString() == `cent`) {
389					token = new Token(Lex.Cent);
390				}
391				else if (token.getString() == `cfloat`) {
392					token = new Token(Lex.Cfloat);
393				}
394				else if (token.getString() == `char`) {
395					token = new Token(Lex.Char);
396				}
397				else if (token.getString() == `class`) {
398					token = new Token(Lex.Class);
399				}
400				else if (token.getString() == `const`) {
401					token = new Token(Lex.Const);
402				}
403				else if (token.getString() == `continue`) {
404					token = new Token(Lex.Continue);
405				}
406				else if (token.getString() == `creal`) {
407					token = new Token(Lex.Creal);
408				}
409				else if (token.getString() == `dchar`) {
410					token = new Token(Lex.Dchar);
411				}
412				else if (token.getString() == `debug`) {
413					token = new Token(Lex.Debug);
414				}
415				else if (token.getString() == `default`) {
416					token = new Token(Lex.Default);
417				}
418				else if (token.getString() == `delegate`) {
419					token = new Token(Lex.Delegate);
420				}
421				else if (token.getString() == `delete`) {
422					token = new Token(Lex.Delete);
423				}
424				else if (token.getString() == `deprecated`) {
425					token = new Token(Lex.Deprecated);
426				}
427				else if (token.getString() == `do`) {
428					token = new Token(Lex.Do);
429				}
430				else if (token.getString() == `double`) {
431					token = new Token(Lex.Double);
432				}
433				else if (token.getString() == `else`) {
434					token = new Token(Lex.Else);
435				}
436				else if (token.getString() == `enum`) {
437					token = new Token(Lex.Enum);
438				}
439				else if (token.getString() == `export`) {
440					token = new Token(Lex.Enum);
441				}
442				else if (token.getString() == `extern`) {
443					token = new Token(Lex.Extern);
444				}
445				else if (token.getString() == `false`) {
446					token = new Token(Lex.False);
447				}
448				else if (token.getString() == `final`) {
449					token = new Token(Lex.Final);
450				}
451				else if (token.getString() == `finally`) {
452					token = new Token(Lex.Finally);
453				}
454				else if (token.getString() == `float`) {
455					token = new Token(Lex.Float);
456				}
457				else if (token.getString() == `for`) {
458					token = new Token(Lex.For);
459				}
460				else if (token.getString() == `foreach`) {
461					token = new Token(Lex.Foreach);
462				}
463				else if (token.getString() == `foreach_reverse`) {
464					token = new Token(Lex.Foreach_reverse);
465				}
466				else if (token.getString() == `function`) {
467					token = new Token(Lex.Function);
468				}
469				else if (token.getString() == `goto`) {
470					token = new Token(Lex.Goto);
471				}
472				else if (token.getString() == `idouble`) {
473					token = new Token(Lex.Idouble);
474				}
475				else if (token.getString() == `if`) {
476					token = new Token(Lex.If);
477				}
478				else if (token.getString() == `ifloat`) {
479					token = new Token(Lex.Ifloat);
480				}
481				else if (token.getString() == `import`) {
482					token = new Token(Lex.Import);
483				}
484				else if (token.getString() == `in`) {
485					token = new Token(Lex.In);
486				}
487				else if (token.getString() == `inout`) {
488					token = new Token(Lex.Inout);
489				}
490				else if (token.getString() == `int`) {
491					token = new Token(Lex.Int);
492				}
493				else if (token.getString() == `interface`) {
494					token = new Token(Lex.Interface);
495				}
496				else if (token.getString() == `invariant`) {
497					token = new Token(Lex.Invariant);
498				}
499				else if (token.getString() == `ireal`) {
500					token = new Token(Lex.Ireal);
501				}
502				else if (token.getString() == `is`) {
503					token = new Token(Lex.Is);
504				}
505				else if (token.getString() == `lazy`) {
506					token = new Token(Lex.Lazy);
507				}
508				else if (token.getString() == `long`) {
509					token = new Token(Lex.Long);
510				}
511				else if (token.getString() == `macro`) {
512					token = new Token(Lex.Macro);
513				}
514				else if (token.getString() == `mixin`) {
515					token = new Token(Lex.Mixin);
516				}
517				else if (token.getString() == `module`) {
518					token = new Token(Lex.Module);
519				}
520				else if (token.getString() == `new`) {
521					token = new Token(Lex.New);
522				}
523				else if (token.getString() == `null`) {
524					token = new Token(Lex.Null);
525				}
526				else if (token.getString() == `out`) {
527					token = new Token(Lex.Out);
528				}
529				else if (token.getString() == `override`) {
530					token = new Token(Lex.Override);
531				}
532				else if (token.getString() == `package`) {
533					token = new Token(Lex.Package);
534				}
535				else if (token.getString() == `pragma`) {
536					token = new Token(Lex.Pragma);
537				}
538				else if (token.getString() == `private`) {
539					token = new Token(Lex.Private);
540				}
541				else if (token.getString() == `protected`) {
542					token = new Token(Lex.Protected);
543				}
544				else if (token.getString() == `public`) {
545					token = new Token(Lex.Public);
546				}
547				else if (token.getString() == `real`) {
548					token = new Token(Lex.Real);
549				}
550				else if (token.getString() == `ref`) {
551					token = new Token(Lex.Ref);
552				}
553				else if (token.getString() == `return`) {
554					token = new Token(Lex.Return);
555				}
556				else if (token.getString() == `scope`) {
557					token = new Token(Lex.Scope);
558				}
559				else if (token.getString() == `short`) {
560					token = new Token(Lex.Short);
561				}
562				else if (token.getString() == `static`) {
563					token = new Token(Lex.Static);
564				}
565				else if (token.getString() == `struct`) {
566					token = new Token(Lex.Struct);
567				}
568				else if (token.getString() == `super`) {
569					token = new Token(Lex.Super);
570				}
571				else if (token.getString() == `switch`) {
572					token = new Token(Lex.Switch);
573				}
574				else if (token.getString() == `synchronized`) {
575					token = new Token(Lex.Synchronized);
576				}
577				else if (token.getString() == `template`) {
578					token = new Token(Lex.Template);
579				}
580				else if (token.getString() == `this`) {
581					token = new Token(Lex.This);
582				}
583				else if (token.getString() == `throw`) {
584					token = new Token(Lex.Throw);
585				}
586				else if (token.getString() == `true`) {
587					token = new Token(Lex.True);
588				}
589				else if (token.getString() == `try`) {
590					token = new Token(Lex.Try);
591				}
592				else if (token.getString() == `typedef`) {
593					token = new Token(Lex.Typedef);
594				}
595				else if (token.getString() == `typeid`) {
596					token = new Token(Lex.Typeid);
597				}
598				else if (token.getString() == `typeof`) {
599					token = new Token(Lex.Typeof);
600				}
601				else if (token.getString() == `ubyte`) {
602					token = new Token(Lex.Ubyte);
603				}
604				else if (token.getString() == `ucent`) {
605					token = new Token(Lex.Ucent);
606				}
607				else if (token.getString() == `uint`) {
608					token = new Token(Lex.Uint);
609				}
610				else if (token.getString() == `ulong`) {
611					token = new Token(Lex.Ulong);
612				}
613				else if (token.getString() == `union`) {
614					token = new Token(Lex.Union);
615				}
616				else if (token.getString() == `unittest`) {
617					token = new Token(Lex.Unittest);
618				}
619				else if (token.getString() == `ushort`) {
620					token = new Token(Lex.Ushort);
621				}
622				else if (token.getString() == `version`) {
623					token = new Token(Lex.Version);
624				}
625				else if (token.getString() == `void`) {
626					token = new Token(Lex.Void);
627				}
628				else if (token.getString() == `volatile`) {
629					token = new Token(Lex.Volatile);
630				}
631				else if (token.getString() == `wchar`) {
632					token = new Token(Lex.Wchar);
633				}
634				else if (token.getString() == `while`) {
635					token = new Token(Lex.While);
636				}
637				else if (token.getString() == `with`) {
638					token = new Token(Lex.With);
639				}
640				else if (token.getString()[0..2] == `__`) {
641
642					// Reserved Identifiers
643
644					if (token.getString() == `__FILE__`) {
645						token = new Token(Lex.StringLiteral, new String("file.d"));
646					}
647					else if (token.getString() == `__LINE__`) {
648						token = new Token(Lex.IntegerLiteral, new String(0));
649					}
650					else if (token.getString() == `__DATE__`) {
651						token = new Token(Lex.StringLiteral, new String("mmmm dd yyyy"));
652					}
653					else if (token.getString() == `__TIME__`) {
654						token = new Token(Lex.StringLiteral, new String("hh:mm:ss"));
655					}
656					else if (token.getString() == `__TIMESTAMP__`) {
657						token = new Token(Lex.StringLiteral, new String("www mmm dd hh:mm:ss yyyy"));
658					}
659					else if (token.getString() == `__VENDER__`) {
660						token = new Token(Lex.StringLiteral, new String(""));
661					}
662					else if (token.getString() == `__VERSION__`) {
663						token = new Token(Lex.StringLiteral, new String(0));
664					}
665				}
666				break;
667			default:
668				break;
669		}
670
671		if (token.getString is null) {
672			Console.put(((token.getId())), " (", token.getInteger(), ") ");
673		}
674		else {
675			Console.put(((token.getId())), " [", token.getString(), "] ");
676		}
677
678		return super.raiseSignal(token.getId());
679	}
680
681protected:
682	uint normalState;
683	uint commentBlockState;
684	uint nestedCommentState;
685	uint nestedCommentDepth;
686	uint stringLiteralState;
687	uint wysiwygLiteralState;
688	uint wysiwygRLiteralState;
689
690	String comment;
691}