PageRenderTime 73ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/packages/linter/node_modules/jshint/src/lex.js

https://gitlab.com/xxtxx/atom-settings
JavaScript | 1651 lines | 1251 code | 206 blank | 194 comment | 160 complexity | 0230bed3d4239482b1926189ffc2e224 MD5 | raw file
  1. /*
  2. * Lexical analysis and token construction.
  3. */
  4. "use strict";
  5. var _ = require("underscore");
  6. var events = require("events");
  7. var reg = require("./reg.js");
  8. var state = require("./state.js").state;
  9. var unicodeData = require("../data/ascii-identifier-data.js");
  10. var asciiIdentifierStartTable = unicodeData.asciiIdentifierStartTable;
  11. var asciiIdentifierPartTable = unicodeData.asciiIdentifierPartTable;
  12. var nonAsciiIdentifierStartTable = require("../data/non-ascii-identifier-start.js");
  13. var nonAsciiIdentifierPartTable = require("../data/non-ascii-identifier-part-only.js");
  14. // Some of these token types are from JavaScript Parser API
  15. // while others are specific to JSHint parser.
  16. // JS Parser API: https://developer.mozilla.org/en-US/docs/SpiderMonkey/Parser_API
  // Numeric codes for every token type the lexer can produce. Codes are
  // assigned sequentially, starting at 1, in the order the names are listed.
  var Token = [
    "Identifier",
    "Punctuator",
    "NumericLiteral",
    "StringLiteral",
    "Comment",
    "Keyword",
    "NullLiteral",
    "BooleanLiteral",
    "RegExp",
    "TemplateLiteral"
  ].reduce(function (codes, name, position) {
    codes[name] = position + 1;
    return codes;
  }, {});
  29. // Object that handles postponed lexing verifications that checks the parsed
  30. // environment state.
  function asyncTrigger() {
    // Callbacks waiting to be run by the next check() call.
    var pending = [];

    // Queue a callback for later execution.
    function push(fn) {
      pending.push(fn);
    }

    // Run every queued callback in insertion order, then empty the queue.
    // The length is re-read on each pass, so callbacks queued while the
    // queue is being drained are executed in the same check() call.
    function check() {
      var position = 0;

      while (position < pending.length) {
        pending[position]();
        position += 1;
      }

      pending.length = 0;
    }

    return {
      push: push,
      check: check
    };
  }
  45. /*
  46. * Lexer for JSHint.
  47. *
  48. * This object does a char-by-char scan of the provided source code
  49. * and produces a sequence of tokens.
  50. *
  51. * var lex = new Lexer("var i = 0;");
  52. * lex.start();
  53. * lex.token(); // returns the next token
  54. *
  55. * You have to use the token() method to move the lexer forward
  56. * but you don't have to use its return value to get tokens. In addition
  57. * to token() method returning the next token, the Lexer object also
  58. * emits events.
  59. *
  60. * lex.on("Identifier", function (data) {
  61. * if (data.name.indexOf("_") >= 0) {
  62. * // Produce a warning.
  63. * }
  64. * });
  65. *
  66. * Note that the token() method returns tokens in a JSLint-compatible
  67. * format while the event emitter uses a slightly modified version of
  68. * Mozilla's JavaScript Parser API. Eventually, we will move away from
  69. * JSLint format.
  70. */
  function Lexer(source) {
    // `source` is either the whole program as one string or an array of
    // lines. Either way the lexer works on an array of lines.
    var lines = source;

    if (typeof lines === "string") {
      lines = lines
        .replace(/\r\n/g, "\n")
        .replace(/\r/g, "\n")
        .split("\n");
    } else if (Array.isArray(lines)) {
      // Work on a copy: the shebang handling below overwrites the first
      // line, and that must not modify the array owned by the caller.
      lines = lines.slice();
    }

    // If the first line is a shebang (#!), make it a blank and move on.
    // Shebangs are used by Node scripts.
    if (lines[0] && lines[0].substr(0, 2) === "#!") {
      if (lines[0].indexOf("node") !== -1) {
        state.option.node = true;
      }
      lines[0] = "";
    }

    this.emitter = new events.EventEmitter();
    this.source = source;
    this.setLines(lines);
    this.prereg = true;

    // Position bookkeeping: `line` is 0 until the first line is loaded,
    // `char` and `from` are 1-based columns within the current line.
    this.line = 0;
    this.char = 1;
    this.from = 1;
    this.input = "";
    this.inComment = false;

    // Append one space to the shared tab string per configured indent level.
    for (var i = 0; i < state.option.indent; i += 1) {
      state.tab += " ";
    }
  }
  100. Lexer.prototype = {
  101. _lines: [],
  102. getLines: function () {
  103. this._lines = state.lines;
  104. return this._lines;
  105. },
  106. setLines: function (val) {
  107. this._lines = val;
  108. state.lines = this._lines;
  109. },
  110. /*
  111. * Return the character i positions ahead without actually moving the
  112. * char pointer.
  113. */
  114. peek: function (i) {
  115. return this.input.charAt(i || 0);
  116. },
  117. /*
  118. * Move the char pointer forward i times.
  119. */
  120. skip: function (i) {
  121. i = i || 1;
  122. this.char += i;
  123. this.input = this.input.slice(i);
  124. },
  125. /*
  126. * Subscribe to a token event. The API for this method is similar to
  127. * Underscore.js, i.e. you can subscribe to multiple events with
  128. * one call:
  129. *
  130. * lex.on("Identifier Number", function (data) {
  131. * // ...
  132. * });
  133. */
  134. on: function (names, listener) {
  135. names.split(" ").forEach(function (name) {
  136. this.emitter.on(name, listener);
  137. }.bind(this));
  138. },
  139. /*
  140. * Trigger a token event. All arguments will be passed to each
  141. * listener.
  142. */
  143. trigger: function () {
  144. this.emitter.emit.apply(this.emitter, Array.prototype.slice.call(arguments));
  145. },
  146. /*
  147. * Postpone a token event. The checking condition is passed as the
  148. * last parameter, and the trigger function is called in a
  149. * stored callback, to be invoked later by the parser through the
  150. * check() function. This prevents the parser's peek() from giving
  151. * the lexer a false context.
  152. */
  153. triggerAsync: function (type, args, checks, fn) {
  154. checks.push(function () {
  155. if (fn()) {
  156. this.trigger(type, args);
  157. }
  158. }.bind(this));
  159. },
  160. /*
  161. * Extract a punctuator out of the next sequence of characters
  162. * or return 'null' if it's not possible.
  163. *
  164. * This method's implementation was heavily influenced by the
  165. * scanPunctuator function in the Esprima parser's source code.
  166. */
  167. scanPunctuator: function () {
  168. var ch1 = this.peek();
  169. var ch2, ch3, ch4;
  170. switch (ch1) {
  171. // Most common single-character punctuators
  172. case ".":
  173. if ((/^[0-9]$/).test(this.peek(1))) {
  174. return null;
  175. }
  176. if (this.peek(1) === "." && this.peek(2) === ".") {
  177. return {
  178. type: Token.Punctuator,
  179. value: "..."
  180. };
  181. }
  182. /* falls through */
  183. case "(":
  184. case ")":
  185. case ";":
  186. case ",":
  187. case "{":
  188. case "}":
  189. case "[":
  190. case "]":
  191. case ":":
  192. case "~":
  193. case "?":
  194. return {
  195. type: Token.Punctuator,
  196. value: ch1
  197. };
  198. // A pound sign (for Node shebangs)
  199. case "#":
  200. return {
  201. type: Token.Punctuator,
  202. value: ch1
  203. };
  204. // We're at the end of input
  205. case "":
  206. return null;
  207. }
  208. // Peek more characters
  209. ch2 = this.peek(1);
  210. ch3 = this.peek(2);
  211. ch4 = this.peek(3);
  212. // 4-character punctuator: >>>=
  213. if (ch1 === ">" && ch2 === ">" && ch3 === ">" && ch4 === "=") {
  214. return {
  215. type: Token.Punctuator,
  216. value: ">>>="
  217. };
  218. }
  219. // 3-character punctuators: === !== >>> <<= >>=
  220. if (ch1 === "=" && ch2 === "=" && ch3 === "=") {
  221. return {
  222. type: Token.Punctuator,
  223. value: "==="
  224. };
  225. }
  226. if (ch1 === "!" && ch2 === "=" && ch3 === "=") {
  227. return {
  228. type: Token.Punctuator,
  229. value: "!=="
  230. };
  231. }
  232. if (ch1 === ">" && ch2 === ">" && ch3 === ">") {
  233. return {
  234. type: Token.Punctuator,
  235. value: ">>>"
  236. };
  237. }
  238. if (ch1 === "<" && ch2 === "<" && ch3 === "=") {
  239. return {
  240. type: Token.Punctuator,
  241. value: "<<="
  242. };
  243. }
  244. if (ch1 === ">" && ch2 === ">" && ch3 === "=") {
  245. return {
  246. type: Token.Punctuator,
  247. value: ">>="
  248. };
  249. }
  250. // Fat arrow punctuator
  251. if (ch1 === "=" && ch2 === ">") {
  252. return {
  253. type: Token.Punctuator,
  254. value: ch1 + ch2
  255. };
  256. }
  257. // 2-character punctuators: <= >= == != ++ -- << >> && ||
  258. // += -= *= %= &= |= ^= (but not /=, see below)
  259. if (ch1 === ch2 && ("+-<>&|".indexOf(ch1) >= 0)) {
  260. return {
  261. type: Token.Punctuator,
  262. value: ch1 + ch2
  263. };
  264. }
  265. if ("<>=!+-*%&|^".indexOf(ch1) >= 0) {
  266. if (ch2 === "=") {
  267. return {
  268. type: Token.Punctuator,
  269. value: ch1 + ch2
  270. };
  271. }
  272. return {
  273. type: Token.Punctuator,
  274. value: ch1
  275. };
  276. }
  277. // Special case: /=. We need to make sure that this is an
  278. // operator and not a regular expression.
  279. if (ch1 === "/") {
  280. if (ch2 === "=" && /\/=(?!(\S*\/[gim]?))/.test(this.input)) {
  281. // /= is not a part of a regular expression, return it as a
  282. // punctuator.
  283. return {
  284. type: Token.Punctuator,
  285. value: "/="
  286. };
  287. }
  288. return {
  289. type: Token.Punctuator,
  290. value: "/"
  291. };
  292. }
  293. return null;
  294. },
  295. /*
  296. * Extract a comment out of the next sequence of characters and/or
  297. * lines or return 'null' if its not possible. Since comments can
  298. * span across multiple lines this method has to move the char
  299. * pointer.
  300. *
  301. * In addition to normal JavaScript comments (// and /*) this method
  302. * also recognizes JSHint- and JSLint-specific comments such as
  303. * /*jshint, /*jslint, /*globals and so on.
  304. */
  305. scanComments: function () {
  306. var ch1 = this.peek();
  307. var ch2 = this.peek(1);
  308. var rest = this.input.substr(2);
  309. var startLine = this.line;
  310. var startChar = this.char;
  311. // Create a comment token object and make sure it
  312. // has all the data JSHint needs to work with special
  313. // comments.
  314. function commentToken(label, body, opt) {
  315. var special = ["jshint", "jslint", "members", "member", "globals", "global", "exported"];
  316. var isSpecial = false;
  317. var value = label + body;
  318. var commentType = "plain";
  319. opt = opt || {};
  320. if (opt.isMultiline) {
  321. value += "*/";
  322. }
  323. special.forEach(function (str) {
  324. if (isSpecial) {
  325. return;
  326. }
  327. // Don't recognize any special comments other than jshint for single-line
  328. // comments. This introduced many problems with legit comments.
  329. if (label === "//" && str !== "jshint") {
  330. return;
  331. }
  332. if (body.charAt(str.length) === " " && body.substr(0, str.length) === str) {
  333. isSpecial = true;
  334. label = label + str;
  335. body = body.substr(str.length);
  336. }
  337. if (!isSpecial && body.charAt(0) === " " && body.charAt(str.length + 1) === " " &&
  338. body.substr(1, str.length) === str) {
  339. isSpecial = true;
  340. label = label + " " + str;
  341. body = body.substr(str.length + 1);
  342. }
  343. if (!isSpecial) {
  344. return;
  345. }
  346. switch (str) {
  347. case "member":
  348. commentType = "members";
  349. break;
  350. case "global":
  351. commentType = "globals";
  352. break;
  353. default:
  354. commentType = str;
  355. }
  356. });
  357. return {
  358. type: Token.Comment,
  359. commentType: commentType,
  360. value: value,
  361. body: body,
  362. isSpecial: isSpecial,
  363. isMultiline: opt.isMultiline || false,
  364. isMalformed: opt.isMalformed || false
  365. };
  366. }
  367. // End of unbegun comment. Raise an error and skip that input.
  368. if (ch1 === "*" && ch2 === "/") {
  369. this.trigger("error", {
  370. code: "E018",
  371. line: startLine,
  372. character: startChar
  373. });
  374. this.skip(2);
  375. return null;
  376. }
  377. // Comments must start either with // or /*
  378. if (ch1 !== "/" || (ch2 !== "*" && ch2 !== "/")) {
  379. return null;
  380. }
  381. // One-line comment
  382. if (ch2 === "/") {
  383. this.skip(this.input.length); // Skip to the EOL.
  384. return commentToken("//", rest);
  385. }
  386. var body = "";
  387. /* Multi-line comment */
  388. if (ch2 === "*") {
  389. this.inComment = true;
  390. this.skip(2);
  391. while (this.peek() !== "*" || this.peek(1) !== "/") {
  392. if (this.peek() === "") { // End of Line
  393. body += "\n";
  394. // If we hit EOF and our comment is still unclosed,
  395. // trigger an error and end the comment implicitly.
  396. if (!this.nextLine()) {
  397. this.trigger("error", {
  398. code: "E017",
  399. line: startLine,
  400. character: startChar
  401. });
  402. this.inComment = false;
  403. return commentToken("/*", body, {
  404. isMultiline: true,
  405. isMalformed: true
  406. });
  407. }
  408. } else {
  409. body += this.peek();
  410. this.skip();
  411. }
  412. }
  413. this.skip(2);
  414. this.inComment = false;
  415. return commentToken("/*", body, { isMultiline: true });
  416. }
  417. },
  418. /*
  419. * Extract a keyword out of the next sequence of characters or
  420. * return 'null' if its not possible.
  421. */
  422. scanKeyword: function () {
  423. var result = /^[a-zA-Z_$][a-zA-Z0-9_$]*/.exec(this.input);
  424. var keywords = [
  425. "if", "in", "do", "var", "for", "new",
  426. "try", "let", "this", "else", "case",
  427. "void", "with", "enum", "while", "break",
  428. "catch", "throw", "const", "yield", "class",
  429. "super", "return", "typeof", "delete",
  430. "switch", "export", "import", "default",
  431. "finally", "extends", "function", "continue",
  432. "debugger", "instanceof"
  433. ];
  434. if (result && keywords.indexOf(result[0]) >= 0) {
  435. return {
  436. type: Token.Keyword,
  437. value: result[0]
  438. };
  439. }
  440. return null;
  441. },
  442. /*
  443. * Extract a JavaScript identifier out of the next sequence of
  444. * characters or return 'null' if it's not possible. In addition
  445. * to Identifier, this method can also produce BooleanLiteral
  446. * (true/false) and NullLiteral (null).
  447. */
  448. scanIdentifier: function () {
  449. var id = "";
  450. var index = 0;
  451. var type, char;
  452. function isNonAsciiIdentifierStart(code) {
  453. return nonAsciiIdentifierStartTable.indexOf(code) > -1;
  454. }
  455. function isNonAsciiIdentifierPart(code) {
  456. return isNonAsciiIdentifierStart(code) || nonAsciiIdentifierPartTable.indexOf(code) > -1;
  457. }
  458. function isHexDigit(str) {
  459. return (/^[0-9a-fA-F]$/).test(str);
  460. }
  461. var readUnicodeEscapeSequence = function () {
  462. /*jshint validthis:true */
  463. index += 1;
  464. if (this.peek(index) !== "u") {
  465. return null;
  466. }
  467. var ch1 = this.peek(index + 1);
  468. var ch2 = this.peek(index + 2);
  469. var ch3 = this.peek(index + 3);
  470. var ch4 = this.peek(index + 4);
  471. var code;
  472. if (isHexDigit(ch1) && isHexDigit(ch2) && isHexDigit(ch3) && isHexDigit(ch4)) {
  473. code = parseInt(ch1 + ch2 + ch3 + ch4, 16);
  474. if (asciiIdentifierPartTable[code] || isNonAsciiIdentifierPart(code)) {
  475. index += 5;
  476. return "\\u" + ch1 + ch2 + ch3 + ch4;
  477. }
  478. return null;
  479. }
  480. return null;
  481. }.bind(this);
  482. var getIdentifierStart = function () {
  483. /*jshint validthis:true */
  484. var chr = this.peek(index);
  485. var code = chr.charCodeAt(0);
  486. if (code === 92) {
  487. return readUnicodeEscapeSequence();
  488. }
  489. if (code < 128) {
  490. if (asciiIdentifierStartTable[code]) {
  491. index += 1;
  492. return chr;
  493. }
  494. return null;
  495. }
  496. if (isNonAsciiIdentifierStart(code)) {
  497. index += 1;
  498. return chr;
  499. }
  500. return null;
  501. }.bind(this);
  502. var getIdentifierPart = function () {
  503. /*jshint validthis:true */
  504. var chr = this.peek(index);
  505. var code = chr.charCodeAt(0);
  506. if (code === 92) {
  507. return readUnicodeEscapeSequence();
  508. }
  509. if (code < 128) {
  510. if (asciiIdentifierPartTable[code]) {
  511. index += 1;
  512. return chr;
  513. }
  514. return null;
  515. }
  516. if (isNonAsciiIdentifierPart(code)) {
  517. index += 1;
  518. return chr;
  519. }
  520. return null;
  521. }.bind(this);
  522. char = getIdentifierStart();
  523. if (char === null) {
  524. return null;
  525. }
  526. id = char;
  527. for (;;) {
  528. char = getIdentifierPart();
  529. if (char === null) {
  530. break;
  531. }
  532. id += char;
  533. }
  534. switch (id) {
  535. case "true":
  536. case "false":
  537. type = Token.BooleanLiteral;
  538. break;
  539. case "null":
  540. type = Token.NullLiteral;
  541. break;
  542. default:
  543. type = Token.Identifier;
  544. }
  545. return {
  546. type: type,
  547. value: id
  548. };
  549. },
  550. /*
  551. * Extract a numeric literal out of the next sequence of
  552. * characters or return 'null' if its not possible. This method
  553. * supports all numeric literals described in section 7.8.3
  554. * of the EcmaScript 5 specification.
  555. *
  556. * This method's implementation was heavily influenced by the
  557. * scanNumericLiteral function in the Esprima parser's source code.
  558. */
  559. scanNumericLiteral: function () {
  560. var index = 0;
  561. var value = "";
  562. var length = this.input.length;
  563. var char = this.peek(index);
  564. var bad;
  565. function isDecimalDigit(str) {
  566. return (/^[0-9]$/).test(str);
  567. }
  568. function isOctalDigit(str) {
  569. return (/^[0-7]$/).test(str);
  570. }
  571. function isHexDigit(str) {
  572. return (/^[0-9a-fA-F]$/).test(str);
  573. }
  574. function isIdentifierStart(ch) {
  575. return (ch === "$") || (ch === "_") || (ch === "\\") ||
  576. (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z");
  577. }
  578. // Numbers must start either with a decimal digit or a point.
  579. if (char !== "." && !isDecimalDigit(char)) {
  580. return null;
  581. }
  582. if (char !== ".") {
  583. value = this.peek(index);
  584. index += 1;
  585. char = this.peek(index);
  586. if (value === "0") {
  587. // Base-16 numbers.
  588. if (char === "x" || char === "X") {
  589. index += 1;
  590. value += char;
  591. while (index < length) {
  592. char = this.peek(index);
  593. if (!isHexDigit(char)) {
  594. break;
  595. }
  596. value += char;
  597. index += 1;
  598. }
  599. if (value.length <= 2) { // 0x
  600. return {
  601. type: Token.NumericLiteral,
  602. value: value,
  603. isMalformed: true
  604. };
  605. }
  606. if (index < length) {
  607. char = this.peek(index);
  608. if (isIdentifierStart(char)) {
  609. return null;
  610. }
  611. }
  612. return {
  613. type: Token.NumericLiteral,
  614. value: value,
  615. base: 16,
  616. isMalformed: false
  617. };
  618. }
  619. // Base-8 numbers.
  620. if (isOctalDigit(char)) {
  621. index += 1;
  622. value += char;
  623. bad = false;
  624. while (index < length) {
  625. char = this.peek(index);
  626. // Numbers like '019' (note the 9) are not valid octals
  627. // but we still parse them and mark as malformed.
  628. if (isDecimalDigit(char)) {
  629. bad = true;
  630. } else if (!isOctalDigit(char)) {
  631. break;
  632. }
  633. value += char;
  634. index += 1;
  635. }
  636. if (index < length) {
  637. char = this.peek(index);
  638. if (isIdentifierStart(char)) {
  639. return null;
  640. }
  641. }
  642. return {
  643. type: Token.NumericLiteral,
  644. value: value,
  645. base: 8,
  646. isMalformed: false
  647. };
  648. }
  649. // Decimal numbers that start with '0' such as '09' are illegal
  650. // but we still parse them and return as malformed.
  651. if (isDecimalDigit(char)) {
  652. index += 1;
  653. value += char;
  654. }
  655. }
  656. while (index < length) {
  657. char = this.peek(index);
  658. if (!isDecimalDigit(char)) {
  659. break;
  660. }
  661. value += char;
  662. index += 1;
  663. }
  664. }
  665. // Decimal digits.
  666. if (char === ".") {
  667. value += char;
  668. index += 1;
  669. while (index < length) {
  670. char = this.peek(index);
  671. if (!isDecimalDigit(char)) {
  672. break;
  673. }
  674. value += char;
  675. index += 1;
  676. }
  677. }
  678. // Exponent part.
  679. if (char === "e" || char === "E") {
  680. value += char;
  681. index += 1;
  682. char = this.peek(index);
  683. if (char === "+" || char === "-") {
  684. value += this.peek(index);
  685. index += 1;
  686. }
  687. char = this.peek(index);
  688. if (isDecimalDigit(char)) {
  689. value += char;
  690. index += 1;
  691. while (index < length) {
  692. char = this.peek(index);
  693. if (!isDecimalDigit(char)) {
  694. break;
  695. }
  696. value += char;
  697. index += 1;
  698. }
  699. } else {
  700. return null;
  701. }
  702. }
  703. if (index < length) {
  704. char = this.peek(index);
  705. if (isIdentifierStart(char)) {
  706. return null;
  707. }
  708. }
  709. return {
  710. type: Token.NumericLiteral,
  711. value: value,
  712. base: 10,
  713. isMalformed: !isFinite(value)
  714. };
  715. },
  716. /*
  717. * Extract a template literal out of the next sequence of characters
  718. * and/or lines or return 'null' if its not possible. Since template
  719. * literals can span across multiple lines, this method has to move
  720. * the char pointer.
  721. */
  722. scanTemplateLiteral: function () {
  723. // String must start with a backtick.
  724. if (!state.option.esnext || this.peek() !== "`") {
  725. return null;
  726. }
  727. var startLine = this.line;
  728. var startChar = this.char;
  729. var jump = 1;
  730. var value = "";
  731. // For now, do not perform any linting of the content of the template
  732. // string. Just skip until the next backtick is found.
  733. this.skip();
  734. while (this.peek() !== "`") {
  735. while (this.peek() === "") {
  736. // End of line --- For template literals in ES6, no backslash is
  737. // required to precede newlines.
  738. if (!this.nextLine()) {
  739. this.trigger("error", {
  740. code: "E052",
  741. line: startLine,
  742. character: startChar
  743. });
  744. return {
  745. type: Token.TemplateLiteral,
  746. value: value,
  747. isUnclosed: true
  748. };
  749. }
  750. value += "\n";
  751. }
  752. // TODO: do more interesting linting here, similar to string literal
  753. // linting.
  754. var char = this.peek();
  755. this.skip(jump);
  756. value += char;
  757. }
  758. this.skip();
  759. return {
  760. type: Token.TemplateLiteral,
  761. value: value,
  762. isUnclosed: false
  763. };
  764. },
  765. /*
  766. * Extract a string out of the next sequence of characters and/or
  767. * lines or return 'null' if its not possible. Since strings can
  768. * span across multiple lines this method has to move the char
  769. * pointer.
  770. *
  771. * This method recognizes pseudo-multiline JavaScript strings:
  772. *
  773. * var str = "hello\
  774. * world";
  775. */
  776. scanStringLiteral: function (checks) {
  777. /*jshint loopfunc:true */
  778. var quote = this.peek();
  779. // String must start with a quote.
  780. if (quote !== "\"" && quote !== "'") {
  781. return null;
  782. }
  783. // In JSON strings must always use double quotes.
  784. this.triggerAsync("warning", {
  785. code: "W108",
  786. line: this.line,
  787. character: this.char // +1?
  788. }, checks, function () { return state.jsonMode && quote !== "\""; });
  789. var value = "";
  790. var startLine = this.line;
  791. var startChar = this.char;
  792. var allowNewLine = false;
  793. this.skip();
  794. while (this.peek() !== quote) {
  795. while (this.peek() === "") { // End Of Line
  796. // If an EOL is not preceded by a backslash, show a warning
  797. // and proceed like it was a legit multi-line string where
  798. // author simply forgot to escape the newline symbol.
  799. //
  800. // Another approach is to implicitly close a string on EOL
  801. // but it generates too many false positives.
  802. if (!allowNewLine) {
  803. this.trigger("warning", {
  804. code: "W112",
  805. line: this.line,
  806. character: this.char
  807. });
  808. } else {
  809. allowNewLine = false;
  810. // Otherwise show a warning if multistr option was not set.
  811. // For JSON, show warning no matter what.
  812. this.triggerAsync("warning", {
  813. code: "W043",
  814. line: this.line,
  815. character: this.char
  816. }, checks, function () { return !state.option.multistr; });
  817. this.triggerAsync("warning", {
  818. code: "W042",
  819. line: this.line,
  820. character: this.char
  821. }, checks, function () { return state.jsonMode && state.option.multistr; });
  822. }
  823. // If we get an EOF inside of an unclosed string, show an
  824. // error and implicitly close it at the EOF point.
  825. if (!this.nextLine()) {
  826. this.trigger("error", {
  827. code: "E029",
  828. line: startLine,
  829. character: startChar
  830. });
  831. return {
  832. type: Token.StringLiteral,
  833. value: value,
  834. isUnclosed: true,
  835. quote: quote
  836. };
  837. }
  838. }
  839. allowNewLine = false;
  840. var char = this.peek();
  841. var jump = 1; // A length of a jump, after we're done
  842. // parsing this character.
  843. if (char < " ") {
  844. // Warn about a control character in a string.
  845. this.trigger("warning", {
  846. code: "W113",
  847. line: this.line,
  848. character: this.char,
  849. data: [ "<non-printable>" ]
  850. });
  851. }
  852. // Special treatment for some escaped characters.
  853. if (char === "\\") {
  854. this.skip();
  855. char = this.peek();
  856. switch (char) {
  857. case "'":
  858. this.triggerAsync("warning", {
  859. code: "W114",
  860. line: this.line,
  861. character: this.char,
  862. data: [ "\\'" ]
  863. }, checks, function () {return state.jsonMode; });
  864. break;
  865. case "b":
  866. char = "\\b";
  867. break;
  868. case "f":
  869. char = "\\f";
  870. break;
  871. case "n":
  872. char = "\\n";
  873. break;
  874. case "r":
  875. char = "\\r";
  876. break;
  877. case "t":
  878. char = "\\t";
  879. break;
  880. case "0":
  881. char = "\\0";
  882. // Octal literals fail in strict mode.
  883. // Check if the number is between 00 and 07.
  884. var n = parseInt(this.peek(1), 10);
  885. this.triggerAsync("warning", {
  886. code: "W115",
  887. line: this.line,
  888. character: this.char
  889. }, checks,
  890. function () { return n >= 0 && n <= 7 && state.directive["use strict"]; });
  891. break;
  892. case "u":
  893. char = String.fromCharCode(parseInt(this.input.substr(1, 4), 16));
  894. jump = 5;
  895. break;
  896. case "v":
  897. this.triggerAsync("warning", {
  898. code: "W114",
  899. line: this.line,
  900. character: this.char,
  901. data: [ "\\v" ]
  902. }, checks, function () { return state.jsonMode; });
  903. char = "\v";
  904. break;
  905. case "x":
  906. var x = parseInt(this.input.substr(1, 2), 16);
  907. this.triggerAsync("warning", {
  908. code: "W114",
  909. line: this.line,
  910. character: this.char,
  911. data: [ "\\x-" ]
  912. }, checks, function () { return state.jsonMode; });
  913. char = String.fromCharCode(x);
  914. jump = 3;
  915. break;
  916. case "\\":
  917. char = "\\\\";
  918. break;
  919. case "\"":
  920. char = "\\\"";
  921. break;
  922. case "/":
  923. break;
  924. case "":
  925. allowNewLine = true;
  926. char = "";
  927. break;
  928. case "!":
  929. if (value.slice(value.length - 2) === "<") {
  930. break;
  931. }
  932. /*falls through */
  933. default:
  934. // Weird escaping.
  935. this.trigger("warning", {
  936. code: "W044",
  937. line: this.line,
  938. character: this.char
  939. });
  940. }
  941. }
  942. value += char;
  943. this.skip(jump);
  944. }
  945. this.skip();
  946. return {
  947. type: Token.StringLiteral,
  948. value: value,
  949. isUnclosed: false,
  950. quote: quote
  951. };
  952. },
  953. /*
  954. * Extract a regular expression out of the next sequence of
  955. * characters and/or lines or return 'null' if its not possible.
  956. *
  957. * This method is platform dependent: it accepts almost any
  958. * regular expression values but then tries to compile and run
  959. * them using system's RegExp object. This means that there are
  960. * rare edge cases where one JavaScript engine complains about
  961. * your regular expression while others don't.
  962. */
  963. scanRegExp: function () {
  964. var index = 0;
  965. var length = this.input.length;
  966. var char = this.peek();
  967. var value = char;
  968. var body = "";
  969. var flags = [];
  970. var malformed = false;
  971. var isCharSet = false;
  972. var terminated;
  973. var scanUnexpectedChars = function () {
  974. // Unexpected control character
  975. if (char < " ") {
  976. malformed = true;
  977. this.trigger("warning", {
  978. code: "W048",
  979. line: this.line,
  980. character: this.char
  981. });
  982. }
  983. // Unexpected escaped character
  984. if (char === "<") {
  985. malformed = true;
  986. this.trigger("warning", {
  987. code: "W049",
  988. line: this.line,
  989. character: this.char,
  990. data: [ char ]
  991. });
  992. }
  993. }.bind(this);
  994. // Regular expressions must start with '/'
  995. if (!this.prereg || char !== "/") {
  996. return null;
  997. }
  998. index += 1;
  999. terminated = false;
  1000. // Try to get everything in between slashes. A couple of
  1001. // cases aside (see scanUnexpectedChars) we don't really
  1002. // care whether the resulting expression is valid or not.
  1003. // We will check that later using the RegExp object.
  1004. while (index < length) {
  1005. char = this.peek(index);
  1006. value += char;
  1007. body += char;
  1008. if (isCharSet) {
  1009. if (char === "]") {
  1010. if (this.peek(index - 1) !== "\\" || this.peek(index - 2) === "\\") {
  1011. isCharSet = false;
  1012. }
  1013. }
  1014. if (char === "\\") {
  1015. index += 1;
  1016. char = this.peek(index);
  1017. body += char;
  1018. value += char;
  1019. scanUnexpectedChars();
  1020. }
  1021. index += 1;
  1022. continue;
  1023. }
  1024. if (char === "\\") {
  1025. index += 1;
  1026. char = this.peek(index);
  1027. body += char;
  1028. value += char;
  1029. scanUnexpectedChars();
  1030. if (char === "/") {
  1031. index += 1;
  1032. continue;
  1033. }
  1034. if (char === "[") {
  1035. index += 1;
  1036. continue;
  1037. }
  1038. }
  1039. if (char === "[") {
  1040. isCharSet = true;
  1041. index += 1;
  1042. continue;
  1043. }
  1044. if (char === "/") {
  1045. body = body.substr(0, body.length - 1);
  1046. terminated = true;
  1047. index += 1;
  1048. break;
  1049. }
  1050. index += 1;
  1051. }
  1052. // A regular expression that was never closed is an
  1053. // error from which we cannot recover.
  1054. if (!terminated) {
  1055. this.trigger("error", {
  1056. code: "E015",
  1057. line: this.line,
  1058. character: this.from
  1059. });
  1060. return void this.trigger("fatal", {
  1061. line: this.line,
  1062. from: this.from
  1063. });
  1064. }
  1065. // Parse flags (if any).
  1066. while (index < length) {
  1067. char = this.peek(index);
  1068. if (!/[gim]/.test(char)) {
  1069. break;
  1070. }
  1071. flags.push(char);
  1072. value += char;
  1073. index += 1;
  1074. }
  1075. // Check regular expression for correctness.
  1076. try {
  1077. new RegExp(body, flags.join(""));
  1078. } catch (err) {
  1079. malformed = true;
  1080. this.trigger("error", {
  1081. code: "E016",
  1082. line: this.line,
  1083. character: this.char,
  1084. data: [ err.message ] // Platform dependent!
  1085. });
  1086. }
  1087. return {
  1088. type: Token.RegExp,
  1089. value: value,
  1090. flags: flags,
  1091. isMalformed: malformed
  1092. };
  1093. },
  1094. /*
  1095. * Scan for any occurrence of non-breaking spaces. Non-breaking spaces
  1096. * can be mistakenly typed on OS X with option-space. Non UTF-8 web
  1097. * pages with non-breaking spaces produce syntax errors.
  1098. */
  1099. scanNonBreakingSpaces: function () {
  1100. return state.option.nonbsp ?
  1101. this.input.search(/(\u00A0)/) : -1;
  1102. },
  1103. /*
  1104. * Scan for characters that get silently deleted by one or more browsers.
  1105. */
  1106. scanUnsafeChars: function () {
  1107. return this.input.search(reg.unsafeChars);
  1108. },
  1109. /*
  1110. * Produce the next raw token or return 'null' if no tokens can be matched.
  1111. * This method skips over all space characters.
  1112. */
  1113. next: function (checks) {
  1114. this.from = this.char;
  1115. // Move to the next non-space character.
  1116. var start;
  1117. if (/\s/.test(this.peek())) {
  1118. start = this.char;
  1119. while (/\s/.test(this.peek())) {
  1120. this.from += 1;
  1121. this.skip();
  1122. }
  1123. }
  1124. // Methods that work with multi-line structures and move the
  1125. // character pointer.
  1126. var match = this.scanComments() ||
  1127. this.scanStringLiteral(checks) ||
  1128. this.scanTemplateLiteral();
  1129. if (match) {
  1130. return match;
  1131. }
  1132. // Methods that don't move the character pointer.
  1133. match =
  1134. this.scanRegExp() ||
  1135. this.scanPunctuator() ||
  1136. this.scanKeyword() ||
  1137. this.scanIdentifier() ||
  1138. this.scanNumericLiteral();
  1139. if (match) {
  1140. this.skip(match.value.length);
  1141. return match;
  1142. }
  1143. // No token could be matched, give up.
  1144. return null;
  1145. },
  1146. /*
  1147. * Switch to the next line and reset all char pointers. Once
  1148. * switched, this method also checks for other minor warnings.
  1149. */
  1150. nextLine: function () {
  1151. var char;
  1152. if (this.line >= this.getLines().length) {
  1153. return false;
  1154. }
  1155. this.input = this.getLines()[this.line];
  1156. this.line += 1;
  1157. this.char = 1;
  1158. this.from = 1;
  1159. var inputTrimmed = this.input.trim();
  1160. var startsWith = function () {
  1161. return _.some(arguments, function (prefix) {
  1162. return inputTrimmed.indexOf(prefix) === 0;
  1163. });
  1164. };
  1165. var endsWith = function () {
  1166. return _.some(arguments, function (suffix) {
  1167. return inputTrimmed.indexOf(suffix, inputTrimmed.length - suffix.length) !== -1;
  1168. });
  1169. };
  1170. // If we are ignoring linter errors, replace the input with empty string
  1171. // if it doesn't already at least start or end a multi-line comment
  1172. if (state.ignoreLinterErrors === true) {
  1173. if (!startsWith("/*", "//") && !endsWith("*/")) {
  1174. this.input = "";
  1175. }
  1176. }
  1177. char = this.scanNonBreakingSpaces();
  1178. if (char >= 0) {
  1179. this.trigger("warning", { code: "W125", line: this.line, character: char + 1 });
  1180. }
  1181. this.input = this.input.replace(/\t/g, state.tab);
  1182. char = this.scanUnsafeChars();
  1183. if (char >= 0) {
  1184. this.trigger("warning", { code: "W100", line: this.line, character: char });
  1185. }
  1186. // If there is a limit on line length, warn when lines get too
  1187. // long.
  1188. if (state.option.maxlen && state.option.maxlen < this.input.length) {
  1189. var inComment = this.inComment ||
  1190. startsWith.call(inputTrimmed, "//") ||
  1191. startsWith.call(inputTrimmed, "/*");
  1192. var shouldTriggerError = !inComment || !reg.maxlenException.test(inputTrimmed);
  1193. if (shouldTriggerError) {
  1194. this.trigger("warning", { code: "W101", line: this.line, character: this.input.length });
  1195. }
  1196. }
  1197. return true;
  1198. },
  1199. /*
  1200. * This is simply a synonym for nextLine() method with a friendlier
  1201. * public name.
  1202. */
  1203. start: function () {
  1204. this.nextLine();
  1205. },
  1206. /*
  1207. * Produce the next token. This function is called by advance() to get
  1208. * the next token. It returns a token in a JSLint-compatible format.
  1209. */
  1210. token: function () {
  1211. /*jshint loopfunc:true */
  1212. var checks = asyncTrigger();
  1213. var token;
  1214. function isReserved(token, isProperty) {
  1215. if (!token.reserved) {
  1216. return false;
  1217. }
  1218. var meta = token.meta;
  1219. if (meta && meta.isFutureReservedWord && state.option.inES5()) {
  1220. // ES3 FutureReservedWord in an ES5 environment.
  1221. if (!meta.es5) {
  1222. return false;
  1223. }
  1224. // Some ES5 FutureReservedWord identifiers are active only
  1225. // within a strict mode environment.
  1226. if (meta.strictOnly) {
  1227. if (!state.option.strict && !state.directive["use strict"]) {
  1228. return false;
  1229. }
  1230. }
  1231. if (isProperty) {
  1232. return false;
  1233. }
  1234. }
  1235. return true;
  1236. }
  1237. // Produce a token object.
  1238. var create = function (type, value, isProperty) {
  1239. /*jshint validthis:true */
  1240. var obj;
  1241. if (type !== "(endline)" && type !== "(end)") {
  1242. this.prereg = false;
  1243. }
  1244. if (type === "(punctuator)") {
  1245. switch (value) {
  1246. case ".":
  1247. case ")":
  1248. case "~":
  1249. case "#":
  1250. case "]":
  1251. this.prereg = false;
  1252. break;
  1253. default:
  1254. this.prereg = true;
  1255. }
  1256. obj = Object.create(state.syntax[value] || state.syntax["(error)"]);
  1257. }
  1258. if (type === "(identifier)") {
  1259. if (value === "return" || value === "case" || value === "typeof") {
  1260. this.prereg = true;
  1261. }
  1262. if (_.has(state.syntax, value)) {
  1263. obj = Object.create(state.syntax[value] || state.syntax["(error)"]);
  1264. // If this can't be a reserved keyword, reset the object.
  1265. if (!isReserved(obj, isProperty && type === "(identifier)")) {
  1266. obj = null;
  1267. }
  1268. }
  1269. }
  1270. if (!obj) {
  1271. obj = Object.create(state.syntax[type]);
  1272. }
  1273. obj.identifier = (type === "(identifier)");
  1274. obj.type = obj.type || type;
  1275. obj.value = value;
  1276. obj.line = this.line;
  1277. obj.character = this.char;
  1278. obj.from = this.from;
  1279. if (isProperty && obj.identifier) {
  1280. obj.isProperty = isProperty;
  1281. }
  1282. obj.check = checks.check;
  1283. return obj;
  1284. }.bind(this);
  1285. for (;;) {
  1286. if (!this.input.length) {
  1287. return create(this.nextLine() ? "(endline)" : "(end)", "");
  1288. }
  1289. token = this.next(checks);
  1290. if (!token) {
  1291. if (this.input.length) {
  1292. // Unexpected character.
  1293. this.trigger("error", {
  1294. code: "E024",
  1295. line: this.line,
  1296. character: this.char,
  1297. data: [ this.peek() ]
  1298. });
  1299. this.input = "";
  1300. }
  1301. continue;
  1302. }
  1303. switch (token.type) {
  1304. case Token.StringLiteral:
  1305. this.triggerAsync("String", {
  1306. line: this.line,
  1307. char: this.char,
  1308. from: this.from,
  1309. value: token.value,
  1310. quote: token.quote
  1311. }, checks, function () { return true; });
  1312. return create("(string)", token.value);
  1313. case Token.TemplateLiteral:
  1314. this.trigger("Template", {
  1315. line: this.line,
  1316. char: this.char,
  1317. from: this.from,
  1318. value: token.value
  1319. });
  1320. return create("(template)", token.value);
  1321. case Token.Identifier:
  1322. this.trigger("Identifier", {
  1323. line: this.line,
  1324. char: this.char,
  1325. from: this.form,
  1326. name: token.value,
  1327. isProperty: state.tokens.curr.id === "."
  1328. });
  1329. /* falls through */
  1330. case Token.Keyword:
  1331. case Token.NullLiteral:
  1332. case Token.BooleanLiteral:
  1333. return create("(identifier)", token.value, state.tokens.curr.id === ".");
  1334. case Token.NumericLiteral:
  1335. if (token.isMalformed) {
  1336. this.trigger("warning", {
  1337. code: "W045",
  1338. line: this.line,
  1339. character: this.char,
  1340. data: [ token.value ]
  1341. });
  1342. }
  1343. this.triggerAsync("warning", {
  1344. code: "W114",
  1345. line: this.line,
  1346. character: this.char,
  1347. data: [ "0x-" ]
  1348. }, checks, function () { return token.base === 16 && state.jsonMode; });
  1349. this.triggerAsync("warning", {
  1350. code: "W115",
  1351. line: this.line,
  1352. character: this.char
  1353. }, checks, function () {
  1354. return state.directive["use strict"] && token.base === 8;
  1355. });
  1356. this.trigger("Number", {
  1357. line: this.line,
  1358. char: this.char,
  1359. from: this.from,
  1360. value: token.value,
  1361. base: token.base,
  1362. isMalformed: token.malformed
  1363. });
  1364. return create("(number)", token.value);
  1365. case Token.RegExp:
  1366. return create("(regexp)", token.value);
  1367. case Token.Comment:
  1368. state.tokens.curr.comment = true;
  1369. if (token.isSpecial) {
  1370. return {
  1371. id: '(comment)',
  1372. value: token.value,
  1373. body: token.body,
  1374. type: token.commentType,
  1375. isSpecial: token.isSpecial,
  1376. line: this.line,
  1377. character: this.char,
  1378. from: this.from
  1379. };
  1380. }
  1381. break;
  1382. case "":
  1383. break;
  1384. default:
  1385. return create("(punctuator)", token.value);
  1386. }
  1387. }
  1388. }
  1389. };
  1390. exports.Lexer = Lexer;