PageRenderTime 84ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 2ms

/deps/v8/src/jsregexp.cc

https://gitlab.com/GeekSir/node
C++ | 6113 lines | 4598 code | 684 blank | 831 comment | 1004 complexity | aef8ad3ebb01bc84c3cbdd8aea488a1e MD5 | raw file
Possible License(s): 0BSD, Apache-2.0, MPL-2.0-no-copyleft-exception, JSON, WTFPL, CC-BY-SA-3.0, Unlicense, ISC, BSD-3-Clause, MIT, AGPL-3.0

Large files are truncated, but you can click here to view the full file

  1. // Copyright 2012 the V8 project authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #include "src/v8.h"
  5. #include "src/ast.h"
  6. #include "src/base/platform/platform.h"
  7. #include "src/compilation-cache.h"
  8. #include "src/compiler.h"
  9. #include "src/execution.h"
  10. #include "src/factory.h"
  11. #include "src/jsregexp-inl.h"
  12. #include "src/jsregexp.h"
  13. #include "src/ostreams.h"
  14. #include "src/parser.h"
  15. #include "src/regexp-macro-assembler.h"
  16. #include "src/regexp-macro-assembler-irregexp.h"
  17. #include "src/regexp-macro-assembler-tracer.h"
  18. #include "src/regexp-stack.h"
  19. #include "src/runtime.h"
  20. #include "src/string-search.h"
  21. #ifndef V8_INTERPRETED_REGEXP
  22. #if V8_TARGET_ARCH_IA32
  23. #include "src/ia32/regexp-macro-assembler-ia32.h" // NOLINT
  24. #elif V8_TARGET_ARCH_X64
  25. #include "src/x64/regexp-macro-assembler-x64.h" // NOLINT
  26. #elif V8_TARGET_ARCH_ARM64
  27. #include "src/arm64/regexp-macro-assembler-arm64.h" // NOLINT
  28. #elif V8_TARGET_ARCH_ARM
  29. #include "src/arm/regexp-macro-assembler-arm.h" // NOLINT
  30. #elif V8_TARGET_ARCH_MIPS
  31. #include "src/mips/regexp-macro-assembler-mips.h" // NOLINT
  32. #elif V8_TARGET_ARCH_MIPS64
  33. #include "src/mips64/regexp-macro-assembler-mips64.h" // NOLINT
  34. #elif V8_TARGET_ARCH_X87
  35. #include "src/x87/regexp-macro-assembler-x87.h" // NOLINT
  36. #else
  37. #error Unsupported target architecture.
  38. #endif
  39. #endif
  40. #include "src/interpreter-irregexp.h"
  41. namespace v8 {
  42. namespace internal {
  43. MaybeHandle<Object> RegExpImpl::CreateRegExpLiteral(
  44. Handle<JSFunction> constructor,
  45. Handle<String> pattern,
  46. Handle<String> flags) {
  47. // Call the construct code with 2 arguments.
  48. Handle<Object> argv[] = { pattern, flags };
  49. return Execution::New(constructor, ARRAY_SIZE(argv), argv);
  50. }
  51. static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
  52. int flags = JSRegExp::NONE;
  53. for (int i = 0; i < str->length(); i++) {
  54. switch (str->Get(i)) {
  55. case 'i':
  56. flags |= JSRegExp::IGNORE_CASE;
  57. break;
  58. case 'g':
  59. flags |= JSRegExp::GLOBAL;
  60. break;
  61. case 'm':
  62. flags |= JSRegExp::MULTILINE;
  63. break;
  64. }
  65. }
  66. return JSRegExp::Flags(flags);
  67. }
  68. MUST_USE_RESULT
  69. static inline MaybeHandle<Object> ThrowRegExpException(
  70. Handle<JSRegExp> re,
  71. Handle<String> pattern,
  72. Handle<String> error_text,
  73. const char* message) {
  74. Isolate* isolate = re->GetIsolate();
  75. Factory* factory = isolate->factory();
  76. Handle<FixedArray> elements = factory->NewFixedArray(2);
  77. elements->set(0, *pattern);
  78. elements->set(1, *error_text);
  79. Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
  80. Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
  81. return isolate->Throw<Object>(regexp_err);
  82. }
  83. ContainedInLattice AddRange(ContainedInLattice containment,
  84. const int* ranges,
  85. int ranges_length,
  86. Interval new_range) {
  87. DCHECK((ranges_length & 1) == 1);
  88. DCHECK(ranges[ranges_length - 1] == String::kMaxUtf16CodeUnit + 1);
  89. if (containment == kLatticeUnknown) return containment;
  90. bool inside = false;
  91. int last = 0;
  92. for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
  93. // Consider the range from last to ranges[i].
  94. // We haven't got to the new range yet.
  95. if (ranges[i] <= new_range.from()) continue;
  96. // New range is wholly inside last-ranges[i]. Note that new_range.to() is
  97. // inclusive, but the values in ranges are not.
  98. if (last <= new_range.from() && new_range.to() < ranges[i]) {
  99. return Combine(containment, inside ? kLatticeIn : kLatticeOut);
  100. }
  101. return kLatticeUnknown;
  102. }
  103. return containment;
  104. }
  // Tuning trade-off: a larger value makes code generation slower, a smaller
  // value lowers the V8 benchmark score.
  const int kMaxLookaheadForBoyerMoore = 8;

  // A 3-character pattern allows stepping forwards by at most 3 characters at
  // a time, which does not always pay for the extra logic.
  const int kPatternTooShortForBoyerMoore = 2;
  110. // Identifies the sort of regexps where the regexp engine is faster
  111. // than the code used for atom matches.
  112. static bool HasFewDifferentCharacters(Handle<String> pattern) {
  113. int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
  114. if (length <= kPatternTooShortForBoyerMoore) return false;
  115. const int kMod = 128;
  116. bool character_found[kMod];
  117. int different = 0;
  118. memset(&character_found[0], 0, sizeof(character_found));
  119. for (int i = 0; i < length; i++) {
  120. int ch = (pattern->Get(i) & (kMod - 1));
  121. if (!character_found[ch]) {
  122. character_found[ch] = true;
  123. different++;
  124. // We declare a regexp low-alphabet if it has at least 3 times as many
  125. // characters as it has different characters.
  126. if (different * 3 > length) return false;
  127. }
  128. }
  129. return true;
  130. }
  131. // Generic RegExp methods. Dispatches to implementation specific methods.
  132. MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
  133. Handle<String> pattern,
  134. Handle<String> flag_str) {
  135. Isolate* isolate = re->GetIsolate();
  136. Zone zone(isolate);
  137. JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
  138. CompilationCache* compilation_cache = isolate->compilation_cache();
  139. MaybeHandle<FixedArray> maybe_cached =
  140. compilation_cache->LookupRegExp(pattern, flags);
  141. Handle<FixedArray> cached;
  142. bool in_cache = maybe_cached.ToHandle(&cached);
  143. LOG(isolate, RegExpCompileEvent(re, in_cache));
  144. Handle<Object> result;
  145. if (in_cache) {
  146. re->set_data(*cached);
  147. return re;
  148. }
  149. pattern = String::Flatten(pattern);
  150. PostponeInterruptsScope postpone(isolate);
  151. RegExpCompileData parse_result;
  152. FlatStringReader reader(isolate, pattern);
  153. if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
  154. &parse_result, &zone)) {
  155. // Throw an exception if we fail to parse the pattern.
  156. return ThrowRegExpException(re,
  157. pattern,
  158. parse_result.error,
  159. "malformed_regexp");
  160. }
  161. bool has_been_compiled = false;
  162. if (parse_result.simple &&
  163. !flags.is_ignore_case() &&
  164. !HasFewDifferentCharacters(pattern)) {
  165. // Parse-tree is a single atom that is equal to the pattern.
  166. AtomCompile(re, pattern, flags, pattern);
  167. has_been_compiled = true;
  168. } else if (parse_result.tree->IsAtom() &&
  169. !flags.is_ignore_case() &&
  170. parse_result.capture_count == 0) {
  171. RegExpAtom* atom = parse_result.tree->AsAtom();
  172. Vector<const uc16> atom_pattern = atom->data();
  173. Handle<String> atom_string;
  174. ASSIGN_RETURN_ON_EXCEPTION(
  175. isolate, atom_string,
  176. isolate->factory()->NewStringFromTwoByte(atom_pattern),
  177. Object);
  178. if (!HasFewDifferentCharacters(atom_string)) {
  179. AtomCompile(re, pattern, flags, atom_string);
  180. has_been_compiled = true;
  181. }
  182. }
  183. if (!has_been_compiled) {
  184. IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
  185. }
  186. DCHECK(re->data()->IsFixedArray());
  187. // Compilation succeeded so the data is set on the regexp
  188. // and we can store it in the cache.
  189. Handle<FixedArray> data(FixedArray::cast(re->data()));
  190. compilation_cache->PutRegExp(pattern, flags, data);
  191. return re;
  192. }
  193. MaybeHandle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
  194. Handle<String> subject,
  195. int index,
  196. Handle<JSArray> last_match_info) {
  197. switch (regexp->TypeTag()) {
  198. case JSRegExp::ATOM:
  199. return AtomExec(regexp, subject, index, last_match_info);
  200. case JSRegExp::IRREGEXP: {
  201. return IrregexpExec(regexp, subject, index, last_match_info);
  202. }
  203. default:
  204. UNREACHABLE();
  205. return MaybeHandle<Object>();
  206. }
  207. }
  208. // RegExp Atom implementation: Simple string search using indexOf.
  209. void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
  210. Handle<String> pattern,
  211. JSRegExp::Flags flags,
  212. Handle<String> match_pattern) {
  213. re->GetIsolate()->factory()->SetRegExpAtomData(re,
  214. JSRegExp::ATOM,
  215. pattern,
  216. flags,
  217. match_pattern);
  218. }
  219. static void SetAtomLastCapture(FixedArray* array,
  220. String* subject,
  221. int from,
  222. int to) {
  223. SealHandleScope shs(array->GetIsolate());
  224. RegExpImpl::SetLastCaptureCount(array, 2);
  225. RegExpImpl::SetLastSubject(array, subject);
  226. RegExpImpl::SetLastInput(array, subject);
  227. RegExpImpl::SetCapture(array, 0, from);
  228. RegExpImpl::SetCapture(array, 1, to);
  229. }
  230. int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
  231. Handle<String> subject,
  232. int index,
  233. int32_t* output,
  234. int output_size) {
  235. Isolate* isolate = regexp->GetIsolate();
  236. DCHECK(0 <= index);
  237. DCHECK(index <= subject->length());
  238. subject = String::Flatten(subject);
  239. DisallowHeapAllocation no_gc; // ensure vectors stay valid
  240. String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
  241. int needle_len = needle->length();
  242. DCHECK(needle->IsFlat());
  243. DCHECK_LT(0, needle_len);
  244. if (index + needle_len > subject->length()) {
  245. return RegExpImpl::RE_FAILURE;
  246. }
  247. for (int i = 0; i < output_size; i += 2) {
  248. String::FlatContent needle_content = needle->GetFlatContent();
  249. String::FlatContent subject_content = subject->GetFlatContent();
  250. DCHECK(needle_content.IsFlat());
  251. DCHECK(subject_content.IsFlat());
  252. // dispatch on type of strings
  253. index = (needle_content.IsAscii()
  254. ? (subject_content.IsAscii()
  255. ? SearchString(isolate,
  256. subject_content.ToOneByteVector(),
  257. needle_content.ToOneByteVector(),
  258. index)
  259. : SearchString(isolate,
  260. subject_content.ToUC16Vector(),
  261. needle_content.ToOneByteVector(),
  262. index))
  263. : (subject_content.IsAscii()
  264. ? SearchString(isolate,
  265. subject_content.ToOneByteVector(),
  266. needle_content.ToUC16Vector(),
  267. index)
  268. : SearchString(isolate,
  269. subject_content.ToUC16Vector(),
  270. needle_content.ToUC16Vector(),
  271. index)));
  272. if (index == -1) {
  273. return i / 2; // Return number of matches.
  274. } else {
  275. output[i] = index;
  276. output[i+1] = index + needle_len;
  277. index += needle_len;
  278. }
  279. }
  280. return output_size / 2;
  281. }
  282. Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
  283. Handle<String> subject,
  284. int index,
  285. Handle<JSArray> last_match_info) {
  286. Isolate* isolate = re->GetIsolate();
  287. static const int kNumRegisters = 2;
  288. STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize);
  289. int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
  290. int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters);
  291. if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value();
  292. DCHECK_EQ(res, RegExpImpl::RE_SUCCESS);
  293. SealHandleScope shs(isolate);
  294. FixedArray* array = FixedArray::cast(last_match_info->elements());
  295. SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]);
  296. return last_match_info;
  297. }
  298. // Irregexp implementation.
  299. // Ensures that the regexp object contains a compiled version of the
  300. // source for either ASCII or non-ASCII strings.
  301. // If the compiled version doesn't already exist, it is compiled
  302. // from the source pattern.
  303. // If compilation fails, an exception is thrown and this function
  304. // returns false.
  305. bool RegExpImpl::EnsureCompiledIrregexp(
  306. Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) {
  307. Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
  308. #ifdef V8_INTERPRETED_REGEXP
  309. if (compiled_code->IsByteArray()) return true;
  310. #else // V8_INTERPRETED_REGEXP (RegExp native code)
  311. if (compiled_code->IsCode()) return true;
  312. #endif
  313. // We could potentially have marked this as flushable, but have kept
  314. // a saved version if we did not flush it yet.
  315. Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
  316. if (saved_code->IsCode()) {
  317. // Reinstate the code in the original place.
  318. re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
  319. DCHECK(compiled_code->IsSmi());
  320. return true;
  321. }
  322. return CompileIrregexp(re, sample_subject, is_ascii);
  323. }
  324. static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
  325. bool is_ascii,
  326. Handle<String> error_message,
  327. Isolate* isolate) {
  328. Factory* factory = isolate->factory();
  329. Handle<FixedArray> elements = factory->NewFixedArray(2);
  330. elements->set(0, re->Pattern());
  331. elements->set(1, *error_message);
  332. Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
  333. Handle<Object> regexp_err =
  334. factory->NewSyntaxError("malformed_regexp", array);
  335. isolate->Throw(*regexp_err);
  336. return false;
  337. }
  338. bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
  339. Handle<String> sample_subject,
  340. bool is_ascii) {
  341. // Compile the RegExp.
  342. Isolate* isolate = re->GetIsolate();
  343. Zone zone(isolate);
  344. PostponeInterruptsScope postpone(isolate);
  345. // If we had a compilation error the last time this is saved at the
  346. // saved code index.
  347. Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
  348. // When arriving here entry can only be a smi, either representing an
  349. // uncompiled regexp, a previous compilation error, or code that has
  350. // been flushed.
  351. DCHECK(entry->IsSmi());
  352. int entry_value = Smi::cast(entry)->value();
  353. DCHECK(entry_value == JSRegExp::kUninitializedValue ||
  354. entry_value == JSRegExp::kCompilationErrorValue ||
  355. (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
  356. if (entry_value == JSRegExp::kCompilationErrorValue) {
  357. // A previous compilation failed and threw an error which we store in
  358. // the saved code index (we store the error message, not the actual
  359. // error). Recreate the error object and throw it.
  360. Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
  361. DCHECK(error_string->IsString());
  362. Handle<String> error_message(String::cast(error_string));
  363. CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
  364. return false;
  365. }
  366. JSRegExp::Flags flags = re->GetFlags();
  367. Handle<String> pattern(re->Pattern());
  368. pattern = String::Flatten(pattern);
  369. RegExpCompileData compile_data;
  370. FlatStringReader reader(isolate, pattern);
  371. if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
  372. &compile_data,
  373. &zone)) {
  374. // Throw an exception if we fail to parse the pattern.
  375. // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
  376. USE(ThrowRegExpException(re,
  377. pattern,
  378. compile_data.error,
  379. "malformed_regexp"));
  380. return false;
  381. }
  382. RegExpEngine::CompilationResult result =
  383. RegExpEngine::Compile(&compile_data,
  384. flags.is_ignore_case(),
  385. flags.is_global(),
  386. flags.is_multiline(),
  387. pattern,
  388. sample_subject,
  389. is_ascii,
  390. &zone);
  391. if (result.error_message != NULL) {
  392. // Unable to compile regexp.
  393. Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
  394. CStrVector(result.error_message)).ToHandleChecked();
  395. CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
  396. return false;
  397. }
  398. Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
  399. data->set(JSRegExp::code_index(is_ascii), result.code);
  400. int register_max = IrregexpMaxRegisterCount(*data);
  401. if (result.num_registers > register_max) {
  402. SetIrregexpMaxRegisterCount(*data, result.num_registers);
  403. }
  404. return true;
  405. }
  406. int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) {
  407. return Smi::cast(
  408. re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
  409. }
  410. void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) {
  411. re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
  412. }
  413. int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
  414. return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
  415. }
  416. int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
  417. return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
  418. }
  419. ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
  420. return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
  421. }
  422. Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
  423. return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
  424. }
  425. void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
  426. Handle<String> pattern,
  427. JSRegExp::Flags flags,
  428. int capture_count) {
  429. // Initialize compiled code entries to null.
  430. re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
  431. JSRegExp::IRREGEXP,
  432. pattern,
  433. flags,
  434. capture_count);
  435. }
  436. int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
  437. Handle<String> subject) {
  438. subject = String::Flatten(subject);
  439. // Check the asciiness of the underlying storage.
  440. bool is_ascii = subject->IsOneByteRepresentationUnderneath();
  441. if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1;
  442. #ifdef V8_INTERPRETED_REGEXP
  443. // Byte-code regexp needs space allocated for all its registers.
  444. // The result captures are copied to the start of the registers array
  445. // if the match succeeds. This way those registers are not clobbered
  446. // when we set the last match info from last successful match.
  447. return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
  448. (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
  449. #else // V8_INTERPRETED_REGEXP
  450. // Native regexp only needs room to output captures. Registers are handled
  451. // internally.
  452. return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
  453. #endif // V8_INTERPRETED_REGEXP
  454. }
  455. int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
  456. Handle<String> subject,
  457. int index,
  458. int32_t* output,
  459. int output_size) {
  460. Isolate* isolate = regexp->GetIsolate();
  461. Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
  462. DCHECK(index >= 0);
  463. DCHECK(index <= subject->length());
  464. DCHECK(subject->IsFlat());
  465. bool is_ascii = subject->IsOneByteRepresentationUnderneath();
  466. #ifndef V8_INTERPRETED_REGEXP
  467. DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
  468. do {
  469. EnsureCompiledIrregexp(regexp, subject, is_ascii);
  470. Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
  471. // The stack is used to allocate registers for the compiled regexp code.
  472. // This means that in case of failure, the output registers array is left
  473. // untouched and contains the capture results from the previous successful
  474. // match. We can use that to set the last match info lazily.
  475. NativeRegExpMacroAssembler::Result res =
  476. NativeRegExpMacroAssembler::Match(code,
  477. subject,
  478. output,
  479. output_size,
  480. index,
  481. isolate);
  482. if (res != NativeRegExpMacroAssembler::RETRY) {
  483. DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION ||
  484. isolate->has_pending_exception());
  485. STATIC_ASSERT(
  486. static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
  487. STATIC_ASSERT(
  488. static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
  489. STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
  490. == RE_EXCEPTION);
  491. return static_cast<IrregexpResult>(res);
  492. }
  493. // If result is RETRY, the string has changed representation, and we
  494. // must restart from scratch.
  495. // In this case, it means we must make sure we are prepared to handle
  496. // the, potentially, different subject (the string can switch between
  497. // being internal and external, and even between being ASCII and UC16,
  498. // but the characters are always the same).
  499. IrregexpPrepare(regexp, subject);
  500. is_ascii = subject->IsOneByteRepresentationUnderneath();
  501. } while (true);
  502. UNREACHABLE();
  503. return RE_EXCEPTION;
  504. #else // V8_INTERPRETED_REGEXP
  505. DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));
  506. // We must have done EnsureCompiledIrregexp, so we can get the number of
  507. // registers.
  508. int number_of_capture_registers =
  509. (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
  510. int32_t* raw_output = &output[number_of_capture_registers];
  511. // We do not touch the actual capture result registers until we know there
  512. // has been a match so that we can use those capture results to set the
  513. // last match info.
  514. for (int i = number_of_capture_registers - 1; i >= 0; i--) {
  515. raw_output[i] = -1;
  516. }
  517. Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
  518. IrregexpResult result = IrregexpInterpreter::Match(isolate,
  519. byte_codes,
  520. subject,
  521. raw_output,
  522. index);
  523. if (result == RE_SUCCESS) {
  524. // Copy capture results to the start of the registers array.
  525. MemCopy(output, raw_output, number_of_capture_registers * sizeof(int32_t));
  526. }
  527. if (result == RE_EXCEPTION) {
  528. DCHECK(!isolate->has_pending_exception());
  529. isolate->StackOverflow();
  530. }
  531. return result;
  532. #endif // V8_INTERPRETED_REGEXP
  533. }
  534. MaybeHandle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
  535. Handle<String> subject,
  536. int previous_index,
  537. Handle<JSArray> last_match_info) {
  538. Isolate* isolate = regexp->GetIsolate();
  539. DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
  540. // Prepare space for the return values.
  541. #if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG)
  542. if (FLAG_trace_regexp_bytecodes) {
  543. String* pattern = regexp->Pattern();
  544. PrintF("\n\nRegexp match: /%s/\n\n", pattern->ToCString().get());
  545. PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
  546. }
  547. #endif
  548. int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
  549. if (required_registers < 0) {
  550. // Compiling failed with an exception.
  551. DCHECK(isolate->has_pending_exception());
  552. return MaybeHandle<Object>();
  553. }
  554. int32_t* output_registers = NULL;
  555. if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
  556. output_registers = NewArray<int32_t>(required_registers);
  557. }
  558. SmartArrayPointer<int32_t> auto_release(output_registers);
  559. if (output_registers == NULL) {
  560. output_registers = isolate->jsregexp_static_offsets_vector();
  561. }
  562. int res = RegExpImpl::IrregexpExecRaw(
  563. regexp, subject, previous_index, output_registers, required_registers);
  564. if (res == RE_SUCCESS) {
  565. int capture_count =
  566. IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
  567. return SetLastMatchInfo(
  568. last_match_info, subject, capture_count, output_registers);
  569. }
  570. if (res == RE_EXCEPTION) {
  571. DCHECK(isolate->has_pending_exception());
  572. return MaybeHandle<Object>();
  573. }
  574. DCHECK(res == RE_FAILURE);
  575. return isolate->factory()->null_value();
  576. }
  577. Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info,
  578. Handle<String> subject,
  579. int capture_count,
  580. int32_t* match) {
  581. DCHECK(last_match_info->HasFastObjectElements());
  582. int capture_register_count = (capture_count + 1) * 2;
  583. JSArray::EnsureSize(last_match_info,
  584. capture_register_count + kLastMatchOverhead);
  585. DisallowHeapAllocation no_allocation;
  586. FixedArray* array = FixedArray::cast(last_match_info->elements());
  587. if (match != NULL) {
  588. for (int i = 0; i < capture_register_count; i += 2) {
  589. SetCapture(array, i, match[i]);
  590. SetCapture(array, i + 1, match[i + 1]);
  591. }
  592. }
  593. SetLastCaptureCount(array, capture_register_count);
  594. SetLastSubject(array, *subject);
  595. SetLastInput(array, *subject);
  596. return last_match_info;
  597. }
  598. RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
  599. Handle<String> subject,
  600. bool is_global,
  601. Isolate* isolate)
  602. : register_array_(NULL),
  603. register_array_size_(0),
  604. regexp_(regexp),
  605. subject_(subject) {
  606. #ifdef V8_INTERPRETED_REGEXP
  607. bool interpreted = true;
  608. #else
  609. bool interpreted = false;
  610. #endif // V8_INTERPRETED_REGEXP
  611. if (regexp_->TypeTag() == JSRegExp::ATOM) {
  612. static const int kAtomRegistersPerMatch = 2;
  613. registers_per_match_ = kAtomRegistersPerMatch;
  614. // There is no distinction between interpreted and native for atom regexps.
  615. interpreted = false;
  616. } else {
  617. registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_);
  618. if (registers_per_match_ < 0) {
  619. num_matches_ = -1; // Signal exception.
  620. return;
  621. }
  622. }
  623. if (is_global && !interpreted) {
  624. register_array_size_ =
  625. Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
  626. max_matches_ = register_array_size_ / registers_per_match_;
  627. } else {
  628. // Global loop in interpreted regexp is not implemented. We choose
  629. // the size of the offsets vector so that it can only store one match.
  630. register_array_size_ = registers_per_match_;
  631. max_matches_ = 1;
  632. }
  633. if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
  634. register_array_ = NewArray<int32_t>(register_array_size_);
  635. } else {
  636. register_array_ = isolate->jsregexp_static_offsets_vector();
  637. }
  638. // Set state so that fetching the results the first time triggers a call
  639. // to the compiled regexp.
  640. current_match_index_ = max_matches_ - 1;
  641. num_matches_ = max_matches_;
  642. DCHECK(registers_per_match_ >= 2); // Each match has at least one capture.
  643. DCHECK_GE(register_array_size_, registers_per_match_);
  644. int32_t* last_match =
  645. &register_array_[current_match_index_ * registers_per_match_];
  646. last_match[0] = -1;
  647. last_match[1] = 0;
  648. }
  649. // -------------------------------------------------------------------
  650. // Implementation of the Irregexp regular expression engine.
  651. //
  652. // The Irregexp regular expression engine is intended to be a complete
  653. // implementation of ECMAScript regular expressions. It generates either
  654. // bytecodes or native code.
  655. // The Irregexp regexp engine is structured in three steps.
  656. // 1) The parser generates an abstract syntax tree. See ast.cc.
  657. // 2) From the AST a node network is created. The nodes are all
  658. // subclasses of RegExpNode. The nodes represent states when
  659. // executing a regular expression. Several optimizations are
  660. // performed on the node network.
  661. // 3) From the nodes we generate either byte codes or native code
  662. // that can actually execute the regular expression (perform
  663. // the search). The code generation step is described in more
  664. // detail below.
  665. // Code generation.
  666. //
  667. // The nodes are divided into four main categories.
  668. // * Choice nodes
  669. // These represent places where the regular expression can
  670. // match in more than one way. For example on entry to an
  671. // alternation (foo|bar) or a repetition (*, +, ? or {}).
  672. // * Action nodes
  673. // These represent places where some action should be
  674. // performed. Examples include recording the current position
  675. // in the input string to a register (in order to implement
  676. // captures) or other actions on register for example in order
  677. // to implement the counters needed for {} repetitions.
  678. // * Matching nodes
  679. // These attempt to match some element part of the input string.
  680. // Examples of elements include character classes, plain strings
  681. // or back references.
  682. // * End nodes
  683. // These are used to implement the actions required on finding
  684. // a successful match or failing to find a match.
  685. //
  686. // The code generated (whether as byte codes or native code) maintains
  687. // some state as it runs. This consists of the following elements:
  688. //
  689. // * The capture registers. Used for string captures.
  690. // * Other registers. Used for counters etc.
  691. // * The current position.
  692. // * The stack of backtracking information. Used when a matching node
  693. // fails to find a match and needs to try an alternative.
  694. //
  695. // Conceptual regular expression execution model:
  696. //
  697. // There is a simple conceptual model of regular expression execution
  698. // which will be presented first. The actual code generated is a more
  699. // efficient simulation of the simple conceptual model:
  700. //
  701. // * Choice nodes are implemented as follows:
  702. // For each choice except the last {
  703. // push current position
  704. // push backtrack code location
  705. // <generate code to test for choice>
  706. // backtrack code location:
  707. // pop current position
  708. // }
  709. // <generate code to test for last choice>
  710. //
  711. // * Action nodes are generated as follows:
  712. // <push affected registers on backtrack stack>
  713. // <generate code to perform action>
  714. // push backtrack code location
  715. // <generate code to test for following nodes>
  716. // backtrack code location:
  717. // <pop affected registers to restore their state>
  718. // <pop backtrack location from stack and go to it>
  719. //
  720. // * Matching nodes are generated as follows:
  721. // if input string matches at current position
  722. // update current position
  723. // <generate code to test for following nodes>
  724. // else
  725. // <pop backtrack location from stack and go to it>
  726. //
  727. // Thus it can be seen that the current position is saved and restored
  728. // by the choice nodes, whereas the registers are saved and restored by
  729. // the action nodes that manipulate them.
  730. //
  731. // The other interesting aspect of this model is that nodes are generated
  732. // at the point where they are needed by a recursive call to Emit(). If
  733. // the node has already been code generated then the Emit() call will
  734. // generate a jump to the previously generated code instead. In order to
  735. // limit recursion it is possible for the Emit() function to put the node
  736. // on a work list for later generation and instead generate a jump. The
  737. // destination of the jump is resolved later when the code is generated.
  738. //
  739. // Actual regular expression code generation.
  740. //
  741. // Code generation is actually more complicated than the above. In order
  742. // to improve the efficiency of the generated code some optimizations are
  743. // performed
  744. //
  745. // * Choice nodes have 1-character lookahead.
  746. // A choice node looks at the following character and eliminates some of
  747. // the choices immediately based on that character. This is not yet
  748. // implemented.
  749. // * Simple greedy loops store reduced backtracking information.
  750. // A quantifier like /.*foo/m will greedily match the whole input. It will
  751. // then need to backtrack to a point where it can match "foo". The naive
  752. // implementation of this would push each character position onto the
  753. // backtracking stack, then pop them off one by one. This would use space
  754. // proportional to the length of the input string. However since the "."
  755. // can only match in one way and always has a constant length (in this case
  756. // of 1) it suffices to store the current position on the top of the stack
  757. // once. Matching now becomes merely incrementing the current position and
  758. // backtracking becomes decrementing the current position and checking the
  759. // result against the stored current position. This is faster and saves
  760. // space.
  761. // * The current state is virtualized.
  762. // This is used to defer expensive operations until it is clear that they
  763. // are needed and to generate code for a node more than once, allowing
  764. // specialized and efficient versions of the code to be created. This is
  765. // explained in the section below.
  766. //
  767. // Execution state virtualization.
  768. //
  769. // Instead of emitting code, nodes that manipulate the state can record their
  770. // manipulation in an object called the Trace. The Trace object can record a
  771. // current position offset, an optional backtrack code location on the top of
  772. // the virtualized backtrack stack and some register changes. When a node is
  773. // to be emitted it can flush the Trace or update it. Flushing the Trace
  774. // will emit code to bring the actual state into line with the virtual state.
  775. // Avoiding flushing the state can postpone some work (e.g. updates of capture
  776. // registers). Postponing work can save time when executing the regular
  777. // expression since it may be found that the work never has to be done as a
  778. // failure to match can occur. In addition it is much faster to jump to a
  779. // known backtrack code location than it is to pop an unknown backtrack
  780. // location from the stack and jump there.
  781. //
  782. // The virtual state found in the Trace affects code generation. For example
  783. // the virtual state contains the difference between the actual current
  784. // position and the virtual current position, and matching code needs to use
  785. // this offset to attempt a match in the correct location of the input
  786. // string. Therefore code generated for a non-trivial trace is specialized
  787. // to that trace. The code generator therefore has the ability to generate
  788. // code for each node several times. In order to limit the size of the
  789. // generated code there is an arbitrary limit on how many specialized sets of
  790. // code may be generated for a given node. If the limit is reached, the
  791. // trace is flushed and a generic version of the code for a node is emitted.
  792. // This is subsequently used for that node. The code emitted for non-generic
  793. // trace is not recorded in the node and so it cannot currently be reused in
  794. // the event that code generation is requested for an identical trace.
  795. void RegExpTree::AppendToText(RegExpText* text, Zone* zone) {
  796. UNREACHABLE();
  797. }
  798. void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
  799. text->AddElement(TextElement::Atom(this), zone);
  800. }
  801. void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) {
  802. text->AddElement(TextElement::CharClass(this), zone);
  803. }
  804. void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
  805. for (int i = 0; i < elements()->length(); i++)
  806. text->AddElement(elements()->at(i), zone);
  807. }
  808. TextElement TextElement::Atom(RegExpAtom* atom) {
  809. return TextElement(ATOM, atom);
  810. }
  811. TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
  812. return TextElement(CHAR_CLASS, char_class);
  813. }
  814. int TextElement::length() const {
  815. switch (text_type()) {
  816. case ATOM:
  817. return atom()->length();
  818. case CHAR_CLASS:
  819. return 1;
  820. }
  821. UNREACHABLE();
  822. return 0;
  823. }
  824. DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
  825. if (table_ == NULL) {
  826. table_ = new(zone()) DispatchTable(zone());
  827. DispatchTableConstructor cons(table_, ignore_case, zone());
  828. cons.BuildTable(this);
  829. }
  830. return table_;
  831. }
  832. class FrequencyCollator {
  833. public:
  834. FrequencyCollator() : total_samples_(0) {
  835. for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
  836. frequencies_[i] = CharacterFrequency(i);
  837. }
  838. }
  839. void CountCharacter(int character) {
  840. int index = (character & RegExpMacroAssembler::kTableMask);
  841. frequencies_[index].Increment();
  842. total_samples_++;
  843. }
  844. // Does not measure in percent, but rather per-128 (the table size from the
  845. // regexp macro assembler).
  846. int Frequency(int in_character) {
  847. DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character);
  848. if (total_samples_ < 1) return 1; // Division by zero.
  849. int freq_in_per128 =
  850. (frequencies_[in_character].counter() * 128) / total_samples_;
  851. return freq_in_per128;
  852. }
  853. private:
  854. class CharacterFrequency {
  855. public:
  856. CharacterFrequency() : counter_(0), character_(-1) { }
  857. explicit CharacterFrequency(int character)
  858. : counter_(0), character_(character) { }
  859. void Increment() { counter_++; }
  860. int counter() { return counter_; }
  861. int character() { return character_; }
  862. private:
  863. int counter_;
  864. int character_;
  865. };
  866. private:
  867. CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
  868. int total_samples_;
  869. };
  870. class RegExpCompiler {
  871. public:
  872. RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii,
  873. Zone* zone);
  874. int AllocateRegister() {
  875. if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
  876. reg_exp_too_big_ = true;
  877. return next_register_;
  878. }
  879. return next_register_++;
  880. }
  881. RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
  882. RegExpNode* start,
  883. int capture_count,
  884. Handle<String> pattern);
  885. inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
  886. static const int kImplementationOffset = 0;
  887. static const int kNumberOfRegistersOffset = 0;
  888. static const int kCodeOffset = 1;
  889. RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
  890. EndNode* accept() { return accept_; }
  891. static const int kMaxRecursion = 100;
  892. inline int recursion_depth() { return recursion_depth_; }
  893. inline void IncrementRecursionDepth() { recursion_depth_++; }
  894. inline void DecrementRecursionDepth() { recursion_depth_--; }
  895. void SetRegExpTooBig() { reg_exp_too_big_ = true; }
  896. inline bool ignore_case() { return ignore_case_; }
  897. inline bool ascii() { return ascii_; }
  898. FrequencyCollator* frequency_collator() { return &frequency_collator_; }
  899. int current_expansion_factor() { return current_expansion_factor_; }
  900. void set_current_expansion_factor(int value) {
  901. current_expansion_factor_ = value;
  902. }
  903. Zone* zone() const { return zone_; }
  904. static const int kNoRegister = -1;
  905. private:
  906. EndNode* accept_;
  907. int next_register_;
  908. List<RegExpNode*>* work_list_;
  909. int recursion_depth_;
  910. RegExpMacroAssembler* macro_assembler_;
  911. bool ignore_case_;
  912. bool ascii_;
  913. bool reg_exp_too_big_;
  914. int current_expansion_factor_;
  915. FrequencyCollator frequency_collator_;
  916. Zone* zone_;
  917. };
  918. class RecursionCheck {
  919. public:
  920. explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
  921. compiler->IncrementRecursionDepth();
  922. }
  923. ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
  924. private:
  925. RegExpCompiler* compiler_;
  926. };
  927. static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
  928. return RegExpEngine::CompilationResult(isolate, "RegExp too big");
  929. }
  930. // Attempts to compile the regexp using an Irregexp code generator. Returns
  931. // a fixed array or a null handle depending on whether it succeeded.
  932. RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii,
  933. Zone* zone)
  934. : next_register_(2 * (capture_count + 1)),
  935. work_list_(NULL),
  936. recursion_depth_(0),
  937. ignore_case_(ignore_case),
  938. ascii_(ascii),
  939. reg_exp_too_big_(false),
  940. current_expansion_factor_(1),
  941. frequency_collator_(),
  942. zone_(zone) {
  943. accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
  944. DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
  945. }
  946. RegExpEngine::CompilationResult RegExpCompiler::Assemble(
  947. RegExpMacroAssembler* macro_assembler,
  948. RegExpNode* start,
  949. int capture_count,
  950. Handle<String> pattern) {
  951. Heap* heap = pattern->GetHeap();
  952. bool use_slow_safe_regexp_compiler = false;
  953. if (heap->total_regexp_code_generated() >
  954. RegExpImpl::kRegWxpCompiledLimit &&
  955. heap->isolate()->memory_allocator()->SizeExecutable() >
  956. RegExpImpl::kRegExpExecutableMemoryLimit) {
  957. use_slow_safe_regexp_compiler = true;
  958. }
  959. macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
  960. #ifdef DEBUG
  961. if (FLAG_trace_regexp_assembler)
  962. macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
  963. else
  964. #endif
  965. macro_assembler_ = macro_assembler;
  966. List <RegExpNode*> work_list(0);
  967. work_list_ = &work_list;
  968. Label fail;
  969. macro_assembler_->PushBacktrack(&fail);
  970. Trace new_trace;
  971. start->Emit(this, &new_trace);
  972. macro_assembler_->Bind(&fail);
  973. macro_assembler_->Fail();
  974. while (!work_list.is_empty()) {
  975. work_list.RemoveLast()->Emit(this, &new_trace);
  976. }
  977. if (reg_exp_too_big_) return IrregexpRegExpTooBig(zone_->isolate());
  978. Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
  979. heap->IncreaseTotalRegexpCodeGenerated(code->Size());
  980. work_list_ = NULL;
  981. #ifdef DEBUG
  982. if (FLAG_print_code) {
  983. CodeTracer::Scope trace_scope(heap->isolate()->GetCodeTracer());
  984. OFStream os(trace_scope.file());
  985. Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os);
  986. }
  987. if (FLAG_trace_regexp_assembler) {
  988. delete macro_assembler_;
  989. }
  990. #endif
  991. return RegExpEngine::CompilationResult(*code, next_register_);
  992. }
  993. bool Trace::DeferredAction::Mentions(int that) {
  994. if (action_type() == ActionNode::CLEAR_CAPTURES) {
  995. Interval range = static_cast<DeferredClearCaptures*>(this)->range();
  996. return range.Contains(that);
  997. } else {
  998. return reg() == that;
  999. }
  1000. }
  1001. bool Trace::mentions_reg(int reg) {
  1002. for (DeferredAction* action = actions_;
  1003. action != NULL;
  1004. action = action->next()) {
  1005. if (action->Mentions(reg))
  1006. return true;
  1007. }
  1008. return false;
  1009. }
  1010. bool Trace::GetStoredPosition(int reg, int* cp_offset) {
  1011. DCHECK_EQ(0, *cp_offset);
  1012. for (DeferredAction* action = actions_;
  1013. action != NULL;
  1014. action = action->next()) {
  1015. if (action->Mentions(reg)) {
  1016. if (action->action_type() == ActionNode::STORE_POSITION) {
  1017. *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
  1018. return true;
  1019. } else {
  1020. return false;
  1021. }
  1022. }
  1023. }
  1024. return false;
  1025. }
  1026. int Trace::FindAffectedRegisters(OutSet* affected_registers,
  1027. Zone* zone) {
  1028. int max_register = RegExpCompiler::kNoRegister;
  1029. for (DeferredAction* action = actions_;
  1030. action != NULL;
  1031. action = action->next()) {
  1032. if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
  1033. Interval range = static_cast<DeferredClearCaptures*>(action)->range();
  1034. for (int i = range.from(); i <= range.to(); i++)
  1035. affected_registers->Set(i, zone);
  1036. if (range.to() > max_register) max_register = range.to();
  1037. } else {
  1038. affected_registers->Set(action->reg(), zone);
  1039. if (action->reg() > max_register) max_register = action->reg();
  1040. }
  1041. }
  1042. return max_register;
  1043. }
  1044. void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
  1045. int max_register,
  1046. const OutSet& registers_to_pop,
  1047. const OutSet& registers_to_clear) {
  1048. for (int reg = max_register; reg >= 0; reg--) {
  1049. if (registers_to_pop.Get(reg)) {
  1050. assembler->PopRegister(reg);
  1051. } else if (registers_to_clear.Get(reg)) {
  1052. int clear_to = reg;
  1053. while (reg > 0 && registers_to_clear.Get(reg - 1)) {
  1054. reg--;
  1055. }
  1056. assembler->ClearRegisters(reg, clear_to);
  1057. }
  1058. }
  1059. }
  1060. void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
  1061. int max_register,
  1062. const OutSet& affected_registers,
  1063. OutSet* registers_to_pop,
  1064. OutSet* registers_to_clear,
  1065. Zone* zone) {
  1066. // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
  1067. const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
  1068. // Count pushes performed to force a stack limit check occasionally.
  1069. int pushes = 0;
  1070. for (int reg = 0; reg <= max_register; reg++) {
  1071. if (!affected_registers.Get(reg)) {
  1072. continue;
  1073. }
  1074. // The chronologically first deferred action in the trace
  1075. // is used to infer the action needed to restore a register
  1076. // to its previous state (or not, if it's safe to ignore it).
  1077. enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
  1078. DeferredActionUndoType undo_action = IGNORE;
  1079. int value = 0;
  1080. bool absolute = false;
  1081. bool clear = false;
  1082. int store_position = -1;
  1083. // This is a little tricky because we are scanning the actions in reverse
  1084. // historical order (newest first).
  1085. for (DeferredAction* action = actions_;
  1086. action != NULL;
  1087. action = action->next()) {
  1088. if (action->Mentions(reg)) {
  1089. switch (action->action_type()) {
  1090. case ActionNode::SET_REGISTER: {
  1091. Trace::DeferredSetRegister* psr =
  1092. static_cast<Trace::DeferredSetRegister*>(action);
  1093. if (!absolute) {
  1094. value += psr->value();
  1095. absolute = true;
  1096. }
  1097. // SET_REGISTER is currently only used for newly introduced loop
  1098. // counters. They can have a significant previous value if they
  1099. // occour in a loop. TODO(lrn): Propagate this information, so
  1100. // we can set undo_action to IGNORE if we know there is no value to
  1101. // restore.
  1102. undo_action = RESTORE;
  1103. DCHECK_EQ(store_position, -1);
  1104. DCHECK(!clear);
  1105. break;
  1106. }
  1107. case ActionNode::INCREMENT_REGISTER:
  1108. if (!absolute) {
  1109. value++;
  1110. }
  1111. DCHECK_EQ(store_position, -1);
  1112. DCHECK(!clear);
  1113. undo_action = RESTORE;
  1114. break;
  1115. case ActionNode::STORE_POSITION: {
  1116. Trace::DeferredCapture* pc =
  1117. static_cast<Trace::DeferredCapture*>(action);
  1118. if (!clear && store_position == -1) {
  1119. store_position = pc->cp_offset();
  1120. }
  1121. // For captures we know that stores and clears alternate.
  1122. // Other register, are never cleared, and if the occur
  1123. // inside a loop, they might be assigned more than once.
  1124. if (reg <= 1) {
  1125. // Registers zero and one, aka "capture zero", is
  1126. // always set correctly if we succeed. There is no
  1127. // need to undo a setting on backtrack, because we
  1128. // will set it again or fail.
  1129. undo_action = IGNORE;
  1130. } else {
  1131. undo_action = pc->is_capture() ? CLEAR : RESTORE;
  1132. }
  1133. DCHECK(!absolute);
  1134. DCHECK_EQ(value, 0);
  1135. break;
  1136. }
  1137. case ActionNode::CLEAR_CAPTURES: {
  1138. // Since we're scanning in reverse order, if we've already
  1139. // set the position we have to ignore historically earlier
  1140. // clearing operations.
  1141. if (store_position == -1) {
  1142. clear = true;
  1143. }
  1144. undo_action = RESTORE;
  1145. DCHECK(!absolute);
  1146. DCHECK_EQ(value, 0);
  1147. break;
  1148. }
  1149. default:
  1150. UNREACHABLE();
  1151. break;
  1152. }
  1153. }
  1154. }
  1155. // Prepare for the undo-action (e.g., push if it's going to be popped).
  1156. if (undo_action == RESTORE) {
  1157. pushes++;
  1158. RegExpMacroAssembler::StackCheckFlag stack_check =
  1159. RegExpMacroAssembler::kNoStackLimitCheck;
  1160. if (pushes == push_limit) {
  1161. stack_check = RegExpMacroAssembler::kCheckStackLimit;
  1162. pushes = 0;
  1163. }
  1164. assembler->PushRegister(reg, stack_check);
  1165. registers_to_pop->Set(reg, zone);
  1166. } else if (undo_action == CLEAR) {
  1167. registers_to_clear->Set(reg, zone);
  1168. }
  1169. // Perform the chronologically last action (or accumulated increment)
  1170. // for the register.
  1171. if (store_position != -1) {
  1172. assembler->WriteCurrentPositionToRegister(reg, store_position);
  1173. } else if (clear) {
  1174. assembler->ClearRegisters(reg, reg);
  1175. } else if (absolute) {
  1176. assembler->SetRegister(reg, value);
  1177. } else if (value != 0) {
  1178. assembler->AdvanceRegister(reg, value);
  1179. }
  1180. }
  1181. }
  1182. // This is called as we come into a loop choice node and some other tricky
  1183. // nodes. It normalizes the state of the code generator to ensure we can
  1184. // g…

Large files files are truncated, but you can click here to view the full file