1// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5// A simple interpreter for the Irregexp byte code.
6
7#include <memory>
8#include <utility>
9
10#include "heap/safepoint.h"
11#include "vm/regexp_interpreter.h"
12
13#include "platform/unicode.h"
14#include "vm/object.h"
15#include "vm/regexp_assembler.h"
16#include "vm/regexp_bytecodes.h"
17#include "vm/unibrow-inl.h"
18#include "vm/unibrow.h"
19
20namespace dart {
21
22DEFINE_FLAG(bool, trace_regexp_bytecodes, false, "trace_regexp_bytecodes");
23
24typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
25
26template <typename Char>
27static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
28 intptr_t from,
29 intptr_t current,
30 intptr_t len,
31 const String& subject,
32 bool unicode);
33
34template <>
35bool BackRefMatchesNoCase<uint16_t>(Canonicalize* interp_canonicalize,
36 intptr_t from,
37 intptr_t current,
38 intptr_t len,
39 const String& subject,
40 bool unicode) {
41 Bool& ret = Bool::Handle();
42 if (unicode) {
43 ret = static_cast<BoolPtr>(CaseInsensitiveCompareUTF16(
44 str_raw: static_cast<uword>(subject.ptr()), lhs_index_raw: static_cast<uword>(Smi::New(value: from)),
45 rhs_index_raw: static_cast<uword>(Smi::New(value: current)),
46 length_raw: static_cast<uword>(Smi::New(value: len))));
47 } else {
48 ret = static_cast<BoolPtr>(CaseInsensitiveCompareUCS2(
49 str_raw: static_cast<uword>(subject.ptr()), lhs_index_raw: static_cast<uword>(Smi::New(value: from)),
50 rhs_index_raw: static_cast<uword>(Smi::New(value: current)),
51 length_raw: static_cast<uword>(Smi::New(value: len))));
52 }
53 return ret.value();
54}
55
56template <>
57bool BackRefMatchesNoCase<uint8_t>(Canonicalize* interp_canonicalize,
58 intptr_t from,
59 intptr_t current,
60 intptr_t len,
61 const String& subject,
62 bool unicode) {
63 // For Latin1 characters the unicode flag makes no difference.
64 for (int i = 0; i < len; i++) {
65 unsigned int old_char = subject.CharAt(index: from++);
66 unsigned int new_char = subject.CharAt(index: current++);
67 if (old_char == new_char) continue;
68 // Convert both characters to lower case.
69 old_char |= 0x20;
70 new_char |= 0x20;
71 if (old_char != new_char) return false;
72 // Not letters in the ASCII range and Latin-1 range.
73 if (!(old_char - 'a' <= 'z' - 'a') &&
74 !(old_char - 224 <= 254 - 224 && old_char != 247)) {
75 return false;
76 }
77 }
78 return true;
79}
80
81#ifdef DEBUG
82static void TraceInterpreter(const uint8_t* code_base,
83 const uint8_t* pc,
84 int stack_depth,
85 int current_position,
86 uint32_t current_char,
87 int bytecode_length,
88 const char* bytecode_name) {
89 if (FLAG_trace_regexp_bytecodes) {
90 bool printable = (current_char < 127 && current_char >= 32);
91 const char* format =
92 printable
93 ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s"
94 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
95 OS::PrintErr(format, pc - code_base, stack_depth, current_position,
96 current_char, printable ? current_char : '.', bytecode_name);
97 for (int i = 0; i < bytecode_length; i++) {
98 OS::PrintErr(format: ", %02x", pc[i]);
99 }
100 OS::PrintErr(format: " ");
101 for (int i = 1; i < bytecode_length; i++) {
102 unsigned char b = pc[i];
103 if (b < 127 && b >= 32) {
104 OS::PrintErr(format: "%c", b);
105 } else {
106 OS::PrintErr(format: ".");
107 }
108 }
109 OS::PrintErr(format: "\n");
110 }
111}
112
113#define BYTECODE(name) \
114 case BC_##name: \
115 TraceInterpreter(code_base, pc, \
116 static_cast<int>(backtrack_sp - backtrack_stack_base), \
117 current, current_char, BC_##name##_LENGTH, #name);
118#else
119#define BYTECODE(name) case BC_##name:
120#endif
121
122static int32_t Load32Aligned(const uint8_t* pc) {
123 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
124 return *reinterpret_cast<const int32_t*>(pc);
125}
126
127static int32_t Load16Aligned(const uint8_t* pc) {
128 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
129 return *reinterpret_cast<const uint16_t*>(pc);
130}
131
132// A simple abstraction over the backtracking stack used by the interpreter.
133// This backtracking stack does not grow automatically, but it ensures that the
134// the memory held by the stack is released or remembered in a cache if the
135// matching terminates.
136class BacktrackStack {
137 public:
138 BacktrackStack() {
139 memory_ = Isolate::Current()->TakeRegexpBacktrackStack();
140 // Note: using malloc here has a potential of triggering jemalloc/tcmalloc
141 // bugs which cause application to leak memory and eventually OOM.
142 // See https://github.com/dart-lang/sdk/issues/38820 and
143 // https://github.com/flutter/flutter/issues/29007 for examples.
144 // So instead we directly ask OS to provide us memory.
145 if (memory_ == nullptr) {
146 const bool executable = false;
147 const bool compressed = false;
148 memory_ = std::unique_ptr<VirtualMemory>(VirtualMemory::Allocate(
149 sizeof(intptr_t) * kBacktrackStackSize, executable, compressed,
150 "regexp-backtrack-stack"));
151 }
152 }
153
154 ~BacktrackStack() {
155 if (memory_ != nullptr) {
156 Isolate::Current()->CacheRegexpBacktrackStack(std::move(memory_));
157 }
158 }
159
160 bool out_of_memory() const { return memory_ == nullptr; }
161
162 intptr_t* data() const {
163 return reinterpret_cast<intptr_t*>(memory_->address());
164 }
165
166 intptr_t max_size() const { return kBacktrackStackSize; }
167
168 private:
169 static constexpr intptr_t kBacktrackStackSize = 1 << 16;
170
171 std::unique_ptr<VirtualMemory> memory_;
172
173 DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
174};
175
176// Returns True if success, False if failure, Null if internal exception,
177// Error if VM error needs to be propagated up the callchain.
178template <typename Char>
179static ObjectPtr RawMatch(const TypedData& bytecode,
180 const String& subject,
181 int32_t* registers,
182 intptr_t current,
183 uint32_t current_char) {
184 // BacktrackStack ensures that the memory allocated for the backtracking stack
185 // is returned to the system or cached if there is no stack being cached at
186 // the moment.
187 BacktrackStack backtrack_stack;
188 if (backtrack_stack.out_of_memory()) {
189 Exceptions::ThrowOOM();
190 UNREACHABLE();
191 }
192 intptr_t* backtrack_stack_base = backtrack_stack.data();
193 intptr_t* backtrack_sp = backtrack_stack_base;
194 intptr_t backtrack_stack_space = backtrack_stack.max_size();
195
196 // TODO(zerny): Optimize as single instance. V8 has this as an
197 // isolate member.
198 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
199
200 intptr_t subject_length = subject.Length();
201
202#ifdef DEBUG
203 if (FLAG_trace_regexp_bytecodes) {
204 OS::PrintErr(format: "Start irregexp bytecode interpreter\n");
205 }
206#endif
207 const auto thread = Thread::Current();
208 const uint8_t* code_base;
209 const uint8_t* pc;
210 {
211 NoSafepointScope no_safepoint;
212 code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(byte_offset: 0));
213 pc = code_base;
214 }
215 while (true) {
216 if (UNLIKELY(thread->HasScheduledInterrupts())) {
217 intptr_t pc_offset = pc - code_base;
218 ErrorPtr error = thread->HandleInterrupts();
219 if (error != Object::null()) {
220 // Needs to be propagated to the Dart native invoking the
221 // regex matcher.
222 return error;
223 }
224 NoSafepointScope no_safepoint;
225 code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(byte_offset: 0));
226 pc = code_base + pc_offset;
227 }
228 NoSafepointScope no_safepoint;
229 bool check_for_safepoint_now = false;
230 while (!check_for_safepoint_now) {
231 int32_t insn = Load32Aligned(pc);
232 switch (insn & BYTECODE_MASK) {
233 BYTECODE(BREAK)
234 UNREACHABLE();
235 return Bool::False().ptr();
236 BYTECODE(PUSH_CP)
237 if (--backtrack_stack_space < 0) {
238 return Object::null();
239 }
240 *backtrack_sp++ = current;
241 pc += BC_PUSH_CP_LENGTH;
242 break;
243 BYTECODE(PUSH_BT)
244 if (--backtrack_stack_space < 0) {
245 return Object::null();
246 }
247 *backtrack_sp++ = Load32Aligned(pc: pc + 4);
248 pc += BC_PUSH_BT_LENGTH;
249 break;
250 BYTECODE(PUSH_REGISTER)
251 if (--backtrack_stack_space < 0) {
252 return Object::null();
253 }
254 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
255 pc += BC_PUSH_REGISTER_LENGTH;
256 break;
257 BYTECODE(SET_REGISTER)
258 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc: pc + 4);
259 pc += BC_SET_REGISTER_LENGTH;
260 break;
261 BYTECODE(ADVANCE_REGISTER)
262 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc: pc + 4);
263 pc += BC_ADVANCE_REGISTER_LENGTH;
264 break;
265 BYTECODE(SET_REGISTER_TO_CP)
266 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc: pc + 4);
267 pc += BC_SET_REGISTER_TO_CP_LENGTH;
268 break;
269 BYTECODE(SET_CP_TO_REGISTER)
270 current = registers[insn >> BYTECODE_SHIFT];
271 pc += BC_SET_CP_TO_REGISTER_LENGTH;
272 break;
273 BYTECODE(SET_REGISTER_TO_SP)
274 registers[insn >> BYTECODE_SHIFT] =
275 static_cast<int>(backtrack_sp - backtrack_stack_base);
276 pc += BC_SET_REGISTER_TO_SP_LENGTH;
277 break;
278 BYTECODE(SET_SP_TO_REGISTER)
279 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
280 backtrack_stack_space =
281 backtrack_stack.max_size() -
282 static_cast<int>(backtrack_sp - backtrack_stack_base);
283 pc += BC_SET_SP_TO_REGISTER_LENGTH;
284 break;
285 BYTECODE(POP_CP)
286 backtrack_stack_space++;
287 --backtrack_sp;
288 current = *backtrack_sp;
289 pc += BC_POP_CP_LENGTH;
290 break;
291 BYTECODE(POP_BT)
292 backtrack_stack_space++;
293 --backtrack_sp;
294 pc = code_base + *backtrack_sp;
295 // This should match check cadence in JIT irregexp implementation.
296 check_for_safepoint_now = true;
297 break;
298 BYTECODE(POP_REGISTER)
299 backtrack_stack_space++;
300 --backtrack_sp;
301 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
302 pc += BC_POP_REGISTER_LENGTH;
303 break;
304 BYTECODE(FAIL)
305 return Bool::False().ptr();
306 BYTECODE(SUCCEED)
307 return Bool::True().ptr();
308 BYTECODE(ADVANCE_CP)
309 current += insn >> BYTECODE_SHIFT;
310 pc += BC_ADVANCE_CP_LENGTH;
311 break;
312 BYTECODE(GOTO)
313 pc = code_base + Load32Aligned(pc: pc + 4);
314 break;
315 BYTECODE(ADVANCE_CP_AND_GOTO)
316 current += insn >> BYTECODE_SHIFT;
317 pc = code_base + Load32Aligned(pc: pc + 4);
318 break;
319 BYTECODE(CHECK_GREEDY)
320 if (current == backtrack_sp[-1]) {
321 backtrack_sp--;
322 backtrack_stack_space++;
323 pc = code_base + Load32Aligned(pc: pc + 4);
324 } else {
325 pc += BC_CHECK_GREEDY_LENGTH;
326 }
327 break;
328 BYTECODE(LOAD_CURRENT_CHAR) {
329 int pos = current + (insn >> BYTECODE_SHIFT);
330 if (pos < 0 || pos >= subject_length) {
331 pc = code_base + Load32Aligned(pc: pc + 4);
332 } else {
333 current_char = subject.CharAt(index: pos);
334 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
335 }
336 break;
337 }
338 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
339 int pos = current + (insn >> BYTECODE_SHIFT);
340 current_char = subject.CharAt(index: pos);
341 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
342 break;
343 }
344 BYTECODE(LOAD_2_CURRENT_CHARS) {
345 int pos = current + (insn >> BYTECODE_SHIFT);
346 if (pos + 2 > subject_length) {
347 pc = code_base + Load32Aligned(pc: pc + 4);
348 } else {
349 Char next = subject.CharAt(index: pos + 1);
350 current_char =
351 subject.CharAt(index: pos) | (next << (kBitsPerByte * sizeof(Char)));
352 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
353 }
354 break;
355 }
356 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
357 int pos = current + (insn >> BYTECODE_SHIFT);
358 Char next = subject.CharAt(index: pos + 1);
359 current_char =
360 subject.CharAt(index: pos) | (next << (kBitsPerByte * sizeof(Char)));
361 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
362 break;
363 }
364 BYTECODE(LOAD_4_CURRENT_CHARS) {
365 ASSERT(sizeof(Char) == 1);
366 int pos = current + (insn >> BYTECODE_SHIFT);
367 if (pos + 4 > subject_length) {
368 pc = code_base + Load32Aligned(pc: pc + 4);
369 } else {
370 Char next1 = subject.CharAt(index: pos + 1);
371 Char next2 = subject.CharAt(index: pos + 2);
372 Char next3 = subject.CharAt(index: pos + 3);
373 current_char = (subject.CharAt(index: pos) | (next1 << 8) | (next2 << 16) |
374 (next3 << 24));
375 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
376 }
377 break;
378 }
379 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
380 ASSERT(sizeof(Char) == 1);
381 int pos = current + (insn >> BYTECODE_SHIFT);
382 Char next1 = subject.CharAt(index: pos + 1);
383 Char next2 = subject.CharAt(index: pos + 2);
384 Char next3 = subject.CharAt(index: pos + 3);
385 current_char = (subject.CharAt(index: pos) | (next1 << 8) | (next2 << 16) |
386 (next3 << 24));
387 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
388 break;
389 }
390 BYTECODE(CHECK_4_CHARS) {
391 uint32_t c = Load32Aligned(pc: pc + 4);
392 if (c == current_char) {
393 pc = code_base + Load32Aligned(pc: pc + 8);
394 } else {
395 pc += BC_CHECK_4_CHARS_LENGTH;
396 }
397 break;
398 }
399 BYTECODE(CHECK_CHAR) {
400 uint32_t c = (insn >> BYTECODE_SHIFT);
401 if (c == current_char) {
402 pc = code_base + Load32Aligned(pc: pc + 4);
403 } else {
404 pc += BC_CHECK_CHAR_LENGTH;
405 }
406 break;
407 }
408 BYTECODE(CHECK_NOT_4_CHARS) {
409 uint32_t c = Load32Aligned(pc: pc + 4);
410 if (c != current_char) {
411 pc = code_base + Load32Aligned(pc: pc + 8);
412 } else {
413 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
414 }
415 break;
416 }
417 BYTECODE(CHECK_NOT_CHAR) {
418 uint32_t c = (insn >> BYTECODE_SHIFT);
419 if (c != current_char) {
420 pc = code_base + Load32Aligned(pc: pc + 4);
421 } else {
422 pc += BC_CHECK_NOT_CHAR_LENGTH;
423 }
424 break;
425 }
426 BYTECODE(AND_CHECK_4_CHARS) {
427 uint32_t c = Load32Aligned(pc: pc + 4);
428 if (c == (current_char & Load32Aligned(pc: pc + 8))) {
429 pc = code_base + Load32Aligned(pc: pc + 12);
430 } else {
431 pc += BC_AND_CHECK_4_CHARS_LENGTH;
432 }
433 break;
434 }
435 BYTECODE(AND_CHECK_CHAR) {
436 uint32_t c = (insn >> BYTECODE_SHIFT);
437 if (c == (current_char & Load32Aligned(pc: pc + 4))) {
438 pc = code_base + Load32Aligned(pc: pc + 8);
439 } else {
440 pc += BC_AND_CHECK_CHAR_LENGTH;
441 }
442 break;
443 }
444 BYTECODE(AND_CHECK_NOT_4_CHARS) {
445 uint32_t c = Load32Aligned(pc: pc + 4);
446 if (c != (current_char & Load32Aligned(pc: pc + 8))) {
447 pc = code_base + Load32Aligned(pc: pc + 12);
448 } else {
449 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
450 }
451 break;
452 }
453 BYTECODE(AND_CHECK_NOT_CHAR) {
454 uint32_t c = (insn >> BYTECODE_SHIFT);
455 if (c != (current_char & Load32Aligned(pc: pc + 4))) {
456 pc = code_base + Load32Aligned(pc: pc + 8);
457 } else {
458 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
459 }
460 break;
461 }
462 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
463 uint32_t c = (insn >> BYTECODE_SHIFT);
464 uint32_t minus = Load16Aligned(pc: pc + 4);
465 uint32_t mask = Load16Aligned(pc: pc + 6);
466 if (c != ((current_char - minus) & mask)) {
467 pc = code_base + Load32Aligned(pc: pc + 8);
468 } else {
469 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
470 }
471 break;
472 }
473 BYTECODE(CHECK_CHAR_IN_RANGE) {
474 uint32_t from = Load16Aligned(pc: pc + 4);
475 uint32_t to = Load16Aligned(pc: pc + 6);
476 if (from <= current_char && current_char <= to) {
477 pc = code_base + Load32Aligned(pc: pc + 8);
478 } else {
479 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
480 }
481 break;
482 }
483 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
484 uint32_t from = Load16Aligned(pc: pc + 4);
485 uint32_t to = Load16Aligned(pc: pc + 6);
486 if (from > current_char || current_char > to) {
487 pc = code_base + Load32Aligned(pc: pc + 8);
488 } else {
489 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
490 }
491 break;
492 }
493 BYTECODE(CHECK_BIT_IN_TABLE) {
494 int mask = RegExpMacroAssembler::kTableMask;
495 uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
496 int bit = (current_char & (kBitsPerByte - 1));
497 if ((b & (1 << bit)) != 0) {
498 pc = code_base + Load32Aligned(pc: pc + 4);
499 } else {
500 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
501 }
502 break;
503 }
504 BYTECODE(CHECK_LT) {
505 uint32_t limit = (insn >> BYTECODE_SHIFT);
506 if (current_char < limit) {
507 pc = code_base + Load32Aligned(pc: pc + 4);
508 } else {
509 pc += BC_CHECK_LT_LENGTH;
510 }
511 break;
512 }
513 BYTECODE(CHECK_GT) {
514 uint32_t limit = (insn >> BYTECODE_SHIFT);
515 if (current_char > limit) {
516 pc = code_base + Load32Aligned(pc: pc + 4);
517 } else {
518 pc += BC_CHECK_GT_LENGTH;
519 }
520 break;
521 }
522 BYTECODE(CHECK_REGISTER_LT)
523 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc: pc + 4)) {
524 pc = code_base + Load32Aligned(pc: pc + 8);
525 } else {
526 pc += BC_CHECK_REGISTER_LT_LENGTH;
527 }
528 break;
529 BYTECODE(CHECK_REGISTER_GE)
530 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc: pc + 4)) {
531 pc = code_base + Load32Aligned(pc: pc + 8);
532 } else {
533 pc += BC_CHECK_REGISTER_GE_LENGTH;
534 }
535 break;
536 BYTECODE(CHECK_REGISTER_EQ_POS)
537 if (registers[insn >> BYTECODE_SHIFT] == current) {
538 pc = code_base + Load32Aligned(pc: pc + 4);
539 } else {
540 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
541 }
542 break;
543 BYTECODE(CHECK_NOT_REGS_EQUAL)
544 if (registers[insn >> BYTECODE_SHIFT] ==
545 registers[Load32Aligned(pc: pc + 4)]) {
546 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
547 } else {
548 pc = code_base + Load32Aligned(pc: pc + 8);
549 }
550 break;
551 BYTECODE(CHECK_NOT_BACK_REF) {
552 int from = registers[insn >> BYTECODE_SHIFT];
553 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
554 if (from < 0 || len <= 0) {
555 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
556 break;
557 }
558 if (current + len > subject_length) {
559 pc = code_base + Load32Aligned(pc: pc + 4);
560 break;
561 } else {
562 int i;
563 for (i = 0; i < len; i++) {
564 if (subject.CharAt(index: from + i) != subject.CharAt(index: current + i)) {
565 pc = code_base + Load32Aligned(pc: pc + 4);
566 break;
567 }
568 }
569 if (i < len) break;
570 current += len;
571 }
572 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
573 break;
574 }
575 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
576 FALL_THROUGH;
577 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
578 const bool unicode =
579 (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
580 int from = registers[insn >> BYTECODE_SHIFT];
581 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
582 if (from < 0 || len <= 0) {
583 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
584 break;
585 }
586 if (current + len > subject_length) {
587 pc = code_base + Load32Aligned(pc: pc + 4);
588 break;
589 } else {
590 if (BackRefMatchesNoCase<Char>(&canonicalize, from, current, len,
591 subject, unicode)) {
592 current += len;
593 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
594 } else {
595 pc = code_base + Load32Aligned(pc: pc + 4);
596 }
597 }
598 break;
599 }
600 BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
601 const int from = registers[insn >> BYTECODE_SHIFT];
602 const int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
603 if (from < 0 || len <= 0) {
604 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
605 break;
606 }
607 if ((current - len) < 0) {
608 pc = code_base + Load32Aligned(pc: pc + 4);
609 break;
610 } else {
611 // When looking behind, the string to match (if it is there) lies
612 // before the current position, so we will check the [len]
613 // characters before the current position, excluding the current
614 // position itself.
615 const int start = current - len;
616 int i;
617 for (i = 0; i < len; i++) {
618 if (subject.CharAt(index: from + i) != subject.CharAt(index: start + i)) {
619 pc = code_base + Load32Aligned(pc: pc + 4);
620 break;
621 }
622 }
623 if (i < len) break;
624 current -= len;
625 }
626 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
627 break;
628 }
629 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
630 FALL_THROUGH;
631 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
632 bool unicode = (insn & BYTECODE_MASK) ==
633 BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
634 int from = registers[insn >> BYTECODE_SHIFT];
635 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
636 if (from < 0 || len <= 0) {
637 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
638 break;
639 }
640 if (current < len) {
641 pc = code_base + Load32Aligned(pc: pc + 4);
642 break;
643 } else {
644 if (BackRefMatchesNoCase<Char>(&canonicalize, from, current - len,
645 len, subject, unicode)) {
646 current -= len;
647 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
648 } else {
649 pc = code_base + Load32Aligned(pc: pc + 4);
650 }
651 }
652 break;
653 }
654 BYTECODE(CHECK_AT_START)
655 if (current == 0) {
656 pc = code_base + Load32Aligned(pc: pc + 4);
657 } else {
658 pc += BC_CHECK_AT_START_LENGTH;
659 }
660 break;
661 BYTECODE(CHECK_NOT_AT_START) {
662 const int32_t cp_offset = insn >> BYTECODE_SHIFT;
663 if (current + cp_offset == 0) {
664 pc += BC_CHECK_NOT_AT_START_LENGTH;
665 } else {
666 pc = code_base + Load32Aligned(pc: pc + 4);
667 }
668 break;
669 }
670 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
671 int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
672 if (subject_length - current > by) {
673 current = subject_length - by;
674 current_char = subject.CharAt(index: current - 1);
675 }
676 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
677 break;
678 }
679 default:
680 UNREACHABLE();
681 break;
682 }
683 }
684 }
685}
686
687// Returns True if success, False if failure, Null if internal exception,
688// Error if VM error needs to be propagated up the callchain.
689ObjectPtr IrregexpInterpreter::Match(const TypedData& bytecode,
690 const String& subject,
691 int32_t* registers,
692 intptr_t start_position) {
693 uint16_t previous_char = '\n';
694 if (start_position != 0) {
695 previous_char = subject.CharAt(index: start_position - 1);
696 }
697
698 if (subject.IsOneByteString() || subject.IsExternalOneByteString()) {
699 return RawMatch<uint8_t>(bytecode, subject, registers, current: start_position,
700 current_char: previous_char);
701 } else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) {
702 return RawMatch<uint16_t>(bytecode, subject, registers, current: start_position,
703 current_char: previous_char);
704 } else {
705 UNREACHABLE();
706 return Bool::False().ptr();
707 }
708}
709
710} // namespace dart
711

source code of dart_sdk/runtime/vm/regexp_interpreter.cc