Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 1 | // Copyright 2008-2009 the V8 project authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 5 | #ifdef V8_INTERPRETED_REGEXP |
| 6 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 7 | #include "src/regexp/regexp-macro-assembler-irregexp.h" |
| 8 | |
| 9 | #include "src/ast/ast.h" |
| 10 | #include "src/regexp/bytecodes-irregexp.h" |
| 11 | #include "src/regexp/regexp-macro-assembler.h" |
| 12 | #include "src/regexp/regexp-macro-assembler-irregexp-inl.h" |
| 13 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 14 | namespace v8 { |
| 15 | namespace internal { |
| 16 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 17 | RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Isolate* isolate, |
| 18 | Vector<byte> buffer, |
| 19 | Zone* zone) |
| 20 | : RegExpMacroAssembler(isolate, zone), |
| 21 | buffer_(buffer), |
| 22 | pc_(0), |
| 23 | own_buffer_(false), |
| 24 | advance_current_end_(kInvalidPC), |
| 25 | isolate_(isolate) {} |
| 26 | |
| 27 | |
| 28 | RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() { |
| 29 | if (backtrack_.is_linked()) backtrack_.Unuse(); |
| 30 | if (own_buffer_) buffer_.Dispose(); |
| 31 | } |
| 32 | |
| 33 | |
| 34 | RegExpMacroAssemblerIrregexp::IrregexpImplementation |
| 35 | RegExpMacroAssemblerIrregexp::Implementation() { |
| 36 | return kBytecodeImplementation; |
| 37 | } |
| 38 | |
| 39 | |
| 40 | void RegExpMacroAssemblerIrregexp::Bind(Label* l) { |
| 41 | advance_current_end_ = kInvalidPC; |
| 42 | DCHECK(!l->is_bound()); |
| 43 | if (l->is_linked()) { |
| 44 | int pos = l->pos(); |
| 45 | while (pos != 0) { |
| 46 | int fixup = pos; |
| 47 | pos = *reinterpret_cast<int32_t*>(buffer_.start() + fixup); |
| 48 | *reinterpret_cast<uint32_t*>(buffer_.start() + fixup) = pc_; |
| 49 | } |
| 50 | } |
| 51 | l->bind_to(pc_); |
| 52 | } |
| 53 | |
| 54 | |
| 55 | void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { |
| 56 | if (l == NULL) l = &backtrack_; |
| 57 | if (l->is_bound()) { |
| 58 | Emit32(l->pos()); |
| 59 | } else { |
| 60 | int pos = 0; |
| 61 | if (l->is_linked()) { |
| 62 | pos = l->pos(); |
| 63 | } |
| 64 | l->link_to(pc_); |
| 65 | Emit32(pos); |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | |
| 70 | void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) { |
| 71 | DCHECK(register_index >= 0); |
| 72 | DCHECK(register_index <= kMaxRegister); |
| 73 | Emit(BC_POP_REGISTER, register_index); |
| 74 | } |
| 75 | |
| 76 | |
| 77 | void RegExpMacroAssemblerIrregexp::PushRegister( |
| 78 | int register_index, |
| 79 | StackCheckFlag check_stack_limit) { |
| 80 | DCHECK(register_index >= 0); |
| 81 | DCHECK(register_index <= kMaxRegister); |
| 82 | Emit(BC_PUSH_REGISTER, register_index); |
| 83 | } |
| 84 | |
| 85 | |
| 86 | void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( |
| 87 | int register_index, int cp_offset) { |
| 88 | DCHECK(register_index >= 0); |
| 89 | DCHECK(register_index <= kMaxRegister); |
| 90 | Emit(BC_SET_REGISTER_TO_CP, register_index); |
| 91 | Emit32(cp_offset); // Current position offset. |
| 92 | } |
| 93 | |
| 94 | |
| 95 | void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) { |
| 96 | DCHECK(reg_from <= reg_to); |
| 97 | for (int reg = reg_from; reg <= reg_to; reg++) { |
| 98 | SetRegister(reg, -1); |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | |
| 103 | void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister( |
| 104 | int register_index) { |
| 105 | DCHECK(register_index >= 0); |
| 106 | DCHECK(register_index <= kMaxRegister); |
| 107 | Emit(BC_SET_CP_TO_REGISTER, register_index); |
| 108 | } |
| 109 | |
| 110 | |
| 111 | void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister( |
| 112 | int register_index) { |
| 113 | DCHECK(register_index >= 0); |
| 114 | DCHECK(register_index <= kMaxRegister); |
| 115 | Emit(BC_SET_REGISTER_TO_SP, register_index); |
| 116 | } |
| 117 | |
| 118 | |
| 119 | void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister( |
| 120 | int register_index) { |
| 121 | DCHECK(register_index >= 0); |
| 122 | DCHECK(register_index <= kMaxRegister); |
| 123 | Emit(BC_SET_SP_TO_REGISTER, register_index); |
| 124 | } |
| 125 | |
| 126 | |
| 127 | void RegExpMacroAssemblerIrregexp::SetCurrentPositionFromEnd(int by) { |
| 128 | DCHECK(is_uint24(by)); |
| 129 | Emit(BC_SET_CURRENT_POSITION_FROM_END, by); |
| 130 | } |
| 131 | |
| 132 | |
| 133 | void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) { |
| 134 | DCHECK(register_index >= 0); |
| 135 | DCHECK(register_index <= kMaxRegister); |
| 136 | Emit(BC_SET_REGISTER, register_index); |
| 137 | Emit32(to); |
| 138 | } |
| 139 | |
| 140 | |
| 141 | void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) { |
| 142 | DCHECK(register_index >= 0); |
| 143 | DCHECK(register_index <= kMaxRegister); |
| 144 | Emit(BC_ADVANCE_REGISTER, register_index); |
| 145 | Emit32(by); |
| 146 | } |
| 147 | |
| 148 | |
| 149 | void RegExpMacroAssemblerIrregexp::PopCurrentPosition() { |
| 150 | Emit(BC_POP_CP, 0); |
| 151 | } |
| 152 | |
| 153 | |
| 154 | void RegExpMacroAssemblerIrregexp::PushCurrentPosition() { |
| 155 | Emit(BC_PUSH_CP, 0); |
| 156 | } |
| 157 | |
| 158 | |
| 159 | void RegExpMacroAssemblerIrregexp::Backtrack() { |
| 160 | Emit(BC_POP_BT, 0); |
| 161 | } |
| 162 | |
| 163 | |
| 164 | void RegExpMacroAssemblerIrregexp::GoTo(Label* l) { |
| 165 | if (advance_current_end_ == pc_) { |
| 166 | // Combine advance current and goto. |
| 167 | pc_ = advance_current_start_; |
| 168 | Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_); |
| 169 | EmitOrLink(l); |
| 170 | advance_current_end_ = kInvalidPC; |
| 171 | } else { |
| 172 | // Regular goto. |
| 173 | Emit(BC_GOTO, 0); |
| 174 | EmitOrLink(l); |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | |
| 179 | void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) { |
| 180 | Emit(BC_PUSH_BT, 0); |
| 181 | EmitOrLink(l); |
| 182 | } |
| 183 | |
| 184 | |
| 185 | bool RegExpMacroAssemblerIrregexp::Succeed() { |
| 186 | Emit(BC_SUCCEED, 0); |
| 187 | return false; // Restart matching for global regexp not supported. |
| 188 | } |
| 189 | |
| 190 | |
| 191 | void RegExpMacroAssemblerIrregexp::Fail() { |
| 192 | Emit(BC_FAIL, 0); |
| 193 | } |
| 194 | |
| 195 | |
| 196 | void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { |
| 197 | DCHECK(by >= kMinCPOffset); |
| 198 | DCHECK(by <= kMaxCPOffset); |
| 199 | advance_current_start_ = pc_; |
| 200 | advance_current_offset_ = by; |
| 201 | Emit(BC_ADVANCE_CP, by); |
| 202 | advance_current_end_ = pc_; |
| 203 | } |
| 204 | |
| 205 | |
| 206 | void RegExpMacroAssemblerIrregexp::CheckGreedyLoop( |
| 207 | Label* on_tos_equals_current_position) { |
| 208 | Emit(BC_CHECK_GREEDY, 0); |
| 209 | EmitOrLink(on_tos_equals_current_position); |
| 210 | } |
| 211 | |
| 212 | |
| 213 | void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, |
| 214 | Label* on_failure, |
| 215 | bool check_bounds, |
| 216 | int characters) { |
| 217 | DCHECK(cp_offset >= kMinCPOffset); |
| 218 | DCHECK(cp_offset <= kMaxCPOffset); |
| 219 | int bytecode; |
| 220 | if (check_bounds) { |
| 221 | if (characters == 4) { |
| 222 | bytecode = BC_LOAD_4_CURRENT_CHARS; |
| 223 | } else if (characters == 2) { |
| 224 | bytecode = BC_LOAD_2_CURRENT_CHARS; |
| 225 | } else { |
| 226 | DCHECK(characters == 1); |
| 227 | bytecode = BC_LOAD_CURRENT_CHAR; |
| 228 | } |
| 229 | } else { |
| 230 | if (characters == 4) { |
| 231 | bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED; |
| 232 | } else if (characters == 2) { |
| 233 | bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED; |
| 234 | } else { |
| 235 | DCHECK(characters == 1); |
| 236 | bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED; |
| 237 | } |
| 238 | } |
| 239 | Emit(bytecode, cp_offset); |
| 240 | if (check_bounds) EmitOrLink(on_failure); |
| 241 | } |
| 242 | |
| 243 | |
| 244 | void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, |
| 245 | Label* on_less) { |
| 246 | Emit(BC_CHECK_LT, limit); |
| 247 | EmitOrLink(on_less); |
| 248 | } |
| 249 | |
| 250 | |
| 251 | void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit, |
| 252 | Label* on_greater) { |
| 253 | Emit(BC_CHECK_GT, limit); |
| 254 | EmitOrLink(on_greater); |
| 255 | } |
| 256 | |
| 257 | |
| 258 | void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) { |
| 259 | if (c > MAX_FIRST_ARG) { |
| 260 | Emit(BC_CHECK_4_CHARS, 0); |
| 261 | Emit32(c); |
| 262 | } else { |
| 263 | Emit(BC_CHECK_CHAR, c); |
| 264 | } |
| 265 | EmitOrLink(on_equal); |
| 266 | } |
| 267 | |
| 268 | |
| 269 | void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) { |
| 270 | Emit(BC_CHECK_AT_START, 0); |
| 271 | EmitOrLink(on_at_start); |
| 272 | } |
| 273 | |
| 274 | |
| 275 | void RegExpMacroAssemblerIrregexp::CheckNotAtStart(int cp_offset, |
| 276 | Label* on_not_at_start) { |
| 277 | Emit(BC_CHECK_NOT_AT_START, cp_offset); |
| 278 | EmitOrLink(on_not_at_start); |
| 279 | } |
| 280 | |
| 281 | |
| 282 | void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c, |
| 283 | Label* on_not_equal) { |
| 284 | if (c > MAX_FIRST_ARG) { |
| 285 | Emit(BC_CHECK_NOT_4_CHARS, 0); |
| 286 | Emit32(c); |
| 287 | } else { |
| 288 | Emit(BC_CHECK_NOT_CHAR, c); |
| 289 | } |
| 290 | EmitOrLink(on_not_equal); |
| 291 | } |
| 292 | |
| 293 | |
| 294 | void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd( |
| 295 | uint32_t c, |
| 296 | uint32_t mask, |
| 297 | Label* on_equal) { |
| 298 | if (c > MAX_FIRST_ARG) { |
| 299 | Emit(BC_AND_CHECK_4_CHARS, 0); |
| 300 | Emit32(c); |
| 301 | } else { |
| 302 | Emit(BC_AND_CHECK_CHAR, c); |
| 303 | } |
| 304 | Emit32(mask); |
| 305 | EmitOrLink(on_equal); |
| 306 | } |
| 307 | |
| 308 | |
| 309 | void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd( |
| 310 | uint32_t c, |
| 311 | uint32_t mask, |
| 312 | Label* on_not_equal) { |
| 313 | if (c > MAX_FIRST_ARG) { |
| 314 | Emit(BC_AND_CHECK_NOT_4_CHARS, 0); |
| 315 | Emit32(c); |
| 316 | } else { |
| 317 | Emit(BC_AND_CHECK_NOT_CHAR, c); |
| 318 | } |
| 319 | Emit32(mask); |
| 320 | EmitOrLink(on_not_equal); |
| 321 | } |
| 322 | |
| 323 | |
| 324 | void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd( |
| 325 | uc16 c, |
| 326 | uc16 minus, |
| 327 | uc16 mask, |
| 328 | Label* on_not_equal) { |
| 329 | Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c); |
| 330 | Emit16(minus); |
| 331 | Emit16(mask); |
| 332 | EmitOrLink(on_not_equal); |
| 333 | } |
| 334 | |
| 335 | |
| 336 | void RegExpMacroAssemblerIrregexp::CheckCharacterInRange( |
| 337 | uc16 from, |
| 338 | uc16 to, |
| 339 | Label* on_in_range) { |
| 340 | Emit(BC_CHECK_CHAR_IN_RANGE, 0); |
| 341 | Emit16(from); |
| 342 | Emit16(to); |
| 343 | EmitOrLink(on_in_range); |
| 344 | } |
| 345 | |
| 346 | |
| 347 | void RegExpMacroAssemblerIrregexp::CheckCharacterNotInRange( |
| 348 | uc16 from, |
| 349 | uc16 to, |
| 350 | Label* on_not_in_range) { |
| 351 | Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0); |
| 352 | Emit16(from); |
| 353 | Emit16(to); |
| 354 | EmitOrLink(on_not_in_range); |
| 355 | } |
| 356 | |
| 357 | |
| 358 | void RegExpMacroAssemblerIrregexp::CheckBitInTable( |
| 359 | Handle<ByteArray> table, Label* on_bit_set) { |
| 360 | Emit(BC_CHECK_BIT_IN_TABLE, 0); |
| 361 | EmitOrLink(on_bit_set); |
| 362 | for (int i = 0; i < kTableSize; i += kBitsPerByte) { |
| 363 | int byte = 0; |
| 364 | for (int j = 0; j < kBitsPerByte; j++) { |
| 365 | if (table->get(i + j) != 0) byte |= 1 << j; |
| 366 | } |
| 367 | Emit8(byte); |
| 368 | } |
| 369 | } |
| 370 | |
| 371 | |
| 372 | void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, |
| 373 | bool read_backward, |
| 374 | Label* on_not_equal) { |
| 375 | DCHECK(start_reg >= 0); |
| 376 | DCHECK(start_reg <= kMaxRegister); |
| 377 | Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF, |
| 378 | start_reg); |
| 379 | EmitOrLink(on_not_equal); |
| 380 | } |
| 381 | |
| 382 | |
| 383 | void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 384 | int start_reg, bool read_backward, bool unicode, Label* on_not_equal) { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 385 | DCHECK(start_reg >= 0); |
| 386 | DCHECK(start_reg <= kMaxRegister); |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 387 | Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD |
| 388 | : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) |
| 389 | : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE |
| 390 | : BC_CHECK_NOT_BACK_REF_NO_CASE), |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 391 | start_reg); |
| 392 | EmitOrLink(on_not_equal); |
| 393 | } |
| 394 | |
| 395 | |
| 396 | void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index, |
| 397 | int comparand, |
| 398 | Label* on_less_than) { |
| 399 | DCHECK(register_index >= 0); |
| 400 | DCHECK(register_index <= kMaxRegister); |
| 401 | Emit(BC_CHECK_REGISTER_LT, register_index); |
| 402 | Emit32(comparand); |
| 403 | EmitOrLink(on_less_than); |
| 404 | } |
| 405 | |
| 406 | |
| 407 | void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, |
| 408 | int comparand, |
| 409 | Label* on_greater_or_equal) { |
| 410 | DCHECK(register_index >= 0); |
| 411 | DCHECK(register_index <= kMaxRegister); |
| 412 | Emit(BC_CHECK_REGISTER_GE, register_index); |
| 413 | Emit32(comparand); |
| 414 | EmitOrLink(on_greater_or_equal); |
| 415 | } |
| 416 | |
| 417 | |
| 418 | void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index, |
| 419 | Label* on_eq) { |
| 420 | DCHECK(register_index >= 0); |
| 421 | DCHECK(register_index <= kMaxRegister); |
| 422 | Emit(BC_CHECK_REGISTER_EQ_POS, register_index); |
| 423 | EmitOrLink(on_eq); |
| 424 | } |
| 425 | |
| 426 | |
| 427 | Handle<HeapObject> RegExpMacroAssemblerIrregexp::GetCode( |
| 428 | Handle<String> source) { |
| 429 | Bind(&backtrack_); |
| 430 | Emit(BC_POP_BT, 0); |
| 431 | Handle<ByteArray> array = isolate_->factory()->NewByteArray(length()); |
| 432 | Copy(array->GetDataStartAddress()); |
| 433 | return array; |
| 434 | } |
| 435 | |
| 436 | |
| 437 | int RegExpMacroAssemblerIrregexp::length() { |
| 438 | return pc_; |
| 439 | } |
| 440 | |
| 441 | |
| 442 | void RegExpMacroAssemblerIrregexp::Copy(Address a) { |
| 443 | MemCopy(a, buffer_.start(), length()); |
| 444 | } |
| 445 | |
| 446 | |
| 447 | void RegExpMacroAssemblerIrregexp::Expand() { |
| 448 | bool old_buffer_was_our_own = own_buffer_; |
| 449 | Vector<byte> old_buffer = buffer_; |
| 450 | buffer_ = Vector<byte>::New(old_buffer.length() * 2); |
| 451 | own_buffer_ = true; |
| 452 | MemCopy(buffer_.start(), old_buffer.start(), old_buffer.length()); |
| 453 | if (old_buffer_was_our_own) { |
| 454 | old_buffer.Dispose(); |
| 455 | } |
| 456 | } |
| 457 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 458 | } // namespace internal |
| 459 | } // namespace v8 |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 460 | |
| 461 | #endif // V8_INTERPRETED_REGEXP |