blob: 211ab6ba3948940af8e138913d302b15d61bdcb5 [file] [log] [blame]
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +00001// Copyright 2012 the V8 project authors. All rights reserved.
ager@chromium.orga74f0da2008-12-03 16:05:52 +00002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_REGEXP_MACRO_ASSEMBLER_H_
29#define V8_REGEXP_MACRO_ASSEMBLER_H_
30
ricow@chromium.orgd236f4d2010-09-01 06:52:08 +000031#include "ast.h"
32
kasperl@chromium.org71affb52009-05-26 05:44:31 +000033namespace v8 {
34namespace internal {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000035
ager@chromium.orga74f0da2008-12-03 16:05:52 +000036struct DisjunctDecisionRow {
37 RegExpCharacterClass cc;
38 Label* on_match;
39};
40
41
42class RegExpMacroAssembler {
43 public:
ager@chromium.orgddb913d2009-01-27 10:01:48 +000044 // The implementation must be able to handle at least:
45 static const int kMaxRegister = (1 << 16) - 1;
46 static const int kMaxCPOffset = (1 << 15) - 1;
47 static const int kMinCPOffset = -(1 << 15);
jkummerow@chromium.org1456e702012-03-30 08:38:13 +000048
49 static const int kTableSizeBits = 7;
50 static const int kTableSize = 1 << kTableSizeBits;
51 static const int kTableMask = kTableSize - 1;
52
ager@chromium.orga74f0da2008-12-03 16:05:52 +000053 enum IrregexpImplementation {
54 kIA32Implementation,
55 kARMImplementation,
lrn@chromium.org7516f052011-03-30 08:52:27 +000056 kMIPSImplementation,
sgjesse@chromium.org911335c2009-08-19 12:59:44 +000057 kX64Implementation,
ager@chromium.org32912102009-01-16 10:38:43 +000058 kBytecodeImplementation
59 };
60
61 enum StackCheckFlag {
62 kNoStackLimitCheck = false,
63 kCheckStackLimit = true
64 };
ager@chromium.orga74f0da2008-12-03 16:05:52 +000065
mmassi@chromium.org7028c052012-06-13 11:51:58 +000066 explicit RegExpMacroAssembler(Zone* zone);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000067 virtual ~RegExpMacroAssembler();
ager@chromium.org32912102009-01-16 10:38:43 +000068 // The maximal number of pushes between stack checks. Users must supply
69 // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
70 // at least once for every stack_limit() pushes that are executed.
71 virtual int stack_limit_slack() = 0;
ager@chromium.org18ad94b2009-09-02 08:22:29 +000072 virtual bool CanReadUnaligned();
ager@chromium.orga74f0da2008-12-03 16:05:52 +000073 virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change.
74 virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by.
ager@chromium.org32912102009-01-16 10:38:43 +000075 // Continues execution from the position pushed on the top of the backtrack
76 // stack by an earlier PushBacktrack(Label*).
ager@chromium.orga74f0da2008-12-03 16:05:52 +000077 virtual void Backtrack() = 0;
78 virtual void Bind(Label* label) = 0;
ager@chromium.orgddb913d2009-01-27 10:01:48 +000079 virtual void CheckAtStart(Label* on_at_start) = 0;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000080 // Dispatch after looking the current character up in a 2-bits-per-entry
81 // map. The destinations vector has up to 4 labels.
sgjesse@chromium.orgc6c57182011-01-17 12:24:25 +000082 virtual void CheckCharacter(unsigned c, Label* on_equal) = 0;
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +000083 // Bitwise and the current character with the given constant and then
84 // check for a match with c.
sgjesse@chromium.orgc6c57182011-01-17 12:24:25 +000085 virtual void CheckCharacterAfterAnd(unsigned c,
86 unsigned and_with,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +000087 Label* on_equal) = 0;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000088 virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
89 virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
90 // Check the current character for a match with a literal string. If we
ager@chromium.org8bb60582008-12-11 12:02:20 +000091 // fail to match then goto the on_failure label. If check_eos is set then
92 // the end of input always fails. If check_eos is clear then it is the
93 // caller's responsibility to ensure that the end of string is not hit.
94 // If the label is NULL then we should pop a backtrack address off
95 // the stack and go to that.
ager@chromium.orga74f0da2008-12-03 16:05:52 +000096 virtual void CheckCharacters(
97 Vector<const uc16> str,
98 int cp_offset,
ager@chromium.org8bb60582008-12-11 12:02:20 +000099 Label* on_failure,
100 bool check_eos) = 0;
101 virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000102 virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
103 virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
104 virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
105 Label* on_no_match) = 0;
106 // Check the current character for a match with a literal character. If we
107 // fail to match then goto the on_failure label. End of input always
108 // matches. If the label is NULL then we should pop a backtrack address off
109 // the stack and go to that.
sgjesse@chromium.orgc6c57182011-01-17 12:24:25 +0000110 virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0;
111 virtual void CheckNotCharacterAfterAnd(unsigned c,
112 unsigned and_with,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000113 Label* on_not_equal) = 0;
jkummerow@chromium.org1456e702012-03-30 08:38:13 +0000114 // Subtract a constant from the current character, then and with the given
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000115 // constant and then check for a match with c.
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000116 virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
117 uc16 minus,
118 uc16 and_with,
119 Label* on_not_equal) = 0;
jkummerow@chromium.org1456e702012-03-30 08:38:13 +0000120 virtual void CheckCharacterInRange(uc16 from,
121 uc16 to, // Both inclusive.
122 Label* on_in_range) = 0;
123 virtual void CheckCharacterNotInRange(uc16 from,
124 uc16 to, // Both inclusive.
125 Label* on_not_in_range) = 0;
126
127 // The current character (modulus the kTableSize) is looked up in the byte
128 // array, and if the found byte is non-zero, we jump to the on_bit_set label.
129 virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0;
130
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000131 // Checks whether the given offset from the current position is before
132 // the end of the string. May overwrite the current character.
133 virtual void CheckPosition(int cp_offset, Label* on_outside_input) {
134 LoadCurrentCharacter(cp_offset, on_outside_input, true);
135 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000136 // Check whether a standard/default character class matches the current
137 // character. Returns false if the type of special character class does
138 // not have custom support.
139 // May clobber the current loaded character.
140 virtual bool CheckSpecialCharacterClass(uc16 type,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000141 Label* on_no_match) {
142 return false;
143 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000144 virtual void Fail() = 0;
karlklose@chromium.org83a47282011-05-11 11:54:09 +0000145 virtual Handle<HeapObject> GetCode(Handle<String> source) = 0;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000146 virtual void GoTo(Label* label) = 0;
147 // Check whether a register is >= a given constant and go to a label if it
148 // is. Backtracks instead if the label is NULL.
149 virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
150 // Check whether a register is < a given constant and go to a label if it is.
151 // Backtracks instead if the label is NULL.
152 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
ager@chromium.org32912102009-01-16 10:38:43 +0000153 // Check whether a register is == to the current position and go to a
154 // label if it is.
155 virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000156 virtual IrregexpImplementation Implementation() = 0;
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000157 virtual void LoadCurrentCharacter(int cp_offset,
158 Label* on_end_of_input,
159 bool check_bounds = true,
160 int characters = 1) = 0;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000161 virtual void PopCurrentPosition() = 0;
162 virtual void PopRegister(int register_index) = 0;
ager@chromium.org32912102009-01-16 10:38:43 +0000163 // Pushes the label on the backtrack stack, so that a following Backtrack
164 // will go to this label. Always checks the backtrack stack limit.
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000165 virtual void PushBacktrack(Label* label) = 0;
166 virtual void PushCurrentPosition() = 0;
ager@chromium.org32912102009-01-16 10:38:43 +0000167 virtual void PushRegister(int register_index,
168 StackCheckFlag check_stack_limit) = 0;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000169 virtual void ReadCurrentPositionFromRegister(int reg) = 0;
170 virtual void ReadStackPointerFromRegister(int reg) = 0;
whesse@chromium.org4a5224e2010-10-20 12:37:07 +0000171 virtual void SetCurrentPositionFromEnd(int by) = 0;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000172 virtual void SetRegister(int register_index, int to) = 0;
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000173 // Return whether the matching (with a global regexp) will be restarted.
174 virtual bool Succeed() = 0;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000175 virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000176 virtual void ClearRegisters(int reg_from, int reg_to) = 0;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000177 virtual void WriteStackPointerToRegister(int reg) = 0;
karlklose@chromium.org83a47282011-05-11 11:54:09 +0000178
179 // Controls the generation of large inlined constants in the code.
180 void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
181 bool slow_safe() { return slow_safe_compiler_; }
182
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000183 enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK };
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000184 // Set whether the regular expression has the global flag. Exiting due to
185 // a failure in a global regexp may still mean success overall.
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000186 inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
187 inline bool global() { return global_mode_ != NOT_GLOBAL; }
188 inline bool global_with_zero_length_check() {
189 return global_mode_ == GLOBAL;
190 }
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000191
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000192 Zone* zone() const { return zone_; }
193
karlklose@chromium.org83a47282011-05-11 11:54:09 +0000194 private:
195 bool slow_safe_compiler_;
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000196 bool global_mode_;
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000197 Zone* zone_;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000198};
199
200
ricow@chromium.orgc9c80822010-04-21 08:22:37 +0000201#ifndef V8_INTERPRETED_REGEXP // Avoid compiling unused code.
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000202
203class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000204 public:
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000205 // Type of input string to generate code for.
206 enum Mode { ASCII = 1, UC16 = 2 };
207
208 // Result of calling generated native RegExp code.
209 // RETRY: Something significant changed during execution, and the matching
210 // should be retried from scratch.
211 // EXCEPTION: Something failed during execution. If no exception has been
212 // thrown, it's an internal out-of-memory, and the caller should
213 // throw the exception.
214 // FAILURE: Matching failed.
215 // SUCCESS: Matching succeeded, and the output array has been filled with
216 // capture positions.
217 enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
218
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000219 explicit NativeRegExpMacroAssembler(Zone* zone);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000220 virtual ~NativeRegExpMacroAssembler();
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000221 virtual bool CanReadUnaligned();
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000222
223 static Result Match(Handle<Code> regexp,
224 Handle<String> subject,
225 int* offsets_vector,
226 int offsets_vector_length,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000227 int previous_index,
228 Isolate* isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000229
230 // Compares two-byte strings case insensitively.
231 // Called from generated RegExp code.
232 static int CaseInsensitiveCompareUC16(Address byte_offset1,
233 Address byte_offset2,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000234 size_t byte_length,
235 Isolate* isolate);
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000236
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000237 // Called from RegExp if the backtrack stack limit is hit.
238 // Tries to expand the stack. Returns the new stack-pointer if
239 // successful, and updates the stack_top address, or returns 0 if unable
240 // to grow the stack.
241 // This function must not trigger a garbage collection.
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000242 static Address GrowStack(Address stack_pointer, Address* stack_top,
243 Isolate* isolate);
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000244
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000245 static const byte* StringCharacterPosition(String* subject, int start_index);
246
mvstanton@chromium.org6bec0092013-01-23 13:46:53 +0000247 // Byte map of one byte characters with a 0xff if the character is a word
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000248 // character (digit, letter or underscore) and 0x00 otherwise.
249 // Used by generated RegExp code.
mvstanton@chromium.org6bec0092013-01-23 13:46:53 +0000250 static const byte word_character_map[256];
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000251
252 static Address word_character_map_address() {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000253 return const_cast<Address>(&word_character_map[0]);
sgjesse@chromium.orgb302e562010-02-03 11:26:59 +0000254 }
255
sgjesse@chromium.org911335c2009-08-19 12:59:44 +0000256 static Result Execute(Code* code,
257 String* input,
258 int start_offset,
259 const byte* input_start,
260 const byte* input_end,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000261 int* output,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000262 int output_size,
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +0000263 Isolate* isolate);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000264};
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000265
ricow@chromium.orgc9c80822010-04-21 08:22:37 +0000266#endif // V8_INTERPRETED_REGEXP
ager@chromium.org18ad94b2009-09-02 08:22:29 +0000267
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000268} } // namespace v8::internal
269
270#endif // V8_REGEXP_MACRO_ASSEMBLER_H_