ager@chromium.org | a9aa5fa | 2011-04-13 08:46:07 +0000 | [diff] [blame] | 1 | // Copyright 2011 the V8 project authors. All rights reserved. |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 2 | // Redistribution and use in source and binary forms, with or without |
| 3 | // modification, are permitted provided that the following conditions are |
| 4 | // met: |
| 5 | // |
| 6 | // * Redistributions of source code must retain the above copyright |
| 7 | // notice, this list of conditions and the following disclaimer. |
| 8 | // * Redistributions in binary form must reproduce the above |
| 9 | // copyright notice, this list of conditions and the following |
| 10 | // disclaimer in the documentation and/or other materials provided |
| 11 | // with the distribution. |
| 12 | // * Neither the name of Google Inc. nor the names of its |
| 13 | // contributors may be used to endorse or promote products derived |
| 14 | // from this software without specific prior written permission. |
| 15 | // |
| 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | |
| 28 | #include "../include/v8-preparser.h" |
sgjesse@chromium.org | ea88ce9 | 2011-03-23 11:19:56 +0000 | [diff] [blame] | 29 | |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 30 | #include "globals.h" |
| 31 | #include "checks.h" |
| 32 | #include "allocation.h" |
| 33 | #include "utils.h" |
| 34 | #include "list.h" |
| 35 | #include "scanner-base.h" |
lrn@chromium.org | 1c09276 | 2011-05-09 09:42:16 +0000 | [diff] [blame] | 36 | #include "preparse-data-format.h" |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 37 | #include "preparse-data.h" |
| 38 | #include "preparser.h" |
| 39 | |
| 40 | namespace v8 { |
| 41 | namespace internal { |
| 42 | |
| 43 | // UTF16Buffer based on a v8::UnicodeInputStream. |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 44 | class InputStreamUTF16Buffer : public UC16CharacterStream { |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 45 | public: |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 46 | /* The InputStreamUTF16Buffer maintains an internal buffer |
| 47 | * that is filled in chunks from the UC16CharacterStream. |
| 48 | * It also maintains unlimited pushback capability, but optimized |
| 49 | * for small pushbacks. |
| 50 | * The pushback_buffer_ pointer points to the limit of pushbacks |
| 51 | * in the current buffer. There is room for a few pushback'ed chars before |
| 52 | * the buffer containing the most recently read chunk. If this is overflowed, |
| 53 | * an external buffer is allocated/reused to hold further pushbacks, and |
| 54 | * pushback_buffer_ and buffer_cursor_/buffer_end_ now points to the |
| 55 | * new buffer. When this buffer is read to the end again, the cursor is |
| 56 | * switched back to the internal buffer |
| 57 | */ |
| 58 | explicit InputStreamUTF16Buffer(v8::UnicodeInputStream* stream) |
| 59 | : UC16CharacterStream(), |
| 60 | stream_(stream), |
| 61 | pushback_buffer_(buffer_), |
| 62 | pushback_buffer_end_cache_(NULL), |
| 63 | pushback_buffer_backing_(NULL), |
| 64 | pushback_buffer_backing_size_(0) { |
| 65 | buffer_cursor_ = buffer_end_ = buffer_ + kPushBackSize; |
| 66 | } |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 67 | |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 68 | virtual ~InputStreamUTF16Buffer() { |
| 69 | if (pushback_buffer_backing_ != NULL) { |
| 70 | DeleteArray(pushback_buffer_backing_); |
| 71 | } |
| 72 | } |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 73 | |
sgjesse@chromium.org | c6c5718 | 2011-01-17 12:24:25 +0000 | [diff] [blame] | 74 | virtual void PushBack(uc32 ch) { |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 75 | ASSERT(pos_ > 0); |
sgjesse@chromium.org | c6c5718 | 2011-01-17 12:24:25 +0000 | [diff] [blame] | 76 | if (ch == kEndOfInput) { |
| 77 | pos_--; |
| 78 | return; |
| 79 | } |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 80 | if (buffer_cursor_ <= pushback_buffer_) { |
| 81 | // No more room in the current buffer to do pushbacks. |
| 82 | if (pushback_buffer_end_cache_ == NULL) { |
| 83 | // We have overflowed the pushback space at the beginning of buffer_. |
| 84 | // Switch to using a separate allocated pushback buffer. |
| 85 | if (pushback_buffer_backing_ == NULL) { |
| 86 | // Allocate a buffer the first time we need it. |
| 87 | pushback_buffer_backing_ = NewArray<uc16>(kPushBackSize); |
| 88 | pushback_buffer_backing_size_ = kPushBackSize; |
| 89 | } |
| 90 | pushback_buffer_ = pushback_buffer_backing_; |
| 91 | pushback_buffer_end_cache_ = buffer_end_; |
| 92 | buffer_end_ = pushback_buffer_backing_ + pushback_buffer_backing_size_; |
| 93 | buffer_cursor_ = buffer_end_ - 1; |
| 94 | } else { |
| 95 | // Hit the bottom of the allocated pushback buffer. |
| 96 | // Double the buffer and continue. |
| 97 | uc16* new_buffer = NewArray<uc16>(pushback_buffer_backing_size_ * 2); |
| 98 | memcpy(new_buffer + pushback_buffer_backing_size_, |
| 99 | pushback_buffer_backing_, |
| 100 | pushback_buffer_backing_size_); |
| 101 | DeleteArray(pushback_buffer_backing_); |
| 102 | buffer_cursor_ = new_buffer + pushback_buffer_backing_size_; |
| 103 | pushback_buffer_backing_ = pushback_buffer_ = new_buffer; |
| 104 | buffer_end_ = pushback_buffer_backing_ + pushback_buffer_backing_size_; |
| 105 | } |
| 106 | } |
sgjesse@chromium.org | c6c5718 | 2011-01-17 12:24:25 +0000 | [diff] [blame] | 107 | pushback_buffer_[buffer_cursor_ - pushback_buffer_- 1] = |
| 108 | static_cast<uc16>(ch); |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 109 | pos_--; |
| 110 | } |
| 111 | |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 112 | protected: |
| 113 | virtual bool ReadBlock() { |
| 114 | if (pushback_buffer_end_cache_ != NULL) { |
| 115 | buffer_cursor_ = buffer_; |
| 116 | buffer_end_ = pushback_buffer_end_cache_; |
| 117 | pushback_buffer_end_cache_ = NULL; |
| 118 | return buffer_end_ > buffer_cursor_; |
| 119 | } |
| 120 | // Copy the top of the buffer into the pushback area. |
| 121 | int32_t value; |
| 122 | uc16* buffer_start = buffer_ + kPushBackSize; |
| 123 | buffer_cursor_ = buffer_end_ = buffer_start; |
| 124 | while ((value = stream_->Next()) >= 0) { |
| 125 | if (value > static_cast<int32_t>(unibrow::Utf8::kMaxThreeByteChar)) { |
| 126 | value = unibrow::Utf8::kBadChar; |
| 127 | } |
| 128 | // buffer_end_ is a const pointer, but buffer_ is writable. |
| 129 | buffer_start[buffer_end_++ - buffer_start] = static_cast<uc16>(value); |
| 130 | if (buffer_end_ == buffer_ + kPushBackSize + kBufferSize) break; |
| 131 | } |
| 132 | return buffer_end_ > buffer_start; |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 133 | } |
| 134 | |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 135 | virtual unsigned SlowSeekForward(unsigned pos) { |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 136 | // Seeking in the input is not used by preparsing. |
| 137 | // It's only used by the real parser based on preparser data. |
| 138 | UNIMPLEMENTED(); |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 139 | return 0; |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 140 | } |
| 141 | |
| 142 | private: |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 143 | static const unsigned kBufferSize = 512; |
| 144 | static const unsigned kPushBackSize = 16; |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 145 | v8::UnicodeInputStream* const stream_; |
ager@chromium.org | 5f0c45f | 2010-12-17 08:51:21 +0000 | [diff] [blame] | 146 | // Buffer holding first kPushBackSize characters of pushback buffer, |
| 147 | // then kBufferSize chars of read-ahead. |
| 148 | // The pushback buffer is only used if pushing back characters past |
| 149 | // the start of a block. |
| 150 | uc16 buffer_[kPushBackSize + kBufferSize]; |
| 151 | // Limit of pushbacks before new allocation is necessary. |
| 152 | uc16* pushback_buffer_; |
| 153 | // Only if that pushback buffer at the start of buffer_ isn't sufficient |
| 154 | // is the following used. |
| 155 | const uc16* pushback_buffer_end_cache_; |
| 156 | uc16* pushback_buffer_backing_; |
| 157 | unsigned pushback_buffer_backing_size_; |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 158 | }; |
| 159 | |
| 160 | |
// Functions declared by allocation.h and implemented either in api.cc (for
// V8) or here (for a stand-alone preparser).
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 163 | |
| 164 | void FatalProcessOutOfMemory(const char* reason) { |
| 165 | V8_Fatal(__FILE__, __LINE__, reason); |
| 166 | } |
| 167 | |
// Tells callers whether slow assertions should run. This implementation
// unconditionally answers yes.
bool EnableSlowAsserts() {
  const bool slow_asserts_enabled = true;
  return slow_asserts_enabled;
}
| 169 | |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 170 | } // namespace internal. |
| 171 | |
| 172 | |
| 173 | UnicodeInputStream::~UnicodeInputStream() { } |
| 174 | |
| 175 | |
| 176 | PreParserData Preparse(UnicodeInputStream* input, size_t max_stack) { |
| 177 | internal::InputStreamUTF16Buffer buffer(input); |
| 178 | uintptr_t stack_limit = reinterpret_cast<uintptr_t>(&buffer) - max_stack; |
ager@chromium.org | a9aa5fa | 2011-04-13 08:46:07 +0000 | [diff] [blame] | 179 | internal::UnicodeCache unicode_cache; |
lrn@chromium.org | ac2828d | 2011-06-23 06:29:21 +0000 | [diff] [blame] | 180 | internal::JavaScriptScanner scanner(&unicode_cache); |
kasperl@chromium.org | a555126 | 2010-12-07 12:49:48 +0000 | [diff] [blame] | 181 | scanner.Initialize(&buffer); |
| 182 | internal::CompleteParserRecorder recorder; |
| 183 | preparser::PreParser::PreParseResult result = |
| 184 | preparser::PreParser::PreParseProgram(&scanner, |
| 185 | &recorder, |
| 186 | true, |
| 187 | stack_limit); |
| 188 | if (result == preparser::PreParser::kPreParseStackOverflow) { |
| 189 | return PreParserData::StackOverflow(); |
| 190 | } |
| 191 | internal::Vector<unsigned> pre_data = recorder.ExtractData(); |
| 192 | size_t size = pre_data.length() * sizeof(pre_data[0]); |
| 193 | unsigned char* data = reinterpret_cast<unsigned char*>(pre_data.start()); |
| 194 | return PreParserData(size, data); |
| 195 | } |
| 196 | |
| 197 | } // namespace v8. |
| 198 | |
| 199 | |
// Used by ASSERT macros and other immediate exits.
// Ends the process with a failure status. The source location and the
// message arguments are accepted but not reported anywhere.
extern "C" void V8_Fatal(const char* file, int line, const char* format, ...) {
  static_cast<void>(file);
  static_cast<void>(line);
  static_cast<void>(format);
  exit(EXIT_FAILURE);
}