Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 1 | // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | |
| 6 | #include "src/unicode-inl.h" |
| 7 | #include "src/unicode-decoder.h" |
| 8 | #include <stdio.h> |
| 9 | #include <stdlib.h> |
| 10 | |
| 11 | namespace unibrow { |
| 12 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 13 | void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length, |
| 14 | const uint8_t* stream, size_t stream_length) { |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 15 | // Assume everything will fit in the buffer and stream won't be needed. |
| 16 | last_byte_of_buffer_unused_ = false; |
| 17 | unbuffered_start_ = NULL; |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 18 | unbuffered_length_ = 0; |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 19 | bool writing_to_buffer = true; |
| 20 | // Loop until stream is read, writing to buffer as long as buffer has space. |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 21 | size_t utf16_length = 0; |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 22 | while (stream_length != 0) { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 23 | size_t cursor = 0; |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 24 | uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); |
| 25 | DCHECK(cursor > 0 && cursor <= stream_length); |
| 26 | stream += cursor; |
| 27 | stream_length -= cursor; |
| 28 | bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode; |
| 29 | utf16_length += is_two_characters ? 2 : 1; |
| 30 | // Don't need to write to the buffer, but still need utf16_length. |
| 31 | if (!writing_to_buffer) continue; |
| 32 | // Write out the characters to the buffer. |
| 33 | // Must check for equality with buffer_length as we've already updated it. |
| 34 | if (utf16_length <= buffer_length) { |
| 35 | if (is_two_characters) { |
| 36 | *buffer++ = Utf16::LeadSurrogate(character); |
| 37 | *buffer++ = Utf16::TrailSurrogate(character); |
| 38 | } else { |
| 39 | *buffer++ = character; |
| 40 | } |
| 41 | if (utf16_length == buffer_length) { |
| 42 | // Just wrote last character of buffer |
| 43 | writing_to_buffer = false; |
| 44 | unbuffered_start_ = stream; |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 45 | unbuffered_length_ = stream_length; |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 46 | } |
| 47 | continue; |
| 48 | } |
| 49 | // Have gone over buffer. |
| 50 | // Last char of buffer is unused, set cursor back. |
| 51 | DCHECK(is_two_characters); |
| 52 | writing_to_buffer = false; |
| 53 | last_byte_of_buffer_unused_ = true; |
| 54 | unbuffered_start_ = stream - cursor; |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 55 | unbuffered_length_ = stream_length + cursor; |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 56 | } |
| 57 | utf16_length_ = utf16_length; |
| 58 | } |
| 59 | |
| 60 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 61 | void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, |
| 62 | size_t stream_length, uint16_t* data, |
| 63 | size_t data_length) { |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 64 | while (data_length != 0) { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 65 | size_t cursor = 0; |
| 66 | uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 67 | // There's a total lack of bounds checking for stream |
| 68 | // as it was already done in Reset. |
| 69 | stream += cursor; |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 70 | DCHECK(stream_length >= cursor); |
| 71 | stream_length -= cursor; |
Emily Bernier | d0a1eb7 | 2015-03-24 16:35:39 -0400 | [diff] [blame] | 72 | if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| 73 | *data++ = Utf16::LeadSurrogate(character); |
| 74 | *data++ = Utf16::TrailSurrogate(character); |
| 75 | DCHECK(data_length > 1); |
| 76 | data_length -= 2; |
| 77 | } else { |
| 78 | *data++ = character; |
| 79 | data_length -= 1; |
| 80 | } |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | } // namespace unibrow |