blob: 88eff3ad2660d3b230bfe04922b5c9dfede8a2f9 [file] [log] [blame]
Emily Bernierd0a1eb72015-03-24 16:35:39 -04001// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5
6#include "src/unicode-inl.h"
7#include "src/unicode-decoder.h"
8#include <stdio.h>
9#include <stdlib.h>
10
11namespace unibrow {
12
13void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
14 const uint8_t* stream, unsigned stream_length) {
15 // Assume everything will fit in the buffer and stream won't be needed.
16 last_byte_of_buffer_unused_ = false;
17 unbuffered_start_ = NULL;
18 bool writing_to_buffer = true;
19 // Loop until stream is read, writing to buffer as long as buffer has space.
20 unsigned utf16_length = 0;
21 while (stream_length != 0) {
22 unsigned cursor = 0;
23 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
24 DCHECK(cursor > 0 && cursor <= stream_length);
25 stream += cursor;
26 stream_length -= cursor;
27 bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
28 utf16_length += is_two_characters ? 2 : 1;
29 // Don't need to write to the buffer, but still need utf16_length.
30 if (!writing_to_buffer) continue;
31 // Write out the characters to the buffer.
32 // Must check for equality with buffer_length as we've already updated it.
33 if (utf16_length <= buffer_length) {
34 if (is_two_characters) {
35 *buffer++ = Utf16::LeadSurrogate(character);
36 *buffer++ = Utf16::TrailSurrogate(character);
37 } else {
38 *buffer++ = character;
39 }
40 if (utf16_length == buffer_length) {
41 // Just wrote last character of buffer
42 writing_to_buffer = false;
43 unbuffered_start_ = stream;
44 }
45 continue;
46 }
47 // Have gone over buffer.
48 // Last char of buffer is unused, set cursor back.
49 DCHECK(is_two_characters);
50 writing_to_buffer = false;
51 last_byte_of_buffer_unused_ = true;
52 unbuffered_start_ = stream - cursor;
53 }
54 utf16_length_ = utf16_length;
55}
56
57
58void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
59 unsigned data_length) {
60 while (data_length != 0) {
61 unsigned cursor = 0;
62 uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
63 // There's a total lack of bounds checking for stream
64 // as it was already done in Reset.
65 stream += cursor;
66 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
67 *data++ = Utf16::LeadSurrogate(character);
68 *data++ = Utf16::TrailSurrogate(character);
69 DCHECK(data_length > 1);
70 data_length -= 2;
71 } else {
72 *data++ = character;
73 data_length -= 1;
74 }
75 }
76}
77
78} // namespace unibrow