Blame - src/unicode-decoder.cc - fp2-dev/platform/external/v8

blob: 88eff3ad2660d3b230bfe04922b5c9dfede8a2f9 [file] [log] [blame]

Emily Bernier	d0a1eb7	2015-03-24 16:35:39 -0400	[diff] [blame^]	1	// Copyright 2014 the V8 project authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5
				6	#include "src/unicode-inl.h"
				7	#include "src/unicode-decoder.h"
				8	#include <stdio.h>
				9	#include <stdlib.h>
				10
				11	namespace unibrow {
				12
				13	void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
				14	const uint8_t* stream, unsigned stream_length) {
				15	// Assume everything will fit in the buffer and stream won't be needed.
				16	last_byte_of_buffer_unused_ = false;
				17	unbuffered_start_ = NULL;
				18	bool writing_to_buffer = true;
				19	// Loop until stream is read, writing to buffer as long as buffer has space.
				20	unsigned utf16_length = 0;
				21	while (stream_length != 0) {
				22	unsigned cursor = 0;
				23	uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
				24	DCHECK(cursor > 0 && cursor <= stream_length);
				25	stream += cursor;
				26	stream_length -= cursor;
				27	bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
				28	utf16_length += is_two_characters ? 2 : 1;
				29	// Don't need to write to the buffer, but still need utf16_length.
				30	if (!writing_to_buffer) continue;
				31	// Write out the characters to the buffer.
				32	// Must check for equality with buffer_length as we've already updated it.
				33	if (utf16_length <= buffer_length) {
				34	if (is_two_characters) {
				35	*buffer++ = Utf16::LeadSurrogate(character);
				36	*buffer++ = Utf16::TrailSurrogate(character);
				37	} else {
				38	*buffer++ = character;
				39	}
				40	if (utf16_length == buffer_length) {
				41	// Just wrote last character of buffer
				42	writing_to_buffer = false;
				43	unbuffered_start_ = stream;
				44	}
				45	continue;
				46	}
				47	// Have gone over buffer.
				48	// Last char of buffer is unused, set cursor back.
				49	DCHECK(is_two_characters);
				50	writing_to_buffer = false;
				51	last_byte_of_buffer_unused_ = true;
				52	unbuffered_start_ = stream - cursor;
				53	}
				54	utf16_length_ = utf16_length;
				55	}
				56
				57
				58	void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
				59	unsigned data_length) {
				60	while (data_length != 0) {
				61	unsigned cursor = 0;
				62	uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
				63	// There's a total lack of bounds checking for stream
				64	// as it was already done in Reset.
				65	stream += cursor;
				66	if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
				67	*data++ = Utf16::LeadSurrogate(character);
				68	*data++ = Utf16::TrailSurrogate(character);
				69	DCHECK(data_length > 1);
				70	data_length -= 2;
				71	} else {
				72	*data++ = character;
				73	data_length -= 1;
				74	}
				75	}
				76	}
				77
				78	} // namespace unibrow