Upgrade to 3.29. Update V8 to 3.29.88.17 and update makefiles to support building on all the relevant platforms. Bug: 17370214. Change-Id: Ia3407c157fd8d72a93e23d8318ccaf6ecf77fa4e

commit: b8a8cc1952d61a2f3a2568848933943a543b5d3e [log] [tgz]
author: Ben Murdoch <benm@google.com> Wed Nov 26 15:28:44 2014 +0000
committer: Ben Murdoch <benm@google.com> Thu Dec 04 14:47:29 2014 +0000
tree: 83e9846202f2a441db91efdd164d7d8478ba9897
parent: b7a971bd8c35b5952b1f25fca56de6113506d3da [diff] [blame]
diff --git a/src/unicode-inl.h b/src/unicode-inl.h
index 9c0ebf9..81327d7 100644
--- a/src/unicode-inl.h
+++ b/src/unicode-inl.h

@@ -1,34 +1,13 @@
 // Copyright 2007-2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
 
 #ifndef V8_UNICODE_INL_H_
 #define V8_UNICODE_INL_H_
 
-#include "unicode.h"
+#include "src/unicode.h"
+#include "src/base/logging.h"
+#include "src/utils.h"
 
 namespace unibrow {
 
@@ -78,7 +57,41 @@
 }
 
 
-unsigned Utf8::Encode(char* str, uchar c, int previous) {
+uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
+  DCHECK(c > Latin1::kMaxChar);
+  switch (c) {
+    // These are equivalent characters in Unicode.
+    case 0x39c:
+    case 0x3bc:
+      return 0xb5;
+    // This is the uppercase form of a Latin-1 character; the uppercase
+    // form itself lies outside of Latin-1.
+    case 0x178:
+      return 0xff;
+  }
+  return 0;
+}
+
+
+unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {
+  static const int kMask = ~(1 << 6);
+  if (c <= kMaxOneByteChar) {
+    str[0] = c;
+    return 1;
+  }
+  str[0] = 0xC0 | (c >> 6);
+  str[1] = 0x80 | (c & kMask);
+  return 2;
+}
+
+// Encode encodes the UTF-16 code units c and previous into the given str
+// buffer, and combines surrogate code units into single code points. If
+// replace_invalid is set to true, orphan surrogate code units will be replaced
+// with kBadChar.
+unsigned Utf8::Encode(char* str,
+                      uchar c,
+                      int previous,
+                      bool replace_invalid) {
   static const int kMask = ~(1 << 6);
   if (c <= kMaxOneByteChar) {
     str[0] = c;
@@ -88,12 +101,16 @@
     str[1] = 0x80 | (c & kMask);
     return 2;
   } else if (c <= kMaxThreeByteChar) {
-    if (Utf16::IsTrailSurrogate(c) &&
-        Utf16::IsLeadSurrogate(previous)) {
+    if (Utf16::IsSurrogatePair(previous, c)) {
       const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;
       return Encode(str - kUnmatchedSize,
                     Utf16::CombineSurrogatePair(previous, c),
-                    Utf16::kNoPreviousCharacter) - kUnmatchedSize;
+                    Utf16::kNoPreviousCharacter,
+                    replace_invalid) - kUnmatchedSize;
+    } else if (replace_invalid &&
+               (Utf16::IsLeadSurrogate(c) ||
+               Utf16::IsTrailSurrogate(c))) {
+      c = kBadChar;
     }
     str[0] = 0xE0 | (c >> 12);
     str[1] = 0x80 | ((c >> 6) & kMask);
@@ -136,112 +153,51 @@
   }
 }
 
-uchar CharacterStream::GetNext() {
-  uchar result = DecodeCharacter(buffer_, &cursor_);
-  if (remaining_ == 1) {
-    cursor_ = 0;
-    FillBuffer();
-  } else {
-    remaining_--;
-  }
-  return result;
+Utf8DecoderBase::Utf8DecoderBase()
+  : unbuffered_start_(NULL),
+    utf16_length_(0),
+    last_byte_of_buffer_unused_(false) {}
+
+Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer,
+                                 unsigned buffer_length,
+                                 const uint8_t* stream,
+                                 unsigned stream_length) {
+  Reset(buffer, buffer_length, stream, stream_length);
 }
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define IF_LITTLE(expr) expr
-#define IF_BIG(expr)    ((void) 0)
-#elif __BYTE_ORDER == __BIG_ENDIAN
-#define IF_LITTLE(expr) ((void) 0)
-#define IF_BIG(expr)    expr
-#else
-#warning Unknown byte ordering
-#endif
-
-bool CharacterStream::EncodeAsciiCharacter(uchar c, byte* buffer,
-    unsigned capacity, unsigned& offset) {
-  if (offset >= capacity) return false;
-  buffer[offset] = c;
-  offset += 1;
-  return true;
+template<unsigned kBufferSize>
+Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
+  : Utf8DecoderBase(buffer_,
+                    kBufferSize,
+                    reinterpret_cast<const uint8_t*>(stream),
+                    length) {
 }
 
-bool CharacterStream::EncodeNonAsciiCharacter(uchar c, byte* buffer,
-    unsigned capacity, unsigned& offset) {
-  unsigned aligned = (offset + 0x3) & ~0x3;
-  if ((aligned + sizeof(uchar)) > capacity)
-    return false;
-  if (offset == aligned) {
-    IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = (c << 8) | 0x80);
-    IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c | (1 << 31));
-  } else {
-    buffer[offset] = 0x80;
-    IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = c << 8);
-    IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c);
-  }
-  offset = aligned + sizeof(uchar);
-  return true;
+template<unsigned kBufferSize>
+void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
+  Utf8DecoderBase::Reset(buffer_,
+                         kBufferSize,
+                         reinterpret_cast<const uint8_t*>(stream),
+                         length);
 }
 
-bool CharacterStream::EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
-    unsigned& offset) {
-  if (c <= Utf8::kMaxOneByteChar) {
-    return EncodeAsciiCharacter(c, buffer, capacity, offset);
-  } else {
-    return EncodeNonAsciiCharacter(c, buffer, capacity, offset);
-  }
-}
-
-uchar CharacterStream::DecodeCharacter(const byte* buffer, unsigned* offset) {
-  byte b = buffer[*offset];
-  if (b <= Utf8::kMaxOneByteChar) {
-    (*offset)++;
-    return b;
-  } else {
-    unsigned aligned = (*offset + 0x3) & ~0x3;
-    *offset = aligned + sizeof(uchar);
-    IF_LITTLE(return *reinterpret_cast<const uchar*>(buffer + aligned) >> 8);
-    IF_BIG(return *reinterpret_cast<const uchar*>(buffer + aligned) &
-                    ~(1 << 31));
-  }
-}
-
-#undef IF_LITTLE
-#undef IF_BIG
-
-template <class R, class I, unsigned s>
-void InputBuffer<R, I, s>::FillBuffer() {
-  buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_);
-}
-
-template <class R, class I, unsigned s>
-void InputBuffer<R, I, s>::Rewind() {
-  Reset(input_);
-}
-
-template <class R, class I, unsigned s>
-void InputBuffer<R, I, s>::Reset(unsigned position, I input) {
-  input_ = input;
-  remaining_ = 0;
-  cursor_ = 0;
-  offset_ = position;
-  buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_);
-}
-
-template <class R, class I, unsigned s>
-void InputBuffer<R, I, s>::Reset(I input) {
-  Reset(0, input);
-}
-
-template <class R, class I, unsigned s>
-void InputBuffer<R, I, s>::Seek(unsigned position) {
-  offset_ = position;
-  buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_);
-}
-
-template <unsigned s>
-Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length)
-    : InputBuffer<Utf8, Buffer<const char*>, s>(Buffer<const char*>(data,
-                                                                    length)) {
+template <unsigned kBufferSize>
+unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
+                                              unsigned length) const {
+  DCHECK(length > 0);
+  if (length > utf16_length_) length = utf16_length_;
+  // memcpy everything in buffer.
+  unsigned buffer_length =
+      last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
+  unsigned memcpy_length = length <= buffer_length ? length : buffer_length;
+  v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
+  if (length <= buffer_length) return length;
+  DCHECK(unbuffered_start_ != NULL);
+  // Copy the rest the slow way.
+  WriteUtf16Slow(unbuffered_start_,
+                 data + buffer_length,
+                 length - buffer_length);
+  return length;
 }
 
 }  // namespace unibrow
commit	b8a8cc1952d61a2f3a2568848933943a543b5d3e	[log] [tgz]
author	Ben Murdoch <benm@google.com>	Wed Nov 26 15:28:44 2014 +0000
committer	Ben Murdoch <benm@google.com>	Thu Dec 04 14:47:29 2014 +0000
tree	83e9846202f2a441db91efdd164d7d8478ba9897
parent	b7a971bd8c35b5952b1f25fca56de6113506d3da [diff] [blame]