Revert "[skjson] Unescape strings"
This reverts commit 20fda9ce6a6b8c9913acf65075437c41b6725d4a.
Reason for revert: the original change triggers ASAN (AddressSanitizer) failures.
Original change's description:
> [skjson] Unescape strings
>
> Bug: skia:
> Change-Id: Ie40f498c87cb57ee59c9bea41b1ff3d81a9b5858
> Reviewed-on: https://skia-review.googlesource.com/c/167240
> Reviewed-by: Mike Klein <mtklein@google.com>
> Commit-Queue: Florin Malita <fmalita@chromium.org>
TBR=mtklein@google.com,fmalita@chromium.org
Change-Id: Ic09ee8e4c57d494f68e15c9e5d0c9fd78de8db47
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: skia:
Reviewed-on: https://skia-review.googlesource.com/c/168263
Reviewed-by: Florin Malita <fmalita@google.com>
Commit-Queue: Florin Malita <fmalita@google.com>
diff --git a/src/utils/SkJSON.cpp b/src/utils/SkJSON.cpp
index feffc28..cb7704c 100644
--- a/src/utils/SkJSON.cpp
+++ b/src/utils/SkJSON.cpp
@@ -8,10 +8,8 @@
#include "SkJSON.h"
#include "SkMalloc.h"
-#include "SkParse.h"
#include "SkStream.h"
#include "SkString.h"
-#include "SkUTF.h"
#include <cmath>
#include <tuple>
@@ -228,7 +226,7 @@
// bit 0 (0x01) - plain ASCII string character
// bit 1 (0x02) - whitespace
-// bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
+// bit 2 (0x04) - string terminator (" \0 [control chars] **AND } ]** <- see matchString notes)
// bit 3 (0x08) - 0-9
// bit 4 (0x10) - 0-9 e E .
// bit 5 (0x20) - scope terminator (} ])
@@ -239,7 +237,7 @@
3, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0x11,1, // 2
0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19, 0x19,0x19, 1, 1, 1, 1, 1, 1, // 3
1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,0x25, 1, 1, // 5
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,0x25, 1, 1, // 5
1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,0x25, 1, 1, // 7
@@ -288,7 +286,6 @@
explicit DOMParser(SkArenaAlloc& alloc)
: fAlloc(alloc) {
fValueStack.reserve(kValueStackReserve);
- fUnescapeBuffer.reserve(kUnescapeBufferReserve);
}
const Value parse(const char* p, size_t size) {
@@ -463,10 +460,6 @@
static constexpr size_t kValueStackReserve = 256;
std::vector<Value> fValueStack;
- // String unescape buffer.
- static constexpr size_t kUnescapeBufferReserve = 512;
- std::vector<char> fUnescapeBuffer;
-
// Tracks the current object/array scope, as an index into fStack:
//
// - for objects: fScopeIndex = (index of first value in scope)
@@ -633,97 +626,28 @@
return this->error(nullptr, p, "invalid token");
}
- const std::vector<char>* unescapeString(const char* begin, const char* end) {
- fUnescapeBuffer.clear();
-
- for (const auto* p = begin; p != end; ++p) {
- if (*p != '\\') {
- fUnescapeBuffer.push_back(*p);
- continue;
- }
-
- if (++p == end) {
- return nullptr;
- }
-
- switch (*p) {
- case '"': fUnescapeBuffer.push_back( '"'); break;
- case '\\': fUnescapeBuffer.push_back('\\'); break;
- case '/': fUnescapeBuffer.push_back( '/'); break;
- case 'b': fUnescapeBuffer.push_back('\b'); break;
- case 'f': fUnescapeBuffer.push_back('\f'); break;
- case 'n': fUnescapeBuffer.push_back('\n'); break;
- case 'r': fUnescapeBuffer.push_back('\r'); break;
- case 't': fUnescapeBuffer.push_back('\t'); break;
- case 'u': {
- if (p + 4 >= end) {
- return nullptr;
- }
-
- uint32_t hexed;
- const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
- const auto* eos = SkParse::FindHex(hex_str, &hexed);
- if (!eos || *eos) {
- return nullptr;
- }
-
- char utf8[SkUTF::kMaxBytesInUTF8Sequence];
- const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
- fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
- p += 4;
- } break;
- default: return nullptr;
- }
- }
-
- return &fUnescapeBuffer;
- }
-
template <typename MatchFunc>
const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
SkASSERT(*p == '"');
const auto* s_begin = p + 1;
- bool requires_unescape = false;
+
+ // TODO: unescape string escape sequences (e.g. \", \\, \uXXXX); for now the raw bytes are passed through as-is.
do {
// Consume string chars.
- // This is the fast path, and hopefully we only hit it once then quick-exit below.
for (p = p + 1; !is_eostring(*p); ++p);
if (*p == '"') {
// Valid string found.
- if (!requires_unescape) {
- func(s_begin, p - s_begin, p_stop);
- } else {
- // Slow unescape. We could avoid this extra copy with some effort,
- // but in practice escaped strings should be rare.
- const auto* buf = this->unescapeString(s_begin, p);
- if (!buf) {
- break;
- }
-
- func(buf->data(), buf->size(), buf->data() + buf->capacity());
- }
+ func(s_begin, p - s_begin, p_stop);
return p + 1;
}
- if (*p == '\\') {
- requires_unescape = true;
- ++p;
- continue;
- }
-
// End-of-scope chars are special: we use them to tag the end of the input.
// Thus they cannot be consumed indiscriminately -- we need to check if we hit the
// end of the input. To that effect, we treat them as string terminators above,
// then we catch them here.
- if (is_eoscope(*p)) {
- continue;
- }
-
- // Invalid/unexpected char.
- break;
- } while (p != p_stop);
+ } while (is_eoscope(*p) && (p != p_stop)); // Safe scope terminator char, keep going.
// Premature end-of-input, or illegal string char.
return this->error(nullptr, s_begin - 1, "invalid string");