Blame - llvm/lib/Support/JSON.cpp - toolchain/llvm-project

blob: c2025bb2299750a3ea901d1d6745f3ee0e115c5c [file] [log] [blame]

Sam McCall	6be3824	2018-07-09 10:05:41 +0000	[diff] [blame]	1	//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===---------------------------------------------------------------------===//
				9
#include "llvm/Support/JSON.h"
#include "llvm/Support/Format.h"
#include <cctype>
#include <cerrno>
				13
				14	namespace llvm {
				15	namespace json {
				16
				17	Value &Object::operator[](const ObjectKey &K) {
				18	return try_emplace(K, nullptr).first->getSecond();
				19	}
				20	Value &Object::operator[](ObjectKey &&K) {
				21	return try_emplace(std::move(K), nullptr).first->getSecond();
				22	}
				23	Value *Object::get(StringRef K) {
				24	auto I = find(K);
				25	if (I == end())
				26	return nullptr;
				27	return &I->second;
				28	}
				29	const Value *Object::get(StringRef K) const {
				30	auto I = find(K);
				31	if (I == end())
				32	return nullptr;
				33	return &I->second;
				34	}
				35	llvm::Optional<std::nullptr_t> Object::getNull(StringRef K) const {
				36	if (auto *V = get(K))
				37	return V->getAsNull();
				38	return llvm::None;
				39	}
				40	llvm::Optional<bool> Object::getBoolean(StringRef K) const {
				41	if (auto *V = get(K))
				42	return V->getAsBoolean();
				43	return llvm::None;
				44	}
				45	llvm::Optional<double> Object::getNumber(StringRef K) const {
				46	if (auto *V = get(K))
				47	return V->getAsNumber();
				48	return llvm::None;
				49	}
				50	llvm::Optional<int64_t> Object::getInteger(StringRef K) const {
				51	if (auto *V = get(K))
				52	return V->getAsInteger();
				53	return llvm::None;
				54	}
				55	llvm::Optional<llvm::StringRef> Object::getString(StringRef K) const {
				56	if (auto *V = get(K))
				57	return V->getAsString();
				58	return llvm::None;
				59	}
				60	const json::Object *Object::getObject(StringRef K) const {
				61	if (auto *V = get(K))
				62	return V->getAsObject();
				63	return nullptr;
				64	}
				65	json::Object *Object::getObject(StringRef K) {
				66	if (auto *V = get(K))
				67	return V->getAsObject();
				68	return nullptr;
				69	}
				70	const json::Array *Object::getArray(StringRef K) const {
				71	if (auto *V = get(K))
				72	return V->getAsArray();
				73	return nullptr;
				74	}
				75	json::Array *Object::getArray(StringRef K) {
				76	if (auto *V = get(K))
				77	return V->getAsArray();
				78	return nullptr;
				79	}
				80	bool operator==(const Object &LHS, const Object &RHS) {
				81	if (LHS.size() != RHS.size())
				82	return false;
				83	for (const auto &L : LHS) {
				84	auto R = RHS.find(L.first);
				85	if (R == RHS.end() \|\| L.second != R->second)
				86	return false;
				87	}
				88	return true;
				89	}
				90
				91	Array::Array(std::initializer_list<Value> Elements) {
				92	V.reserve(Elements.size());
				93	for (const Value &V : Elements) {
				94	emplace_back(nullptr);
				95	back().moveFrom(std::move(V));
				96	}
				97	}
				98
				99	Value::Value(std::initializer_list<Value> Elements)
				100	: Value(json::Array(Elements)) {}
				101
				102	void Value::copyFrom(const Value &M) {
				103	Type = M.Type;
				104	switch (Type) {
				105	case T_Null:
				106	case T_Boolean:
Sam McCall	d93eaeb	2018-07-09 12:16:40 +0000	[diff] [blame]	107	case T_Double:
				108	case T_Integer:
Sam McCall	6be3824	2018-07-09 10:05:41 +0000	[diff] [blame]	109	memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer));
				110	break;
				111	case T_StringRef:
				112	create<StringRef>(M.as<StringRef>());
				113	break;
				114	case T_String:
				115	create<std::string>(M.as<std::string>());
				116	break;
				117	case T_Object:
				118	create<json::Object>(M.as<json::Object>());
				119	break;
				120	case T_Array:
				121	create<json::Array>(M.as<json::Array>());
				122	break;
				123	}
				124	}
				125
				126	void Value::moveFrom(const Value &&M) {
				127	Type = M.Type;
				128	switch (Type) {
				129	case T_Null:
				130	case T_Boolean:
Sam McCall	d93eaeb	2018-07-09 12:16:40 +0000	[diff] [blame]	131	case T_Double:
				132	case T_Integer:
Sam McCall	6be3824	2018-07-09 10:05:41 +0000	[diff] [blame]	133	memcpy(Union.buffer, M.Union.buffer, sizeof(Union.buffer));
				134	break;
				135	case T_StringRef:
				136	create<StringRef>(M.as<StringRef>());
				137	break;
				138	case T_String:
				139	create<std::string>(std::move(M.as<std::string>()));
				140	M.Type = T_Null;
				141	break;
				142	case T_Object:
				143	create<json::Object>(std::move(M.as<json::Object>()));
				144	M.Type = T_Null;
				145	break;
				146	case T_Array:
				147	create<json::Array>(std::move(M.as<json::Array>()));
				148	M.Type = T_Null;
				149	break;
				150	}
				151	}
				152
				153	void Value::destroy() {
				154	switch (Type) {
				155	case T_Null:
				156	case T_Boolean:
Sam McCall	d93eaeb	2018-07-09 12:16:40 +0000	[diff] [blame]	157	case T_Double:
				158	case T_Integer:
Sam McCall	6be3824	2018-07-09 10:05:41 +0000	[diff] [blame]	159	break;
				160	case T_StringRef:
				161	as<StringRef>().~StringRef();
				162	break;
				163	case T_String:
				164	as<std::string>().~basic_string();
				165	break;
				166	case T_Object:
				167	as<json::Object>().~Object();
				168	break;
				169	case T_Array:
				170	as<json::Array>().~Array();
				171	break;
				172	}
				173	}
				174
				175	bool operator==(const Value &L, const Value &R) {
				176	if (L.kind() != R.kind())
				177	return false;
				178	switch (L.kind()) {
				179	case Value::Null:
				180	return L.getAsNull() == R.getAsNull();
				181	case Value::Boolean:
				182	return L.getAsBoolean() == R.getAsBoolean();
				183	case Value::Number:
				184	return L.getAsNumber() == R.getAsNumber();
				185	case Value::String:
				186	return L.getAsString() == R.getAsString();
				187	case Value::Array:
				188	return L.getAsArray() == R.getAsArray();
				189	case Value::Object:
				190	return L.getAsObject() == R.getAsObject();
				191	}
				192	llvm_unreachable("Unknown value kind");
				193	}
				194
				195	namespace {
				196	// Simple recursive-descent JSON parser.
				197	class Parser {
				198	public:
				199	Parser(StringRef JSON)
				200	: Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
				201
				202	bool parseValue(Value &Out);
				203
				204	bool assertEnd() {
				205	eatWhitespace();
				206	if (P == End)
				207	return true;
				208	return parseError("Text after end of document");
				209	}
				210
				211	Error takeError() {
				212	assert(Err);
				213	return std::move(*Err);
				214	}
				215
				216	private:
				217	void eatWhitespace() {
				218	while (P != End && (P == ' ' \|\| P == '\r' \|\| P == '\n' \|\| P == '\t'))
				219	++P;
				220	}
				221
				222	// On invalid syntax, parseX() functions return false and set Err.
Sam McCall	d93eaeb	2018-07-09 12:16:40 +0000	[diff] [blame]	223	bool parseNumber(char First, Value &Out);
Sam McCall	6be3824	2018-07-09 10:05:41 +0000	[diff] [blame]	224	bool parseString(std::string &Out);
				225	bool parseUnicode(std::string &Out);
				226	bool parseError(const char *Msg); // always returns false
				227
				228	char next() { return P == End ? 0 : *P++; }
				229	char peek() { return P == End ? 0 : *P; }
				230	static bool isNumber(char C) {
				231	return C == '0' \|\| C == '1' \|\| C == '2' \|\| C == '3' \|\| C == '4' \|\|
				232	C == '5' \|\| C == '6' \|\| C == '7' \|\| C == '8' \|\| C == '9' \|\|
				233	C == 'e' \|\| C == 'E' \|\| C == '+' \|\| C == '-' \|\| C == '.';
				234	}
				235
				236	Optional<Error> Err;
				237	const char Start, P, *End;
				238	};
				239
				240	bool Parser::parseValue(Value &Out) {
				241	eatWhitespace();
				242	if (P == End)
				243	return parseError("Unexpected EOF");
				244	switch (char C = next()) {
				245	// Bare null/true/false are easy - first char identifies them.
				246	case 'n':
				247	Out = nullptr;
				248	return (next() == 'u' && next() == 'l' && next() == 'l') \|\|
				249	parseError("Invalid JSON value (null?)");
				250	case 't':
				251	Out = true;
				252	return (next() == 'r' && next() == 'u' && next() == 'e') \|\|
				253	parseError("Invalid JSON value (true?)");
				254	case 'f':
				255	Out = false;
				256	return (next() == 'a' && next() == 'l' && next() == 's' && next() == 'e') \|\|
				257	parseError("Invalid JSON value (false?)");
				258	case '"': {
				259	std::string S;
				260	if (parseString(S)) {
				261	Out = std::move(S);
				262	return true;
				263	}
				264	return false;
				265	}
				266	case '[': {
				267	Out = Array{};
				268	Array &A = *Out.getAsArray();
				269	eatWhitespace();
				270	if (peek() == ']') {
				271	++P;
				272	return true;
				273	}
				274	for (;;) {
				275	A.emplace_back(nullptr);
				276	if (!parseValue(A.back()))
				277	return false;
				278	eatWhitespace();
				279	switch (next()) {
				280	case ',':
				281	eatWhitespace();
				282	continue;
				283	case ']':
				284	return true;
				285	default:
				286	return parseError("Expected , or ] after array element");
				287	}
				288	}
				289	}
				290	case '{': {
				291	Out = Object{};
				292	Object &O = *Out.getAsObject();
				293	eatWhitespace();
				294	if (peek() == '}') {
				295	++P;
				296	return true;
				297	}
				298	for (;;) {
				299	if (next() != '"')
				300	return parseError("Expected object key");
				301	std::string K;
				302	if (!parseString(K))
				303	return false;
				304	eatWhitespace();
				305	if (next() != ':')
				306	return parseError("Expected : after object key");
				307	eatWhitespace();
				308	if (!parseValue(O[std::move(K)]))
				309	return false;
				310	eatWhitespace();
				311	switch (next()) {
				312	case ',':
				313	eatWhitespace();
				314	continue;
				315	case '}':
				316	return true;
				317	default:
				318	return parseError("Expected , or } after object property");
				319	}
				320	}
				321	}
				322	default:
Sam McCall	d93eaeb	2018-07-09 12:16:40 +0000	[diff] [blame]	323	if (isNumber(C))
				324	return parseNumber(C, Out);
Sam McCall	6be3824	2018-07-09 10:05:41 +0000	[diff] [blame]	325	return parseError("Invalid JSON value");
				326	}
				327	}
				328
Sam McCall	d93eaeb	2018-07-09 12:16:40 +0000	[diff] [blame]	329	bool Parser::parseNumber(char First, Value &Out) {
				330	// Read the number into a string. (Must be null-terminated for strto*).
Sam McCall	6be3824	2018-07-09 10:05:41 +0000	[diff] [blame]	331	SmallString<24> S;
				332	S.push_back(First);
				333	while (isNumber(peek()))
				334	S.push_back(next());
				335	char *End;
Sam McCall	d93eaeb	2018-07-09 12:16:40 +0000	[diff] [blame]	336	// Try first to parse as integer, and if so preserve full 64 bits.
				337	// strtoll returns long long >= 64 bits, so check it's in range too.
				338	auto I = std::strtoll(S.c_str(), &End, 10);
				339	if (End == S.end() && I >= std::numeric_limits<int64_t>::min() &&
				340	I <= std::numeric_limits<int64_t>::max()) {
				341	Out = int64_t(I);
				342	return true;
				343	}
				344	// If it's not an integer
Sam McCall	6be3824	2018-07-09 10:05:41 +0000	[diff] [blame]	345	Out = std::strtod(S.c_str(), &End);
				346	return End == S.end() \|\| parseError("Invalid JSON value (number?)");
				347	}
				348
				349	bool Parser::parseString(std::string &Out) {
				350	// leading quote was already consumed.
				351	for (char C = next(); C != '"'; C = next()) {
				352	if (LLVM_UNLIKELY(P == End))
				353	return parseError("Unterminated string");
				354	if (LLVM_UNLIKELY((C & 0x1f) == C))
				355	return parseError("Control character in string");
				356	if (LLVM_LIKELY(C != '\\')) {
				357	Out.push_back(C);
				358	continue;
				359	}
				360	// Handle escape sequence.
				361	switch (C = next()) {
				362	case '"':
				363	case '\\':
				364	case '/':
				365	Out.push_back(C);
				366	break;
				367	case 'b':
				368	Out.push_back('\b');
				369	break;
				370	case 'f':
				371	Out.push_back('\f');
				372	break;
				373	case 'n':
				374	Out.push_back('\n');
				375	break;
				376	case 'r':
				377	Out.push_back('\r');
				378	break;
				379	case 't':
				380	Out.push_back('\t');
				381	break;
				382	case 'u':
				383	if (!parseUnicode(Out))
				384	return false;
				385	break;
				386	default:
				387	return parseError("Invalid escape sequence");
				388	}
				389	}
				390	return true;
				391	}
				392
				393	static void encodeUtf8(uint32_t Rune, std::string &Out) {
				394	if (Rune < 0x80) {
				395	Out.push_back(Rune & 0x7F);
				396	} else if (Rune < 0x800) {
				397	uint8_t FirstByte = 0xC0 \| ((Rune & 0x7C0) >> 6);
				398	uint8_t SecondByte = 0x80 \| (Rune & 0x3F);
				399	Out.push_back(FirstByte);
				400	Out.push_back(SecondByte);
				401	} else if (Rune < 0x10000) {
				402	uint8_t FirstByte = 0xE0 \| ((Rune & 0xF000) >> 12);
				403	uint8_t SecondByte = 0x80 \| ((Rune & 0xFC0) >> 6);
				404	uint8_t ThirdByte = 0x80 \| (Rune & 0x3F);
				405	Out.push_back(FirstByte);
				406	Out.push_back(SecondByte);
				407	Out.push_back(ThirdByte);
				408	} else if (Rune < 0x110000) {
				409	uint8_t FirstByte = 0xF0 \| ((Rune & 0x1F0000) >> 18);
				410	uint8_t SecondByte = 0x80 \| ((Rune & 0x3F000) >> 12);
				411	uint8_t ThirdByte = 0x80 \| ((Rune & 0xFC0) >> 6);
				412	uint8_t FourthByte = 0x80 \| (Rune & 0x3F);
				413	Out.push_back(FirstByte);
				414	Out.push_back(SecondByte);
				415	Out.push_back(ThirdByte);
				416	Out.push_back(FourthByte);
				417	} else {
				418	llvm_unreachable("Invalid codepoint");
				419	}
				420	}
				421
				422	// Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
				423	// May parse several sequential escapes to ensure proper surrogate handling.
				424	// We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
				425	// These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
				426	bool Parser::parseUnicode(std::string &Out) {
				427	// Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
				428	auto Invalid = [&] { Out.append(/* UTF-8 */ {'\xef', '\xbf', '\xbd'}); };
				429	// Decodes 4 hex digits from the stream into Out, returns false on error.
				430	auto Parse4Hex = [this](uint16_t &Out) -> bool {
				431	Out = 0;
				432	char Bytes[] = {next(), next(), next(), next()};
				433	for (unsigned char C : Bytes) {
				434	if (!std::isxdigit(C))
				435	return parseError("Invalid \\u escape sequence");
				436	Out <<= 4;
				437	Out \|= (C > '9') ? (C & ~0x20) - 'A' + 10 : (C - '0');
				438	}
				439	return true;
				440	};
				441	uint16_t First; // UTF-16 code unit from the first \u escape.
				442	if (!Parse4Hex(First))
				443	return false;
				444
				445	// We loop to allow proper surrogate-pair error handling.
				446	while (true) {
				447	// Case 1: the UTF-16 code unit is already a codepoint in the BMP.
				448	if (LLVM_LIKELY(First < 0xD800 \|\| First >= 0xE000)) {
				449	encodeUtf8(First, Out);
				450	return true;
				451	}
				452
				453	// Case 2: it's an (unpaired) trailing surrogate.
				454	if (LLVM_UNLIKELY(First >= 0xDC00)) {
				455	Invalid();
				456	return true;
				457	}
				458
				459	// Case 3: it's a leading surrogate. We expect a trailing one next.
				460	// Case 3a: there's no trailing \u escape. Don't advance in the stream.
				461	if (!LLVM_LIKELY(P + 2 <= End && P == '\\' && (P + 1) == 'u')) {
				462	Invalid(); // Leading surrogate was unpaired.
				463	return true;
				464	}
				465	P += 2;
				466	uint16_t Second;
				467	if (!Parse4Hex(Second))
				468	return false;
				469	// Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
				470	if (LLVM_UNLIKELY(Second < 0xDC00 \|\| Second >= 0xE000)) {
				471	Invalid(); // Leading surrogate was unpaired.
				472	First = Second; // Second escape still needs to be processed.
				473	continue;
				474	}
				475	// Case 3c: a valid surrogate pair encoding an astral codepoint.
				476	encodeUtf8(0x10000 \| ((First - 0xD800) << 10) \| (Second - 0xDC00), Out);
				477	return true;
				478	}
				479	}
				480
				481	bool Parser::parseError(const char *Msg) {
				482	int Line = 1;
				483	const char *StartOfLine = Start;
				484	for (const char *X = Start; X < P; ++X) {
				485	if (*X == 0x0A) {
				486	++Line;
				487	StartOfLine = X + 1;
				488	}
				489	}
				490	Err.emplace(
				491	llvm::make_unique<ParseError>(Msg, Line, P - StartOfLine, P - Start));
				492	return false;
				493	}
				494	} // namespace
				495
				496	Expected<Value> parse(StringRef JSON) {
				497	Parser P(JSON);
				498	Value E = nullptr;
				499	if (P.parseValue(E))
				500	if (P.assertEnd())
				501	return std::move(E);
				502	return P.takeError();
				503	}
				504	char ParseError::ID = 0;
				505
				506	static std::vector<const Object::value_type *> sortedElements(const Object &O) {
				507	std::vector<const Object::value_type *> Elements;
				508	for (const auto &E : O)
				509	Elements.push_back(&E);
				510	llvm::sort(Elements.begin(), Elements.end(),
				511	[](const Object::value_type L, const Object::value_type R) {
				512	return L->first < R->first;
				513	});
				514	return Elements;
				515	}
				516
				517	} // namespace json
				518	} // namespace llvm
				519
				520	static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
				521	OS << '\"';
				522	for (unsigned char C : S) {
				523	if (C == 0x22 \|\| C == 0x5C)
				524	OS << '\\';
				525	if (C >= 0x20) {
				526	OS << C;
				527	continue;
				528	}
				529	OS << '\\';
				530	switch (C) {
				531	// A few characters are common enough to make short escapes worthwhile.
				532	case '\t':
				533	OS << 't';
				534	break;
				535	case '\n':
				536	OS << 'n';
				537	break;
				538	case '\r':
				539	OS << 'r';
				540	break;
				541	default:
				542	OS << 'u';
				543	llvm::write_hex(OS, C, llvm::HexPrintStyle::Lower, 4);
				544	break;
				545	}
				546	}
				547	OS << '\"';
				548	}
				549
				550	enum IndenterAction {
				551	Indent,
				552	Outdent,
				553	Newline,
				554	Space,
				555	};
				556
				557	// Prints JSON. The indenter can be used to control formatting.
				558	template <typename Indenter>
				559	void llvm::json::Value::print(raw_ostream &OS, const Indenter &I) const {
				560	switch (Type) {
				561	case T_Null:
				562	OS << "null";
				563	break;
				564	case T_Boolean:
				565	OS << (as<bool>() ? "true" : "false");
				566	break;
Sam McCall	d93eaeb	2018-07-09 12:16:40 +0000	[diff] [blame]	567	case T_Double:
				568	OS << format("%.*g", std::numeric_limits<double>::max_digits10,
				569	as<double>());
				570	break;
				571	case T_Integer:
				572	OS << as<int64_t>();
Sam McCall	6be3824	2018-07-09 10:05:41 +0000	[diff] [blame]	573	break;
				574	case T_StringRef:
				575	quote(OS, as<StringRef>());
				576	break;
				577	case T_String:
				578	quote(OS, as<std::string>());
				579	break;
				580	case T_Object: {
				581	bool Comma = false;
				582	OS << '{';
				583	I(Indent);
				584	for (const auto *P : sortedElements(as<json::Object>())) {
				585	if (Comma)
				586	OS << ',';
				587	Comma = true;
				588	I(Newline);
				589	quote(OS, P->first);
				590	OS << ':';
				591	I(Space);
				592	P->second.print(OS, I);
				593	}
				594	I(Outdent);
				595	if (Comma)
				596	I(Newline);
				597	OS << '}';
				598	break;
				599	}
				600	case T_Array: {
				601	bool Comma = false;
				602	OS << '[';
				603	I(Indent);
				604	for (const auto &E : as<json::Array>()) {
				605	if (Comma)
				606	OS << ',';
				607	Comma = true;
				608	I(Newline);
				609	E.print(OS, I);
				610	}
				611	I(Outdent);
				612	if (Comma)
				613	I(Newline);
				614	OS << ']';
				615	break;
				616	}
				617	}
				618	}
				619
				620	void llvm::format_provider<llvm::json::Value>::format(
				621	const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
				622	if (Options.empty()) {
				623	OS << E;
				624	return;
				625	}
				626	unsigned IndentAmount = 0;
				627	if (Options.getAsInteger(/Radix=/10, IndentAmount))
				628	llvm_unreachable("json::Value format options should be an integer");
				629	unsigned IndentLevel = 0;
				630	E.print(OS, [&](IndenterAction A) {
				631	switch (A) {
				632	case Newline:
				633	OS << '\n';
				634	OS.indent(IndentLevel);
				635	break;
				636	case Space:
				637	OS << ' ';
				638	break;
				639	case Indent:
				640	IndentLevel += IndentAmount;
				641	break;
				642	case Outdent:
				643	IndentLevel -= IndentAmount;
				644	break;
				645	};
				646	});
				647	}
				648
				649	llvm::raw_ostream &llvm::json::operator<<(raw_ostream &OS, const Value &E) {
				650	E.print(OS, [](IndenterAction A) { /ignore/ });
				651	return OS;
				652	}