Blame - llvm/lib/Support/DJB.cpp - toolchain/llvm-project

blob: 905dcf1b7e81a0ee55e917b35562fc24b3784509 [file] [log] [blame]

Jonas Devlieghere	92ac9d3	2018-01-28 11:05:10 +0000	[diff] [blame]	1	//===-- Support/DJB.cpp ---DJB Hash ------------------------------ C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file contains support for the DJ Bernstein hash function.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "llvm/Support/DJB.h"
Pavel Labath	3b17b84	2018-02-21 22:36:31 +0000	[diff] [blame]	15	#include "llvm/ADT/ArrayRef.h"
				16	#include "llvm/Support/Compiler.h"
				17	#include "llvm/Support/ConvertUTF.h"
				18	#include "llvm/Support/Unicode.h"
				19
				20	using namespace llvm;
				21
Pavel Labath	3b17b84	2018-02-21 22:36:31 +0000	[diff] [blame]	22	static UTF32 chopOneUTF32(StringRef &Buffer) {
				23	UTF32 C;
				24	const UTF8 *const Begin8Const =
				25	reinterpret_cast<const UTF8 *>(Buffer.begin());
				26	const UTF8 *Begin8 = Begin8Const;
				27	UTF32 *Begin32 = &C;
				28
				29	// In lenient mode we will always end up with a "reasonable" value in C for
				30	// non-empty input.
				31	assert(!Buffer.empty());
				32	ConvertUTF8toUTF32(&Begin8, reinterpret_cast<const UTF8 *>(Buffer.end()),
				33	&Begin32, &C + 1, lenientConversion);
				34	Buffer = Buffer.drop_front(Begin8 - Begin8Const);
				35	return C;
				36	}
				37
				38	static StringRef toUTF8(UTF32 C, MutableArrayRef<UTF8> Storage) {
				39	const UTF32 *Begin32 = &C;
				40	UTF8 *Begin8 = Storage.begin();
				41
				42	// The case-folded output should always be a valid unicode character, so use
				43	// strict mode here.
				44	ConversionResult CR = ConvertUTF32toUTF8(&Begin32, &C + 1, &Begin8,
				45	Storage.end(), strictConversion);
				46	assert(CR == conversionOK && "Case folding produced invalid char?");
				47	(void)CR;
				48	return StringRef(reinterpret_cast<char *>(Storage.begin()),
				49	Begin8 - Storage.begin());
				50	}
				51
				52	static UTF32 foldCharDwarf(UTF32 C) {
				53	// DWARF v5 addition to the unicode folding rules.
				54	// Fold "Latin Small Letter Dotless I" and "Latin Capital Letter I With Dot
				55	// Above" into "i".
				56	if (C == 0x130 \|\| C == 0x131)
				57	return 'i';
				58	return sys::unicode::foldCharSimple(C);
				59	}
				60
				61	static uint32_t caseFoldingDjbHashCharSlow(StringRef &Buffer, uint32_t H) {
				62	UTF32 C = chopOneUTF32(Buffer);
				63
				64	C = foldCharDwarf(C);
				65
				66	std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;
				67	StringRef Folded = toUTF8(C, Storage);
				68	return djbHash(Folded, H);
				69	}
				70
				71	uint32_t llvm::caseFoldingDjbHash(StringRef Buffer, uint32_t H) {
				72	while (!Buffer.empty()) {
				73	unsigned char C = Buffer.front();
				74	if (LLVM_LIKELY(C <= 0x7f)) {
				75	// US-ASCII, encoded as one character in utf-8.
				76	// This is by far the most common case, so handle this specially.
				77	if (C >= 'A' && C <= 'Z')
				78	C = 'a' + (C - 'A'); // fold uppercase into lowercase
Rui Ueyama	e403c86	2018-03-02 22:00:38 +0000	[diff] [blame]	79	H = (H << 5) + H + C;
Pavel Labath	3b17b84	2018-02-21 22:36:31 +0000	[diff] [blame]	80	Buffer = Buffer.drop_front();
				81	continue;
				82	}
				83	H = caseFoldingDjbHashCharSlow(Buffer, H);
				84	}
Jonas Devlieghere	92ac9d3	2018-01-28 11:05:10 +0000	[diff] [blame]	85	return H;
				86	}