blob: 3afe2b73f0aae5c708910932cef8de2cd61abfa9 [file] [log] [blame]
Dmitri Gribenko5bd1e5b2013-01-30 14:29:28 +00001//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend emits an efficient function to translate HTML named
11// character references to UTF-8 sequences.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/SmallString.h"
16#include "llvm/Support/ConvertUTF.h"
17#include "llvm/TableGen/Error.h"
18#include "llvm/TableGen/Record.h"
19#include "llvm/TableGen/StringMatcher.h"
20#include <vector>
21
22using namespace llvm;
23
24/// \brief Convert a code point to the corresponding UTF-8 sequence represented
25/// as a C string literal.
26///
27/// \returns true on success.
28static bool translateCodePointToUTF8(unsigned CodePoint,
29 SmallVectorImpl<char> &CLiteral) {
30 char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
31 char *TranslatedPtr = Translated;
32 if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
33 return false;
34
35 StringRef UTF8(Translated, TranslatedPtr - Translated);
36
37 raw_svector_ostream OS(CLiteral);
38 OS << "\"";
39 for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
40 OS << "\\x";
41 OS.write_hex(static_cast<unsigned char>(UTF8[i]));
42 }
43 OS << "\"";
44
45 return true;
46}
47
48namespace clang {
49void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
50 raw_ostream &OS) {
51 std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
52 std::vector<StringMatcher::StringPair> NameToUTF8;
53 SmallString<32> CLiteral;
54 for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
55 I != E; ++I) {
56 Record &Tag = **I;
57 std::string Spelling = Tag.getValueAsString("Spelling");
58 uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
59 CLiteral.clear();
60 CLiteral.append("return ");
61 if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
62 SrcMgr.PrintMessage(Tag.getLoc().front(),
63 SourceMgr::DK_Error,
64 Twine("invalid code point"));
65 continue;
66 }
67 CLiteral.append(";");
68
69 StringMatcher::StringPair Match(Spelling, CLiteral.str());
70 NameToUTF8.push_back(Match);
71 }
72
73 OS << "// This file is generated by TableGen. Do not edit.\n\n";
74
75 OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
76 " StringRef Name) {\n";
77 StringMatcher("Name", NameToUTF8, OS).Emit();
78 OS << " return StringRef();\n"
79 << "}\n\n";
80}
81
82} // end namespace clang
83