Blame - lib/Rewrite/HTMLRewrite.cpp - platform/external/clang

blob: 342b0e6ef5e5bc5516f28ec16dfe6c50f0b606f6 [file] [log] [blame]

Shih-wei Liao	f8fd82b	2010-02-10 11:10:31 -0800	[diff] [blame^]	1	//== HTMLRewrite.cpp - Translate source code into prettified HTML --- C++ --//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file defines the HTMLRewriter class, which is used to translate the
				11	// text of a source file into prettified HTML.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "clang/Lex/Preprocessor.h"
				16	#include "clang/Rewrite/Rewriter.h"
				17	#include "clang/Rewrite/HTMLRewrite.h"
				18	#include "clang/Lex/TokenConcatenation.h"
				19	#include "clang/Lex/Preprocessor.h"
				20	#include "clang/Basic/SourceManager.h"
				21	#include "llvm/ADT/SmallString.h"
				22	#include "llvm/ADT/OwningPtr.h"
				23	#include "llvm/Support/MemoryBuffer.h"
				24	#include "llvm/Support/raw_ostream.h"
				25	using namespace clang;
				26
				27
				28	/// HighlightRange - Highlight a range in the source code with the specified
				29	/// start/end tags. B/E must be in the same file. This ensures that
				30	/// start/end tags are placed at the start/end of each line if the range is
				31	/// multiline.
				32	void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E,
				33	const char StartTag, const char EndTag) {
				34	SourceManager &SM = R.getSourceMgr();
				35	B = SM.getInstantiationLoc(B);
				36	E = SM.getInstantiationLoc(E);
				37	FileID FID = SM.getFileID(B);
				38	assert(SM.getFileID(E) == FID && "B/E not in the same file!");
				39
				40	unsigned BOffset = SM.getFileOffset(B);
				41	unsigned EOffset = SM.getFileOffset(E);
				42
				43	// Include the whole end token in the range.
				44	EOffset += Lexer::MeasureTokenLength(E, R.getSourceMgr(), R.getLangOpts());
				45
				46	HighlightRange(R.getEditBuffer(FID), BOffset, EOffset,
				47	SM.getBufferData(FID).first, StartTag, EndTag);
				48	}
				49
				50	/// HighlightRange - This is the same as the above method, but takes
				51	/// decomposed file locations.
				52	void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E,
				53	const char *BufferStart,
				54	const char StartTag, const char EndTag) {
				55	// Insert the tag at the absolute start/end of the range.
				56	RB.InsertTextAfter(B, StartTag);
				57	RB.InsertTextBefore(E, EndTag);
				58
				59	// Scan the range to see if there is a \r or \n. If so, and if the line is
				60	// not blank, insert tags on that line as well.
				61	bool HadOpenTag = true;
				62
				63	unsigned LastNonWhiteSpace = B;
				64	for (unsigned i = B; i != E; ++i) {
				65	switch (BufferStart[i]) {
				66	case '\r':
				67	case '\n':
				68	// Okay, we found a newline in the range. If we have an open tag, we need
				69	// to insert a close tag at the first non-whitespace before the newline.
				70	if (HadOpenTag)
				71	RB.InsertTextBefore(LastNonWhiteSpace+1, EndTag);
				72
				73	// Instead of inserting an open tag immediately after the newline, we
				74	// wait until we see a non-whitespace character. This prevents us from
				75	// inserting tags around blank lines, and also allows the open tag to
				76	// be put after whitespace on a non-blank line.
				77	HadOpenTag = false;
				78	break;
				79	case '\0':
				80	case ' ':
				81	case '\t':
				82	case '\f':
				83	case '\v':
				84	// Ignore whitespace.
				85	break;
				86
				87	default:
				88	// If there is no tag open, do it now.
				89	if (!HadOpenTag) {
				90	RB.InsertTextAfter(i, StartTag);
				91	HadOpenTag = true;
				92	}
				93
				94	// Remember this character.
				95	LastNonWhiteSpace = i;
				96	break;
				97	}
				98	}
				99	}
				100
				101	void html::EscapeText(Rewriter &R, FileID FID,
				102	bool EscapeSpaces, bool ReplaceTabs) {
				103
				104	const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
				105	const char* C = Buf->getBufferStart();
				106	const char* FileEnd = Buf->getBufferEnd();
				107
				108	assert (C <= FileEnd);
				109
				110	RewriteBuffer &RB = R.getEditBuffer(FID);
				111
				112	unsigned ColNo = 0;
				113	for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) {
				114	switch (*C) {
				115	default: ++ColNo; break;
				116	case '\n':
				117	case '\r':
				118	ColNo = 0;
				119	break;
				120
				121	case ' ':
				122	if (EscapeSpaces)
				123	RB.ReplaceText(FilePos, 1, " ");
				124	++ColNo;
				125	break;
				126	case '\f':
				127	RB.ReplaceText(FilePos, 1, "<hr>");
				128	ColNo = 0;
				129	break;
				130
				131	case '\t': {
				132	if (!ReplaceTabs)
				133	break;
				134	unsigned NumSpaces = 8-(ColNo&7);
				135	if (EscapeSpaces)
				136	RB.ReplaceText(FilePos, 1,
				137	llvm::StringRef("     "
				138	"   ", 6*NumSpaces));
				139	else
				140	RB.ReplaceText(FilePos, 1, llvm::StringRef(" ", NumSpaces));
				141	ColNo += NumSpaces;
				142	break;
				143	}
				144	case '<':
				145	RB.ReplaceText(FilePos, 1, "<");
				146	++ColNo;
				147	break;
				148
				149	case '>':
				150	RB.ReplaceText(FilePos, 1, ">");
				151	++ColNo;
				152	break;
				153
				154	case '&':
				155	RB.ReplaceText(FilePos, 1, "&");
				156	++ColNo;
				157	break;
				158	}
				159	}
				160	}
				161
				162	std::string html::EscapeText(const std::string& s, bool EscapeSpaces,
				163	bool ReplaceTabs) {
				164
				165	unsigned len = s.size();
				166	std::string Str;
				167	llvm::raw_string_ostream os(Str);
				168
				169	for (unsigned i = 0 ; i < len; ++i) {
				170
				171	char c = s[i];
				172	switch (c) {
				173	default:
				174	os << c; break;
				175
				176	case ' ':
				177	if (EscapeSpaces) os << " ";
				178	else os << ' ';
				179	break;
				180
				181	case '\t':
				182	if (ReplaceTabs) {
				183	if (EscapeSpaces)
				184	for (unsigned i = 0; i < 4; ++i)
				185	os << " ";
				186	else
				187	for (unsigned i = 0; i < 4; ++i)
				188	os << " ";
				189	}
				190	else
				191	os << c;
				192
				193	break;
				194
				195	case '<': os << "<"; break;
				196	case '>': os << ">"; break;
				197	case '&': os << "&"; break;
				198	}
				199	}
				200
				201	return os.str();
				202	}
				203
				204	static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo,
				205	unsigned B, unsigned E) {
				206	llvm::SmallString<256> Str;
				207	llvm::raw_svector_ostream OS(Str);
				208
				209	OS << "<tr><td class=\"num\" id=\"LN"
				210	<< LineNo << "\">"
				211	<< LineNo << "</td><td class=\"line\">";
				212
				213	if (B == E) { // Handle empty lines.
				214	OS << " </td></tr>";
				215	RB.InsertTextBefore(B, OS.str());
				216	} else {
				217	RB.InsertTextBefore(B, OS.str());
				218	RB.InsertTextBefore(E, "</td></tr>");
				219	}
				220	}
				221
				222	void html::AddLineNumbers(Rewriter& R, FileID FID) {
				223
				224	const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
				225	const char* FileBeg = Buf->getBufferStart();
				226	const char* FileEnd = Buf->getBufferEnd();
				227	const char* C = FileBeg;
				228	RewriteBuffer &RB = R.getEditBuffer(FID);
				229
				230	assert (C <= FileEnd);
				231
				232	unsigned LineNo = 0;
				233	unsigned FilePos = 0;
				234
				235	while (C != FileEnd) {
				236
				237	++LineNo;
				238	unsigned LineStartPos = FilePos;
				239	unsigned LineEndPos = FileEnd - FileBeg;
				240
				241	assert (FilePos <= LineEndPos);
				242	assert (C < FileEnd);
				243
				244	// Scan until the newline (or end-of-file).
				245
				246	while (C != FileEnd) {
				247	char c = *C;
				248	++C;
				249
				250	if (c == '\n') {
				251	LineEndPos = FilePos++;
				252	break;
				253	}
				254
				255	++FilePos;
				256	}
				257
				258	AddLineNumber(RB, LineNo, LineStartPos, LineEndPos);
				259	}
				260
				261	// Add one big table tag that surrounds all of the code.
				262	RB.InsertTextBefore(0, "<table class=\"code\">\n");
				263	RB.InsertTextAfter(FileEnd - FileBeg, "</table>");
				264	}
				265
				266	void html::AddHeaderFooterInternalBuiltinCSS(Rewriter& R, FileID FID,
				267	const char *title) {
				268
				269	const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
				270	const char* FileStart = Buf->getBufferStart();
				271	const char* FileEnd = Buf->getBufferEnd();
				272
				273	SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID);
				274	SourceLocation EndLoc = StartLoc.getFileLocWithOffset(FileEnd-FileStart);
				275
				276	std::string s;
				277	llvm::raw_string_ostream os(s);
				278	os << "<!doctype html>\n" // Use HTML 5 doctype
				279	"<html>\n<head>\n";
				280
				281	if (title)
				282	os << "<title>" << html::EscapeText(title) << "</title>\n";
				283
				284	os << "<style type=\"text/css\">\n"
				285	" body { color:#000000; background-color:#ffffff }\n"
				286	" body { font-family:Helvetica, sans-serif; font-size:10pt }\n"
				287	" h1 { font-size:14pt }\n"
				288	" .code { border-collapse:collapse; width:100%; }\n"
				289	" .code { font-family: \"Andale Mono\", monospace; font-size:10pt }\n"
				290	" .code { line-height: 1.2em }\n"
				291	" .comment { color: green; font-style: oblique }\n"
				292	" .keyword { color: blue }\n"
				293	" .string_literal { color: red }\n"
				294	" .directive { color: darkmagenta }\n"
				295	// Macro expansions.
				296	" .expansion { display: none; }\n"
				297	" .macro:hover .expansion { display: block; border: 2px solid #FF0000; "
				298	"padding: 2px; background-color:#FFF0F0; font-weight: normal; "
				299	" -webkit-border-radius:5px; -webkit-box-shadow:1px 1px 7px #000; "
				300	"position: absolute; top: -1em; left:10em; z-index: 1 } \n"
				301	" .macro { color: darkmagenta; background-color:LemonChiffon;"
				302	// Macros are position: relative to provide base for expansions.
				303	" position: relative }\n"
				304	" .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }\n"
				305	" .num { text-align:right; font-size:8pt }\n"
				306	" .num { color:#444444 }\n"
				307	" .line { padding-left: 1ex; border-left: 3px solid #ccc }\n"
				308	" .line { white-space: pre }\n"
				309	" .msg { -webkit-box-shadow:1px 1px 7px #000 }\n"
				310	" .msg { -webkit-border-radius:5px }\n"
				311	" .msg { font-family:Helvetica, sans-serif; font-size:8pt }\n"
				312	" .msg { float:left }\n"
				313	" .msg { padding:0.25em 1ex 0.25em 1ex }\n"
				314	" .msg { margin-top:10px; margin-bottom:10px }\n"
				315	" .msg { font-weight:bold }\n"
				316	" .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }\n"
				317	" .msgT { padding:0x; spacing:0x }\n"
				318	" .msgEvent { background-color:#fff8b4; color:#000000 }\n"
				319	" .msgControl { background-color:#bbbbbb; color:#000000 }\n"
				320	" .mrange { background-color:#dfddf3 }\n"
				321	" .mrange { border-bottom:1px solid #6F9DBE }\n"
				322	" .PathIndex { font-weight: bold; padding:0px 5px 0px 5px; "
				323	"margin-right:5px; }\n"
				324	" .PathIndex { -webkit-border-radius:8px }\n"
				325	" .PathIndexEvent { background-color:#bfba87 }\n"
				326	" .PathIndexControl { background-color:#8c8c8c }\n"
				327	" .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }\n"
				328	" .CodeRemovalHint { background-color:#de1010 }\n"
				329	" .CodeRemovalHint { border-bottom:1px solid #6F9DBE }\n"
				330	" table.simpletable {\n"
				331	" padding: 5px;\n"
				332	" font-size:12pt;\n"
				333	" margin:20px;\n"
				334	" border-collapse: collapse; border-spacing: 0px;\n"
				335	" }\n"
				336	" td.rowname {\n"
				337	" text-align:right; font-weight:bold; color:#444444;\n"
				338	" padding-right:2ex; }\n"
				339	"</style>\n</head>\n<body>";
				340
				341	// Generate header
				342	R.InsertTextBefore(StartLoc, os.str());
				343	// Generate footer
				344
				345	R.InsertTextAfter(EndLoc, "</body></html>\n");
				346	}
				347
				348	/// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
				349	/// information about keywords, macro expansions etc. This uses the macro
				350	/// table state from the end of the file, so it won't be perfectly perfect,
				351	/// but it will be reasonably close.
				352	void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
				353	RewriteBuffer &RB = R.getEditBuffer(FID);
				354
				355	const SourceManager &SM = PP.getSourceManager();
				356	const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
				357	Lexer L(FID, FromFile, SM, PP.getLangOptions());
				358	const char *BufferStart = L.getBufferStart();
				359
				360	// Inform the preprocessor that we want to retain comments as tokens, so we
				361	// can highlight them.
				362	L.SetCommentRetentionState(true);
				363
				364	// Lex all the tokens in raw mode, to avoid entering #includes or expanding
				365	// macros.
				366	Token Tok;
				367	L.LexFromRawLexer(Tok);
				368
				369	while (Tok.isNot(tok::eof)) {
				370	// Since we are lexing unexpanded tokens, all tokens are from the main
				371	// FileID.
				372	unsigned TokOffs = SM.getFileOffset(Tok.getLocation());
				373	unsigned TokLen = Tok.getLength();
				374	switch (Tok.getKind()) {
				375	default: break;
				376	case tok::identifier: {
				377	// Fill in Result.IdentifierInfo, looking up the identifier in the
				378	// identifier table.
				379	const IdentifierInfo *II =
				380	PP.LookUpIdentifierInfo(Tok, BufferStart+TokOffs);
				381
				382	// If this is a pp-identifier, for a keyword, highlight it as such.
				383	if (II->getTokenID() != tok::identifier)
				384	HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
				385	"<span class='keyword'>", "</span>");
				386	break;
				387	}
				388	case tok::comment:
				389	HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
				390	"<span class='comment'>", "</span>");
				391	break;
				392	case tok::wide_string_literal:
				393	// Chop off the L prefix
				394	++TokOffs;
				395	--TokLen;
				396	// FALL THROUGH.
				397	case tok::string_literal:
				398	HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
				399	"<span class='string_literal'>", "</span>");
				400	break;
				401	case tok::hash: {
				402	// If this is a preprocessor directive, all tokens to end of line are too.
				403	if (!Tok.isAtStartOfLine())
				404	break;
				405
				406	// Eat all of the tokens until we get to the next one at the start of
				407	// line.
				408	unsigned TokEnd = TokOffs+TokLen;
				409	L.LexFromRawLexer(Tok);
				410	while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) {
				411	TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength();
				412	L.LexFromRawLexer(Tok);
				413	}
				414
				415	// Find end of line. This is a hack.
				416	HighlightRange(RB, TokOffs, TokEnd, BufferStart,
				417	"<span class='directive'>", "</span>");
				418
				419	// Don't skip the next token.
				420	continue;
				421	}
				422	}
				423
				424	L.LexFromRawLexer(Tok);
				425	}
				426	}
				427
				428	namespace {
				429	/// IgnoringDiagClient - This is a diagnostic client that just ignores all
				430	/// diags.
				431	class IgnoringDiagClient : public DiagnosticClient {
				432	void HandleDiagnostic(Diagnostic::Level DiagLevel,
				433	const DiagnosticInfo &Info) {
				434	// Just ignore it.
				435	}
				436	};
				437	}
				438
				439	/// HighlightMacros - This uses the macro table state from the end of the
				440	/// file, to re-expand macros and insert (into the HTML) information about the
				441	/// macro expansions. This won't be perfectly perfect, but it will be
				442	/// reasonably close.
				443	void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
				444	// Re-lex the raw token stream into a token buffer.
				445	const SourceManager &SM = PP.getSourceManager();
				446	std::vector<Token> TokenStream;
				447
				448	const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
				449	Lexer L(FID, FromFile, SM, PP.getLangOptions());
				450
				451	// Lex all the tokens in raw mode, to avoid entering #includes or expanding
				452	// macros.
				453	while (1) {
				454	Token Tok;
				455	L.LexFromRawLexer(Tok);
				456
				457	// If this is a # at the start of a line, discard it from the token stream.
				458	// We don't want the re-preprocess step to see #defines, #includes or other
				459	// preprocessor directives.
				460	if (Tok.is(tok::hash) && Tok.isAtStartOfLine())
				461	continue;
				462
				463	// If this is a ## token, change its kind to unknown so that repreprocessing
				464	// it will not produce an error.
				465	if (Tok.is(tok::hashhash))
				466	Tok.setKind(tok::unknown);
				467
				468	// If this raw token is an identifier, the raw lexer won't have looked up
				469	// the corresponding identifier info for it. Do this now so that it will be
				470	// macro expanded when we re-preprocess it.
				471	if (Tok.is(tok::identifier)) {
				472	// Change the kind of this identifier to the appropriate token kind, e.g.
				473	// turning "for" into a keyword.
				474	Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID());
				475	}
				476
				477	TokenStream.push_back(Tok);
				478
				479	if (Tok.is(tok::eof)) break;
				480	}
				481
				482	// Temporarily change the diagnostics object so that we ignore any generated
				483	// diagnostics from this pass.
				484	IgnoringDiagClient TmpDC;
				485	Diagnostic TmpDiags(&TmpDC);
				486
				487	// FIXME: This is a huge hack; we reuse the input preprocessor because we want
				488	// its state, but we aren't actually changing it (we hope). This should really
				489	// construct a copy of the preprocessor.
				490	Preprocessor &TmpPP = const_cast<Preprocessor&>(PP);
				491	Diagnostic *OldDiags = &TmpPP.getDiagnostics();
				492	TmpPP.setDiagnostics(TmpDiags);
				493
				494	// Inform the preprocessor that we don't want comments.
				495	TmpPP.SetCommentRetentionState(false, false);
				496
				497	// Enter the tokens we just lexed. This will cause them to be macro expanded
				498	// but won't enter sub-files (because we removed #'s).
				499	TmpPP.EnterTokenStream(&TokenStream[0], TokenStream.size(), false, false);
				500
				501	TokenConcatenation ConcatInfo(TmpPP);
				502
				503	// Lex all the tokens.
				504	Token Tok;
				505	TmpPP.Lex(Tok);
				506	while (Tok.isNot(tok::eof)) {
				507	// Ignore non-macro tokens.
				508	if (!Tok.getLocation().isMacroID()) {
				509	TmpPP.Lex(Tok);
				510	continue;
				511	}
				512
				513	// Okay, we have the first token of a macro expansion: highlight the
				514	// instantiation by inserting a start tag before the macro instantiation and
				515	// end tag after it.
				516	std::pair<SourceLocation, SourceLocation> LLoc =
				517	SM.getInstantiationRange(Tok.getLocation());
				518
				519	// Ignore tokens whose instantiation location was not the main file.
				520	if (SM.getFileID(LLoc.first) != FID) {
				521	TmpPP.Lex(Tok);
				522	continue;
				523	}
				524
				525	assert(SM.getFileID(LLoc.second) == FID &&
				526	"Start and end of expansion must be in the same ultimate file!");
				527
				528	std::string Expansion = EscapeText(TmpPP.getSpelling(Tok));
				529	unsigned LineLen = Expansion.size();
				530
				531	Token PrevTok = Tok;
				532	// Okay, eat this token, getting the next one.
				533	TmpPP.Lex(Tok);
				534
				535	// Skip all the rest of the tokens that are part of this macro
				536	// instantiation. It would be really nice to pop up a window with all the
				537	// spelling of the tokens or something.
				538	while (!Tok.is(tok::eof) &&
				539	SM.getInstantiationLoc(Tok.getLocation()) == LLoc.first) {
				540	// Insert a newline if the macro expansion is getting large.
				541	if (LineLen > 60) {
				542	Expansion += "<br>";
				543	LineLen = 0;
				544	}
				545
				546	LineLen -= Expansion.size();
				547
				548	// If the tokens were already space separated, or if they must be to avoid
				549	// them being implicitly pasted, add a space between them.
				550	if (Tok.hasLeadingSpace() \|\|
				551	ConcatInfo.AvoidConcat(PrevTok, Tok))
				552	Expansion += ' ';
				553
				554	// Escape any special characters in the token text.
				555	Expansion += EscapeText(TmpPP.getSpelling(Tok));
				556	LineLen += Expansion.size();
				557
				558	PrevTok = Tok;
				559	TmpPP.Lex(Tok);
				560	}
				561
				562
				563	// Insert the expansion as the end tag, so that multi-line macros all get
				564	// highlighted.
				565	Expansion = "<span class='expansion'>" + Expansion + "</span></span>";
				566
				567	HighlightRange(R, LLoc.first, LLoc.second,
				568	"<span class='macro'>", Expansion.c_str());
				569	}
				570
				571	// Restore diagnostics object back to its own thing.
				572	TmpPP.setDiagnostics(*OldDiags);
				573	}