//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the SourceManager interface.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_SOURCEMANAGER_H
#define LLVM_CLANG_SOURCEMANAGER_H
#include "clang/Basic/SourceLocation.h"
#include "llvm/Bitcode/SerializationFwd.h"
#include <cassert>
#include <functional>
#include <list>
#include <set>
#include <vector>
namespace llvm {
class MemoryBuffer;
}
namespace clang {
class SourceManager;
class FileManager;
class FileEntry;
class IdentifierTokenInfo;
/// SrcMgr - Public enums and private classes that are part of the
/// SourceManager implementation.
///
namespace SrcMgr {
/// CharacteristicKind - Classifies a file or directory as holding normal user
/// code, system code, or system code which is implicitly 'extern "C"' in C++
/// mode.  Entire directories can carry this tag (DirectoryLookup and friends
/// maintain it), and so can specific FileIDInfos, for example when a
/// #pragma system_header is seen.
///
enum CharacteristicKind {
  /// Normal user code.
  C_User,
  /// System code.
  C_System,
  /// System code which is implicitly 'extern "C"' in C++ mode.
  C_ExternCSystem
};
/// ContentCache - One instance of this class is kept for every file loaded
/// or used.  This object owns the MemoryBuffer object.
class ContentCache {
  /// Buffer - The actual buffer containing the characters from the input
  /// file.  This is owned by the ContentCache object.
  mutable const llvm::MemoryBuffer *Buffer;

public:
  /// Entry - Reference to the file entry.  This reference does not own the
  /// FileEntry object.  It may be NULL when the ContentCache encapsulates an
  /// imaginary text buffer.
  const FileEntry *Entry;

  /// SourceLineCache - A new[]'d array of offsets for each source line.  It
  /// is computed lazily and is owned by the ContentCache object.
  unsigned *SourceLineCache;

  /// NumLines - The number of lines in this ContentCache.  This is only
  /// valid if SourceLineCache is non-null.
  unsigned NumLines;

  /// getBuffer - Returns the memory buffer for the associated content.
  const llvm::MemoryBuffer *getBuffer() const;

  /// getSize - Returns the size of the content encapsulated by this
  /// ContentCache.  This can be the size of the source file or the size of
  /// an arbitrary scratch buffer.  If the ContentCache encapsulates a source
  /// file, this size is retrieved from the file's FileEntry.
  unsigned getSize() const;

  /// getSizeBytesMapped - Returns the number of bytes actually mapped for
  /// this ContentCache.  This can be 0 if the MemBuffer was not actually
  /// instantiated.
  unsigned getSizeBytesMapped() const;

  /// setBuffer - Install B as this cache's buffer.  It is an error (checked
  /// by assertion) to call this when a buffer has already been set.
  void setBuffer(const llvm::MemoryBuffer *B) {
    assert(!Buffer && "MemoryBuffer already set.");
    Buffer = B;
  }

  /// Construct a cache for the given file entry (NULL by default) with no
  /// buffer and no line table.
  ContentCache(const FileEntry *e = NULL)
    : Buffer(NULL),
      Entry(e),
      SourceLineCache(NULL),
      NumLines(0) {}

  ~ContentCache();

  /// The copy ctor does not allow copies where the source object has either
  /// a non-NULL Buffer or SourceLineCache.  Ownership of allocated memory is
  /// not transferred, so copying such an object is a logical error.  Only
  /// Entry and NumLines are carried over.
  ContentCache(const ContentCache &RHS)
    : Buffer(NULL),
      Entry(RHS.Entry),
      SourceLineCache(NULL),
      NumLines(RHS.NumLines) {
    assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL
            && "Passed ContentCache object cannot own a buffer.");
  }

  /// Emit - Emit this ContentCache to Bitcode.
  void Emit(llvm::Serializer &S) const;

  /// ReadToSourceManager - Reconstitute a ContentCache from Bitcode
  /// and store it in the specified SourceManager.
  static void ReadToSourceManager(llvm::Deserializer &D, SourceManager &SM,
                                  FileManager *FMgr, std::vector<char> &Buf);

private:
  // Assignment is disabled: the operator is declared private.
  ContentCache &operator=(const ContentCache& RHS);
};
/// FileIDInfo - Information about a FileID, basically just the logical file
/// that it represents and include stack information.  A File SourceLocation
/// is a byte offset from the start of this.
///
/// FileID's are used to compute the location of a character in memory as well
/// as the instantiation source location, which can differ from the spelling
/// location.  It is different when #line's are active or when macros have
/// been expanded.
///
/// Each FileID has include stack information, indicating where it came from.
/// For the primary translation unit, it comes from SourceLocation() aka 0.
/// This information encodes the #include chain that a token was instantiated
/// from.
///
/// FileIDInfos contain a "ContentCache *", describing the source file,
/// and a Chunk number, which allows a SourceLocation to index into very
/// large files (those for which there are not enough FilePosBits to address).
///
struct FileIDInfo {
private:
  /// IncludeLoc - The location of the #include that brought in this file.
  /// This SourceLocation object has an invalid SLOC for the main file.
  SourceLocation IncludeLoc;

  /// ChunkNo - Really large buffers are broken up into chunks that are
  /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
  /// chunk number of this FileID.  Stored in a 30-bit field.
  unsigned ChunkNo : 30;

  /// FileCharacteristic - This is an instance of CharacteristicKind,
  /// indicating whether this is a system header dir or not.  Stored in a
  /// 2-bit field.
  unsigned FileCharacteristic : 2;

  /// Content - Information about the source buffer itself.
  const ContentCache *Content;

public:
  /// get - Build and return a FileIDInfo from an include location, a chunk
  /// number, a content cache and a file characteristic.
  static FileIDInfo get(SourceLocation IL, unsigned CN,
                        const ContentCache *Con,
                        CharacteristicKind FileCharacter) {
    FileIDInfo Info;
    Info.IncludeLoc = IL;
    Info.ChunkNo = CN;
    Info.FileCharacteristic = FileCharacter;
    Info.Content = Con;
    return Info;
  }

  /// getIncludeLoc - Return the location of the #include that brought in
  /// this file.
  SourceLocation getIncludeLoc() const {
    return IncludeLoc;
  }

  /// getChunkNo - Return the chunk number of this FileID.
  unsigned getChunkNo() const {
    return ChunkNo;
  }

  /// getContentCache - Return the ContentCache describing the source buffer.
  const ContentCache* getContentCache() const {
    return Content;
  }

  /// getFileCharacteristic - Return whether this is a system header or not.
  CharacteristicKind getFileCharacteristic() const {
    return static_cast<CharacteristicKind>(FileCharacteristic);
  }

  /// Emit - Emit this FileIDInfo to Bitcode.
  void Emit(llvm::Serializer& S) const;

  /// ReadVal - Reconstitute a FileIDInfo from Bitcode.
  static FileIDInfo ReadVal(llvm::Deserializer& S);
};
/// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
/// Each MacroIDInfo encodes the instantiation location (where the macro was
/// instantiated) and the spelling location (where the actual character data
/// for the token came from).  An actual macro SourceLocation stores deltas
/// from these positions.
class MacroIDInfo {
  SourceLocation InstantiationLoc, SpellingLoc;

public:
  /// getInstantiationLoc - Return the location where the macro was
  /// instantiated.
  SourceLocation getInstantiationLoc() const {
    return InstantiationLoc;
  }

  /// getSpellingLoc - Return the location the token's characters came from.
  SourceLocation getSpellingLoc() const {
    return SpellingLoc;
  }

  /// get - Return a MacroID for a macro expansion.  VL specifies the
  /// instantiation location (where the macro is expanded), and SL specifies
  /// the spelling location (where the characters from the token come from).
  /// Both VL and SL refer to normal File SLocs.
  static MacroIDInfo get(SourceLocation VL, SourceLocation SL) {
    MacroIDInfo Info;
    Info.InstantiationLoc = VL;
    Info.SpellingLoc = SL;
    return Info;
  }

  /// Emit - Emit this MacroIDInfo to Bitcode.
  void Emit(llvm::Serializer& S) const;

  /// ReadVal - Reconstitute a MacroIDInfo from Bitcode.
  static MacroIDInfo ReadVal(llvm::Deserializer& S);
};
} // end SrcMgr namespace.
} // end clang namespace
namespace std {
/// Ordering for ContentCache objects, keyed solely on their FileEntry
/// pointer.  This is the comparator picked up by std::set<ContentCache>, so
/// two ContentCache objects with the same Entry are considered equivalent.
template <> struct less<clang::SrcMgr::ContentCache> {
  /// Return true if L's FileEntry pointer orders before R's.
  bool operator()(const clang::SrcMgr::ContentCache& L,
                  const clang::SrcMgr::ContentCache& R) const {
    // The FileEntry objects are unrelated allocations, so comparing their
    // addresses with the built-in '<' gives an unspecified result.  The
    // pointer form of std::less is guaranteed to yield a strict total order,
    // which is what an ordered container requires.
    return std::less<const clang::FileEntry*>()(L.Entry, R.Entry);
  }
};
} // end std namespace
namespace clang {
/// SourceManager - This class handles loading and caching of source files
/// into memory.  This object owns the MemoryBuffer objects for all of the
/// loaded files and assigns unique FileID's for each unique #include chain.
///
/// The SourceManager can be queried for information about SourceLocation
/// objects, turning them into either spelling or instantiation locations.
/// Spelling locations represent where the bytes corresponding to a token came
/// from and instantiation locations represent where the location is in the
/// user's view.  In the case of a macro expansion, for example, the spelling
/// location indicates where the expanded token came from and the instantiation
/// location specifies where it was expanded.
class SourceManager {
  /// FileInfos - Memoized information about all of the files tracked by this
  /// SourceManager.  This set allows us to merge ContentCache entries based
  /// on their FileEntry*.  All ContentCache objects will thus have unique,
  /// non-null, FileEntry pointers.
  std::set<SrcMgr::ContentCache> FileInfos;

  /// MemBufferInfos - Information about various memory buffers that we have
  /// read in.  This is a list, instead of a vector, because we need pointers
  /// to the ContentCache objects to be stable.  All FileEntry* within the
  /// stored ContentCache objects are NULL, as they do not refer to a file.
  std::list<SrcMgr::ContentCache> MemBufferInfos;

  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
  /// entries are off by one.
  std::vector<SrcMgr::FileIDInfo> FileIDs;

  /// MacroIDs - Information about each MacroID.
  std::vector<SrcMgr::MacroIDInfo> MacroIDs;

  /// LastLineNo* - These ivars serve as a cache used in the getLineNumber
  /// method, which is used to speed up getLineNumber calls to nearby
  /// locations.  They are mutable so that const queries can update them.
  mutable FileID LastLineNoFileIDQuery;
  mutable SrcMgr::ContentCache *LastLineNoContentCache;
  mutable unsigned LastLineNoFilePos;
  mutable unsigned LastLineNoResult;

  /// MainFileID - The file ID for the main source file of the translation
  /// unit.
  FileID MainFileID;

  // SourceManager doesn't support copy construction or assignment; both
  // operations are declared private so clients cannot use them.
  explicit SourceManager(const SourceManager&);
  void operator=(const SourceManager&);

public:
  /// Construct an empty SourceManager.  The scalar line-number cache members
  /// are explicitly zeroed so that they never hold indeterminate values; the
  /// FileID members and the containers are default-constructed.
  SourceManager()
    : LastLineNoContentCache(0), LastLineNoFilePos(0), LastLineNoResult(0) {}

  ~SourceManager() {}

  /// clearIDTables - Forget the main file ID, every FileID and MacroID record,
  /// and the line-number cache.  FileInfos and MemBufferInfos are left
  /// untouched.
  void clearIDTables() {
    MainFileID = FileID();
    FileIDs.clear();
    MacroIDs.clear();
    // Return the line-number cache to the same state the constructor
    // establishes.
    LastLineNoFileIDQuery = FileID();
    LastLineNoContentCache = 0;
    LastLineNoFilePos = 0;
    LastLineNoResult = 0;
  }

  /// getMainFileID - Returns the FileID of the main source file.
  FileID getMainFileID() const { return MainFileID; }

  /// createFileID - Create a new FileID that represents the specified file
  /// being #included from the specified IncludePosition.  This returns a
  /// default-constructed FileID on error and translates NULL into standard
  /// input.
  FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos,
                      SrcMgr::CharacteristicKind FileCharacter) {
    const SrcMgr::ContentCache *IR = getContentCache(SourceFile);
    if (IR == 0) return FileID();    // Error opening file?
    return createFileID(IR, IncludePos, FileCharacter);
  }

  /// createMainFileID - Create the FileID for the main source file.  The file
  /// is always created with the C_User characteristic.  It is an error
  /// (checked by assertion) to call this when a main file ID is already set.
  FileID createMainFileID(const FileEntry *SourceFile,
                          SourceLocation IncludePos) {
    assert(MainFileID.isInvalid() && "MainFileID already set!");
    MainFileID = createFileID(SourceFile, IncludePos, SrcMgr::C_User);
    return MainFileID;
  }

  /// createFileIDForMemBuffer - Create a new FileID that represents the
  /// specified memory buffer.  This does no caching of the buffer and takes
  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
  FileID createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
    return createFileID(createMemBufferContentCache(Buffer), SourceLocation(),
                        SrcMgr::C_User);
  }

  /// createMainFileIDForMemBuffer - Create the FileID for a memory buffer
  /// that will represent the FileID for the main source.  One example
  /// of when this would be used is when the main source is read from STDIN.
  FileID createMainFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
    assert(MainFileID.isInvalid() && "MainFileID already set!");
    MainFileID = createFileIDForMemBuffer(Buffer);
    return MainFileID;
  }

  /// getLocForStartOfFile - Return the source location corresponding to the
  /// first byte of the specified file.
  SourceLocation getLocForStartOfFile(FileID FID) const {
    return SourceLocation::getFileLoc(FID.ID, 0);
  }

  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
  /// that a token at Loc should actually be referenced from InstantiationLoc.
  SourceLocation getInstantiationLoc(SourceLocation Loc,
                                     SourceLocation InstantiationLoc);

  /// getBuffer - Return the buffer for the specified FileID.
  ///
  const llvm::MemoryBuffer *getBuffer(FileID FID) const {
    return getContentCache(FID)->getBuffer();
  }

  /// getBufferData - Return a pointer to the start and end of the source
  /// buffer data for the specified FileID.
  std::pair<const char*, const char*> getBufferData(FileID FID) const;

  /// getIncludeLoc - Return the location of the #include for the specified
  /// SourceLocation.  If this is a macro expansion, this transparently figures
  /// out which file includes the file being expanded into.
  SourceLocation getIncludeLoc(SourceLocation ID) const {
    return getFIDInfo(getInstantiationLoc(ID).getChunkID())->getIncludeLoc();
  }

  /// getCharacterData - Return a pointer to the start of the specified
  /// location in the appropriate MemoryBuffer.
  const char *getCharacterData(SourceLocation SL) const;

  /// getColumnNumber - Return the column # for the specified file position.
  /// This is significantly cheaper to compute than the line number.  This
  /// returns zero if the column number isn't known.  This may only be called
  /// on a file sloc, so you must choose a spelling or instantiation location
  /// before calling this method.
  unsigned getColumnNumber(SourceLocation Loc) const;

  /// getSpellingColumnNumber - Column number of Loc's spelling location.
  unsigned getSpellingColumnNumber(SourceLocation Loc) const {
    return getColumnNumber(getSpellingLoc(Loc));
  }

  /// getInstantiationColumnNumber - Column number of Loc's instantiation
  /// location.
  unsigned getInstantiationColumnNumber(SourceLocation Loc) const {
    return getColumnNumber(getInstantiationLoc(Loc));
  }

  /// getLineNumber - Given a SourceLocation, return the spelling line number
  /// for the position indicated.  This requires building and caching a table
  /// of line offsets for the MemoryBuffer, so this is not cheap: use only when
  /// about to emit a diagnostic.
  unsigned getLineNumber(SourceLocation Loc) const;

  /// getInstantiationLineNumber - Line number of Loc's instantiation location.
  unsigned getInstantiationLineNumber(SourceLocation Loc) const {
    return getLineNumber(getInstantiationLoc(Loc));
  }

  /// getSpellingLineNumber - Line number of Loc's spelling location.
  unsigned getSpellingLineNumber(SourceLocation Loc) const {
    return getLineNumber(getSpellingLoc(Loc));
  }

  /// getSourceName - This method returns the name of the file or buffer that
  /// the SourceLocation specifies.  This can be modified with #line
  /// directives, etc.
  const char *getSourceName(SourceLocation Loc) const;

  /// getInstantiationLoc - Given a SourceLocation object, return the
  /// instantiation location referenced by the ID.
  SourceLocation getInstantiationLoc(SourceLocation Loc) const {
    // File locations are their own instantiation location.
    if (Loc.isFileID()) return Loc;

    return MacroIDs[Loc.getMacroID()].getInstantiationLoc();
  }

  /// getSpellingLoc - Given a SourceLocation object, return the spelling
  /// location referenced by the ID.  This is the place where the characters
  /// that make up the lexed token can be found.
  SourceLocation getSpellingLoc(SourceLocation Loc) const {
    // File locations are their own spelling location.
    if (Loc.isFileID()) return Loc;

    // Look up the macro token's spelling location and apply the offset stored
    // in the macro SourceLocation.
    SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getSpellingLoc();
    return PLoc.getFileLocWithOffset(Loc.getMacroSpellingOffs());
  }

  /// getFileEntryForID - Returns the FileEntry record for the provided FileID.
  const FileEntry *getFileEntryForID(FileID FID) const {
    return getContentCache(FID)->Entry;
  }

  /// getCanonicalFileID - Return the canonical FileID for a SourceLocation.
  /// A file can have multiple FileIDs if it is large enough to be broken
  /// into multiple chunks.  This method returns the unique FileID without
  /// chunk information for a given SourceLocation.  Use this method when
  /// you want to compare FileIDs across SourceLocations.
  FileID getCanonicalFileID(SourceLocation SpellingLoc) const {
    return getDecomposedFileLoc(SpellingLoc).first;
  }

  /// getDecomposedFileLoc - Decompose the specified file location into a raw
  /// FileID + Offset pair.  The first element is the FileID, the second is the
  /// offset from the start of the buffer of the location.  Loc must be a file
  /// location (checked by assertion).
  std::pair<FileID, unsigned> getDecomposedFileLoc(SourceLocation Loc) const {
    assert(Loc.isFileID() && "Isn't a File SourceLocation");

    // TODO: Add a flag "is first chunk" to SLOC.
    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getChunkID());

    // If this file has been split up into chunks, factor in the chunk number
    // that the FileID references.
    unsigned ChunkNo = FIDInfo->getChunkNo();
    unsigned Offset = Loc.getRawFilePos();
    Offset += (ChunkNo << SourceLocation::FilePosBits);

    // Subtracting the chunk number from the chunk ID yields the ID reported
    // as the canonical FileID; the assertion guards that subtraction against
    // unsigned wrap-around.
    assert(Loc.getChunkID() >= ChunkNo && "Unexpected offset");
    return std::make_pair(FileID::Create(Loc.getChunkID()-ChunkNo), Offset);
  }

  /// getFullFilePos - This (efficient) method returns the offset from the
  /// start of the file that the specified spelling SourceLocation represents.
  /// This returns the location of the actual character data, not the
  /// instantiation position.
  unsigned getFullFilePos(SourceLocation SpellingLoc) const {
    return getDecomposedFileLoc(SpellingLoc).second;
  }

  /// isFromSameFile - Returns true if both SourceLocations correspond to
  /// the same file.
  bool isFromSameFile(SourceLocation Loc1, SourceLocation Loc2) const {
    return getCanonicalFileID(Loc1) == getCanonicalFileID(Loc2);
  }

  /// isFromMainFile - Returns true if the file of provided SourceLocation is
  /// the main file.
  bool isFromMainFile(SourceLocation Loc) const {
    return getCanonicalFileID(Loc) == getMainFileID();
  }

  /// isInSystemHeader - Returns true if a SourceLocation is in a system
  /// header, i.e. its file characteristic is anything other than C_User.
  bool isInSystemHeader(SourceLocation Loc) const {
    return getFileCharacteristic(Loc) != SrcMgr::C_User;
  }

  /// getFileCharacteristic - Return the characteristic of the file that holds
  /// the spelling location of Loc.
  SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const {
    return getFIDInfo(getSpellingLoc(Loc).getChunkID())
             ->getFileCharacteristic();
  }

  /// getFileCharacteristic - Return the characteristic recorded for FID.
  SrcMgr::CharacteristicKind getFileCharacteristic(FileID FID) const {
    return getFIDInfo(FID)->getFileCharacteristic();
  }

  // Iterators over FileInfos.
  typedef std::set<SrcMgr::ContentCache>::const_iterator fileinfo_iterator;
  fileinfo_iterator fileinfo_begin() const { return FileInfos.begin(); }
  fileinfo_iterator fileinfo_end() const { return FileInfos.end(); }

  /// PrintStats - Print statistics to stderr.
  ///
  void PrintStats() const;

  /// Emit - Emit this SourceManager to Bitcode.
  void Emit(llvm::Serializer& S) const;

  /// CreateAndRegister - Reconstitute a SourceManager from Bitcode.
  static SourceManager* CreateAndRegister(llvm::Deserializer& S,
                                          FileManager &FMgr);

private:
  // Used for deserialization.  The class-key matches ContentCache's
  // declaration, which uses 'class'.
  friend class SrcMgr::ContentCache;

  /// createFileID - Create a new fileID for the specified ContentCache and
  /// include position.  This works regardless of whether the ContentCache
  /// corresponds to a file or some other input source.
  FileID createFileID(const SrcMgr::ContentCache* File,
                      SourceLocation IncludePos,
                      SrcMgr::CharacteristicKind DirCharacter);

  /// getContentCache - Create or return a cached ContentCache for the
  /// specified file.  This returns null on failure.
  const SrcMgr::ContentCache* getContentCache(const FileEntry *SourceFile);

  /// createMemBufferContentCache - Create a new ContentCache for the specified
  /// memory buffer.
  const SrcMgr::ContentCache*
  createMemBufferContentCache(const llvm::MemoryBuffer *Buf);

  /// getFIDInfo - Return the FileIDInfo record for the raw file ID FID.
  /// IDs are 1-based, so the record lives at index FID-1.
  const SrcMgr::FileIDInfo *getFIDInfo(unsigned FID) const {
    // For FID == 0 the unsigned subtraction wraps to the maximum value, so
    // this single comparison rejects both 0 and out-of-range IDs.
    assert(FID-1 < FileIDs.size() && "Invalid FileID!");
    return &FileIDs[FID-1];
  }

  /// getFIDInfo - Return the FileIDInfo record for the specified FileID.
  const SrcMgr::FileIDInfo *getFIDInfo(FileID FID) const {
    return getFIDInfo(FID.ID);
  }

  /// getContentCache - Return the ContentCache for the specified FileID.
  const SrcMgr::ContentCache *getContentCache(FileID FID) const {
    return getContentCache(getFIDInfo(FID.ID));
  }

  /// Return the ContentCache structure for the specified FileIDInfo.
  /// This is always the physical reference for the ID.
  const SrcMgr::ContentCache*
  getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const {
    return FIDInfo->getContentCache();
  }
};
} // end namespace clang
#endif