Introduce a CIndex API for lexing the raw tokens within a given source
range. The token-annotation function does nothing, yet.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@94551 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h
index ab7e55b..ff0a0e1 100644
--- a/include/clang-c/Index.h
+++ b/include/clang-c/Index.h
@@ -861,6 +861,125 @@
*/
/**
+ * \defgroup CINDEX_LEX Lexing and syntactic analysis
+ *
+ * @{
+ */
+
+/**
+ * \brief Describes a kind of token.
+ */
+typedef enum CXTokenKind {
+ /**
+ * \brief A token that contains some kind of punctuation.
+ */
+ CXToken_Punctuation,
+
+ /**
+ * \brief A a language keyword.
+ */
+ CXToken_Keyword,
+
+ /**
+ * \brief An identifier (that is not a keyword).
+ */
+ CXToken_Identifier,
+
+ /**
+ * \brief A numeric, string, or character literal.
+ */
+ CXToken_Literal,
+
+ /**
+ * \brief A comment.
+ */
+ CXToken_Comment
+} CXTokenKind;
+
+/**
+ * \brief Describes a single preprocessing token.
+ */
+typedef struct {
+ unsigned int_data[4];
+ void *ptr_data;
+} CXToken;
+
+/**
+ * \brief Determine the kind of the given token.
+ */
+CINDEX_LINKAGE CXTokenKind clang_getTokenKind(CXToken);
+
+/**
+ * \brief Determine the spelling of the given token.
+ *
+ * The spelling of a token is the textual representation of that token, e.g.,
+ * the text of an identifier or keyword.
+ */
+CINDEX_LINKAGE CXString clang_getTokenSpelling(CXTranslationUnit, CXToken);
+
+/**
+ * \brief Retrieve the source location of the given token.
+ */
+CINDEX_LINKAGE CXSourceLocation clang_getTokenLocation(CXTranslationUnit,
+ CXToken);
+
+/**
+ * \brief Retrieve a source range that covers the given token.
+ */
+CINDEX_LINKAGE CXSourceRange clang_getTokenExtent(CXTranslationUnit, CXToken);
+
+/**
+ * \brief Tokenize the source code described by the given range into raw
+ * lexical tokens.
+ *
+ * \param TU the translation unit whose text is being tokenized.
+ *
+ * \param Range the source range in which text should be tokenized. All of the
+ * tokens produced by tokenization will fall within this source range,
+ *
+ * \param Tokens this pointer will be set to point to the array of tokens
+ * that occur within the given source range. The returned pointer must be
+ * freed with clang_disposeTokens() before the translation unit is destroyed.
+ *
+ * \param NumTokens will be set to the number of tokens in the \c *Tokens
+ * array.
+ *
+ */
+CINDEX_LINKAGE void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range,
+ CXToken **Tokens, unsigned *NumTokens);
+
+/**
+ * \brief Annotate the given set of tokens by providing cursors for each token
+ * that can be mapped to a specific entity within the abstract syntax tree.
+ *
+ * This token-annotation routine is equivalent to invoking clang_getCursor()
+ * for the source locations of each of the tokens, then accepting only those
+ * cursors that refer to a specific token.
+ *
+ * \param TU the translation unit that owns the given tokens.
+ *
+ * \param Tokens the set of tokens to annotate.
+ *
+ * \param NumTokens the number of tokens in \p Tokens.
+ *
+ * \param Cursors an array of \p NumTokens cursors, whose contents will be
+ * replaced with the cursors corresponding to each token.
+ */
+CINDEX_LINKAGE void clang_annotateTokens(CXTranslationUnit TU,
+ CXToken *Tokens, unsigned NumTokens,
+ CXCursor *Cursors);
+
+/**
+ * \brief Free the given set of tokens.
+ */
+CINDEX_LINKAGE void clang_disposeTokens(CXTranslationUnit TU,
+ CXToken *Tokens, unsigned NumTokens);
+
+/**
+ * @}
+ */
+
+/**
* \defgroup CINDEX_DEBUG Debugging facilities
*
* These routines are used for testing and debugging, only, and should not