Extend builtin "attribute" syntax to include a notation for
printf-like functions, both builtin functions and those in the
C library. The function-call checker now queries this attribute to
determine if we have a printf-like function, rather than scanning
through the list of "known function IDs". However, there are 5
functions that are not yet "builtins", so the function-call checker
still handles them specifically:

  - fprintf and vfprintf: the builtins mechanism cannot (yet)
    express FILE* arguments, so these can't be encoded.
  - NSLog: the builtins mechanism cannot (yet) express NSString*
    arguments, so this (and NSLogv) can't be encoded.
  - asprintf and vasprintf: these aren't part of the C99 standard
    library, so we really shouldn't be defining them as builtins in
    the general case (and we don't seem to have the machinery to make
    them builtins only on certain targets and depending on whether
    extensions are enabled).



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@64512 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/AST/Builtins.def b/include/clang/AST/Builtins.def
index e3c64bb..5e009d2 100644
--- a/include/clang/AST/Builtins.def
+++ b/include/clang/AST/Builtins.def
@@ -55,9 +55,13 @@
 //  c -> const
 //  F -> this is a libc/libm function with a '__builtin_' prefix added.
 //  f -> this is a libc/libm function without the '__builtin_' prefix. It can
-//       be followed by ':headername' to state which header this function
-//       comes from, but only if 'f:headername' is the last part of the
-//       string.
+//       be followed by ':headername:' to state which header this function
+//       comes from.
+//  p:N: -> this is a printf-like function whose Nth argument is the format 
+//          string.
+//  P:N: -> similar to the p:N: attribute, but the function is like vprintf
+//          in that it accepts its arguments as a va_list rather than 
+//          through an ellipsis
 //  FIXME: gcc has nonnull
 
 // Standard libc/libm functions:
@@ -143,12 +147,12 @@
 BUILTIN(__builtin___strcpy_chk, "c*c*cC*z", "nF")
 BUILTIN(__builtin___strncat_chk, "c*c*cC*zz", "nF")
 BUILTIN(__builtin___strncpy_chk, "c*c*cC*zz", "nF")
-BUILTIN(__builtin___snprintf_chk, "ic*zizcC*.", "F") // FIXME: format printf attribute
-BUILTIN(__builtin___sprintf_chk, "ic*izcC*.", "F") // FIXME: format printf attribute
-BUILTIN(__builtin___vsnprintf_chk, "ic*zizcC*a", "F") // FIXME: format printf attribute
-BUILTIN(__builtin___vsprintf_chk, "ic*izcC*a", "F") // FIXME: format printf attribute
+BUILTIN(__builtin___snprintf_chk, "ic*zizcC*.", "Fp:4:")
+BUILTIN(__builtin___sprintf_chk, "ic*izcC*.", "Fp:3:")
+BUILTIN(__builtin___vsnprintf_chk, "ic*zizcC*a", "FP:4:")
+BUILTIN(__builtin___vsprintf_chk, "ic*izcC*a", "FP:3:")
 //BUILTIN(__builtin___fprintf_chk, "i(FIXME:FILEPTR)icC*.", "F") // FIXME: format printf attribute
-BUILTIN(__builtin___printf_chk, "iicC*.", "F")
+BUILTIN(__builtin___printf_chk, "iicC*.", "F") // FIXME: format printf attribute
 //BUILTIN(__builtin___vfprintf_chk, "i(FIXME:FILEPTR)icC*a", "F") // FIXME: format printf attribute
 BUILTIN(__builtin___vprintf_chk, "iicC*a", "F") // FIXME: format printf attribute
 
@@ -177,22 +181,33 @@
 BUILTIN(__builtin_llvm_memory_barrier,"vbbbbb", "n")
 
 // Builtin library functions
-BUILTIN(alloca, "v*z", "f:stdlib.h")
-BUILTIN(calloc, "v*zz", "f:stdlib.h")
-BUILTIN(malloc, "v*z", "f:stdlib.h")
-BUILTIN(memcpy, "v*v*vC*z", "f:string.h")
-BUILTIN(memmove, "v*v*vC*z", "f:string.h")
-BUILTIN(memset, "v*v*iz", "f:string.h")
-BUILTIN(strcat, "c*c*cC*", "f:string.h")
-BUILTIN(strchr, "c*cC*i", "f:string.h")
-BUILTIN(strcpy, "c*c*cC*", "f:string.h")
-BUILTIN(strcspn, "zcC*cC*", "f:string.h")
-BUILTIN(strlen, "zcC*", "f:string.h")
-BUILTIN(strncat, "c*c*cC*z", "f:string.h")
-BUILTIN(strncpy, "c*c*cC*z", "f:string.h")
-BUILTIN(strpbrk, "c*cC*cC*", "f:string.h")
-BUILTIN(strrchr, "c*cC*i", "f:string.h")
-BUILTIN(strspn, "zcC*cC*", "f:string.h")
-BUILTIN(strstr, "c*cC*cC*", "f:string.h")
+BUILTIN(alloca, "v*z", "f:stdlib.h:")
+BUILTIN(calloc, "v*zz", "f:stdlib.h:")
+BUILTIN(malloc, "v*z", "f:stdlib.h:")
+BUILTIN(memcpy, "v*v*vC*z", "f:string.h:")
+BUILTIN(memmove, "v*v*vC*z", "f:string.h:")
+BUILTIN(memset, "v*v*iz", "f:string.h:")
+BUILTIN(strcat, "c*c*cC*", "f:string.h:")
+BUILTIN(strchr, "c*cC*i", "f:string.h:")
+BUILTIN(strcpy, "c*c*cC*", "f:string.h:")
+BUILTIN(strcspn, "zcC*cC*", "f:string.h:")
+BUILTIN(strlen, "zcC*", "f:string.h:")
+BUILTIN(strncat, "c*c*cC*z", "f:string.h:")
+BUILTIN(strncpy, "c*c*cC*z", "f:string.h:")
+BUILTIN(strpbrk, "c*cC*cC*", "f:string.h:")
+BUILTIN(strrchr, "c*cC*i", "f:string.h:")
+BUILTIN(strspn, "zcC*cC*", "f:string.h:")
+BUILTIN(strstr, "c*cC*cC*", "f:string.h:")
+BUILTIN(printf, "icC*.", "f:stdio.h:p:0:")
+//BUILTIN(fprintf, "i<FIXME:FILEPTR>cC*.", "f:stdio.h:p:1:")
+BUILTIN(snprintf, "ic*zcC*.", "f:stdio.h:p:2:")
+BUILTIN(sprintf, "ic*cC*.", "f:stdio.h:p:1:")
+BUILTIN(vprintf, "icC*a", "f:stdio.h:P:0:")
+//BUILTIN(vfprintf, "i<FIXME:FILEPTR>cC*a", "f:stdio.h:P:1:")
+BUILTIN(vsnprintf, "ic*zcC*a", "f:stdio.h:P:2:")
+BUILTIN(vsprintf, "ic*cC*a", "f:stdio.h:P:1:")
+
+// FIXME: asprintf and vasprintf aren't C99 functions. Should they be
+// target-specific builtins, perhaps? What about NSLog?
 
 #undef BUILTIN
diff --git a/include/clang/AST/Builtins.h b/include/clang/AST/Builtins.h
index f1b63bc..045e2ec 100644
--- a/include/clang/AST/Builtins.h
+++ b/include/clang/AST/Builtins.h
@@ -16,6 +16,7 @@
 #define LLVM_CLANG_AST_BUILTINS_H
 
 #include <cstring>
+#include <string>
 
 namespace clang {
   class TargetInfo;
@@ -87,17 +88,12 @@
 
   /// \brief If this is a library function that comes from a specific
   /// header, retrieve that header name.
-  const char *getHeaderName(unsigned ID) const {
-    char *Name = strchr(GetRecord(ID).Attributes, 'f');
-    if (!Name)
-      return 0;
-    ++Name;
+  std::string getHeaderName(unsigned ID) const;
 
-    if (*Name != ':')
-      return 0;
-
-    return ++Name;
-  }
+  /// \brief Determine whether this builtin is like printf in its
+  /// formatting rules and, if so, set the index to the format string
+  /// argument and whether this function has a va_list argument.
+  bool isPrintfLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg);
 
   /// hasVAListUse - Return true of the specified builtin uses __builtin_va_list
   /// as an operand or return type.
diff --git a/lib/AST/Builtins.cpp b/lib/AST/Builtins.cpp
index b675c72..e7ec137 100644
--- a/lib/AST/Builtins.cpp
+++ b/lib/AST/Builtins.cpp
@@ -49,6 +49,41 @@
     Table.get(TSRecords[i].Name).setBuiltinID(i+Builtin::FirstTSBuiltin);
 }
 
+/// \brief If builtin \p ID carries an 'f:headername:' attribute, return the
+/// header name; otherwise return an empty string.
+std::string Builtin::Context::getHeaderName(unsigned ID) const {
+  const char *Name = strchr(GetRecord(ID).Attributes, 'f');
+  if (!Name || Name[1] != ':')
+    // Returning a literal 0 here would build a std::string from a null
+    // pointer, which is undefined behavior; return an empty string instead.
+    return std::string();
+
+  Name += 2; // Skip over the "f:" prefix.
+  const char *NameEnd = strchr(Name, ':');
+  assert(NameEnd && "Missing ':' after header name");
+  return std::string(Name, NameEnd);
+}
+
+/// \brief Report whether builtin \p ID has a 'p:N:' or 'P:N:' attribute. On
+/// success, FormatIdx receives N and HasVAListArg is true for the 'P' form.
+bool
+Builtin::Context::isPrintfLike(unsigned ID, unsigned &FormatIdx,
+                               bool &HasVAListArg) {
+  const char *Printf = strpbrk(GetRecord(ID).Attributes, "pP");
+  if (!Printf)
+    return false;
+
+  HasVAListArg = (*Printf == 'P');
+
+  ++Printf;
+  assert(*Printf == ':' && "p or P specifier must be followed by a ':'");
+  ++Printf;
+
+  assert(strchr(Printf, ':') && "printf specifier must end with a ':'");
+  FormatIdx = strtol(Printf, 0, 10);
+  return true;
+}
+
 /// DecodeTypeFromStr - This decodes one type descriptor from Str, advancing the
 /// pointer over the consumed characters.  This returns the resultant type.
 static QualType DecodeTypeFromStr(const char *&Str, ASTContext &Context, 
diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp
index 8667ee2..45ef035 100644
--- a/lib/Sema/Sema.cpp
+++ b/lib/Sema/Sema.cpp
@@ -131,22 +131,11 @@
   // do extra checking.  
   IdentifierTable &IT = PP.getIdentifierTable();  
 
-  KnownFunctionIDs[id_printf]        = &IT.get("printf");
-  KnownFunctionIDs[id_fprintf]       = &IT.get("fprintf");
-  KnownFunctionIDs[id_sprintf]       = &IT.get("sprintf");
-  KnownFunctionIDs[id_sprintf_chk]   = &IT.get("__builtin___sprintf_chk");
-  KnownFunctionIDs[id_snprintf]      = &IT.get("snprintf");
-  KnownFunctionIDs[id_snprintf_chk]  = &IT.get("__builtin___snprintf_chk");
-  KnownFunctionIDs[id_asprintf]      = &IT.get("asprintf");
   KnownFunctionIDs[id_NSLog]         = &IT.get("NSLog");
-  KnownFunctionIDs[id_vsnprintf]     = &IT.get("vsnprintf");
+  KnownFunctionIDs[id_asprintf]      = &IT.get("asprintf");
+  KnownFunctionIDs[id_fprintf]       = &IT.get("fprintf");
   KnownFunctionIDs[id_vasprintf]     = &IT.get("vasprintf");
   KnownFunctionIDs[id_vfprintf]      = &IT.get("vfprintf");
-  KnownFunctionIDs[id_vsprintf]      = &IT.get("vsprintf");
-  KnownFunctionIDs[id_vsprintf_chk]  = &IT.get("__builtin___vsprintf_chk");
-  KnownFunctionIDs[id_vsnprintf]     = &IT.get("vsnprintf");
-  KnownFunctionIDs[id_vsnprintf_chk] = &IT.get("__builtin___vsnprintf_chk");
-  KnownFunctionIDs[id_vprintf]       = &IT.get("vprintf");
 
   StdNamespace = 0;
   TUScope = 0;
diff --git a/lib/Sema/Sema.h b/lib/Sema/Sema.h
index fdd4c30..db08377 100644
--- a/lib/Sema/Sema.h
+++ b/lib/Sema/Sema.h
@@ -183,18 +183,8 @@
     id_NSLog,
     id_asprintf,
     id_fprintf,
-    id_printf,
-    id_snprintf,
-    id_snprintf_chk,
-    id_sprintf,
-    id_sprintf_chk,
     id_vasprintf,
-    id_vfprintf,    
-    id_vsnprintf,
-    id_vsnprintf_chk,
-    id_vsprintf,
-    id_vsprintf_chk,
-    id_vprintf,
+    id_vfprintf,
     id_num_known_functions
   };
   
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index e058861..ab71255 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -77,40 +77,30 @@
   // more efficient. For example, just map function ids to custom
   // handlers.
 
-  // Search the KnownFunctionIDs for the identifier.
-  unsigned i = 0, e = id_num_known_functions;
-  for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
-  if (i == e) return move(TheCallResult);
-
   // Printf checking.
-  if (i <= id_vprintf) {
-    // Retrieve the index of the format string parameter and determine
-    // if the function is passed a va_arg argument.
-    unsigned format_idx = 0;
-    bool HasVAListArg = false;
-
-    switch (i) {
-    default: assert(false && "No format string argument index.");
-    case id_NSLog:         format_idx = 0; break;
-    case id_asprintf:      format_idx = 1; break;
-    case id_fprintf:       format_idx = 1; break;
-    case id_printf:        format_idx = 0; break;
-    case id_snprintf:      format_idx = 2; break;
-    case id_snprintf_chk:  format_idx = 4; break;
-    case id_sprintf:       format_idx = 1; break;
-    case id_sprintf_chk:   format_idx = 3; break;
-    case id_vasprintf:     format_idx = 1; HasVAListArg = true; break;
-    case id_vfprintf:      format_idx = 1; HasVAListArg = true; break;
-    case id_vsnprintf:     format_idx = 2; HasVAListArg = true; break;
-    case id_vsnprintf_chk: format_idx = 4; HasVAListArg = true; break;
-    case id_vsprintf:      format_idx = 1; HasVAListArg = true; break;
-    case id_vsprintf_chk:  format_idx = 3; HasVAListArg = true; break;
-    case id_vprintf:       format_idx = 0; HasVAListArg = true; break;
-    }
-
-    CheckPrintfArguments(TheCall, HasVAListArg, format_idx);
+  unsigned format_idx = 0;
+  bool HasVAListArg = false;
+  if (FDecl->getBuiltinID() &&
+      Context.BuiltinInfo.isPrintfLike(FDecl->getBuiltinID(), format_idx,
+                                       HasVAListArg)) {
+    // Found a printf builtin.
+  } else if (FnInfo == KnownFunctionIDs[id_NSLog]) {
+    format_idx = 0;
+    HasVAListArg = false;
+  } else if (FnInfo == KnownFunctionIDs[id_asprintf] ||
+             FnInfo == KnownFunctionIDs[id_fprintf]) {
+    format_idx = 1;
+    HasVAListArg = false;
+  } else if (FnInfo == KnownFunctionIDs[id_vasprintf] ||
+             FnInfo == KnownFunctionIDs[id_vfprintf]) {
+    format_idx = 1;
+    HasVAListArg = true;
+  } else {
+    return move(TheCallResult);
   }
 
+  CheckPrintfArguments(TheCall, HasVAListArg, format_idx);
+
   return move(TheCallResult);
 }
 
diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp
index d0997b0..3c02d4b 100644
--- a/lib/Sema/SemaDecl.cpp
+++ b/lib/Sema/SemaDecl.cpp
@@ -304,7 +304,7 @@
     Diag(Loc, diag::ext_implicit_lib_function_decl)
       << Context.BuiltinInfo.GetName(BID)
       << R;
-    if (Context.BuiltinInfo.getHeaderName(BID) &&
+    if (!Context.BuiltinInfo.getHeaderName(BID).empty() &&
         Diags.getDiagnosticMapping(diag::ext_implicit_lib_function_decl)
           != diag::MAP_IGNORE)
       Diag(Loc, diag::note_please_include_header)
diff --git a/test/Analysis/dead-stores.c b/test/Analysis/dead-stores.c
index 7d7b369..2d07a80 100644
--- a/test/Analysis/dead-stores.c
+++ b/test/Analysis/dead-stores.c
@@ -12,7 +12,8 @@
 void f2(void *b) {
  char *c = (char*)b; // no-warning
  char *d = b+1; // expected-warning {{never read}}
- printf("%s", c);
+ printf("%s", c); // expected-warning{{implicitly declaring C library function 'printf' with type 'int (char const *, ...)'}} \
+ // expected-note{{please include the header <stdio.h> or explicitly provide a declaration for 'printf'}}
 }
 
 void f3() {
diff --git a/test/Analysis/uninit-vals.c b/test/Analysis/uninit-vals.c
index a7f5733..2f2c33d 100644
--- a/test/Analysis/uninit-vals.c
+++ b/test/Analysis/uninit-vals.c
@@ -31,7 +31,9 @@
 void f6(int i) {
   int x;
   for (i = 0 ; i < 10; i++)
-    printf("%d",x++); // expected-warning {{use of uninitialized variable}}
+    printf("%d",x++); // expected-warning {{use of uninitialized variable}} \
+  // expected-warning{{implicitly declaring C library function 'printf' with type 'int (char const *, ...)'}} \
+  // expected-note{{please include the header <stdio.h> or explicitly provide a declaration for 'printf'}}
 }
 
 void f7(int i) {
diff --git a/test/Rewriter/finally.m b/test/Rewriter/finally.m
index 903cdec..0d623dc 100644
--- a/test/Rewriter/finally.m
+++ b/test/Rewriter/finally.m
@@ -2,7 +2,8 @@
 
 int main() {
   @try {
-    printf("executing try");
+    printf("executing try"); // expected-warning{{implicitly declaring C library function 'printf' with type 'int (char const *, ...)'}} \
+        // expected-note{{please include the header <stdio.h> or explicitly provide a declaration for 'printf'}}
     return(0); // expected-warning{{rewriter doesn't support user-specified control flow semantics for @try/@finally (code may not execute properly)}}
   } @finally {
     printf("executing finally");
diff --git a/test/Sema/block-return.c b/test/Sema/block-return.c
index 64c2993..a370534 100644
--- a/test/Sema/block-return.c
+++ b/test/Sema/block-return.c
@@ -80,5 +80,6 @@
   int (^xx)(const char *s) = ^(char *s) { return 1; }; // expected-warning {{incompatible block pointer types initializing 'int (^)(char *)', expected 'int (^)(char const *)'}}
   int (*yy)(const char *s) = funk; // expected-warning {{incompatible pointer types initializing 'int (char *)', expected 'int (*)(char const *)'}}
   
-  int (^nested)(char *s) = ^(char *str) { void (^nest)(void) = ^(void) { printf("%s\n", str); }; next(); return 1; };
+  int (^nested)(char *s) = ^(char *str) { void (^nest)(void) = ^(void) { printf("%s\n", str); }; next(); return 1; }; // expected-warning{{implicitly declaring C library function 'printf' with type 'int (char const *, ...)'}} \
+  // expected-note{{please include the header <stdio.h> or explicitly provide a declaration for 'printf'}}
 }