Improved the naming convention heuristics in the retain/release checker to
better handle method names that contain 'new', 'copy', etc. only as a substring
of a larger word, such as 'newsgroup' or 'photocopy', and therefore do not
indicate the allocation of objects. This should address the issues discussed in
<rdar://problem/6552389>.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@65224 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Analysis/CFRefCount.cpp b/lib/Analysis/CFRefCount.cpp
index 8e120af..4d4bd05 100644
--- a/lib/Analysis/CFRefCount.cpp
+++ b/lib/Analysis/CFRefCount.cpp
@@ -39,8 +39,6 @@
 // Utility functions.
 //===----------------------------------------------------------------------===//
 
-using llvm::CStrInCStrNoCase;
-
 // The "fundamental rule" for naming conventions of methods:
 //  (url broken into two lines)
 //  http://developer.apple.com/documentation/Cocoa/Conceptual/
@@ -53,16 +51,103 @@
 //  or autorelease. Any other time you receive an object, you must
 //  not release it."
 //
+
+using llvm::CStrInCStrNoCase;
+// Result of deriveNamingConvention() for a method/function name.
+enum NamingConvention { NoConvention, CreateRule, InitRule };
+
+// True if 'ch' ends a word: NUL, non-letter, or case boundary (xxxC, XXCreate).
+static inline bool isWordEnd(char ch, char prev, char next) {
+  const unsigned char C = ch, P = prev, N = next; // no UB on negative chars
+  return C == '\0' || !isalpha(C) || (islower(P) && isupper(C))
+      || (isupper(P) && isupper(C) && islower(N));
+}
+  
+// Returns a pointer to the character that ends the word starting at 's'
+// ('s' must not be empty).
+static inline const char* parseWord(const char* s) {
+  assert(*s != '\0');
+  char prev = '\0';
+  // Testing *s first keeps the s[1] lookahead from reading past the
+  // terminating NUL when the word runs to the end of the string.
+  while (*s != '\0' && !isWordEnd(*s, prev, s[1]))
+    prev = *s++;
+  return s;
+}
+
+// Splits 's' into words and maps keyword matches (new/alloc/copy -> CreateRule,
+// init -> InitRule; case-insensitive) to a NamingConvention.
+static NamingConvention deriveNamingConvention(const char* s) {
+  // A name may carry a prefix; we only know once we see the first '_'.
+  bool InPossiblePrefix = true;
+  bool AtBeginning = true;
+  NamingConvention C = NoConvention;
+
+  while (*s != '\0') {
+    // Skip '_'.
+    if (*s == '_') {
+      if (InPossiblePrefix) {
+        InPossiblePrefix = false;
+        AtBeginning = true;
+        // Discard any convention derived so far: it came from the prefix.
+        C = NoConvention;
+      }
+      ++s;
+      continue;
+    }
+
+    // Skip numbers, ':', etc.
+    if (!isalpha((unsigned char) *s)) {
+      ++s;
+      continue;
+    }
+
+    const char *wordEnd = parseWord(s);
+    assert(wordEnd > s);
+    unsigned len = wordEnd - s;
+
+    switch (len) {
+    case 3:
+      // Methods starting with 'new' follow the create rule.
+      if (AtBeginning && strncasecmp("new", s, len) == 0)
+        C = CreateRule;
+      break;
+    case 4:
+      // Methods containing 'copy' follow the create rule.
+      if (strncasecmp("copy", s, len) == 0)
+        C = CreateRule;
+      // Methods starting with 'init' follow the init rule.
+      else if (AtBeginning && strncasecmp("init", s, len) == 0)
+        C = InitRule;
+      break;
+    case 5:
+      // Methods starting with 'alloc' (five letters) follow the create rule.
+      if (AtBeginning && strncasecmp("alloc", s, len) == 0)
+        C = CreateRule;
+      break;
+    default:
+      break;
+    }
+
+    // Outside the prefix, the first convention we derive is final.
+    if (!InPossiblePrefix && C != NoConvention)
+      return C;
+
+    AtBeginning = false;
+    s = wordEnd;
+  }
+
+  // Reached when there was no '_' prefix or no word after it matched.
+  return C;
+}
+
 static bool followsFundamentalRule(const char* s) {
-  while (*s == '_') ++s;  
-  return CStrInCStrNoCase(s, "copy")
-      || CStrInCStrNoCase(s, "new") == s 
-      || CStrInCStrNoCase(s, "alloc") == s;
+  return deriveNamingConvention(s) == CreateRule; // '_' is skipped there
 }
 
 static bool followsReturnRule(const char* s) {
-  while (*s == '_') ++s;  
-  return followsFundamentalRule(s) || CStrInCStrNoCase(s, "init") == s;
+  const NamingConvention Rule = deriveNamingConvention(s);
+  return Rule == InitRule || Rule == CreateRule; // either rule qualifies
 }
 
 //===----------------------------------------------------------------------===//
@@ -942,7 +1027,7 @@
   const char* s = S.getIdentifierInfoForSlot(0)->getName();
   assert (ScratchArgs.empty());
   
-  if (strncmp(s, "init", 4) == 0 || strncmp(s, "_init", 5) == 0)
+  if (deriveNamingConvention(s) == InitRule)
     return getInitMethodSummary(ME);
   
   // Look for methods that return an owned object.
diff --git a/test/Analysis/refcnt_naming.m b/test/Analysis/refcnt_naming.m
index ee89d24..9ffbf6c 100644
--- a/test/Analysis/refcnt_naming.m
+++ b/test/Analysis/refcnt_naming.m
@@ -10,6 +10,14 @@
 @interface NSObject <NSObject> {} @end
 @class NSArray, NSString, NSURL;
 
+@interface NamingTest : NSObject {}
+-(NSObject*)photocopy;    // read as "photocopy"
+-(NSObject*)photoCopy;    // read as "photo Copy"
+-(NSObject*)__blebPRCopy; // read as "bleb PR Copy"
+-(NSObject*)__blebPRcopy; // read as "bleb P Rcopy"
+-(NSObject*)new_theprefixdoesnotcount; // "new" is a prefix; the rest is one word
+@end
+
 @interface MyClass : NSObject
 {
   id myObject;
@@ -34,6 +42,14 @@
   return url; // no-warning
 }
 
+// Discards each result; trailing comments say whether a leak is expected.
+void testNames(NamingTest* x) {
+  [x photocopy]; // no-warning
+  [x photoCopy]; // expected-warning{{leak}}
+  [x __blebPRCopy]; // expected-warning{{leak}}
+  [x __blebPRcopy]; // no-warning
+  [x new_theprefixdoesnotcount]; // no-warning
+}
 
 - (void)addObject:(id)X
 {