// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // Shared details for processing format strings of printf and scanf |
| 11 | // (and friends). |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "FormatStringParsing.h" |
| 16 | |
| 17 | using clang::analyze_format_string::ArgTypeResult; |
| 18 | using clang::analyze_format_string::FormatStringHandler; |
| 19 | using clang::analyze_format_string::FormatSpecifier; |
| 20 | using clang::analyze_format_string::LengthModifier; |
| 21 | using clang::analyze_format_string::OptionalAmount; |
| 22 | using clang::analyze_format_string::PositionContext; |
| 23 | using namespace clang; |
| 24 | |
| 25 | // Key function to FormatStringHandler. |
| 26 | FormatStringHandler::~FormatStringHandler() {} |
| 27 | |
| 28 | //===----------------------------------------------------------------------===// |
// Functions for parsing format string components in both printf and
// scanf format strings.
| 31 | //===----------------------------------------------------------------------===// |
| 32 | |
| 33 | OptionalAmount |
| 34 | clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { |
| 35 | const char *I = Beg; |
| 36 | UpdateOnReturn <const char*> UpdateBeg(Beg, I); |
| 37 | |
| 38 | unsigned accumulator = 0; |
| 39 | bool hasDigits = false; |
| 40 | |
| 41 | for ( ; I != E; ++I) { |
| 42 | char c = *I; |
| 43 | if (c >= '0' && c <= '9') { |
| 44 | hasDigits = true; |
| 45 | accumulator = (accumulator * 10) + (c - '0'); |
| 46 | continue; |
| 47 | } |
| 48 | |
| 49 | if (hasDigits) |
| 50 | return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, |
| 51 | false); |
| 52 | |
| 53 | break; |
| 54 | } |
| 55 | |
| 56 | return OptionalAmount(); |
| 57 | } |
| 58 | |
| 59 | OptionalAmount |
| 60 | clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, |
| 61 | const char *E, |
| 62 | unsigned &argIndex) { |
| 63 | if (*Beg == '*') { |
| 64 | ++Beg; |
| 65 | return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); |
| 66 | } |
| 67 | |
| 68 | return ParseAmount(Beg, E); |
| 69 | } |
| 70 | |
| 71 | OptionalAmount |
| 72 | clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, |
| 73 | const char *Start, |
| 74 | const char *&Beg, |
| 75 | const char *E, |
| 76 | PositionContext p) { |
| 77 | if (*Beg == '*') { |
| 78 | const char *I = Beg + 1; |
| 79 | const OptionalAmount &Amt = ParseAmount(I, E); |
| 80 | |
| 81 | if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { |
| 82 | H.HandleInvalidPosition(Beg, I - Beg, p); |
| 83 | return OptionalAmount(false); |
| 84 | } |
| 85 | |
| 86 | if (I == E) { |
| 87 | // No more characters left? |
| 88 | H.HandleIncompleteSpecifier(Start, E - Start); |
| 89 | return OptionalAmount(false); |
| 90 | } |
| 91 | |
| 92 | assert(Amt.getHowSpecified() == OptionalAmount::Constant); |
| 93 | |
| 94 | if (*I == '$') { |
| 95 | // Handle positional arguments |
| 96 | |
| 97 | // Special case: '*0$', since this is an easy mistake. |
| 98 | if (Amt.getConstantAmount() == 0) { |
| 99 | H.HandleZeroPosition(Beg, I - Beg + 1); |
| 100 | return OptionalAmount(false); |
| 101 | } |
| 102 | |
| 103 | const char *Tmp = Beg; |
| 104 | Beg = ++I; |
| 105 | |
| 106 | return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, |
| 107 | Tmp, 0, true); |
| 108 | } |
| 109 | |
| 110 | H.HandleInvalidPosition(Beg, I - Beg, p); |
| 111 | return OptionalAmount(false); |
| 112 | } |
| 113 | |
| 114 | return ParseAmount(Beg, E); |
| 115 | } |
| 116 | |
| 117 | |
| 118 | bool |
| 119 | clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, |
| 120 | FormatSpecifier &CS, |
| 121 | const char *Start, |
| 122 | const char *&Beg, const char *E, |
| 123 | unsigned *argIndex) { |
| 124 | // FIXME: Support negative field widths. |
| 125 | if (argIndex) { |
| 126 | CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); |
| 127 | } |
| 128 | else { |
| 129 | const OptionalAmount Amt = |
| 130 | ParsePositionAmount(H, Start, Beg, E, |
| 131 | analyze_format_string::FieldWidthPos); |
| 132 | |
| 133 | if (Amt.isInvalid()) |
| 134 | return true; |
| 135 | CS.setFieldWidth(Amt); |
| 136 | } |
| 137 | return false; |
| 138 | } |
| 139 | |
| 140 | bool |
| 141 | clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, |
| 142 | FormatSpecifier &FS, |
| 143 | const char *Start, |
| 144 | const char *&Beg, |
| 145 | const char *E) { |
| 146 | const char *I = Beg; |
| 147 | |
| 148 | const OptionalAmount &Amt = ParseAmount(I, E); |
| 149 | |
| 150 | if (I == E) { |
| 151 | // No more characters left? |
| 152 | H.HandleIncompleteSpecifier(Start, E - Start); |
| 153 | return true; |
| 154 | } |
| 155 | |
| 156 | if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { |
| 157 | // Special case: '%0$', since this is an easy mistake. |
| 158 | if (Amt.getConstantAmount() == 0) { |
| 159 | H.HandleZeroPosition(Start, I - Start); |
| 160 | return true; |
| 161 | } |
| 162 | |
| 163 | FS.setArgIndex(Amt.getConstantAmount() - 1); |
| 164 | FS.setUsesPositionalArg(); |
| 165 | // Update the caller's pointer if we decided to consume |
| 166 | // these characters. |
| 167 | Beg = I; |
| 168 | return false; |
| 169 | } |
| 170 | |
| 171 | return false; |
| 172 | } |
| 173 | |
| 174 | bool |
| 175 | clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, |
| 176 | const char *&I, |
| 177 | const char *E) { |
| 178 | LengthModifier::Kind lmKind = LengthModifier::None; |
| 179 | const char *lmPosition = I; |
| 180 | switch (*I) { |
| 181 | default: |
| 182 | return false; |
| 183 | case 'h': |
| 184 | ++I; |
| 185 | lmKind = (I != E && *I == 'h') ? |
| 186 | ++I, LengthModifier::AsChar : LengthModifier::AsShort; |
| 187 | break; |
| 188 | case 'l': |
| 189 | ++I; |
| 190 | lmKind = (I != E && *I == 'l') ? |
| 191 | ++I, LengthModifier::AsLongLong : LengthModifier::AsLong; |
| 192 | break; |
| 193 | case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; |
| 194 | case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; |
| 195 | case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; |
| 196 | case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; |
| 197 | case 'q': lmKind = LengthModifier::AsLongLong; ++I; break; |
| 198 | } |
| 199 | LengthModifier lm(lmPosition, lmKind); |
| 200 | FS.setLengthModifier(lm); |
| 201 | return true; |
| 202 | } |
| 203 | |
| 204 | //===----------------------------------------------------------------------===// |
| 205 | // Methods on ArgTypeResult. |
| 206 | //===----------------------------------------------------------------------===// |
| 207 | |
| 208 | bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { |
| 209 | switch (K) { |
| 210 | case InvalidTy: |
| 211 | assert(false && "ArgTypeResult must be valid"); |
| 212 | return true; |
| 213 | |
| 214 | case UnknownTy: |
| 215 | return true; |
| 216 | |
| 217 | case SpecificTy: { |
| 218 | argTy = C.getCanonicalType(argTy).getUnqualifiedType(); |
| 219 | if (T == argTy) |
| 220 | return true; |
| 221 | if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) |
| 222 | switch (BT->getKind()) { |
| 223 | default: |
| 224 | break; |
| 225 | case BuiltinType::Char_S: |
| 226 | case BuiltinType::SChar: |
| 227 | return T == C.UnsignedCharTy; |
| 228 | case BuiltinType::Char_U: |
| 229 | case BuiltinType::UChar: |
| 230 | return T == C.SignedCharTy; |
| 231 | case BuiltinType::Short: |
| 232 | return T == C.UnsignedShortTy; |
| 233 | case BuiltinType::UShort: |
| 234 | return T == C.ShortTy; |
| 235 | case BuiltinType::Int: |
| 236 | return T == C.UnsignedIntTy; |
| 237 | case BuiltinType::UInt: |
| 238 | return T == C.IntTy; |
| 239 | case BuiltinType::Long: |
| 240 | return T == C.UnsignedLongTy; |
| 241 | case BuiltinType::ULong: |
| 242 | return T == C.LongTy; |
| 243 | case BuiltinType::LongLong: |
| 244 | return T == C.UnsignedLongLongTy; |
| 245 | case BuiltinType::ULongLong: |
| 246 | return T == C.LongLongTy; |
| 247 | } |
| 248 | return false; |
| 249 | } |
| 250 | |
| 251 | case CStrTy: { |
| 252 | const PointerType *PT = argTy->getAs<PointerType>(); |
| 253 | if (!PT) |
| 254 | return false; |
| 255 | QualType pointeeTy = PT->getPointeeType(); |
| 256 | if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) |
| 257 | switch (BT->getKind()) { |
| 258 | case BuiltinType::Void: |
| 259 | case BuiltinType::Char_U: |
| 260 | case BuiltinType::UChar: |
| 261 | case BuiltinType::Char_S: |
| 262 | case BuiltinType::SChar: |
| 263 | return true; |
| 264 | default: |
| 265 | break; |
| 266 | } |
| 267 | |
| 268 | return false; |
| 269 | } |
| 270 | |
| 271 | case WCStrTy: { |
| 272 | const PointerType *PT = argTy->getAs<PointerType>(); |
| 273 | if (!PT) |
| 274 | return false; |
| 275 | QualType pointeeTy = |
| 276 | C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); |
| 277 | return pointeeTy == C.getWCharType(); |
| 278 | } |
| 279 | |
| 280 | case CPointerTy: |
| 281 | return argTy->getAs<PointerType>() != NULL || |
| 282 | argTy->getAs<ObjCObjectPointerType>() != NULL; |
| 283 | |
| 284 | case ObjCPointerTy: |
| 285 | return argTy->getAs<ObjCObjectPointerType>() != NULL; |
| 286 | } |
| 287 | |
| 288 | // FIXME: Should be unreachable, but Clang is currently emitting |
| 289 | // a warning. |
| 290 | return false; |
| 291 | } |
| 292 | |
| 293 | QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { |
| 294 | switch (K) { |
| 295 | case InvalidTy: |
| 296 | assert(false && "No representative type for Invalid ArgTypeResult"); |
| 297 | // Fall-through. |
| 298 | case UnknownTy: |
| 299 | return QualType(); |
| 300 | case SpecificTy: |
| 301 | return T; |
| 302 | case CStrTy: |
| 303 | return C.getPointerType(C.CharTy); |
| 304 | case WCStrTy: |
| 305 | return C.getPointerType(C.getWCharType()); |
| 306 | case ObjCPointerTy: |
| 307 | return C.ObjCBuiltinIdTy; |
| 308 | case CPointerTy: |
| 309 | return C.VoidPtrTy; |
| 310 | } |
| 311 | |
| 312 | // FIXME: Should be unreachable, but Clang is currently emitting |
| 313 | // a warning. |
| 314 | return QualType(); |
| 315 | } |
| 316 | |
| 317 | //===----------------------------------------------------------------------===// |
| 318 | // Methods on OptionalAmount. |
| 319 | //===----------------------------------------------------------------------===// |
| 320 | |
| 321 | ArgTypeResult |
| 322 | analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { |
| 323 | return Ctx.IntTy; |
| 324 | } |
| 325 | |
| 326 | //===----------------------------------------------------------------------===// |
| 327 | // Methods on LengthModifier. |
| 328 | //===----------------------------------------------------------------------===// |
| 329 | |
| 330 | const char * |
| 331 | analyze_format_string::LengthModifier::toString() const { |
| 332 | switch (kind) { |
| 333 | case AsChar: |
| 334 | return "hh"; |
| 335 | case AsShort: |
| 336 | return "h"; |
| 337 | case AsLong: // or AsWideChar |
| 338 | return "l"; |
| 339 | case AsLongLong: |
| 340 | return "ll"; |
| 341 | case AsIntMax: |
| 342 | return "j"; |
| 343 | case AsSizeT: |
| 344 | return "z"; |
| 345 | case AsPtrDiff: |
| 346 | return "t"; |
| 347 | case AsLongDouble: |
| 348 | return "L"; |
| 349 | case None: |
| 350 | return ""; |
| 351 | } |
| 352 | return NULL; |
| 353 | } |
| 354 | |
| 355 | //===----------------------------------------------------------------------===// |
| 356 | // Methods on OptionalAmount. |
| 357 | //===----------------------------------------------------------------------===// |
| 358 | |
| 359 | void |
| 360 | analyze_format_string::OptionalAmount::toString(llvm::raw_ostream &os) const { |
| 361 | switch (hs) { |
| 362 | case Invalid: |
| 363 | case NotSpecified: |
| 364 | return; |
| 365 | case Arg: |
| 366 | if (UsesDotPrefix) |
| 367 | os << "."; |
| 368 | if (usesPositionalArg()) |
| 369 | os << "*" << getPositionalArgIndex() << "$"; |
| 370 | else |
| 371 | os << "*"; |
| 372 | break; |
| 373 | case Constant: |
| 374 | if (UsesDotPrefix) |
| 375 | os << "."; |
| 376 | os << amt; |
| 377 | break; |
| 378 | } |
| 379 | } |
| 380 | |