Ted Kremenek | 826a345 | 2010-07-16 02:11:22 +0000 | [diff] [blame] | 1 | // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // Shared details for processing format strings of printf and scanf |
| 11 | // (and friends). |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "FormatStringParsing.h" |
| 16 | |
| 17 | using clang::analyze_format_string::ArgTypeResult; |
| 18 | using clang::analyze_format_string::FormatStringHandler; |
| 19 | using clang::analyze_format_string::FormatSpecifier; |
| 20 | using clang::analyze_format_string::LengthModifier; |
| 21 | using clang::analyze_format_string::OptionalAmount; |
| 22 | using clang::analyze_format_string::PositionContext; |
Ted Kremenek | a412a49 | 2010-07-20 20:04:42 +0000 | [diff] [blame] | 23 | using clang::analyze_format_string::ConversionSpecifier; |
Ted Kremenek | 826a345 | 2010-07-16 02:11:22 +0000 | [diff] [blame] | 24 | using namespace clang; |
| 25 | |
| 26 | // Key function to FormatStringHandler. |
| 27 | FormatStringHandler::~FormatStringHandler() {} |
| 28 | |
| 29 | //===----------------------------------------------------------------------===// |
| 30 | // Functions for parsing format strings components in both printf and |
| 31 | // scanf format strings. |
| 32 | //===----------------------------------------------------------------------===// |
| 33 | |
| 34 | OptionalAmount |
| 35 | clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { |
| 36 | const char *I = Beg; |
| 37 | UpdateOnReturn <const char*> UpdateBeg(Beg, I); |
| 38 | |
| 39 | unsigned accumulator = 0; |
| 40 | bool hasDigits = false; |
| 41 | |
| 42 | for ( ; I != E; ++I) { |
| 43 | char c = *I; |
| 44 | if (c >= '0' && c <= '9') { |
| 45 | hasDigits = true; |
| 46 | accumulator = (accumulator * 10) + (c - '0'); |
| 47 | continue; |
| 48 | } |
| 49 | |
| 50 | if (hasDigits) |
| 51 | return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, |
| 52 | false); |
| 53 | |
| 54 | break; |
| 55 | } |
| 56 | |
| 57 | return OptionalAmount(); |
| 58 | } |
| 59 | |
| 60 | OptionalAmount |
| 61 | clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, |
| 62 | const char *E, |
| 63 | unsigned &argIndex) { |
| 64 | if (*Beg == '*') { |
| 65 | ++Beg; |
| 66 | return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); |
| 67 | } |
| 68 | |
| 69 | return ParseAmount(Beg, E); |
| 70 | } |
| 71 | |
| 72 | OptionalAmount |
| 73 | clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, |
| 74 | const char *Start, |
| 75 | const char *&Beg, |
| 76 | const char *E, |
| 77 | PositionContext p) { |
| 78 | if (*Beg == '*') { |
| 79 | const char *I = Beg + 1; |
| 80 | const OptionalAmount &Amt = ParseAmount(I, E); |
| 81 | |
| 82 | if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { |
| 83 | H.HandleInvalidPosition(Beg, I - Beg, p); |
| 84 | return OptionalAmount(false); |
| 85 | } |
| 86 | |
| 87 | if (I == E) { |
| 88 | // No more characters left? |
| 89 | H.HandleIncompleteSpecifier(Start, E - Start); |
| 90 | return OptionalAmount(false); |
| 91 | } |
| 92 | |
| 93 | assert(Amt.getHowSpecified() == OptionalAmount::Constant); |
| 94 | |
| 95 | if (*I == '$') { |
| 96 | // Handle positional arguments |
| 97 | |
| 98 | // Special case: '*0$', since this is an easy mistake. |
| 99 | if (Amt.getConstantAmount() == 0) { |
| 100 | H.HandleZeroPosition(Beg, I - Beg + 1); |
| 101 | return OptionalAmount(false); |
| 102 | } |
| 103 | |
| 104 | const char *Tmp = Beg; |
| 105 | Beg = ++I; |
| 106 | |
| 107 | return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, |
| 108 | Tmp, 0, true); |
| 109 | } |
| 110 | |
| 111 | H.HandleInvalidPosition(Beg, I - Beg, p); |
| 112 | return OptionalAmount(false); |
| 113 | } |
| 114 | |
| 115 | return ParseAmount(Beg, E); |
| 116 | } |
| 117 | |
| 118 | |
| 119 | bool |
| 120 | clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, |
| 121 | FormatSpecifier &CS, |
| 122 | const char *Start, |
| 123 | const char *&Beg, const char *E, |
| 124 | unsigned *argIndex) { |
| 125 | // FIXME: Support negative field widths. |
| 126 | if (argIndex) { |
| 127 | CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); |
| 128 | } |
| 129 | else { |
| 130 | const OptionalAmount Amt = |
| 131 | ParsePositionAmount(H, Start, Beg, E, |
| 132 | analyze_format_string::FieldWidthPos); |
| 133 | |
| 134 | if (Amt.isInvalid()) |
| 135 | return true; |
| 136 | CS.setFieldWidth(Amt); |
| 137 | } |
| 138 | return false; |
| 139 | } |
| 140 | |
| 141 | bool |
| 142 | clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, |
| 143 | FormatSpecifier &FS, |
| 144 | const char *Start, |
| 145 | const char *&Beg, |
| 146 | const char *E) { |
| 147 | const char *I = Beg; |
| 148 | |
| 149 | const OptionalAmount &Amt = ParseAmount(I, E); |
| 150 | |
| 151 | if (I == E) { |
| 152 | // No more characters left? |
| 153 | H.HandleIncompleteSpecifier(Start, E - Start); |
| 154 | return true; |
| 155 | } |
| 156 | |
| 157 | if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { |
| 158 | // Special case: '%0$', since this is an easy mistake. |
| 159 | if (Amt.getConstantAmount() == 0) { |
| 160 | H.HandleZeroPosition(Start, I - Start); |
| 161 | return true; |
| 162 | } |
| 163 | |
| 164 | FS.setArgIndex(Amt.getConstantAmount() - 1); |
| 165 | FS.setUsesPositionalArg(); |
| 166 | // Update the caller's pointer if we decided to consume |
| 167 | // these characters. |
| 168 | Beg = I; |
| 169 | return false; |
| 170 | } |
| 171 | |
| 172 | return false; |
| 173 | } |
| 174 | |
| 175 | bool |
| 176 | clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, |
| 177 | const char *&I, |
| 178 | const char *E) { |
| 179 | LengthModifier::Kind lmKind = LengthModifier::None; |
| 180 | const char *lmPosition = I; |
| 181 | switch (*I) { |
| 182 | default: |
| 183 | return false; |
| 184 | case 'h': |
| 185 | ++I; |
| 186 | lmKind = (I != E && *I == 'h') ? |
| 187 | ++I, LengthModifier::AsChar : LengthModifier::AsShort; |
| 188 | break; |
| 189 | case 'l': |
| 190 | ++I; |
| 191 | lmKind = (I != E && *I == 'l') ? |
| 192 | ++I, LengthModifier::AsLongLong : LengthModifier::AsLong; |
| 193 | break; |
| 194 | case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; |
| 195 | case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; |
| 196 | case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; |
| 197 | case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; |
| 198 | case 'q': lmKind = LengthModifier::AsLongLong; ++I; break; |
| 199 | } |
| 200 | LengthModifier lm(lmPosition, lmKind); |
| 201 | FS.setLengthModifier(lm); |
| 202 | return true; |
| 203 | } |
| 204 | |
| 205 | //===----------------------------------------------------------------------===// |
| 206 | // Methods on ArgTypeResult. |
| 207 | //===----------------------------------------------------------------------===// |
| 208 | |
| 209 | bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { |
| 210 | switch (K) { |
| 211 | case InvalidTy: |
| 212 | assert(false && "ArgTypeResult must be valid"); |
| 213 | return true; |
| 214 | |
| 215 | case UnknownTy: |
| 216 | return true; |
| 217 | |
| 218 | case SpecificTy: { |
| 219 | argTy = C.getCanonicalType(argTy).getUnqualifiedType(); |
| 220 | if (T == argTy) |
| 221 | return true; |
| 222 | if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) |
| 223 | switch (BT->getKind()) { |
| 224 | default: |
| 225 | break; |
| 226 | case BuiltinType::Char_S: |
| 227 | case BuiltinType::SChar: |
| 228 | return T == C.UnsignedCharTy; |
| 229 | case BuiltinType::Char_U: |
| 230 | case BuiltinType::UChar: |
| 231 | return T == C.SignedCharTy; |
| 232 | case BuiltinType::Short: |
| 233 | return T == C.UnsignedShortTy; |
| 234 | case BuiltinType::UShort: |
| 235 | return T == C.ShortTy; |
| 236 | case BuiltinType::Int: |
| 237 | return T == C.UnsignedIntTy; |
| 238 | case BuiltinType::UInt: |
| 239 | return T == C.IntTy; |
| 240 | case BuiltinType::Long: |
| 241 | return T == C.UnsignedLongTy; |
| 242 | case BuiltinType::ULong: |
| 243 | return T == C.LongTy; |
| 244 | case BuiltinType::LongLong: |
| 245 | return T == C.UnsignedLongLongTy; |
| 246 | case BuiltinType::ULongLong: |
| 247 | return T == C.LongLongTy; |
| 248 | } |
| 249 | return false; |
| 250 | } |
| 251 | |
| 252 | case CStrTy: { |
| 253 | const PointerType *PT = argTy->getAs<PointerType>(); |
| 254 | if (!PT) |
| 255 | return false; |
| 256 | QualType pointeeTy = PT->getPointeeType(); |
| 257 | if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) |
| 258 | switch (BT->getKind()) { |
| 259 | case BuiltinType::Void: |
| 260 | case BuiltinType::Char_U: |
| 261 | case BuiltinType::UChar: |
| 262 | case BuiltinType::Char_S: |
| 263 | case BuiltinType::SChar: |
| 264 | return true; |
| 265 | default: |
| 266 | break; |
| 267 | } |
| 268 | |
| 269 | return false; |
| 270 | } |
| 271 | |
| 272 | case WCStrTy: { |
| 273 | const PointerType *PT = argTy->getAs<PointerType>(); |
| 274 | if (!PT) |
| 275 | return false; |
| 276 | QualType pointeeTy = |
| 277 | C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); |
| 278 | return pointeeTy == C.getWCharType(); |
| 279 | } |
Ted Kremenek | 9325eaf | 2010-08-24 22:24:51 +0000 | [diff] [blame] | 280 | |
| 281 | case WIntTy: { |
| 282 | // Instead of doing a lookup for the definition of 'wint_t' (which |
| 283 | // is defined by the system headers) instead see if wchar_t and |
| 284 | // the argument type promote to the same type. |
| 285 | QualType PromoWChar = |
| 286 | C.getWCharType()->isPromotableIntegerType() |
| 287 | ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); |
| 288 | QualType PromoArg = |
| 289 | argTy->isPromotableIntegerType() |
| 290 | ? C.getPromotedIntegerType(argTy) : argTy; |
| 291 | |
| 292 | PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); |
| 293 | PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); |
| 294 | |
| 295 | return PromoWChar == PromoArg; |
| 296 | } |
Ted Kremenek | 826a345 | 2010-07-16 02:11:22 +0000 | [diff] [blame] | 297 | |
| 298 | case CPointerTy: |
Anders Carlsson | 6242599 | 2010-11-06 14:58:53 +0000 | [diff] [blame] | 299 | return argTy->isPointerType() || argTy->isObjCObjectPointerType() || |
| 300 | argTy->isNullPtrType(); |
Ted Kremenek | 826a345 | 2010-07-16 02:11:22 +0000 | [diff] [blame] | 301 | |
| 302 | case ObjCPointerTy: |
Daniel Dunbar | d6a4d18 | 2011-06-28 23:33:55 +0000 | [diff] [blame] | 303 | return argTy->getAs<ObjCObjectPointerType>() != NULL; |
Ted Kremenek | 826a345 | 2010-07-16 02:11:22 +0000 | [diff] [blame] | 304 | } |
| 305 | |
| 306 | // FIXME: Should be unreachable, but Clang is currently emitting |
| 307 | // a warning. |
| 308 | return false; |
| 309 | } |
| 310 | |
| 311 | QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { |
| 312 | switch (K) { |
| 313 | case InvalidTy: |
| 314 | assert(false && "No representative type for Invalid ArgTypeResult"); |
| 315 | // Fall-through. |
| 316 | case UnknownTy: |
| 317 | return QualType(); |
| 318 | case SpecificTy: |
| 319 | return T; |
| 320 | case CStrTy: |
| 321 | return C.getPointerType(C.CharTy); |
| 322 | case WCStrTy: |
| 323 | return C.getPointerType(C.getWCharType()); |
| 324 | case ObjCPointerTy: |
| 325 | return C.ObjCBuiltinIdTy; |
| 326 | case CPointerTy: |
| 327 | return C.VoidPtrTy; |
Ted Kremenek | 9325eaf | 2010-08-24 22:24:51 +0000 | [diff] [blame] | 328 | case WIntTy: { |
| 329 | QualType WC = C.getWCharType(); |
| 330 | return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; |
| 331 | } |
Ted Kremenek | 826a345 | 2010-07-16 02:11:22 +0000 | [diff] [blame] | 332 | } |
| 333 | |
| 334 | // FIXME: Should be unreachable, but Clang is currently emitting |
| 335 | // a warning. |
| 336 | return QualType(); |
| 337 | } |
| 338 | |
| 339 | //===----------------------------------------------------------------------===// |
| 340 | // Methods on OptionalAmount. |
| 341 | //===----------------------------------------------------------------------===// |
| 342 | |
| 343 | ArgTypeResult |
| 344 | analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { |
| 345 | return Ctx.IntTy; |
| 346 | } |
| 347 | |
| 348 | //===----------------------------------------------------------------------===// |
| 349 | // Methods on LengthModifier. |
| 350 | //===----------------------------------------------------------------------===// |
| 351 | |
| 352 | const char * |
| 353 | analyze_format_string::LengthModifier::toString() const { |
| 354 | switch (kind) { |
| 355 | case AsChar: |
| 356 | return "hh"; |
| 357 | case AsShort: |
| 358 | return "h"; |
| 359 | case AsLong: // or AsWideChar |
| 360 | return "l"; |
| 361 | case AsLongLong: |
| 362 | return "ll"; |
| 363 | case AsIntMax: |
| 364 | return "j"; |
| 365 | case AsSizeT: |
| 366 | return "z"; |
| 367 | case AsPtrDiff: |
| 368 | return "t"; |
| 369 | case AsLongDouble: |
| 370 | return "L"; |
| 371 | case None: |
| 372 | return ""; |
| 373 | } |
| 374 | return NULL; |
| 375 | } |
| 376 | |
| 377 | //===----------------------------------------------------------------------===// |
| 378 | // Methods on OptionalAmount. |
| 379 | //===----------------------------------------------------------------------===// |
| 380 | |
Ted Kremenek | a412a49 | 2010-07-20 20:04:42 +0000 | [diff] [blame] | 381 | void OptionalAmount::toString(llvm::raw_ostream &os) const { |
Ted Kremenek | 826a345 | 2010-07-16 02:11:22 +0000 | [diff] [blame] | 382 | switch (hs) { |
| 383 | case Invalid: |
| 384 | case NotSpecified: |
| 385 | return; |
| 386 | case Arg: |
| 387 | if (UsesDotPrefix) |
| 388 | os << "."; |
| 389 | if (usesPositionalArg()) |
| 390 | os << "*" << getPositionalArgIndex() << "$"; |
| 391 | else |
| 392 | os << "*"; |
| 393 | break; |
| 394 | case Constant: |
| 395 | if (UsesDotPrefix) |
| 396 | os << "."; |
| 397 | os << amt; |
| 398 | break; |
| 399 | } |
| 400 | } |
| 401 | |
Ted Kremenek | a412a49 | 2010-07-20 20:04:42 +0000 | [diff] [blame] | 402 | //===----------------------------------------------------------------------===// |
// Methods on FormatSpecifier.
Ted Kremenek | a412a49 | 2010-07-20 20:04:42 +0000 | [diff] [blame] | 404 | //===----------------------------------------------------------------------===// |
| 405 | |
| 406 | bool FormatSpecifier::hasValidLengthModifier() const { |
| 407 | switch (LM.getKind()) { |
| 408 | case LengthModifier::None: |
| 409 | return true; |
| 410 | |
| 411 | // Handle most integer flags |
| 412 | case LengthModifier::AsChar: |
| 413 | case LengthModifier::AsShort: |
| 414 | case LengthModifier::AsLongLong: |
| 415 | case LengthModifier::AsIntMax: |
| 416 | case LengthModifier::AsSizeT: |
| 417 | case LengthModifier::AsPtrDiff: |
| 418 | switch (CS.getKind()) { |
| 419 | case ConversionSpecifier::dArg: |
| 420 | case ConversionSpecifier::iArg: |
| 421 | case ConversionSpecifier::oArg: |
| 422 | case ConversionSpecifier::uArg: |
| 423 | case ConversionSpecifier::xArg: |
| 424 | case ConversionSpecifier::XArg: |
| 425 | case ConversionSpecifier::nArg: |
| 426 | return true; |
| 427 | default: |
| 428 | return false; |
| 429 | } |
| 430 | |
| 431 | // Handle 'l' flag |
| 432 | case LengthModifier::AsLong: |
| 433 | switch (CS.getKind()) { |
| 434 | case ConversionSpecifier::dArg: |
| 435 | case ConversionSpecifier::iArg: |
| 436 | case ConversionSpecifier::oArg: |
| 437 | case ConversionSpecifier::uArg: |
| 438 | case ConversionSpecifier::xArg: |
| 439 | case ConversionSpecifier::XArg: |
| 440 | case ConversionSpecifier::aArg: |
| 441 | case ConversionSpecifier::AArg: |
| 442 | case ConversionSpecifier::fArg: |
| 443 | case ConversionSpecifier::FArg: |
| 444 | case ConversionSpecifier::eArg: |
| 445 | case ConversionSpecifier::EArg: |
| 446 | case ConversionSpecifier::gArg: |
| 447 | case ConversionSpecifier::GArg: |
| 448 | case ConversionSpecifier::nArg: |
| 449 | case ConversionSpecifier::cArg: |
| 450 | case ConversionSpecifier::sArg: |
| 451 | return true; |
| 452 | default: |
| 453 | return false; |
| 454 | } |
| 455 | |
| 456 | case LengthModifier::AsLongDouble: |
| 457 | switch (CS.getKind()) { |
| 458 | case ConversionSpecifier::aArg: |
| 459 | case ConversionSpecifier::AArg: |
| 460 | case ConversionSpecifier::fArg: |
| 461 | case ConversionSpecifier::FArg: |
| 462 | case ConversionSpecifier::eArg: |
| 463 | case ConversionSpecifier::EArg: |
| 464 | case ConversionSpecifier::gArg: |
| 465 | case ConversionSpecifier::GArg: |
| 466 | return true; |
| 467 | default: |
| 468 | return false; |
| 469 | } |
| 470 | } |
| 471 | return false; |
| 472 | } |
| 473 | |
| 474 | |