| // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Shared details for processing format strings of printf and scanf |
| // (and friends). |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "FormatStringParsing.h" |
| #include "clang/Basic/LangOptions.h" |
| |
| using clang::analyze_format_string::ArgTypeResult; |
| using clang::analyze_format_string::FormatStringHandler; |
| using clang::analyze_format_string::FormatSpecifier; |
| using clang::analyze_format_string::LengthModifier; |
| using clang::analyze_format_string::OptionalAmount; |
| using clang::analyze_format_string::PositionContext; |
| using clang::analyze_format_string::ConversionSpecifier; |
| using namespace clang; |
| |
| // Key function to FormatStringHandler. |
| FormatStringHandler::~FormatStringHandler() {} |
| |
| //===----------------------------------------------------------------------===// |
| // Functions for parsing format strings components in both printf and |
| // scanf format strings. |
| //===----------------------------------------------------------------------===// |
| |
| OptionalAmount |
| clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { |
| const char *I = Beg; |
| UpdateOnReturn <const char*> UpdateBeg(Beg, I); |
| |
| unsigned accumulator = 0; |
| bool hasDigits = false; |
| |
| for ( ; I != E; ++I) { |
| char c = *I; |
| if (c >= '0' && c <= '9') { |
| hasDigits = true; |
| accumulator = (accumulator * 10) + (c - '0'); |
| continue; |
| } |
| |
| if (hasDigits) |
| return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, |
| false); |
| |
| break; |
| } |
| |
| return OptionalAmount(); |
| } |
| |
| OptionalAmount |
| clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, |
| const char *E, |
| unsigned &argIndex) { |
| if (*Beg == '*') { |
| ++Beg; |
| return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); |
| } |
| |
| return ParseAmount(Beg, E); |
| } |
| |
| OptionalAmount |
| clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, |
| const char *Start, |
| const char *&Beg, |
| const char *E, |
| PositionContext p) { |
| if (*Beg == '*') { |
| const char *I = Beg + 1; |
| const OptionalAmount &Amt = ParseAmount(I, E); |
| |
| if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { |
| H.HandleInvalidPosition(Beg, I - Beg, p); |
| return OptionalAmount(false); |
| } |
| |
| if (I == E) { |
| // No more characters left? |
| H.HandleIncompleteSpecifier(Start, E - Start); |
| return OptionalAmount(false); |
| } |
| |
| assert(Amt.getHowSpecified() == OptionalAmount::Constant); |
| |
| if (*I == '$') { |
| // Handle positional arguments |
| |
| // Special case: '*0$', since this is an easy mistake. |
| if (Amt.getConstantAmount() == 0) { |
| H.HandleZeroPosition(Beg, I - Beg + 1); |
| return OptionalAmount(false); |
| } |
| |
| const char *Tmp = Beg; |
| Beg = ++I; |
| |
| return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, |
| Tmp, 0, true); |
| } |
| |
| H.HandleInvalidPosition(Beg, I - Beg, p); |
| return OptionalAmount(false); |
| } |
| |
| return ParseAmount(Beg, E); |
| } |
| |
| |
| bool |
| clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, |
| FormatSpecifier &CS, |
| const char *Start, |
| const char *&Beg, const char *E, |
| unsigned *argIndex) { |
| // FIXME: Support negative field widths. |
| if (argIndex) { |
| CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); |
| } |
| else { |
| const OptionalAmount Amt = |
| ParsePositionAmount(H, Start, Beg, E, |
| analyze_format_string::FieldWidthPos); |
| |
| if (Amt.isInvalid()) |
| return true; |
| CS.setFieldWidth(Amt); |
| } |
| return false; |
| } |
| |
| bool |
| clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, |
| FormatSpecifier &FS, |
| const char *Start, |
| const char *&Beg, |
| const char *E) { |
| const char *I = Beg; |
| |
| const OptionalAmount &Amt = ParseAmount(I, E); |
| |
| if (I == E) { |
| // No more characters left? |
| H.HandleIncompleteSpecifier(Start, E - Start); |
| return true; |
| } |
| |
| if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { |
| // Special case: '%0$', since this is an easy mistake. |
| if (Amt.getConstantAmount() == 0) { |
| H.HandleZeroPosition(Start, I - Start); |
| return true; |
| } |
| |
| FS.setArgIndex(Amt.getConstantAmount() - 1); |
| FS.setUsesPositionalArg(); |
| // Update the caller's pointer if we decided to consume |
| // these characters. |
| Beg = I; |
| return false; |
| } |
| |
| return false; |
| } |
| |
| bool |
| clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, |
| const char *&I, |
| const char *E, |
| const LangOptions &LO, |
| bool IsScanf) { |
| LengthModifier::Kind lmKind = LengthModifier::None; |
| const char *lmPosition = I; |
| switch (*I) { |
| default: |
| return false; |
| case 'h': |
| ++I; |
| lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar) |
| : LengthModifier::AsShort; |
| break; |
| case 'l': |
| ++I; |
| lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong) |
| : LengthModifier::AsLong; |
| break; |
| case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; |
| case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; |
| case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; |
| case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; |
| case 'q': lmKind = LengthModifier::AsQuad; ++I; break; |
| case 'a': |
| if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) { |
| // For scanf in C90, look at the next character to see if this should |
| // be parsed as the GNU extension 'a' length modifier. If not, this |
| // will be parsed as a conversion specifier. |
| ++I; |
| if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { |
| lmKind = LengthModifier::AsAllocate; |
| break; |
| } |
| --I; |
| } |
| return false; |
| case 'm': |
| if (IsScanf) { |
| lmKind = LengthModifier::AsMAllocate; |
| ++I; |
| break; |
| } |
| return false; |
| } |
| LengthModifier lm(lmPosition, lmKind); |
| FS.setLengthModifier(lm); |
| return true; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Methods on ArgTypeResult. |
| //===----------------------------------------------------------------------===// |
| |
| bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { |
| switch (K) { |
| case InvalidTy: |
| llvm_unreachable("ArgTypeResult must be valid"); |
| |
| case UnknownTy: |
| return true; |
| |
| case AnyCharTy: { |
| if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) |
| switch (BT->getKind()) { |
| default: |
| break; |
| case BuiltinType::Char_S: |
| case BuiltinType::SChar: |
| case BuiltinType::UChar: |
| case BuiltinType::Char_U: |
| return true; |
| } |
| return false; |
| } |
| |
| case SpecificTy: { |
| argTy = C.getCanonicalType(argTy).getUnqualifiedType(); |
| if (T == argTy) |
| return true; |
| // Check for "compatible types". |
| if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) |
| switch (BT->getKind()) { |
| default: |
| break; |
| case BuiltinType::Char_S: |
| case BuiltinType::SChar: |
| return T == C.UnsignedCharTy; |
| case BuiltinType::Char_U: |
| case BuiltinType::UChar: |
| return T == C.SignedCharTy; |
| case BuiltinType::Short: |
| return T == C.UnsignedShortTy; |
| case BuiltinType::UShort: |
| return T == C.ShortTy; |
| case BuiltinType::Int: |
| return T == C.UnsignedIntTy; |
| case BuiltinType::UInt: |
| return T == C.IntTy; |
| case BuiltinType::Long: |
| return T == C.UnsignedLongTy; |
| case BuiltinType::ULong: |
| return T == C.LongTy; |
| case BuiltinType::LongLong: |
| return T == C.UnsignedLongLongTy; |
| case BuiltinType::ULongLong: |
| return T == C.LongLongTy; |
| } |
| return false; |
| } |
| |
| case CStrTy: { |
| const PointerType *PT = argTy->getAs<PointerType>(); |
| if (!PT) |
| return false; |
| QualType pointeeTy = PT->getPointeeType(); |
| if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) |
| switch (BT->getKind()) { |
| case BuiltinType::Void: |
| case BuiltinType::Char_U: |
| case BuiltinType::UChar: |
| case BuiltinType::Char_S: |
| case BuiltinType::SChar: |
| return true; |
| default: |
| break; |
| } |
| |
| return false; |
| } |
| |
| case WCStrTy: { |
| const PointerType *PT = argTy->getAs<PointerType>(); |
| if (!PT) |
| return false; |
| QualType pointeeTy = |
| C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); |
| return pointeeTy == C.getWCharType(); |
| } |
| |
| case WIntTy: { |
| // Instead of doing a lookup for the definition of 'wint_t' (which |
| // is defined by the system headers) instead see if wchar_t and |
| // the argument type promote to the same type. |
| QualType PromoWChar = |
| C.getWCharType()->isPromotableIntegerType() |
| ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); |
| QualType PromoArg = |
| argTy->isPromotableIntegerType() |
| ? C.getPromotedIntegerType(argTy) : argTy; |
| |
| PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); |
| PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); |
| |
| return PromoWChar == PromoArg; |
| } |
| |
| case CPointerTy: |
| return argTy->isPointerType() || argTy->isObjCObjectPointerType() || |
| argTy->isNullPtrType(); |
| |
| case ObjCPointerTy: { |
| if (argTy->getAs<ObjCObjectPointerType>() || |
| argTy->getAs<BlockPointerType>()) |
| return true; |
| |
| // Handle implicit toll-free bridging. |
| if (const PointerType *PT = argTy->getAs<PointerType>()) { |
| // Things such as CFTypeRef are really just opaque pointers |
| // to C structs representing CF types that can often be bridged |
| // to Objective-C objects. Since the compiler doesn't know which |
| // structs can be toll-free bridged, we just accept them all. |
| QualType pointee = PT->getPointeeType(); |
| if (pointee->getAsStructureType() || pointee->isVoidType()) |
| return true; |
| } |
| return false; |
| } |
| } |
| |
| llvm_unreachable("Invalid ArgTypeResult Kind!"); |
| } |
| |
| QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { |
| switch (K) { |
| case InvalidTy: |
| llvm_unreachable("No representative type for Invalid ArgTypeResult"); |
| case UnknownTy: |
| return QualType(); |
| case AnyCharTy: |
| return C.CharTy; |
| case SpecificTy: |
| return T; |
| case CStrTy: |
| return C.getPointerType(C.CharTy); |
| case WCStrTy: |
| return C.getPointerType(C.getWCharType()); |
| case ObjCPointerTy: |
| return C.ObjCBuiltinIdTy; |
| case CPointerTy: |
| return C.VoidPtrTy; |
| case WIntTy: { |
| QualType WC = C.getWCharType(); |
| return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; |
| } |
| } |
| |
| llvm_unreachable("Invalid ArgTypeResult Kind!"); |
| } |
| |
| std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const { |
| std::string S = getRepresentativeType(C).getAsString(); |
| if (Name && S != Name) |
| return std::string("'") + Name + "' (aka '" + S + "')"; |
| return std::string("'") + S + "'"; |
| } |
| |
| |
| //===----------------------------------------------------------------------===// |
| // Methods on OptionalAmount. |
| //===----------------------------------------------------------------------===// |
| |
| ArgTypeResult |
| analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { |
| return Ctx.IntTy; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Methods on LengthModifier. |
| //===----------------------------------------------------------------------===// |
| |
| const char * |
| analyze_format_string::LengthModifier::toString() const { |
| switch (kind) { |
| case AsChar: |
| return "hh"; |
| case AsShort: |
| return "h"; |
| case AsLong: // or AsWideChar |
| return "l"; |
| case AsLongLong: |
| return "ll"; |
| case AsQuad: |
| return "q"; |
| case AsIntMax: |
| return "j"; |
| case AsSizeT: |
| return "z"; |
| case AsPtrDiff: |
| return "t"; |
| case AsLongDouble: |
| return "L"; |
| case AsAllocate: |
| return "a"; |
| case AsMAllocate: |
| return "m"; |
| case None: |
| return ""; |
| } |
| return NULL; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Methods on ConversionSpecifier. |
| //===----------------------------------------------------------------------===// |
| |
| const char *ConversionSpecifier::toString() const { |
| switch (kind) { |
| case dArg: return "d"; |
| case iArg: return "i"; |
| case oArg: return "o"; |
| case uArg: return "u"; |
| case xArg: return "x"; |
| case XArg: return "X"; |
| case fArg: return "f"; |
| case FArg: return "F"; |
| case eArg: return "e"; |
| case EArg: return "E"; |
| case gArg: return "g"; |
| case GArg: return "G"; |
| case aArg: return "a"; |
| case AArg: return "A"; |
| case cArg: return "c"; |
| case sArg: return "s"; |
| case pArg: return "p"; |
| case nArg: return "n"; |
| case PercentArg: return "%"; |
| case ScanListArg: return "["; |
| case InvalidSpecifier: return NULL; |
| |
| // MacOS X unicode extensions. |
| case CArg: return "C"; |
| case SArg: return "S"; |
| |
| // Objective-C specific specifiers. |
| case ObjCObjArg: return "@"; |
| |
| // GlibC specific specifiers. |
| case PrintErrno: return "m"; |
| } |
| return NULL; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Methods on OptionalAmount. |
| //===----------------------------------------------------------------------===// |
| |
| void OptionalAmount::toString(raw_ostream &os) const { |
| switch (hs) { |
| case Invalid: |
| case NotSpecified: |
| return; |
| case Arg: |
| if (UsesDotPrefix) |
| os << "."; |
| if (usesPositionalArg()) |
| os << "*" << getPositionalArgIndex() << "$"; |
| else |
| os << "*"; |
| break; |
| case Constant: |
| if (UsesDotPrefix) |
| os << "."; |
| os << amt; |
| break; |
| } |
| } |
| |
| bool FormatSpecifier::hasValidLengthModifier() const { |
| switch (LM.getKind()) { |
| case LengthModifier::None: |
| return true; |
| |
| // Handle most integer flags |
| case LengthModifier::AsChar: |
| case LengthModifier::AsShort: |
| case LengthModifier::AsLongLong: |
| case LengthModifier::AsQuad: |
| case LengthModifier::AsIntMax: |
| case LengthModifier::AsSizeT: |
| case LengthModifier::AsPtrDiff: |
| switch (CS.getKind()) { |
| case ConversionSpecifier::dArg: |
| case ConversionSpecifier::iArg: |
| case ConversionSpecifier::oArg: |
| case ConversionSpecifier::uArg: |
| case ConversionSpecifier::xArg: |
| case ConversionSpecifier::XArg: |
| case ConversionSpecifier::nArg: |
| return true; |
| default: |
| return false; |
| } |
| |
| // Handle 'l' flag |
| case LengthModifier::AsLong: |
| switch (CS.getKind()) { |
| case ConversionSpecifier::dArg: |
| case ConversionSpecifier::iArg: |
| case ConversionSpecifier::oArg: |
| case ConversionSpecifier::uArg: |
| case ConversionSpecifier::xArg: |
| case ConversionSpecifier::XArg: |
| case ConversionSpecifier::aArg: |
| case ConversionSpecifier::AArg: |
| case ConversionSpecifier::fArg: |
| case ConversionSpecifier::FArg: |
| case ConversionSpecifier::eArg: |
| case ConversionSpecifier::EArg: |
| case ConversionSpecifier::gArg: |
| case ConversionSpecifier::GArg: |
| case ConversionSpecifier::nArg: |
| case ConversionSpecifier::cArg: |
| case ConversionSpecifier::sArg: |
| case ConversionSpecifier::ScanListArg: |
| return true; |
| default: |
| return false; |
| } |
| |
| case LengthModifier::AsLongDouble: |
| switch (CS.getKind()) { |
| case ConversionSpecifier::aArg: |
| case ConversionSpecifier::AArg: |
| case ConversionSpecifier::fArg: |
| case ConversionSpecifier::FArg: |
| case ConversionSpecifier::eArg: |
| case ConversionSpecifier::EArg: |
| case ConversionSpecifier::gArg: |
| case ConversionSpecifier::GArg: |
| return true; |
| // GNU extension. |
| case ConversionSpecifier::dArg: |
| case ConversionSpecifier::iArg: |
| case ConversionSpecifier::oArg: |
| case ConversionSpecifier::uArg: |
| case ConversionSpecifier::xArg: |
| case ConversionSpecifier::XArg: |
| return true; |
| default: |
| return false; |
| } |
| |
| case LengthModifier::AsAllocate: |
| switch (CS.getKind()) { |
| case ConversionSpecifier::sArg: |
| case ConversionSpecifier::SArg: |
| case ConversionSpecifier::ScanListArg: |
| return true; |
| default: |
| return false; |
| } |
| |
| case LengthModifier::AsMAllocate: |
| switch (CS.getKind()) { |
| case ConversionSpecifier::cArg: |
| case ConversionSpecifier::CArg: |
| case ConversionSpecifier::sArg: |
| case ConversionSpecifier::SArg: |
| case ConversionSpecifier::ScanListArg: |
| return true; |
| default: |
| return false; |
| } |
| } |
| llvm_unreachable("Invalid LengthModifier Kind!"); |
| } |
| |
| bool FormatSpecifier::hasStandardLengthModifier() const { |
| switch (LM.getKind()) { |
| case LengthModifier::None: |
| case LengthModifier::AsChar: |
| case LengthModifier::AsShort: |
| case LengthModifier::AsLong: |
| case LengthModifier::AsLongLong: |
| case LengthModifier::AsIntMax: |
| case LengthModifier::AsSizeT: |
| case LengthModifier::AsPtrDiff: |
| case LengthModifier::AsLongDouble: |
| return true; |
| case LengthModifier::AsAllocate: |
| case LengthModifier::AsMAllocate: |
| case LengthModifier::AsQuad: |
| return false; |
| } |
| llvm_unreachable("Invalid LengthModifier Kind!"); |
| } |
| |
| bool FormatSpecifier::hasStandardConversionSpecifier(const LangOptions &LangOpt) const { |
| switch (CS.getKind()) { |
| case ConversionSpecifier::cArg: |
| case ConversionSpecifier::dArg: |
| case ConversionSpecifier::iArg: |
| case ConversionSpecifier::oArg: |
| case ConversionSpecifier::uArg: |
| case ConversionSpecifier::xArg: |
| case ConversionSpecifier::XArg: |
| case ConversionSpecifier::fArg: |
| case ConversionSpecifier::FArg: |
| case ConversionSpecifier::eArg: |
| case ConversionSpecifier::EArg: |
| case ConversionSpecifier::gArg: |
| case ConversionSpecifier::GArg: |
| case ConversionSpecifier::aArg: |
| case ConversionSpecifier::AArg: |
| case ConversionSpecifier::sArg: |
| case ConversionSpecifier::pArg: |
| case ConversionSpecifier::nArg: |
| case ConversionSpecifier::ObjCObjArg: |
| case ConversionSpecifier::ScanListArg: |
| case ConversionSpecifier::PercentArg: |
| return true; |
| case ConversionSpecifier::CArg: |
| case ConversionSpecifier::SArg: |
| return LangOpt.ObjC1 || LangOpt.ObjC2; |
| case ConversionSpecifier::InvalidSpecifier: |
| case ConversionSpecifier::PrintErrno: |
| return false; |
| } |
| llvm_unreachable("Invalid ConversionSpecifier Kind!"); |
| } |
| |
| bool FormatSpecifier::hasStandardLengthConversionCombination() const { |
| if (LM.getKind() == LengthModifier::AsLongDouble) { |
| switch(CS.getKind()) { |
| case ConversionSpecifier::dArg: |
| case ConversionSpecifier::iArg: |
| case ConversionSpecifier::oArg: |
| case ConversionSpecifier::uArg: |
| case ConversionSpecifier::xArg: |
| case ConversionSpecifier::XArg: |
| return false; |
| default: |
| return true; |
| } |
| } |
| return true; |
| } |