Author: benhamilton Date: Wed Jan 17 09:33:08 2018 New Revision: 322690 URL: http://llvm.org/viewvc/llvm-project?rev=322690&view=rev Log: [Format] Improve ObjC header guessing heuristic
Summary: This improves upon the previous Objective-C header guessing heuristic from rC320479. Now, we run the lexer on C++ header files and look for Objective-C keywords and syntax. We also look for Foundation types. Test Plan: make -j12 FormatTests && ./tools/clang/unittests/Format/FormatTests Reviewers: jolesiak, krasimir Reviewed By: jolesiak Subscribers: klimek, cfe-commits Differential Revision: https://reviews.llvm.org/D42135 Modified: cfe/trunk/lib/Format/Format.cpp cfe/trunk/unittests/Format/FormatTestObjC.cpp Modified: cfe/trunk/lib/Format/Format.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=322690&r1=322689&r2=322690&view=diff ============================================================================== --- cfe/trunk/lib/Format/Format.cpp (original) +++ cfe/trunk/lib/Format/Format.cpp Wed Jan 17 09:33:08 2018 @@ -32,6 +32,7 @@ #include "clang/Basic/VirtualFileSystem.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Path.h" @@ -40,6 +41,7 @@ #include <algorithm> #include <memory> #include <string> +#include <unordered_set> #define DEBUG_TYPE "format-formatter" @@ -48,6 +50,16 @@ using clang::format::FormatStyle; LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory) LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat) +namespace std { +// Allow using StringRef in std::unordered_set. +template <> struct hash<llvm::StringRef> { +public: + size_t operator()(const llvm::StringRef &s) const { + return llvm::hash_value(s); + } +}; +} // namespace std + namespace llvm { namespace yaml { template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { @@ -1400,6 +1412,101 @@ private: std::set<FormatToken *, FormatTokenLess> DeletedTokens; }; +class ObjCHeaderStyleGuesser : public TokenAnalyzer { +public: + ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style) + : TokenAnalyzer(Env, Style), IsObjC(false) {} + + std::pair<tooling::Replacements, unsigned> + analyze(TokenAnnotator &Annotator, + SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) override { + assert(Style.Language == FormatStyle::LK_Cpp); + IsObjC = guessIsObjC(AnnotatedLines, Tokens.getKeywords()); + tooling::Replacements Result; + return {Result, 0}; + } + + bool isObjC() { return IsObjC; } + +private: + static bool guessIsObjC(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + const AdditionalKeywords &Keywords) { + static const std::unordered_set<StringRef> FoundationIdentifiers = { + "CGFloat", + "NSAffineTransform", + "NSArray", + "NSAttributedString", + "NSCache", + "NSCharacterSet", + "NSCountedSet", + "NSData", + "NSDataDetector", + "NSDecimal", + "NSDecimalNumber", + "NSDictionary", + "NSEdgeInsets", + "NSHashTable", + "NSIndexPath", + "NSIndexSet", + "NSInteger", + "NSLocale", + "NSMapTable", + "NSMutableArray", + "NSMutableAttributedString", + "NSMutableCharacterSet", + "NSMutableData", + "NSMutableDictionary", + "NSMutableIndexSet", + "NSMutableOrderedSet", + "NSMutableSet", + "NSMutableString", + "NSNumber", + "NSNumberFormatter", + "NSOrderedSet", + "NSPoint", + "NSPointerArray", + "NSRange", + "NSRect", + "NSRegularExpression", + "NSSet", + "NSSize", + "NSString", + "NSUInteger", + "NSURL", + "NSURLComponents", + "NSURLQueryItem", + "NSUUID", + }; + + for (auto &Line : AnnotatedLines) { + for (FormatToken *FormatTok = Line->First->Next; FormatTok; + FormatTok = FormatTok->Next) { + if ((FormatTok->Previous->is(tok::at) && + (FormatTok->isObjCAtKeyword(tok::objc_interface) || + FormatTok->isObjCAtKeyword(tok::objc_implementation) || + FormatTok->isObjCAtKeyword(tok::objc_protocol) || + FormatTok->isObjCAtKeyword(tok::objc_end) || + FormatTok->isOneOf(tok::numeric_constant, tok::l_square, + tok::l_brace))) || + (FormatTok->Tok.isAnyIdentifier() && + FoundationIdentifiers.find(FormatTok->TokenText) != + FoundationIdentifiers.end()) || + FormatTok->is(TT_ObjCStringLiteral) || + FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, + TT_ObjCBlockLBrace, TT_ObjCBlockLParen, + TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr, + TT_ObjCMethodSpecifier, TT_ObjCProperty)) { + return true; + } + } + } + return false; + } + + bool IsObjC; +}; + struct IncludeDirective { StringRef Filename; StringRef Text; @@ -2185,14 +2292,15 @@ llvm::Expected<FormatStyle> getStyle(Str FormatStyle Style = getLLVMStyle(); Style.Language = getLanguageByFileName(FileName); - // This is a very crude detection of whether a header contains ObjC code that - // should be improved over time and probably be done on tokens, not one the - // bare content of the file. - if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") && - (Code.contains("\n- (") || Code.contains("\n+ (") || - Code.contains("\n@end\n") || Code.contains("\n@end ") || - Code.endswith("@end"))) - Style.Language = FormatStyle::LK_ObjC; + if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h")) { + std::unique_ptr<Environment> Env = + Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); + ObjCHeaderStyleGuesser Guesser(*Env, Style); + Guesser.process(); + if (Guesser.isObjC()) { + Style.Language = FormatStyle::LK_ObjC; + } + } FormatStyle FallbackStyle = getNoStyle(); if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle)) Modified: cfe/trunk/unittests/Format/FormatTestObjC.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestObjC.cpp?rev=322690&r1=322689&r2=322690&view=diff ============================================================================== --- cfe/trunk/unittests/Format/FormatTestObjC.cpp (original) +++ cfe/trunk/unittests/Format/FormatTestObjC.cpp Wed Jan 17 09:33:08 2018 @@ -94,6 +94,66 @@ TEST(FormatTestObjCStyle, DetectsObjCInH Style = getStyle("LLVM", "a.h", "none", "void f() {}"); ASSERT_TRUE((bool)Style); EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = getStyle("{}", "a.h", "none", "@interface Foo\n@end\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", + "const int interface = 1;\nconst int end = 2;\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = getStyle("{}", "a.h", "none", "@protocol Foo\n@end\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", + "const int protocol = 1;\nconst int end = 2;\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = getStyle("{}", "a.h", "none", "extern NSString *kFoo;\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = + getStyle("{}", "a.h", "none", "typedef NS_ENUM(NSInteger, Foo) {};\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", "enum Foo {};"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = getStyle("{}", "a.h", "none", "extern NSInteger Foo();\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = + getStyle("{}", "a.h", "none", "inline void Foo() { Log(@\"Foo\"); }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = + getStyle("{}", "a.h", "none", "inline void Foo() { Log(\"Foo\"); }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); + + Style = + getStyle("{}", "a.h", "none", "inline void Foo() { id = @[1, 2, 3]; }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", + "inline void Foo() { id foo = @{1: 2, 3: 4, 5: 6}; }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_ObjC, Style->Language); + + Style = getStyle("{}", "a.h", "none", + "inline void Foo() { int foo[] = {1, 2, 3}; }\n"); + ASSERT_TRUE((bool)Style); + EXPECT_EQ(FormatStyle::LK_Cpp, Style->Language); } TEST_F(FormatTestObjC, FormatObjCTryCatch) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits