VitaNuo updated this revision to Diff 490468.
VitaNuo added a comment.

Remove variant logic in generator.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D142092/new/

https://reviews.llvm.org/D142092

Files:
  clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
  clang/tools/include-mapping/cppreference_parser.py
  clang/tools/include-mapping/gen_std.py

Index: clang/tools/include-mapping/gen_std.py
===================================================================
--- clang/tools/include-mapping/gen_std.py
+++ clang/tools/include-mapping/gen_std.py
@@ -13,12 +13,6 @@
 
 The generated files are located in clang/include/Tooling/Inclusions.
 
-Caveats and FIXMEs:
-  - only symbols directly in "std" namespace are added, we should also add std's
-    subnamespace symbols (e.g. chrono).
-  - symbols with multiple variants or defined in multiple headers aren't added,
-    e.g. std::move, std::swap
-
 Usage:
   1. Install BeautifulSoup dependency, see instruction:
        https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
@@ -104,17 +98,12 @@
     os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')
   print(CODE_PREFIX % (args.language.upper(), cppreference_modified_date))
   for symbol in symbols:
-    if len(symbol.headers) == 1:
-      # SYMBOL(unqualified_name, namespace, header)
-      print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
-                                    symbol.headers[0]))
-    elif len(symbol.headers) == 0:
+    if len(symbol.headers) == 0:
       sys.stderr.write("No header found for symbol %s\n" % symbol.name)
     else:
-      # FIXME: support symbols with multiple headers (e.g. std::move).
-      sys.stderr.write("Ambiguous header for symbol %s: %s\n" % (
-          symbol.name, ', '.join(symbol.headers)))
-
-
+      for header in symbol.headers:
+        # SYMBOL(unqualified_name, namespace, header)
+        print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
+                                      header))
 if __name__ == '__main__':
   main()
Index: clang/tools/include-mapping/cppreference_parser.py
===================================================================
--- clang/tools/include-mapping/cppreference_parser.py
+++ clang/tools/include-mapping/cppreference_parser.py
@@ -93,7 +93,7 @@
   <a href="abs.html" title="abs"><tt>abs()</tt></a> (int) <br>
   <a href="acos.html" title="acos"><tt>acos()</tt></a> <br>
 
-  Returns a list of tuple (symbol_name, relative_path_to_symbol_page, variant).
+  Returns a list of tuple (symbol_name, relative_path_to_symbol_page).
   """
   symbols = []
   soup = BeautifulSoup(index_page_html, "html.parser")
@@ -103,13 +103,10 @@
     # This accidentally accepts begin/end despite the (iterator) caption: the
     # (since C++11) note is first. They are good symbols, so the bug is unfixed.
     caption = symbol_href.next_sibling
-    variant = None
-    if isinstance(caption, NavigableString) and "(" in caption:
-      variant = caption.text.strip(" ()")
     symbol_tt = symbol_href.find("tt")
     if symbol_tt:
       symbols.append((symbol_tt.text.rstrip("<>()"), # strip any trailing <>()
-                      symbol_href["href"], variant))
+                      symbol_href["href"]))
   return symbols
 
 
@@ -118,7 +115,7 @@
     return _ParseSymbolPage(f.read(), name)
 
 
-def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept):
+def _GetSymbols(pool, root_dir, index_page_name, namespace):
   """Get all symbols listed in the index page. All symbols should be in the
   given namespace.
 
@@ -134,13 +131,7 @@
   with open(index_page_path, "r") as f:
     # Read each symbol page in parallel.
     results = [] # (symbol_name, promise of [header...])
-    for symbol_name, symbol_page_path, variant in _ParseIndexPage(f.read()):
-      # Variant symbols (e.g. the std::locale version of isalpha) add ambiguity.
-      # FIXME: use these as a fallback rather than ignoring entirely.
-      variants_for_symbol = variants_to_accept.get(
-          (namespace or "") + symbol_name, ())
-      if variant and variant not in variants_for_symbol:
-        continue
+    for symbol_name, symbol_page_path in _ParseIndexPage(f.read()):
       path = os.path.join(root_dir, symbol_page_path)
       if os.path.isfile(path):
         results.append((symbol_name,
@@ -166,13 +157,6 @@
   Args:
     parse_pages: a list of tuples (page_root_dir, index_page_name, namespace)
   """
-  # By default we prefer the non-variant versions, as they're more common. But
-  # there are some symbols, whose variant is more common. This list describes
-  # those symbols.
-  variants_to_accept = {
-      # std::remove<> has variant algorithm.
-      "std::remove": ("algorithm"),
-  }
   symbols = []
   # Run many workers to process individual symbol pages under the symbol index.
   # Don't allow workers to capture Ctrl-C.
@@ -180,8 +164,7 @@
       initializer=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
   try:
     for root_dir, page_name, namespace in parse_pages:
-      symbols.extend(_GetSymbols(pool, root_dir, page_name, namespace,
-                                 variants_to_accept))
+      symbols.extend(_GetSymbols(pool, root_dir, page_name, namespace))
   finally:
     pool.terminate()
     pool.join()
Index: clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
===================================================================
--- clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
+++ clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "clang/AST/Decl.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
 
@@ -16,8 +17,9 @@
 namespace stdlib {
 
 static llvm::StringRef *HeaderNames;
-static std::pair<llvm::StringRef, llvm::StringRef> *SymbolNames;
-static unsigned *SymbolHeaderIDs;
+static llvm::DenseMap<int, std::pair<llvm::StringRef, llvm::StringRef>>
+    *SymbolNames;
+static llvm::SmallVector<unsigned> *SymbolHeaderIDs;
 static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
 // Maps symbol name -> Symbol::ID, within a namespace.
 using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
@@ -29,7 +31,7 @@
 #include "clang/Tooling/Inclusions/CSymbolMap.inc"
 #include "clang/Tooling/Inclusions/StdSymbolMap.inc"
 #undef SYMBOL
-  SymbolNames = new std::remove_reference_t<decltype(*SymbolNames)>[SymCount];
+  SymbolNames = new std::remove_reference_t<decltype(*SymbolNames)>;
   SymbolHeaderIDs =
       new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymCount];
   NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
@@ -46,18 +48,31 @@
     return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
   };
 
-  auto Add = [&, SymIndex(0)](llvm::StringRef Name, llvm::StringRef NS,
-                              llvm::StringRef HeaderName) mutable {
+  auto Add = [&, NextAvailSymIndex(0)](llvm::StringRef Name, llvm::StringRef NS,
+                                       llvm::StringRef HeaderName) mutable {
     if (NS == "None")
       NS = "";
 
-    SymbolNames[SymIndex] = {NS, Name};
-    SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
+    bool IsNewSymbol = true;
+    int SymIndex = NextAvailSymIndex;
+    if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(NS)) {
+      auto It = NSSymbols->find(Name);
+      if (It != NSSymbols->end()) {
+        SymIndex = It->getSecond();
+        IsNewSymbol = false;
+      }
+    }
+
+    SymbolNames->try_emplace(SymIndex, std::make_pair(NS, Name));
+
+    unsigned HeaderID = AddHeader(HeaderName);
+    SymbolHeaderIDs[SymIndex].emplace_back(HeaderID);
 
     NSSymbolMap &NSSymbols = AddNS(NS);
     NSSymbols.try_emplace(Name, SymIndex);
 
-    ++SymIndex;
+    if (IsNewSymbol)
+      ++NextAvailSymIndex;
   };
 #define SYMBOL(Name, NS, Header) Add(#Name, #NS, #Header);
 #include "clang/Tooling/Inclusions/CSymbolMap.inc"
@@ -84,8 +99,20 @@
   return Header(It->second);
 }
 llvm::StringRef Header::name() const { return HeaderNames[ID]; }
-llvm::StringRef Symbol::scope() const { return SymbolNames[ID].first; }
-llvm::StringRef Symbol::name() const { return SymbolNames[ID].second; }
+llvm::StringRef Symbol::scope() const {
+  auto It = SymbolNames->find(ID);
+  if (It != SymbolNames->end()) {
+    return It->second.first;
+  }
+  return "";
+}
+llvm::StringRef Symbol::name() const {
+  auto It = SymbolNames->find(ID);
+  if (It != SymbolNames->end()) {
+    return It->second.second;
+  }
+  return "";
+}
 std::optional<Symbol> Symbol::named(llvm::StringRef Scope,
                                      llvm::StringRef Name) {
   ensureInitialized();
@@ -96,9 +123,14 @@
   }
   return std::nullopt;
 }
-Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
+
+Header Symbol::header() const { return Header(SymbolHeaderIDs[ID][0]); }
 llvm::SmallVector<Header> Symbol::headers() const {
-  return {header()}; // FIXME: multiple in case of ambiguity
+  llvm::SmallVector<Header> result;
+  for (auto HeaderID : SymbolHeaderIDs[ID]) {
+    result.emplace_back(Header(HeaderID));
+  }
+  return result;
 }
 
 Recognizer::Recognizer() { ensureInitialized(); }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to