juliehockett created this revision.
juliehockett added reviewers: klimek, sammccall, jakehehrlich.
juliehockett added a project: clang-tools-extra.
Herald added a subscriber: mgorny.
juliehockett added a dependency: D41102: Setup clang-doc frontend framework.

Implements a simple, in-memory reducer for the mapped output of the initial 
tool. This creates a collection object for storing the deduplicated infos on 
each declaration, and populates that from the mapper output. The collection 
object is serialized to LLVM bitstream. On reading each serialized output, it 
checks to see if a merge is necessary and if so, merges the new info with the 
existing info (preferring the existing one if conflicts exist).

For a more detailed overview of the tool, see the design document on the 
mailing list: RFC: clang-doc proposal 
<http://lists.llvm.org/pipermail/cfe-dev/2017-December/056203.html>


https://reviews.llvm.org/D43341

Files:
  clang-doc/CMakeLists.txt
  clang-doc/ClangDocBinary.cpp
  clang-doc/ClangDocBinary.h
  clang-doc/ClangDocMapper.cpp
  clang-doc/ClangDocMapper.h
  clang-doc/ClangDocReducer.cpp
  clang-doc/ClangDocReducer.h
  clang-doc/ClangDocRepresentation.cpp
  clang-doc/ClangDocRepresentation.h
  clang-doc/tool/ClangDocMain.cpp
  test/clang-doc/mapper-namespace.cpp
  test/clang-doc/mapper-type.cpp

Index: test/clang-doc/mapper-type.cpp
===================================================================
--- test/clang-doc/mapper-type.cpp
+++ test/clang-doc/mapper-type.cpp
@@ -2,136 +2,154 @@
 // RUN: mkdir %t
 // RUN: echo "" > %t/compile_flags.txt
 // RUN: cp "%s" "%t/test.cpp"
-// RUN: clang-doc  --dump --omit-filenames -doxygen -p %t %t/test.cpp | FileCheck %s
+// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp > %t/info.bc
+// RUN: llvm-bcanalyzer %t/info.bc --dump | FileCheck %s
 
 union A { int X; int Y; };
-// CHECK: ---
-// CHECK: KEY: A
-// CHECK: FullyQualifiedName: A
-// CHECK: Name: A
-// CHECK: TagType: 2
-// CHECK: ID: Member
-// CHECK: Type: int
-// CHECK: Name: A::X
-// CHECK: Access: 3
-// CHECK: ID: Member
-// CHECK: Type: int
-// CHECK: Name: A::Y
-// CHECK: Access: 3
-// CHECK: ---
-// CHECK: KEY: A::A
-// CHECK: FullyQualifiedName: A::A
-// CHECK: Name: A
-// CHECK: Namespace: A
 
 enum B { X, Y };
-// CHECK: ---
-// CHECK: KEY: B
-// CHECK: FullyQualifiedName: B
-// CHECK: Name: B
-// CHECK: ID: Member
-// CHECK: Type: X
-// CHECK: Access: 3
-// CHECK: ID: Member
-// CHECK: Type: Y
-// CHECK: Access: 3
 
 struct C { int i; };
-// CHECK: ---
-// CHECK: KEY: C
-// CHECK: FullyQualifiedName: C
-// CHECK: Name: C
-// CHECK: ID: Member
-// CHECK: Type: int
-// CHECK: Name: C::i
-// CHECK: Access: 3
-// CHECK: ---
-// CHECK: KEY: C::C
-// CHECK: FullyQualifiedName: C::C
-// CHECK: Name: C
-// CHECK: Namespace: C
 
 class D {};
-// CHECK: ---
-// CHECK: KEY: D
-// CHECK: FullyQualifiedName: D
-// CHECK: Name: D
-// CHECK: TagType: 3
-// CHECK: ---
-// CHECK: KEY: D::D
-// CHECK: FullyQualifiedName: D::D
-// CHECK: Name: D
-// CHECK: Namespace: D
 
 class E {
-// CHECK: ---
-// CHECK: KEY: E
-// CHECK: FullyQualifiedName: E
-// CHECK: Name: E
-// CHECK: TagType: 3
-// CHECK: ---
-// CHECK: KEY: E::E
-// CHECK: FullyQualifiedName: E::E
-// CHECK: Name: E
-// CHECK: Namespace: E
-
 public:
 	E() {}
-// CHECK: ---
-// CHECK: KEY: _ZN1EC1Ev
-// CHECK: FullyQualifiedName: E::E
-// CHECK: Name: E
-// CHECK: Namespace: E
-// CHECK: MangledName: _ZN1EC1Ev
-// CHECK: Parent: E
-// CHECK: ID: Return
-// CHECK: Type: void
-// CHECK: Access: 3
-
-	 ~E() {}
-// CHECK: ---
-// CHECK: KEY: _ZN1ED1Ev
-// CHECK: FullyQualifiedName: E::~E
-// CHECK: Name: ~E
-// CHECK: Namespace: E
-// CHECK: MangledName: _ZN1ED1Ev
-// CHECK: Parent: E
-// CHECK: ID: Return
-// CHECK: Type: void
-// CHECK: Access: 3
+	~E() {}
 
 protected:
 	void ProtectedMethod();
-// CHECK:  ---
-// CHECK: KEY: _ZN1E15ProtectedMethodEv
-// CHECK: FullyQualifiedName: _ZN1E15ProtectedMethodEv
-// CHECK: Name: ProtectedMethod
-// CHECK: Namespace: E
 };
 
 void E::ProtectedMethod() {}
-// CHECK: ---
-// CHECK: KEY: _ZN1E15ProtectedMethodEv
-// CHECK: FullyQualifiedName: E::ProtectedMethod
-// CHECK: Name: ProtectedMethod
-// CHECK: Namespace: E
-// CHECK: MangledName: _ZN1E15ProtectedMethodEv
-// CHECK: Parent: E
-// CHECK: ID: Return
-// CHECK: Type: void
-// CHECK: Access: 3
-// CHECK: Access: 1
 
 class F : virtual private D, public E {};
-// CHECK: ---
-// CHECK: KEY: F
-// CHECK: FullyQualifiedName: F
-// CHECK: Name: F
-// CHECK: TagType: 3
-// CHECK: Parent: class E
-// CHECK: VParent: class D
-// CHECK: ---
-// CHECK: KEY: F::F
-// CHECK: FullyQualifiedName: F::F
-// CHECK: Name: F
-// CHECK: Namespace: F
+
+// CHECK: <BLOCKINFO_BLOCK/>
+// CHECK: <FunctionBlock NumWords=19 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=4/> blob data = 'E::E'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'E'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'E'
+  // CHECK: <MangledName abbrevid=8 op0=9/> blob data = '_ZN1EC1Ev'
+  // CHECK: <Parent abbrevid=9 op0=1/> blob data = 'E'
+  // CHECK: <NamedTypeBlock NumWords=4 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=3/>
+    // CHECK: <Type abbrevid=5 op0=4/> blob data = 'void'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+// CHECK: </FunctionBlock>
+// CHECK: <FunctionBlock NumWords=20 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=5/> blob data = 'E::~E'
+  // CHECK: <Name abbrevid=5 op0=2/> blob data = '~E'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'E'
+  // CHECK: <MangledName abbrevid=8 op0=9/> blob data = '_ZN1ED1Ev'
+  // CHECK: <Parent abbrevid=9 op0=1/> blob data = 'E'
+  // CHECK: <NamedTypeBlock NumWords=4 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=3/>
+    // CHECK: <Type abbrevid=5 op0=4/> blob data = 'void'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+// CHECK: </FunctionBlock>
+// CHECK: <FunctionBlock NumWords=29 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=18/> blob data = 'E::ProtectedMethod'
+  // CHECK: <Name abbrevid=5 op0=15/> blob data = 'ProtectedMethod'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'E'
+  // CHECK: <MangledName abbrevid=8 op0=24/> blob data = '_ZN1E15ProtectedMethodEv'
+  // CHECK: <Parent abbrevid=9 op0=1/> blob data = 'E'
+  // CHECK: <NamedTypeBlock NumWords=4 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=3/>
+    // CHECK: <Type abbrevid=5 op0=4/> blob data = 'void'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+// CHECK: </FunctionBlock>
+// CHECK: <RecordBlock NumWords=22 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=1/> blob data = 'A'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'A'
+  // CHECK: <TagType abbrevid=8 op0=2/>
+  // CHECK: <NamedTypeBlock NumWords=6 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=2/>
+    // CHECK: <Type abbrevid=5 op0=3/> blob data = 'int'
+    // CHECK: <Name abbrevid=6 op0=4/> blob data = 'A::X'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+  // CHECK: <NamedTypeBlock NumWords=6 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=2/>
+    // CHECK: <Type abbrevid=5 op0=3/> blob data = 'int'
+    // CHECK: <Name abbrevid=6 op0=4/> blob data = 'A::Y'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+// CHECK: </RecordBlock>
+// CHECK: <RecordBlock NumWords=14 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=1/> blob data = 'C'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'C'
+  // CHECK: <TagType abbrevid=8 op0=2/>
+  // CHECK: <NamedTypeBlock NumWords=6 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=2/>
+    // CHECK: <Type abbrevid=5 op0=3/> blob data = 'int'
+    // CHECK: <Name abbrevid=6 op0=4/> blob data = 'C::i'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+// CHECK: </RecordBlock>
+// CHECK: <RecordBlock NumWords=5 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=1/> blob data = 'D'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'D'
+  // CHECK: <TagType abbrevid=8 op0=3/>
+// CHECK: </RecordBlock>
+// CHECK: <RecordBlock NumWords=5 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=1/> blob data = 'E'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'E'
+  // CHECK: <TagType abbrevid=8 op0=3/>
+// CHECK: </RecordBlock>
+// CHECK: <RecordBlock NumWords=12 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=1/> blob data = 'F'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'F'
+  // CHECK: <TagType abbrevid=8 op0=3/>
+  // CHECK: <Parent abbrevid=9 op0=7/> blob data = 'class E'
+  // CHECK: <VParent abbrevid=10 op0=7/> blob data = 'class D'
+// CHECK: </RecordBlock>
+// CHECK: <EnumBlock NumWords=18 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=1/> blob data = 'B'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'B'
+  // CHECK: <Scoped abbrevid=8 op0=2/>
+  // CHECK: <NamedTypeBlock NumWords=4 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=2/>
+    // CHECK: <Type abbrevid=5 op0=1/> blob data = 'X'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+  // CHECK: <NamedTypeBlock NumWords=4 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=2/>
+    // CHECK: <Type abbrevid=5 op0=1/> blob data = 'Y'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+// CHECK: </EnumBlock>
+// CHECK: <NonDefBlock NumWords=7 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=4/> blob data = 'F::F'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'F'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'F'
+  // CHECK: <Type abbrevid=7 op0=2/>
+// CHECK: </NonDefBlock>
+// CHECK: <NonDefBlock NumWords=7 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=4/> blob data = 'A::A'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'A'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'A'
+  // CHECK: <Type abbrevid=7 op0=2/>
+// CHECK: </NonDefBlock>
+// CHECK: <NonDefBlock NumWords=7 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=4/> blob data = 'C::C'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'C'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'C'
+  // CHECK: <Type abbrevid=7 op0=2/>
+// CHECK: </NonDefBlock>
+// CHECK: <NonDefBlock NumWords=7 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=4/> blob data = 'D::D'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'D'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'D'
+  // CHECK: <Type abbrevid=7 op0=2/>
+// CHECK: </NonDefBlock>
+// CHECK: <NonDefBlock NumWords=7 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=4/> blob data = 'E::E'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'E'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'E'
+  // CHECK: <Type abbrevid=7 op0=2/>
+// CHECK: </NonDefBlock>
Index: test/clang-doc/mapper-namespace.cpp
===================================================================
--- test/clang-doc/mapper-namespace.cpp
+++ test/clang-doc/mapper-namespace.cpp
@@ -2,69 +2,75 @@
 // RUN: mkdir %t
 // RUN: echo "" > %t/compile_flags.txt
 // RUN: cp "%s" "%t/test.cpp"
-// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp | FileCheck %s
+// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp > %t/info.bc
+// RUN: llvm-bcanalyzer %t/info.bc --dump | FileCheck %s
 
 namespace A {
-// CHECK: ---
-// CHECK: KEY: A
-// CHECK: FullyQualifiedName: A
-// CHECK: Name: A
 
+void f();
 void f() {};
-// CHECK: ---
-// CHECK: KEY: _ZN1A1fEv
-// CHECK: FullyQualifiedName: A::f
-// CHECK: Name: f
-// CHECK: Namespace: A
-// CHECK: MangledName: _ZN1A1fEv
-// CHECK: ID: Return
-// CHECK: Type: void
-// CHECK: Access: 3
-// CHECK: Access: 3
 
 } // A
 
 namespace A {
-// CHECK: ---
-// CHECK: KEY: A
-// CHECK: FullyQualifiedName: A
-// CHECK: Name: A
-
 namespace B {
-// CHECK: ---
-// CHECK: KEY: A::B
-// CHECK: FullyQualifiedName: A::B
-// CHECK: Name: B
-// CHECK: Namespace: A
 
 enum E { X };
-// CHECK: ---
-// CHECK: KEY: A::B::E
-// CHECK: FullyQualifiedName: A::B::E
-// CHECK: Name: E
-// CHECK: Namespace: A::B
-// CHECK: ID: Member
-// CHECK: Type: A::B::X
-// CHECK: Access: 3
 
 E func(int i) { 
 	return X;
 }
 
-// CHECK: ---
-// CHECK: KEY: _ZN1A1B4funcEi
-// CHECK: FullyQualifiedName: A::B::func
-// CHECK: Name: func
-// CHECK: Namespace: A::B
-// CHECK: MangledName: _ZN1A1B4funcEi
-// CHECK: ID: Return
-// CHECK: Type: enum A::B::E
-// CHECK: Access: 3
-// CHECK: ID: Param
-// CHECK: Type: int
-// CHECK: Name: i
-// CHECK: Access: 3
-// CHECK: Access: 3
-
 } // B
 } // C
+
+// CHECK: <BLOCKINFO_BLOCK/>
+// CHECK: <NamespaceBlock NumWords=5 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=1/> blob data = 'A'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'A'
+// CHECK: </NamespaceBlock>
+// CHECK: <NamespaceBlock NumWords=7 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=4/> blob data = 'A::B'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'B'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'A'
+// CHECK: </NamespaceBlock>
+// CHECK: <FunctionBlock NumWords=17 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=4/> blob data = 'A::f'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'f'
+  // CHECK: <Namespace abbrevid=6 op0=1/> blob data = 'A'
+  // CHECK: <MangledName abbrevid=8 op0=9/> blob data = '_ZN1A1fEv'
+  // CHECK: <NamedTypeBlock NumWords=4 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=3/>
+    // CHECK: <Type abbrevid=5 op0=4/> blob data = 'void'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+// CHECK: </FunctionBlock>
+// CHECK: <FunctionBlock NumWords=30 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=10/> blob data = 'A::B::func'
+  // CHECK: <Name abbrevid=5 op0=4/> blob data = 'func'
+  // CHECK: <Namespace abbrevid=6 op0=4/> blob data = 'A::B'
+  // CHECK: <MangledName abbrevid=8 op0=14/> blob data = '_ZN1A1B4funcEi'
+  // CHECK: <NamedTypeBlock NumWords=6 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=3/>
+    // CHECK: <Type abbrevid=5 op0=12/> blob data = 'enum A::B::E'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+  // CHECK: <NamedTypeBlock NumWords=6 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=1/>
+    // CHECK: <Type abbrevid=5 op0=3/> blob data = 'int'
+    // CHECK: <Name abbrevid=6 op0=1/> blob data = 'i'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+// CHECK: </FunctionBlock>
+// CHECK: <EnumBlock NumWords=16 BlockCodeSize=5>
+  // CHECK: <FullyQualifiedName abbrevid=4 op0=7/> blob data = 'A::B::E'
+  // CHECK: <Name abbrevid=5 op0=1/> blob data = 'E'
+  // CHECK: <Namespace abbrevid=6 op0=4/> blob data = 'A::B'
+  // CHECK: <Scoped abbrevid=8 op0=192/>
+  // CHECK: <NamedTypeBlock NumWords=5 BlockCodeSize=5>
+    // CHECK: <ID abbrevid=4 op0=2/>
+    // CHECK: <Type abbrevid=5 op0=7/> blob data = 'A::B::X'
+    // CHECK: <Access abbrevid=7 op0=3/>
+  // CHECK: </NamedTypeBlock>
+// CHECK: </EnumBlock>
+
Index: clang-doc/tool/ClangDocMain.cpp
===================================================================
--- clang-doc/tool/ClangDocMain.cpp
+++ clang-doc/tool/ClangDocMain.cpp
@@ -10,6 +10,7 @@
 #include <string>
 #include "ClangDoc.h"
 #include "ClangDocBinary.h"
+#include "ClangDocReducer.h"
 #include "clang/AST/AST.h"
 #include "clang/AST/Decl.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
@@ -82,19 +83,20 @@
         getInsertArgumentAdjuster("-fparse-all-comments",
                                   tooling::ArgumentInsertPosition::BEGIN),
         ArgAdjuster);
+
+  // Mapping phase
   auto Err =
       Exec->get()->execute(newFrontendActionFactory(&Finder), ArgAdjuster);
   if (Err) errs() << toString(std::move(Err)) << "\n";
 
-  if (DumpResult) {
-    doc::ClangDocBinaryReader Reader(errs());
-    Exec->get()->getToolResults()->forEachResult(
-        [&Reader](StringRef Key, SmallString<2048> Value) {
-          outs() << "---\n"
-                 << "KEY: " << Key.str() << "\n";
-          Reader.readBitstream(Value);
-        }); 
-  }
+  // Reducing phase
+  auto Infos = doc::mergeInfos(Exec->get()->getToolResults());
+  SmallString<2048> Buffer;
+  llvm::BitstreamWriter Stream(Buffer);
+  Writer.writeAllBitstream(*Infos, Stream);
+
+  if (DumpResult)
+    outs() << Buffer;
 
   return 0;
 }
Index: clang-doc/ClangDocRepresentation.h
===================================================================
--- clang-doc/ClangDocRepresentation.h
+++ clang-doc/ClangDocRepresentation.h
@@ -13,6 +13,8 @@
 #include <string>
 #include "clang/AST/Type.h"
 #include "clang/Basic/Specifiers.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallVector.h"
 
 using namespace llvm;
@@ -22,49 +24,59 @@
 
 // A representation of a parsed comment.
 struct CommentInfo {
-  StringRef Kind;
-  StringRef Text;
-  StringRef Name;
-  StringRef Direction;
-  StringRef ParamName;
-  StringRef CloseName;
+  std::string Kind;
+  std::string Text;
+  std::string Name;
+  std::string Direction;
+  std::string ParamName;
+  std::string CloseName;
   bool SelfClosing = false;
   bool Explicit = false;
-  llvm::SmallVector<StringRef, 4> AttrKeys;
-  llvm::SmallVector<StringRef, 4> AttrValues;
-  llvm::SmallVector<StringRef, 4> Args;
-  llvm::SmallVector<StringRef, 4> Position;
+  llvm::SmallVector<std::string, 4> AttrKeys;
+  llvm::SmallVector<std::string, 4> AttrValues;
+  llvm::SmallVector<std::string, 4> Args;
+  llvm::SmallVector<std::string, 4> Position;
   std::vector<std::shared_ptr<CommentInfo>> Children;
 };
 
+
 // TODO: Pull the CommentInfo for a parameter or member out of the record or
 // function's CommentInfo.
 // Info for named types (parameters, members).
 struct NamedType {
+  enum FieldName { PARAM=1, MEMBER, RETTYPE };
+  FieldName Field;
   std::string Type;
   std::string Name;
   AccessSpecifier Access = clang::AccessSpecifier::AS_none;
-  CommentInfo Description;
+  llvm::SmallVector<CommentInfo, 2> Description;
+};
+
+struct Location {
+  int LineNumber;
+  std::string Filename;
 };
 
 /// A base struct for Infos.
 struct Info {
   std::string FullyQualifiedName;
   std::string SimpleName;
   std::string Namespace;
-  CommentInfo Description;
+  llvm::SmallVector<CommentInfo, 2> Description;
 };
 
 struct NamespaceInfo : public Info {};
 
 struct SymbolInfo : public Info {
-  int LineNumber;
-  StringRef Filename;
+  llvm::SmallVector<Location, 2> Loc;
 };
 
-struct NonDefInfo : public SymbolInfo {};
+struct NonDefInfo : public SymbolInfo {
+  enum InfoType { NAMESPACE, FUNCTION, RECORD, ENUM };
+  InfoType Type;
+};
 
-// TODO: Expand to allow for documenting templating.
+// TODO: Expand to allow for documenting templating and default args.
 // Info for functions.
 struct FunctionInfo : public SymbolInfo {
   std::string MangledName;
@@ -93,7 +105,47 @@
 
 // TODO: Add functionality to include separate markdown pages.
 
+// A deduplicated collection of infos, addressed by string key.
+class InfoSet {
+public:
+  InfoSet() {}
+
+  // Adds I under Key, or merges I into the entry already stored for Key.
+  template <typename T>
+  void insert(StringRef Key, T &I);
+
+  // Returns the info with a Key, if it exists. Valid until next insert().
+  template <typename T>
+  T* find(StringRef Key);
+
+  void removeNonDef(StringRef Key);
+
+  const std::vector<NamespaceInfo>& getNamespaceInfos() const { return NamespaceInfos; }
+  const std::vector<FunctionInfo>& getFunctionInfos() const { return FunctionInfos; }
+  const std::vector<RecordInfo>& getRecordInfos() const { return RecordInfos; }
+  const std::vector<EnumInfo>& getEnumInfos() const { return EnumInfos; }
+  const llvm::StringMap<NonDefInfo>& getNonDefInfos() const { return NonDefInfos; }
+
+private:
+  // Merge symbols L and R, preferring data from L in case of conflict.
+  // The two symbols must have the same ID.
+  template <typename L, typename R>
+  void mergeInfo(L *Left, R *Right);
+
+  void mergeInfoBase(Info *Left, Info *Right);
+  void mergeSymbolInfoBase(SymbolInfo *Left, SymbolInfo *Right);
+  void mergeNamedType(NamedType *Left, NamedType *Right);
+
+  std::vector<NamespaceInfo> NamespaceInfos;
+  std::vector<FunctionInfo> FunctionInfos;
+  std::vector<RecordInfo> RecordInfos;
+  std::vector<EnumInfo> EnumInfos;
+  // Owns copies of its keys, so a Key passed to insert() may be short-lived.
+  llvm::StringMap<size_t> InfoIndex;
+  llvm::StringMap<NonDefInfo> NonDefInfos;
+};
+
 }  // namespace doc
 }  // namespace clang
 
 #endif  // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_REPRESENTATION_H
\ No newline at end of file
Index: clang-doc/ClangDocRepresentation.cpp
===================================================================
--- /dev/null
+++ clang-doc/ClangDocRepresentation.cpp
@@ -0,0 +1,200 @@
+///===-- ClangDocRepresentation.cpp - ClangDocRepresentation ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangDocRepresentation.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace clang {
+namespace doc {
+
+// Copies each name field from Right into Left where Left's is empty, and
+// appends all of Right's comments to Left's.
+void InfoSet::mergeInfoBase(Info *Left, Info *Right) {
+  if (Left->FullyQualifiedName.empty())
+    Left->FullyQualifiedName = Right->FullyQualifiedName;
+  if (Left->SimpleName.empty()) Left->SimpleName = Right->SimpleName;
+  if (Left->Namespace.empty()) Left->Namespace = Right->Namespace;
+  for (const auto &CI : Right->Description) Left->Description.emplace_back(CI);
+}
+
+// Merges the base Info fields, then appends all of Right's locations to Left's.
+void InfoSet::mergeSymbolInfoBase(SymbolInfo *Left, SymbolInfo *Right) {
+  mergeInfoBase(Left, Right);
+  for (const auto &L : Right->Loc) Left->Loc.emplace_back(L);
+}
+
+// Fills Left's unset fields from Right and appends Right's comments to Left's.
+void InfoSet::mergeNamedType(NamedType *Left, NamedType *Right) {
+  if (Left->Type.empty()) Left->Type = Right->Type;
+  if (Left->Name.empty()) Left->Name = Right->Name;
+  // AS_none is NamedType's default, i.e. "unset". AS_public is 0, so testing
+  // !Access would treat an explicit public as unset and overwrite it.
+  if (Left->Access == clang::AccessSpecifier::AS_none)
+    Left->Access = Right->Access;
+  for (const auto &CI : Right->Description) Left->Description.emplace_back(CI);
+}
+
+template <>
+void InfoSet::mergeInfo(NamespaceInfo *Left, NamespaceInfo *Right) {
+  mergeInfoBase(Left, Right);
+}
+
+// Merges Right into Left, keeping Left's data on conflict. Members are merged
+// pairwise only when both sides list the same number of them.
+template <>
+void InfoSet::mergeInfo(RecordInfo *Left, RecordInfo *Right) {
+  mergeSymbolInfoBase(Left, Right);
+  if (!Left->TagType) Left->TagType = Right->TagType;
+  if (Left->Members.empty())
+    Left->Members = Right->Members;
+  else if (Left->Members.size() == Right->Members.size()) {
+    for (unsigned i = 0; i < Left->Members.size(); ++i)
+      mergeNamedType(&Left->Members[i], &Right->Members[i]);
+  }
+  if (Left->Parents.empty()) Left->Parents = Right->Parents;
+  if (Left->VirtualParents.empty()) Left->VirtualParents = Right->VirtualParents;
+}
+
+// Merges Right into Left, keeping Left's data on conflict. Members are merged
+// pairwise only when both sides list the same number of them.
+template <>
+void InfoSet::mergeInfo(EnumInfo *Left, EnumInfo *Right) {
+  mergeSymbolInfoBase(Left, Right);
+  if (!Left->Scoped) Left->Scoped = Right->Scoped;
+  if (Left->Members.empty())
+    Left->Members = Right->Members;
+  else if (Left->Members.size() == Right->Members.size()) {
+    for (unsigned i = 0; i < Left->Members.size(); ++i)
+      mergeNamedType(&Left->Members[i], &Right->Members[i]);
+  }
+}
+
+// Merges Right into Left, keeping Left's data on conflict. Params are merged
+// pairwise only when both sides list the same number of them.
+template <>
+void InfoSet::mergeInfo(FunctionInfo *Left, FunctionInfo *Right) {
+  mergeSymbolInfoBase(Left, Right);
+  if (Left->MangledName.empty()) Left->MangledName = Right->MangledName;
+  if (Left->Parent.empty()) Left->Parent = Right->Parent;
+  // AS_public is 0, so compare against AS_none rather than testing !Access.
+  if (Left->Access == clang::AccessSpecifier::AS_none)
+    Left->Access = Right->Access;
+  mergeNamedType(&Left->ReturnType, &Right->ReturnType);
+  if (Left->Params.empty())
+    Left->Params = Right->Params;
+  else if (Left->Params.size() == Right->Params.size()) {
+    for (unsigned i = 0; i < Left->Params.size(); ++i)
+      mergeNamedType(&Left->Params[i], &Right->Params[i]);
+  }
+}
+
+// Defines find<X>: looks Key up in InfoIndex and returns the element of the
+// X##s vector at the stored position, or nullptr if Key is not indexed.
+#define FIND_FUNC(X)                                                \
+  template <>                                                       \
+  X* InfoSet::find(StringRef Key)  {                                \
+    auto I = InfoIndex.find(Key);                                   \
+    return I == InfoIndex.end() ? nullptr : &X##s[I->second];       \
+  }
+
+FIND_FUNC(NamespaceInfo)
+FIND_FUNC(RecordInfo)
+FIND_FUNC(EnumInfo)
+FIND_FUNC(FunctionInfo)
+
+#undef FIND_FUNC
+
+template <>
+NonDefInfo* InfoSet::find(StringRef Key)  {
+  auto I = NonDefInfos.find(Key);
+  return I == NonDefInfos.end() ? nullptr : &I->second;
+}
+
+// Defines insert<X>: folds any pending NonDefInfo for Key into I, then either
+// appends I to the X##s vector (new key) or merges it into the existing entry.
+#define INSERT_FUNC(X)                                              \
+  template <>                                                       \
+  void InfoSet::insert(StringRef Key, X &I) {                       \
+    auto R = InfoIndex.try_emplace(Key, X##s.size());               \
+    if (auto *NonDef = find<NonDefInfo>(Key)) {                     \
+      mergeSymbolInfoBase(&I, NonDef);                              \
+      removeNonDef(Key);                                            \
+    }                                                               \
+    if (R.second)                                                   \
+      X##s.push_back(std::move(I));                                 \
+    else {                                                          \
+      X *E = &X##s[R.first->second];                                \
+      mergeInfo(E, &I);                                             \
+    }                                                               \
+  }
+
+INSERT_FUNC(RecordInfo)
+INSERT_FUNC(EnumInfo)
+INSERT_FUNC(FunctionInfo)
+
+#undef INSERT_FUNC
+
+template <>
+void InfoSet::insert(StringRef Key, NamespaceInfo &I) {
+  auto R = InfoIndex.try_emplace(Key, NamespaceInfos.size());
+  if (R.second)
+    NamespaceInfos.push_back(std::move(I));
+  else {
+    NamespaceInfo *E = &NamespaceInfos[R.first->second];
+    mergeInfo(E, &I);
+  }
+}
+
+// Merges a non-definition into the definition of the matching kind if one is
+// already stored under Key; otherwise merges it into, or stores it as, the
+// pending NonDefInfo for Key.
+template <>
+void InfoSet::insert(StringRef Key, NonDefInfo &I) {
+  switch (I.Type) {
+    case NonDefInfo::NAMESPACE:
+      if (auto *E = find<NamespaceInfo>(Key)) {
+        mergeInfoBase(E, &I);
+        return;
+      }
+      break;
+    case NonDefInfo::RECORD:
+      if (auto *E = find<RecordInfo>(Key)) {
+        mergeSymbolInfoBase(E, &I);
+        return;
+      }
+      break;
+    case NonDefInfo::ENUM:
+      if (auto *E = find<EnumInfo>(Key)) {
+        mergeSymbolInfoBase(E, &I);
+        return;
+      }
+      break;
+    case NonDefInfo::FUNCTION:
+      if (auto *E = find<FunctionInfo>(Key)) {
+        mergeSymbolInfoBase(E, &I);
+        return;
+      }
+      break;
+  }
+  if (auto *E = find<NonDefInfo>(Key)) {
+    mergeSymbolInfoBase(E, &I);
+    return;
+  }
+  NonDefInfos[Key] = std::move(I);
+}
+
+void InfoSet::removeNonDef(StringRef Key) {
+  NonDefInfos.erase(Key);
+}
+
+}  // namespace doc
+}  // namespace clang
Index: clang-doc/ClangDocReducer.h
===================================================================
--- /dev/null
+++ clang-doc/ClangDocReducer.h
@@ -0,0 +1,26 @@
+///===-- ClangDocReducer.h - ClangDocReducer -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_REDUCER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_REDUCER_H
+
+#include "ClangDocRepresentation.h"
+#include "clang/Tooling/Execution.h"
+#include "clang/Tooling/Tooling.h"
+
+namespace clang {
+namespace doc {
+
+// Combine occurrences of the same info across translation units.
+std::unique_ptr<InfoSet> mergeInfos(tooling::ToolResults *Results);
+
+}  // namespace doc
+}  // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_REDUCER_H
Index: clang-doc/ClangDocReducer.cpp
===================================================================
--- /dev/null
+++ clang-doc/ClangDocReducer.cpp
@@ -0,0 +1,34 @@
+///===-- ClangDocReducer.cpp - ClangDocReducer -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangDocBinary.h"
+#include "ClangDocReducer.h"
+#include "ClangDocRepresentation.h"
+
+namespace clang {
+namespace doc {
+
+// Reads every serialized result in Results into one InfoSet and returns it.
+// Returns nullptr if reading any result fails.
+std::unique_ptr<InfoSet> mergeInfos(tooling::ToolResults *Results) {
+  std::unique_ptr<InfoSet> UniqueInfos = llvm::make_unique<InfoSet>();
+  doc::ClangDocBinaryReader Reader;
+  bool Err = false;
+  Results->forEachResult([&](StringRef Key, StringRef Value) {
+    // The callback returns no value, so a failure can only be recorded here.
+    if (!Reader.readBitstreamToInfoSet(Value, UniqueInfos)) Err = true;
+  });
+  if (Err) return nullptr;
+  // Return the local by name: wrapping it in std::move would only inhibit
+  // copy elision.
+  return UniqueInfos;
+}
+
+}  // namespace doc
+}  // namespace clang
Index: clang-doc/ClangDocMapper.h
===================================================================
--- clang-doc/ClangDocMapper.h
+++ clang-doc/ClangDocMapper.h
@@ -76,7 +76,7 @@
                             StringRef Name, const FullComment *C,
                             int LineNumber, StringRef File);
   template <class C>
-  StringRef serializeNonDefInfo(const C *D, StringRef Name,
+  StringRef serializeNonDefInfo(const C *D, StringRef Name, NonDefInfo::InfoType Type,
                                 const FullComment *FC, int LineNumber,
                                 StringRef File);
   void parseFields(RecordInfo &I, const RecordDecl *D) const;
Index: clang-doc/ClangDocMapper.cpp
===================================================================
--- clang-doc/ClangDocMapper.cpp
+++ clang-doc/ClangDocMapper.cpp
@@ -28,16 +28,19 @@
   I.FullyQualifiedName = Name;
   I.SimpleName = D->getNameAsString();
   I.Namespace = getParentNamespace(D);
-  if (FC) parseFullComment(FC, I.Description);
+  if (FC) { 
+    I.Description.emplace_back(CommentInfo{});
+    parseFullComment(FC, I.Description.back());
+  }
   return serialize(I);
 }
 
 template <>
 StringRef ClangDocMapper::emitInfo(const RecordDecl *D, const FullComment *FC,
                                    StringRef Name, int LineNumber,
                                    StringRef File) {
   if (!D->isThisDeclarationADefinition())
-    return serializeNonDefInfo(D, Name, FC, LineNumber, File);
+    return serializeNonDefInfo(D, Name, NonDefInfo::RECORD, FC, LineNumber, File);
   RecordInfo I;
   populateSymbolInfo(I, Name, D->getNameAsString(), getParentNamespace(D), FC,
                      LineNumber, File);
@@ -52,7 +55,7 @@
                                    StringRef Name, int LineNumber,
                                    StringRef File) {
   if (!D->isThisDeclarationADefinition())
-    return serializeNonDefInfo(D, Name, FC, LineNumber, File);
+    return serializeNonDefInfo(D, Name, NonDefInfo::FUNCTION, FC, LineNumber, File);
   FunctionInfo I;
   populateFunctionInfo(I, D, Name, FC, LineNumber, File);
   I.Access = clang::AccessSpecifier::AS_none;
@@ -64,7 +67,7 @@
                                    const FullComment *FC, StringRef Name,
                                    int LineNumber, StringRef File) {
   if (!D->isThisDeclarationADefinition())
-    return serializeNonDefInfo(D, Name, FC, LineNumber, File);
+    return serializeNonDefInfo(D, Name, NonDefInfo::FUNCTION, FC, LineNumber, File);
   FunctionInfo I;
   populateFunctionInfo(I, D, Name, FC, LineNumber, File);
   I.Parent = D->getParent()->getQualifiedNameAsString();
@@ -77,7 +80,7 @@
                                    StringRef Name, int LineNumber,
                                    StringRef File) {
   if (!D->isThisDeclarationADefinition())
-    return serializeNonDefInfo(D, Name, FC, LineNumber, File);
+    return serializeNonDefInfo(D, Name, NonDefInfo::ENUM, FC, LineNumber, File);
   EnumInfo I;
   populateSymbolInfo(I, Name, D->getNameAsString(), getParentNamespace(D), FC,
                      LineNumber, File);
@@ -90,7 +93,7 @@
 SmallString<2048> ClangDocMapper::serialize(T &I) const {
   SmallString<2048> Buffer;
   llvm::BitstreamWriter Stream(Buffer);
-  Writer.serializeInfo(I, Stream);
+  Writer.writeBitstream(I, Stream, /*writeBlockInfo=*/true);
   return Buffer;
 }
 
@@ -107,9 +110,11 @@
   I.FullyQualifiedName = Name;
   I.SimpleName = SimpleName;
   I.Namespace = Namespace;
-  I.LineNumber = LineNumber;
-  I.Filename = File;
-  if (C) parseFullComment(C, I.Description);
+  I.Loc.emplace_back(Location{LineNumber, File});
+  if (C) {
+    I.Description.emplace_back(CommentInfo());
+    parseFullComment(C, I.Description.back());
+  }
 }
 
 void ClangDocMapper::populateFunctionInfo(FunctionInfo &I,
@@ -126,16 +131,19 @@
 }
 
 template <class C>
-StringRef ClangDocMapper::serializeNonDefInfo(const C *D, StringRef Name,
+StringRef ClangDocMapper::serializeNonDefInfo(const C *D, StringRef Name, NonDefInfo::InfoType Type,
                                               const FullComment *FC,
                                               int LineNumber, StringRef File) {
   NonDefInfo I;
+  I.Type = Type;
   I.FullyQualifiedName = Name;
   I.SimpleName = D->getNameAsString();
   I.Namespace = getParentNamespace(D);
-  I.LineNumber = LineNumber;
-  I.Filename = File;
-  if (FC) parseFullComment(FC, I.Description);
+  I.Loc.emplace_back(Location{LineNumber, File});
+  if (FC) {
+    I.Description.emplace_back(CommentInfo());
+    parseFullComment(FC, I.Description.back());
+  }
   return serialize(I);
 }
 
Index: clang-doc/ClangDocBinary.h
===================================================================
--- clang-doc/ClangDocBinary.h
+++ clang-doc/ClangDocBinary.h
@@ -40,20 +40,19 @@
   using RecordData = SmallVector<uint64_t, 128>;
 
   template <typename T>
-  void serializeInfo(const T &I, BitstreamWriter &Stream);
+  void writeBitstream(const T &I, BitstreamWriter &Stream,
+                      bool writeBlockInfo = false);
+  void writeAllBitstream(InfoSet &I, BitstreamWriter &Stream);
 
  private:
-  template <typename T>
-  void writeBitstream(const T &I, BitstreamWriter &Stream);
   void emitBlockInfoBlock(BitstreamWriter &Stream);
   void emitHeader(BitstreamWriter &Stream);
   void emitStringRecord(StringRef Str, unsigned RecordId,
                         BitstreamWriter &Stream);
   void emitLocationRecord(int LineNumber, StringRef File, unsigned RecordId,
                           BitstreamWriter &Stream);
   void emitIntRecord(int Value, unsigned RecordId, BitstreamWriter &Stream);
-  void emitNamedTypeBlock(const NamedType &N, StringRef ID,
-                          BitstreamWriter &Stream);
+  void emitNamedTypeBlock(const NamedType &N, int ID, BitstreamWriter &Stream);
   void emitCommentBlock(const CommentInfo *I, BitstreamWriter &Stream);
 
   void emitRecordID(unsigned ID, const char *Name, BitstreamWriter &Stream);
@@ -67,29 +66,34 @@
   AbbreviationMap Abbrevs;
 };
 
-template <typename T>
-void ClangDocBinaryWriter::serializeInfo(const T &I, BitstreamWriter &Stream) {
-  emitBlockInfoBlock(Stream);
-  writeBitstream(I, Stream);
-}
-
+// Class to read bitstream into an InfoSet collection
 class ClangDocBinaryReader {
  public:
-  ClangDocBinaryReader(raw_ostream &OS) : OS(OS) {}
-
+  ClangDocBinaryReader() {}
   using RecordData = SmallVector<uint64_t, 128>;
 
-  bool readBitstream(SmallString<2048> Bits);
+  // Does not merge infos
+  std::unique_ptr<InfoSet> readBitstream(SmallString<2048> Bits);
+  bool readBitstreamToInfoSet(SmallString<2048> Bits, std::unique_ptr<InfoSet> &IS);
 
  private:
   enum class Cursor { BadBlock = 1, Record, BlockEnd, BlockBegin };
-  bool readBlock(llvm::BitstreamCursor &Stream, unsigned ID);
+  bool validateStream(llvm::BitstreamCursor &Stream);
+  bool readBlockInfoBlock(llvm::BitstreamCursor &Stream);
+  template <typename T>
+  CommentInfo &getCommentInfo(T &I);
+  template <typename T>
+  bool addNamedType(T &I, NamedType &NT);
+  template <typename T>
+  bool readRecord(llvm::BitstreamCursor &Stream, unsigned ID, T &I);
+  template <typename T>
+  bool readBlock(llvm::BitstreamCursor &Stream, unsigned ID, T &I);
   Cursor skipUntilRecordOrBlock(llvm::BitstreamCursor &Stream,
                                 unsigned &BlockOrRecordID);
 
+  SmallVector<uint64_t, 1024> Record;
   Optional<llvm::BitstreamBlockInfo> BlockInfo;
   std::map<unsigned, StringRef> RecordNames;
-  raw_ostream &OS;
 };
 
 }  // namespace doc
Index: clang-doc/ClangDocBinary.cpp
===================================================================
--- clang-doc/ClangDocBinary.cpp
+++ clang-doc/ClangDocBinary.cpp
@@ -45,27 +45,21 @@
   NAMED_TYPE_TYPE,
   NAMED_TYPE_NAME,
   NAMED_TYPE_ACCESS,
-  INFODATATYPES(NAMESPACE) 
-  INFODATATYPES(NONDEF) 
-  NONDEF_LOCATION,
-  INFODATATYPES(ENUM) 
-  ENUM_LOCATION,
+  INFODATATYPES(NAMESPACE) INFODATATYPES(NONDEF) NONDEF_LOCATION,
+  NONDEF_TYPE,
+  INFODATATYPES(ENUM) ENUM_LOCATION,
   ENUM_SCOPED,
   ENUM_NAMED_TYPE,
   ENUM_MEMBER,
-  INFODATATYPES(RECORD) 
-  RECORD_LOCATION,
+  INFODATATYPES(RECORD) RECORD_LOCATION,
   RECORD_TAG_TYPE,
   RECORD_MEMBER,
   RECORD_PARENT,
   RECORD_VPARENT,
-  INFODATATYPES(FUNCTION) 
-  FUNCTION_LOCATION,
+  INFODATATYPES(FUNCTION) FUNCTION_LOCATION,
   FUNCTION_MANGLED_NAME,
   FUNCTION_PARENT,
   FUNCTION_ACCESS,
-  DT_FIRST = COMMENT_KIND,
-  DT_LAST = FUNCTION_ACCESS
 };
 
 #undef INFODATATYPES
@@ -176,10 +170,10 @@
 
 // Common Blocks
 
-void ClangDocBinaryWriter::emitNamedTypeBlock(const NamedType &N, StringRef ID,
+void ClangDocBinaryWriter::emitNamedTypeBlock(const NamedType &N, int ID,
                                               BitstreamWriter &Stream) {
   Stream.EnterSubblock(NAMED_TYPE_BLOCK_ID, 5);
-  emitStringRecord(ID, NAMED_TYPE_ID, Stream);
+  emitIntRecord(ID, NAMED_TYPE_ID, Stream);
   emitStringRecord(N.Type, NAMED_TYPE_TYPE, Stream);
   emitStringRecord(N.Name, NAMED_TYPE_NAME, Stream);
   emitIntRecord(N.Access, NAMED_TYPE_ACCESS, Stream);
@@ -246,7 +240,7 @@
   emitRecordID(NAMED_TYPE_TYPE, "Type", Stream);
   emitRecordID(NAMED_TYPE_NAME, "Name", Stream);
   emitRecordID(NAMED_TYPE_ACCESS, "Access", Stream);
-  emitStringAbbrev(NAMED_TYPE_ID, NAMED_TYPE_BLOCK_ID, Stream);
+  emitIntAbbrev(NAMED_TYPE_ID, NAMED_TYPE_BLOCK_ID, Stream);
   emitStringAbbrev(NAMED_TYPE_TYPE, NAMED_TYPE_BLOCK_ID, Stream);
   emitStringAbbrev(NAMED_TYPE_NAME, NAMED_TYPE_BLOCK_ID, Stream);
   emitIntAbbrev(NAMED_TYPE_ACCESS, NAMED_TYPE_BLOCK_ID, Stream);
@@ -270,7 +264,9 @@
   emitBlockID(NONDEF_BLOCK_ID, "NonDefBlock", Stream);
   INFORECORD(NONDEF)
   emitRecordID(NONDEF_LOCATION, "Location", Stream);
+  emitRecordID(NONDEF_TYPE, "Type", Stream);
   INFOABBREV(NONDEF)
+  emitIntAbbrev(NONDEF_TYPE, NONDEF_BLOCK_ID, Stream);
   emitLocationAbbrev(NONDEF_LOCATION, NONDEF_BLOCK_ID, Stream);
 
   // Enum Block
@@ -320,129 +316,440 @@
   emitStringRecord(I.FullyQualifiedName, X##_FULLY_QUALIFIED_NAME, Stream); \
   emitStringRecord(I.SimpleName, X##_NAME, Stream);                         \
   emitStringRecord(I.Namespace, X##_NAMESPACE, Stream);                     \
-  emitCommentBlock(&I.Description, Stream);
+  for (const auto &CI : I.Description)                             \
+    emitCommentBlock(&CI, Stream);
 
 template <>
 void ClangDocBinaryWriter::writeBitstream(const NamespaceInfo &I,
-                                          BitstreamWriter &Stream) {
+                                          BitstreamWriter &Stream,
+                                          bool writeBlockInfo) {
+  if (writeBlockInfo) emitBlockInfoBlock(Stream);
   Stream.EnterSubblock(NAMESPACE_BLOCK_ID, 5);
   EMITINFO(NAMESPACE)
   Stream.ExitBlock();
 }
 
 template <>
 void ClangDocBinaryWriter::writeBitstream(const NonDefInfo &I,
-                                          BitstreamWriter &Stream) {
+                                          BitstreamWriter &Stream,
+                                          bool writeBlockInfo) {
+  if (writeBlockInfo) emitBlockInfoBlock(Stream);
   Stream.EnterSubblock(NONDEF_BLOCK_ID, 5);
   EMITINFO(NONDEF)
-  emitLocationRecord(I.LineNumber, I.Filename, NONDEF_LOCATION, Stream);
+  emitIntRecord(I.Type, NONDEF_TYPE, Stream);
+  for (const auto &L : I.Loc)
+    emitLocationRecord(L.LineNumber, L.Filename, NONDEF_LOCATION, Stream);
   Stream.ExitBlock();
 }
 
 template <>
 void ClangDocBinaryWriter::writeBitstream(const EnumInfo &I,
-                                          BitstreamWriter &Stream) {
+                                          BitstreamWriter &Stream,
+                                          bool writeBlockInfo) {
+  if (writeBlockInfo) emitBlockInfoBlock(Stream);
   Stream.EnterSubblock(ENUM_BLOCK_ID, 5);
   EMITINFO(ENUM)
-  emitLocationRecord(I.LineNumber, I.Filename, ENUM_LOCATION, Stream);
+  for (const auto &L : I.Loc)
+    emitLocationRecord(L.LineNumber, L.Filename, ENUM_LOCATION, Stream);
   emitIntRecord(I.Scoped, ENUM_SCOPED, Stream);
-  for (const auto &N : I.Members) emitNamedTypeBlock(N, "Member", Stream);
+  for (const auto &N : I.Members)
+    emitNamedTypeBlock(N, NamedType::MEMBER, Stream);
   Stream.ExitBlock();
 }
 
 template <>
 void ClangDocBinaryWriter::writeBitstream(const RecordInfo &I,
-                                          BitstreamWriter &Stream) {
+                                          BitstreamWriter &Stream,
+                                          bool writeBlockInfo) {
+  if (writeBlockInfo) emitBlockInfoBlock(Stream);
   Stream.EnterSubblock(RECORD_BLOCK_ID, 5);
   EMITINFO(RECORD)
-  emitLocationRecord(I.LineNumber, I.Filename, RECORD_LOCATION, Stream);
+  for (const auto &L : I.Loc)
+    emitLocationRecord(L.LineNumber, L.Filename, RECORD_LOCATION, Stream);
   emitIntRecord(I.TagType, RECORD_TAG_TYPE, Stream);
-  for (const auto &N : I.Members) emitNamedTypeBlock(N, "Member", Stream);
+  for (const auto &N : I.Members)
+    emitNamedTypeBlock(N, NamedType::MEMBER, Stream);
   for (const auto &P : I.Parents) emitStringRecord(P, RECORD_PARENT, Stream);
   for (const auto &P : I.VirtualParents)
     emitStringRecord(P, RECORD_VPARENT, Stream);
   Stream.ExitBlock();
 }
 
 template <>
 void ClangDocBinaryWriter::writeBitstream(const FunctionInfo &I,
-                                          BitstreamWriter &Stream) {
+                                          BitstreamWriter &Stream,
+                                          bool writeBlockInfo) {
+  if (writeBlockInfo) emitBlockInfoBlock(Stream);
   Stream.EnterSubblock(FUNCTION_BLOCK_ID, 5);
   EMITINFO(FUNCTION)
-  emitLocationRecord(I.LineNumber, I.Filename, FUNCTION_LOCATION, Stream);
+  for (const auto &L : I.Loc)
+    emitLocationRecord(L.LineNumber, L.Filename, FUNCTION_LOCATION, Stream);
   emitStringRecord(I.MangledName, FUNCTION_MANGLED_NAME, Stream);
   emitStringRecord(I.Parent, FUNCTION_PARENT, Stream);
-  emitNamedTypeBlock(I.ReturnType, "Return", Stream);
-  for (const auto &N : I.Params) emitNamedTypeBlock(N, "Param", Stream);
-  emitIntRecord(I.Access, FUNCTION_ACCESS, Stream);
+  emitNamedTypeBlock(I.ReturnType, NamedType::RETTYPE, Stream);
+  for (const auto &N : I.Params)
+    emitNamedTypeBlock(N, NamedType::PARAM, Stream);
   Stream.ExitBlock();
 }
 
 #undef EMITINFO
 
+void ClangDocBinaryWriter::writeAllBitstream(InfoSet &ISet,
+                                             BitstreamWriter &Stream) {
+  emitBlockInfoBlock(Stream);
+  for (const auto &I : ISet.getNamespaceInfos()) writeBitstream(I, Stream);
+  for (const auto &I : ISet.getFunctionInfos()) writeBitstream(I, Stream);
+  for (const auto &I : ISet.getRecordInfos()) writeBitstream(I, Stream);
+  for (const auto &I : ISet.getEnumInfos()) writeBitstream(I, Stream);
+  for (const auto &I : ISet.getNonDefInfos()) writeBitstream(I.second, Stream);
+}
+
 // Reader
 
-bool ClangDocBinaryReader::readBitstream(SmallString<2048> Bits) {
-  BitstreamCursor Stream(Bits);
+template <>
+bool ClangDocBinaryReader::readRecord(llvm::BitstreamCursor &Stream,
+                                      unsigned ID, NamespaceInfo &I) {
+  // Read the record.
+  Record.clear();
+  StringRef Blob;
+  unsigned RecID = Stream.readRecord(ID, Record, &Blob);
+
+  switch ((DataTypes)RecID) {
+    // Strings
+    case NAMESPACE_FULLY_QUALIFIED_NAME:
+      I.FullyQualifiedName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case NAMESPACE_NAME:
+      I.SimpleName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case NAMESPACE_NAMESPACE:
+      I.Namespace = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+
+    default:
+      errs() << "Invalid record field in block.\n";
+      return false;
+  }
+}
 
-  if (Stream.AtEndOfStream()) return false;
+template <>
+bool ClangDocBinaryReader::readRecord(llvm::BitstreamCursor &Stream,
+                                      unsigned ID, RecordInfo &I) {
+  // Read the record.
+  Record.clear();
+  StringRef Blob;
+  unsigned RecID = Stream.readRecord(ID, Record, &Blob);
+
+  switch ((DataTypes)RecID) {
+    case RECORD_FULLY_QUALIFIED_NAME:
+      I.FullyQualifiedName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case RECORD_NAME:
+      I.SimpleName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case RECORD_NAMESPACE:
+      I.Namespace = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case RECORD_PARENT:
+      I.Parents.push_back(Blob);
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case RECORD_VPARENT:
+      I.VirtualParents.push_back(Blob);
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case RECORD_LOCATION:
+      I.Loc.emplace_back(Location{(int)Record[0], Blob});
+      // outs() << RecordNames[RecID] << ": " << Blob << ":" << Record[0] << "\n";
+      return true;
+    case RECORD_TAG_TYPE:
+      I.TagType = (TagTypeKind)Record[0];
+      // outs() << RecordNames[RecID] << ": " << Record[0] << "\n";
+      return true;
+
+    default:
+      errs() << "Invalid record field in block.\n";
+      return false;
+  }
+}
 
-  // Sniff for the signature.
-  if (Stream.Read(8) != 'D' || Stream.Read(8) != 'O' || Stream.Read(8) != 'C' ||
-      Stream.Read(8) != 'S')
-    return false;
+template <>
+bool ClangDocBinaryReader::readRecord(llvm::BitstreamCursor &Stream,
+                                      unsigned ID, EnumInfo &I) {
+  // Read the record.
+  Record.clear();
+  StringRef Blob;
+  unsigned RecID = Stream.readRecord(ID, Record, &Blob);
+
+  switch ((DataTypes)RecID) {
+    case ENUM_FULLY_QUALIFIED_NAME:
+      I.FullyQualifiedName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case ENUM_NAME:
+      I.SimpleName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case ENUM_NAMESPACE:
+      I.Namespace = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case ENUM_LOCATION:
+      I.Loc.emplace_back(Location{(int)Record[0], Blob});
+      // outs() << RecordNames[RecID] << ": " << Blob << ":" << Record[0] << "\n";
+      return true;
+    case ENUM_SCOPED:
+      I.Scoped = Record[0];
+      // outs() << RecordNames[RecID] << ": " << Record[0] << "\n";
+      return true;
+
+    default:
+      errs() << "Invalid record field in block.\n";
+      return false;
+  }
+}
 
-  // Read the top level blocks.
-  while (!Stream.AtEndOfStream()) {
-    unsigned Code = Stream.ReadCode();
-    if (Code != bitc::ENTER_SUBBLOCK) return false;
+template <>
+bool ClangDocBinaryReader::readRecord(llvm::BitstreamCursor &Stream,
+                                      unsigned ID, FunctionInfo &I) {
+  // Read the record.
+  Record.clear();
+  StringRef Blob;
+  unsigned RecID = Stream.readRecord(ID, Record, &Blob);
+
+  switch ((DataTypes)RecID) {
+    case FUNCTION_FULLY_QUALIFIED_NAME:
+      I.FullyQualifiedName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case FUNCTION_NAME:
+      I.SimpleName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case FUNCTION_NAMESPACE:
+      I.Namespace = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case FUNCTION_PARENT:
+      I.Parent = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case FUNCTION_MANGLED_NAME:
+      I.MangledName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case FUNCTION_LOCATION:
+      I.Loc.emplace_back(Location{(int)Record[0], Blob});
+      // outs() << RecordNames[RecID] << ": " << Blob << ":" << Record[0] << "\n";
+      return true;
+    case FUNCTION_ACCESS:
+      I.Access = (AccessSpecifier)Record[0];
+      // outs() << RecordNames[RecID] << ": " << Record[0] << "\n";
+      return true;
+
+    default:
+      errs() << "Invalid record field in block.\n";
+      return false;
+  }
+}
 
-    switch (Stream.ReadSubBlockID()) {
-      case llvm::bitc::BLOCKINFO_BLOCK_ID: {
-        BlockInfo = Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
-        if (!BlockInfo) return false;
-        Stream.setBlockInfo(&*BlockInfo);
-        // Extract the record names associated with each field
-        for (unsigned i = BI_FIRST; i <= BI_LAST; ++i) {
-          for (const auto &N : (*BlockInfo).getBlockInfo(i)->RecordNames)
-            RecordNames[N.first] = N.second;
-        }
-        continue;
-      }
-      case NAMESPACE_BLOCK_ID:
-        if (readBlock(Stream, NAMESPACE_BLOCK_ID)) return true;
-        continue;
-      case NONDEF_BLOCK_ID:
-        if (readBlock(Stream, NONDEF_BLOCK_ID)) return true;
-        continue;
-      case NAMED_TYPE_BLOCK_ID:
-        if (readBlock(Stream, NAMED_TYPE_BLOCK_ID)) return true;
-        continue;
-      case COMMENT_BLOCK_ID:
-        if (readBlock(Stream, COMMENT_BLOCK_ID)) return true;
-        continue;
-      case RECORD_BLOCK_ID:
-        if (readBlock(Stream, RECORD_BLOCK_ID)) return true;
-        continue;
-      case ENUM_BLOCK_ID:
-        if (readBlock(Stream, ENUM_BLOCK_ID)) return true;
-        continue;
-      case FUNCTION_BLOCK_ID:
-        if (readBlock(Stream, FUNCTION_BLOCK_ID)) return true;
-        continue;
-      default:
-        if (!Stream.SkipBlock()) return false;
-        continue;
-    }
+template <>
+bool ClangDocBinaryReader::readRecord(llvm::BitstreamCursor &Stream,
+                                      unsigned ID, NonDefInfo &I) {
+  // Read the record.
+  Record.clear();
+  StringRef Blob;
+  unsigned RecID = Stream.readRecord(ID, Record, &Blob);
+
+  switch ((DataTypes)RecID) {
+    case NONDEF_FULLY_QUALIFIED_NAME:
+      I.FullyQualifiedName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case NONDEF_NAME:
+      I.SimpleName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case NONDEF_NAMESPACE:
+      I.Namespace = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case NONDEF_TYPE:
+      I.Type = (NonDefInfo::InfoType)Record[0];
+      // outs() << RecordNames[RecID] << ": " << Record[0] << "\n";
+      return true;
+    case NONDEF_LOCATION:
+      I.Loc.emplace_back(Location{(int)Record[0], Blob});
+      // outs() << RecordNames[RecID] << ": " << Blob << ":" << Record[0] << "\n";
+      return true;
+
+    default:
+      errs() << "Invalid record field in block.\n";
+      return false;
   }
-  return true;
 }
 
-bool ClangDocBinaryReader::readBlock(llvm::BitstreamCursor &Stream,
-                                     unsigned ID) {
+template <>
+bool ClangDocBinaryReader::readRecord(llvm::BitstreamCursor &Stream,
+                                      unsigned ID, NamedType &I) {
+  // Read the record.
+  Record.clear();
+  StringRef Blob;
+  unsigned RecID = Stream.readRecord(ID, Record, &Blob);
+
+  switch ((DataTypes)RecID) {
+    case NAMED_TYPE_ID:
+      I.Field = (NamedType::FieldName) Record[0];
+      // outs() << RecordNames[RecID] << ": " << Record[0] << "\n";
+      return true;
+    case NAMED_TYPE_TYPE:
+      I.Type = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case NAMED_TYPE_NAME:
+      I.Name = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case NAMED_TYPE_ACCESS:
+      I.Access = (AccessSpecifier)Record[0];
+      // outs() << RecordNames[RecID] << ": " << Record[0] << "\n";
+      return true;
+
+    default:
+      errs() << "Invalid record field in block.\n";
+      return false;
+  }
+}
+
+template <>
+bool ClangDocBinaryReader::readRecord(llvm::BitstreamCursor &Stream,
+                                      unsigned ID, CommentInfo &I) {
+  // Read the record.
+  Record.clear();
+  StringRef Blob;
+  unsigned RecID = Stream.readRecord(ID, Record, &Blob);
+
+  switch ((DataTypes)RecID) {
+    case COMMENT_KIND:
+      I.Kind = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_TEXT:
+      I.Text = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_NAME:
+      I.Name = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_DIRECTION:
+      I.Direction = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_PARAMNAME:
+      I.ParamName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_CLOSENAME:
+      I.CloseName = Blob;
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_ATTRKEY:
+      I.AttrKeys.push_back(Blob);
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_ATTRVAL:
+      I.AttrValues.push_back(Blob);
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_ARG:
+      I.Args.push_back(Blob);
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_POSITION:
+      I.Position.push_back(Blob);
+      // outs() << RecordNames[RecID] << ": " << Blob << "\n";
+      return true;
+    case COMMENT_SELFCLOSING:
+      I.SelfClosing = Record[0];
+      // outs() << RecordNames[RecID] << ": " << Record[0] << "\n";
+      return true;
+    case COMMENT_EXPLICIT:
+      I.Explicit = Record[0];
+      // outs() << RecordNames[RecID] << ": " << Record[0] << "\n";
+      return true;
+
+    default:
+      errs() << "Invalid record field in block.\n";
+      return false;
+  }
+}
+
+template <typename T>
+CommentInfo &ClangDocBinaryReader::getCommentInfo(T &I) {
+  I.Description.emplace_back(CommentInfo{});
+  return I.Description.back();
+}
+
+template <>
+CommentInfo &ClangDocBinaryReader::getCommentInfo(CommentInfo &I) {
+  I.Children.emplace_back(std::make_shared<CommentInfo>());
+  return *I.Children.back();
+}
+
+template <typename T>
+bool ClangDocBinaryReader::addNamedType(T &I, NamedType &NT) {
+  errs() << "Should not have a named type subblock.\n";
+  return false;
+}
+
+template <>
+bool ClangDocBinaryReader::addNamedType(RecordInfo &I, NamedType &NT) {
+  if (NT.Field == NamedType::MEMBER) {
+    I.Members.emplace_back(NT);
+    return true;
+  }
+  errs() << "Unknown field.\n";
+  return false;
+}
+
+template <>
+bool ClangDocBinaryReader::addNamedType(EnumInfo &I, NamedType &NT) {
+  if (NT.Field == NamedType::MEMBER) {
+    I.Members.emplace_back(NT);
+    return true;
+  }
+  errs() << "Unknown field.\n";
+  return false;
+}
+
+template <>
+bool ClangDocBinaryReader::addNamedType(FunctionInfo &I, NamedType &NT) {
+  switch (NT.Field) {
+    case (NamedType::PARAM):
+      I.Params.emplace_back(NT);
+      return true;
+    case (NamedType::RETTYPE):
+      // TODO: get rid of copy here
+      I.ReturnType = NT;
+      return true;
+    default:
+      errs() << "Unknown field.\n";
+      return false;
+  }
+}
+
+template <typename T>
+bool ClangDocBinaryReader::readBlock(llvm::BitstreamCursor &Stream, unsigned ID,
+                                     T &I) {
   if (Stream.EnterSubBlock(ID)) return false;
 
-  SmallVector<uint64_t, 1024> Record;
   while (true) {
     unsigned BlockOrCode = 0;
     Cursor Res = skipUntilRecordOrBlock(Stream, BlockOrCode);
@@ -453,75 +760,32 @@
       case Cursor::BlockEnd:
         return true;
       case Cursor::BlockBegin:
-        if (readBlock(Stream, BlockOrCode)) continue;
+        switch (BlockOrCode) {
+          // Blocks can only have Comment or NamedType subblocks
+          case COMMENT_BLOCK_ID:
+            if (readBlock(Stream, COMMENT_BLOCK_ID, getCommentInfo(I)))
+              continue;
+            return false;
+          case NAMED_TYPE_BLOCK_ID: {
+            NamedType N;
+            if (readBlock(Stream, BlockOrCode, N)) {
+              if (addNamedType(I, N)) continue;
+            }
+            return false;
+          }
+          default:
+            errs() << "Invalid subblock type\n";
+            return false;
+        }
         if (!Stream.SkipBlock()) return false;
         continue;
       case Cursor::Record:
         break;
     }
-
-    // Read the record.
-    Record.clear();
-    StringRef Blob;
-    unsigned RecID = Stream.readRecord(BlockOrCode, Record, &Blob);
-    if (RecID < DT_FIRST || RecID > DT_LAST) continue;
-
-#define INFOCASES(X)             \
-  case X##_FULLY_QUALIFIED_NAME: \
-  case X##_NAME:                 \
-  case X##_NAMESPACE:
-
-    switch ((DataTypes)RecID) {
-      // Locations
-      case ENUM_LOCATION:
-      case RECORD_LOCATION:
-      case FUNCTION_LOCATION:
-      case NONDEF_LOCATION:
-        OS << RecordNames[RecID] << ": " << Blob << ":" << Record[0] << "\n";
-        continue;
-
-      // Strings
-      INFOCASES(NAMESPACE)
-      INFOCASES(NONDEF)
-      INFOCASES(ENUM)
-      INFOCASES(RECORD)
-      INFOCASES(FUNCTION)
-      case NAMED_TYPE_ID:
-      case NAMED_TYPE_TYPE:
-      case NAMED_TYPE_NAME:
-      case RECORD_PARENT:
-      case RECORD_VPARENT:
-      case FUNCTION_PARENT:
-      case FUNCTION_MANGLED_NAME:
-      case COMMENT_KIND:
-      case COMMENT_TEXT:
-      case COMMENT_NAME:
-      case COMMENT_DIRECTION:
-      case COMMENT_PARAMNAME:
-      case COMMENT_CLOSENAME:
-      case COMMENT_ATTRKEY:
-      case COMMENT_ATTRVAL:
-      case COMMENT_ARG:
-      case COMMENT_POSITION:
-        OS << RecordNames[RecID] << ": " << Blob << "\n";
-        continue;
-
-      // Ints
-      case ENUM_SCOPED:
-      case RECORD_TAG_TYPE:
-      case NAMED_TYPE_ACCESS:
-      case FUNCTION_ACCESS:
-      case COMMENT_SELFCLOSING:
-      case COMMENT_EXPLICIT:
-        OS << RecordNames[RecID] << ": " << Record[0] << "\n";
-      default:
-        continue;
-    }
+    if (!readRecord(Stream, BlockOrCode, I)) return false;
   }
 }
 
-#undef INFOCASES
-
 ClangDocBinaryReader::Cursor ClangDocBinaryReader::skipUntilRecordOrBlock(
     llvm::BitstreamCursor &Stream, unsigned &BlockOrRecordID) {
   BlockOrRecordID = 0;
@@ -550,5 +814,100 @@
   llvm_unreachable("Premature stream end.");
 }
 
+bool ClangDocBinaryReader::validateStream(llvm::BitstreamCursor &Stream) {
+  if (Stream.AtEndOfStream()) return false;
+
+  // Sniff for the signature.
+  if (Stream.Read(8) != 'D' || Stream.Read(8) != 'O' || Stream.Read(8) != 'C' ||
+      Stream.Read(8) != 'S')
+    return false;
+  return true;
+}
+
+bool ClangDocBinaryReader::readBlockInfoBlock(llvm::BitstreamCursor &Stream) {
+  BlockInfo = Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
+  if (!BlockInfo) return false;
+  Stream.setBlockInfo(&*BlockInfo);
+  // Extract the record names associated with each field
+  for (unsigned i = BI_FIRST; i <= BI_LAST; ++i) {
+    for (const auto &N : (*BlockInfo).getBlockInfo(i)->RecordNames)
+      RecordNames[N.first] = N.second;
+  }
+  return true;
+}
+
+bool ClangDocBinaryReader::readBitstreamToInfoSet(SmallString<2048> Bits, std::unique_ptr<InfoSet> &IS) {
+  BitstreamCursor Stream(Bits);
+  if (!validateStream(Stream)) return false;
+
+  // Read the top level blocks.
+  while (!Stream.AtEndOfStream()) {
+    unsigned Code = Stream.ReadCode();
+    if (Code != bitc::ENTER_SUBBLOCK) return false;
+
+    switch (auto ID = Stream.ReadSubBlockID()) {
+      case llvm::bitc::BLOCKINFO_BLOCK_ID: 
+        if (readBlockInfoBlock(Stream)) continue;
+        return false;
+      case NAMESPACE_BLOCK_ID: {
+        NamespaceInfo N;
+        if (readBlock(Stream, ID, N)) {
+          IS->insert(N.FullyQualifiedName, N);
+          continue;
+        }
+        return false;
+      }
+      case NONDEF_BLOCK_ID: {
+        NonDefInfo N;
+        if (readBlock(Stream, ID, N)) {
+          IS->insert(N.FullyQualifiedName, N);
+          continue;
+        }
+        return false;
+      }
+      case RECORD_BLOCK_ID: {
+        RecordInfo N;
+        if (readBlock(Stream, ID, N)) {
+          IS->insert(N.FullyQualifiedName, N);
+          continue;
+        }
+        return false;
+      }
+      case ENUM_BLOCK_ID: {
+        EnumInfo N;
+        if (readBlock(Stream, ID, N)) {
+          IS->insert(N.FullyQualifiedName, N);
+          continue;
+        }
+        return false;
+      }
+      case FUNCTION_BLOCK_ID: {
+        FunctionInfo N;
+        if (readBlock(Stream, ID, N)) {
+          IS->insert(N.MangledName, N);
+          continue;
+        }
+        return false;
+      }
+      // NamedType and Comment blocks should not appear at the top level
+      case NAMED_TYPE_BLOCK_ID:
+      case COMMENT_BLOCK_ID:
+        errs() << "Invalid top level block.\n";
+        return false;
+      default:
+        if (!Stream.SkipBlock()) return false;
+        continue;
+    }
+  }
+  return true;
+}
+
+std::unique_ptr<InfoSet> ClangDocBinaryReader::readBitstream(SmallString<2048> Bits) {
+  std::unique_ptr<InfoSet> IS = llvm::make_unique<InfoSet>();
+  if (readBitstreamToInfoSet(Bits, IS))
+    return std::move(IS);
+  return nullptr;
+}
+
 }  // namespace doc
 }  // namespace clang
Index: clang-doc/CMakeLists.txt
===================================================================
--- clang-doc/CMakeLists.txt
+++ clang-doc/CMakeLists.txt
@@ -6,6 +6,8 @@
   ClangDoc.cpp
   ClangDocMapper.cpp
   ClangDocBinary.cpp
+  ClangDocReducer.cpp
+  ClangDocRepresentation.cpp
 
   LINK_LIBS
   clangAnalysis
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to