On 09/05/2018 12:41 PM, Sam McCall wrote:
Thanks. Unclear to me whether it's the enum class or the anonymous namespace that's triggering this (I believe) compiler bug, but r341459 may help...

Still doesn't work.

In file included from ../tools/clang/include/clang/Frontend/CommandLineSourceLoc.h:19:0,
                 from ../tools/clang/include/clang/Frontend/FrontendOptions.h:13,
                 from ../tools/clang/include/clang/Frontend/CompilerInvocation.h:19,
                 from ../tools/clang/include/clang/Frontend/CompilerInstance.h:16,
                 from ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:22:
../include/llvm/Support/CommandLine.h:606:29: error: expected primary-expression before '{' token
   llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
                             ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67:16: note: in expansion of macro 'clEnumValN' clEnumValN(Format::YAML, "yaml", "human-readable YAML format"),
                ^
../include/llvm/Support/CommandLine.h:606:29: error: expected primary-expression before '{' token
   llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
                             ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:68:16: note: in expansion of macro 'clEnumValN' clEnumValN(Format::Binary, "binary", "binary RIFF format")),
                ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:69:27: error: 'Format' is not a class, namespace, or enumeration
            llvm::cl::init(Format::YAML));
                           ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp: In function 'int main(int, const char**)': ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:276:23: error: 'clang::clangd::Format' is not a class, namespace, or enumeration
   case clang::clangd::Format::YAML:
                       ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:279:23: error: 'clang::clangd::Format' is not a class, namespace, or enumeration
   case clang::clangd::Format::Binary: {
                       ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:275:10: warning: enumeration value 'YAML' not handled in switch [-Wswitch]
   switch (clang::clangd::Format) {
          ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:275:10: warning: enumeration value 'Binary' not handled in switch [-Wswitch]

Changing the name of the enum from Format to Formats (so the name isn't the same as the variable) makes it compile.

I.e:

@@ -58,17 +58,17 @@ static llvm::cl::opt<bool> MergeOnTheFly(
         "usage and an almost instant reduce stage. Optimal for running as a "
         "standalone tool, but cannot be used with multi-process executors like "
         "MapReduce."),
     llvm::cl::init(true), llvm::cl::Hidden);

-enum Format { YAML, Binary };
-static llvm::cl::opt<Format>
+enum Formats { YAML, Binary };
+static llvm::cl::opt<Formats>
     Format("format", llvm::cl::desc("Format of the index to be written"),
            llvm::cl::values(
-               clEnumValN(Format::YAML, "yaml", "human-readable YAML format"),
-               clEnumValN(Format::Binary, "binary", "binary RIFF format")),
-           llvm::cl::init(Format::YAML));
+               clEnumValN(Formats::YAML, "yaml", "human-readable YAML format"),
+               clEnumValN(Formats::Binary, "binary", "binary RIFF format")),
+           llvm::cl::init(Formats::YAML));

/// Responsible for aggregating symbols from each processed file and producing
 /// the final results. All methods in this class must be thread-safe,
 /// 'consumeSymbols' may be called from multiple threads.
 class SymbolsConsumer {
@@ -271,14 +271,14 @@ int main(int argc, const char **argv) {
   }
   // Reduce phase: combine symbols with the same IDs.
   auto UniqueSymbols = Consumer->mergeResults();
   // Output phase: emit result symbols.
   switch (clang::clangd::Format) {
-  case clang::clangd::Format::YAML:
+  case clang::clangd::Formats::YAML:
     SymbolsToYAML(UniqueSymbols, llvm::outs());
     break;
-  case clang::clangd::Format::Binary: {
+  case clang::clangd::Formats::Binary: {
     clang::clangd::IndexFileOut Out;
     Out.Symbols = &UniqueSymbols;
     llvm::outs() << Out;
   }
   }

With this change it seems to compile with gcc 5.4.0.

I have no idea whether this is a gcc bug, or whether it is a bug in clang that it does not also complain about it.

/Mikael


On Wed, Sep 5, 2018 at 11:05 AM Mikael Holmén <mikael.hol...@ericsson.com <mailto:mikael.hol...@ericsson.com>> wrote:



    On 09/05/2018 09:56 AM, Sam McCall wrote:
     > Sorry! r341451 should fix this, will keep an eye on the buildbots.
     >

    Now it compiles with clang 3.6.0 but with gcc 5.4.0 it fails with

    /proj/bbi_twh/wh_bbi/x86_64-Linux2/bbigcc/1.5.4.0/crosscompiler/bin/g++
    -I/proj/bbi_twh/wh_bbi/x86_64-Linux2/bbilibxml2/1/include
    -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS
    -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS
    -Itools/clang/tools/extra/clangd/global-symbol-builder
    -I../tools/clang/tools/extra/clangd/global-symbol-builder
    -I../tools/clang/include -Itools/clang/include -I/usr/include/libxml2
    -Iinclude -I../include
    -I../tools/clang/tools/extra/clangd/global-symbol-builder/..
    -I/repo/app/valgrind/3.11.0/include  -fPIC -fvisibility-inlines-hidden
    -Werror=date-time -std=c++11 -Wall -Wextra -Wno-unused-parameter
    -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic
    -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor
    -Wno-comment -fdiagnostics-color -ffunction-sections -fdata-sections
    -fno-common -Woverloaded-virtual -fno-strict-aliasing -O3    -UNDEBUG
    -fno-exceptions -fno-rtti -MMD -MT
    
tools/clang/tools/extra/clangd/global-symbol-builder/CMakeFiles/global-symbol-builder.dir/GlobalSymbolBuilderMain.cpp.o

    -MF
    
tools/clang/tools/extra/clangd/global-symbol-builder/CMakeFiles/global-symbol-builder.dir/GlobalSymbolBuilderMain.cpp.o.d

    -o
    
tools/clang/tools/extra/clangd/global-symbol-builder/CMakeFiles/global-symbol-builder.dir/GlobalSymbolBuilderMain.cpp.o

    -c
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
    In file included from
    ../tools/clang/include/clang/Frontend/CommandLineSourceLoc.h:19:0,
                       from
    ../tools/clang/include/clang/Frontend/FrontendOptions.h:13,
                       from
    ../tools/clang/include/clang/Frontend/CompilerInvocation.h:19,
                       from
    ../tools/clang/include/clang/Frontend/CompilerInstance.h:16,
                       from
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:22:
    ../include/llvm/Support/CommandLine.h:606:52: error: invalid cast from
    type 'llvm::cl::opt<clang::clangd::{anonymous}::Format>' to type 'int'
         llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
                                                          ^
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67:16:

    note: in expansion of macro 'clEnumValN'
                      clEnumValN(Format::YAML, "yaml", "human-readable YAML
    format"),
                      ^
    ../include/llvm/Support/CommandLine.h:606:29: error: expected
    primary-expression before '{' token
         llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
                                   ^
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67:16:

    note: in expansion of macro 'clEnumValN'
                      clEnumValN(Format::YAML, "yaml", "human-readable YAML
    format"),
                      ^
    ../include/llvm/Support/CommandLine.h:606:52: error: invalid cast from
    type 'llvm::cl::opt<clang::clangd::{anonymous}::Format>' to type 'int'
         llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
                                                          ^
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:68:16:

    note: in expansion of macro 'clEnumValN'
                      clEnumValN(Format::Binary, "binary", "binary RIFF
    format")),
                      ^
    ../include/llvm/Support/CommandLine.h:606:29: error: expected
    primary-expression before '{' token
         llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
                                   ^
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:68:16:

    note: in expansion of macro 'clEnumValN'
                      clEnumValN(Format::Binary, "binary", "binary RIFF
    format")),
                      ^
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:69:27:

    error: 'Format' is not a class, namespace, or enumeration
                  llvm::cl::init(Format::YAML));
                                 ^
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:

    In function 'int main(int, const char**)':
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:276:23:

    error: 'clang::clangd::Format' is not a class, namespace, or enumeration
         case clang::clangd::Format::YAML:
                             ^
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:279:23:

    error: 'clang::clangd::Format' is not a class, namespace, or enumeration
         case clang::clangd::Format::Binary: {
                             ^
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:275:10:

    warning: enumeration value 'YAML' not handled in switch [-Wswitch]
         switch (clang::clangd::Format) {
                ^
    
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:275:10:

    warning: enumeration value 'Binary' not handled in switch [-Wswitch]

    /Mikael

     > On Wed, Sep 5, 2018 at 8:46 AM Mikael Holmén
    <mikael.hol...@ericsson.com <mailto:mikael.hol...@ericsson.com>
     > <mailto:mikael.hol...@ericsson.com
    <mailto:mikael.hol...@ericsson.com>>> wrote:
     >
     >     Hi Sam,
     >
     >     This doesn't compile for me. Both clang 3.6.0 and gcc 5.4.0
    complain:
     >
     >     [1/6] Building CXX object
>  tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o
     >     FAILED:
>  tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o
     >
     >
     >     /usr/bin/clang++  -march=corei7  -DGTEST_HAS_RTTI=0 -D_DEBUG
     >     -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
     >     -D__STDC_LIMIT_MACROS -Itools/clang/tools/extra/clangd
     >     -I../tools/clang/tools/extra/clangd -I../tools/clang/include
     >     -Itools/clang/include -I/usr/include/libxml2 -Iinclude
    -I../include
     >     -I/proj/flexasic/app/valgrind/3.11.0/include  -fPIC
     >     -fvisibility-inlines-hidden -Werror -Werror=date-time
    -std=c++11 -Wall
     >     -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual
     >     -Wmissing-field-initializers -pedantic -Wno-long-long
     >     -Wcovered-switch-default -Wnon-virtual-dtor
    -Wdelete-non-virtual-dtor
     >     -Wstring-conversion -fdiagnostics-color -ffunction-sections
     >     -fdata-sections -fno-common -Woverloaded-virtual
    -Wno-nested-anon-types
     >     -O3    -UNDEBUG  -fno-exceptions -fno-rtti -MMD -MT
>  tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o
     >
     >     -MF
>  tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o.d
     >
     >     -o
>  tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o
     >
     >     -c ../tools/clang/tools/extra/clangd/index/Serialization.cpp
     >     ../tools/clang/tools/extra/clangd/index/Serialization.cpp:154:10:
     >     error:
     >     no viable conversion from 'clang::clangd::(anonymous
     >     namespace)::StringTableIn' to 'Expected<clang::clangd::(anonymous
     >     namespace)::StringTableIn>'
     >         return Table;
     >                ^~~~~
     >     ../include/llvm/Support/Error.h:434:41: note: candidate
    constructor
     >     (the
     >     implicit copy constructor) not viable: no known conversion from
     >     'clang::clangd::(anonymous namespace)::StringTableIn' to 'const
     >     llvm::Expected<clang::clangd::(anonymous
    namespace)::StringTableIn> &'
     >     for 1st argument
     >     template <class T> class LLVM_NODISCARD Expected {
     >                                               ^
     >     ../include/llvm/Support/Error.h:456:3: note: candidate
    constructor not
     >     viable: no known conversion from 'clang::clangd::(anonymous
     >     namespace)::StringTableIn' to 'llvm::Error' for 1st argument
     >         Expected(Error Err)
     >         ^
     >     ../include/llvm/Support/Error.h:470:3: note: candidate
    constructor not
     >     viable: no known conversion from 'clang::clangd::(anonymous
     >     namespace)::StringTableIn' to 'llvm::ErrorSuccess' for 1st
    argument
     >         Expected(ErrorSuccess) = delete;
     >         ^
     >     ../include/llvm/Support/Error.h:488:3: note: candidate
    constructor not
     >     viable: no known conversion from 'clang::clangd::(anonymous
     >     namespace)::StringTableIn' to
    'llvm::Expected<clang::clangd::(anonymous
     >     namespace)::StringTableIn> &&' for 1st argument
     >         Expected(Expected &&Other) {
    moveConstruct(std::move(Other)); }
     >         ^
     >     ../include/llvm/Support/Error.h:476:36: note: candidate template
     >     ignored: disabled by 'enable_if' [with OtherT =
     >     clang::clangd::(anonymous namespace)::StringTableIn &]
     >                  typename std::enable_if<std::is_convertible<OtherT,
     >     T>::value>::type
     >                                          ^
     >     ../include/llvm/Support/Error.h:493:3: note: candidate template
     >     ignored:
     >     could not match 'Expected<type-parameter-0-0>' against
     >     'clang::clangd::(anonymous namespace)::StringTableIn'
     >         Expected(Expected<OtherT> &&Other,
     >         ^
     >     In file included from
     >     ../tools/clang/tools/extra/clangd/index/Serialization.cpp:9:
     >     In file included from
     >     ../tools/clang/tools/extra/clangd/index/Serialization.h:23:
     >     In file included from
     >     ../tools/clang/tools/extra/clangd/index/Index.h:13:
     >     In file included from
     >     ../tools/clang/include/clang/Index/IndexSymbol.h:14:
     >     In file included from
    ../tools/clang/include/clang/Lex/MacroInfo.h:18:
     >     In file included from
    ../tools/clang/include/clang/Lex/Token.h:17:
     >     In file included from
     >     ../tools/clang/include/clang/Basic/SourceLocation.h:19:
     >     In file included from ../include/llvm/ADT/StringRef.h:13:
     >     In file included from ../include/llvm/ADT/STLExtras.h:20:
     >     ../include/llvm/ADT/Optional.h:41:28: error: call to
    implicitly-deleted
     >     copy constructor of 'clang::clangd::SymbolSlab'
     >             new (storage.buffer) T(*O.getPointer());
     >                                  ^ ~~~~~~~~~~~~~~~
     >     ../include/llvm/ADT/Optional.h:141:3: note: in instantiation
    of member
     >     function
>  'llvm::optional_detail::OptionalStorage<clang::clangd::SymbolSlab,
     >     false>::OptionalStorage' requested here
     >         Optional(const Optional &O) = default;
     >         ^
>  ../tools/clang/tools/extra/clangd/index/Serialization.cpp:325:10:
    note:
     >     in instantiation of function template specialization
>  'llvm::Expected<clang::clangd::IndexFileIn>::Expected<clang::clangd::IndexFileIn
     >
     >     &>' requested here
     >         return Result;
     >                ^
     >     ../tools/clang/tools/extra/clangd/index/Index.h:324:26: note:
    copy
     >     constructor of 'SymbolSlab' is implicitly deleted because
    field 'Arena'
     >     has a deleted copy constructor
     >         llvm::BumpPtrAllocator Arena; // Owns Symbol data that the
     >     Symbols do
     >     not.
     >                                ^
     >     ../include/llvm/Support/Allocator.h:157:3: note: copy
    constructor is
     >     implicitly deleted because
    'BumpPtrAllocatorImpl<llvm::MallocAllocator,
     >     4096, 4096>' has a user-declared move constructor
     >         BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old)
     >         ^
     >     2 errors generated.
     >
     >     Several buildbots fail the same way.
     >
     >     /Mikael
     >
     >     On 09/04/2018 06:16 PM, Sam McCall via cfe-commits wrote:
     >      > Author: sammccall
     >      > Date: Tue Sep  4 09:16:50 2018
     >      > New Revision: 341375
     >      >
     >      > URL: http://llvm.org/viewvc/llvm-project?rev=341375&view=rev
     >      > Log:
     >      > [clangd] Define a compact binary serialization format for
    symbol
     >     slab/index.
     >      >
     >      > Summary:
     >      > This is intended to replace the current YAML format for
    general use.
     >      > It's ~10x more compact than YAML, and ~40% more compact than
     >     gzipped YAML:
     >      >    llvmidx.riff = 20M, llvmidx.yaml = 272M,
    llvmidx.yaml.gz = 32M
     >      > It's also simpler/faster to read and write.
     >      >
     >      > The format is a RIFF container (chunks of (type, size,
    data)) with:
     >      >   - a compressed string table
     >      >   - simple binary encoding of symbols (with varints for
    compactness)
     >      > It can be extended to include occurrences, Dex posting
    lists, etc.
     >      >
     >      > There's no rich backwards-compatibility scheme, but a version
     >     number is included
     >      > so we can detect incompatible files and do ad-hoc back-compat.
     >      >
     >      > Alternatives considered:
     >      >   - compressed YAML or JSON: bulky and slow to load
     >      >   - llvm bitstream: confusing model and libraries are hard to
     >     use. My attempt
     >      >     produced slightly larger files, and the code was
    longer and
     >     slower.
     >      >   - protobuf or similar: would be really nice (esp for
     >     back-compat) but the
     >      >     dependency is a big hassle
     >      >   - ad-hoc binary format without a container: it seems clear
     >     we're going
     >      >     to add posting lists and occurrences here, and that
    they will
     >     benefit
     >      >     from sharing a string table. The container makes it
    easy to debug
     >      >     these pieces in isolation, and make them optional.
     >      >
     >      > Reviewers: ioeric
     >      >
     >      > Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, mgrang,
     >     arphaman, kadircet, cfe-commits
     >      >
     >      > Differential Revision: https://reviews.llvm.org/D51585
     >      >
     >      > Added:
     >      >      clang-tools-extra/trunk/clangd/RIFF.cpp
     >      >      clang-tools-extra/trunk/clangd/RIFF.h
     >      >      clang-tools-extra/trunk/clangd/index/Serialization.cpp
     >      >      clang-tools-extra/trunk/clangd/index/Serialization.h
     >      >      clang-tools-extra/trunk/unittests/clangd/RIFFTests.cpp
>      > clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp
     >      > Modified:
     >      >      clang-tools-extra/trunk/clangd/CMakeLists.txt
     >      >
>  clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
     >      >      clang-tools-extra/trunk/clangd/index/Index.cpp
     >      >      clang-tools-extra/trunk/clangd/index/Index.h
     >      >      clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
     >      >      clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp
     >      >      clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
     >      >
     >     clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
     >      >
     >      > Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
     >      > URL:
     >
    
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=341375&r1=341374&r2=341375&view=diff
     >      >
>  ==============================================================================
     >      > --- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
     >      > +++ clang-tools-extra/trunk/clangd/CMakeLists.txt Tue Sep  4
     >     09:16:50 2018
     >      > @@ -29,6 +29,7 @@ add_clang_library(clangDaemon
     >      >     Protocol.cpp
     >      >     ProtocolHandlers.cpp
     >      >     Quality.cpp
     >      > +  RIFF.cpp
     >      >     SourceCode.cpp
     >      >     Threading.cpp
     >      >     Trace.cpp
     >      > @@ -41,6 +42,7 @@ add_clang_library(clangDaemon
     >      >     index/Index.cpp
     >      >     index/MemIndex.cpp
     >      >     index/Merge.cpp
     >      > +  index/Serialization.cpp
     >      >     index/SymbolCollector.cpp
     >      >     index/SymbolYAML.cpp
     >      >
     >      >
     >      > Added: clang-tools-extra/trunk/clangd/RIFF.cpp
     >      > URL:
     >
    
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/RIFF.cpp?rev=341375&view=auto
     >      >
>  ==============================================================================
     >      > --- clang-tools-extra/trunk/clangd/RIFF.cpp (added)
     >      > +++ clang-tools-extra/trunk/clangd/RIFF.cpp Tue Sep  4
    09:16:50 2018
     >      > @@ -0,0 +1,88 @@
     >      > +//===--- RIFF.cpp - Binary container file format
     >     --------------------------===//
     >      > +//
     >      > +//                     The LLVM Compiler Infrastructure
     >      > +//
     >      > +// This file is distributed under the University of Illinois
     >     Open Source
     >      > +// License. See LICENSE.TXT for details.
     >      > +//
     >      >
>  +//===----------------------------------------------------------------------===//
     >      > +
     >      > +#include "RIFF.h"
     >      > +#include "llvm/Support/Endian.h"
     >      > +
     >      > +using namespace llvm;
     >      > +namespace clang {
     >      > +namespace clangd {
     >      > +namespace riff {
     >      > +
     >      > +static Error makeError(const char *Msg) {
     >      > +  return createStringError(inconvertibleErrorCode(), Msg);
     >      > +}
     >      > +
     >      > +Expected<Chunk> readChunk(StringRef &Stream) {
     >      > +  if (Stream.size() < 8)
     >      > +    return makeError("incomplete chunk header");
     >      > +  Chunk C;
     >      > +  std::copy(Stream.begin(), Stream.begin() + 4,
    C.ID.begin());
     >      > +  Stream = Stream.drop_front(4);
     >      > +  uint32_t Len =
     >     support::endian::read32le(Stream.take_front(4).begin());
     >      > +  Stream = Stream.drop_front(4);
     >      > +  if (Stream.size() < Len)
     >      > +    return makeError("truncated chunk");
     >      > +  C.Data = Stream.take_front(Len);
     >      > +  Stream = Stream.drop_front(Len);
     >      > +  if (Len % 2 & !Stream.empty()) { // Skip padding byte.
     >      > +    if (Stream.front())
     >      > +      return makeError("nonzero padding byte");
     >      > +    Stream = Stream.drop_front();
     >      > +  }
     >      > +  return C;
     >      > +};
     >      > +
     >      > +raw_ostream &operator<<(raw_ostream &OS, const Chunk &C) {
     >      > +  OS.write(C.ID.begin(), C.ID.size());
     >      > +  char Size[4];
     >      > +  llvm::support::endian::write32le(Size, C.Data.size());
     >      > +  OS.write(Size, sizeof(Size));
     >      > +  OS << C.Data;
     >      > +  if (C.Data.size() % 2)
     >      > +    OS.write(0);
     >      > +  return OS;
     >      > +}
     >      > +
     >      > +llvm::Expected<File> readFile(llvm::StringRef Stream) {
     >      > +  auto RIFF = readChunk(Stream);
     >      > +  if (!RIFF)
     >      > +    return RIFF.takeError();
     >      > +  if (RIFF->ID != fourCC("RIFF"))
     >      > +    return makeError("not a RIFF container");
     >      > +  if (RIFF->Data.size() < 4)
     >      > +    return makeError("RIFF chunk too short");
     >      > +  File F;
     >      > +  std::copy(RIFF->Data.begin(), RIFF->Data.begin() + 4,
     >     F.Type.begin());
     >      > +  for (llvm::StringRef Body = RIFF->Data.drop_front(4);
     >     !Body.empty();)
     >      > +    if (auto Chunk = readChunk(Body)) {
     >      > +      F.Chunks.push_back(*Chunk);
     >      > +    } else
     >      > +      return Chunk.takeError();
     >      > +  return F;
     >      > +}
     >      > +
     >      > +raw_ostream &operator<<(raw_ostream &OS, const File &F) {
     >      > +  // To avoid copies, we serialize the outer RIFF chunk
    "by hand".
     >      > +  size_t DataLen = 4; // Predict length of RIFF chunk data.
     >      > +  for (const auto &C : F.Chunks)
     >      > +    DataLen += 4 + 4 + C.Data.size() + (C.Data.size() % 2);
     >      > +  OS << "RIFF";
     >      > +  char Size[4];
     >      > +  llvm::support::endian::write32le(Size, DataLen);
     >      > +  OS.write(Size, sizeof(Size));
     >      > +  OS.write(F.Type.begin(), F.Type.size());
     >      > +  for (const auto &C : F.Chunks)
     >      > +    OS << C;
     >      > +  return OS;
     >      > +}
     >      > +
     >      > +} // namespace riff
     >      > +} // namespace clangd
     >      > +} // namespace clang
     >      >
     >      > Added: clang-tools-extra/trunk/clangd/RIFF.h
     >      > URL:
     >
    
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/RIFF.h?rev=341375&view=auto
     >      >
>  ==============================================================================
     >      > --- clang-tools-extra/trunk/clangd/RIFF.h (added)
     >      > +++ clang-tools-extra/trunk/clangd/RIFF.h Tue Sep  4
    09:16:50 2018
     >      > @@ -0,0 +1,81 @@
     >      > +//===--- RIFF.h - Binary container file format
     >     -------------------*- C++-*-===//
     >      > +//
     >      > +//                     The LLVM Compiler Infrastructure
     >      > +//
     >      > +// This file is distributed under the University of Illinois
     >     Open Source
     >      > +// License. See LICENSE.TXT for details.
     >      > +//
     >      >
>  +//===----------------------------------------------------------------------===//
     >      > +//
     >      > +// Tools for reading and writing data in RIFF containers.
     >      > +//
     >      > +// A chunk consists of:
     >      > +//   - ID      : char[4]
     >      > +//   - Length  : uint32
     >      > +//   - Data    : byte[Length]
     >      > +//   - Padding : byte[Length % 2]
     >      > +// The semantics of a chunk's Data are determined by its ID.
     >      > +// The format makes it easy to skip over uninteresting or
     >     unknown chunks.
     >      > +//
     >      > +// A RIFF file is a single chunk with ID "RIFF". Its Data is:
     >      > +//   - Type    : char[4]
     >      > +//   - Chunks  : chunk[]
     >      > +//
     >      > +// This means that a RIFF file consists of:
     >      > +//   - "RIFF"          : char[4]
     >      > +//   - File length - 8 : uint32
     >      > +//   - File type       : char[4]
     >      > +//   - Chunks          : chunk[]
     >      > +//
     >      >
>  +//===----------------------------------------------------------------------===//
     >      > +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_RIFF_H
     >      > +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_RIFF_H
     >      > +#include "llvm/ADT/StringRef.h"
     >      > +#include "llvm/Support/Error.h"
     >      > +#include "llvm/Support/ScopedPrinter.h"
     >      > +#include <array>
     >      > +
     >      > +namespace clang {
     >      > +namespace clangd {
     >      > +namespace riff {
     >      > +
     >      > +// A FourCC identifies a chunk in a file, or the type of file
     >     itself.
     >      > +using FourCC = std::array<char, 4>;
     >      > +// Get a FourCC from a string literal, e.g. fourCC("RIFF").
     >      > +inline constexpr FourCC fourCC(const char (&Literal)[5]) {
     >      > +  return FourCC{{Literal[0], Literal[1], Literal[2],
    Literal[3]}};
     >      > +}
     >      > +// A chunk is a section in a RIFF container.
     >      > +struct Chunk {
     >      > +  FourCC ID;
     >      > +  llvm::StringRef Data;
     >      > +};
     >      > +inline bool operator==(const Chunk &L, const Chunk &R) {
     >      > +  return std::tie(L.ID, L.Data) == std::tie(R.ID, R.Data);
     >      > +}
     >      > +// A File is a RIFF container, which is a typed chunk
    sequence.
     >      > +struct File {
     >      > +  FourCC Type;
     >      > +  std::vector<Chunk> Chunks;
     >      > +};
     >      > +inline bool operator==(const File &L, const File &R) {
     >      > +  return std::tie(L.Type, L.Chunks) == std::tie(R.Type,
    R.Chunks);
     >      > +}
     >      > +
     >      > +// Reads a single chunk from the start of Stream.
     >      > +// Stream is updated to exclude the consumed chunk.
     >      > +llvm::Expected<Chunk> readChunk(llvm::StringRef &Stream);
     >      > +
     >      > +// Serialize a single chunk to OS.
     >      > +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
    const Chunk &);
     >      > +
     >      > +// Parses a RIFF file consisting of a single RIFF chunk.
     >      > +llvm::Expected<File> readFile(llvm::StringRef Stream);
     >      > +
     >      > +// Serialize a RIFF file (i.e. a single RIFF chunk) to OS.
     >      > +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
    const File &);
     >      > +
     >      > +} // namespace riff
     >      > +} // namespace clangd
     >      > +} // namespace clang
     >      > +#endif
     >      >
     >      > Modified:
>  clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
     >      > URL:
     >
    
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp?rev=341375&r1=341374&r2=341375&view=diff
     >      >
>  ==============================================================================
     >      > ---
>  clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
     >     (original)
     >      > +++
>  clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
     >     Tue Sep  4 09:16:50 2018
     >      > @@ -7,15 +7,16 @@
     >      >   //
     >      >
>  //===----------------------------------------------------------------------===//
     >      >   //
     >      > -// GlobalSymbolBuilder is a tool to generate YAML-format
    symbols
     >     across the
     >      > -// whole project. This tools is for **experimental**
    only. Don't
     >     use it in
     >      > -// production code.
     >      > +// GlobalSymbolBuilder is a tool to extract symbols from
    a whole
     >     project.
     >      > +// This tool is **experimental** only. Don't use it in
     >     production code.
     >      >   //
     >      >
>  //===----------------------------------------------------------------------===//
     >      >
     >      > +#include "RIFF.h"
     >      >   #include "index/CanonicalIncludes.h"
     >      >   #include "index/Index.h"
     >      >   #include "index/Merge.h"
     >      > +#include "index/Serialization.h"
     >      >   #include "index/SymbolCollector.h"
     >      >   #include "index/SymbolYAML.h"
     >      >   #include "clang/Frontend/CompilerInstance.h"
     >      > @@ -59,6 +60,14 @@ static llvm::cl::opt<bool> MergeOnTheFly
     >      >           "MapReduce."),
     >      >       llvm::cl::init(true), llvm::cl::Hidden);
     >      >
     >      > +enum class Format { YAML, Binary };
     >      > +static llvm::cl::opt<Format>
     >      > +    Format("format", llvm::cl::desc("Format of the index
    to be
     >     written"),
     >      > +           llvm::cl::values(
     >      > +               clEnumValN(Format::YAML, "yaml",
    "human-readable
     >     YAML format"),
     >      > +               clEnumValN(Format::Binary, "binary",
    "binary RIFF
     >     format")),
     >      > +           llvm::cl::init(Format::YAML));
     >      > +
     >      >   /// Responsible for aggregating symbols from each processed
     >     file and producing
     >      >   /// the final results. All methods in this class must be
     >     thread-safe,
     >      >   /// 'consumeSymbols' may be called from multiple threads.
     >      > @@ -210,8 +219,8 @@ int main(int argc, const char **argv) {
     >      >     llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
     >      >
     >      >     const char *Overview = R"(
     >      > -  This is an **experimental** tool to generate YAML-format
     >     project-wide symbols
     >      > -  for clangd (global code completion). It would be
    changed and
     >     deprecated
     >      > +  This is an **experimental** tool to extract symbols from a
     >     whole project
     >      > +  for clangd (global code completion). It will be changed and
     >     deprecated
     >      >     eventually. Don't use it in production code!
     >      >
     >      >     Example usage for building index for the whole project
    using
     >     CMake compile
     >      > @@ -262,7 +271,16 @@ int main(int argc, const char **argv) {
     >      >     }
     >      >     // Reduce phase: combine symbols with the same IDs.
     >      >     auto UniqueSymbols = Consumer->mergeResults();
     >      > -  // Output phase: emit YAML for result symbols.
     >      > -  SymbolsToYAML(UniqueSymbols, llvm::outs());
     >      > +  // Output phase: emit result symbols.
     >      > +  switch (clang::clangd::Format) {
     >      > +  case clang::clangd::Format::YAML:
     >      > +    SymbolsToYAML(UniqueSymbols, llvm::outs());
     >      > +    break;
     >      > +  case clang::clangd::Format::Binary: {
     >      > +    clang::clangd::IndexFileOut Out;
     >      > +    Out.Symbols = &UniqueSymbols;
     >      > +    llvm::outs() << Out;
     >      > +  }
     >      > +  }
     >      >     return 0;
     >      >   }
     >      >
     >      > Modified: clang-tools-extra/trunk/clangd/index/Index.cpp
     >      > URL:
     >
    
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=341375&r1=341374&r2=341375&view=diff
     >      >
>  ==============================================================================
     >      > --- clang-tools-extra/trunk/clangd/index/Index.cpp (original)
     >      > +++ clang-tools-extra/trunk/clangd/index/Index.cpp Tue Sep  4
     >     09:16:50 2018
     >      > @@ -10,6 +10,7 @@
     >      >   #include "Index.h"
     >      >   #include "llvm/ADT/StringExtras.h"
     >      >   #include "llvm/ADT/StringRef.h"
     >      > +#include "llvm/Support/Error.h"
     >      >   #include "llvm/Support/SHA1.h"
     >      >   #include "llvm/Support/raw_ostream.h"
     >      >
     >      > @@ -28,21 +29,20 @@ SymbolID::SymbolID(StringRef USR)
     >      >       : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {}
     >      >
     >      >   raw_ostream &operator<<(raw_ostream &OS, const SymbolID
    &ID) {
     >      > -  OS << toHex(toStringRef(ID.HashValue));
     >      > -  return OS;
     >      > +  return OS << toHex(ID.raw());
     >      >   }
     >      >
     >      > -std::string SymbolID::str() const {
     >      > -  std::string ID;
     >      > -  llvm::raw_string_ostream OS(ID);
     >      > -  OS << *this;
     >      > -  return OS.str();
     >      > +SymbolID SymbolID::fromRaw(llvm::StringRef Raw) {
     >      > +  SymbolID ID;
     >      > +  assert(Raw.size() == RawSize);
     >      > +  memcpy(ID.HashValue.data(), Raw.data(), RawSize);
     >      > +  return ID;
     >      >   }
     >      >
     >      > +std::string SymbolID::str() const { return toHex(raw()); }
     >      > +
     >      >   void operator>>(StringRef Str, SymbolID &ID) {
     >      > -  std::string HexString = fromHex(Str);
     >      > -  assert(HexString.size() == ID.HashValue.size());
     >      > -  std::copy(HexString.begin(), HexString.end(),
     >     ID.HashValue.begin());
     >      > +  ID = SymbolID::fromRaw(fromHex(Str));
     >      >   }
     >      >
     >      >   raw_ostream &operator<<(raw_ostream &OS, SymbolOrigin O) {
     >      > @@ -78,34 +78,18 @@ SymbolSlab::const_iterator SymbolSlab::f
     >      >   }
     >      >
     >      >   // Copy the underlying data of the symbol into the owned
    arena.
     >      > -static void own(Symbol &S, llvm::UniqueStringSaver &Strings,
     >      > -                BumpPtrAllocator &Arena) {
     >      > -  // Intern replaces V with a reference to the same
    string owned
     >     by the arena.
     >      > -  auto Intern = [&](StringRef &V) { V = Strings.save(V); };
     >      > -
     >      > -  // We need to copy every StringRef field onto the arena.
     >      > -  Intern(S.Name);
     >      > -  Intern(S.Scope);
     >      > -  Intern(S.CanonicalDeclaration.FileURI);
     >      > -  Intern(S.Definition.FileURI);
     >      > -
     >      > -  Intern(S.Signature);
     >      > -  Intern(S.CompletionSnippetSuffix);
     >      > -
     >      > -  Intern(S.Documentation);
     >      > -  Intern(S.ReturnType);
     >      > -  for (auto &I : S.IncludeHeaders)
     >      > -    Intern(I.IncludeHeader);
     >      > +static void own(Symbol &S, llvm::UniqueStringSaver
    &Strings) {
     >      > +  visitStrings(S, [&](StringRef &V) { V =
    Strings.save(V); });
     >      >   }
     >      >
     >      >   void SymbolSlab::Builder::insert(const Symbol &S) {
     >      >     auto R = SymbolIndex.try_emplace(S.ID, Symbols.size());
     >      >     if (R.second) {
     >      >       Symbols.push_back(S);
     >      > -    own(Symbols.back(), UniqueStrings, Arena);
     >      > +    own(Symbols.back(), UniqueStrings);
     >      >     } else {
     >      >       auto &Copy = Symbols[R.first->second] = S;
     >      > -    own(Copy, UniqueStrings, Arena);
     >      > +    own(Copy, UniqueStrings);
     >      >     }
     >      >   }
     >      >
     >      > @@ -118,7 +102,7 @@ SymbolSlab SymbolSlab::Builder::build()
     >      >     BumpPtrAllocator NewArena;
     >      >     llvm::UniqueStringSaver Strings(NewArena);
     >      >     for (auto &S : Symbols)
     >      > -    own(S, Strings, NewArena);
     >      > +    own(S, Strings);
     >      >     return SymbolSlab(std::move(NewArena),
    std::move(Symbols));
     >      >   }
     >      >
     >      >
     >      > Modified: clang-tools-extra/trunk/clangd/index/Index.h
     >      > URL:
     >
    
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=341375&r1=341374&r2=341375&view=diff
     >      >
>  ==============================================================================
     >      > --- clang-tools-extra/trunk/clangd/index/Index.h (original)
     >      > +++ clang-tools-extra/trunk/clangd/index/Index.h Tue Sep  4
     >     09:16:50 2018
     >      > @@ -84,26 +84,28 @@ public:
     >      >       return HashValue < Sym.HashValue;
     >      >     }
     >      >
     >      > +  constexpr static size_t RawSize = 20;
     >      > +  llvm::StringRef raw() const {
     >      > +    return StringRef(reinterpret_cast<const char
     >     *>(HashValue.data()), RawSize);
     >      > +  }
     >      > +  static SymbolID fromRaw(llvm::StringRef);
     >      >     // Returns a 40-bytes hex encoded string.
     >      >     std::string str() const;
     >      >
     >      >   private:
     >      > -  static constexpr unsigned HashByteLength = 20;
     >      > -
     >      > -  friend llvm::hash_code hash_value(const SymbolID &ID) {
     >      > -    // We already have a good hash, just return the first
    bytes.
     >      > -    static_assert(sizeof(size_t) <= HashByteLength, "size_t
     >     longer than SHA1!");
     >      > -    size_t Result;
     >      > -    memcpy(&Result, ID.HashValue.data(), sizeof(size_t));
     >      > -    return llvm::hash_code(Result);
     >      > -  }
     >      > -  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
     >      > -                                       const SymbolID &ID);
     >      >     friend void operator>>(llvm::StringRef Str, SymbolID &ID);
     >      >
     >      > -  std::array<uint8_t, HashByteLength> HashValue;
     >      > +  std::array<uint8_t, RawSize> HashValue;
     >      >   };
     >      >
     >      > +inline llvm::hash_code hash_value(const SymbolID &ID) {
     >      > +  // We already have a good hash, just return the first
    bytes.
     >      > +  assert(sizeof(size_t) <= SymbolID::RawSize && "size_t
    longer
     >     than SHA1!");
     >      > +  size_t Result;
     >      > +  memcpy(&Result, ID.raw().data(), sizeof(size_t));
     >      > +  return llvm::hash_code(Result);
     >      > +}
     >      > +
     >      >   // Write SymbolID into the given stream. SymbolID is
    encoded as
     >     a 40-bytes
     >      >   // hex string.
     >      >   llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const
     >     SymbolID &ID);
     >      > @@ -246,6 +248,21 @@ struct Symbol {
     >      >   };
     >      >   llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const
     >     Symbol &S);
     >      >
     >      > +// Invokes Callback with each StringRef& contained in the
    Symbol.
     >      > +// Useful for deduplicating backing strings.
     >      > +template <typename Callback> void visitStrings(Symbol &S,
    const
     >     Callback &CB) {
     >      > +  CB(S.Name);
     >      > +  CB(S.Scope);
     >      > +  CB(S.CanonicalDeclaration.FileURI);
     >      > +  CB(S.Definition.FileURI);
     >      > +  CB(S.Signature);
     >      > +  CB(S.CompletionSnippetSuffix);
     >      > +  CB(S.Documentation);
     >      > +  CB(S.ReturnType);
     >      > +  for (auto &Include : S.IncludeHeaders)
     >      > +    CB(Include.IncludeHeader);
     >      > +}
     >      > +
     >      >   // Computes query-independent quality score for a Symbol.
     >      >   // This currently falls in the range [1, ln(#indexed
    documents)].
     >      >   // FIXME: this should probably be split into symbol ->
    signals
     >      >
     >      > Added: clang-tools-extra/trunk/clangd/index/Serialization.cpp
     >      > URL:
     >
    
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=341375&view=auto
     >      >
>  ==============================================================================
     >      > --- clang-tools-extra/trunk/clangd/index/Serialization.cpp
    (added)
     >      > +++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Tue
     >     Sep  4 09:16:50 2018
     >      > @@ -0,0 +1,366 @@
     >      > +//===-- Serialization.cpp - Binary serialization of index
    data
     >     ------------===//
     >      > +//
     >      > +//                     The LLVM Compiler Infrastructure
     >      > +//
     >      > +// This file is distributed under the University of Illinois
     >     Open Source
     >      > +// License. See LICENSE.TXT for details.
     >      > +//
     >      >
>  +//===----------------------------------------------------------------------===//
     >      > +#include "Serialization.h"
     >      > +#include "../RIFF.h"
     >      > +#include "llvm/Support/Compression.h"
     >      > +#include "llvm/Support/Endian.h"
     >      > +#include "llvm/Support/Error.h"
     >      > +
     >      > +using namespace llvm;
     >      > +namespace clang {
     >      > +namespace clangd {
     >      > +namespace {
     >      > +Error makeError(const Twine &Msg) {
     >      > +  return make_error<StringError>(Msg,
    inconvertibleErrorCode());
     >      > +}
     >      > +
     >      > +// IO PRIMITIVES
     >      > +// We use little-endian 32 bit ints, sometimes with
     >     variable-length encoding.
     >      > +
     >      > +StringRef consume(StringRef &Data, int N) {
     >      > +  StringRef Ret = Data.take_front(N);
     >      > +  Data = Data.drop_front(N);
     >      > +  return Ret;
     >      > +}
     >      > +
     >      > +uint8_t consume8(StringRef &Data) {
     >      > +  uint8_t Ret = Data.front();
     >      > +  Data = Data.drop_front();
     >      > +  return Ret;
     >      > +}
     >      > +
     >      > +uint32_t consume32(StringRef &Data) {
     >      > +  auto Ret = support::endian::read32le(Data.bytes_begin());
     >      > +  Data = Data.drop_front(4);
     >      > +  return Ret;
     >      > +}
     >      > +
     >      > +void write32(uint32_t I, raw_ostream &OS) {
     >      > +  char buf[4];
     >      > +  support::endian::write32le(buf, I);
     >      > +  OS.write(buf, sizeof(buf));
     >      > +}
     >      > +
     >      > +// Variable-length int encoding (varint) uses the bottom
    7 bits
     >     of each byte
     >      > +// to encode the number, and the top bit to indicate whether
     >     more bytes follow.
     >      > +// e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
     >      > +// This represents 0x1a | 0x2f<<7 = 6042.
     >      > +// A 32-bit integer takes 1-5 bytes to encode; small
    numbers are
     >     more compact.
     >      > +void writeVar(uint32_t I, raw_ostream &OS) {
     >      > +  constexpr static uint8_t More = 1 << 7;
     >      > +  if (LLVM_LIKELY(I < 1 << 7)) {
     >      > +    OS.write(I);
     >      > +    return;
     >      > +  }
     >      > +  for (;;) {
     >      > +    OS.write(I | More);
     >      > +    I >>= 7;
     >      > +    if (I < 1 << 7) {
     >      > +      OS.write(I);
     >      > +      return;
     >      > +    }
     >      > +  }
     >      > +}
     >      > +
     >      > +uint32_t consumeVar(StringRef &Data) {
     >      > +  constexpr static uint8_t More = 1 << 7;
     >      > +  uint8_t B = consume8(Data);
     >      > +  if (LLVM_LIKELY(!(B & More)))
     >      > +    return B;
     >      > +  uint32_t Val = B & ~More;
     >      > +  for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
     >      > +    B = consume8(Data);
     >      > +    Val |= (B & ~More) << Shift;
     >      > +  }
     >      > +  return Val;
     >      > +}
     >      > +
     >      > +// STRING TABLE ENCODING
     >      > +// Index data has many string fields, and many strings are
     >     identical.
     >      > +// We store each string once, and refer to them by index.
     >      > +//
     >      > +// The string table's format is:
     >      > +//   - UncompressedSize : uint32
     >      > +//   - CompressedData   : byte[CompressedSize]
     >      > +//
     >      > +// CompressedData is a zlib-compressed
    byte[UncompressedSize].
     >      > +// It contains a sequence of null-terminated strings, e.g.
     >     "foo\0bar\0".
     >      > +// These are sorted to improve compression.
     >      > +
     >      > +// Maps each string to a canonical representation.
     >      > +// Strings remain owned externally (e.g. by SymbolSlab).
     >      > +class StringTableOut {
     >      > +  DenseSet<StringRef> Unique;
     >      > +  std::vector<StringRef> Sorted;
     >      > +  // Since strings are interned, look up can be by pointer.
     >      > +  DenseMap<std::pair<const char *, size_t>, unsigned> Index;
     >      > +
     >      > +public:
     >      > +  // Add a string to the table. Overwrites S if an identical
     >     string exists.
     >      > +  void intern(StringRef &S) { S = *Unique.insert(S).first; };
     >      > +  // Finalize the table and write it to OS. No more
    strings may
     >     be added.
     >      > +  void finalize(raw_ostream &OS) {
     >      > +    Sorted = {Unique.begin(), Unique.end()};
     >      > +    std::sort(Sorted.begin(), Sorted.end());
     >      > +    for (unsigned I = 0; I < Sorted.size(); ++I)
     >      > +      Index.try_emplace({Sorted[I].data(),
    Sorted[I].size()}, I);
     >      > +
     >      > +    std::string RawTable;
     >      > +    for (StringRef S : Sorted) {
     >      > +      RawTable.append(S);
     >      > +      RawTable.push_back(0);
     >      > +    }
     >      > +    SmallString<1> Compressed;
     >      > +    cantFail(zlib::compress(RawTable, Compressed));
     >      > +    write32(RawTable.size(), OS);
     >      > +    OS << Compressed;
     >      > +  }
     >      > +  // Get the ID of an string, which must be interned.
    Table must
     >     be finalized.
     >      > +  unsigned index(StringRef S) const {
     >      > +    assert(!Sorted.empty() && "table not finalized");
     >      > +    assert(Index.count({S.data(), S.size()}) && "string not
     >     interned");
     >      > +    return Index.find({S.data(), S.size()})->second;
     >      > +  }
     >      > +};
     >      > +
     >      > +struct StringTableIn {
     >      > +  BumpPtrAllocator Arena;
     >      > +  std::vector<StringRef> Strings;
     >      > +};
     >      > +
     >      > +Expected<StringTableIn> readStringTable(StringRef Data) {
     >      > +  if (Data.size() < 4)
     >      > +    return makeError("Bad string table: not enough
    metadata");
     >      > +  size_t UncompressedSize = consume32(Data);
     >      > +  SmallString<1> Uncompressed;
     >      > +  if (Error E = llvm::zlib::uncompress(Data, Uncompressed,
     >     UncompressedSize))
     >      > +    return std::move(E);
     >      > +
     >      > +  StringTableIn Table;
     >      > +  StringSaver Saver(Table.Arena);
     >      > +  for (StringRef Rest = Uncompressed; !Rest.empty();) {
     >      > +    auto Len = Rest.find(0);
     >      > +    if (Len == StringRef::npos)
     >      > +      return makeError("Bad string table: not null
    terminated");
     >      > +    Table.Strings.push_back(Saver.save(consume(Rest, Len)));
     >      > +    Rest = Rest.drop_front();
     >      > +  }
     >      > +  return Table;
     >      > +}
     >      > +
     >      > +// SYMBOL ENCODING
     >      > +// Each field of clangd::Symbol is encoded in turn (see
     >     implementation).
     >      > +//  - StringRef fields encode as varint (index into the
    string
     >     table)
     >      > +//  - enums encode as the underlying type
     >      > +//  - most numbers encode as varint
     >      > +
     >      > +// It's useful to the implementation to assume symbols have a
     >     bounded size.
     >      > +constexpr size_t SymbolSizeBound = 512;
     >      > +// To ensure the bounded size, restrict the number of include
     >     headers stored.
     >      > +constexpr unsigned MaxIncludes = 50;
     >      > +
     >      > +void writeSymbol(const Symbol &Sym, const StringTableOut
    &Strings,
     >      > +                 raw_ostream &OS) {
     >      > +  auto StartOffset = OS.tell();
     >      > +  OS << Sym.ID.raw(); // TODO: once we start writing
    xrefs and
     >     posting lists,
     >      > +                      // symbol IDs should probably be in a
     >     string table.
     >      > +  OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
     >      > +  OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
     >      > +  writeVar(Strings.index(Sym.Name), OS);
     >      > +  writeVar(Strings.index(Sym.Scope), OS);
     >      > +  for (const auto &Loc : {Sym.Definition,
     >     Sym.CanonicalDeclaration}) {
     >      > +    writeVar(Strings.index(Loc.FileURI), OS);
     >      > +    for (const auto &Endpoint : {Loc.Start, Loc.End}) {
     >      > +      writeVar(Endpoint.Line, OS);
     >      > +      writeVar(Endpoint.Column, OS);
     >      > +    }
     >      > +  }
     >      > +  writeVar(Sym.References, OS);
     >      > +  OS.write(Sym.IsIndexedForCodeCompletion);
     >      > +  OS.write(static_cast<uint8_t>(Sym.Origin));
     >      > +  writeVar(Strings.index(Sym.Signature), OS);
     >      > +  writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
     >      > +  writeVar(Strings.index(Sym.Documentation), OS);
     >      > +  writeVar(Strings.index(Sym.ReturnType), OS);
     >      > +
     >      > +  auto WriteInclude = [&](const
     >     Symbol::IncludeHeaderWithReferences &Include) {
     >      > +    writeVar(Strings.index(Include.IncludeHeader), OS);
     >      > +    writeVar(Include.References, OS);
     >      > +  };
     >      > +  // There are almost certainly few includes, so we can just
     >     write them.
     >      > +  if (LLVM_LIKELY(Sym.IncludeHeaders.size() <=
    MaxIncludes)) {
     >      > +    writeVar(Sym.IncludeHeaders.size(), OS);
     >      > +    for (const auto &Include : Sym.IncludeHeaders)
     >      > +      WriteInclude(Include);
     >      > +  } else {
     >      > +    // If there are too many, make sure we truncate the least
     >     important.
     >      > +    using Pointer = const
    Symbol::IncludeHeaderWithReferences *;
     >      > +    std::vector<Pointer> Pointers;
     >      > +    for (const auto &Include : Sym.IncludeHeaders)
     >      > +      Pointers.push_back(&Include);
     >      > +    std::sort(Pointers.begin(), Pointers.end(), [](Pointer L,
     >     Pointer R) {
     >      > +      return L->References > R->References;
     >      > +    });
     >      > +    Pointers.resize(MaxIncludes);
     >      > +
     >      > +    writeVar(MaxIncludes, OS);
     >      > +    for (Pointer P : Pointers)
     >      > +      WriteInclude(*P);
     >      > +  }
     >      > +
     >      > +  assert(OS.tell() - StartOffset < SymbolSizeBound && "Symbol
     >     length unsafe!");
     >      > +  (void)StartOffset; // Unused in NDEBUG;
     >      > +}
     >      > +
     >      > +Expected<Symbol> readSymbol(StringRef &Data, const
    StringTableIn
     >     &Strings) {
     >      > +  // Usually we can skip bounds checks because the buffer
    is huge.
     >      > +  // Near the end of the buffer, this would be unsafe. In
    this
     >     rare case, copy
     >      > +  // the data into a bigger buffer so we can again skip
    the checks.
     >      > +  if (LLVM_UNLIKELY(Data.size() < SymbolSizeBound)) {
     >      > +    std::string Buf(Data);
     >      > +    Buf.resize(SymbolSizeBound);
     >      > +    StringRef ExtendedData = Buf;
     >      > +    auto Ret = readSymbol(ExtendedData, Strings);
     >      > +    unsigned BytesRead = Buf.size() - ExtendedData.size();
     >      > +    if (BytesRead > Data.size())
     >      > +      return makeError("read past end of data");
     >      > +    Data = Data.drop_front(BytesRead);
     >      > +    return Ret;
     >      > +  }
     >      > +
     >      > +#define READ_STRING(Field)
     >                     \
     >      > +  do {
     >                     \
     >      > +    auto StringIndex = consumeVar(Data);
     >                     \
     >      > +    if (LLVM_UNLIKELY(StringIndex >=
    Strings.Strings.size()))
     >                    \
     >      > +      return makeError("Bad string index");
     >                    \
     >      > +    Field = Strings.Strings[StringIndex];
     >                    \
     >      > +  } while (0)
     >      > +
     >      > +  Symbol Sym;
     >      > +  Sym.ID = SymbolID::fromRaw(consume(Data, 20));
     >      > +  Sym.SymInfo.Kind =
    static_cast<index::SymbolKind>(consume8(Data));
     >      > +  Sym.SymInfo.Lang =
     >     static_cast<index::SymbolLanguage>(consume8(Data));
     >      > +  READ_STRING(Sym.Name);
     >      > +  READ_STRING(Sym.Scope);
     >      > +  for (SymbolLocation *Loc : {&Sym.Definition,
     >     &Sym.CanonicalDeclaration}) {
     >      > +    READ_STRING(Loc->FileURI);
     >      > +    for (auto &Endpoint : {&Loc->Start, &Loc->End}) {
     >      > +      Endpoint->Line = consumeVar(Data);
     >      > +      Endpoint->Column = consumeVar(Data);
     >      > +    }
     >      > +  }
     >      > +  Sym.References = consumeVar(Data);
     >      > +  Sym.IsIndexedForCodeCompletion = consume8(Data);
     >      > +  Sym.Origin = static_cast<SymbolOrigin>(consume8(Data));
     >      > +  READ_STRING(Sym.Signature);
     >      > +  READ_STRING(Sym.CompletionSnippetSuffix);
     >      > +  READ_STRING(Sym.Documentation);
     >      > +  READ_STRING(Sym.ReturnType);
     >      > +  unsigned IncludeHeaderN = consumeVar(Data);
     >      > +  if (IncludeHeaderN > MaxIncludes)
     >      > +    return makeError("too many IncludeHeaders");
     >      > +  Sym.IncludeHeaders.resize(IncludeHeaderN);
     >      > +  for (auto &I : Sym.IncludeHeaders) {
     >      > +    READ_STRING(I.IncludeHeader);
     >      > +    I.References = consumeVar(Data);
     >      > +  }
     >      > +
     >      > +#undef READ_STRING
     >      > +  return Sym;
     >      > +}
     >      > +
     >      > +} // namespace
     >      > +
     >      > +// FILE ENCODING
     >      > +// A file is a RIFF chunk with type 'CdIx'.
     >      > +// It contains the sections:
     >      > +//   - meta: version number
     >      > +//   - stri: string table
     >      > +//   - symb: symbols
     >      > +
     >      > +// The current versioning scheme is simple - non-current
     >     versions are rejected.
     >      > +// This allows arbitrary format changes, which invalidate
    stored
     >     data.
     >      > +// Later we may want to support some backward compatibility.
     >      > +constexpr static uint32_t Version = 1;
     >      > +
     >      > +Expected<IndexFileIn> readIndexFile(StringRef Data) {
     >      > +  auto RIFF = riff::readFile(Data);
     >      > +  if (!RIFF)
     >      > +    return RIFF.takeError();
     >      > +  if (RIFF->Type != riff::fourCC("CdIx"))
     >      > +    return makeError("wrong RIFF type");
     >      > +  StringMap<StringRef> Chunks;
     >      > +  for (const auto &Chunk : RIFF->Chunks)
     >      > +    Chunks.try_emplace(StringRef(Chunk.ID.data(),
     >     Chunk.ID.size()), Chunk.Data);
     >      > +
     >      > +  for (StringRef RequiredChunk : {"meta", "stri"})
     >      > +    if (!Chunks.count(RequiredChunk))
     >      > +      return makeError("missing required chunk " +
    RequiredChunk);
     >      > +
     >      > +  StringRef Meta = Chunks.lookup("meta");
     >      > +  if (Meta.size() < 4 || consume32(Meta) != Version)
     >      > +    return makeError("wrong version");
     >      > +
     >      > +  auto Strings = readStringTable(Chunks.lookup("stri"));
     >      > +  if (!Strings)
     >      > +    return Strings.takeError();
     >      > +
     >      > +  IndexFileIn Result;
     >      > +  if (Chunks.count("symb")) {
     >      > +    StringRef SymbolData = Chunks.lookup("symb");
     >      > +    SymbolSlab::Builder Symbols;
     >      > +    while (!SymbolData.empty())
     >      > +      if (auto Sym = readSymbol(SymbolData, *Strings))
     >      > +        Symbols.insert(*Sym);
     >      > +      else
     >      > +        return Sym.takeError();
     >      > +    Result.Symbols = std::move(Symbols).build();
     >      > +  }
     >      > +  return Result;
     >      > +}
     >      > +
     >      > +raw_ostream &operator<<(raw_ostream &OS, const
    IndexFileOut &Data) {
     >      > +  assert(Data.Symbols && "An index file without symbols
    makes no
     >     sense!");
     >      > +  riff::File RIFF;
     >      > +  RIFF.Type = riff::fourCC("CdIx");
     >      > +
     >      > +  SmallString<4> Meta;
     >      > +  {
     >      > +    raw_svector_ostream MetaOS(Meta);
     >      > +    write32(Version, MetaOS);
     >      > +  }
     >      > +  RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
     >      > +
     >      > +  StringTableOut Strings;
     >      > +  std::vector<Symbol> Symbols;
     >      > +  for (const auto &Sym : *Data.Symbols) {
     >      > +    Symbols.emplace_back(Sym);
     >      > +    visitStrings(Symbols.back(), [&](StringRef &S) {
     >     Strings.intern(S); });
     >      > +  }
     >      > +
     >      > +  std::string StringSection;
     >      > +  {
     >      > +    raw_string_ostream StringOS(StringSection);
     >      > +    Strings.finalize(StringOS);
     >      > +  }
     >      > +  RIFF.Chunks.push_back({riff::fourCC("stri"),
    StringSection});
     >      > +
     >      > +  std::string SymbolSection;
     >      > +  {
     >      > +    raw_string_ostream SymbolOS(SymbolSection);
     >      > +    for (const auto &Sym : Symbols)
     >      > +      writeSymbol(Sym, Strings, SymbolOS);
     >      > +  }
     >      > +  RIFF.Chunks.push_back({riff::fourCC("symb"),
    SymbolSection});
     >      > +
     >      > +  return OS << RIFF;
     >      > +}
     >      > +
     >      > +} // namespace clangd
     >      > +} // namespace clang
     >      >
     >      > Added: clang-tools-extra/trunk/clangd/index/Serialization.h
     >      > URL:
     >
    
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.h?rev=341375&view=auto
     >      >
>  ==============================================================================
     >      > --- clang-tools-extra/trunk/clangd/index/Serialization.h
    (added)
     >      > +++ clang-tools-extra/trunk/clangd/index/Serialization.h
    Tue Sep
     >     4 09:16:50 2018
     >      > @@ -0,0 +1,48 @@
     >      > +//===--- Serialization.h - Binary serialization of index data
     >     ----*- C++-*-===//
     >      > +//
     >      > +//                     The LLVM Compiler Infrastructure
     >      > +//
     >      > +// This file is distributed under the University of Illinois
     >     Open Source
     >      > +// License. See LICENSE.TXT for details.
     >      > +//
     >      >
>  +//===----------------------------------------------------------------------===//
     >      > +//
     >      > +// This file provides a compact binary serialization of
    indexed
     >     symbols.
     >      > +//
     >      > +// It writes two sections:
     >      > +//  - a string table (which is compressed)
     >      > +//  - lists of encoded symbols
     >      > +//
     >      > +// The format has a simple versioning scheme: the version is
     >     embedded in the
     >      > +// data and non-current versions are rejected when reading.
     >      > +//
     >      >
>  +//===----------------------------------------------------------------------===//
     >      > +
     >      > +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
     >      > +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
     >      > +#include "Index.h"
     >      > +#include "llvm/Support/Error.h"
     >      > +
     >      > +namespace clang {
     >      > +namespace clangd {
     >      > +
     >      > +// Specifies the contents of an index file to be written.
     >      > +struct IndexFileOut {
     >      > +  const SymbolSlab *Symbols;
     >      > +  // TODO: Support serializing symbol occurrences.
     >      > +  // TODO: Support serializing Dex posting lists.
     >      > +};
     >      > +// Serializes an index file. (This is a RIFF container chunk).
     >      > +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const IndexFileOut &);
     >      > +
     >      > +// Holds the contents of an index file that was read.
     >      > +struct IndexFileIn {
     >      > +  llvm::Optional<SymbolSlab> Symbols;
     >      > +};
     >      > +// Parse an index file. The input must be a RIFF container chunk.
     >      > +llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
     >      > +
     >      > +} // namespace clangd
     >      > +} // namespace clang
     >      > +
     >      > +#endif
     >      >
     >      > Modified: clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
     >      > URL:
     >
    
http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp?rev=341375&r1=341374&r2=341375&view=diff
     >      >
>  ==============================================================================
     >      > --- clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
    (original)
     >      > +++ clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
    Tue Sep
     >     4 09:16:50 2018
     >      > @@ -9,6 +9,7 @@
     >      >
     >      >   #include "SymbolYAML.h"
     >      >   #include "Index.h"
     >      > +#include "Serialization.h"
     >      >   #include "dex/DexIndex.h"
     >      >   #include "llvm/ADT/Optional.h"
     >      >   #include "llvm/ADT/SmallVector.h"
     >      > @@ -189,10 +190,20 @@ std::unique_ptr<SymbolIndex> loadIndex(l
     >      >       llvm::errs() << "Can't open " << SymbolFile << "\n";
     >      >       return nullptr;
     >      >     }
     >      > -  auto Slab = symbolsFromYAML(Buffer.get()->getBuffer());
     >      > +  StringRef Data = Buffer->get()->getBuffer();
     >      >
     >      > -  return UseDex ? dex::DexIndex::build(std::move(Slab))
     >      > -                : MemIndex::build(std::move(Slab), RefSlab());
     >      > +  llvm::Optional<SymbolSlab> Slab;
     >      > +  if (Data.startswith("RIFF")) { // Magic for binary index file.
     >      > +    if (auto RIFF = readIndexFile(Data))
     >      > +      Slab = std::move(RIFF->Symbols);
     >      > +    else
     >      > +      llvm::errs() << "Bad RIFF: " <<
>

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to