r281923 - Reorder initializers in CallStackFrame so that we don't get a warning.
Author: sfantao Date: Mon Sep 19 13:13:13 2016 New Revision: 281923 URL: http://llvm.org/viewvc/llvm-project?rev=281923&view=rev Log: Reorder initializers in CallStackFrame so that we don't get a warning. Modified: cfe/trunk/lib/AST/ExprConstant.cpp Modified: cfe/trunk/lib/AST/ExprConstant.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ExprConstant.cpp?rev=281923&r1=281922&r2=281923&view=diff == --- cfe/trunk/lib/AST/ExprConstant.cpp (original) +++ cfe/trunk/lib/AST/ExprConstant.cpp Mon Sep 19 13:13:13 2016 @@ -961,8 +961,8 @@ void SubobjectDesignator::diagnosePointe CallStackFrame::CallStackFrame(EvalInfo &Info, SourceLocation CallLoc, const FunctionDecl *Callee, const LValue *This, APValue *Arguments) -: Info(Info), Caller(Info.CurrentCall), CallLoc(CallLoc), Callee(Callee), - Index(Info.NextCallIndex++), This(This), Arguments(Arguments) { +: Info(Info), Caller(Info.CurrentCall), Callee(Callee), This(This), + Arguments(Arguments), CallLoc(CallLoc), Index(Info.NextCallIndex++) { Info.CurrentCall = this; ++Info.CallStackDepth; } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D18172: [CUDA][OpenMP] Add a generic offload action builder
sfantao updated this revision to Diff 72117. sfantao added a comment. - Rebase. https://reviews.llvm.org/D18172 Files: include/clang/Driver/Compilation.h lib/Driver/Driver.cpp lib/Driver/Types.cpp test/Driver/cuda-bindings.cu test/Driver/cuda-phases.cu Index: test/Driver/cuda-phases.cu === --- test/Driver/cuda-phases.cu +++ test/Driver/cuda-phases.cu @@ -13,194 +13,189 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 %s 2>&1 \ // RUN: | FileCheck -check-prefix=BIN %s -// BIN: 0: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda) -// BIN: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) -// BIN: 2: compiler, {1}, ir, (host-cuda) -// BIN: 3: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30) -// BIN: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_30) -// BIN: 5: compiler, {4}, ir, (device-cuda, sm_30) -// BIN: 6: backend, {5}, assembler, (device-cuda, sm_30) -// BIN: 7: assembler, {6}, object, (device-cuda, sm_30) -// BIN: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {7}, object -// BIN: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {6}, assembler -// BIN: 10: linker, {8, 9}, cuda-fatbin, (device-cuda) -// BIN: 11: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir -// BIN: 12: backend, {11}, assembler, (host-cuda) -// BIN: 13: assembler, {12}, object, (host-cuda) -// BIN: 14: linker, {13}, image, (host-cuda) +// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda) +// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (host-cuda) +// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-cuda) +// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30) +// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, cuda-cpp-output, (device-cuda, sm_30) +// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-cuda, sm_30) +// BIN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-cuda, 
sm_30) +// BIN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-cuda, sm_30) +// BIN-DAG: [[P8:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P7]]}, object +// BIN-DAG: [[P9:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P6]]}, assembler +// BIN-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-cuda) +// BIN-DAG: [[P11:[0-9]+]]: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-cuda (nvptx64-nvidia-cuda)" {[[P10]]}, ir +// BIN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-cuda) +// BIN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-cuda) +// BIN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-cuda) // // Test single gpu architecture up to the assemble phase. // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=ASM %s -// ASM: 0: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30) -// ASM: 1: preprocessor, {0}, cuda-cpp-output, (device-cuda, sm_30) -// ASM: 2: compiler, {1}, ir, (device-cuda, sm_30) -// ASM: 3: backend, {2}, assembler, (device-cuda, sm_30) -// ASM: 4: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {3}, assembler -// ASM: 5: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda) -// ASM: 6: preprocessor, {5}, cuda-cpp-output, (host-cuda) -// ASM: 7: compiler, {6}, ir, (host-cuda) -// ASM: 8: backend, {7}, assembler, (host-cuda) +// ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30) +// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (device-cuda, sm_30) +// ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-cuda, sm_30) +// ASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-cuda, sm_30) +// ASM-DAG: [[P4:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P3]]}, assembler +// ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda) +// ASM-DAG: [[P6:[0-9]+]]: 
preprocessor, {[[P5]]}, cuda-cpp-output, (host-cuda) +// ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-cuda) +// ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-cuda) // // Test two gpu architectures with complete compilation. // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \ // RUN: | FileCheck -check-prefix=BIN2 %s -// BIN2: 0: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda) -// BIN2: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) -// BIN2: 2: compiler, {1}, ir, (host-cuda) -// BIN2: 3: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30) -// BIN2: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_30) -// BIN2: 5: compiler, {4}, ir, (device-cuda, sm_30) -// BIN2: 6: backend, {5}, assembler, (device-cuda, sm_30) -// BIN2: 7: assembler, {6}, object, (device-cuda, sm_30) -// BIN2: 8: offload, "d
Re: [PATCH] D21840: [Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
sfantao updated this revision to Diff 72118. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21840 Files: include/clang/Driver/Action.h lib/Driver/Driver.cpp Index: lib/Driver/Driver.cpp === --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -1922,7 +1922,7 @@ // Create the offload action with all dependences. When an offload action // is created the kinds are propagated to the host action, so we don't have -// to do that explicitely here. +// to do that explicitly here. OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), /*BoundArch*/ nullptr, ActiveOffloadKinds); @@ -2360,142 +2360,293 @@ } } } -/// Collapse an offloading action looking for a job of the given type. The input -/// action is changed to the input of the collapsed sequence. If we effectively -/// had a collapse return the corresponding offloading action, otherwise return -/// null. -template -static OffloadAction *collapseOffloadingAction(Action *&CurAction) { - if (!CurAction) -return nullptr; - if (auto *OA = dyn_cast(CurAction)) { -if (OA->hasHostDependence()) - if (auto *HDep = dyn_cast(OA->getHostDependence())) { -CurAction = HDep; -return OA; - } -if (OA->hasSingleDeviceDependence()) - if (auto *DDep = dyn_cast(OA->getSingleDeviceDependence())) { -CurAction = DDep; -return OA; + +namespace { +/// Utility class to control the collapse of dependent actions and select the +/// tools accordingly. +class ToolSelector final { + /// The tool chain this selector refers to. + const ToolChain &TC; + + /// The compilation this selector refers to. + const Compilation &C; + + /// The base action this selector refers to. + const JobAction *BaseAction; + + /// Set to true if the current toolchain refers to host actions. + bool IsHostSelector; + + /// Set to true if save-temps and embed-bitcode functionalities are active. + bool SaveTemps; + bool EmbedBitcode; + + /// Get dependence action or null if that does not exist. 
If \a CanBeCollapsed + /// is false, that action must be legal to collapse or null will be returned. + const JobAction *getDependenceAction(const ActionList &Inputs, + ActionList &SavedOffloadAction, + bool CanBeCollapsed = true) { +// An option can be collapsed only if it has a single input. +if (Inputs.size() != 1) + return nullptr; + +Action *CurAction = *Inputs.begin(); +if (!CurAction->isCollapsingWithDependingActionLegal() && CanBeCollapsed) + return nullptr; + +// If the input action is an offload action. Look through it and save any +// offload action that can be dropped in the event of a collapse. +if (auto *OA = dyn_cast(CurAction)) { + // If the depending action is a device action, we will attempt to collapse + // only with other device actions. Otherwise, we would do the same but + // with host actions only. + if (!IsHostSelector) { +if (OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)) { + CurAction = + OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true); + if (!CurAction->isCollapsingWithDependingActionLegal() && + CanBeCollapsed) +return nullptr; + SavedOffloadAction.push_back(OA); + return dyn_cast(CurAction); +} + } else if (OA->hasHostDependence()) { +CurAction = OA->getHostDependence(); +if (!CurAction->isCollapsingWithDependingActionLegal() && +CanBeCollapsed) + return nullptr; +SavedOffloadAction.push_back(OA); +return dyn_cast(CurAction); } + return nullptr; +} + +return dyn_cast(CurAction); } - return nullptr; -} -// Returns a Tool for a given JobAction. In case the action and its -// predecessors can be combined, updates Inputs with the inputs of the -// first combined action. If one of the collapsed actions is a -// CudaHostAction, updates CollapsedCHA with the pointer to it so the -// caller can deal with extra handling such action requires. 
-static const Tool *selectToolForJob(Compilation &C, bool SaveTemps, -bool EmbedBitcode, const ToolChain *TC, -const JobAction *JA, -const ActionList *&Inputs, -ActionList &CollapsedOffloadAction) { - const Tool *ToolForJob = nullptr; - CollapsedOffloadAction.clear(); - - // See if we should look for a compiler with an integrated assembler. We match - // bottom up, so what we are actually looking for is an assembler job with a - // compiler input. - - // Look through offload actions between assembler and backend actions. - Action *BackendJA = (isa(JA)
Re: [PATCH] D21845: [Driver][OpenMP] Add specialized action builder for OpenMP offloading actions.
sfantao updated this revision to Diff 72120. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21845 Files: lib/Driver/Driver.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -35,3 +35,106 @@ // RUN: %clang -### -ccc-print-phases -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s // CHK-DUPLICATES: warning: The OpenMP offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified - will be ignored. + +/// ### + +/// Check the phases graph when using a single target, different from the host. +/// We should have an offload action joining the host compile and device +/// preprocessor and another one joining the device linking outputs to the host +/// action. +// RUN: %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES %s +// CHK-PHASES: 0: input, "[[INPUT:.+\.c]]", c, (host-openmp) +// CHK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp) +// CHK-PHASES: 2: compiler, {1}, ir, (host-openmp) +// CHK-PHASES: 3: backend, {2}, assembler, (host-openmp) +// CHK-PHASES: 4: assembler, {3}, object, (host-openmp) +// CHK-PHASES: 5: linker, {4}, image, (host-openmp) +// CHK-PHASES: 6: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES: 7: preprocessor, {6}, cpp-output, (device-openmp) +// CHK-PHASES: 8: compiler, {7}, ir, (device-openmp) +// CHK-PHASES: 9: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir +// CHK-PHASES: 10: backend, {9}, assembler, (device-openmp) +// CHK-PHASES: 11: assembler, {10}, object, (device-openmp) +// CHK-PHASES: 12: linker, {11}, image, (device-openmp) +// CHK-PHASES: 13: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {5}, "device-openmp 
(x86_64-pc-linux-gnu)" {12}, image + +/// ### + +/// Check the phases when using multiple targets. Here we also add a library to +/// make sure it is treated as input by the device. +// RUN: %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES-LIB %s +// CHK-PHASES-LIB: 0: input, "somelib", object, (host-openmp) +// CHK-PHASES-LIB: 1: input, "[[INPUT:.+\.c]]", c, (host-openmp) +// CHK-PHASES-LIB: 2: preprocessor, {1}, cpp-output, (host-openmp) +// CHK-PHASES-LIB: 3: compiler, {2}, ir, (host-openmp) +// CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp) +// CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp) +// CHK-PHASES-LIB: 6: linker, {0, 5}, image, (host-openmp) +// CHK-PHASES-LIB: 7: input, "somelib", object, (device-openmp) +// CHK-PHASES-LIB: 8: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES-LIB: 9: preprocessor, {8}, cpp-output, (device-openmp) +// CHK-PHASES-LIB: 10: compiler, {9}, ir, (device-openmp) +// CHK-PHASES-LIB: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {10}, ir +// CHK-PHASES-LIB: 12: backend, {11}, assembler, (device-openmp) +// CHK-PHASES-LIB: 13: assembler, {12}, object, (device-openmp) +// CHK-PHASES-LIB: 14: linker, {7, 13}, image, (device-openmp) +// CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp) +// CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp) +// CHK-PHASES-LIB: 18: compiler, {17}, ir, (device-openmp) +// CHK-PHASES-LIB: 19: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {18}, ir +// CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp) +// CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp) +// CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp) +// 
CHK-PHASES-LIB: 23: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {6}, "device-openmp (x86_64-pc-linux-gnu)" {14}, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image + + +/// ### + +/// Check the phases when using multiple targets and multiple source files +// RUN: echo " " > %t.c +// RUN: %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %t.c 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES-FILES %s +// CHK-PHASES-FILES: 0: input, "somelib", object, (host-openmp) +// CHK-PHASES-FILES: 1: input, "[[INPUT1:.+\.c]]", c, (host-openmp) +// CHK-PHASES-FILES: 2: preprocessor, {1}, cpp-output, (host-
Re: [PATCH] D21843: [Driver][OpenMP] Create tool chains for OpenMP offloading kind.
sfantao updated this revision to Diff 72119. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21843 Files: include/clang/Basic/DiagnosticDriverKinds.td include/clang/Driver/Action.h include/clang/Driver/Driver.h lib/Driver/Action.cpp lib/Driver/Driver.cpp lib/Driver/Tools.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- /dev/null +++ test/Driver/openmp-offload.c @@ -0,0 +1,37 @@ +/// +/// Perform several driver tests for OpenMP offloading +/// + +/// ### + +/// Check whether an invalid OpenMP target is specified: +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s +// RUN: %clang -### -fopenmp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s +// CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' + +/// ### + +/// Check warning for empty -fopenmp-targets +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets= %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s +// RUN: %clang -### -fopenmp -fopenmp-targets= %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s +// CHK-EMPTY-OMPTARGETS: warning: joined argument expects additional value: '-fopenmp-targets=' + +/// ### + +/// Check error for no -fopenmp option +// RUN: %clang -### -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s +// RUN: %clang -### -fopenmp=libgomp -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s +// CHK-NO-FOPENMP: error: The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading. + +/// ### + +/// Check warning for duplicate offloading targets. 
+// RUN: %clang -### -ccc-print-phases -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s +// CHK-DUPLICATES: warning: The OpenMP offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified - will be ignored. Index: lib/Driver/Tools.cpp === --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -2978,72 +2978,23 @@ CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins")); } -namespace { -enum OpenMPRuntimeKind { - /// An unknown OpenMP runtime. We can't generate effective OpenMP code - /// without knowing what runtime to target. - OMPRT_Unknown, - - /// The LLVM OpenMP runtime. When completed and integrated, this will become - /// the default for Clang. - OMPRT_OMP, - - /// The GNU OpenMP runtime. Clang doesn't support generating OpenMP code for - /// this runtime but can swallow the pragmas, and find and link against the - /// runtime library itself. - OMPRT_GOMP, - - /// The legacy name for the LLVM OpenMP runtime from when it was the Intel - /// OpenMP runtime. We support this mode for users with existing dependencies - /// on this runtime library name. - OMPRT_IOMP5 -}; -} - -/// Compute the desired OpenMP runtime from the flag provided. -static OpenMPRuntimeKind getOpenMPRuntime(const ToolChain &TC, - const ArgList &Args) { - StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME); - - const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ); - if (A) -RuntimeName = A->getValue(); - - auto RT = llvm::StringSwitch(RuntimeName) -.Case("libomp", OMPRT_OMP) -.Case("libgomp", OMPRT_GOMP) -.Case("libiomp5", OMPRT_IOMP5) -.Default(OMPRT_Unknown); - - if (RT == OMPRT_Unknown) { -if (A) - TC.getDriver().Diag(diag::err_drv_unsupported_option_argument) - << A->getOption().getName() << A->getValue(); -else - // FIXME: We could use a nicer diagnostic here. 
- TC.getDriver().Diag(diag::err_drv_unsupported_opt) << "-fopenmp"; - } - - return RT; -} - static void addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, const ArgList &Args) { if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false)) return; - switch (getOpenMPRuntime(TC, Args)) { - case OMPRT_OMP: + switch (TC.getDriver().getOpenMPRuntime(Args)) { + case Driver::OMPRT_OMP: CmdArgs.push_back("-lomp"); break; - case OMPRT_GOMP: + case Driver::OMPRT_GOMP: CmdArgs.push_back("-lgomp
Re: [PATCH] D21847: [Driver][OpenMP] Build jobs for OpenMP offloading actions for targets using gcc tool chains.
sfantao updated this revision to Diff 72121. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21847 Files: include/clang/Driver/Options.td lib/Driver/Driver.cpp lib/Driver/Tools.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -138,3 +138,104 @@ // CHK-PHASES-FILES: 38: assembler, {37}, object, (device-openmp) // CHK-PHASES-FILES: 39: linker, {26, 32, 38}, image, (device-openmp) // CHK-PHASES-FILES: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {11}, "device-openmp (x86_64-pc-linux-gnu)" {25}, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image + + +/// ### + +/// Check of the commands passed to each tool when using valid OpenMP targets. +/// Here we also check that offloading does not break the use of integrated +/// assembler. It does however preclude the merge of the host compile and +/// backend phases. There are also two offloading specific options: +/// -fopenmp-is-device: will tell the frontend that it will generate code for a +/// target. +/// -fopenmp-host-ir-file-path: specifies the host IR file that can be loaded by +/// the target code generation to gather information about which declaration +/// really need to be emitted. +/// We use -fopenmp-dump-offload-linker-script to dump the linker script and +/// check its contents. 
+/// +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -fopenmp-dump-offload-linker-script 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS -check-prefix=CHK-LKS -check-prefix=CHK-LKS-REG %s +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -fopenmp-dump-offload-linker-script 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST -check-prefix=CHK-LKS -check-prefix=CHK-LKS-ST %s + +// Make sure we are not dumping the script unless the user requested it. +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s + +// +// Check the linker script contains what we expect. +// +// CHK-LKS: /* +// CHK-LKS: OpenMP Offload Linker Script. +// CHK-LKS-NODUMP-NOT: OpenMP Offload Linker Script. +// CHK-LKS: */ +// CHK-LKS: TARGET(binary) +// CHK-LKS-REG: INPUT([[T1BIN:.+\.out]]) +// CHK-LKS-REG: INPUT([[T2BIN:.+\.out]]) +// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-device-openmp-powerpc64le-ibm-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-device-openmp-x86_64-pc-linux-gnu]]) +// CHK-LKS: SECTIONS +// CHK-LKS: { +// CHK-LKS: .omp_offloading : +// CHK-LKS: ALIGN(0x10) +// CHK-LKS: { +// CHK-LKS: . = ALIGN(0x10); +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = .); +// CHK-LKS: [[T1BIN]] +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = .); +// CHK-LKS: . 
= ALIGN(0x10); +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.x86_64-pc-linux-gnu = .); +// CHK-LKS: [[T2BIN]] +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.x86_64-pc-linux-gnu = .); +// CHK-LKS: } +// CHK-LKS: .omp_offloading.entries : +// CHK-LKS: ALIGN(0x10) +// CHK-LKS: SUBALIGN(0x01) +// CHK-LKS: { +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.entries_begin = .); +// CHK-LKS: *(.omp_offloading.entries) +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.entries_end = .); +// CHK-LKS: } +// CHK-LKS: } +// CHK-LKS: INSERT BEFORE .data + +// +// Generate host BC file. +// +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "c" "[[INPUT:.+\.c]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTPP:.+\.i]]" "-x" "c" "[[INPUT:.+\.c]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// +// Compile for the powerpc device. +// +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS: ld" {{.*}}"-o" "[[T1BIN]]" {{.*}}"[[T1OBJ]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1
Re: [PATCH] D21848: [Driver][OpenMP] Add logic for offloading-specific argument translation.
sfantao updated this revision to Diff 72122. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21848 Files: include/clang/Driver/Compilation.h include/clang/Driver/ToolChain.h lib/Driver/Compilation.cpp lib/Driver/Driver.cpp lib/Driver/MSVCToolChain.cpp lib/Driver/ToolChains.cpp lib/Driver/ToolChains.h test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -212,24 +212,24 @@ // // Compile for the powerpc device. // -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS: ld" {{.*}}"-o" "[[T1BIN]]" {{.*}}"[[T1OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1PP:.+\.i]]" "-x" "c" "[[INPUT]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "[[T1ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-o" 
"[[T1BIN]]" {{.*}}[[T1OBJ]] +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "[[T1BIN]]" {{.*}}[[T1OBJ]] // // Compile for the x86 device. // -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS: ld" {{.*}}"-o" "[[T2BIN]]" {{.*}}"[[T2OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2PP:.+\.i]]" "-x" "c" "[[INPUT]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2ASM:.+\.s]]" "-x" "ir" "[[T2BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "[[T2ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-o" "[[T2BIN]]" {{.*}}[[T2OBJ]] +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "[[T2BIN]]" {{.*}}[[T2OBJ]] // // Generate host object from the BC file and link using the linker script. 
Index: lib/Driver/ToolChains.h === --- lib/Driver/ToolChains.h +++ lib/Driver/ToolChains.h @@ -222,6 +222,9 @@ bool isPIEDefault() const override; bool isPICDefaultForced() const override; bool IsIntegratedAssemblerDefault() const override; + llvm::opt::DerivedArgList * + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, +Action::OffloadKind DeviceOffloadKind) const override; protected: Tool *getTool(Action::ActionClass AC) const override; @@ -317,8 +320,8 @@ bool HasNativeLLVMSupport() const override; llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, -const char *BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, +Action::OffloadKind DeviceOffloadKind) const override; bool IsBlocksDefault() const override { // Always allow blocks on Apple; users interested in versioning are @@ -522,8 +525,8 @@ bool isCrossCompiling() const override { return fa
Re: [PATCH] D21853: [Driver][OpenMP] Update actions builder to create unbundling action when necessary.
sfantao updated this revision to Diff 72124. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21853 Files: include/clang/Driver/Action.h include/clang/Driver/Types.h lib/Driver/Action.cpp lib/Driver/Driver.cpp lib/Driver/ToolChain.cpp lib/Driver/Types.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -267,3 +267,56 @@ // CHK-BUACTIONS: 17: backend, {2}, assembler, (host-openmp) // CHK-BUACTIONS: 18: assembler, {17}, object, (host-openmp) // CHK-BUACTIONS: 19: clang-offload-bundler, {9, 16, 18}, object, (host-openmp) + +/// ### + +/// Check separate compilation with offloading - unbundling actions +// RUN: touch %t.i +// RUN: %clang -### -ccc-print-phases -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBACTIONS %s + +// CHK-UBACTIONS: 0: input, "somelib", object, (host-openmp) +// CHK-UBACTIONS: 1: input, "[[INPUT:.+\.i]]", cpp-output, (host-openmp) +// CHK-UBACTIONS: 2: clang-offload-unbundler, {1}, cpp-output, (host-openmp) +// CHK-UBACTIONS: 3: compiler, {2}, ir, (host-openmp) +// CHK-UBACTIONS: 4: backend, {3}, assembler, (host-openmp) +// CHK-UBACTIONS: 5: assembler, {4}, object, (host-openmp) +// CHK-UBACTIONS: 6: linker, {0, 5}, image, (host-openmp) +// CHK-UBACTIONS: 7: input, "somelib", object, (device-openmp) +// CHK-UBACTIONS: 8: compiler, {2}, ir, (device-openmp) +// CHK-UBACTIONS: 9: offload, "host-openmp (powerpc64le--linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {8}, ir +// CHK-UBACTIONS: 10: backend, {9}, assembler, (device-openmp) +// CHK-UBACTIONS: 11: assembler, {10}, object, (device-openmp) +// CHK-UBACTIONS: 12: linker, {7, 11}, image, (device-openmp) +// CHK-UBACTIONS: 13: input, "somelib", object, (device-openmp) +// CHK-UBACTIONS: 14: compiler, {2}, ir, (device-openmp) +// CHK-UBACTIONS: 15: offload, 
"host-openmp (powerpc64le--linux)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir +// CHK-UBACTIONS: 16: backend, {15}, assembler, (device-openmp) +// CHK-UBACTIONS: 17: assembler, {16}, object, (device-openmp) +// CHK-UBACTIONS: 18: linker, {13, 17}, image, (device-openmp) +// CHK-UBACTIONS: 19: offload, "host-openmp (powerpc64le--linux)" {6}, "device-openmp (powerpc64le-ibm-linux-gnu)" {12}, "device-openmp (x86_64-pc-linux-gnu)" {18}, image + +/// ### + +/// Check separate compilation with offloading - unbundling/bundling actions +// RUN: touch %t.i +// RUN: %clang -### -ccc-print-phases -fopenmp -c -o %t.o -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBUACTIONS %s + +// CHK-UBUACTIONS: 0: input, "[[INPUT:.+\.i]]", cpp-output, (host-openmp) +// CHK-UBUACTIONS: 1: clang-offload-unbundler, {0}, cpp-output, (host-openmp) +// CHK-UBUACTIONS: 2: compiler, {1}, ir, (host-openmp) +// CHK-UBUACTIONS: 3: compiler, {1}, ir, (device-openmp) +// CHK-UBUACTIONS: 4: offload, "host-openmp (powerpc64le--linux)" {2}, "device-openmp (powerpc64le-ibm-linux-gnu)" {3}, ir +// CHK-UBUACTIONS: 5: backend, {4}, assembler, (device-openmp) +// CHK-UBUACTIONS: 6: assembler, {5}, object, (device-openmp) +// CHK-UBUACTIONS: 7: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {6}, object +// CHK-UBUACTIONS: 8: compiler, {1}, ir, (device-openmp) +// CHK-UBUACTIONS: 9: offload, "host-openmp (powerpc64le--linux)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir +// CHK-UBUACTIONS: 10: backend, {9}, assembler, (device-openmp) +// CHK-UBUACTIONS: 11: assembler, {10}, object, (device-openmp) +// CHK-UBUACTIONS: 12: offload, "device-openmp (x86_64-pc-linux-gnu)" {11}, object +// CHK-UBUACTIONS: 13: backend, {2}, assembler, (host-openmp) +// CHK-UBUACTIONS: 14: assembler, {13}, object, (host-openmp) +// CHK-UBUACTIONS: 15: clang-offload-bundler, {7, 12, 14}, object, (host-openmp) + 
Index: lib/Driver/Types.cpp === --- lib/Driver/Types.cpp +++ lib/Driver/Types.cpp @@ -163,6 +163,10 @@ } } +bool types::isSrcFile(ID Id) { + return Id != TY_Object && getPreprocessedType(Id) != TY_INVALID; +} + types::ID types::lookupTypeForExtension(const char *Ext) { return llvm::StringSwitch(Ext) .Case("c", TY_C) Index: lib/Driver/ToolChain.cpp === --- lib/Driver/ToolChain.cpp +++ lib/Driver/ToolChain.cpp @@ -265,6 +265,7 @@ return getClang(); case Action::OffloadBundlingJobClass: + case Action::OffloadUnbundlingJobClass: // FIXME: Add a tool for the bundling actions. return nullptr; } I
Re: [PATCH] D21852: [Driver][OpenMP] Update actions builder to create bundling action when necessary.
sfantao updated this revision to Diff 72123. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21852 Files: include/clang/Driver/Action.h lib/Driver/Action.cpp lib/Driver/Driver.cpp lib/Driver/ToolChain.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -239,3 +239,31 @@ // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTASM:.+\.s]]" "-x" "ir" "[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" [[HOSTOBJ:.+\.o]]" [[HOSTASM:.+\.s]] // CHK-COMMANDS-ST: ld" {{.*}}"-o" "[[HOSTBIN:.+\.out]]" {{.*}}"-lomptarget" {{.*}}"-T" "[[HOSTLK:.+\.lk]]" + + +/// ### + +/// Check separate compilation with offloading - bundling actions +// RUN: %clang -### -ccc-print-phases -fopenmp -c -o %t.o -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BUACTIONS %s + +// CHK-BUACTIONS: 0: input, "[[INPUT:.+\.c]]", c, (host-openmp) +// CHK-BUACTIONS: 1: preprocessor, {0}, cpp-output, (host-openmp) +// CHK-BUACTIONS: 2: compiler, {1}, ir, (host-openmp) +// CHK-BUACTIONS: 3: input, "[[INPUT]]", c, (device-openmp) +// CHK-BUACTIONS: 4: preprocessor, {3}, cpp-output, (device-openmp) +// CHK-BUACTIONS: 5: compiler, {4}, ir, (device-openmp) +// CHK-BUACTIONS: 6: offload, "host-openmp (powerpc64le--linux)" {2}, "device-openmp (powerpc64le-ibm-linux-gnu)" {5}, ir +// CHK-BUACTIONS: 7: backend, {6}, assembler, (device-openmp) +// CHK-BUACTIONS: 8: assembler, {7}, object, (device-openmp) +// CHK-BUACTIONS: 9: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {8}, object +// CHK-BUACTIONS: 10: input, "[[INPUT]]", c, (device-openmp) +// CHK-BUACTIONS: 11: preprocessor, {10}, cpp-output, (device-openmp) +// CHK-BUACTIONS: 12: compiler, {11}, ir, (device-openmp) +// CHK-BUACTIONS: 13: 
offload, "host-openmp (powerpc64le--linux)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {12}, ir +// CHK-BUACTIONS: 14: backend, {13}, assembler, (device-openmp) +// CHK-BUACTIONS: 15: assembler, {14}, object, (device-openmp) +// CHK-BUACTIONS: 16: offload, "device-openmp (x86_64-pc-linux-gnu)" {15}, object +// CHK-BUACTIONS: 17: backend, {2}, assembler, (host-openmp) +// CHK-BUACTIONS: 18: assembler, {17}, object, (host-openmp) +// CHK-BUACTIONS: 19: clang-offload-bundler, {9, 16, 18}, object, (host-openmp) Index: lib/Driver/ToolChain.cpp === --- lib/Driver/ToolChain.cpp +++ lib/Driver/ToolChain.cpp @@ -263,6 +263,10 @@ case Action::VerifyPCHJobClass: case Action::BackendJobClass: return getClang(); + + case Action::OffloadBundlingJobClass: +// FIXME: Add a tool for the bundling actions. +return nullptr; } llvm_unreachable("Invalid tool kind."); Index: lib/Driver/Driver.cpp === --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -1562,6 +1562,9 @@ /// found. virtual bool initialize() { return false; } +/// Return true if the builder can use bundling/unbundling. +virtual bool canUseBundlerUnbundler() const { return false; } + /// Return true if this builder is valid. We have a valid builder if we have /// associated device tool chains. bool isValid() { return !ToolChains.empty(); } @@ -1898,6 +1901,26 @@ return ABRT_Success; } +void appendTopLevelActions(ActionList &AL) override { + if (OpenMPDeviceActions.empty()) +return; + + // We should always have an action for each input. + assert(OpenMPDeviceActions.size() == ToolChains.size() && + "Number of OpenMP actions and toolchains do not match."); + + // Append all device actions followed by the proper offload action. + auto TI = ToolChains.begin(); + for (auto *A : OpenMPDeviceActions) { +OffloadAction::DeviceDependences Dep; +Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_OpenMP); +AL.push_back(C.MakeAction(Dep, A->getType())); +++TI; + } + // We no longer need the action stored in this builder. 
+ OpenMPDeviceActions.clear(); +} + void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { assert(ToolChains.size() == DeviceLinkerInputs.size() && "Toolchains and linker inputs sizes do not match."); @@ -1924,6 +1947,11 @@ DeviceLinkerInputs.resize(ToolChains.size()); return false; } + +bool canUseBundlerUnbundler() const override { + // OpenMP should use bundled files whenever possible. + return true; +} }; /// @@ -1933,6 +1961,9 @@ /// Speciali
Re: [PATCH] D21856: [Driver][OpenMP] Add support to create jobs for bundling actions.
sfantao updated this revision to Diff 72125. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21856 Files: include/clang/Driver/Action.h include/clang/Driver/ToolChain.h lib/Driver/Action.cpp lib/Driver/ToolChain.cpp lib/Driver/Tools.cpp lib/Driver/Tools.h test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -320,3 +320,37 @@ // CHK-UBUACTIONS: 14: assembler, {13}, object, (host-openmp) // CHK-UBUACTIONS: 15: clang-offload-bundler, {7, 12, 14}, object, (host-openmp) +/// ### + +/// Check separate compilation with offloading - bundling jobs construct +// RUN: %clang -### -fopenmp -c -o %t.o -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BUJOBS %s +// RUN: %clang -### -fopenmp -c -o %t.o -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BUJOBS-ST %s + +// Create host BC. +// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "c" "[[INPUT:.+\.c]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTPP:.+\.i]]" "-x" "c" "[[INPUT:.+\.c]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// Create target 1 object. 
+// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1PP:.+\.i]]" "-x" "c" "[[INPUT]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "[[T1ASM]]" + +// Create target 2 object. +// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2PP:.+\.i]]" "-x" "c" "[[INPUT]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2ASM:.+\.s]]" "-x" "ir" "[[T2BC]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "[[T2ASM]]" + +// Create host object and bundle. 
+// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTOBJ:.+\.o]]" "-x" "ir" "[[HOSTBC]]" +// CHK-BUJOBS: clang-offload-bundler" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=[[RES:.+\.o]]" "-inputs=[[T1OBJ]],[[T2OBJ]],[[HOSTOBJ]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTASM:.+\.s]]" "-x" "ir" "[[HOSTBC]]" +// CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[HOSTOBJ:.+\.o]]" "[[HOSTASM]]" +// CHK-BUJOBS-ST: clang-offload-bundler" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=[[RES:.+\.o]]" "-inputs=[[T1OBJ]],[[T2OBJ]],[[HOSTOBJ]]" Index: lib/Driver/Tools.h === --- lib/Driver/Tools.h +++ lib/Driver/Tools.h @@ -137,6 +137,19 @@ const char *LinkingOutput) const override; }; +/// Offload bundler tool. +class LLVM_LIBRARY_VISIBILITY OffloadBundler final : public Tool { +public: + OffloadBundler(const ToolChain &TC) + : Tool("offload bundler", "clang-offload-bundler", TC) {} + + bool h
Re: [PATCH] D21857: [Driver][OpenMP] Add support to create jobs for unbundling actions.
sfantao updated this revision to Diff 72126. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21857 Files: include/clang/Driver/Action.h include/clang/Driver/Driver.h include/clang/Driver/Tool.h lib/Driver/Action.cpp lib/Driver/Driver.cpp lib/Driver/Tool.cpp lib/Driver/Tools.cpp lib/Driver/Tools.h test/Driver/cuda-bindings.cu test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -175,8 +175,8 @@ // CHK-LKS: TARGET(binary) // CHK-LKS-REG: INPUT([[T1BIN:.+\.out]]) // CHK-LKS-REG: INPUT([[T2BIN:.+\.out]]) -// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-device-openmp-powerpc64le-ibm-linux-gnu]]) -// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-device-openmp-x86_64-pc-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-openmp-x86_64-pc-linux-gnu]]) // CHK-LKS: SECTIONS // CHK-LKS: { // CHK-LKS: .omp_offloading : @@ -354,3 +354,92 @@ // CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTASM:.+\.s]]" "-x" "ir" "[[HOSTBC]]" // CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[HOSTOBJ:.+\.o]]" "[[HOSTASM]]" // CHK-BUJOBS-ST: clang-offload-bundler" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=[[RES:.+\.o]]" "-inputs=[[T1OBJ]],[[T2OBJ]],[[HOSTOBJ]]" + +/// ### + +/// Check separate compilation with offloading - unbundling jobs construct +// RUN: touch %t.i +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS %s +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i -save-temps 2>&1 \ +// RUN: | FileCheck 
-check-prefix=CHK-UBJOBS-ST %s +// RUN: touch %t.o +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS2 %s +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.o -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS2-ST %s + +// Unbundle and create host BC. +// CHK-UBJOBS: clang-offload-bundler" "-type=i" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.i]]" "-outputs=[[HOSTPP:.+\.i]],[[T1PP:.+\.i]],[[T2PP:.+\.i]]" "-unbundle" +// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-UBJOBS-ST: clang-offload-bundler" "-type=i" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.i]]" "-outputs=[[HOSTPP:.+\.i]],[[T1PP:.+\.i]],[[T2PP:.+\.i]]" "-unbundle" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// Create target 1 object. 
+// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBJOBS: ld" {{.*}}"-o" "[[T1BIN:.+\.out]]" {{.*}}"[[T1OBJ]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "[[T1ASM]]" +// CHK-UBJOBS-ST: ld" {{.*}}"-o" "[[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"[[T1OBJ]]" + +// Create target 2 object. +// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBJOBS: ld" {{.*}}"-o" "[[T2BIN:.+\.out]]" {{.*}}"[[T2OBJ]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.
r282865 - [CUDA][OpenMP] Add a generic offload action builder
Author: sfantao Date: Fri Sep 30 10:34:19 2016 New Revision: 282865 URL: http://llvm.org/viewvc/llvm-project?rev=282865&view=rev Log: [CUDA][OpenMP] Add a generic offload action builder Summary: This patch proposes a new class to generate and record action dependences related to offloading. The builder provides three main functionalities: - Add device dependences to host actions. - Add host dependence to device actions. - Register device top-level actions. The constructor of the builder detects the programming models that should be supported, and generates a specialized builder for each. If a new programming model is to be added in the future, only a new specialized builder has to be implemented. When the specialized builder is generated, it produces programming-model-specific diagnostics. A CUDA specialized builder is proposed in the patch that mostly consists of the partition of the current `buildCudaAction` by the three different functionalities. Reviewers: tra, echristo, ABataev, jlebar, hfinkel Subscribers: Hahnfeld, whchung, guansong, jlebar, mehdi_amini, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D18172 Modified: cfe/trunk/include/clang/Driver/Compilation.h cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/Types.cpp cfe/trunk/test/Driver/cuda-bindings.cu cfe/trunk/test/Driver/cuda-phases.cu Modified: cfe/trunk/include/clang/Driver/Compilation.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Compilation.h?rev=282865&r1=282864&r2=282865&view=diff == --- cfe/trunk/include/clang/Driver/Compilation.h (original) +++ cfe/trunk/include/clang/Driver/Compilation.h Fri Sep 30 10:34:19 2016 @@ -115,6 +115,12 @@ public: return OrderedOffloadingToolchains.equal_range(Kind); } + /// Return true if an offloading tool chain of a given kind exists. 
+ template bool hasOffloadToolChain() const { +return OrderedOffloadingToolchains.find(Kind) != + OrderedOffloadingToolchains.end(); + } + /// Return an offload toolchain of the provided kind. Only one is expected to /// exist. template Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=282865&r1=282864&r2=282865&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Fri Sep 30 10:34:19 2016 @@ -1400,139 +1400,536 @@ void Driver::BuildInputs(const ToolChain } } -// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE -// input action and then wraps each in CudaDeviceAction paired with -// appropriate GPU arch name. In case of partial (i.e preprocessing -// only) or device-only compilation, each device action is added to /p -// Actions and /p Current is released. Otherwise the function creates -// and returns a new CudaHostAction which wraps /p Current and device -// side actions. -static Action *buildCudaActions(Compilation &C, DerivedArgList &Args, -const Arg *InputArg, Action *HostAction, -ActionList &Actions) { - Arg *PartialCompilationArg = Args.getLastArg( - options::OPT_cuda_host_only, options::OPT_cuda_device_only, - options::OPT_cuda_compile_host_device); - bool CompileHostOnly = - PartialCompilationArg && - PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only); - bool CompileDeviceOnly = - PartialCompilationArg && - PartialCompilationArg->getOption().matches(options::OPT_cuda_device_only); - const ToolChain *HostTC = C.getSingleOffloadToolChain(); - assert(HostTC && "No toolchain for host compilation."); - if (HostTC->getTriple().isNVPTX()) { -// We do not support targeting NVPTX for host compilation. Throw -// an error and abort pipeline construction early so we don't trip -// asserts that assume device-side compilation. 
-C.getDriver().Diag(diag::err_drv_cuda_nvptx_host); -return nullptr; - } - - if (CompileHostOnly) { -OffloadAction::HostDependence HDep(*HostAction, *HostTC, - /*BoundArch=*/nullptr, Action::OFK_Cuda); -return C.MakeAction(HDep); - } - - // Collect all cuda_gpu_arch parameters, removing duplicates. - SmallVector GpuArchList; - llvm::SmallSet GpuArchs; - for (Arg *A : Args) { -if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) - continue; -A->claim(); +namespace { +/// Provides a convenient interface for different programming models to generate +/// the required device actions. +class OffloadingActionBuilder final { + /// Flag used to trace errors in the builder. + bool IsValid = false; + + /// The compilation that is using this builder. + Compilation &C; + +
[PATCH] D18172: [CUDA][OpenMP] Add a generic offload action builder
This revision was automatically updated to reflect the committed changes. sfantao marked an inline comment as done. Closed by commit rL282865: [CUDA][OpenMP] Add a generic offload action builder (authored by sfantao). Changed prior to commit: https://reviews.llvm.org/D18172?vs=72117&id=73060#toc Repository: rL LLVM https://reviews.llvm.org/D18172 Files: cfe/trunk/include/clang/Driver/Compilation.h cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/Types.cpp cfe/trunk/test/Driver/cuda-bindings.cu cfe/trunk/test/Driver/cuda-phases.cu Index: cfe/trunk/include/clang/Driver/Compilation.h === --- cfe/trunk/include/clang/Driver/Compilation.h +++ cfe/trunk/include/clang/Driver/Compilation.h @@ -115,6 +115,12 @@ return OrderedOffloadingToolchains.equal_range(Kind); } + /// Return true if an offloading tool chain of a given kind exists. + template bool hasOffloadToolChain() const { +return OrderedOffloadingToolchains.find(Kind) != + OrderedOffloadingToolchains.end(); + } + /// Return an offload toolchain of the provided kind. Only one is expected to /// exist. template Index: cfe/trunk/test/Driver/cuda-bindings.cu === --- cfe/trunk/test/Driver/cuda-bindings.cu +++ cfe/trunk/test/Driver/cuda-bindings.cu @@ -34,8 +34,8 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=ASM %s -// ASM: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.s" -// ASM: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" +// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.s" +// ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" // // Test two gpu architectures with complete compilation. 
@@ -62,9 +62,9 @@ // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \ // RUN:--cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \ // RUN: | FileCheck -check-prefix=ASM2 %s -// ASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.s" -// ASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_35.s" -// ASM2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" +// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_30.s" +// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-device-cuda-nvptx64-nvidia-cuda-sm_35.s" +// ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s" // // Test one or more gpu architecture with complete compilation in host-only Index: cfe/trunk/test/Driver/cuda-phases.cu === --- cfe/trunk/test/Driver/cuda-phases.cu +++ cfe/trunk/test/Driver/cuda-phases.cu @@ -13,194 +13,189 @@ // // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases --cuda-gpu-arch=sm_30 %s 2>&1 \ // RUN: | FileCheck -check-prefix=BIN %s -// BIN: 0: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda) -// BIN: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda) -// BIN: 2: compiler, {1}, ir, (host-cuda) -// BIN: 3: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30) -// BIN: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_30) -// BIN: 5: compiler, {4}, ir, (device-cuda, sm_30) -// BIN: 6: backend, {5}, assembler, (device-cuda, sm_30) -// BIN: 7: assembler, {6}, object, (device-cuda, sm_30) -// BIN: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {7}, object -// BIN: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {6}, assembler -// BIN: 10: linker, {8, 9}, cuda-fatbin, (device-cuda) -// BIN: 11: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir 
-// BIN: 12: backend, {11}, assembler, (host-cuda) -// BIN: 13: assembler, {12}, object, (host-cuda) -// BIN: 14: linker, {13}, image, (host-cuda) +// BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (host-cuda) +// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, cuda-cpp-output, (host-cuda) +// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-cuda) +// BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", cuda, (device-cuda, sm_30) +// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, cuda-cpp-output, (device-cuda, sm_30) +// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-cuda, sm_30) +// BIN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-cuda, sm_30) +// BIN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-cuda, sm_30) +// BIN-DAG: [[P8:[0-9]+]]: offload, "device-cuda (nvptx64-nvidia-cuda:sm_30)" {[[P7]]}, object +// BIN-DAG: [[
r291124 - [OpenMP] Add fields for flags in the offload entry descriptor.
Author: sfantao Date: Thu Jan 5 10:02:49 2017 New Revision: 291124 URL: http://llvm.org/viewvc/llvm-project?rev=291124&view=rev Log: [OpenMP] Add fields for flags in the offload entry descriptor. Summary: This patch adds two fields to the offload entry descriptor. One field is meant to signal Ctors/Dtors and `link` global variables, and the other is reserved for runtime library use. Currently, these fields are only filled with zeros in the current code generation, but that will change when `declare target` is added. The reason we are adding these fields now is to make the code generation consistent with the runtime library proposal under review in https://reviews.llvm.org/D14031. Reviewers: ABataev, hfinkel, carlo.bertolli, kkwli0, arpith-jacob, Hahnfeld Subscribers: cfe-commits, caomhin, jholewinski Differential Revision: https://reviews.llvm.org/D28298 Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h cfe/trunk/test/OpenMP/target_codegen.cpp cfe/trunk/test/OpenMP/target_codegen_registration.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=291124&r1=291123&r2=291124&view=diff == --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Thu Jan 5 10:02:49 2017 @@ -2701,14 +2701,16 @@ void CGOpenMPRuntime::OffloadEntriesInfo "only required for the device " "code generation."); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = - OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); + OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, + /*Flags=*/0); ++OffloadingEntriesNum; } void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - 
llvm::Constant *Addr, llvm::Constant *ID) { + llvm::Constant *Addr, llvm::Constant *ID, + int32_t Flags) { // If we are emitting code for a target, the entry is already initialized, // only has to be registered. if (CGM.getLangOpts().OpenMPIsDevice) { @@ -2719,9 +2721,10 @@ void CGOpenMPRuntime::OffloadEntriesInfo assert(Entry.isValid() && "Entry not initialized!"); Entry.setAddress(Addr); Entry.setID(ID); +Entry.setFlags(Flags); return; } else { -OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); +OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; } } @@ -2888,7 +2891,8 @@ CGOpenMPRuntime::createOffloadingBinaryD } void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, - llvm::Constant *Addr, uint64_t Size) { + llvm::Constant *Addr, uint64_t Size, + int32_t Flags) { StringRef Name = Addr->getName(); auto *TgtOffloadEntryType = cast( CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); @@ -2918,6 +2922,8 @@ void CGOpenMPRuntime::createOffloadEntry EntryInit.add(AddrPtr); EntryInit.add(StrPtr); EntryInit.addInt(CGM.SizeTy, Size); + EntryInit.addInt(CGM.Int32Ty, Flags); + EntryInit.addInt(CGM.Int32Ty, 0); llvm::GlobalVariable *Entry = EntryInit.finishAndCreateGlobal(".omp_offloading.entry", Align, @@ -3090,6 +3096,8 @@ QualType CGOpenMPRuntime::getTgtOffloadE // // (function or global) // char *name; // Name of the function or global. // size_t size; // Size of the entry info (0 if it a function). + // int32_tflags; // Flags associated with the entry, e.g. 'link'. + // int32_treserved; // Reserved, to use by the runtime library. 
// }; if (TgtOffloadEntryQTy.isNull()) { ASTContext &C = CGM.getContext(); @@ -3098,6 +3106,10 @@ QualType CGOpenMPRuntime::getTgtOffloadE addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); addFieldToRecordDecl(C, RD, C.getSizeType()); +addFieldToRecordDecl( +C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); +addFieldToRecordDecl( +C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); RD->completeDefinition(); TgtOffloadEntryQTy = C.getRecordType(RD);
Re: [PATCH] D13909: clang-offload-bundler - offload files bundling/unbundling tool
sfantao updated this revision to Diff 68086. sfantao marked 5 inline comments as done. sfantao added a comment. - Fix comments and diagnostics. https://reviews.llvm.org/D13909 Files: test/CMakeLists.txt test/Driver/clang-offload-bundler.c tools/CMakeLists.txt tools/clang-offload-bundler/CMakeLists.txt tools/clang-offload-bundler/ClangOffloadBundler.cpp Index: tools/clang-offload-bundler/ClangOffloadBundler.cpp === --- /dev/null +++ tools/clang-offload-bundler/ClangOffloadBundler.cpp @@ -0,0 +1,681 @@ +//===-- clang-offload-bundler/ClangOffloadBundler.cpp - Clang format tool -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// \brief This file implements a clang-offload-bundler that bundles different +/// files that relate with the same source code but different targets into a +/// single one. Also the implements the opposite functionality, i.e. unbundle +/// files previous created by this tool. +/// +//===--===// + +#include "clang/Basic/FileManager.h" +#include "clang/Basic/Version.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" + +using namespace llvm; +using namespace llvm::object; + +static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); + +// Mark all our options with this category, everything else (except for -version +// and -help) will be hidden. 
+static cl::OptionCategory +ClangOffloadBundlerCategory("clang-offload-bundler options"); + +static cl::list +InputFileNames("inputs", cl::CommaSeparated, cl::OneOrMore, + cl::desc("[,...]"), + cl::cat(ClangOffloadBundlerCategory)); +static cl::list +OutputFileNames("outputs", cl::CommaSeparated, cl::OneOrMore, +cl::desc("[,...]"), +cl::cat(ClangOffloadBundlerCategory)); +static cl::list +TargetNames("targets", cl::CommaSeparated, cl::OneOrMore, +cl::desc("[-,...]"), +cl::cat(ClangOffloadBundlerCategory)); +static cl::opt +FilesType("type", cl::Required, + cl::desc("Type of the files to be bundled/unbundled.\n" + "Current supported types are:\n" + " i - cpp-output\n" + " ii - c++-cpp-output\n" + " ll - llvm\n" + " bc - llvm-bc\n" + " s - assembler\n" + " o - object\n" + " gch - precompiled-header\n" + " ast - clang AST file"), + cl::cat(ClangOffloadBundlerCategory)); +static cl::opt +Unbundle("unbundle", + cl::desc("Unbundle bundled file into several output files.\n"), + cl::init(false), cl::cat(ClangOffloadBundlerCategory)); + +/// Magic string that marks the existence of offloading data. +#define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" + +/// The index of the host input in the list of inputs. +static unsigned HostInputIndex = ~0u; + +/// Obtain the offload kind and real machine triple out of the target +/// information specified by the user. +static void getOffloadKindAndTriple(StringRef Target, StringRef &OffloadKind, +StringRef &Triple) { + auto KindTriplePair = Target.split('-'); + OffloadKind = KindTriplePair.first; + Triple = KindTriplePair.second; +} +static bool hasHostKind(StringRef Target) { + StringRef OffloadKind; + StringRef Triple; + getOffloadKindAndTriple(Target, OffloadKind, Triple); + return OffloadKind == "host"; +} + +/// Generic file handler interface. 
+class FileHandler { +public: + /// Update the file handler with information from the header of the bundled + /// file + virtual void ReadHeader(MemoryBuffer &Input) = 0; + /// Read the marker of the next bundled to be read in the file. The triple of + /// the target associated with that bundle is returned. An empty string is + /// returned if there are no more bundles to be read. + virtual StringRef ReadBundleStart(MemoryBuffer &Input) = 0; + /// Read the marker that closes the current bundle. + virtual void ReadBundleEnd(MemoryBuffer &Input) = 0; + /// Read the current bundle and write the result i
Re: [PATCH] D13909: clang-offload-bundler - offload files bundling/unbundling tool
sfantao added a comment. Hi Jonas, Thanks for the review! Comment at: test/CMakeLists.txt:27-33 @@ -26,8 +26,9 @@ list(APPEND CLANG_TEST_DEPS clang clang-headers clang-format c-index-test diagtool clang-tblgen + clang-offload-bundler ) Hahnfeld wrote: > I think `clang-offload-bundler` needs to be added as dependency for the > `clang` target because it will really need the bundler at runtime, not only > when testing... > > (Disclaimer: I'm no CMake expert) The bundler tool already depends on clang, so that would cause a circular dependency. I think that in general not building the bundler is fine - the user may not be interested in doing offloading, so if he attempts to do so, that would fail just as it would if, say, ld were not on the system. I'm adding it only for testing because there are tests that will exercise the bundler that will fail if the driver does not detect the tool. Should we ask someone specific for an opinion? Let me know your thoughts. Comment at: tools/clang-offload-bundler/ClangOffloadBundler.cpp:151 @@ +150,3 @@ + +/// Read 8-byte integers to/from a buffer in little-endian format. +static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) { Hahnfeld wrote: > `to/from`? Thanks for catching this! Fixed in the last diff, it should be `from`. Comment at: tools/clang-offload-bundler/ClangOffloadBundler.cpp:164 @@ +163,3 @@ + +/// Write and write 8-byte integers to/from a buffer in little-endian format. +static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) { Hahnfeld wrote: > Duplicate `and write`? `to/from`? Fixed in the last diff, it should be `to`. Comment at: tools/clang-offload-bundler/ClangOffloadBundler.cpp:568 @@ +567,3 @@ + if (!FoundHostBundle) { +llvm::errs() << "error: Can't find bundles for all requested targets\n"; +return true; Hahnfeld wrote: > Better say that we haven't found the bundle for the host? Makes sense, I changed the message in the last diff. 
https://reviews.llvm.org/D13909 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21851: [Driver][OpenMP][CUDA] Add capability to bundle object files in sections of the host binary format.
sfantao updated this revision to Diff 68089. sfantao marked an inline comment as done. sfantao added a comment. - Remove redundant return statement. https://reviews.llvm.org/D21851 Files: test/Driver/clang-offload-bundler.c test/Driver/clang-offload-bundler.c.o tools/clang-offload-bundler/ClangOffloadBundler.cpp Index: tools/clang-offload-bundler/ClangOffloadBundler.cpp === --- tools/clang-offload-bundler/ClangOffloadBundler.cpp +++ tools/clang-offload-bundler/ClangOffloadBundler.cpp @@ -72,20 +72,40 @@ cl::desc("Unbundle bundled file into several output files.\n"), cl::init(false), cl::cat(ClangOffloadBundlerCategory)); +static cl::opt PrintExternalCommands( +"###", +cl::desc("Print any external commands that are to be executed " + "instead of actually executing them - for testing purposes.\n"), +cl::init(false), cl::cat(ClangOffloadBundlerCategory)); + +static cl::opt DumpTemporaryFiles( +"dump-temporary-files", +cl::desc("Dumps any temporary files created - for testing purposes.\n"), +cl::init(false), cl::cat(ClangOffloadBundlerCategory)); + /// Magic string that marks the existence of offloading data. #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" /// The index of the host input in the list of inputs. static unsigned HostInputIndex = ~0u; +/// Path to the current binary. +static std::string BundlerExecutable; + /// Obtain the offload kind and real machine triple out of the target /// information specified by the user. static void getOffloadKindAndTriple(StringRef Target, StringRef &OffloadKind, StringRef &Triple) { auto KindTriplePair = Target.split('-'); OffloadKind = KindTriplePair.first; Triple = KindTriplePair.second; } +static StringRef getTriple(StringRef Target) { + StringRef OffloadKind; + StringRef Triple; + getOffloadKindAndTriple(Target, OffloadKind, Triple); + return Triple; +} static bool hasHostKind(StringRef Target) { StringRef OffloadKind; StringRef Triple; @@ -116,8 +136,8 @@ /// \a OS. 
virtual void WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) = 0; /// Write the marker that closes a bundle for the triple \a TargetTriple to \a - /// OS. - virtual void WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0; + /// OS. Return true if any error was found. + virtual bool WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0; /// Write the bundle from \a Input into \a OS. virtual void WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0; @@ -303,15 +323,250 @@ } } void WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) {} - void WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) {} + bool WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) { +return false; + } void WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) { OS.write(Input.getBufferStart(), Input.getBufferSize()); } BinaryFileHandler() : FileHandler() {} ~BinaryFileHandler() {} }; +/// Handler for object files. The bundles are organized by sections with a +/// designated name. +/// +/// In order to bundle we create an IR file with the content of each section and +/// use incremental linking to produce the resulting object. We also add section +/// with a single byte to state the name of the component the main object file +/// (the one we are bundling into) refers to. +/// +/// To unbundle, we use just copy the contents of the designated section. If the +/// requested bundle refer to the main object file, we just copy it with no +/// changes. +class ObjectFileHandler final : public FileHandler { + + /// The object file we are currently dealing with. + ObjectFile &Obj; + + /// Return the input file contents. + StringRef getInputFileContents() const { return Obj.getData(); } + + /// Return true if the provided section is an offload section and return the + /// triple by reference. 
+ static bool IsOffloadSection(SectionRef CurSection, + StringRef &OffloadTriple) { +StringRef SectionName; +CurSection.getName(SectionName); + +if (SectionName.empty()) + return false; + +// If it does not start with the reserved suffix, just skip this section. +if (!SectionName.startswith(OFFLOAD_BUNDLER_MAGIC_STR)) + return false; + +// Return the triple that is right after the reserved prefix. +OffloadTriple = SectionName.substr(sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1); +return true; + } + + /// Total number of inputs. + unsigned NumberOfInputs = 0; + + /// Total number of processed inputs, i.e, inputs that were already + /// read from the buffers. + unsigned NumberOfProcessedInputs = 0; + + /// LLVM context used to to create the auxiliar modules. + LLVMContext VMContext; + + /// LLVM module used to create an object with all the
Re: [PATCH] D23526: [CUDA] Collapsed offload actions should not be top-level jobs.
sfantao added a comment. Hi Art, Thanks for the patch! That looks good. https://reviews.llvm.org/D23526 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r278806 - Add empty --gcc-toolchain to cuda-detect test.
Author: sfantao Date: Tue Aug 16 09:31:39 2016 New Revision: 278806 URL: http://llvm.org/viewvc/llvm-project?rev=278806&view=rev Log: Add empty --gcc-toolchain to cuda-detect test. Unless we override the default gcc toolchain with an empty string, the system root used in the tests will be ignored if the user builds clang with a custom gcc toolchain. Modified: cfe/trunk/test/Driver/cuda-detect.cu Modified: cfe/trunk/test/Driver/cuda-detect.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-detect.cu?rev=278806&r1=278805&r2=278806&view=diff == --- cfe/trunk/test/Driver/cuda-detect.cu (original) +++ cfe/trunk/test/Driver/cuda-detect.cu Tue Aug 16 09:31:39 2016 @@ -75,6 +75,7 @@ // Verify that C++ include paths are passed for both host and device frontends. // RUN: %clang -### -no-canonical-prefixes -target x86_64-linux-gnu %s \ // RUN: --stdlib=libstdc++ --sysroot=%S/Inputs/ubuntu_14.04_multiarch_tree2 2>&1 \ +// RUN: --gcc-toolchain="" \ // RUN: | FileCheck %s --check-prefix CHECK-CXXINCLUDE // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r278811 - Reorder stderr redirection in test command.
Author: sfantao Date: Tue Aug 16 09:38:39 2016 New Revision: 278811 URL: http://llvm.org/viewvc/llvm-project?rev=278811&view=rev Log: Reorder stderr redirection in test command. Modified: cfe/trunk/test/Driver/cuda-detect.cu Modified: cfe/trunk/test/Driver/cuda-detect.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-detect.cu?rev=278811&r1=278810&r2=278811&view=diff == --- cfe/trunk/test/Driver/cuda-detect.cu (original) +++ cfe/trunk/test/Driver/cuda-detect.cu Tue Aug 16 09:38:39 2016 @@ -74,8 +74,8 @@ // Verify that C++ include paths are passed for both host and device frontends. // RUN: %clang -### -no-canonical-prefixes -target x86_64-linux-gnu %s \ -// RUN: --stdlib=libstdc++ --sysroot=%S/Inputs/ubuntu_14.04_multiarch_tree2 2>&1 \ -// RUN: --gcc-toolchain="" \ +// RUN: --stdlib=libstdc++ --sysroot=%S/Inputs/ubuntu_14.04_multiarch_tree2 \ +// RUN: --gcc-toolchain="" 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-CXXINCLUDE // CHECK: Found CUDA installation: {{.*}}/Inputs/CUDA/usr/local/cuda ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D13909: clang-offload-bundler - offload files bundling/unbundling tool
sfantao updated this revision to Diff 69124. sfantao marked an inline comment as done. sfantao added a comment. - Add clang-offload bundler as dependency to clang. https://reviews.llvm.org/D13909 Files: test/CMakeLists.txt test/Driver/clang-offload-bundler.c tools/CMakeLists.txt tools/clang-offload-bundler/CMakeLists.txt tools/clang-offload-bundler/ClangOffloadBundler.cpp Index: tools/clang-offload-bundler/ClangOffloadBundler.cpp === --- /dev/null +++ tools/clang-offload-bundler/ClangOffloadBundler.cpp @@ -0,0 +1,681 @@ +//===-- clang-offload-bundler/ClangOffloadBundler.cpp - Clang format tool -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// \brief This file implements a clang-offload-bundler that bundles different +/// files that relate with the same source code but different targets into a +/// single one. Also the implements the opposite functionality, i.e. unbundle +/// files previous created by this tool. +/// +//===--===// + +#include "clang/Basic/FileManager.h" +#include "clang/Basic/Version.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" + +using namespace llvm; +using namespace llvm::object; + +static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); + +// Mark all our options with this category, everything else (except for -version +// and -help) will be hidden. 
+static cl::OptionCategory +ClangOffloadBundlerCategory("clang-offload-bundler options"); + +static cl::list +InputFileNames("inputs", cl::CommaSeparated, cl::OneOrMore, + cl::desc("[,...]"), + cl::cat(ClangOffloadBundlerCategory)); +static cl::list +OutputFileNames("outputs", cl::CommaSeparated, cl::OneOrMore, +cl::desc("[,...]"), +cl::cat(ClangOffloadBundlerCategory)); +static cl::list +TargetNames("targets", cl::CommaSeparated, cl::OneOrMore, +cl::desc("[-,...]"), +cl::cat(ClangOffloadBundlerCategory)); +static cl::opt +FilesType("type", cl::Required, + cl::desc("Type of the files to be bundled/unbundled.\n" + "Current supported types are:\n" + " i - cpp-output\n" + " ii - c++-cpp-output\n" + " ll - llvm\n" + " bc - llvm-bc\n" + " s - assembler\n" + " o - object\n" + " gch - precompiled-header\n" + " ast - clang AST file"), + cl::cat(ClangOffloadBundlerCategory)); +static cl::opt +Unbundle("unbundle", + cl::desc("Unbundle bundled file into several output files.\n"), + cl::init(false), cl::cat(ClangOffloadBundlerCategory)); + +/// Magic string that marks the existence of offloading data. +#define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__" + +/// The index of the host input in the list of inputs. +static unsigned HostInputIndex = ~0u; + +/// Obtain the offload kind and real machine triple out of the target +/// information specified by the user. +static void getOffloadKindAndTriple(StringRef Target, StringRef &OffloadKind, +StringRef &Triple) { + auto KindTriplePair = Target.split('-'); + OffloadKind = KindTriplePair.first; + Triple = KindTriplePair.second; +} +static bool hasHostKind(StringRef Target) { + StringRef OffloadKind; + StringRef Triple; + getOffloadKindAndTriple(Target, OffloadKind, Triple); + return OffloadKind == "host"; +} + +/// Generic file handler interface. 
+class FileHandler { +public: + /// Update the file handler with information from the header of the bundled + /// file + virtual void ReadHeader(MemoryBuffer &Input) = 0; + /// Read the marker of the next bundled to be read in the file. The triple of + /// the target associated with that bundle is returned. An empty string is + /// returned if there are no more bundles to be read. + virtual StringRef ReadBundleStart(MemoryBuffer &Input) = 0; + /// Read the marker that closes the current bundle. + virtual void ReadBundleEnd(MemoryBuffer &Input) = 0; + /// Read the current bundle an
Re: [PATCH] D13909: clang-offload-bundler - offload files bundling/unbundling tool
sfantao added a comment. Hi Jonas, Thanks again for the review! Comment at: test/CMakeLists.txt:27-33 @@ -26,8 +26,9 @@ list(APPEND CLANG_TEST_DEPS clang clang-headers clang-format c-index-test diagtool clang-tblgen + clang-offload-bundler ) Hahnfeld wrote: > Most users will get it anyway because it is built for the `install` target > and I think the build system should do its best to build and install all > needed dependencies. > > I think this currently only fails when using `make clang` and then trying to > invoke the compiler from the build directory. I agree that this should be > quite rare but that could be fixed by `add_dependencies(clang > clang-offload-bundler)` which has worked for me. However I now don't have a > really strong opinion here because it works with the `install` target. Ok, I added the line `add_dependencies(clang clang-offload-bundler)` as you suggested. I was worried that referring to clang libs would cause a circular dependency, but it seems to work just fine. https://reviews.llvm.org/D13909 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r279632 - clang-offload-bundler - offload files bundling/unbundling tool
Author: sfantao Date: Wed Aug 24 10:21:05 2016 New Revision: 279632 URL: http://llvm.org/viewvc/llvm-project?rev=279632&view=rev Log: clang-offload-bundler - offload files bundling/unbundling tool Summary: One of the goals of programming models that support offloading (e.g. OpenMP) is to enable users to offload with little effort, by annotating the code with a few pragmas. I'd also like to save users the trouble of changing their existing applications' build system. So having the compiler always return a single file instead of one for the host and each target even if the user is doing separate compilation is desirable. This diff proposes a tool named clang-offload-bundler (happy to change the name if required) that is used to bundle files associated with the same user source file but different targets, or to unbundle a file into separate files associated with different targets. This tool is used by the driver support for OpenMP under review in http://reviews.llvm.org/D9888. The tool is used there to enable separate compilation, so that the very first action on input files that are not source files is an "unbundling action" and the very last non-linking action is a "bundling action". The format of the bundled files is currently very simple: text formats are concatenated with comments that have a magic string and target identifying triple in between, and binary formats have a header that contains the triple and the offset and size of the code for host and each target. The goal is to improve this tool in the future to deal with archive files so that each individual file in the archive is properly dealt with. We see that archives are very commonly used in current applications to combine separate compilation results. So I'm convinced users would enjoy this feature. 
This tool can be used like this: `clang-offload-bundler -targets=triple1,triple2 -type=ii -inputs=a.triple1.ii,a.triple2.ii -outputs=a.ii` or `clang-offload-bundler -targets=triple1,triple2 -type=ii -outputs=a.triple1.ii,a.triple2.ii -inputs=a.ii -unbundle` I implemented the tool under clang/tools. Please let me know if something like this should live somewhere else. This patch is prerequisite for http://reviews.llvm.org/D9888. Reviewers: hfinkel, rsmith, echristo, chandlerc, tra, jlebar, ABataev, Hahnfeld Subscribers: whchung, caomhin, andreybokhanko, arpith-jacob, carlo.bertolli, mehdi_amini, guansong, Hahnfeld, cfe-commits Differential Revision: https://reviews.llvm.org/D13909 Added: cfe/trunk/test/Driver/clang-offload-bundler.c cfe/trunk/tools/clang-offload-bundler/ cfe/trunk/tools/clang-offload-bundler/CMakeLists.txt cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp Modified: cfe/trunk/test/CMakeLists.txt cfe/trunk/tools/CMakeLists.txt Modified: cfe/trunk/test/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CMakeLists.txt?rev=279632&r1=279631&r2=279632&view=diff == --- cfe/trunk/test/CMakeLists.txt (original) +++ cfe/trunk/test/CMakeLists.txt Wed Aug 24 10:21:05 2016 @@ -29,6 +29,7 @@ list(APPEND CLANG_TEST_DEPS clang-format c-index-test diagtool clang-tblgen + clang-offload-bundler ) if(CLANG_ENABLE_STATIC_ANALYZER) Added: cfe/trunk/test/Driver/clang-offload-bundler.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/clang-offload-bundler.c?rev=279632&view=auto == --- cfe/trunk/test/Driver/clang-offload-bundler.c (added) +++ cfe/trunk/test/Driver/clang-offload-bundler.c Wed Aug 24 10:21:05 2016 @@ -0,0 +1,222 @@ +// +// Generate all the types of files we can bundle. 
+// +// RUN: %clang -O0 -target powerpc64le-ibm-linux-gnu %s -E -o %t.i +// RUN: %clangxx -O0 -target powerpc64le-ibm-linux-gnu -x c++ %s -E -o %t.ii +// RUN: %clang -O0 -target powerpc64le-ibm-linux-gnu %s -S -emit-llvm -o %t.ll +// RUN: %clang -O0 -target powerpc64le-ibm-linux-gnu %s -c -emit-llvm -o %t.bc +// RUN: %clang -O0 -target powerpc64le-ibm-linux-gnu %s -S -o %t.s +// RUN: %clang -O0 -target powerpc64le-ibm-linux-gnu %s -emit-ast -o %t.ast + +// +// Generate an empty file to help with the checks of empty files. +// +// RUN: touch %t.empty + +// +// Generate a couple of files to bundle with. +// +// RUN: echo 'Content of device file 1' > %t.tgt1 +// RUN: echo 'Content of device file 2' > %t.tgt2 + +// +// Check help message. +// +// RUN: clang-offload-bundler --help | FileCheck %s --check-prefix CK-HELP +// CK-HELP: {{.*}}OVERVIEW: A tool to bundle several input files of the specified type +// CK-HELP: {{.*}}referring to the same source file but different targets into a single +// CK-HELP: {{.*}}one. The resulting file can also be unbundled into different files by +// CK-HELP: {{.*}}this tool if -unbundle is provided. +// CK-HELP: {{.*}}USAGE: clang-offload-bundler [subcommand] [options] +//
r279634 - [Driver][OpenMP][CUDA] Add capability to bundle object files in sections of the host binary format.
Author: sfantao Date: Wed Aug 24 10:39:07 2016 New Revision: 279634 URL: http://llvm.org/viewvc/llvm-project?rev=279634&view=rev Log: [Driver][OpenMP][CUDA] Add capability to bundle object files in sections of the host binary format. Summary: This patch adds the capability to bundle object files in sections of the host binary using a designated naming convention for these sections. This patch uses the functionality of the object reader already in the LLVM library to read bundled files, and invokes clang with the incremental linking options to create bundle files. Bundling files involves creating an IR file with the contents of the bundle assigned as initializers of globals bound to the designated sections. This way the bundling implementation is agnostic of the host object format. The features added by this patch were requested in the RFC discussion in http://lists.llvm.org/pipermail/cfe-dev/2016-February/047547.html. Reviewers: echristo, tra, jlebar, hfinkel, ABataev, Hahnfeld Subscribers: mkuron, whchung, cfe-commits, andreybokhanko, Hahnfeld, arpith-jacob, carlo.bertolli, mehdi_amini, caomhin Differential Revision: https://reviews.llvm.org/D21851 Added: cfe/trunk/test/Driver/clang-offload-bundler.c.o Modified: cfe/trunk/test/Driver/clang-offload-bundler.c cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp Modified: cfe/trunk/test/Driver/clang-offload-bundler.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/clang-offload-bundler.c?rev=279634&r1=279633&r2=279634&view=diff == --- cfe/trunk/test/Driver/clang-offload-bundler.c (original) +++ cfe/trunk/test/Driver/clang-offload-bundler.c Wed Aug 24 10:39:07 2016 @@ -6,6 +6,7 @@ // RUN: %clang -O0 -target powerpc64le-ibm-linux-gnu %s -S -emit-llvm -o %t.ll // RUN: %clang -O0 -target powerpc64le-ibm-linux-gnu %s -c -emit-llvm -o %t.bc // RUN: %clang -O0 -target powerpc64le-ibm-linux-gnu %s -S -o %t.s +// RUN: %clang -O0 -target powerpc64le-ibm-linux-gnu %s -c -o %t.o // RUN: %clang -O0 
-target powerpc64le-ibm-linux-gnu %s -emit-ast -o %t.ast // @@ -215,6 +216,40 @@ // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 +// +// Check object bundle/unbundle. The content should be bundled into an ELF +// section (we are using a PowerPC little-endian host which uses ELF). We +// have an already bundled file to check the unbundle and do a dry run on the +// bundling as it cannot be tested in all host platforms that will run these +// tests. +// + +// RUN: clang-offload-bundler -type=o -targets=host-powerpc64le-ibm-linux-gnu,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -inputs=%t.o,%t.tgt1,%t.tgt2 -outputs=%t.bundle3.o -### -dump-temporary-files 2>&1 \ +// RUN: | FileCheck %s --check-prefix CK-OBJ-CMD +// CK-OBJ-CMD: private constant [1 x i8] zeroinitializer, section "__CLANG_OFFLOAD_BUNDLE__host-powerpc64le-ibm-linux-gnu" +// CK-OBJ-CMD: private constant [25 x i8] c"Content of device file 1\0A", section "__CLANG_OFFLOAD_BUNDLE__openmp-powerpc64le-ibm-linux-gnu" +// CK-OBJ-CMD: private constant [25 x i8] c"Content of device file 2\0A", section "__CLANG_OFFLOAD_BUNDLE__openmp-x86_64-pc-linux-gnu" +// CK-OBJ-CMD: clang" "-r" "-target" "powerpc64le-ibm-linux-gnu" "-o" "{{.+}}.o" "{{.+}}.o" "{{.+}}.bc" "-nostdlib" + +// RUN: clang-offload-bundler -type=o -targets=host-powerpc64le-ibm-linux-gnu,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.o,%t.res.tgt1,%t.res.tgt2 -inputs=%s.o -unbundle +// RUN: diff %s.o %t.res.o +// RUN: diff %t.tgt1 %t.res.tgt1 +// RUN: diff %t.tgt2 %t.res.tgt2 +// RUN: clang-offload-bundler -type=o -targets=openmp-powerpc64le-ibm-linux-gnu,host-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.tgt1,%t.res.o,%t.res.tgt2 -inputs=%s.o -unbundle +// RUN: diff %s.o %t.res.o +// RUN: diff %t.tgt1 %t.res.tgt1 +// RUN: diff %t.tgt2 %t.res.tgt2 + +// Check if we can unbundle a file with no magic strings. 
+// RUN: clang-offload-bundler -type=o -targets=host-powerpc64le-ibm-linux-gnu,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.o,%t.res.tgt1,%t.res.tgt2 -inputs=%t.o -unbundle +// RUN: diff %t.o %t.res.o +// RUN: diff %t.empty %t.res.tgt1 +// RUN: diff %t.empty %t.res.tgt2 +// RUN: clang-offload-bundler -type=o -targets=openmp-powerpc64le-ibm-linux-gnu,host-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.tgt1,%t.res.o,%t.res.tgt2 -inputs=%t.o -unbundle +// RUN: diff %t.o %t.res.o +// RUN: diff %t.empty %t.res.tgt1 +// RUN: diff %t.empty %t.res.tgt2 + // Some code so that we can create a binary out of this file. int A = 0; void test_func(void) { Added: cfe/trunk/test/Driver/clang-offload-bundler.c.o URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/clang-offload-bundler.c.o?rev=279634&view=auto =
r279635 - Add target REQUIRES directives to offload bundler test.
Author: sfantao Date: Wed Aug 24 10:47:06 2016 New Revision: 279635 URL: http://llvm.org/viewvc/llvm-project?rev=279635&view=rev Log: Add target REQUIRES directives to offload bundler test. Modified: cfe/trunk/test/Driver/clang-offload-bundler.c Modified: cfe/trunk/test/Driver/clang-offload-bundler.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/clang-offload-bundler.c?rev=279635&r1=279634&r2=279635&view=diff == --- cfe/trunk/test/Driver/clang-offload-bundler.c (original) +++ cfe/trunk/test/Driver/clang-offload-bundler.c Wed Aug 24 10:47:06 2016 @@ -1,3 +1,6 @@ +// REQUIRES: x86-registered-target +// REQUIRES: powerpc-registered-target + // // Generate all the types of files we can bundle. // ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r279653 - Fix offload bundler tests so that diagnostic can start with caps.
Author: sfantao Date: Wed Aug 24 13:52:18 2016 New Revision: 279653 URL: http://llvm.org/viewvc/llvm-project?rev=279653&view=rev Log: Fix offload bundler tests so that diagnostic can start with caps. Windows requires that. Modified: cfe/trunk/test/Driver/clang-offload-bundler.c Modified: cfe/trunk/test/Driver/clang-offload-bundler.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/clang-offload-bundler.c?rev=279653&r1=279652&r2=279653&view=diff == --- cfe/trunk/test/Driver/clang-offload-bundler.c (original) +++ cfe/trunk/test/Driver/clang-offload-bundler.c Wed Aug 24 13:52:18 2016 @@ -68,7 +68,7 @@ // RUN: not clang-offload-bundler -type=i -targets=host-powerpc64le-ibm-linux-gnu,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -inputs=%t.i,%t.tgt1,%t.tgt2.notexist -outputs=%t.bundle.i 2>&1 | FileCheck %s --check-prefix CK-ERR5 // RUN: not clang-offload-bundler -type=i -targets=host-powerpc64le-ibm-linux-gnu,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.i,%t.tgt1,%t.tgt2 -inputs=%t.bundle.i.notexist -unbundle 2>&1 | FileCheck %s --check-prefix CK-ERR5 -// CK-ERR5: error: Can't open file {{.+}}.notexist: No such file or directory +// CK-ERR5: error: Can't open file {{.+}}.notexist: {{N|n}}o such file or directory // RUN: not clang-offload-bundler -type=invalid -targets=host-powerpc64le-ibm-linux-gnu,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -inputs=%t.i,%t.tgt1,%t.tgt2 -outputs=%t.bundle.i 2>&1 | FileCheck %s --check-prefix CK-ERR6 // CK-ERR6: error: invalid file type specified. ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r279741 - Fix offload bundler test to support Windows new lines.
Author: sfantao Date: Thu Aug 25 09:35:20 2016 New Revision: 279741 URL: http://llvm.org/viewvc/llvm-project?rev=279741&view=rev Log: Fix offload bundler test to support Windows new lines. Modified: cfe/trunk/test/Driver/clang-offload-bundler.c Modified: cfe/trunk/test/Driver/clang-offload-bundler.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/clang-offload-bundler.c?rev=279741&r1=279740&r2=279741&view=diff == --- cfe/trunk/test/Driver/clang-offload-bundler.c (original) +++ cfe/trunk/test/Driver/clang-offload-bundler.c Thu Aug 25 09:35:20 2016 @@ -230,8 +230,8 @@ // RUN: clang-offload-bundler -type=o -targets=host-powerpc64le-ibm-linux-gnu,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -inputs=%t.o,%t.tgt1,%t.tgt2 -outputs=%t.bundle3.o -### -dump-temporary-files 2>&1 \ // RUN: | FileCheck %s --check-prefix CK-OBJ-CMD // CK-OBJ-CMD: private constant [1 x i8] zeroinitializer, section "__CLANG_OFFLOAD_BUNDLE__host-powerpc64le-ibm-linux-gnu" -// CK-OBJ-CMD: private constant [25 x i8] c"Content of device file 1\0A", section "__CLANG_OFFLOAD_BUNDLE__openmp-powerpc64le-ibm-linux-gnu" -// CK-OBJ-CMD: private constant [25 x i8] c"Content of device file 2\0A", section "__CLANG_OFFLOAD_BUNDLE__openmp-x86_64-pc-linux-gnu" +// CK-OBJ-CMD: private constant [25 x i8] c"Content of device file 1{{.+}}", section "__CLANG_OFFLOAD_BUNDLE__openmp-powerpc64le-ibm-linux-gnu" +// CK-OBJ-CMD: private constant [25 x i8] c"Content of device file 2{{.+}}", section "__CLANG_OFFLOAD_BUNDLE__openmp-x86_64-pc-linux-gnu" // CK-OBJ-CMD: clang" "-r" "-target" "powerpc64le-ibm-linux-gnu" "-o" "{{.+}}.o" "{{.+}}.o" "{{.+}}.bc" "-nostdlib" // RUN: clang-offload-bundler -type=o -targets=host-powerpc64le-ibm-linux-gnu,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.o,%t.res.tgt1,%t.res.tgt2 -inputs=%s.o -unbundle ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D24165: [clang-offload-bundler] Fix some Clang-tidy modernize-use-override and Include What You Use warnings; other minor fixes
sfantao added a comment. Hi Eugene, The patch looks good to me. You may want to wait for someone who can approve the patch to take a look too. Thanks, Samuel Repository: rL LLVM https://reviews.llvm.org/D24165 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D24165: [clang-offload-bundler] Fix some Clang-tidy modernize-use-override and Include What You Use warnings; other minor fixes
sfantao added a comment. Hi Eugene, The patch looks good to me. You may want to wait for someone who can approve the patch to take a look too. Thanks, Samuel Repository: rL LLVM https://reviews.llvm.org/D24165 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26774: [CUDA] Driver changes to support CUDA compilation on MacOS.
sfantao added a comment. Hi Justin, Thanks for the patch. Comment at: clang/lib/Driver/Driver.cpp:479 +// the device toolchain we create depends on both. +ToolChain *&CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()]; +if (!CudaTC) { I am not sure I understand why the host and device toolchains are paired in the map. The driver can be used for several compilations, but how do these compilations use different host toolchains? Can you give an example of an invocation? Maybe add it to the regression tests below. https://reviews.llvm.org/D26774 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r289450 - Fix format and a few typos in comments.
Author: sfantao Date: Mon Dec 12 12:00:20 2016 New Revision: 289450 URL: http://llvm.org/viewvc/llvm-project?rev=289450&view=rev Log: Fix format and a few typos in comments. Modified: cfe/trunk/include/clang-c/Index.h cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp cfe/trunk/lib/Sema/SemaOpenMP.cpp Modified: cfe/trunk/include/clang-c/Index.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang-c/Index.h?rev=289450&r1=289449&r2=289450&view=diff == --- cfe/trunk/include/clang-c/Index.h (original) +++ cfe/trunk/include/clang-c/Index.h Mon Dec 12 12:00:20 2016 @@ -326,7 +326,7 @@ clang_isFileMultipleIncludeGuarded(CXTra * * \param tu the translation unit * -* \param file_name the name of the file. + * \param file_name the name of the file. * * \returns the file handle for the named file in the translation unit \p tu, * or a NULL file handle if the file was not a part of this translation unit. Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=289450&r1=289449&r2=289450&view=diff == --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Mon Dec 12 12:00:20 2016 @@ -490,7 +490,7 @@ enum OpenMPSchedType { OMP_sch_runtime = 37, OMP_sch_auto = 38, /// static with chunk adjustment (e.g., simd) - OMP_sch_static_balanced_chunked = 45, + OMP_sch_static_balanced_chunked = 45, /// \brief Lower bound for 'ordered' versions. OMP_ord_lower = 64, OMP_ord_static_chunked = 65, @@ -2930,8 +2930,8 @@ void CGOpenMPRuntime::createOffloadEntry void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Emit the offloading entries and metadata so that the device codegen side - // can - // easily figure out what to emit. The produced metadata looks like this: + // can easily figure out what to emit. 
The produced metadata looks like + // this: // // !omp_offload.info = !{!1, ...} // @@ -,9 +,8 @@ void CGOpenMPRuntime::emitReduction(Code auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); auto *ThreadId = getThreadID(CGF, Loc); auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); - auto *RL = -CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), -CGF.VoidPtrTy); + auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + ReductionList.getPointer(), CGF.VoidPtrTy); llvm::Value *Args[] = { IdentTLoc, // ident_t * ThreadId, // i32 Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=289450&r1=289449&r2=289450&view=diff == --- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original) +++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Mon Dec 12 12:00:20 2016 @@ -326,8 +326,8 @@ public: Scope *getCurScope() { return Stack.back().CurScope; } SourceLocation getConstructLoc() { return Stack.back().ConstructLoc; } - // Do the check specified in \a Check to all component lists and return true - // if any issue is found. + /// Do the check specified in \a Check to all component lists and return true + /// if any issue is found. bool checkMappableExprComponentListsForDecl( ValueDecl *VD, bool CurrentRegionOnly, const llvm::function_ref< @@ -355,8 +355,8 @@ public: return false; } - // Create a new mappable expression component list associated with a given - // declaration and initialize it with the provided list of components. + /// Create a new mappable expression component list associated with a given + /// declaration and initialize it with the provided list of components. void addMappableExpressionComponents( ValueDecl *VD, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, @@ -919,7 +919,7 @@ bool Sema::IsOpenMPCapturedByRef(ValueDe OpenMPClauseKind WhereFoundClauseKind) { // Only the map clause information influences how a variable is // captured. E.g. 
is_device_ptr does not require changing the default - // behaviour. + // behavior. if (WhereFoundClauseKind != OMPC_map) return false; @@ -3359,7 +3359,7 @@ Expr *OpenMPIterationSpaceChecker::Build return nullptr; } -/// \brief Build instillation of the counter be used for codegen. +/// \brief Build initialization of the counter to be used for codegen. Expr *OpenMPIterationSpaceChecker::BuildCounterInit() const { return LB; } /// \brief Build step of the counter be used for codegen. @@ -6124,11 +6124,10 @@ StmtResult Sema::ActOnOpenMPTeamsDis
r289458 - Fix typo and remove unnecessary statement.
Author: sfantao Date: Mon Dec 12 13:26:31 2016 New Revision: 289458 URL: http://llvm.org/viewvc/llvm-project?rev=289458&view=rev Log: Fix typo and remove unnecessary statement. Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=289458&r1=289457&r2=289458&view=diff == --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Mon Dec 12 13:26:31 2016 @@ -2769,7 +2769,6 @@ createOffloadingBinaryDescriptorFunction Args.push_back(&DummyPtr); CodeGenFunction CGF(CGM); - GlobalDecl(); auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto *Fn = @@ -6142,7 +6141,7 @@ bool CGOpenMPRuntime::emitTargetFunction // Try to detect target regions in the function. scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); - // We should not emit any function othen that the ones created during the + // We should not emit any function other that the ones created during the // scanning. Therefore, we signal that this function is completely dealt // with. return true; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21840: [Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
sfantao added a comment. Hi Hal, Thanks for the review! In https://reviews.llvm.org/D21840#555719, @hfinkel wrote: > The naming here is a bit hard to follow, we have 'dependent action', > 'dependency action', 'depending action', and I think they're all supposed to > mean the same thing. Only 'dependent action' sounds right to me, can we use > that universally (i.e. in all comments and names of functions and variables)? I agree the Depending/Dependence stuff can be confusing. However, I tried to use Depending and Dependence to indicate different things: - Depending action -> an action that depends on the current one - Dependence action -> an action that is a dependence of the current one Of course they all are dependent actions, so your suggestion definitely makes sense. So, in the last diff I indicate: - Depending action -> Next Dependent action - Dependence action -> Prev(ious) Dependent action I hope this helps clarify things. Let me know your thoughts. Thanks again! Samuel Comment at: lib/Driver/Driver.cpp:2394 +Action *CurAction = *Inputs.begin(); +if (!CurAction->isCollapsingWithDependingActionLegal() && CanBeCollapsed) + return nullptr; hfinkel wrote: > As a micro-optimization, check CanBeCollapsed first, then call the function: > > if (CanBeCollapsed && !CurAction->isCollapsingWithDependingActionLegal()) > Ok, makes sense. Fixed this in the last diff. Comment at: lib/Driver/Driver.cpp:2444 + /// collapsed with it. + struct JobActionInfoTy final { +/// The action this info refers to. hfinkel wrote: > Putting "Ty" on the end of a type name seems unusual for our code base (we > generally use that for typedefs or for variables that represent types of > other entities). Just JobActionInfo should be fine. Ok, fixed that in the last diff. 
Comment at: lib/Driver/Driver.cpp:2474 + const Tool * + attemptCombineAssembleBackendCompile(ArrayRef ActionInfo, + const ActionList *&Inputs, hfinkel wrote: > I don't think we need 'attempt' in the name here, just make this: > > combineAssembleBackendCompile Ok, fixed in last diff. Comment at: lib/Driver/Driver.cpp:2632 + +if (!T) + T = attemptCombineAssembleBackendCompile(ActionChain, Inputs, hfinkel wrote: > I don't think the syntactic regularity here is helpful enough to justify this > extra if. Just do: > > const Tool *T = combineAssembleBackendCompile(ActionChain, Inputs, > CollapsedOffloadAction); > Ok, fixed in last diff. https://reviews.llvm.org/D21840 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21840: [Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
sfantao updated this revision to Diff 75698. sfantao marked 10 inline comments as done. sfantao added a comment. - Address Hal Finkel suggestions - rename functions/reorder code/fix comments. https://reviews.llvm.org/D21840 Files: include/clang/Driver/Action.h lib/Driver/Driver.cpp Index: lib/Driver/Driver.cpp === --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -1930,7 +1930,7 @@ // Create the offload action with all dependences. When an offload action // is created the kinds are propagated to the host action, so we don't have -// to do that explicitely here. +// to do that explicitly here. OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), /*BoundArch*/ nullptr, ActiveOffloadKinds); @@ -2368,142 +2368,288 @@ } } } -/// Collapse an offloading action looking for a job of the given type. The input -/// action is changed to the input of the collapsed sequence. If we effectively -/// had a collapse return the corresponding offloading action, otherwise return -/// null. -template -static OffloadAction *collapseOffloadingAction(Action *&CurAction) { - if (!CurAction) -return nullptr; - if (auto *OA = dyn_cast(CurAction)) { -if (OA->hasHostDependence()) - if (auto *HDep = dyn_cast(OA->getHostDependence())) { -CurAction = HDep; -return OA; - } -if (OA->hasSingleDeviceDependence()) - if (auto *DDep = dyn_cast(OA->getSingleDeviceDependence())) { -CurAction = DDep; -return OA; + +namespace { +/// Utility class to control the collapse of dependent actions and select the +/// tools accordingly. +class ToolSelector final { + /// The tool chain this selector refers to. + const ToolChain &TC; + + /// The compilation this selector refers to. + const Compilation &C; + + /// The base action this selector refers to. + const JobAction *BaseAction; + + /// Set to true if the current toolchain refers to host actions. + bool IsHostSelector; + + /// Set to true if save-temps and embed-bitcode functionalities are active. 
+ bool SaveTemps; + bool EmbedBitcode; + + /// Get previous dependent action or null if that does not exist. If + /// \a CanBeCollapsed is false, that action must be legal to collapse or + /// null will be returned. + const JobAction *getPrevDependentAction(const ActionList &Inputs, + ActionList &SavedOffloadAction, + bool CanBeCollapsed = true) { +// An option can be collapsed only if it has a single input. +if (Inputs.size() != 1) + return nullptr; + +Action *CurAction = *Inputs.begin(); +if (CanBeCollapsed && +!CurAction->isCollapsingWithNextDependentActionLegal()) + return nullptr; + +// If the input action is an offload action. Look through it and save any +// offload action that can be dropped in the event of a collapse. +if (auto *OA = dyn_cast(CurAction)) { + // If the dependent action is a device action, we will attempt to collapse + // only with other device actions. Otherwise, we would do the same but + // with host actions only. + if (!IsHostSelector) { +if (OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)) { + CurAction = + OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true); + if (CanBeCollapsed && + !CurAction->isCollapsingWithNextDependentActionLegal()) +return nullptr; + SavedOffloadAction.push_back(OA); + return dyn_cast(CurAction); +} + } else if (OA->hasHostDependence()) { +CurAction = OA->getHostDependence(); +if (CanBeCollapsed && +!CurAction->isCollapsingWithNextDependentActionLegal()) + return nullptr; +SavedOffloadAction.push_back(OA); +return dyn_cast(CurAction); } + return nullptr; +} + +return dyn_cast(CurAction); } - return nullptr; -} -// Returns a Tool for a given JobAction. In case the action and its -// predecessors can be combined, updates Inputs with the inputs of the -// first combined action. If one of the collapsed actions is a -// CudaHostAction, updates CollapsedCHA with the pointer to it so the -// caller can deal with extra handling such action requires. 
-static const Tool *selectToolForJob(Compilation &C, bool SaveTemps, -bool EmbedBitcode, const ToolChain *TC, -const JobAction *JA, -const ActionList *&Inputs, -ActionList &CollapsedOffloadAction) { - const Tool *ToolForJob = nullptr; - CollapsedOffloadAction.clear(); - - // See if we should look for a compiler with an integrated assembler. We match - // bottom up, so what we are actually looking for
[PATCH] D21843: [Driver][OpenMP] Create tool chains for OpenMP offloading kind.
sfantao updated this revision to Diff 75705. sfantao marked an inline comment as done. sfantao added a comment. - Address Hal Finkel comments - make diagnostic message more informative. https://reviews.llvm.org/D21843 Files: include/clang/Basic/DiagnosticDriverKinds.td include/clang/Driver/Action.h include/clang/Driver/Driver.h lib/Driver/Action.cpp lib/Driver/Driver.cpp lib/Driver/Tools.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- /dev/null +++ test/Driver/openmp-offload.c @@ -0,0 +1,37 @@ +/// +/// Perform several driver tests for OpenMP offloading +/// + +/// ### + +/// Check whether an invalid OpenMP target is specified: +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s +// RUN: %clang -### -fopenmp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s +// CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' + +/// ### + +/// Check warning for empty -fopenmp-targets +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets= %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s +// RUN: %clang -### -fopenmp -fopenmp-targets= %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s +// CHK-EMPTY-OMPTARGETS: warning: joined argument expects additional value: '-fopenmp-targets=' + +/// ### + +/// Check error for no -fopenmp option +// RUN: %clang -### -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s +// RUN: %clang -### -fopenmp=libgomp -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s +// CHK-NO-FOPENMP: error: The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading, please use -fopenmp=libomp or -fopenmp=libiomp5. + +/// ### + +/// Check warning for duplicate offloading targets. 
+// RUN: %clang -### -ccc-print-phases -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s +// CHK-DUPLICATES: warning: The OpenMP offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified - will be ignored. Index: lib/Driver/Tools.cpp === --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -3009,72 +3009,23 @@ CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins")); } -namespace { -enum OpenMPRuntimeKind { - /// An unknown OpenMP runtime. We can't generate effective OpenMP code - /// without knowing what runtime to target. - OMPRT_Unknown, - - /// The LLVM OpenMP runtime. When completed and integrated, this will become - /// the default for Clang. - OMPRT_OMP, - - /// The GNU OpenMP runtime. Clang doesn't support generating OpenMP code for - /// this runtime but can swallow the pragmas, and find and link against the - /// runtime library itself. - OMPRT_GOMP, - - /// The legacy name for the LLVM OpenMP runtime from when it was the Intel - /// OpenMP runtime. We support this mode for users with existing dependencies - /// on this runtime library name. - OMPRT_IOMP5 -}; -} - -/// Compute the desired OpenMP runtime from the flag provided. -static OpenMPRuntimeKind getOpenMPRuntime(const ToolChain &TC, - const ArgList &Args) { - StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME); - - const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ); - if (A) -RuntimeName = A->getValue(); - - auto RT = llvm::StringSwitch(RuntimeName) -.Case("libomp", OMPRT_OMP) -.Case("libgomp", OMPRT_GOMP) -.Case("libiomp5", OMPRT_IOMP5) -.Default(OMPRT_Unknown); - - if (RT == OMPRT_Unknown) { -if (A) - TC.getDriver().Diag(diag::err_drv_unsupported_option_argument) - << A->getOption().getName() << A->getValue(); -else - // FIXME: We could use a nicer diagnostic here. 
- TC.getDriver().Diag(diag::err_drv_unsupported_opt) << "-fopenmp"; - } - - return RT; -} - static void addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, const ArgList &Args) { if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false)) return; - switch (getOpenMPRuntime(TC, Args)) { - case OMPRT_OMP: + switch (TC.getDriver().getOpenMPRuntime(Args
[PATCH] D21843: [Driver][OpenMP] Create tool chains for OpenMP offloading kind.
sfantao added a comment. Hi Hal, Thanks for the review! Comment at: include/clang/Basic/DiagnosticDriverKinds.td:163 +def err_drv_expecting_fopenmp_with_fopenmp_targets : Error< + "The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading.">; +def warn_drv_omp_offload_target_duplicate : Warning< hfinkel wrote: > This message does not tell the user how they might make their -fopenmp option > "compatible with offloading." Please make sure the message does, or has an > associated hint message which does. > Ok, the message is now: `The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading, please use -fopenmp=libomp or -fopenmp=libiomp5.` https://reviews.llvm.org/D21843 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21845: [Driver][OpenMP] Add specialized action builder for OpenMP offloading actions.
sfantao added a comment. Hi Hal, Thanks for the review! Fixed the typos in the new diff. Comment at: lib/Driver/Driver.cpp:1949 +SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs)); + // hfinkel wrote: > Since we can have both OpenMP offloading and CUDA, please add a test that the > phases work correctly for that case (or that we produce an error if that > can't currently work correctly). Added new test for that. The phases generation should work well if CUDA and OpenMP offloading are used on the same file. However, the bindings for these phases cannot be generated given that the NVPTX toolchain support for OpenMP is not implemented yet and the CUDA implementation interprets actions differently, e.g. in CUDA linking is the combination of binaries of different devices (GPUs) whereas for OpenMP actual linking takes place, i.e. symbols are resolved by looking into other compilation units. https://reviews.llvm.org/D21845 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21845: [Driver][OpenMP] Add specialized action builder for OpenMP offloading actions.
sfantao updated this revision to Diff 75722. sfantao marked 7 inline comments as done. sfantao added a comment. - Fix typos and add a test that checks phases when OpenMP and CUDA are used simultaneously. https://reviews.llvm.org/D21845 Files: lib/Driver/Driver.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -2,6 +2,11 @@ /// Perform several driver tests for OpenMP offloading /// +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: powerpc-registered-target +// REQUIRES: nvptx-registered-target + /// ### /// Check whether an invalid OpenMP target is specified: @@ -35,3 +40,136 @@ // RUN: %clang -### -ccc-print-phases -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s // CHK-DUPLICATES: warning: The OpenMP offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified - will be ignored. + +/// ### + +/// Check the phases graph when using a single target, different from the host. +/// We should have an offload action joining the host compile and device +/// preprocessor and another one joining the device linking outputs to the host +/// action. 
+// RUN: %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES %s +// CHK-PHASES: 0: input, "[[INPUT:.+\.c]]", c, (host-openmp) +// CHK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp) +// CHK-PHASES: 2: compiler, {1}, ir, (host-openmp) +// CHK-PHASES: 3: backend, {2}, assembler, (host-openmp) +// CHK-PHASES: 4: assembler, {3}, object, (host-openmp) +// CHK-PHASES: 5: linker, {4}, image, (host-openmp) +// CHK-PHASES: 6: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES: 7: preprocessor, {6}, cpp-output, (device-openmp) +// CHK-PHASES: 8: compiler, {7}, ir, (device-openmp) +// CHK-PHASES: 9: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir +// CHK-PHASES: 10: backend, {9}, assembler, (device-openmp) +// CHK-PHASES: 11: assembler, {10}, object, (device-openmp) +// CHK-PHASES: 12: linker, {11}, image, (device-openmp) +// CHK-PHASES: 13: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {5}, "device-openmp (x86_64-pc-linux-gnu)" {12}, image + +/// ### + +/// Check the phases when using multiple targets. Here we also add a library to +/// make sure it is treated as input by the device. 
+// RUN: %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES-LIB %s +// CHK-PHASES-LIB: 0: input, "somelib", object, (host-openmp) +// CHK-PHASES-LIB: 1: input, "[[INPUT:.+\.c]]", c, (host-openmp) +// CHK-PHASES-LIB: 2: preprocessor, {1}, cpp-output, (host-openmp) +// CHK-PHASES-LIB: 3: compiler, {2}, ir, (host-openmp) +// CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp) +// CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp) +// CHK-PHASES-LIB: 6: linker, {0, 5}, image, (host-openmp) +// CHK-PHASES-LIB: 7: input, "somelib", object, (device-openmp) +// CHK-PHASES-LIB: 8: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES-LIB: 9: preprocessor, {8}, cpp-output, (device-openmp) +// CHK-PHASES-LIB: 10: compiler, {9}, ir, (device-openmp) +// CHK-PHASES-LIB: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {10}, ir +// CHK-PHASES-LIB: 12: backend, {11}, assembler, (device-openmp) +// CHK-PHASES-LIB: 13: assembler, {12}, object, (device-openmp) +// CHK-PHASES-LIB: 14: linker, {7, 13}, image, (device-openmp) +// CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp) +// CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp) +// CHK-PHASES-LIB: 18: compiler, {17}, ir, (device-openmp) +// CHK-PHASES-LIB: 19: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {18}, ir +// CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp) +// CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp) +// CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp) +// CHK-PHASES-LIB: 23: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {6}, "device-openmp (x86_64-pc-linux-gnu)" {14}, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image + + +/// ### + 
+/// Check the phases when using
[PATCH] D21847: [Driver][OpenMP] Build jobs for OpenMP offloading actions for targets using gcc tool chains.
sfantao updated this revision to Diff 75730. sfantao marked 3 inline comments as done. sfantao added a comment. - Address Hal Finkel comments - fix comments/fix linker script comment. https://reviews.llvm.org/D21847 Files: include/clang/Driver/Options.td lib/Driver/Driver.cpp lib/Driver/Tools.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -173,3 +173,104 @@ // CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp) // CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp) // CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image + +/// ### + +/// Check of the commands passed to each tool when using valid OpenMP targets. +/// Here we also check that offloading does not break the use of integrated +/// assembler. It does however preclude the merge of the host compile and +/// backend phases. There are also two offloading specific options: +/// -fopenmp-is-device: will tell the frontend that it will generate code for a +/// target. +/// -fopenmp-host-ir-file-path: specifies the host IR file that can be loaded by +/// the target code generation to gather information about which declaration +/// really need to be emitted. +/// We use -fopenmp-dump-offload-linker-script to dump the linker script and +/// check its contents. 
+/// +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -fopenmp-dump-offload-linker-script 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS -check-prefix=CHK-LKS -check-prefix=CHK-LKS-REG %s +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -fopenmp-dump-offload-linker-script 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST -check-prefix=CHK-LKS -check-prefix=CHK-LKS-ST %s + +// Make sure we are not dumping the script unless the user requested it. +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s + +// +// Check the linker script contains what we expect. +// +// CHK-LKS: /* +// CHK-LKS: OpenMP Offload Linker Script +// CHK-LKS: *** Automatically generated by clang *** +// CHK-LKS-NODUMP-NOT: OpenMP Offload Linker Script. +// CHK-LKS: */ +// CHK-LKS: TARGET(binary) +// CHK-LKS-REG: INPUT([[T1BIN:.+\.out]]) +// CHK-LKS-REG: INPUT([[T2BIN:.+\.out]]) +// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-device-openmp-powerpc64le-ibm-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-device-openmp-x86_64-pc-linux-gnu]]) +// CHK-LKS: SECTIONS +// CHK-LKS: { +// CHK-LKS: .omp_offloading : +// CHK-LKS: ALIGN(0x10) +// CHK-LKS: { +// CHK-LKS: . = ALIGN(0x10); +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = .); +// CHK-LKS: [[T1BIN]] +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = .); +// CHK-LKS: . 
= ALIGN(0x10); +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.x86_64-pc-linux-gnu = .); +// CHK-LKS: [[T2BIN]] +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.x86_64-pc-linux-gnu = .); +// CHK-LKS: } +// CHK-LKS: .omp_offloading.entries : +// CHK-LKS: ALIGN(0x10) +// CHK-LKS: SUBALIGN(0x01) +// CHK-LKS: { +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.entries_begin = .); +// CHK-LKS: *(.omp_offloading.entries) +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.entries_end = .); +// CHK-LKS: } +// CHK-LKS: } +// CHK-LKS: INSERT BEFORE .data + +// +// Generate host BC file. +// +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "c" "[[INPUT:.+\.c]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTPP:.+\.i]]" "-x" "c" "[[INPUT:.+\.c]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// +// Compile for the powerpc device. +// +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenm
[PATCH] D21847: [Driver][OpenMP] Build jobs for OpenMP offloading actions for targets using gcc tool chains.
sfantao added a comment. Hi Hal, Thanks for the review! Comments inlined. Comment at: lib/Driver/Tools.cpp:334 + LksStream << " OpenMP Offload Linker Script.\n"; + LksStream << "*/\n"; + LksStream << "TARGET(binary)\n"; hfinkel wrote: > We should also say 'autogenerated' somewhere in this comment. Ok, makes sense. The comment is now: ``` OpenMP Offload Linker Script. *** Automatically generated by clang *** ``` Comment at: lib/Driver/Tools.cpp:386 + // Dump the contents of the linker script if the user requested that. + if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script)) +llvm::errs() << LksBuffer; hfinkel wrote: > I don't see why this is needed if we have -save-temps - I think we should > remove this option entirely. The reason for adding this option is that the test is done when the driver is in dry-run mode (`-###`) so I'm not supposed to generate any files. If we don't run in dry-run mode, we need to allow linking to actually happen, therefore the machine where the test runs needs to have a gcc-based toolchain and ld. Is there a way to request that in the required features set in the llvm-lit config file? Should I add a new feature? https://reviews.llvm.org/D21847 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21848: [Driver][OpenMP] Add logic for offloading-specific argument translation.
sfantao updated this revision to Diff 75732. sfantao marked 3 inline comments as done. sfantao added a comment. - Fix typos and check -dynamic when it comes to translating arguments for offloading gcc toolchains. https://reviews.llvm.org/D21848 Files: include/clang/Driver/Compilation.h include/clang/Driver/ToolChain.h lib/Driver/Compilation.cpp lib/Driver/Driver.cpp lib/Driver/MSVCToolChain.cpp lib/Driver/ToolChains.cpp lib/Driver/ToolChains.h test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -247,24 +247,24 @@ // // Compile for the powerpc device. // -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS: ld" {{.*}}"-o" "[[T1BIN]]" {{.*}}"[[T1OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1PP:.+\.i]]" "-x" "c" "[[INPUT]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" 
"-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "[[T1ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-o" "[[T1BIN]]" {{.*}}[[T1OBJ]] +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "[[T1BIN]]" {{.*}}[[T1OBJ]] // // Compile for the x86 device. // -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS: ld" {{.*}}"-o" "[[T2BIN]]" {{.*}}"[[T2OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2PP:.+\.i]]" "-x" "c" "[[INPUT]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2ASM:.+\.s]]" "-x" "ir" "[[T2BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "[[T2ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-o" "[[T2BIN]]" {{.*}}[[T2OBJ]] +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "[[T2BIN]]" {{.*}}[[T2OBJ]] // // Generate host object from the BC file and link using the linker script. 
Index: lib/Driver/ToolChains.h === --- lib/Driver/ToolChains.h +++ lib/Driver/ToolChains.h @@ -222,6 +222,9 @@ bool isPIEDefault() const override; bool isPICDefaultForced() const override; bool IsIntegratedAssemblerDefault() const override; + llvm::opt::DerivedArgList * + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, +Action::OffloadKind DeviceOffloadKind) const override; protected: Tool *getTool(Action::ActionClass AC) const override; @@ -317,8 +320,8 @@ bool HasNativeLLVMSupport() const override; llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, -StringRef BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, +Action::OffloadKind DeviceOffloadKind) const override; bool IsBlocksDefault() const override { // Always all
[PATCH] D21848: [Driver][OpenMP] Add logic for offloading-specific argument translation.
sfantao added a comment. Hi Hal, Thanks for the review! Comment at: lib/Driver/ToolChains.cpp:2854 + case options::OPT_shared: + case options::OPT_static: + case options::OPT_fPIC: hfinkel wrote: > And also? > > case options::OPT_dynamic: Oh, yes, that one too! Thanks! https://reviews.llvm.org/D21848 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21853: [Driver][OpenMP] Update actions builder to create unbundling action when necessary.
sfantao marked 7 inline comments as done. sfantao added a comment. Hi Hal, Thanks for the review! Comments inlined. Comment at: include/clang/Driver/Action.h:504 + /// unbundling action. + struct DependingActionInfoTy final { +/// \brief The tool chain of the depending action. hfinkel wrote: > Don't need 'Ty' in the name of this struct. Ok, using `DependentActionInfo` now. Comment at: lib/Driver/Driver.cpp:2091 +InputArg->getOption().getKind() == llvm::opt::Option::InputClass && +!types::isSrcFile(HostAction->getType())) { + auto UnbundlingHostAction = hfinkel wrote: > hfinkel wrote: > > This checks that the file needs to be preprocessed. What does preprocessing > > have to do with this? I don't imagine that providing a preprocessed source > > file as input should invoke the unbundler . > On second thought, this is okay. It does not make sense to have a non-bundled > preprocessed source for the input there, as the host and device compilation > don't share a common preprocessor state. > > We do need to be careful, perhaps, about .s files (which don't need > preprocessing as .S files do) -- we should probably assume that all > non-bundled .s files are host assembly code. Yes, that is what we do. If the bundler tool detects that the input is not a bundle, it assumes it is host code/bits. In either case, we still generate the unbundling tool as the driver doesn't check the contents of the files. Comment at: test/Driver/openmp-offload.c:274 +/// Check separate compilation with offloading - unbundling actions +// RUN: touch %t.i +// RUN: %clang -### -ccc-print-phases -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ hfinkel wrote: > hfinkel wrote: > > Oh, are you using .i to indicate a bundle instead of a preprocessed file? > > Don't do that. Please use a different suffix -- the bundler has its own > > file format. > Never mind; this is okay too. 
Ok, there is no particular suffix to indicate a file is a bundle. The (un)bundler, however, has the machinery to detect if a given file is a bundle; it just uses the extension to determine whether it is a human-readable file, bitcode file, or object file, because the bundle format is different in those three cases. https://reviews.llvm.org/D21853 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D21853: [Driver][OpenMP] Update actions builder to create unbundling action when necessary.
sfantao updated this revision to Diff 75741. sfantao marked 3 inline comments as done. sfantao added a comment. - Fix typos and use StringRef() instead of const char * to follow what the Driver does today when it comes to specify the bound architectures. https://reviews.llvm.org/D21853 Files: include/clang/Driver/Action.h include/clang/Driver/Types.h lib/Driver/Action.cpp lib/Driver/Driver.cpp lib/Driver/ToolChain.cpp lib/Driver/Types.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -302,3 +302,56 @@ // CHK-BUACTIONS: 17: backend, {2}, assembler, (host-openmp) // CHK-BUACTIONS: 18: assembler, {17}, object, (host-openmp) // CHK-BUACTIONS: 19: clang-offload-bundler, {9, 16, 18}, object, (host-openmp) + +/// ### + +/// Check separate compilation with offloading - unbundling actions +// RUN: touch %t.i +// RUN: %clang -### -ccc-print-phases -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBACTIONS %s + +// CHK-UBACTIONS: 0: input, "somelib", object, (host-openmp) +// CHK-UBACTIONS: 1: input, "[[INPUT:.+\.i]]", cpp-output, (host-openmp) +// CHK-UBACTIONS: 2: clang-offload-unbundler, {1}, cpp-output, (host-openmp) +// CHK-UBACTIONS: 3: compiler, {2}, ir, (host-openmp) +// CHK-UBACTIONS: 4: backend, {3}, assembler, (host-openmp) +// CHK-UBACTIONS: 5: assembler, {4}, object, (host-openmp) +// CHK-UBACTIONS: 6: linker, {0, 5}, image, (host-openmp) +// CHK-UBACTIONS: 7: input, "somelib", object, (device-openmp) +// CHK-UBACTIONS: 8: compiler, {2}, ir, (device-openmp) +// CHK-UBACTIONS: 9: offload, "host-openmp (powerpc64le--linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {8}, ir +// CHK-UBACTIONS: 10: backend, {9}, assembler, (device-openmp) +// CHK-UBACTIONS: 11: assembler, {10}, object, (device-openmp) +// CHK-UBACTIONS: 12: linker, {7, 11}, image, 
(device-openmp) +// CHK-UBACTIONS: 13: input, "somelib", object, (device-openmp) +// CHK-UBACTIONS: 14: compiler, {2}, ir, (device-openmp) +// CHK-UBACTIONS: 15: offload, "host-openmp (powerpc64le--linux)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir +// CHK-UBACTIONS: 16: backend, {15}, assembler, (device-openmp) +// CHK-UBACTIONS: 17: assembler, {16}, object, (device-openmp) +// CHK-UBACTIONS: 18: linker, {13, 17}, image, (device-openmp) +// CHK-UBACTIONS: 19: offload, "host-openmp (powerpc64le--linux)" {6}, "device-openmp (powerpc64le-ibm-linux-gnu)" {12}, "device-openmp (x86_64-pc-linux-gnu)" {18}, image + +/// ### + +/// Check separate compilation with offloading - unbundling/bundling actions +// RUN: touch %t.i +// RUN: %clang -### -ccc-print-phases -fopenmp -c -o %t.o -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBUACTIONS %s + +// CHK-UBUACTIONS: 0: input, "[[INPUT:.+\.i]]", cpp-output, (host-openmp) +// CHK-UBUACTIONS: 1: clang-offload-unbundler, {0}, cpp-output, (host-openmp) +// CHK-UBUACTIONS: 2: compiler, {1}, ir, (host-openmp) +// CHK-UBUACTIONS: 3: compiler, {1}, ir, (device-openmp) +// CHK-UBUACTIONS: 4: offload, "host-openmp (powerpc64le--linux)" {2}, "device-openmp (powerpc64le-ibm-linux-gnu)" {3}, ir +// CHK-UBUACTIONS: 5: backend, {4}, assembler, (device-openmp) +// CHK-UBUACTIONS: 6: assembler, {5}, object, (device-openmp) +// CHK-UBUACTIONS: 7: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {6}, object +// CHK-UBUACTIONS: 8: compiler, {1}, ir, (device-openmp) +// CHK-UBUACTIONS: 9: offload, "host-openmp (powerpc64le--linux)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir +// CHK-UBUACTIONS: 10: backend, {9}, assembler, (device-openmp) +// CHK-UBUACTIONS: 11: assembler, {10}, object, (device-openmp) +// CHK-UBUACTIONS: 12: offload, "device-openmp (x86_64-pc-linux-gnu)" {11}, object +// CHK-UBUACTIONS: 13: backend, {2}, 
assembler, (host-openmp) +// CHK-UBUACTIONS: 14: assembler, {13}, object, (host-openmp) +// CHK-UBUACTIONS: 15: clang-offload-bundler, {7, 12, 14}, object, (host-openmp) + Index: lib/Driver/Types.cpp === --- lib/Driver/Types.cpp +++ lib/Driver/Types.cpp @@ -170,6 +170,10 @@ } } +bool types::isSrcFile(ID Id) { + return Id != TY_Object && getPreprocessedType(Id) != TY_INVALID; +} + types::ID types::lookupTypeForExtension(llvm::StringRef Ext) { return llvm::StringSwitch(Ext) .Case("c", TY_C) Index: lib/Driver/ToolChain.cpp === --- lib/Driver/ToolChain.cpp +++ lib/Driver/ToolChain.cpp @@ -265,6 +265,7 @@ retu
[PATCH] D18172: [CUDA][OpenMP] Add a generic offload action builder
sfantao added a comment. Hi Justin, Thanks for letting me know. I'm looking into it. Thanks again, Samuel Repository: rL LLVM https://reviews.llvm.org/D18172 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285263 - Fix bug when compiling CUDA code with -emit-llvm and -o.
Author: sfantao Date: Wed Oct 26 19:53:34 2016 New Revision: 285263 URL: http://llvm.org/viewvc/llvm-project?rev=285263&view=rev Log: Fix bug when compiling CUDA code with -emit-llvm and -o. In this case the device code is not injected into an host action and therefore the user should get an error as -o can't be used when generating two outputs. Modified: cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/test/Driver/cuda-output-asm.cu Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=285263&r1=285262&r2=285263&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Wed Oct 26 19:53:34 2016 @@ -1611,6 +1611,11 @@ class OffloadingActionBuilder final { // We avoid creating host action in device-only mode. return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; + } else if (CurPhase > phases::Backend) { +// If we are past the backend phase and still have a device action, we +// don't have to do anything as this action is already a device +// top-level action. +return ABRT_Success; } assert(CurPhase < phases::Backend && "Generating single CUDA " Modified: cfe/trunk/test/Driver/cuda-output-asm.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-output-asm.cu?rev=285263&r1=285262&r2=285263&view=diff == --- cfe/trunk/test/Driver/cuda-output-asm.cu (original) +++ cfe/trunk/test/Driver/cuda-output-asm.cu Wed Oct 26 19:53:34 2016 @@ -1,4 +1,4 @@ -// Tests CUDA compilation with -S. +// Tests CUDA compilation with -S and -emit-llvm. 
// REQUIRES: clang-driver // REQUIRES: x86-registered-target @@ -26,4 +26,6 @@ // RUN: %clang -### -S -target x86_64-linux-gnu --cuda-device-only \ // RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 -o foo.s %s 2>&1 \ // RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s +// RUN: %clang -### -emit-llvm -c -target x86_64-linux-gnu -o foo.s %s 2>&1 \ +// RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s // MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285264 - Remove check for -o option in offloading actions builder.
Author: sfantao Date: Wed Oct 26 20:08:58 2016 New Revision: 285264 URL: http://llvm.org/viewvc/llvm-project?rev=285264&view=rev Log: Remove check for -o option in offloading actions builder. This check is also present when jobs are built, so the offloading builder check is not needed anymore. Modified: cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/test/Driver/cuda-output-asm.cu Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=285264&r1=285263&r2=285264&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Wed Oct 26 20:08:58 2016 @@ -1418,9 +1418,6 @@ class OffloadingActionBuilder final { /// The compilation that is using this builder. Compilation &C; - /// The derived arguments associated with this builder. - DerivedArgList &Args; - /// Map between an input argument and the offload kinds used to process it. std::map InputArgToOffloadKindMap; @@ -1760,7 +1757,7 @@ class OffloadingActionBuilder final { public: OffloadingActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) - : C(C), Args(Args) { + : C(C) { // Create a specialized builder for each device toolchain. IsValid = true; @@ -1876,31 +1873,17 @@ public: /// Add the offloading top level actions to the provided action list. bool appendTopLevelActions(ActionList &AL, Action *HostAction, const Arg *InputArg) { -auto NumActions = AL.size(); - for (auto *SB : SpecializedBuilders) { if (!SB->isValid()) continue; SB->appendTopLevelActions(AL); } -assert(NumActions <= AL.size() && "Expecting more actions, not less!"); - // Propagate to the current host action (if any) the offload information // associated with the current input. if (HostAction) HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg], /*BoundArch=*/nullptr); - -// If any action is added by the builders, -o is ambiguous if we have more -// than one top-level action. 
-if (NumActions < AL.size() && Args.hasArg(options::OPT_o) && -AL.size() > 1) { - C.getDriver().Diag( - clang::diag::err_drv_output_argument_with_multiple_files); - return true; -} - return false; } Modified: cfe/trunk/test/Driver/cuda-output-asm.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-output-asm.cu?rev=285264&r1=285263&r2=285264&view=diff == --- cfe/trunk/test/Driver/cuda-output-asm.cu (original) +++ cfe/trunk/test/Driver/cuda-output-asm.cu Wed Oct 26 20:08:58 2016 @@ -29,3 +29,5 @@ // RUN: %clang -### -emit-llvm -c -target x86_64-linux-gnu -o foo.s %s 2>&1 \ // RUN: | FileCheck -check-prefix MULTIPLE-OUTPUT-FILES %s // MULTIPLE-OUTPUT-FILES: error: cannot specify -o when generating multiple output files +// Make sure we do not get duplicate diagnostics. +// MULTIPLE-OUTPUT-FILES-NOT: error: cannot specify -o when generating multiple output files ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D18172: [CUDA][OpenMP] Add a generic offload action builder
sfantao added a comment. In https://reviews.llvm.org/D18172#580276, @jlebar wrote: > Hi, Samuel. > > This change introduced a new crash / assertion failure in the driver. > > $ echo | llvm-run clang -emit-llvm -c -x cuda - -o /dev/null > > > Before this patch, we get an error (perhaps not as helpful as we might want, > but still): > > clang-3.8: error: cannot specify -o when generating multiple output files > > > After this patch, we get an assertion failure: > > clang: ../clang/lib/Driver/Driver.cpp:1610: virtual (anonymous > namespace)::OffloadingActionBuilder::DeviceActionBuilder::ActionBuilderReturnCode > (anonymous > namespace)::OffloadingActionBuilder::CudaActionBuilder::getDeviceDepences(OffloadAction::DeviceDependences > &, phases::ID, phases::ID, PhasesTy &): Assertion `CurPhase < > phases::Backend && "Generating single CUDA " "instructions should only occur > " "before the backend phase!"' failed. > #0 0x01b07e28 llvm::sys::PrintStackTrace(llvm::raw_ostream&) > (/usr/local/google/home/jlebar/llvm/release/bin/clang+0x1b07e28) > #1 0x01b08566 SignalHandler(int) > (/usr/local/google/home/jlebar/llvm/release/bin/clang+0x1b08566) > #2 0x7f4bb7f89330 __restore_rt > (/lib/x86_64-linux-gnu/libpthread.so.0+0x10330) > #3 0x7f4bb6b7cc37 gsignal > /build/eglibc-oGUzwX/eglibc-2.19/signal/../nptl/sysdeps/unix/sysv/linux/raise.c:56:0 > #4 0x7f4bb6b80028 abort > /build/eglibc-oGUzwX/eglibc-2.19/stdlib/abort.c:91:0 > #5 0x7f4bb6b75bf6 __assert_fail_base > /build/eglibc-oGUzwX/eglibc-2.19/assert/assert.c:92:0 > #6 0x7f4bb6b75ca2 (/lib/x86_64-linux-gnu/libc.so.6+0x2fca2) > #7 0x01fa809b > (/usr/local/google/home/jlebar/llvm/release/bin/clang+0x1fa809b) > #8 0x01f99981 > clang::driver::Driver::BuildActions(clang::driver::Compilation&, > llvm::opt::DerivedArgList&, > llvm::SmallVector, > 16u> const&, llvm::SmallVector&) const > (/usr/local/google/home/jlebar/llvm/release/bin/clang+0x1f99981) > #9 0x01f9431c > clang::driver::Driver::BuildCompilation(llvm::ArrayRef) > 
(/usr/local/google/home/jlebar/llvm/release/bin/clang+0x1f9431c) > #10 0x007c0254 main > (/usr/local/google/home/jlebar/llvm/release/bin/clang+0x7c0254) > #11 0x7f4bb6b67f45 __libc_start_main > /build/eglibc-oGUzwX/eglibc-2.19/csu/libc-start.c:321:0 > #12 0x007bd9a2 _start > (/usr/local/google/home/jlebar/llvm/release/bin/clang+0x7bd9a2) > Stack dump: > 0. Program arguments: /usr/local/google/home/jlebar/llvm/release/bin/clang > -emit-llvm -c -x cuda - -o /dev/null > 1. Compilation construction > 2. Building compilation actions > Aborted (core dumped) > > > This was reported two weeks ago by Gurunath Kadam on cfe-dev, but I just got > around to bisecting it. > > Would you mind spinning a fix for this? This should be fixed in r285263. You should now get the same error message as before. Repository: rL LLVM https://reviews.llvm.org/D18172 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285307 - [Driver][CUDA][OpenMP] Reimplement tool selection in the driver.
Author: sfantao Date: Thu Oct 27 11:29:20 2016 New Revision: 285307 URL: http://llvm.org/viewvc/llvm-project?rev=285307&view=rev Log: [Driver][CUDA][OpenMP] Reimplement tool selection in the driver. Summary: This creates a tool selector in the driver that replaces the existing one. The goal is to better organize the code and make the selector easier to scale, in particular in the presence of offload actions that can be collapsed. The current implementation became more confusing when the support for offloading actions was added. This concern was expressed by Eric in http://reviews.llvm.org/D9888. This patch does not add new testing, it preserves the existing functionality. Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel Subscribers: whchung, guansong, mkuron, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, caomhin, arpith-jacob, carlo.bertolli Differential Revision: https://reviews.llvm.org/D21840 Modified: cfe/trunk/include/clang/Driver/Action.h cfe/trunk/lib/Driver/Driver.cpp Modified: cfe/trunk/include/clang/Driver/Action.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=285307&r1=285306&r2=285307&view=diff == --- cfe/trunk/include/clang/Driver/Action.h (original) +++ cfe/trunk/include/clang/Driver/Action.h Thu Oct 27 11:29:20 2016 @@ -92,6 +92,12 @@ private: ActionList Inputs; + /// Flag that is set to true if this action can be collapsed with others + /// actions that depend on it. This is true by default and set to false when + /// the action is used by two different tool chains, which is enabled by the + /// offloading support implementation. + bool CanBeCollapsedWithNextDependentAction = true; + protected: /// /// Offload information. @@ -136,6 +142,15 @@ public: return input_const_range(input_begin(), input_end()); } + /// Mark this action as not legal to collapse. 
+ void setCannotBeCollapsedWithNextDependentAction() { +CanBeCollapsedWithNextDependentAction = false; + } + /// Return true if this function can be collapsed with others. + bool isCollapsingWithNextDependentActionLegal() const { +return CanBeCollapsedWithNextDependentAction; + } + /// Return a string containing the offload kind of the action. std::string getOffloadingKindPrefix() const; /// Return a string that can be used as prefix in order to generate unique Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=285307&r1=285306&r2=285307&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Thu Oct 27 11:29:20 2016 @@ -1918,7 +1918,7 @@ public: // Create the offload action with all dependences. When an offload action // is created the kinds are propagated to the host action, so we don't have -// to do that explicitely here. +// to do that explicitly here. OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), /*BoundArch*/ nullptr, ActiveOffloadKinds); @@ -2356,142 +2356,288 @@ void Driver::BuildJobs(Compilation &C) c } } } -/// Collapse an offloading action looking for a job of the given type. The input -/// action is changed to the input of the collapsed sequence. If we effectively -/// had a collapse return the corresponding offloading action, otherwise return -/// null. -template -static OffloadAction *collapseOffloadingAction(Action *&CurAction) { - if (!CurAction) -return nullptr; - if (auto *OA = dyn_cast(CurAction)) { -if (OA->hasHostDependence()) - if (auto *HDep = dyn_cast(OA->getHostDependence())) { -CurAction = HDep; -return OA; - } -if (OA->hasSingleDeviceDependence()) - if (auto *DDep = dyn_cast(OA->getSingleDeviceDependence())) { -CurAction = DDep; -return OA; + +namespace { +/// Utility class to control the collapse of dependent actions and select the +/// tools accordingly. 
+class ToolSelector final { + /// The tool chain this selector refers to. + const ToolChain &TC; + + /// The compilation this selector refers to. + const Compilation &C; + + /// The base action this selector refers to. + const JobAction *BaseAction; + + /// Set to true if the current toolchain refers to host actions. + bool IsHostSelector; + + /// Set to true if save-temps and embed-bitcode functionalities are active. + bool SaveTemps; + bool EmbedBitcode; + + /// Get previous dependent action or null if that does not exist. If + /// \a CanBeCollapsed is false, that action must be legal to collapse or + /// null will be returned. + const JobAction *getPrevDependentAction(const ActionList &Inputs, + ActionList &SavedOffloadAction, + b
r285311 - [Driver][OpenMP] Create tool chains for OpenMP offloading kind.
Author: sfantao Date: Thu Oct 27 11:38:05 2016 New Revision: 285311 URL: http://llvm.org/viewvc/llvm-project?rev=285311&view=rev Log: [Driver][OpenMP] Create tool chains for OpenMP offloading kind. Summary: This patch adds new logic to create the necessary tool chains to support offloading for OpenMP. The OpenMP related options are checked and the tool chains created accordingly. Diagnostics are emitted in case the options are illegal or express unknown targets. Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel Subscribers: whchung, mkuron, mehdi_amini, cfe-commits, Hahnfeld, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D21843 Added: cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td cfe/trunk/include/clang/Driver/Action.h cfe/trunk/include/clang/Driver/Driver.h cfe/trunk/lib/Driver/Action.cpp cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/Tools.cpp Modified: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td?rev=285311&r1=285310&r2=285311&view=diff == --- cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td (original) +++ cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td Thu Oct 27 11:38:05 2016 @@ -159,6 +159,11 @@ def err_drv_omp_host_ir_file_not_found : "The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">; def err_drv_omp_host_target_not_supported : Error< "The target '%0' is not a supported OpenMP host target.">; +def err_drv_expecting_fopenmp_with_fopenmp_targets : Error< + "The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading, please use -fopenmp=libomp or -fopenmp=libiomp5.">; +def warn_drv_omp_offload_target_duplicate : Warning< + "The OpenMP offloading target '%0' is similar to target '%1' already specified - will be 
ignored.">, + InGroup; def err_drv_bitcode_unsupported_on_toolchain : Error< "-fembed-bitcode is not supported on versions of iOS prior to 6.0">; Modified: cfe/trunk/include/clang/Driver/Action.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=285311&r1=285310&r2=285311&view=diff == --- cfe/trunk/include/clang/Driver/Action.h (original) +++ cfe/trunk/include/clang/Driver/Action.h Thu Oct 27 11:38:05 2016 @@ -80,6 +80,7 @@ public: OFK_Host = 0x01, // The device offloading tool chains - one bit for each programming model. OFK_Cuda = 0x02, +OFK_OpenMP = 0x04, }; static const char *getClassName(ActionClass AC); Modified: cfe/trunk/include/clang/Driver/Driver.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Driver.h?rev=285311&r1=285310&r2=285311&view=diff == --- cfe/trunk/include/clang/Driver/Driver.h (original) +++ cfe/trunk/include/clang/Driver/Driver.h Thu Oct 27 11:38:05 2016 @@ -91,6 +91,26 @@ class Driver { LTOKind LTOMode; public: + enum OpenMPRuntimeKind { +/// An unknown OpenMP runtime. We can't generate effective OpenMP code +/// without knowing what runtime to target. +OMPRT_Unknown, + +/// The LLVM OpenMP runtime. When completed and integrated, this will become +/// the default for Clang. +OMPRT_OMP, + +/// The GNU OpenMP runtime. Clang doesn't support generating OpenMP code for +/// this runtime but can swallow the pragmas, and find and link against the +/// runtime library itself. +OMPRT_GOMP, + +/// The legacy name for the LLVM OpenMP runtime from when it was the Intel +/// OpenMP runtime. We support this mode for users with existing +/// dependencies on this runtime library name. +OMPRT_IOMP5 + }; + // Diag - Forwarding function for diagnostics. 
DiagnosticBuilder Diag(unsigned DiagID) const { return Diags.Report(DiagID); @@ -272,6 +292,9 @@ public: bool embedBitcodeEnabled() const { return BitcodeEmbed == EmbedBitcode; } bool embedBitcodeMarkerOnly() const { return BitcodeEmbed == EmbedMarker; } + /// Compute the desired OpenMP runtime from the flags provided. + OpenMPRuntimeKind getOpenMPRuntime(const llvm::opt::ArgList &Args) const; + /// @} /// @name Primary Functionality /// @{ Modified: cfe/trunk/lib/Driver/Action.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=285311&r1=285310&r2=285311&view=diff == --- cfe/trunk/lib/Driver/Action.cpp (original) +++ cfe/trunk/lib/Driver/Action.cpp Thu Oct 27 11:38:05 2016 @@ -87,6 +87,8 @@ std::string Action::getOffloadingKin
r285314 - [Driver][OpenMP] Add specialized action builder for OpenMP offloading actions.
Author: sfantao Date: Thu Oct 27 12:08:03 2016 New Revision: 285314 URL: http://llvm.org/viewvc/llvm-project?rev=285314&view=rev Log: [Driver][OpenMP] Add specialized action builder for OpenMP offloading actions. Summary: This patch adds a new specialized action builder to create OpenMP offloading actions. The specialized builder is added to the action builder already containing the CUDA specialized builder. OpenMP offloading dependences between host and device actions (expressed with OffloadActions) are different that what is used for CUDA: - Device compile action depends on the host compile action - the device frontend extracts the information about the declarations that have to be emitted by looking into the metadata produced by the host frontend. - The host link action depends on the device link actions - the device images are embedded in the host binary at link time. Reviewers: echristo, tra, rsmith, jlebar, ABataev, hfinkel Subscribers: mkuron, whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D21845 Modified: cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=285314&r1=285313&r2=285314&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Thu Oct 27 12:08:03 2016 @@ -1544,8 +1544,9 @@ class OffloadingActionBuilder final { /// added to the provided host action \a HostAction. By default it is /// inactive. 
virtual ActionBuilderReturnCode -getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, - phases::ID FinalPhase, PhasesTy &Phases) { +getDeviceDependences(OffloadAction::DeviceDependences &DA, + phases::ID CurPhase, phases::ID FinalPhase, + PhasesTy &Phases) { return ABRT_Inactive; } @@ -1603,8 +1604,9 @@ class OffloadingActionBuilder final { : DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {} ActionBuilderReturnCode -getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, - phases::ID FinalPhase, PhasesTy &Phases) override { +getDeviceDependences(OffloadAction::DeviceDependences &DA, + phases::ID CurPhase, phases::ID FinalPhase, + PhasesTy &Phases) override { if (!IsActive) return ABRT_Inactive; @@ -1828,7 +1830,118 @@ class OffloadingActionBuilder final { } }; - /// Add the implementation for other specialized builders here. + /// OpenMP action builder. The host bitcode is passed to the device frontend + /// and all the device linked images are passed to the host link phase. + class OpenMPActionBuilder final : public DeviceActionBuilder { +/// The OpenMP actions for the current input. +ActionList OpenMPDeviceActions; + +/// The linker inputs obtained for each toolchain. +SmallVector DeviceLinkerInputs; + + public: +OpenMPActionBuilder(Compilation &C, DerivedArgList &Args, +const Driver::InputList &Inputs) +: DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {} + +ActionBuilderReturnCode +getDeviceDependences(OffloadAction::DeviceDependences &DA, + phases::ID CurPhase, phases::ID FinalPhase, + PhasesTy &Phases) override { + + // We should always have an action for each input. + assert(OpenMPDeviceActions.size() == ToolChains.size() && + "Number of OpenMP actions and toolchains do not match."); + + // The host only depends on device action in the linking phase, when all + // the device images have to be embedded in the host image. 
+ if (CurPhase == phases::Link) { +assert(ToolChains.size() == DeviceLinkerInputs.size() && + "Toolchains and linker inputs sizes do not match."); +auto LI = DeviceLinkerInputs.begin(); +for (auto *A : OpenMPDeviceActions) { + LI->push_back(A); + ++LI; +} + +// We passed the device action as a host dependence, so we don't need to +// do anything else with them. +OpenMPDeviceActions.clear(); +return ABRT_Success; + } + + // By default, we produce an action for each device arch. + for (Action *&A : OpenMPDeviceActions) +A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A); + + return ABRT_Success; +} + +ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { + + // If this is an input action replicate it for each OpenMP toolchain. + if (auto *IA = dyn_cast(HostAction)) { +OpenMPDeviceActions.clear();
[PATCH] D21847: [Driver][OpenMP] Build jobs for OpenMP offloading actions for targets using gcc tool chains.
sfantao updated this revision to Diff 76061. sfantao marked 2 inline comments as done. sfantao added a comment. - Capitalize Clang in linker script comment and explain that the linker script dump option is required to test the driver with -###. https://reviews.llvm.org/D21847 Files: include/clang/Driver/Options.td lib/Driver/Driver.cpp lib/Driver/Tools.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -173,3 +173,104 @@ // CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp) // CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp) // CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image + +/// ### + +/// Check of the commands passed to each tool when using valid OpenMP targets. +/// Here we also check that offloading does not break the use of integrated +/// assembler. It does however preclude the merge of the host compile and +/// backend phases. There are also two offloading specific options: +/// -fopenmp-is-device: will tell the frontend that it will generate code for a +/// target. +/// -fopenmp-host-ir-file-path: specifies the host IR file that can be loaded by +/// the target code generation to gather information about which declaration +/// really need to be emitted. +/// We use -fopenmp-dump-offload-linker-script to dump the linker script and +/// check its contents. 
+/// +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -fopenmp-dump-offload-linker-script 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS -check-prefix=CHK-LKS -check-prefix=CHK-LKS-REG %s +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -fopenmp-dump-offload-linker-script 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST -check-prefix=CHK-LKS -check-prefix=CHK-LKS-ST %s + +// Make sure we are not dumping the script unless the user requested it. +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s + +// +// Check the linker script contains what we expect. +// +// CHK-LKS: /* +// CHK-LKS: OpenMP Offload Linker Script +// CHK-LKS: *** Automatically generated by clang *** +// CHK-LKS-NODUMP-NOT: OpenMP Offload Linker Script. +// CHK-LKS: */ +// CHK-LKS: TARGET(binary) +// CHK-LKS-REG: INPUT([[T1BIN:.+\.out]]) +// CHK-LKS-REG: INPUT([[T2BIN:.+\.out]]) +// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-device-openmp-powerpc64le-ibm-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-device-openmp-x86_64-pc-linux-gnu]]) +// CHK-LKS: SECTIONS +// CHK-LKS: { +// CHK-LKS: .omp_offloading : +// CHK-LKS: ALIGN(0x10) +// CHK-LKS: { +// CHK-LKS: . = ALIGN(0x10); +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = .); +// CHK-LKS: [[T1BIN]] +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = .); +// CHK-LKS: . 
= ALIGN(0x10); +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.x86_64-pc-linux-gnu = .); +// CHK-LKS: [[T2BIN]] +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.x86_64-pc-linux-gnu = .); +// CHK-LKS: } +// CHK-LKS: .omp_offloading.entries : +// CHK-LKS: ALIGN(0x10) +// CHK-LKS: SUBALIGN(0x01) +// CHK-LKS: { +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.entries_begin = .); +// CHK-LKS: *(.omp_offloading.entries) +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.entries_end = .); +// CHK-LKS: } +// CHK-LKS: } +// CHK-LKS: INSERT BEFORE .data + +// +// Generate host BC file. +// +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "c" "[[INPUT:.+\.c]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTPP:.+\.i]]" "-x" "c" "[[INPUT:.+\.c]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// +// Compile for the powerpc device. +// +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1O
[PATCH] D21847: [Driver][OpenMP] Build jobs for OpenMP offloading actions for targets using gcc tool chains.
sfantao updated this revision to Diff 76062. sfantao added a comment. - Capitalize Clang in the regression test too. https://reviews.llvm.org/D21847 Files: include/clang/Driver/Options.td lib/Driver/Driver.cpp lib/Driver/Tools.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -173,3 +173,104 @@ // CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp) // CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp) // CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image + +/// ### + +/// Check of the commands passed to each tool when using valid OpenMP targets. +/// Here we also check that offloading does not break the use of integrated +/// assembler. It does however preclude the merge of the host compile and +/// backend phases. There are also two offloading specific options: +/// -fopenmp-is-device: will tell the frontend that it will generate code for a +/// target. +/// -fopenmp-host-ir-file-path: specifies the host IR file that can be loaded by +/// the target code generation to gather information about which declaration +/// really need to be emitted. +/// We use -fopenmp-dump-offload-linker-script to dump the linker script and +/// check its contents. 
+/// +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -fopenmp-dump-offload-linker-script 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS -check-prefix=CHK-LKS -check-prefix=CHK-LKS-REG %s +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -fopenmp-dump-offload-linker-script 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST -check-prefix=CHK-LKS -check-prefix=CHK-LKS-ST %s + +// Make sure we are not dumping the script unless the user requested it. +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s +// RUN: %clang -### -fopenmp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s + +// +// Check the linker script contains what we expect. +// +// CHK-LKS: /* +// CHK-LKS: OpenMP Offload Linker Script +// CHK-LKS: *** Automatically generated by Clang *** +// CHK-LKS-NODUMP-NOT: OpenMP Offload Linker Script. +// CHK-LKS: */ +// CHK-LKS: TARGET(binary) +// CHK-LKS-REG: INPUT([[T1BIN:.+\.out]]) +// CHK-LKS-REG: INPUT([[T2BIN:.+\.out]]) +// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-device-openmp-powerpc64le-ibm-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-device-openmp-x86_64-pc-linux-gnu]]) +// CHK-LKS: SECTIONS +// CHK-LKS: { +// CHK-LKS: .omp_offloading : +// CHK-LKS: ALIGN(0x10) +// CHK-LKS: { +// CHK-LKS: . = ALIGN(0x10); +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = .); +// CHK-LKS: [[T1BIN]] +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = .); +// CHK-LKS: . 
= ALIGN(0x10); +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.x86_64-pc-linux-gnu = .); +// CHK-LKS: [[T2BIN]] +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.x86_64-pc-linux-gnu = .); +// CHK-LKS: } +// CHK-LKS: .omp_offloading.entries : +// CHK-LKS: ALIGN(0x10) +// CHK-LKS: SUBALIGN(0x01) +// CHK-LKS: { +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.entries_begin = .); +// CHK-LKS: *(.omp_offloading.entries) +// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.entries_end = .); +// CHK-LKS: } +// CHK-LKS: } +// CHK-LKS: INSERT BEFORE .data + +// +// Generate host BC file. +// +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "c" "[[INPUT:.+\.c]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTPP:.+\.i]]" "-x" "c" "[[INPUT:.+\.c]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// +// Compile for the powerpc device. +// +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS: ld" {{.*}}"-o"
r285319 - [Driver][OpenMP] Build jobs for OpenMP offloading actions for targets using gcc tool chains.
Author: sfantao Date: Thu Oct 27 12:31:22 2016 New Revision: 285319 URL: http://llvm.org/viewvc/llvm-project?rev=285319&view=rev Log: [Driver][OpenMP] Build jobs for OpenMP offloading actions for targets using gcc tool chains. Summary: This patch adds logic to create jobs for OpenMP offloading actions by: - tuning the jobs result information to use the offloading prefix even for (device) linking actions. - replacing the device inputs of the host linking jobs by a linker script that embeds them in the right sections. Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel Subscribers: mkuron, whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D21847 Modified: cfe/trunk/include/clang/Driver/Options.td cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/Tools.cpp cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/include/clang/Driver/Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=285319&r1=285318&r2=285319&view=diff == --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Thu Oct 27 12:31:22 2016 @@ -1103,6 +1103,8 @@ def fopenmp_use_tls : Flag<["-"], "fopen def fnoopenmp_use_tls : Flag<["-"], "fnoopenmp-use-tls">, Group, Flags<[CC1Option, NoArgumentUnused]>; def fopenmp_targets_EQ : CommaJoined<["-"], "fopenmp-targets=">, Flags<[DriverOption, CC1Option]>, HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">; +def fopenmp_dump_offload_linker_script : Flag<["-"], "fopenmp-dump-offload-linker-script">, Group, + Flags<[NoArgumentUnused]>; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group; def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group; def force__cpusubtype__ALL : Flag<["-"], "force_cpusubtype_ALL">; Modified: cfe/trunk/lib/Driver/Driver.cpp URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=285319&r1=285318&r2=285319&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Thu Oct 27 12:31:22 2016 @@ -2843,12 +2843,16 @@ InputInfo Driver::BuildJobsForAction( bool BuildForOffloadDevice) const { // The bound arch is not necessarily represented in the toolchain's triple -- // for example, armv7 and armv7s both map to the same triple -- so we need - // both in our map. + // both in our map. Also, we need to add the offloading device kind, as the + // same tool chain can be used for host and device for some programming + // models, e.g. OpenMP. std::string TriplePlusArch = TC->getTriple().normalize(); if (!BoundArch.empty()) { TriplePlusArch += "-"; TriplePlusArch += BoundArch; } + TriplePlusArch += "-"; + TriplePlusArch += A->getOffloadingKindPrefix(); std::pair ActionTC = {A, TriplePlusArch}; auto CachedResult = CachedResults.find(ActionTC); if (CachedResult != CachedResults.end()) { @@ -3169,14 +3173,14 @@ const char *Driver::GetNamedOutputPath(C // clang-cl uses BaseName for the executable name. 
NamedOutput = MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image); -} else if (MultipleArchs && !BoundArch.empty()) { +} else { SmallString<128> Output(getDefaultImageName()); Output += JA.getOffloadingFileNamePrefix(NormalizedTriple); - Output += "-"; - Output.append(BoundArch); + if (MultipleArchs && !BoundArch.empty()) { +Output += "-"; +Output.append(BoundArch); + } NamedOutput = C.getArgs().MakeArgString(Output.c_str()); -} else { - NamedOutput = getDefaultImageName(); } } else if (JA.getType() == types::TY_PCH && IsCLMode()) { NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName).c_str()); Modified: cfe/trunk/lib/Driver/Tools.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=285319&r1=285318&r2=285319&view=diff == --- cfe/trunk/lib/Driver/Tools.cpp (original) +++ cfe/trunk/lib/Driver/Tools.cpp Thu Oct 27 12:31:22 2016 @@ -230,7 +230,8 @@ static void addDirectoryList(const ArgLi } static void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, -const ArgList &Args, ArgStringList &CmdArgs) { +const ArgList &Args, ArgStringList &CmdArgs, +const JobAction &JA) { const Driver &D = TC.getDriver(); // Add extra linker input arguments which are not treated as inputs @@ -238,6 +239,14 @@ static void AddLinkerInputs(const ToolCh Args.AddAllArgValues(CmdArgs, options:
r285320 - [Driver][OpenMP] Add logic for offloading-specific argument translation.
Author: sfantao Date: Thu Oct 27 12:39:44 2016 New Revision: 285320 URL: http://llvm.org/viewvc/llvm-project?rev=285320&view=rev Log: [Driver][OpenMP] Add logic for offloading-specific argument translation. Summary: This patch includes support for argument translation that is specific to a given offloading kind. Additionally, it implements the translation for OpenMP device kinds in the gcc tool chain. With this patch, it is possible to compile a functional OpenMP application with offloading capabilities with no separate compilation. Reviewers: echristo, tra, jlebar, rsmith, ABataev, hfinkel Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D21848 Modified: cfe/trunk/include/clang/Driver/Compilation.h cfe/trunk/include/clang/Driver/ToolChain.h cfe/trunk/lib/Driver/Compilation.cpp cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/MSVCToolChain.cpp cfe/trunk/lib/Driver/ToolChains.cpp cfe/trunk/lib/Driver/ToolChains.h cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/include/clang/Driver/Compilation.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Compilation.h?rev=285320&r1=285319&r2=285320&view=diff == --- cfe/trunk/include/clang/Driver/Compilation.h (original) +++ cfe/trunk/include/clang/Driver/Compilation.h Thu Oct 27 12:39:44 2016 @@ -67,11 +67,27 @@ class Compilation { /// The root list of jobs. JobList Jobs; - /// Cache of translated arguments for a particular tool chain and bound - /// architecture. - llvm::DenseMap, - llvm::opt::DerivedArgList *> - TCArgs; + /// Cache of translated arguments for a particular tool chain, bound + /// architecture, and device offload kind. 
+ struct TCArgsKey final { +const ToolChain *TC = nullptr; +StringRef BoundArch; +Action::OffloadKind DeviceOffloadKind = Action::OFK_None; +bool operator<(const TCArgsKey &K) const { + if (TC < K.TC) +return true; + else if (TC == K.TC && BoundArch < K.BoundArch) +return true; + else if (TC == K.TC && BoundArch == K.BoundArch && + DeviceOffloadKind < K.DeviceOffloadKind) +return true; + return false; +} +TCArgsKey(const ToolChain *TC, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind) +: TC(TC), BoundArch(BoundArch), DeviceOffloadKind(DeviceOffloadKind) {} + }; + std::map TCArgs; /// Temporary files which should be removed on exit. llvm::opt::ArgStringList TempFiles; @@ -182,10 +198,15 @@ public: /// getArgsForToolChain - Return the derived argument list for the /// tool chain \p TC (or the default tool chain, if TC is not specified). + /// If a device offloading kind is specified, a translation specific for that + /// kind is performed, if any. /// /// \param BoundArch - The bound architecture name, or 0. - const llvm::opt::DerivedArgList &getArgsForToolChain(const ToolChain *TC, - StringRef BoundArch); + /// \param DeviceOffloadKind - The offload device kind that should be used in + /// the translation, if any. + const llvm::opt::DerivedArgList & + getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, + Action::OffloadKind DeviceOffloadKind); /// addTempFile - Add a file to remove on exit, and returns its /// argument. Modified: cfe/trunk/include/clang/Driver/ToolChain.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/ToolChain.h?rev=285320&r1=285319&r2=285320&view=diff == --- cfe/trunk/include/clang/Driver/ToolChain.h (original) +++ cfe/trunk/include/clang/Driver/ToolChain.h Thu Oct 27 12:39:44 2016 @@ -190,12 +190,15 @@ public: /// TranslateArgs - Create a new derived argument list for any argument /// translations this ToolChain may wish to perform, or 0 if no tool chain - /// specific translations are needed. 
+ /// specific translations are needed. If \p DeviceOffloadKind is specified + /// the translation specific for that offload kind is performed. /// /// \param BoundArch - The bound architecture name, or 0. + /// \param DeviceOffloadKind - The device offload kind used for the + /// translation. virtual llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, -StringRef BoundArch) const { + TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, +Action::OffloadKind DeviceOffloadKind) const { return nullptr; } Modified: cfe/trunk/lib/Driver/Compilation.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Compilation.cpp?rev=285320&r1=285319&r2=285320&view=diff ==
r285323 - [Driver][OpenMP] Update actions builder to create bundling action when necessary.
Author: sfantao Date: Thu Oct 27 12:50:43 2016 New Revision: 285323 URL: http://llvm.org/viewvc/llvm-project?rev=285323&view=rev Log: [Driver][OpenMP] Update actions builder to create bundling action when necessary. Summary: In order to save the user from dealing with multiple output files (for host and device) while using separate compilation, a new action `OffloadBundlingAction` is used when the last phase is not linking. This action will then result in a job that uses the proposed bundling tool to create a single preprocessed/IR/ASM/Object file from multiple ones. The job creation for the new action will be proposed in a separate patch. Reviewers: echristo, tra, jlebar, ABataev, hfinkel Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D21852 Modified: cfe/trunk/include/clang/Driver/Action.h cfe/trunk/lib/Driver/Action.cpp cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/ToolChain.cpp cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/include/clang/Driver/Action.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=285323&r1=285322&r2=285323&view=diff == --- cfe/trunk/include/clang/Driver/Action.h (original) +++ cfe/trunk/include/clang/Driver/Action.h Thu Oct 27 12:50:43 2016 @@ -66,9 +66,10 @@ public: DsymutilJobClass, VerifyDebugInfoJobClass, VerifyPCHJobClass, +OffloadBundlingJobClass, JobClassFirst = PreprocessJobClass, -JobClassLast = VerifyPCHJobClass +JobClassLast = OffloadBundlingJobClass }; // The offloading kind determines if this action is binded to a particular @@ -481,6 +482,18 @@ public: } }; +class OffloadBundlingJobAction : public JobAction { + void anchor() override; + +public: + // Offloading bundling doesn't change the type of output. 
+ OffloadBundlingJobAction(ActionList &Inputs); + + static bool classof(const Action *A) { +return A->getKind() == OffloadBundlingJobClass; + } +}; + } // end namespace driver } // end namespace clang Modified: cfe/trunk/lib/Driver/Action.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=285323&r1=285322&r2=285323&view=diff == --- cfe/trunk/lib/Driver/Action.cpp (original) +++ cfe/trunk/lib/Driver/Action.cpp Thu Oct 27 12:50:43 2016 @@ -36,6 +36,8 @@ const char *Action::getClassName(ActionC case DsymutilJobClass: return "dsymutil"; case VerifyDebugInfoJobClass: return "verify-debug-info"; case VerifyPCHJobClass: return "verify-pch"; + case OffloadBundlingJobClass: +return "clang-offload-bundler"; } llvm_unreachable("invalid class"); @@ -346,3 +348,8 @@ void VerifyPCHJobAction::anchor() {} VerifyPCHJobAction::VerifyPCHJobAction(Action *Input, types::ID Type) : VerifyJobAction(VerifyPCHJobClass, Input, Type) {} + +void OffloadBundlingJobAction::anchor() {} + +OffloadBundlingJobAction::OffloadBundlingJobAction(ActionList &Inputs) +: JobAction(OffloadBundlingJobClass, Inputs, Inputs.front()->getType()) {} Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=285323&r1=285322&r2=285323&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Thu Oct 27 12:50:43 2016 @@ -1568,6 +1568,9 @@ class OffloadingActionBuilder final { /// found. virtual bool initialize() { return false; } +/// Return true if the builder can use bundling/unbundling. +virtual bool canUseBundlerUnbundler() const { return false; } + /// Return true if this builder is valid. We have a valid builder if we have /// associated device tool chains. 
bool isValid() { return !ToolChains.empty(); } @@ -1911,6 +1914,26 @@ class OffloadingActionBuilder final { return ABRT_Success; } +void appendTopLevelActions(ActionList &AL) override { + if (OpenMPDeviceActions.empty()) +return; + + // We should always have an action for each input. + assert(OpenMPDeviceActions.size() == ToolChains.size() && + "Number of OpenMP actions and toolchains do not match."); + + // Append all device actions followed by the proper offload action. + auto TI = ToolChains.begin(); + for (auto *A : OpenMPDeviceActions) { +OffloadAction::DeviceDependences Dep; +Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_OpenMP); +AL.push_back(C.MakeAction(Dep, A->getType())); +++TI; + } + // We no longer need the action stored in this builder. + OpenMPDeviceActions.clear(); +} + void appendLinkDependences(OffloadAction::DeviceDependences &DA
r285324 - [Driver][OpenMP] Update actions builder to create unbundling action when necessary.
Author: sfantao Date: Thu Oct 27 13:00:51 2016 New Revision: 285324 URL: http://llvm.org/viewvc/llvm-project?rev=285324&view=rev Log: [Driver][OpenMP] Update actions builder to create unbundling action when necessary. Summary: Each time that offloading support is requested by the user and the input file is not a source file, an action `OffloadUnbundlingAction` is created to signal that the input file may contain bundles, so that the proper tool is then invoked to attempt to extract the components of the bundle. This patch adds the logic to create that action in offload action builder. The job creation for the new action will be proposed in a separate patch. Reviewers: echristo, tra, jlebar, ABataev, hfinkel Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D21853 Modified: cfe/trunk/include/clang/Driver/Action.h cfe/trunk/include/clang/Driver/Types.h cfe/trunk/lib/Driver/Action.cpp cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/ToolChain.cpp cfe/trunk/lib/Driver/Types.cpp cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/include/clang/Driver/Action.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=285324&r1=285323&r2=285324&view=diff == --- cfe/trunk/include/clang/Driver/Action.h (original) +++ cfe/trunk/include/clang/Driver/Action.h Thu Oct 27 13:00:51 2016 @@ -67,9 +67,10 @@ public: VerifyDebugInfoJobClass, VerifyPCHJobClass, OffloadBundlingJobClass, +OffloadUnbundlingJobClass, JobClassFirst = PreprocessJobClass, -JobClassLast = OffloadBundlingJobClass +JobClassLast = OffloadUnbundlingJobClass }; // The offloading kind determines if this action is binded to a particular @@ -494,6 +495,52 @@ public: } }; +class OffloadUnbundlingJobAction final : public JobAction { + void anchor() override; + +public: + /// Type that provides information about the actions that depend on this + /// unbundling action. 
+ struct DependentActionInfo final { +/// \brief The tool chain of the dependent action. +const ToolChain *DependentToolChain = nullptr; +/// \brief The bound architecture of the dependent action. +StringRef DependentBoundArch; +/// \brief The offload kind of the dependent action. +const OffloadKind DependentOffloadKind = OFK_None; +DependentActionInfo(const ToolChain *DependentToolChain, +StringRef DependentBoundArch, +const OffloadKind DependentOffloadKind) +: DependentToolChain(DependentToolChain), + DependentBoundArch(DependentBoundArch), + DependentOffloadKind(DependentOffloadKind){}; + }; + +private: + /// Container that keeps information about each dependence of this unbundling + /// action. + SmallVector DependentActionInfoArray; + +public: + // Offloading unbundling doesn't change the type of output. + OffloadUnbundlingJobAction(Action *Input); + + /// Register information about a dependent action. + void registerDependentActionInfo(const ToolChain *TC, StringRef BoundArch, + OffloadKind Kind) { +DependentActionInfoArray.push_back({TC, BoundArch, Kind}); + } + + /// Return the information about all depending actions. + ArrayRef getDependentActionsInfo() const { +return DependentActionInfoArray; + } + + static bool classof(const Action *A) { +return A->getKind() == OffloadUnbundlingJobClass; + } +}; + } // end namespace driver } // end namespace clang Modified: cfe/trunk/include/clang/Driver/Types.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.h?rev=285324&r1=285323&r2=285324&view=diff == --- cfe/trunk/include/clang/Driver/Types.h (original) +++ cfe/trunk/include/clang/Driver/Types.h Thu Oct 27 13:00:51 2016 @@ -80,6 +80,11 @@ namespace types { /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++ sources and headers). bool isObjC(ID Id); + /// isSrcFile - Is this a source file, i.e. something that still has to be + /// preprocessed. 
The logic behind this is the same that decides if the first + /// compilation phase is a preprocessing one. + bool isSrcFile(ID Id); + /// lookupTypeForExtension - Lookup the type to use for the file /// extension \p Ext. ID lookupTypeForExtension(llvm::StringRef Ext); Modified: cfe/trunk/lib/Driver/Action.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=285324&r1=285323&r2=285324&view=diff == --- cfe/trunk/lib/Driver/Action.cpp (original) +++ cfe/trunk/lib/Driver/Action.cpp Thu Oct 27 13:00:51 2016 @@ -38,6 +38,8 @@ const char *Action::getClass
[PATCH] D21853: [Driver][OpenMP] Update actions builder to create unbundling action when necessary.
sfantao updated this revision to Diff 76064. sfantao marked an inline comment as done. sfantao added a comment. - Add comment explaining that the bundler tool can detect if the input file is a bundle or not. https://reviews.llvm.org/D21853 Files: include/clang/Driver/Action.h include/clang/Driver/Types.h lib/Driver/Action.cpp lib/Driver/Driver.cpp lib/Driver/ToolChain.cpp lib/Driver/Types.cpp test/Driver/openmp-offload.c Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -302,3 +302,56 @@ // CHK-BUACTIONS: 17: backend, {2}, assembler, (host-openmp) // CHK-BUACTIONS: 18: assembler, {17}, object, (host-openmp) // CHK-BUACTIONS: 19: clang-offload-bundler, {9, 16, 18}, object, (host-openmp) + +/// ### + +/// Check separate compilation with offloading - unbundling actions +// RUN: touch %t.i +// RUN: %clang -### -ccc-print-phases -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBACTIONS %s + +// CHK-UBACTIONS: 0: input, "somelib", object, (host-openmp) +// CHK-UBACTIONS: 1: input, "[[INPUT:.+\.i]]", cpp-output, (host-openmp) +// CHK-UBACTIONS: 2: clang-offload-unbundler, {1}, cpp-output, (host-openmp) +// CHK-UBACTIONS: 3: compiler, {2}, ir, (host-openmp) +// CHK-UBACTIONS: 4: backend, {3}, assembler, (host-openmp) +// CHK-UBACTIONS: 5: assembler, {4}, object, (host-openmp) +// CHK-UBACTIONS: 6: linker, {0, 5}, image, (host-openmp) +// CHK-UBACTIONS: 7: input, "somelib", object, (device-openmp) +// CHK-UBACTIONS: 8: compiler, {2}, ir, (device-openmp) +// CHK-UBACTIONS: 9: offload, "host-openmp (powerpc64le--linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {8}, ir +// CHK-UBACTIONS: 10: backend, {9}, assembler, (device-openmp) +// CHK-UBACTIONS: 11: assembler, {10}, object, (device-openmp) +// CHK-UBACTIONS: 12: linker, {7, 11}, image, (device-openmp) +// CHK-UBACTIONS: 13: input, 
"somelib", object, (device-openmp) +// CHK-UBACTIONS: 14: compiler, {2}, ir, (device-openmp) +// CHK-UBACTIONS: 15: offload, "host-openmp (powerpc64le--linux)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir +// CHK-UBACTIONS: 16: backend, {15}, assembler, (device-openmp) +// CHK-UBACTIONS: 17: assembler, {16}, object, (device-openmp) +// CHK-UBACTIONS: 18: linker, {13, 17}, image, (device-openmp) +// CHK-UBACTIONS: 19: offload, "host-openmp (powerpc64le--linux)" {6}, "device-openmp (powerpc64le-ibm-linux-gnu)" {12}, "device-openmp (x86_64-pc-linux-gnu)" {18}, image + +/// ### + +/// Check separate compilation with offloading - unbundling/bundling actions +// RUN: touch %t.i +// RUN: %clang -### -ccc-print-phases -fopenmp -c -o %t.o -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBUACTIONS %s + +// CHK-UBUACTIONS: 0: input, "[[INPUT:.+\.i]]", cpp-output, (host-openmp) +// CHK-UBUACTIONS: 1: clang-offload-unbundler, {0}, cpp-output, (host-openmp) +// CHK-UBUACTIONS: 2: compiler, {1}, ir, (host-openmp) +// CHK-UBUACTIONS: 3: compiler, {1}, ir, (device-openmp) +// CHK-UBUACTIONS: 4: offload, "host-openmp (powerpc64le--linux)" {2}, "device-openmp (powerpc64le-ibm-linux-gnu)" {3}, ir +// CHK-UBUACTIONS: 5: backend, {4}, assembler, (device-openmp) +// CHK-UBUACTIONS: 6: assembler, {5}, object, (device-openmp) +// CHK-UBUACTIONS: 7: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {6}, object +// CHK-UBUACTIONS: 8: compiler, {1}, ir, (device-openmp) +// CHK-UBUACTIONS: 9: offload, "host-openmp (powerpc64le--linux)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir +// CHK-UBUACTIONS: 10: backend, {9}, assembler, (device-openmp) +// CHK-UBUACTIONS: 11: assembler, {10}, object, (device-openmp) +// CHK-UBUACTIONS: 12: offload, "device-openmp (x86_64-pc-linux-gnu)" {11}, object +// CHK-UBUACTIONS: 13: backend, {2}, assembler, (host-openmp) +// CHK-UBUACTIONS: 14: 
assembler, {13}, object, (host-openmp) +// CHK-UBUACTIONS: 15: clang-offload-bundler, {7, 12, 14}, object, (host-openmp) + Index: lib/Driver/Types.cpp === --- lib/Driver/Types.cpp +++ lib/Driver/Types.cpp @@ -170,6 +170,10 @@ } } +bool types::isSrcFile(ID Id) { + return Id != TY_Object && getPreprocessedType(Id) != TY_INVALID; +} + types::ID types::lookupTypeForExtension(llvm::StringRef Ext) { return llvm::StringSwitch(Ext) .Case("c", TY_C) Index: lib/Driver/ToolChain.cpp === --- lib/Driver/ToolChain.cpp +++ lib/Driver/ToolChain.cpp @@ -265,6 +265,7 @@ return getClang(); case Action::OffloadBundlingJ
r285325 - [Driver][OpenMP] Add support to create jobs for bundling actions.
Author: sfantao Date: Thu Oct 27 13:04:42 2016 New Revision: 285325 URL: http://llvm.org/viewvc/llvm-project?rev=285325&view=rev Log: [Driver][OpenMP] Add support to create jobs for bundling actions. Summary: This patch adds the support to create a job for the `OffloadBundlingAction` which will invoke the `clang-offload-bundler` tool. Reviewers: echristo, tra, jlebar, ABataev, hfinkel Subscribers: whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D21856 Modified: cfe/trunk/include/clang/Driver/Action.h cfe/trunk/include/clang/Driver/ToolChain.h cfe/trunk/lib/Driver/Action.cpp cfe/trunk/lib/Driver/ToolChain.cpp cfe/trunk/lib/Driver/Tools.cpp cfe/trunk/lib/Driver/Tools.h cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/include/clang/Driver/Action.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=285325&r1=285324&r2=285325&view=diff == --- cfe/trunk/include/clang/Driver/Action.h (original) +++ cfe/trunk/include/clang/Driver/Action.h Thu Oct 27 13:04:42 2016 @@ -160,6 +160,8 @@ public: /// files for each offloading kind. std::string getOffloadingFileNamePrefix(llvm::StringRef NormalizedTriple) const; + /// Return a string containing a offload kind name. + static StringRef GetOffloadKindName(OffloadKind Kind); /// Set the device offload info of this action and propagate it to its /// dependences. 
Modified: cfe/trunk/include/clang/Driver/ToolChain.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/ToolChain.h?rev=285325&r1=285324&r2=285325&view=diff == --- cfe/trunk/include/clang/Driver/ToolChain.h (original) +++ cfe/trunk/include/clang/Driver/ToolChain.h Thu Oct 27 13:04:42 2016 @@ -85,10 +85,12 @@ private: mutable std::unique_ptr Clang; mutable std::unique_ptr Assemble; mutable std::unique_ptr Link; + mutable std::unique_ptr OffloadBundler; Tool *getClang() const; Tool *getAssemble() const; Tool *getLink() const; Tool *getClangAs() const; + Tool *getOffloadBundler() const; mutable std::unique_ptr SanitizerArguments; Modified: cfe/trunk/lib/Driver/Action.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=285325&r1=285324&r2=285325&view=diff == --- cfe/trunk/lib/Driver/Action.cpp (original) +++ cfe/trunk/lib/Driver/Action.cpp Thu Oct 27 13:04:42 2016 @@ -9,6 +9,7 @@ #include "clang/Driver/Action.h" #include "clang/Driver/ToolChain.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" @@ -128,6 +129,22 @@ Action::getOffloadingFileNamePrefix(llvm return Res; } +/// Return a string with the offload kind name. If that is not defined, we +/// assume 'host'. +llvm::StringRef Action::GetOffloadKindName(OffloadKind Kind) { + switch (Kind) { + case OFK_None: + case OFK_Host: +return "host"; + case OFK_Cuda: +return "cuda"; + case OFK_OpenMP: +return "openmp"; + +// TODO: Add other programming models here. 
+ } +} + void InputAction::anchor() {} InputAction::InputAction(const Arg &_Input, types::ID _Type) Modified: cfe/trunk/lib/Driver/ToolChain.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChain.cpp?rev=285325&r1=285324&r2=285325&view=diff == --- cfe/trunk/lib/Driver/ToolChain.cpp (original) +++ cfe/trunk/lib/Driver/ToolChain.cpp Thu Oct 27 13:04:42 2016 @@ -239,6 +239,12 @@ Tool *ToolChain::getLink() const { return Link.get(); } +Tool *ToolChain::getOffloadBundler() const { + if (!OffloadBundler) +OffloadBundler.reset(new tools::OffloadBundler(*this)); + return OffloadBundler.get(); +} + Tool *ToolChain::getTool(Action::ActionClass AC) const { switch (AC) { case Action::AssembleJobClass: @@ -266,8 +272,7 @@ Tool *ToolChain::getTool(Action::ActionC case Action::OffloadBundlingJobClass: case Action::OffloadUnbundlingJobClass: -// FIXME: Add a tool for the bundling actions. -return nullptr; +return getOffloadBundler(); } llvm_unreachable("Invalid tool kind."); Modified: cfe/trunk/lib/Driver/Tools.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=285325&r1=285324&r2=285325&view=diff == --- cfe/trunk/lib/Driver/Tools.cpp (original) +++ cfe/trunk/lib/Driver/Tools.cpp Thu Oct 27 13:04:42 2016 @@ -7053,6 +7053,72 @@ void ClangAs::ConstructJob(Compilation & SplitDebugName(Args, Input)); } +void OffloadBundler::ConstructJob(
r285326 - [Driver][OpenMP] Add support to create jobs for unbundling actions.
Author: sfantao Date: Thu Oct 27 13:14:55 2016 New Revision: 285326 URL: http://llvm.org/viewvc/llvm-project?rev=285326&view=rev Log: [Driver][OpenMP] Add support to create jobs for unbundling actions. Summary: This patch adds the support to create jobs for the `OffloadBundlingAction` which will invoke the `clang-offload-bundler` tool to unbundle input files. Unlike other actions, unbundling actions have multiple outputs. Therefore, this patch adds the required changes to have a variant of `Tool::ConstructJob` with multiple outputs. The way the naming of the results is implemented is also slightly modified so that the same action can use a different offloading prefix for each use by the different offloading actions. With this patch, it is possible to compile a functional OpenMP binary with offloading support, even with separate compilation. Reviewers: echristo, tra, jlebar, ABataev, hfinkel Subscribers: mkuron, whchung, mehdi_amini, cfe-commits, Hahnfeld, andreybokhanko, arpith-jacob, carlo.bertolli, caomhin Differential Revision: https://reviews.llvm.org/D21857 Modified: cfe/trunk/include/clang/Driver/Action.h cfe/trunk/include/clang/Driver/Driver.h cfe/trunk/include/clang/Driver/Tool.h cfe/trunk/lib/Driver/Action.cpp cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/Tool.cpp cfe/trunk/lib/Driver/Tools.cpp cfe/trunk/lib/Driver/Tools.h cfe/trunk/test/Driver/cuda-bindings.cu cfe/trunk/test/Driver/openmp-offload.c cfe/trunk/test/Driver/opt-record.c Modified: cfe/trunk/include/clang/Driver/Action.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=285326&r1=285325&r2=285326&view=diff == --- cfe/trunk/include/clang/Driver/Action.h (original) +++ cfe/trunk/include/clang/Driver/Action.h Thu Oct 27 13:14:55 2016 @@ -157,9 +157,12 @@ public: /// Return a string containing the offload kind of the action. 
std::string getOffloadingKindPrefix() const; /// Return a string that can be used as prefix in order to generate unique - /// files for each offloading kind. - std::string - getOffloadingFileNamePrefix(llvm::StringRef NormalizedTriple) const; + /// files for each offloading kind. By default, no prefix is used for + /// non-device kinds, except if \a CreatePrefixForHost is set. + static std::string + GetOffloadingFileNamePrefix(OffloadKind Kind, + llvm::StringRef NormalizedTriple, + bool CreatePrefixForHost = false); /// Return a string containing a offload kind name. static StringRef GetOffloadKindName(OffloadKind Kind); Modified: cfe/trunk/include/clang/Driver/Driver.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Driver.h?rev=285326&r1=285325&r2=285326&view=diff == --- cfe/trunk/include/clang/Driver/Driver.h (original) +++ cfe/trunk/include/clang/Driver/Driver.h Thu Oct 27 13:14:55 2016 @@ -12,6 +12,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LLVM.h" +#include "clang/Driver/Action.h" #include "clang/Driver/Phases.h" #include "clang/Driver/Types.h" #include "clang/Driver/Util.h" @@ -42,7 +43,6 @@ class FileSystem; namespace driver { - class Action; class Command; class Compilation; class InputInfo; @@ -417,14 +417,14 @@ public: /// BuildJobsForAction - Construct the jobs to perform for the action \p A and /// return an InputInfo for the result of running \p A. Will only construct - /// jobs for a given (Action, ToolChain, BoundArch) tuple once. + /// jobs for a given (Action, ToolChain, BoundArch, DeviceKind) tuple once. InputInfo BuildJobsForAction(Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfo> &CachedResults, - bool BuildForOffloadDevice) const; + Action::OffloadKind TargetDeviceOffloadKind) const; /// Returns the default name for linked images (e.g., "a.out"). 
const char *getDefaultImageName() const; @@ -495,7 +495,7 @@ private: bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfo> &CachedResults, - bool BuildForOffloadDevice) const; + Action::OffloadKind TargetDeviceOffloadKind) const; public: /// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and Modified: cfe/trunk/include/clang/Driver/Tool.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Tool.h?rev=285326&r1=285325&r2=285326&view=diff == --- cfe/trunk/include/clang/Driver/Tool.h (original) +++ cfe/trunk/inc
[PATCH] D21857: [Driver][OpenMP] Add support to create jobs for unbundling actions.
sfantao updated this revision to Diff 76069. sfantao added a comment. - Rebase. https://reviews.llvm.org/D21857 Files: include/clang/Driver/Action.h include/clang/Driver/Driver.h include/clang/Driver/Tool.h lib/Driver/Action.cpp lib/Driver/Driver.cpp lib/Driver/Tool.cpp lib/Driver/Tools.cpp lib/Driver/Tools.h test/Driver/cuda-bindings.cu test/Driver/openmp-offload.c test/Driver/opt-record.c Index: test/Driver/opt-record.c === --- test/Driver/opt-record.c +++ test/Driver/opt-record.c @@ -11,7 +11,7 @@ // CHECK-NO-O: "-cc1" // CHECK-NO-O-DAG: "-opt-record-file" "opt-record.opt.yaml" -// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-device-cuda-{{nvptx64|nvptx}}-nvidia-cuda-sm_20.opt.yaml" +// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-cuda-{{nvptx64|nvptx}}-nvidia-cuda-sm_20.opt.yaml" // CHECK-EQ: "-cc1" // CHECK-EQ: "-opt-record-file" "BAR.txt" Index: test/Driver/openmp-offload.c === --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -210,8 +210,8 @@ // CHK-LKS: TARGET(binary) // CHK-LKS-REG: INPUT([[T1BIN:.+\.out]]) // CHK-LKS-REG: INPUT([[T2BIN:.+\.out]]) -// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-device-openmp-powerpc64le-ibm-linux-gnu]]) -// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-device-openmp-x86_64-pc-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]]) +// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-openmp-x86_64-pc-linux-gnu]]) // CHK-LKS: SECTIONS // CHK-LKS: { // CHK-LKS: .omp_offloading : @@ -389,3 +389,92 @@ // CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTASM:.+\.s]]" "-x" "ir" "[[HOSTBC]]" // CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le--linux" "-filetype" "obj" {{.*}}"-o" "[[HOSTOBJ:.+\.o]]" "[[HOSTASM]]" // CHK-BUJOBS-ST: clang-offload-bundler" "-type=o" "-targets=openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu,host-powerpc64le--linux" "-outputs=[[RES:.+\.o]]" "-inputs=[[T1OBJ]],[[T2OBJ]],[[HOSTOBJ]]" + +/// ### + +/// 
Check separate compilation with offloading - unbundling jobs construct +// RUN: touch %t.i +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS %s +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.i -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS-ST %s +// RUN: touch %t.o +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.o 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS2 %s +// RUN: %clang -### -fopenmp -o %t.out -lsomelib -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %t.o -save-temps 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-UBJOBS2-ST %s + +// Unbundle and create host BC. +// CHK-UBJOBS: clang-offload-bundler" "-type=i" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.i]]" "-outputs=[[HOSTPP:.+\.i]],[[T1PP:.+\.i]],[[T2PP:.+\.i]]" "-unbundle" +// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-UBJOBS-ST: clang-offload-bundler" "-type=i" "-targets=host-powerpc64le--linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=[[INPUT:.+\.i]]" "-outputs=[[HOSTPP:.+\.i]],[[T1PP:.+\.i]],[[T2PP:.+\.i]]" "-unbundle" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" + +// Create target 1 object. 
+// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBJOBS: ld" {{.*}}"-o" "[[T1BIN:.+\.out]]" {{.*}}"[[T1OBJ]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[
r285327 - Use -fopenmp=libomp in all OpenMP offloading tests.
Author: sfantao Date: Thu Oct 27 13:29:57 2016 New Revision: 285327 URL: http://llvm.org/viewvc/llvm-project?rev=285327&view=rev Log: Use -fopenmp=libomp in all OpenMP offloading tests. This will make sure the right features are being tested even for machines that default to libgomp. Modified: cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/test/Driver/openmp-offload.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/openmp-offload.c?rev=285327&r1=285326&r2=285327&view=diff == --- cfe/trunk/test/Driver/openmp-offload.c (original) +++ cfe/trunk/test/Driver/openmp-offload.c Thu Oct 27 13:29:57 2016 @@ -12,8 +12,6 @@ /// Check whether an invalid OpenMP target is specified: // RUN: %clang -### -fopenmp=libomp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s -// RUN: %clang -### -fopenmp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s // CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' /// ### @@ -21,8 +19,6 @@ /// Check warning for empty -fopenmp-targets // RUN: %clang -### -fopenmp=libomp -fopenmp-targets= %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s -// RUN: %clang -### -fopenmp -fopenmp-targets= %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-EMPTY-OMPTARGETS %s // CHK-EMPTY-OMPTARGETS: warning: joined argument expects additional value: '-fopenmp-targets=' /// ### @@ -37,7 +33,7 @@ /// ### /// Check warning for duplicate offloading targets. 
-// RUN: %clang -### -ccc-print-phases -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \ +// RUN: %clang -### -ccc-print-phases -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s // CHK-DUPLICATES: warning: The OpenMP offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified - will be ignored. @@ -47,7 +43,7 @@ /// We should have an offload action joining the host compile and device /// preprocessor and another one joining the device linking outputs to the host /// action. -// RUN: %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu %s 2>&1 \ +// RUN: %clang -ccc-print-phases -fopenmp=libomp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES %s // CHK-PHASES: 0: input, "[[INPUT:.+\.c]]", c, (host-openmp) // CHK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp) @@ -68,7 +64,7 @@ /// Check the phases when using multiple targets. Here we also add a library to /// make sure it is treated as input by the device. 
-// RUN: %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \ +// RUN: %clang -ccc-print-phases -lsomelib -fopenmp=libomp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-LIB %s // CHK-PHASES-LIB: 0: input, "somelib", object, (host-openmp) // CHK-PHASES-LIB: 1: input, "[[INPUT:.+\.c]]", c, (host-openmp) @@ -100,7 +96,7 @@ /// Check the phases when using multiple targets and multiple source files // RUN: echo " " > %t.c -// RUN: %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %t.c 2>&1 \ +// RUN: %clang -ccc-print-phases -lsomelib -fopenmp=libomp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %t.c 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-FILES %s // CHK-PHASES-FILES: 0: input, "somelib", object, (host-openmp) // CHK-PHASES-FILES: 1: input, "[[INPUT1:.+\.c]]", c, (host-openmp) @@ -148,7 +144,7 @@ /// Check the phases graph when using a single GPU target, and check the OpenMP /// and CUDA phases are articulated correctly. -// RUN: %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -x cuda %s 2>&1 \ +// RUN: %clang -ccc-print-phases -fopenmp=libomp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -x cuda %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-WITH-CUDA %s // CHK-PHASES-WITH-CUDA: 0: input, "[[INPUT:.+\.c]]", cuda, (host-cuda-openmp) // CHK-PHASES-WITH-CUDA: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda-openmp) @@ -188,15 +184,15 @@ /// We
[PATCH] D21845: [Driver][OpenMP] Add specialized action builder for OpenMP offloading actions.
sfantao added a comment. A PR was generated as requested by Hal explaining why we do not generate jobs for NVPTX targets yet. https://llvm.org/bugs/show_bug.cgi?id=30812 https://reviews.llvm.org/D21845 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D9888: [OPENMP] Driver support for OpenMP offloading
sfantao abandoned this revision. sfantao marked 8 inline comments as done. sfantao added a comment. Hi Jonas, In https://reviews.llvm.org/D9888#581809, @Hahnfeld wrote: > I think these changes have been contributed to trunk in multiple commits so > this can be closed? You're right, this can be closed now. Thanks! Samuel https://reviews.llvm.org/D9888 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285404 - Change OpenMP offload driver tests so that it doesn't use the full file path during tests.
Author: sfantao Date: Fri Oct 28 10:11:50 2016 New Revision: 285404 URL: http://llvm.org/viewvc/llvm-project?rev=285404&view=rev Log: Change OpenMP offload driver tests so that it doesn't use the full file path during tests. This was causing failures on windows bots. Modified: cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/test/Driver/openmp-offload.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/openmp-offload.c?rev=285404&r1=285403&r2=285404&view=diff == --- cfe/trunk/test/Driver/openmp-offload.c (original) +++ cfe/trunk/test/Driver/openmp-offload.c Fri Oct 28 10:11:50 2016 @@ -236,40 +236,62 @@ // // Generate host BC file. // -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "c" "[[INPUT:.+\.c]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTPP:.+\.i]]" "-x" "c" "[[INPUT:.+\.c]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[HOSTBC:.+\.bc]]" "-x" "cpp-output" "[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "c" " +// CHK-COMMANDS-SAME: [[INPUT:[^\\/]+\.c]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-E" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[HOSTPP:[^\\/]+\.i]]" "-x" "c" " +// CHK-COMMANDS-ST-SAME: [[INPUT:[^\\/]+\.c]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le--linux" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" 
"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" // // Compile for the powerpc device. // -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" -// CHK-COMMANDS: ld" {{.*}}"-o" "[[T1BIN]]" {{.*}}"[[T1OBJ]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1PP:.+\.i]]" "-x" "c" "[[INPUT]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "[[T1ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "[[T1BIN]]" {{.*}}[[T1OBJ]] +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" +// CHK-COMMANDS: ld" {{.*}}"-o" "{{.*}}[[T1BIN]]" {{.*}}"{{.*}}[[T1OBJ]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[T1PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]" "-fopenmp-is-device" 
"-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[T1OBJ:[^\\/]+\.o]]" "{{.*}}[[T1ASM]]" +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "{{.*}}[[T1BIN]]" {{.*}}[[T1OBJ]] // // Compile for the x86 device. // -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" -// CHK-COMMANDS: ld" {{.*}}"-o" "[[T2BIN]]" {{.*}}"[[T2OBJ]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}
[PATCH] D21845: [Driver][OpenMP] Add specialized action builder for OpenMP offloading actions.
sfantao added a comment. Hi Michael, In https://reviews.llvm.org/D21845#581988, @mkuron wrote: > I think `OffloadAction::DeviceDependences::add(..., ..., > /*BoundArch=*/nullptr, Action::OFK_OpenMP)` is never sufficient. The invalid > `BoundArch` eventually ends up in `NVPTX::Assembler::ConstructJob` and > triggers an assert; I don't think there is any code path with OpenMP > offloading where the GPU architecture is set correctly. If I compile a simple > test file with > > clang -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -c example.c > -march=sm_30 > > > the error message is the following: > > clang: /llvm/tools/clang/lib/Driver/Tools.cpp:11960: virtual void > clang::driver::tools::NVPTX::Assembler::ConstructJob(clang::driver::Compilation&, > const clang::driver::JobAction&, const clang::driver::InputInfo&, const > InputInfoList&, const llvm::opt::ArgList&, const char*) const: Assertion > `gpu_arch != CudaArch::UNKNOWN && "Device action expected to have an > architecture."' failed. > > > On a related but different note, leaving out `-march=sm_30` in the clang call > above causes an earlier assert to trigger: > > clang: /llvm/tools/clang/lib/Driver/ToolChains.cpp:5049: virtual void > clang::driver::toolchains::CudaToolChain::addClangTargetOptions(const > llvm::opt::ArgList&, llvm::opt::ArgStringList&) const: Assertion > `!GpuArch.empty() && "Must have an explicit GPU arch."' failed. > > > The more appropriate flag would probably be `--cuda-gpu-arch=sm_30`, but that > is not recognized. > > I thought I'd just report this here as it seemed to me that with the merge of > all of @sfantao's code yesterday the OpenMP offloading support should mostly > work. If this is not the case or I should report the issue elsewhere, please > let me know. Also, I'm not sure if/how this relates to the bug report you > mentioned. These patches do not implement any specific support for GPUs. Only toolchains based on gcc are expected to work. 
GPUs will require some extra work on the toolchain which is in progress. In any case, it is not nice to have these assertions when trying an unsupported toolchain. I'll work on a diagnostic so that the driver stops before attempting to create jobs for unsupported toolchains. Thanks for reporting this! https://reviews.llvm.org/D21845 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285408 - Define extra variable in OpenMP offloading driver tests.
Author: sfantao Date: Fri Oct 28 10:42:38 2016 New Revision: 285408 URL: http://llvm.org/viewvc/llvm-project?rev=285408&view=rev Log: Define extra variable in OpenMP offloading driver tests. Modified: cfe/trunk/test/Driver/openmp-offload.c Modified: cfe/trunk/test/Driver/openmp-offload.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/openmp-offload.c?rev=285408&r1=285407&r2=285408&view=diff == --- cfe/trunk/test/Driver/openmp-offload.c (original) +++ cfe/trunk/test/Driver/openmp-offload.c Fri Oct 28 10:42:38 2016 @@ -250,7 +250,8 @@ // // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" " // CHK-COMMANDS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" -// CHK-COMMANDS: ld" {{.*}}"-o" "{{.*}}[[T1BIN]]" {{.*}}"{{.*}}[[T1OBJ]]" +// CHK-COMMANDS: ld" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" " // CHK-COMMANDS-ST-SAME: [[T1PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" " @@ -259,14 +260,15 @@ // CHK-COMMANDS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" " // CHK-COMMANDS-ST-SAME: [[T1OBJ:[^\\/]+\.o]]" "{{.*}}[[T1ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "{{.*}}[[T1BIN]]" {{.*}}[[T1OBJ]] - +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"{{.*}}[[T1OBJ]]" // // Compile for the x86 device. 
// // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" " // CHK-COMMANDS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" -// CHK-COMMANDS: ld" {{.*}}"-o" "{{.*}}[[T2BIN]]" {{.*}}"{{.*}}[[T2OBJ]]" +// CHK-COMMANDS: ld" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" " // CHK-COMMANDS-ST-SAME: [[T2PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" " @@ -275,7 +277,8 @@ // CHK-COMMANDS-ST-SAME: [[T2ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T2BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" " // CHK-COMMANDS-ST-SAME: [[T2OBJ:[^\\/]+\.o]]" "{{.*}}[[T2ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" "{{.*}}[[T2BIN]]" {{.*}}[[T2OBJ]] +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]" // // Generate host object from the BC file and link using the linker script. ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r285925 - Rename the version of ConstructJob for multiple outputs to ConstructJobMultipleOutputs.
Author: sfantao Date: Thu Nov 3 10:41:50 2016 New Revision: 285925 URL: http://llvm.org/viewvc/llvm-project?rev=285925&view=rev Log: Rename the version of ConstructJob for multiple outputs to ConstructJobMultipleOutputs. It was causing trouble with the GCC bots. Modified: cfe/trunk/include/clang/Driver/Tool.h cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/lib/Driver/Tool.cpp cfe/trunk/lib/Driver/Tools.cpp cfe/trunk/lib/Driver/Tools.h Modified: cfe/trunk/include/clang/Driver/Tool.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Tool.h?rev=285925&r1=285924&r2=285925&view=diff == --- cfe/trunk/include/clang/Driver/Tool.h (original) +++ cfe/trunk/include/clang/Driver/Tool.h Thu Nov 3 10:41:50 2016 @@ -138,11 +138,11 @@ public: /// tool chain specific translations applied. /// \param LinkingOutput If this output will eventually feed the /// linker, then this is the final output name of the linked image. - virtual void ConstructJob(Compilation &C, const JobAction &JA, -const InputInfoList &Outputs, -const InputInfoList &Inputs, -const llvm::opt::ArgList &TCArgs, -const char *LinkingOutput) const; + virtual void ConstructJobMultipleOutputs(Compilation &C, const JobAction &JA, + const InputInfoList &Outputs, + const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const; }; } // end namespace driver Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=285925&r1=285924&r2=285925&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Thu Nov 3 10:41:50 2016 @@ -3199,7 +3199,7 @@ InputInfo Driver::BuildJobsForActionNoCa C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), LinkingOutput); else - T->ConstructJob( + T->ConstructJobMultipleOutputs( C, *JA, UnbundlingResults, InputInfos, C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), LinkingOutput); Modified: 
cfe/trunk/lib/Driver/Tool.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tool.cpp?rev=285925&r1=285924&r2=285925&view=diff == --- cfe/trunk/lib/Driver/Tool.cpp (original) +++ cfe/trunk/lib/Driver/Tool.cpp Thu Nov 3 10:41:50 2016 @@ -23,11 +23,11 @@ Tool::Tool(const char *_Name, const char Tool::~Tool() { } -void Tool::ConstructJob(Compilation &C, const JobAction &JA, -const InputInfoList &Outputs, -const InputInfoList &Inputs, -const llvm::opt::ArgList &TCArgs, -const char *LinkingOutput) const { +void Tool::ConstructJobMultipleOutputs(Compilation &C, const JobAction &JA, + const InputInfoList &Outputs, + const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const { assert(Outputs.size() == 1 && "Expected only one output by default!"); ConstructJob(C, JA, Outputs.front(), Inputs, TCArgs, LinkingOutput); -}; +} Modified: cfe/trunk/lib/Driver/Tools.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=285925&r1=285924&r2=285925&view=diff == --- cfe/trunk/lib/Driver/Tools.cpp (original) +++ cfe/trunk/lib/Driver/Tools.cpp Thu Nov 3 10:41:50 2016 @@ -7133,11 +7133,10 @@ void OffloadBundler::ConstructJob(Compil CmdArgs, None)); } -void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfoList &Outputs, - const InputInfoList &Inputs, - const llvm::opt::ArgList &TCArgs, - const char *LinkingOutput) const { +void OffloadBundler::ConstructJobMultipleOutputs( +Compilation &C, const JobAction &JA, const InputInfoList &Outputs, +const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs, +const char *LinkingOutput) const { // The version with multiple outputs is expected to refer to a unbundling job. auto &UA = cast(JA); Modified: cfe/trunk/lib/Driver/Tools.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.h?rev=285925&r1=285924&r2=28592
r247584 - Update cxx-irgen.cpp test to allow signext in alwaysinline functions.
Author: sfantao Date: Mon Sep 14 12:41:32 2015 New Revision: 247584 URL: http://llvm.org/viewvc/llvm-project?rev=247584&view=rev Log: Update cxx-irgen.cpp test to allow signext in alwaysinline functions. This was causing an error in Power8 targets. Modified: cfe/trunk/test/Modules/cxx-irgen.cpp Modified: cfe/trunk/test/Modules/cxx-irgen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Modules/cxx-irgen.cpp?rev=247584&r1=247583&r2=247584&view=diff == --- cfe/trunk/test/Modules/cxx-irgen.cpp (original) +++ cfe/trunk/test/Modules/cxx-irgen.cpp Mon Sep 14 12:41:32 2015 @@ -26,7 +26,7 @@ namespace EmitInlineMethods { }; } -// CHECK-DAG: define internal i32 @_ZN1SIiE1gEv.alwaysinline() #[[ALWAYS_INLINE:.*]] align +// CHECK-DAG: define internal {{.*}}i32 @_ZN1SIiE1gEv.alwaysinline() #[[ALWAYS_INLINE:.*]] align int a = S::g(); int b = h(); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao created this revision. sfantao added reviewers: ABataev, hfinkel, rjmccall. sfantao added subscribers: cfe-commits, Hahnfeld. This patch rebases and creates a new revision for http://reviews.llvm.org/D11361 as requested by John. Here's the adapted original summary (the global captures issue has been fixed in the meantime): This patch implements the outlining for offloading functions for code annotated with the OpenMP target directive. It uses a temporary naming of the outlined functions that will have to be updated later on once target side codegen and registration of offloading libraries are implemented - the naming needs to be made unique in the produced library. Unlike other captured regions, target offloading cannot directly use the Capture declaration, as each captured field has to be passed explicitly to the runtime library and associated with potentially different mapping types (to/from/alloc...). A proxy function is used to wrap the default capturing implemented in clang and adapt it to what OpenMP offloading requires. Thanks! 
Samuel http://reviews.llvm.org/D12871 Files: lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,753 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. 
+ +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] 
+ #pragma omp target + { + } + + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao abandoned this revision. sfantao added a comment. Closing revision. It has been replaced by http://reviews.llvm.org/D12871 as suggested by John. Thanks! Samuel http://reviews.llvm.org/D11361 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao updated this revision to Diff 34869. sfantao added a comment. Update code to use implicit parameters for the captured variables using `GenerateOpenMPCapturedStmtFunction`, similarly to what is done to other directives. `UseOnlyReferences` boolean was added to `GenerateOpenMPCapturedStmtFunction`, given that for target regions, all the captures need to be references, VAT sizes included. `UseOnlyReferences` is set to false by default and only set to true when emitting a target directive. http://reviews.llvm.org/D12871 Files: lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenFunction.h test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,606 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. 
+ +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] 
+ #pragma omp target + { + } + + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i
Re: [PATCH] D12262: [OpenMP] Capture global variables in target regions.
sfantao updated the summary for this revision. sfantao updated this revision to Diff 34870. sfantao added a comment. Rebase with the last changes in http://reviews.llvm.org/D12871. http://reviews.llvm.org/D12262 Files: include/clang/Basic/OpenMPKinds.h include/clang/Sema/Sema.h lib/Basic/OpenMPKinds.cpp lib/Sema/SemaExpr.cpp lib/Sema/SemaOpenMP.cpp test/OpenMP/target_codegen_global_capture.cpp Index: test/OpenMP/target_codegen_global_capture.cpp === --- /dev/null +++ test/OpenMP/target_codegen_global_capture.cpp @@ -0,0 +1,173 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + + +// CHECK-DAG: [[GA:@.+]] = global double 1.00e+00 +// CHECK-DAG: [[GB:@.+]] = global double 2.00e+00 +// CHECK-DAG: [[GC:@.+]] = global double 3.00e+00 +// CHECK-DAG: [[GD:@.+]] = global double 4.00e+00 +// CHECK-DAG: [[FA:@.+]] = internal global float 5.00e+00 +// CHECK-DAG: [[FB:@.+]] = internal global float 6.00e+00 +// CHECK-DAG: [[FC:@.+]] = internal global float 7.00e+00 +// CHECK-DAG: [[FD:@.+]] = internal global float 8.00e+00 +// CHECK-DAG: [[BA:@.+]] = internal global float 9.00e+00 +// CHECK-DAG: [[BB:@.+]] = internal global float 1.00e+01 +// CHECK-DAG: [[BC:@.+]] = internal global float 1.10e+01 +// CHECK-DAG: [[BD:@.+]] = internal global float 1.20e+01 +double Ga = 1.0; +double Gb = 
2.0; +double Gc = 3.0; +double Gd = 4.0; + +// CHECK: define {{.*}} @{{.*}}foo{{.*}}( +// CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]]) +// CHECK: [[LA:%.+]] = alloca i16 +// CHECK: [[LB:%.+]] = alloca i16 +// CHECK: [[LC:%.+]] = alloca i16 +// CHECK: [[LD:%.+]] = alloca i16 +int foo(short a, short b, short c, short d){ + static float Sa = 5.0; + static float Sb = 6.0; + static float Sc = 7.0; + static float Sd = 8.0; + + // CHECK-DAG: [[REFB:%.+]] = bitcast i16* [[LB]] to i8* + // CHECK-DAG: store i8* [[REFB]], i8** [[GEPB:%.+]], align + // CHECK-DAG: [[REFC:%.+]] = bitcast i16* [[LC]] to i8* + // CHECK-DAG: store i8* [[REFC]], i8** [[GEPC:%.+]], align + // CHECK-DAG: [[REFD:%.+]] = bitcast i16* [[LD]] to i8* + // CHECK-DAG: store i8* [[REFD]], i8** [[GEPD:%.+]], align + // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]], align + // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]], align + // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]], align + // CHECK-DAG: store i8* bitcast (float* [[FB]] to i8*), i8** [[GEPFB:%.+]], align + // CHECK-DAG: store i8* bitcast (float* [[FC]] to i8*), i8** [[GEPFC:%.+]], align + // CHECK-DAG: store i8* bitcast (float* [[FD]] to i8*), i8** [[GEPFD:%.+]], align + // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 
{{.+}} + // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK: call i32 @__tgt_target + // CHECK: call void [[OFFLOADF:@.+]]( + // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd + #pragma omp target if(Ga>0.0 && a>0 && Sa>0.0) + { +b += 1; +Gb += 1.0; +Sb += 1.0; + +// CHECK: define internal void [[OFFLOADF]]({{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}, {{.+}}* {{.*}}%{{.+}}) +// The parallel region only uses 3 captures. +//
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao updated this revision to Diff 34933. sfantao added a comment. Minor changes to address Alexey's remarks. http://reviews.llvm.org/D12871 Files: lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenFunction.h test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,606 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. 
+ +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] 
+ #pragma omp target + { + } + + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i16* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i16* %{{.+}} to i8* + + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i16* {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] +
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao added inline comments. Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3044-3054 @@ +3043,13 @@ + + if (auto *VAT = dyn_cast(ElementType.getTypePtr())) { +auto VATInfo = CGF.getVLASize(VAT); +Size = llvm::ConstantInt::get( +CGM.SizeTy, +CGM.getContext().getTypeSizeInChars(VATInfo.second).getQuantity()); +Size = CGF.Builder.CreateNUWMul(Size, VATInfo.first); + } else { +uint64_t ElementTypeSize = +CGM.getContext().getTypeSizeInChars(ElementType).getQuantity(); +Size = llvm::ConstantInt::get(CGM.SizeTy, ElementTypeSize); + } + ABataev wrote: > Use `getTypeSize(CGF, ElementType)` instead In some previous review of this patch there was a suggestion to change from `getTypeSize(CGF, ElementType)/8` to `getTypeSizeInChars(ElementType).getQuantity()`. Just double checking if you really want me to revert that. Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3082-3128 @@ +3081,49 @@ + + // Generate the code to launch the target region. The pattern is the + // following: + // + // ... + // br IfCond (if any), omp_offload, omp_offload_fail + // + // omp_offload.try: + // ; create arrays for offloading + // error = __tgt_target(...) + // br error, omp_offload_fail, omp_offload_end + // + // omp_offload.fail: + // host_version(...) + // + // omp_offload.end: + // ... 
+ // + + auto OffloadTryBlock = CGF.createBasicBlock("omp_offload.try"); + auto OffloadFailBlock = CGF.createBasicBlock("omp_offload.fail"); + auto ContBlock = CGF.createBasicBlock("omp_offload.end"); + + if (IfCond) +CGF.EmitBranchOnBoolExpr(IfCond, OffloadTryBlock, OffloadFailBlock, + /*TrueCount=*/0); + + CGF.EmitBlock(OffloadTryBlock); + + unsigned PointerNumVal = BasePointers.size(); + llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); + llvm::Value *BasePointersArray; + llvm::Value *PointersArray; + llvm::Value *SizesArray; + llvm::Value *MapTypesArray; + + if (PointerNumVal) { +llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); +QualType PointerArrayType = CGF.getContext().getConstantArrayType( +CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal, +/*IndexTypeQuals=*/0); + +BasePointersArray = +CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); +PointersArray = +CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); + +// If we don't have any VLA types, we can use a constant array for the map +// sizes, otherwise we need to fill up the arrays as we do for the pointers. ABataev wrote: > Could you use `emitOMPIfClause()` function instead of this long block? I don't think I can reuse the current implementation. The reason is that the "else" basic block is always emitted and the "if" basic block needs to have a branch to the "else" basic block, and all these basic blocks are only visible inside `emitOMPIfClause()`. Basically, the if clause codegen would have to be combined with the testing of the return code of the offloading call. Comment at: lib/CodeGen/CGStmtOpenMP.cpp:38 @@ +37,3 @@ + if (UseOnlyReferences) { +LValue LV = MakeNaturalAlignAddrLValue( +CreateMemTemp(CurField->getType(), "__vla_size_ref").getPointer(), ABataev wrote: > Use `MakeAddrLValue(CreateMemTemp(CurField->getType(), "__vla_size_ref"), > CurField->getType())` instead. Done! 
Comment at: lib/CodeGen/CGStmtOpenMP.cpp:117-122 @@ -102,4 +116,8 @@ if (FD->hasCapturedVLAType()) { auto *ExprArg = EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); + if (UseOnlyReferences) { +auto ExprArgRef = MakeNaturalAlignAddrLValue(ExprArg, FD->getType()); +ExprArg = EmitLoadOfLValue(ExprArgRef, SourceLocation()).getScalarVal(); + } auto VAT = FD->getCapturedVLAType(); ABataev wrote: > ``` > if (UseOnlyReferences) > ArgLVal = CGF.EmitLoadOfReferenceLValue(ArgLVal.getAddress(), > FD->getType()->casAs()); > auto *ExprArg = EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); > ``` Done! http://reviews.llvm.org/D12871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao added inline comments. Comment at: lib/CodeGen/CGStmtOpenMP.cpp:80-81 @@ -67,1 +79,4 @@ II = &getContext().Idents.get("vla"); + if (UseOnlyReferences) +ArgType = getContext().getLValueReferenceType( +ArgType, /*SpelledAsLValue=*/false); Done! http://reviews.llvm.org/D12871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao updated this revision to Diff 34936. sfantao added a comment. Fix imprecision in previous diff. http://reviews.llvm.org/D12871 Files: lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenFunction.h test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,606 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. 
+ +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] 
+ #pragma omp target + { + } + + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i16* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i16* %{{.+}} to i8* + + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i16* {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao added inline comments. Comment at: lib/CodeGen/CGStmtOpenMP.cpp:121-123 @@ -103,4 +120,5 @@ +ArgLVal.getAddress(), ArgLVal.getType()->castAs()); auto *ExprArg = EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); auto VAT = FD->getCapturedVLAType(); VLASizeMap[VAT->getSizeExpr()] = ExprArg; I am actually doing `ArgLVal.getType()` instead of `FD->getType()`. FD doesn't have a reference type here. http://reviews.llvm.org/D12871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12614: [OpenMP] Offloading descriptor registration and device codegen.
sfantao updated the summary for this revision. sfantao updated this revision to Diff 34955. sfantao added a comment. Rebase on top of last changes in http://reviews.llvm.org/D12871. http://reviews.llvm.org/D12614 Files: include/clang/Basic/DiagnosticDriverKinds.td include/clang/Basic/LangOptions.def include/clang/Basic/LangOptions.h include/clang/Driver/CC1Options.td include/clang/Driver/Options.td lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenModule.cpp lib/Frontend/CompilerInvocation.cpp lib/Serialization/ASTReader.cpp lib/Serialization/ASTWriter.cpp test/OpenMP/target_codegen.cpp test/OpenMP/target_codegen_registration.cpp test/OpenMP/target_messages.cpp Index: test/OpenMP/target_messages.cpp === --- test/OpenMP/target_messages.cpp +++ test/OpenMP/target_messages.cpp @@ -1,4 +1,6 @@ // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s +// RUN: not %clang_cc1 -fopenmp -std=c++11 -omptargets=aaa-bbb-ccc-ddd -o - %s 2>&1 | FileCheck %s +// CHECK: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' void foo() { } Index: test/OpenMP/target_codegen_registration.cpp === --- /dev/null +++ test/OpenMP/target_codegen_registration.cpp @@ -0,0 +1,437 @@ +// Test host codegen. 
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple 
i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// Check that no target code is emmitted if no omptests flag was provided. +// RxUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK:[[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] } + +// CHECK-DAG: [[A1:@.+]] = internal global [[SA]] +// CHECK-DAG: [[A2:@.+]] = global [[SA]] +
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao updated this revision to Diff 35121. sfantao added a comment. Update if clause emission for target directive following Alexey's remark. http://reviews.llvm.org/D12871 Files: lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenFunction.h test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,644 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. 
+ +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) + // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 + // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void 
[[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target + { + } + + // CHECK: store i32 0, i32* [[RHV:%.+]], align 4 + // CHECK: store i32 -1, i32* [[RHV]], align 4 + // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i16* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i16* %{{.+}} to i8* + +
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao added inline comments. Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3044-3054 @@ +3043,13 @@ + + if (auto *VAT = dyn_cast(ElementType.getTypePtr())) { +auto VATInfo = CGF.getVLASize(VAT); +Size = llvm::ConstantInt::get( +CGM.SizeTy, +CGM.getContext().getTypeSizeInChars(VATInfo.second).getQuantity()); +Size = CGF.Builder.CreateNUWMul(Size, VATInfo.first); + } else { +uint64_t ElementTypeSize = +CGM.getContext().getTypeSizeInChars(ElementType).getQuantity(); +Size = llvm::ConstantInt::get(CGM.SizeTy, ElementTypeSize); + } + ABataev wrote: > You were using uint64_t ASTContext::getTypeSize(ElementType), but I'm talking > about static llvm::Value *getTypeSize(CGF, ElementType), which is defined in > CGOpenMPRuntime.cpp. It does exactly the same thing you're doing in this part > of code. Got it! Done! Just a related question: isn't the loop in getTypeSize() doing the same thing as CGF.getVLASize()? Just wondering if that should be reused in there. Thanks! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3082-3128 @@ +3081,49 @@ + + // Generate the code to launch the target region. The pattern is the + // following: + // + // ... + // br IfCond (if any), omp_offload, omp_offload_fail + // + // omp_offload.try: + // ; create arrays for offloading + // error = __tgt_target(...) + // br error, omp_offload_fail, omp_offload_end + // + // omp_offload.fail: + // host_version(...) + // + // omp_offload.end: + // ... 
+ // + + auto OffloadTryBlock = CGF.createBasicBlock("omp_offload.try"); + auto OffloadFailBlock = CGF.createBasicBlock("omp_offload.fail"); + auto ContBlock = CGF.createBasicBlock("omp_offload.end"); + + if (IfCond) +CGF.EmitBranchOnBoolExpr(IfCond, OffloadTryBlock, OffloadFailBlock, + /*TrueCount=*/0); + + CGF.EmitBlock(OffloadTryBlock); + + unsigned PointerNumVal = BasePointers.size(); + llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); + llvm::Value *BasePointersArray; + llvm::Value *PointersArray; + llvm::Value *SizesArray; + llvm::Value *MapTypesArray; + + if (PointerNumVal) { +llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); +QualType PointerArrayType = CGF.getContext().getConstantArrayType( +CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal, +/*IndexTypeQuals=*/0); + +BasePointersArray = +CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); +PointersArray = +CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); + +// If we don't have any VLA types, we can use a constant array for the map +// sizes, otherwise we need to fill up the arrays as we do for the pointers. ABataev wrote: > You can emit an empty else block, it will be optimized by backend In the new diff I now use a temp variable to keep the error code and try to reuse the if clause current logic. Let me know if this along the lines you want it to be. http://reviews.llvm.org/D12871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D9888: [OPENMP] Driver support for OpenMP offloading
sfantao updated this revision to Diff 36263. sfantao added a comment. This diff refactors the original patch and is rebased on top of the latest offloading changes inserted for CUDA. Here I don't touch the CUDA support. I tried, however, to have the implementation modular enough so that it could eventually be combined with the CUDA implementation. In my view OpenMP offloading is more general in the sense that it does not refer to a given tool chain, instead it uses existing toolchains to generate code for offloading devices. So, I believe that a tool chain (which I did not include in this patch) targeting NVPTX will be able to handle both CUDA and OpenMP offloading models. Chris, Art, I understand you have worked out the latest CUDA changes so any feedback from you is greatly appreciated! Here are a few more details about this diff: Add tool to bundle and unbundle corresponding host and device files into a single one. One of the goals of OpenMP offloading is to enable users to offload with little effort, by annotating the code with a few pragmas. I'd also like to save users the trouble of changing their existing applications' build system. So having the compiler always return a single file instead of one for the host and each target even if the user is doing separate compilation is desirable. This diff includes a tool named clang-offload-bundled (happy to change the name or even include it in the driver if someone thinks it is the best direction to go) that is used on all input files that are not source files to unbundle them, and on top level jobs that are not linking jobs to bundle the results obtained for host and each target. The format of the bundled files is currently very simple: text formats are concatenated with comments that have a magic string and target identifying triple in between, and binary formats have a header that contains the triple and the offset and size of the code for host and each target. 
This tool still has to be improved in the future to deal with archive files so that each individual file in the archive is properly dealt with. We see that archives are very commonly used in current applications to combine separate compilation results. So I'm convinced users would enjoy this feature. The building of the driver actions is unchanged. I don't create device specific actions. Instead only the bundling/unbundling are inserted as first or last action if the file type requires that. Add offloading kind to `ToolChain` Offloading does not require a new toolchain to be created. Existing toolchains are used and the offloading kind is used to drive specific behavior in each toolchain so that valid device code is generated. This is a major difference from what is currently done for CUDA. But I guess the CUDA implementation easily fits this design and the Nvidia GPU toolchain could be reused for both CUDA and OpenMP offloading. Use Job results cache to easily use host results in device actions and vice-versa. An array of the results for each job is kept so that the device job can use the result previously generated for the host and use it as input or vice-versa. In OpenMP the device declarations have to be communicated from the host frontend to the device frontend. So this is used to conveniently pass that information. Unlike CUDA, OpenMP doesn't have already outlined functions with "device" attributes that the frontend can rely on to make the decision on what to be emitted or not. The result cache can also be updated to keep the required information for the CUDA implementation to decide host/device binaries combining (injection is the term used in the code). I don't have a concrete proposal for that however, given that it is not clear to me what are the plans for CUDA to support separate compilation, I understand that the CUDA binary is inserted directly in host IR (Art, can you shed some light on this?). 
Use compiler generated linker script to do the device/host code combining and correctly support separate compilation. Currently the OpenMP support in the toolchains is only implemented for Generic GCC targets and a linker script is used to embed the resulting device images into the host binary ELF sections. Also, the linker script defines the symbols that are emitted during code generation so that the address of the images can be easily retrieved. Minor refactoring of the existing code to enable reusing. I've outlined some of the existing code into static functions so that it could be reused by the new offloading related hooks. Any comments/remarks are very welcome! Thanks! Samuel http://reviews.llvm.org/D9888 Files: include/clang/Basic/DiagnosticDriverKinds.td include/clang/Driver/Action.h include/clang/Driver/CC1Options.td include/clang/Driver/Driver.h include/clang/Driver/Options.td include/clang/Driver/ToolChain.h include/clang/Driver/Types.h lib/Driver/Action.cpp lib/Drive
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao updated this revision to Diff 36317. sfantao added a comment. Address Alexey's remarks from last review. http://reviews.llvm.org/D12871 Files: lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenFunction.h test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,644 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. 
+ +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) + // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 + // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void 
[[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target + { + } + + // CHECK: store i32 0, i32* [[RHV:%.+]], align 4 + // CHECK: store i32 -1, i32* [[RHV]], align 4 + // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i16* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i16* %{{.+}} to i8* + + // CHECK: store i32 [[RET
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao added inline comments. Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3044-3054 @@ +3043,13 @@ +Sizes.push_back(Size); +MapTypes.push_back(MapType); + } + + // Keep track on whether the host function has to be executed. + auto OffloadErrorQType = + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); + auto OffloadError = CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"); + CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), OffloadError, +/*Volatile=*/false, OffloadErrorQType); + + // Fill up the pointer arrays and transfer execution to the device. + auto &&ThenGen = [this, BasePointers, Pointers, Sizes, MapTypes, ABataev wrote: > Not quite. getVLASize() works only for VLA types, while getTypeSize() works > for types with constant size too. Oh, sure. Done! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3050 @@ +3049,3 @@ + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); + auto OffloadError = CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"); + CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), OffloadError, ABataev wrote: > Turn this to LValue like > CGF.MakeAddrLValue(CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), > OffloadErrorQType); and use this Lvalue rather than Address. Done! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3055-3057 @@ +3054,5 @@ + // Fill up the pointer arrays and transfer execution to the device. + auto &&ThenGen = [this, BasePointers, Pointers, Sizes, MapTypes, +hasVLACaptures, Device, OffloadError, +OffloadErrorQType](CodeGenFunction &CGF) { +unsigned PointerNumVal = BasePointers.size(); ABataev wrote: > Pointers, Sizes, MapTypes must be captured by reference. Oh, sure. Done! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3198-3204 @@ +3197,9 @@ + + // Notify that the host version must be executed. 
+ auto &&ElseGen = [this, OffloadError, +OffloadErrorQType](CodeGenFunction &CGF) { +CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), + OffloadError, /*Volatile=*/false, OffloadErrorQType); + }; + + if (IfCond) { ABataev wrote: > Move this to 'then' branch of the next if-stmt. Ok, done. http://reviews.llvm.org/D12871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r249148 - [OpenMP] Target directive host codegen.
Author: sfantao Date: Fri Oct 2 11:14:20 2015 New Revision: 249148 URL: http://llvm.org/viewvc/llvm-project?rev=249148&view=rev Log: [OpenMP] Target directive host codegen. This patch implements the outlining for offloading functions for code annotated with the OpenMP target directive. It uses a temporary naming of the outlined functions that will have to be updated later on once target side codegen and registration of offloading libraries is implemented - the naming needs to be made unique in the produced library. Added: cfe/trunk/test/OpenMP/target_codegen.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=249148&r1=249147&r2=249148&view=diff == --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Fri Oct 2 11:14:20 2015 @@ -41,6 +41,8 @@ public: /// \brief Region for constructs that do not require function outlining, /// like 'for', 'sections', 'atomic' etc. directives. InlinedRegion, +/// \brief Region with outlined function for standalone 'target' directive. +TargetRegion, }; CGOpenMPRegionInfo(const CapturedStmt &CS, @@ -211,6 +213,29 @@ private: CGOpenMPRegionInfo *OuterRegionInfo; }; +/// \brief API for captured statement code generation in OpenMP target +/// constructs. For this captures, implicit parameters are used instead of the +/// captured fields. +class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { +public: + CGOpenMPTargetRegionInfo(const CapturedStmt &CS, + const RegionCodeGenTy &CodeGen) + : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, + /*HasCancel = */ false) {} + + /// \brief This is unused for target regions because each starts executing + /// with a single thread. 
+ const VarDecl *getThreadIDVariable() const override { return nullptr; } + + /// \brief Get the name of the capture helper. + StringRef getHelperName() const override { return ".omp_offloading."; } + + static bool classof(const CGCapturedStmtInfo *Info) { +return CGOpenMPRegionInfo::classof(Info) && + cast(Info)->getRegionKind() == TargetRegion; + } +}; + /// \brief RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; @@ -877,6 +902,22 @@ CGOpenMPRuntime::createRuntimeFunction(O RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); break; } + case OMPRTL__tgt_target: { +// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t +// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t +// *arg_types); +llvm::Type *TypeParams[] = {CGM.Int32Ty, +CGM.VoidPtrTy, +CGM.Int32Ty, +CGM.VoidPtrPtrTy, +CGM.VoidPtrPtrTy, +CGM.SizeTy->getPointerTo(), +CGM.Int32Ty->getPointerTo()}; +llvm::FunctionType *FnTy = +llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); +RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); +break; + } } return RTLFn; } @@ -2952,3 +2993,265 @@ void CGOpenMPRuntime::emitCancelCall(Cod ThenGen(CGF); } } + +llvm::Value * +CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D, +const RegionCodeGenTy &CodeGen) { + const CapturedStmt &CS = *cast(D.getAssociatedStmt()); + + CodeGenFunction CGF(CGM, true); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + return CGF.GenerateOpenMPCapturedStmtFunction(CS, /*UseOnlyReferences=*/true); +} + +void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + llvm::Value *OutlinedFn, + const Expr *IfCond, const Expr *Device, + ArrayRef CapturedVars) { + /// \brief Values for bit flags used to specify the mapping type for + /// offloading. 
+ enum OpenMPOffloadMappingFlags { +/// \brief Allocate memory on the device and move data from host to device. +OMP_MAP_TO = 0x01, +/// \brief Allocate memory on the device and move data from device to host. +OMP_MAP_FROM = 0x02, + }; + + enum OpenMPOffloadingReservedDeviceIDs { +/// \brief Device ID if the device was not defined, runtime should get it +/// from environment variables in the spec. +
Re: [PATCH] D12871: [OpenMP] Target directive host codegen - rebased
sfantao closed this revision. sfantao added a comment. Committed in r249148! Thanks, Samuel http://reviews.llvm.org/D12871 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r249154 - [OpenMP] Capture global variables in target regions.
Author: sfantao Date: Fri Oct 2 12:14:03 2015 New Revision: 249154 URL: http://llvm.org/viewvc/llvm-project?rev=249154&view=rev Log: [OpenMP] Capture global variables in target regions. All global variables that are not enclosed in a declare target region must be captured in the target region as local variables do. Currently, there is no support for declare target, so this patch adds support for capturing all the global variables used in a the target region. Added: cfe/trunk/test/OpenMP/target_codegen_global_capture.cpp Modified: cfe/trunk/include/clang/Basic/OpenMPKinds.h cfe/trunk/include/clang/Sema/Sema.h cfe/trunk/lib/Basic/OpenMPKinds.cpp cfe/trunk/lib/Sema/SemaExpr.cpp cfe/trunk/lib/Sema/SemaOpenMP.cpp Modified: cfe/trunk/include/clang/Basic/OpenMPKinds.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/OpenMPKinds.h?rev=249154&r1=249153&r2=249154&view=diff == --- cfe/trunk/include/clang/Basic/OpenMPKinds.h (original) +++ cfe/trunk/include/clang/Basic/OpenMPKinds.h Fri Oct 2 12:14:03 2015 @@ -109,6 +109,12 @@ bool isOpenMPWorksharingDirective(OpenMP /// parallel', otherwise - false. bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind); +/// \brief Checks if the specified directive is a target-kind directive. +/// \param DKind Specified directive. +/// \return true - the directive is a target-like directive like 'omp target', +/// otherwise - false. +bool isOpenMPTargetDirective(OpenMPDirectiveKind DKind); + /// \brief Checks if the specified directive is a teams-kind directive. /// \param DKind Specified directive. /// \return true - the directive is a teams-like directive like 'omp teams', Modified: cfe/trunk/include/clang/Sema/Sema.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=249154&r1=249153&r2=249154&view=diff == --- cfe/trunk/include/clang/Sema/Sema.h (original) +++ cfe/trunk/include/clang/Sema/Sema.h Fri Oct 2 12:14:03 2015 @@ -7728,6 +7728,11 @@ public: /// is performed. 
bool isOpenMPPrivateVar(VarDecl *VD, unsigned Level); + /// \brief Check if the specified variable is captured by 'target' directive. + /// \param Level Relative level of nested OpenMP construct for that the check + /// is performed. + bool isOpenMPTargetCapturedVar(VarDecl *VD, unsigned Level); + ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc, Expr *Op); /// \brief Called on start of new data sharing attribute block. Modified: cfe/trunk/lib/Basic/OpenMPKinds.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/OpenMPKinds.cpp?rev=249154&r1=249153&r2=249154&view=diff == --- cfe/trunk/lib/Basic/OpenMPKinds.cpp (original) +++ cfe/trunk/lib/Basic/OpenMPKinds.cpp Fri Oct 2 12:14:03 2015 @@ -424,6 +424,10 @@ bool clang::isOpenMPParallelDirective(Op DKind == OMPD_parallel_sections; // TODO add next directives. } +bool clang::isOpenMPTargetDirective(OpenMPDirectiveKind DKind) { + return DKind == OMPD_target; // TODO add next directives. +} + bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_teams; // TODO add next directives. } Modified: cfe/trunk/lib/Sema/SemaExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=249154&r1=249153&r2=249154&view=diff == --- cfe/trunk/lib/Sema/SemaExpr.cpp (original) +++ cfe/trunk/lib/Sema/SemaExpr.cpp Fri Oct 2 12:14:03 2015 @@ -13183,10 +13183,18 @@ bool Sema::tryCaptureVariable( if (getLangOpts().OpenMP) { if (auto *RSI = dyn_cast(CSI)) { // OpenMP private variables should not be captured in outer scope, so -// just break here. +// just break here. Similarly, global variables that are captured in a +// target region should not be captured outside the scope of the region. 
if (RSI->CapRegionKind == CR_OpenMP) { - if (isOpenMPPrivateVar(Var, OpenMPLevel)) { -Nested = true; + auto isTargetCap = isOpenMPTargetCapturedVar(Var, OpenMPLevel); + // When we detect target captures we are looking from inside the + // target region, therefore we need to propagate the capture from the + // enclosing region. Therefore, the capture is not initially nested. + if (isTargetCap) +FunctionScopesIndex--; + + if (isTargetCap || isOpenMPPrivateVar(Var, OpenMPLevel)) { +Nested = !isTargetCap; DeclRefType = DeclRefType.getUnqualifiedType(); CaptureType = Context.getLValueReferenceType(DeclRefType); break; Modified: cfe/trunk/lib/Se
Re: [PATCH] D12262: [OpenMP] Capture global variables in target regions.
sfantao closed this revision. sfantao added a comment. Committed in r249154. Thanks! Samuel http://reviews.llvm.org/D12262 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12614: [OpenMP] Offloading descriptor registration and device codegen.
sfantao updated this revision to Diff 36410. sfantao added a comment. Fix bug for when no offloading triples are specified. http://reviews.llvm.org/D12614 Files: include/clang/Basic/DiagnosticDriverKinds.td include/clang/Basic/LangOptions.def include/clang/Basic/LangOptions.h include/clang/Driver/CC1Options.td include/clang/Driver/Options.td lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenModule.cpp lib/Frontend/CompilerInvocation.cpp lib/Serialization/ASTReader.cpp lib/Serialization/ASTWriter.cpp test/OpenMP/target_codegen.cpp test/OpenMP/target_codegen_global_capture.cpp test/OpenMP/target_codegen_registration.cpp test/OpenMP/target_messages.cpp Index: test/OpenMP/target_messages.cpp === --- test/OpenMP/target_messages.cpp +++ test/OpenMP/target_messages.cpp @@ -1,4 +1,6 @@ // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s +// RUN: not %clang_cc1 -fopenmp -std=c++11 -omptargets=aaa-bbb-ccc-ddd -o - %s 2>&1 | FileCheck %s +// CHECK: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' void foo() { } Index: test/OpenMP/target_codegen_registration.cpp === --- /dev/null +++ test/OpenMP/target_codegen_registration.cpp @@ -0,0 +1,437 @@ +// Test host codegen. 
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple 
i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// Check that no target code is emmitted if no omptests flag was provided. +// RxUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK:[[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] } + +// CHECK-DAG: [[A1:@.+]] = internal global [[SA]] +// CHECK-DAG: [[A2:@.+]] = global [[SA]] +// CHECK-D
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao updated this revision to Diff 32211. sfantao added a comment. This patch tries to avoid as much as possible changing the common infrastructure, by adapting the CapturedDecl creation in SEMA and by adding support to a second type of capture - ImplicitParamDecl (on top of the existent FieldDecl). Also adds codegen for device clause as the Parsing and SEMA support was added in the meantime. The regression test was not updated yet. I wanted to make sure the direction this is taking is approved before diving into that. Thanks! Samuel http://reviews.llvm.org/D11361 Files: include/clang/AST/Decl.h include/clang/AST/Stmt.h include/clang/Basic/CapturedStmt.h include/clang/Sema/ScopeInfo.h lib/CodeGen/CGExpr.cpp lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmt.cpp lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenFunction.h lib/Sema/SemaOpenMP.cpp test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,583 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +// REQUIRES: powerpc-registered-target +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, and only 6 will have mapped arguments. 
+ +// CHECK-DAG: [[MAPT2:@.+]] = private constant [1 x i32] [i32 3] +// CHECK-DAG: [[MAPT3:@.+]] = private constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[MAPT5:@.+]] = private constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT6:@.+]] = private constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i32* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target + { + } + + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + + // CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]] + // CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, 
i64* [[S]], i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8* + + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i32* {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target if(1) + { +a += 1; + } + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BP:%[^,]+]], i
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao added a comment. Alexey, John, Thanks for the review! I've tried to address your concerns in the last diff. Please check the inlined comments to find answers for the remarks of the previous diff. Thanks again! Samuel Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:863-864 @@ -840,1 +862,4 @@ } + case OMPRTL__tgt_target: { +// Build to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t +// arg_num, void** args_base, void **args, int64_t *arg_sizes, int32_t rjmccall wrote: > Spurious "to" at the start. Fixed! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2921 @@ +2920,3 @@ + uint64_t SizeVal = + CGM.getDataLayout().getTypeSizeInBits(V->getType()) / 8; + Size = CGF.Builder.getInt64(SizeVal); rjmccall wrote: > getTypeStoreSize() Now using getTypeSizeInChars from the ASTContext as suggested below. Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2930 @@ +2929,3 @@ + uint64_t SizeVal = + CGM.getDataLayout().getTypeSizeInBits(PtrTy->getElementType()) / 8; + Size = CGF.Builder.getInt64(SizeVal); rjmccall wrote: > You should ask the ASTContext to compute this size instead of making > assumptions about the layout size of the IR type. > > Also, what are the semantics supposed to be for mapping to and from? Do > referents need to be trivially copyable? What if there are pointers or > references? What happens to virtual bases? Now using getTypeSizeInChars from the ASTContext. This patch only deals with trivially copyable types. By default, a variable that is captured in the target region is mapped "by value" using a to-from policy. In order to do something different than the default, the user has to use a map clause (I'll send out a patch for it once we have the Parsing/SEMA in place). As per the current spec, the map clause allows a user to map the pointee of a pointer as well as only mapping a section of an array or pointee. In the next version of the OpenMP spec we will have the ability to map aggregate fields and more support for "deep" copy. 
We expect to be able to handle all the cases with the proper flags in OpenMPOffloadMappingFlags. About virtual bases: OpenMP 4.0 forbids virtual members in mappable variables. Nevertheless, it is possible that this constraint will be lifted in future versions. Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2940 @@ +2939,3 @@ + uint64_t SizeVal = + CGM.getDataLayout().getTypeSizeInBits(PtrTy->getElementType()) / 8; + Size = CGF.Builder.getInt64(SizeVal); rjmccall wrote: > Same thing, please ask the ASTContext. > > Also, you might need to care about variably-sized types here. Now using ASTContext. I was planning to deal with the VLA types once I add support for the map clause, but I agree it makes more sense to do it now. Thanks for the suggestion. Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3009 @@ +3008,3 @@ +SizesArray = +CGF.Builder.CreateAlloca(CGM.Int64Ty, PointerNum, ".offload_sizes"); + rjmccall wrote: > This is creating a bunch of dynamic allocas instead of just temporary values. > Please call CreateMemTemp with an array type instead. Done! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3014 @@ +3013,3 @@ +llvm::Constant *MapTypesArrayInit = +llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); +MapTypesArray = rjmccall wrote: > The sizes aren't constant if you've captured a VLA. But this comment is > actually just wrong, because this isn't building something for the sizes at > all; it's building something for the flags. > > That said, I think you ought to be able to do the same thing with the sizes > when you don't have a VLA. Sorry for the error in the comment. It is fixed now. Also, I added code to deal with constant sizes for when we don't have VLAs. 
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3017 @@ +3016,3 @@ +new llvm::GlobalVariable(CGM.getModule(), MapTypesArrayInit->getType(), + true, llvm::GlobalValue::PrivateLinkage, + MapTypesArrayInit, ".offload_maptypes"); rjmccall wrote: > Please comment boolean arguments like this: >/*constant*/ true > > And please mark this variable unnamed_addr. Added comments for the boolean. Setting unnamed_addr now, thanks! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3037 @@ +3036,3 @@ + llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP1_32( + SPtrTy->getElementType(), SizesArray, i); + rjmccall wrote: > You already know the element types for all of these. The code will be much > more readable if you just use those types directly. Ok, now using the types explicitly. Comment at: lib/CodeGen/CGOpenMPRunt
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao updated this revision to Diff 32322. sfantao added a comment. Adress reviewers concerns. Also fix issue with target regions with no arguments and in the VLA size computation I found in the meantime. http://reviews.llvm.org/D11361 Files: include/clang/AST/Decl.h include/clang/AST/Stmt.h include/clang/Basic/CapturedStmt.h include/clang/Sema/ScopeInfo.h lib/CodeGen/CGExpr.cpp lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmt.cpp lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenFunction.h lib/Sema/SemaOpenMP.cpp test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,583 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +// REQUIRES: powerpc-registered-target +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, and only 6 will have mapped arguments. 
+ +// CHECK-DAG: [[MAPT2:@.+]] = private constant [1 x i32] [i32 3] +// CHECK-DAG: [[MAPT3:@.+]] = private constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[MAPT5:@.+]] = private constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT6:@.+]] = private constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i32* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target + { + } + + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + + // CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]] + // CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, 
i64* [[S]], i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8* + + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i32* {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target if(1) + { +a += 1; + } + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0)) + + // CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]] + // CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao added a comment. Alexey, Thanks for the review! Find my comments inlined. Thanks again! Samuel Comment at: lib/CodeGen/CGExpr.cpp:1969 @@ -1945,3 +1968,3 @@ else - return EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD), - CapturedStmtInfo->getContextValue()); + return EmitCapturedValue(*this, CapturedStmtInfo->lookup(VD), + CapturedStmtInfo->getContextValue()); ABataev wrote: > Samuel, why you don't want to capture all used variables in CapturedDecl > instead of creating ImplicitParamDecl for each captured variable? Also, you > will solve possible trouble with VLAs automatically using CapturedDecl. Alexey, I'm not sure I understand what you mean here. Unlike the other captured regions, the target region outlined function does not take a context that captures all the variables in fields of a record as an argument. Instead, it takes all the captured references as arguments. This will enable the device runtime library to decide how best to pass the arguments to the device (see my response to John's question in my previous diff). It happens that all the machinery in the common infrastructure that creates the outlined functions (`CodeGenFunction::StartFunction` and `GenerateCapturedStmtFunction`) is prepared to get the context record from the `CapturedDecl`. Therefore, in order to not disrupt the common infrastructure, in `Sema::ActOnOpenMPRegionEnd` I am creating a new `CapturedDecl` that contains implicit parameters. I gather the information to build the new `CapturedDecl` from the `CapturedDecl` that is created with the context argument and the `RecordDecl` fields so that I don't need to touch the capturing code in Sema. Having `CapturedDecl` with implicit parameters will drive `StartFunction` to create the outlined region with the right signature without having to change anything in there. I only had to guard the initialization of VLAs and 'this' in `GenerateCapturedStmtFunction` to not do anything that expects the context argument. 
However, during the emission of the VLAs that happens in `StartFunction`, the emission of these implicit parameters is attempted based on references that are marked as `refersToEnclosingVariableOrCapture`- this is the reason for the change in `EmitDeclRefLValue`. Given that the references in the outlined function statements are still expecting the VLAs expression used in the caller of the outlined function, `PrepareOMPTargetDirectiveBodyEmission` will make sure that the mapped values to those expressions will be the same as the ones that use the new expression based on implicit parameters. Let me know if you need me to clarify anything. Thanks! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2953 @@ +2952,3 @@ + BasePointer = Pointer = LV.getAddress(); + uint64_t SizeVal = CGM.getContext().getTypeSize(ri->getType()) / 8; + Size = CGF.Builder.getInt64(SizeVal); ABataev wrote: > Use CGM.getContext().getTypeSizeInChars() instead of > CGM.getContext().getTypeSize() / 8. Done! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2954 @@ +2953,3 @@ + uint64_t SizeVal = CGM.getContext().getTypeSize(ri->getType()) / 8; + Size = CGF.Builder.getInt64(SizeVal); + ABataev wrote: > Maybe llvm::ConstantInt::get(CGF.SizeTy, SizeVal)? I agree, it makes more sense to use size_t. Thanks for the suggestion! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2963 @@ +2962,3 @@ + uint64_t SizeVal = + CGM.getContext().getTypeSize(PtrTy->getPointeeType()) / 8; + Size = CGF.Builder.getInt64(SizeVal); ABataev wrote: > Use CGM.getContext().getTypeSizeInChars() instead of > CGM.getContext().getTypeSize() / 8. done! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2980 @@ +2979,3 @@ +uint64_t ElementTypeSize = +CGM.getContext().getTypeSize(ElementType) / 8; +Size = CGF.Builder.getInt64(ElementTypeSize); ABataev wrote: > The same Done! 
Comment at: lib/CodeGen/CGStmtOpenMP.cpp:2144-2145 @@ +2143,4 @@ + auto *ThisRef = LocalDeclMap[*pi]; + LValue Addr = LValue::MakeAddr(ThisRef, ri->getType(), CharUnits(), + CGM.getContext()); + CXXThisValue = EmitLoadOfLValue(Addr, CS.getLocStart()).getScalarVal(); ABataev wrote: > MakeNaturalAlignAddrLValue(ThisRef, ri->getType())? Now using `MakeNaturalAlignAddrLValue`. Comment at: lib/CodeGen/CGStmtOpenMP.cpp:2147 @@ +2146,3 @@ + CXXThisValue = EmitLoadOfLValue(Addr, CS.getLocStart()).getScalarVal(); + ; + continue; ABataev wrote: > Extra semicolon Fixed. http://reviews.llvm.org/D11361 ___ cfe-commits mailing list
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao updated this revision to Diff 32796. sfantao added a comment. Implement proxy function for target directive. Move the creation of the target region parameters from `CGOpenMPRuntime::emitTargetCall` to CodeGenFunction::EmitTargetDirective because we need to access the VLA Maps of the target enclosing function. Update regression test and add run directive for 32-bit target. http://reviews.llvm.org/D11361 Files: lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,754 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +// REQUIRES: powerpc-registered-target +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. 
+ +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] 
+ #pragma omp target + { + } + + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i16* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i16
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao added a comment. Thanks for review. The new diff now uses a proxy function. See other comments inlined. Thanks again! Samuel Comment at: lib/CodeGen/CGExpr.cpp:1969-1970 @@ -1945,4 +1968,4 @@ else - return EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD), - CapturedStmtInfo->getContextValue()); + return EmitCapturedValue(*this, CapturedStmtInfo->lookup(VD), + CapturedStmtInfo->getContextValue()); } ABataev wrote: > Instead I would do the same thing I did for tasks. > Generate captured function as is. But also create the second function with > the profile required for target codegen. This function must gather all its > parameters into a single record and then call auto generated captured > function. This captured function must be marked as AlwaysInline. > In this case you don't need to make some additional changes in Sema for > particular processing of target directives, you will just need to generate > simple function in codegen. > This may result in a little bit slower performance, but we may improve it > later, when we have time to improve codegen for outlined functions for > CapturedDecls. > I don't like the idea of reinventing of features, that are invented already, > like capturing of VLAs, exposing some private functions (like > getVLASizeMap()) etc. > so the code would be like this: > ``` > void .omp_outlined.(%captures *ptr) always_inline { > ; > } > void .target_omp_outlined.(int* a, float* b, ...) { > %captures rec; > rec.a_ref = a; > rec.b_ref = b; > ... > .omp_outlined.(&rec); > } > ``` Ok, I am now using the proxy function. Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3065 @@ +3064,3 @@ + QualType SizeArrayType = CGF.getContext().getConstantArrayType( + CGF.getContext().getIntTypeForBitwidth(64, /*Signed=*/true), + PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); ABataev wrote: > Maybe CGF.getContext().getSizeType()? Done! 
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3163 @@ +3162,3 @@ +OffloadingArgs); + auto Error = CGF.Builder.CreateICmpNE(Return, CGF.Builder.getInt32(0)); + CGF.Builder.CreateCondBr(Error, OffloadFailBlock, ContBlock); ABataev wrote: > CGF.Builder.CreateIsNotNull()? Done! Comment at: test/OpenMP/target_codegen.cpp:1-3 @@ +1,4 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics ABataev wrote: > It would be good to see the tests for 32 bit target. Done! http://reviews.llvm.org/D11361 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao updated this revision to Diff 32843. sfantao added a comment. Address reviewer concerns. http://reviews.llvm.org/D11361 Files: lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp test/OpenMP/target_codegen.cpp Index: test/OpenMP/target_codegen.cpp === --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,754 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +// REQUIRES: powerpc-registered-target +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. 
+ +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] 
+ #pragma omp target + { + } + + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { +a += 1; + } + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i16* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i16* %{{.+}} to i8* + + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i16* {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHEC
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao added a comment. In http://reviews.llvm.org/D11361#229744, @ABataev wrote: > Another one thing I forget to mention. Current implementation of > CGOpenMPRuntime is libomp-specific. You're trying to add functionality that > is libtarget-specific. Maybe it is a good idea to separate support for libomp > and libtarget runtime libraries? Not sure what you mean by separation. Different files? Different codegen class? My perspective is that the two things should be together given that they both address the same specification, and I see that interaction is required between the two components. E.g. teams codegen will have to interact with the target codegen (communicate number of teams/threads) and the teams codegen will require the libomp interface in its implementation. We could always separate the two things in the future if we see that is a better way to organize the code. Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2887 @@ +2886,3 @@ +llvm::Value * +CGOpenMPRuntime::emitTargetOutlinedFunction(CodeGenFunction &CGF, +const OMPExecutableDirective &D, ABataev wrote: > I don't think you need this argument. You're emitting a new outlined function > here and don't need info about your current function. Done! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2906-2911 @@ +2905,8 @@ + + CodeGenFunction TargetAuxCGF(CGM, true); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(TargetAuxCGF, &CGInfo); + auto *TargetAuxFn = TargetAuxCGF.GenerateCapturedStmtFunction(CS); + TargetAuxFn->addFnAttr(llvm::Attribute::AlwaysInline); + + // Collect the arguments of the main function. ABataev wrote: > You'd better to emit internal function separately in a new static function. > Then you don't need to create TargetAuxCGF and TargetMainCGF. You should use > just CGF everywhere. One CodeGenFunction instance per function. Done! 
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:2970-2972 @@ +2969,5 @@ + + auto ai = Args.begin(); + for (RecordDecl::field_iterator ri = RD->field_begin(), re = RD->field_end(); + ri != re; ++ri, ++ai) { + ABataev wrote: > Variable names should start with an upper case letter (e.g. Leader or Boats). Ok, thought iterators were an exception to that rule. Fixed now! Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3070-3107 @@ +3069,40 @@ +} else { + // We expect all the sizes to be constant, so we collect them to create + // a constant array. + SmallVector ConstSizes; + for (auto *V : Sizes) +ConstSizes.push_back(cast(V)->getZExtValue()); + + auto SizeTypeBytes = + CGF.getContext() + .getTypeSizeInChars(CGF.getContext().getSizeType()) + .getQuantity(); + + llvm::Constant *SizesArrayInit; + switch (SizeTypeBytes) { + default: +llvm_unreachable("Unexpected size-type type!"); + case 1: { +SmallVector ConstSizesL(ConstSizes.begin(), + ConstSizes.end()); +SizesArrayInit = +llvm::ConstantDataArray::get(CGM.getLLVMContext(), ConstSizesL); + } break; + case 2: { +SmallVector ConstSizesL(ConstSizes.begin(), + ConstSizes.end()); +SizesArrayInit = +llvm::ConstantDataArray::get(CGM.getLLVMContext(), ConstSizesL); + } break; + case 4: { +SmallVector ConstSizesL(ConstSizes.begin(), + ConstSizes.end()); +SizesArrayInit = +llvm::ConstantDataArray::get(CGM.getLLVMContext(), ConstSizesL); + } break; + case 8: { +SizesArrayInit = +llvm::ConstantDataArray::get(CGM.getLLVMContext(), ConstSizes); + } break; + } + auto *SizesArrayGbl = new llvm::GlobalVariable( ABataev wrote: > Try instead: > SizesArrayInit = > llvm::ConstantArray::get(llvm::ArrayType::get(CGM.SizeTy, Sizes.size()), > Sizes); > Done! 
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3161-3164 @@ +3160,6 @@ + } else { +BasePointersArray = llvm::Constant::getNullValue(CGM.VoidPtrPtrTy); +PointersArray = llvm::Constant::getNullValue(CGM.VoidPtrPtrTy); +SizesArray = llvm::Constant::getNullValue(CGM.SizeTy->getPointerTo()); +MapTypesArray = llvm::Constant::getNullValue(CGM.Int32Ty->getPointerTo()); + } ABataev wrote: > llvm::ConstantPointerNull::get(); Done! Comment at: lib/CodeGen/CGStmtOpenMP.cpp:2139-2203 @@ +2138,67 @@ + + bool hasVLACaptures = false; + const CapturedStmt &CS = *cast(S.getAssociatedStmt()); + auto ri = CS.getCapturedRecordDecl()->field_begin(); + auto ii = CS.capture_init_begin(); + for (CapturedStmt::const_capture_iterator ci = CS.capture_be
[PATCH] D12262: [OpenMP] Capture global variables in target regions.
sfantao created this revision. sfantao added reviewers: ABataev, rjmccall, hfinkel. sfantao added a subscriber: cfe-commits. All global variables that are not enclosed in a declare target region must be captured in the target region as local variables are. Currently, there is no support for declare target, so this patch adds support for capturing all the global variables used in the target region. This patch requires http://reviews.llvm.org/D11361. http://reviews.llvm.org/D12262 Files: include/clang/Basic/OpenMPKinds.h include/clang/Sema/Sema.h lib/Basic/OpenMPKinds.cpp lib/Sema/SemaExpr.cpp lib/Sema/SemaOpenMP.cpp test/OpenMP/target_codegen_global_capture.cpp Index: test/OpenMP/target_codegen_global_capture.cpp === --- /dev/null +++ test/OpenMP/target_codegen_global_capture.cpp @@ -0,0 +1,186 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + + +// CHECK-DAG: [[GA:@.+]] = global double 1.00e+00 +// CHECK-DAG: [[GB:@.+]] = global double 2.00e+00 +// CHECK-DAG: [[GC:@.+]] = global double 3.00e+00 +// CHECK-DAG: [[GD:@.+]] = global double 4.00e+00 +// CHECK-DAG: [[FA:@.+]] = internal global float 5.00e+00 +// CHECK-DAG: [[FB:@.+]] = internal global float 6.00e+00 +// CHECK-DAG: [[FC:@.+]] = internal global float 7.00e+00 +// CHECK-DAG: [[FD:@.+]] = internal global float 
8.00e+00 +// CHECK-DAG: [[BA:@.+]] = internal global float 9.00e+00 +// CHECK-DAG: [[BB:@.+]] = internal global float 1.00e+01 +// CHECK-DAG: [[BC:@.+]] = internal global float 1.10e+01 +// CHECK-DAG: [[BD:@.+]] = internal global float 1.20e+01 +double Ga = 1.0; +double Gb = 2.0; +double Gc = 3.0; +double Gd = 4.0; + +// CHECK: define {{.*}} @{{.*}}foo{{.*}}( +// CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]], +// CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]]) +// CHECK: [[LA:%.+]] = alloca i16 +// CHECK: [[LB:%.+]] = alloca i16 +// CHECK: [[LC:%.+]] = alloca i16 +// CHECK: [[LD:%.+]] = alloca i16 +int foo(short a, short b, short c, short d){ + static float Sa = 5.0; + static float Sb = 6.0; + static float Sc = 7.0; + static float Sd = 8.0; + + // CHECK-DAG: [[REFB:%.+]] = bitcast i16* [[LB]] to i8* + // CHECK-DAG: store i8* [[REFB]], i8** [[GEPB:%.+]] + // CHECK-DAG: [[REFC:%.+]] = bitcast i16* [[LC]] to i8* + // CHECK-DAG: store i8* [[REFC]], i8** [[GEPC:%.+]] + // CHECK-DAG: [[REFD:%.+]] = bitcast i16* [[LD]] to i8* + // CHECK-DAG: store i8* [[REFD]], i8** [[GEPD:%.+]] + // CHECK-DAG: store i8* bitcast (double* [[GB]] to i8*), i8** [[GEPGB:%.+]] + // CHECK-DAG: store i8* bitcast (double* [[GC]] to i8*), i8** [[GEPGC:%.+]] + // CHECK-DAG: store i8* bitcast (double* [[GD]] to i8*), i8** [[GEPGD:%.+]] + // CHECK-DAG: store i8* bitcast (float* [[FB]] to i8*), i8** [[GEPFB:%.+]] + // CHECK-DAG: store i8* bitcast (float* [[FC]] to i8*), i8** [[GEPFC:%.+]] + // CHECK-DAG: store i8* bitcast (float* [[FD]] to i8*), i8** [[GEPFD:%.+]] + // CHECK-DAG: [[GEPB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* 
%{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{.+}} + // CHECK: call i32 @__tgt_target + // CHECK: call void [[OFFLOADF:@.+]]( + // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd + #pragma omp target if(Ga>0.0 && a>0 && Sa>0.0) + { +b += 1; +Gb += 1.0; +Sb += 1.0; + + +// CHECK: define internal void [[PARF:@.+]](i32* %{{.*}}, i32* %{{.*}}, +// CHECK: defin
Re: [PATCH] D11361: [OpenMP] Target directive host codegen
sfantao added a comment. Two more inlined comments that I forgot to integrate in my previous response. Thanks! Samuel Comment at: lib/CodeGen/CGOpenMPRuntime.h:190-204 @@ -180,2 +189,17 @@ + /// \brief Values for bit flags used to specify the mapping type for + /// offloading. + enum OpenMPOffloadMappingFlags { +/// \brief Allocate memory on the device and move data from host to device. +OMP_MAP_TO = 0x01, +/// \brief Allocate memory on the device and move data from device to host. +OMP_MAP_FROM = 0x02, + }; + + enum OpenMPOffloadingReservedDeviceIDs { +/// \brief Device ID if the device was not defined, runtime should get it +/// from environment variables in the spec. +OMP_DEVICEID_UNDEF = -1, + }; + CodeGenModule &CGM; /// \brief Default const ident_t object used for initialization of all other Got it, not exposed anymore. Comment at: lib/CodeGen/CGOpenMPRuntime.h:761 @@ -714,2 +760,2 @@ #endif Unlike the other enums, more than one map type needs to be combined. E.g., to/from are two different enums. Once the map clause and 4.1 are supported, we will have more combinations. I see two options here: add enums for all combinations or use a typedef each time an integer refers to map types, so the code is more readable. Let me know your thoughts. http://reviews.llvm.org/D11361 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D12306: [OpenMP] Implement the creation and registration of the offloading descriptor.
sfantao created this revision. sfantao added reviewers: ABataev, rjmccall, hfinkel. sfantao added a subscriber: cfe-commits. All the offloading information is bundled into a descriptor that is passed to the runtime library so that it can extract all the binaries and map variables properly. This descriptor includes the range of the target binaries (that will be defined by the linker) for each device selected by the user, as well as a table with the information about each function and variable that is to be mapped (given that we do not support 'declare target' directives yet, this is only implemented for functions, but the logic can/will be reused). This patch adds support for the creation of the descriptor as well as the registration/unregistration of the descriptor with the runtime library. The registration is implemented in a high priority global initializer so that the registration always happens before any initializer (that can potentially include target regions) is run. The frontend flag (it, or something similar, will have to be promoted to a driver option in the future) -omptargets= was created to exercise the new functionality. It takes the list of triples of the devices the user wants to offload to. 
http://reviews.llvm.org/D12306 Files: include/clang/Basic/DiagnosticDriverKinds.td include/clang/Basic/LangOptions.h include/clang/Driver/Options.td lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGOpenMPRuntime.h lib/CodeGen/CGStmtOpenMP.cpp lib/CodeGen/CodeGenModule.cpp lib/Frontend/CompilerInvocation.cpp lib/Serialization/ASTReader.cpp lib/Serialization/ASTWriter.cpp test/OpenMP/target_codegen.cpp test/OpenMP/target_codegen_registration.cpp test/OpenMP/target_messages.cpp Index: test/OpenMP/target_messages.cpp === --- test/OpenMP/target_messages.cpp +++ test/OpenMP/target_messages.cpp @@ -1,4 +1,6 @@ // RUN: %clang_cc1 -verify -fopenmp -std=c++11 -o - %s +// RUN: not %clang_cc1 -fopenmp -std=c++11 -omptargets=aaa-bbb-ccc-ddd -o - %s 2>&1 | FileCheck %s +// CHECK: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd' void foo() { } Index: test/OpenMP/target_codegen_registration.cpp === --- /dev/null +++ test/OpenMP/target_codegen_registration.cpp @@ -0,0 +1,282 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -std=c++11 -omptargets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Check that no target code is emitted if no omptargets flag was provided. 
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]] } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// CHECK-DAG: [[A1:@.+]] = internal global [[SA]] +// CHECK-DAG: [[A2:@.+]] = global [[SA]] +// CHECK-DAG: [[B1:@.+]] = global [[SB]] +// CHECK-DAG: [[B2:@.+]] = global [[SB]] +// CHECK-DAG: [[C1:@.+]] = internal global [[SC]] +// CHECK-DAG: [[D1:@.+]] = global [[SD]] +// CHECK-DAG: [[E1:@.+]] = global [[SE]] + +// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-NTARGET-NOT: type { i8*, +// CHECK-NTARGET-NOT: type { i32, + +// We have 7 target regions + +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 3] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_