At a quick glance, -Os and -Oz should probably map to -O2. On Mon, Jan 18, 2016, 4:19 PM Justin Lebar <jle...@google.com> wrote:
> jlebar created this revision. > jlebar added a reviewer: tra. > jlebar added subscribers: jhen, echristo, cfe-commits. > > Previously we'd crash the driver if you passed -O0. Now we try to > handle all of clang's various optimization flags in a sane way. > > http://reviews.llvm.org/D16307 > > Files: > lib/Driver/Tools.cpp > test/Driver/cuda-external-tools.cu > > Index: test/Driver/cuda-external-tools.cu > =================================================================== > --- test/Driver/cuda-external-tools.cu > +++ test/Driver/cuda-external-tools.cu > @@ -4,14 +4,31 @@ > // REQUIRES: x86-registered-target > // REQUIRES: nvptx-registered-target > > -// Regular compile with -O2. > +// Regular compiles with -O{0,1,2,3,4,fast}. -O4 and -Ofast map to ptxas > O3. > +// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \ > +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix > OPT0 %s > +// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \ > +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix > OPT1 %s > // RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \ > // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix > OPT2 %s > +// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \ > +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix > OPT3 %s > +// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \ > +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix > OPT3 %s > +// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \ > +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix > OPT3 %s > > // Regular compile without -O. This should result in us passing -O0 to > ptxas. > // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \ > // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix > OPT0 %s > > +// Regular compiles with -Os and -Oz. 
For lack of a better option, we map > +// these to ptxas -O3. > +// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \ > +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix > OPT3 %s > +// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \ > +// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix > OPT3 %s > + > // Regular compile targeting sm_35. > // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s > 2>&1 \ > // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s > @@ -42,7 +59,9 @@ > // ARCH64: "-m64" > // ARCH32: "-m32" > // OPT0: "-O0" > +// OPT1: "-O1" > // OPT2: "-O2" > +// OPT3: "-O3" > // SM20: "--gpu-name" "sm_20" > // SM35: "--gpu-name" "sm_35" > // SM20: "--output-file" "[[CUBINFILE:[^"]*]]" > Index: lib/Driver/Tools.cpp > =================================================================== > --- lib/Driver/Tools.cpp > +++ lib/Driver/Tools.cpp > @@ -10645,10 +10645,34 @@ > ArgStringList CmdArgs; > CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32"); > > - // Clang's default optimization level is -O0, but ptxas's default is > -O3. > - CmdArgs.push_back(Args.MakeArgString( > - llvm::Twine("-O") + > - Args.getLastArgValue(options::OPT_O_Group, "0").data())); > + // Map the -O we received to -O{0,1,2,3}. > + // > + // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's > default, > + // so it may correspond more closely to the spirit of clang -O2. > + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { > + // -O3 seems like the least-bad option when -Osomething is specified > to > + // clang but it isn't handled above. 
> + StringRef OOpt = "3"; > + if (A->getOption().matches(options::OPT_O4) || > + A->getOption().matches(options::OPT_Ofast)) > + OOpt = "3"; > + else if (A->getOption().matches(options::OPT_O0)) > + OOpt = "0"; > + else if (A->getOption().matches(options::OPT_O)) { > + // -Os, -Oz, and -O(anything else) map to -O3, for lack of better > options. > + OOpt = llvm::StringSwitch<const char *>(A->getValue()) > + .Case("1", "1") > + .Case("2", "2") > + .Case("s", "3") > + .Case("z", "3") > + .Default("3"); > + } > + CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt)); > + } else { > + // If no -O was passed, pass -O0 to ptxas -- no opt flag should > correspond > + // to no optimizations, but ptxas's default is -O3. > + CmdArgs.push_back("-O0"); > + } > > // Don't bother passing -g to ptxas: It's enabled by default at -O0, and > // not supported at other optimization levels. > > >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits