blob: 280c60966bd0e4d9c5810077393492bc09e4c34e [file] [log] [blame]
Justin Lebar21e5d4f2016-01-14 21:41:27 +00001// Tests that ptxas and fatbinary are correctly invoked during CUDA compilation.
2//
3// REQUIRES: clang-driver
4// REQUIRES: x86-registered-target
5// REQUIRES: nvptx-registered-target
6
Justin Lebar2836dcd2016-01-19 19:52:21 +00007// Regular compiles with -O{0,1,2,3,4,fast}. -O4 and -Ofast map to ptxas O3.
8// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
9// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
10// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
11// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT1 %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000012// RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
13// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
Justin Lebar2836dcd2016-01-19 19:52:21 +000014// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
15// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
16// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
17// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
18// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
19// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000020
Artem Belevich0a0e54c2016-02-16 22:03:20 +000021// With debugging enabled, ptxas should be run with no ptxas optimizations.
22// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -c %s 2>&1 \
23// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix DBG %s
24
Justin Lebarb41f33c2016-04-19 02:27:11 +000025// --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug.
26// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \
27// RUN: --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
28// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
29
Justin Lebar21e5d4f2016-01-14 21:41:27 +000030// Regular compile without -O. This should result in us passing -O0 to ptxas.
31// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
32// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
33
Justin Lebar2836dcd2016-01-19 19:52:21 +000034// Regular compiles with -Os and -Oz. For lack of a better option, we map
35// these to ptxas -O2.
36// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
37// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
38// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
39// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
40
Justin Lebar21e5d4f2016-01-14 21:41:27 +000041// Regular compile targeting sm_35.
42// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
43// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
44
45// 32-bit compile.
46// RUN: %clang -### -target x86_32-linux-gnu -c %s 2>&1 \
47// RUN: | FileCheck -check-prefix ARCH32 -check-prefix SM20 %s
48
49// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
50// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
51// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
52
53// Check -Xcuda-ptxas and -Xcuda-fatbinary
54// RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \
55// RUN: -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \
56// RUN: | FileCheck -check-prefix SM20 -check-prefix PTXAS-EXTRA \
57// RUN: -check-prefix FATBINARY-EXTRA %s
58
59// Match clang job that produces PTX assembly.
60// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda"
61// SM20: "-target-cpu" "sm_20"
62// SM35: "-target-cpu" "sm_35"
63// SM20: "-o" "[[PTXFILE:[^"]*]]"
64// SM35: "-o" "[[PTXFILE:[^"]*]]"
65
66// Match the call to ptxas (which assembles PTX to SASS).
67// CHECK:ptxas
68// ARCH64: "-m64"
69// ARCH32: "-m32"
70// OPT0: "-O0"
Artem Belevich0a0e54c2016-02-16 22:03:20 +000071// OPT0-NOT: "-g"
Justin Lebar2836dcd2016-01-19 19:52:21 +000072// OPT1: "-O1"
Artem Belevich0a0e54c2016-02-16 22:03:20 +000073// OPT1-NOT: "-g"
Justin Lebar21e5d4f2016-01-14 21:41:27 +000074// OPT2: "-O2"
Artem Belevich0a0e54c2016-02-16 22:03:20 +000075// OPT2-NOT: "-g"
Justin Lebar2836dcd2016-01-19 19:52:21 +000076// OPT3: "-O3"
Artem Belevich0a0e54c2016-02-16 22:03:20 +000077// OPT3-NOT: "-g"
78// DBG: "-g" "--dont-merge-basicblocks" "--return-at-end"
Justin Lebar21e5d4f2016-01-14 21:41:27 +000079// SM20: "--gpu-name" "sm_20"
80// SM35: "--gpu-name" "sm_35"
81// SM20: "--output-file" "[[CUBINFILE:[^"]*]]"
82// SM35: "--output-file" "[[CUBINFILE:[^"]*]]"
83// PTXAS-EXTRA: "-foo1"
84// PTXAS-EXTRA-SAME: "-foo2"
85// CHECK-SAME: "[[PTXFILE]]"
86
87// Match the call to fatbinary (which combines all our PTX and SASS into one
88// blob).
89// CHECK:fatbinary
90// CHECK-DAG: "--cuda"
91// ARCH64-DAG: "-64"
92// ARCH32-DAG: "-32"
93// CHECK-DAG: "--create" "[[FATBINARY:[^"]*]]"
94// SM20-DAG: "--image=profile=compute_20,file=[[PTXFILE]]"
95// SM35-DAG: "--image=profile=compute_35,file=[[PTXFILE]]"
96// SM20-DAG: "--image=profile=sm_20,file=[[CUBINFILE]]"
97// SM35-DAG: "--image=profile=sm_35,file=[[CUBINFILE]]"
98// FATBINARY-EXTRA: "-bar1"
99// FATBINARY-EXTRA-SAME: "-bar2"
100
101// Match the clang job for host compilation.
102// CHECK: "-cc1" "-triple" "x86_64--linux-gnu"
103// CHECK-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"