blob: c15cc05a7d0df915a2abdf37d2e26045ffc4d177 [file] [log] [blame]
Justin Lebar66c4fd72016-11-18 00:41:22 +00001// Tests that ptxas and fatbinary are invoked correctly during CUDA
2// compilation.
Justin Lebar21e5d4f2016-01-14 21:41:27 +00003//
4// REQUIRES: clang-driver
5// REQUIRES: x86-registered-target
6// REQUIRES: nvptx-registered-target
7
Justin Lebar2836dcd2016-01-19 19:52:21 +00008// Regular compiles with -O{0,1,2,3,4,fast}. -O4 and -Ofast map to ptxas -O3.
9// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000010// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
Justin Lebar2836dcd2016-01-19 19:52:21 +000011// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000012// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT1 %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000013// RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000014// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
Justin Lebar2836dcd2016-01-19 19:52:21 +000015// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000016// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
Justin Lebar2836dcd2016-01-19 19:52:21 +000017// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000018// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
Justin Lebar2836dcd2016-01-19 19:52:21 +000019// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000020// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
Jonas Hahnfeld5379c6d2018-02-12 10:46:45 +000021// Generating relocatable device code
22// RUN: %clang -### -target x86_64-linux-gnu -fcuda-rdc -c %s 2>&1 \
23// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000024
Artem Belevich0a0e54c2016-02-16 22:03:20 +000025// With debugging enabled, ptxas should be run with no ptxas optimizations.
26// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000027// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,DBG %s
Artem Belevich0a0e54c2016-02-16 22:03:20 +000028
Justin Lebarb41f33c2016-04-19 02:27:11 +000029// --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug.
Serge Pavlovb43573b2017-05-24 14:57:17 +000030// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \
31// RUN: --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000032// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
Justin Lebarb41f33c2016-04-19 02:27:11 +000033
Justin Lebar21e5d4f2016-01-14 21:41:27 +000034// Regular compile without -O. This should result in us passing -O0 to ptxas.
35// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000036// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000037
Justin Lebar2836dcd2016-01-19 19:52:21 +000038// Regular compiles with -Os and -Oz. For lack of a better option, we map
39// these to ptxas -O2.
40// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000041// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
Justin Lebar2836dcd2016-01-19 19:52:21 +000042// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000043// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
Justin Lebar2836dcd2016-01-19 19:52:21 +000044
Justin Lebar21e5d4f2016-01-14 21:41:27 +000045// Regular compile targeting sm_35.
46// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000047// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
Jonas Hahnfeld5379c6d2018-02-12 10:46:45 +000048// Separate compilation targeting sm_35.
49// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fcuda-rdc -c %s 2>&1 \
50// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000051
52// 32-bit compile.
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000053// RUN: %clang -### -target i386-linux-gnu -c %s 2>&1 \
54// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s
Jonas Hahnfeld5379c6d2018-02-12 10:46:45 +000055// 32-bit compile when generating relocatable device code.
56// RUN: %clang -### -target i386-linux-gnu -fcuda-rdc -c %s 2>&1 \
57// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000058
59// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
60// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000061// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
Jonas Hahnfeld5379c6d2018-02-12 10:46:45 +000062// Check that we still pass -c when generating relocatable device code.
63// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fcuda-rdc -c %s 2>&1 \
64// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000065
66// Check -Xcuda-ptxas and -Xcuda-fatbinary
67// RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \
68// RUN: -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000069// RUN: | FileCheck -check-prefixes=CHECK,SM20,PTXAS-EXTRA,FATBINARY-EXTRA %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000070
Justin Lebar66c4fd72016-11-18 00:41:22 +000071// MacOS spot-checks
72// RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000073// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
Justin Lebar66c4fd72016-11-18 00:41:22 +000074// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000075// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
76// RUN: %clang -### -target i386-apple-macosx -c %s 2>&1 \
77// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s
Justin Lebar66c4fd72016-11-18 00:41:22 +000078
Jonas Hahnfeld5379c6d2018-02-12 10:46:45 +000079// Check relocatable device code generation on MacOS.
80// RUN: %clang -### -target x86_64-apple-macosx -O0 -fcuda-rdc -c %s 2>&1 \
81// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
82// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fcuda-rdc -c %s 2>&1 \
83// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
84// RUN: %clang -### -target i386-apple-macosx -fcuda-rdc -c %s 2>&1 \
85// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s
86
Gheorghe-Teodor Bercea53431bc2017-08-07 20:19:23 +000087// Check that CLANG forwards the -v flag to PTXAS.
88// RUN: %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \
89// RUN: | FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s
90
Justin Lebar21e5d4f2016-01-14 21:41:27 +000091// Match clang job that produces PTX assembly.
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +000092// CHECK: "-cc1"
93// ARCH64-SAME: "-triple" "nvptx64-nvidia-cuda"
94// ARCH32-SAME: "-triple" "nvptx-nvidia-cuda"
95// SM20-SAME: "-target-cpu" "sm_20"
96// SM35-SAME: "-target-cpu" "sm_35"
97// SM20-SAME: "-o" "[[PTXFILE:[^"]*]]"
98// SM35-SAME: "-o" "[[PTXFILE:[^"]*]]"
Jonas Hahnfeld5379c6d2018-02-12 10:46:45 +000099// RDC-SAME: "-fcuda-rdc"
100// CHECK-NOT: "-fcuda-rdc"
Justin Lebar21e5d4f2016-01-14 21:41:27 +0000101
102// Match the call to ptxas (which assembles PTX to SASS).
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000103// CHECK: ptxas
104// ARCH64-SAME: "-m64"
105// ARCH32-SAME: "-m32"
106// OPT0-SAME: "-O0"
Artem Belevich0a0e54c2016-02-16 22:03:20 +0000107// OPT0-NOT: "-g"
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000108// OPT1-SAME: "-O1"
Artem Belevich0a0e54c2016-02-16 22:03:20 +0000109// OPT1-NOT: "-g"
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000110// OPT2-SAME: "-O2"
Artem Belevich0a0e54c2016-02-16 22:03:20 +0000111// OPT2-NOT: "-g"
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000112// OPT3-SAME: "-O3"
Artem Belevich0a0e54c2016-02-16 22:03:20 +0000113// OPT3-NOT: "-g"
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000114// DBG-SAME: "-g" "--dont-merge-basicblocks" "--return-at-end"
115// SM20-SAME: "--gpu-name" "sm_20"
116// SM35-SAME: "--gpu-name" "sm_35"
117// SM20-SAME: "--output-file" "[[CUBINFILE:[^"]*]]"
118// SM35-SAME: "--output-file" "[[CUBINFILE:[^"]*]]"
Justin Lebar21e5d4f2016-01-14 21:41:27 +0000119// CHECK-SAME: "[[PTXFILE]]"
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000120// PTXAS-EXTRA-SAME: "-foo1"
121// PTXAS-EXTRA-SAME: "-foo2"
Jonas Hahnfeld5379c6d2018-02-12 10:46:45 +0000122// RDC-SAME: "-c"
123// CHECK-NOT: "-c"
Justin Lebar21e5d4f2016-01-14 21:41:27 +0000124
125// Match the call to fatbinary (which combines all our PTX and SASS into one
126// blob).
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000127// CHECK: fatbinary
128// CHECK-SAME-DAG: "--cuda"
129// ARCH64-SAME-DAG: "-64"
130// ARCH32-SAME-DAG: "-32"
Justin Lebar21e5d4f2016-01-14 21:41:27 +0000131// CHECK-DAG: "--create" "[[FATBINARY:[^"]*]]"
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000132// SM20-SAME-DAG: "--image=profile=compute_20,file=[[PTXFILE]]"
133// SM35-SAME-DAG: "--image=profile=compute_35,file=[[PTXFILE]]"
134// SM20-SAME-DAG: "--image=profile=sm_20,file=[[CUBINFILE]]"
135// SM35-SAME-DAG: "--image=profile=sm_35,file=[[CUBINFILE]]"
136// FATBINARY-EXTRA-SAME: "-bar1"
Justin Lebar21e5d4f2016-01-14 21:41:27 +0000137// FATBINARY-EXTRA-SAME: "-bar2"
138
139// Match the clang job for host compilation.
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000140// CHECK: "-cc1"
141// ARCH64-SAME: "-triple" "x86_64-
142// ARCH32-SAME: "-triple" "i386-
Justin Lebar21e5d4f2016-01-14 21:41:27 +0000143// CHECK-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
Jonas Hahnfeld5379c6d2018-02-12 10:46:45 +0000144// RDC-SAME: "-fcuda-rdc"
145// CHECK-NOT: "-fcuda-rdc"
Gheorghe-Teodor Bercea53431bc2017-08-07 20:19:23 +0000146
Jonas Hahnfeld15dd8c62018-02-12 10:46:34 +0000147// CHK-PTXAS-VERBOSE: ptxas{{.*}}" "-v"