blob: 63b74fc06b9b575e444a7ec4362b5b254ff34fe6 [file] [log] [blame]
Justin Lebar66c4fd72016-11-18 00:41:22 +00001// Tests that ptxas and fatbinary are invoked correctly during CUDA
2// compilation.
Justin Lebar21e5d4f2016-01-14 21:41:27 +00003//
4// REQUIRES: clang-driver
5// REQUIRES: x86-registered-target
6// REQUIRES: nvptx-registered-target
7
Justin Lebar2836dcd2016-01-19 19:52:21 +00008// Regular compiles with -O{0,1,2,3,4,fast}. -O4 and -Ofast map to ptxas -O3.
9// RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
10// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
11// RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
12// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT1 %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000013// RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
14// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
Justin Lebar2836dcd2016-01-19 19:52:21 +000015// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
16// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
17// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
18// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
19// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
20// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
Justin Lebar21e5d4f2016-01-14 21:41:27 +000021
Artem Belevich0a0e54c2016-02-16 22:03:20 +000022// With debugging enabled, ptxas should be run with no ptxas optimizations.
23// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -c %s 2>&1 \
24// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix DBG %s
25
Justin Lebarb41f33c2016-04-19 02:27:11 +000026// --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug.
Serge Pavlovb43573b2017-05-24 14:57:17 +000027// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \
28// RUN: --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
Justin Lebarb41f33c2016-04-19 02:27:11 +000029// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
30
Justin Lebar21e5d4f2016-01-14 21:41:27 +000031// Regular compile without -O. This should result in us passing -O0 to ptxas.
32// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
33// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
34
Justin Lebar2836dcd2016-01-19 19:52:21 +000035// Regular compiles with -Os and -Oz. For lack of a better option, we map
36// these to ptxas -O2.
37// RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
38// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
39// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
40// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
41
Justin Lebar21e5d4f2016-01-14 21:41:27 +000042// Regular compile targeting sm_35.
43// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
44// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
45
46// 32-bit compile.
47// RUN: %clang -### -target x86_32-linux-gnu -c %s 2>&1 \
48// RUN: | FileCheck -check-prefix ARCH32 -check-prefix SM20 %s
49
50// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
51// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
52// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
53
54// Check -Xcuda-ptxas and -Xcuda-fatbinary
55// RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \
56// RUN: -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \
57// RUN: | FileCheck -check-prefix SM20 -check-prefix PTXAS-EXTRA \
58// RUN: -check-prefix FATBINARY-EXTRA %s
59
Justin Lebar66c4fd72016-11-18 00:41:22 +000060// MacOS spot-checks
61// RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \
62// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
63// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \
64// RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
65// RUN: %clang -### -target x86_32-apple-macosx -c %s 2>&1 \
66// RUN: | FileCheck -check-prefix ARCH32 -check-prefix SM20 %s
67
Gheorghe-Teodor Bercea53431bc2017-08-07 20:19:23 +000068// Check that CLANG forwards the -v flag to PTXAS.
69// RUN: %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \
70// RUN: | FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s
71
Justin Lebar21e5d4f2016-01-14 21:41:27 +000072// Match clang job that produces PTX assembly.
73// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda"
74// SM20: "-target-cpu" "sm_20"
75// SM35: "-target-cpu" "sm_35"
76// SM20: "-o" "[[PTXFILE:[^"]*]]"
77// SM35: "-o" "[[PTXFILE:[^"]*]]"
78
79// Match the call to ptxas (which assembles PTX to SASS).
80// CHECK:ptxas
81// ARCH64: "-m64"
82// ARCH32: "-m32"
83// OPT0: "-O0"
Artem Belevich0a0e54c2016-02-16 22:03:20 +000084// OPT0-NOT: "-g"
Justin Lebar2836dcd2016-01-19 19:52:21 +000085// OPT1: "-O1"
Artem Belevich0a0e54c2016-02-16 22:03:20 +000086// OPT1-NOT: "-g"
Justin Lebar21e5d4f2016-01-14 21:41:27 +000087// OPT2: "-O2"
Artem Belevich0a0e54c2016-02-16 22:03:20 +000088// OPT2-NOT: "-g"
Justin Lebar2836dcd2016-01-19 19:52:21 +000089// OPT3: "-O3"
Artem Belevich0a0e54c2016-02-16 22:03:20 +000090// OPT3-NOT: "-g"
91// DBG: "-g" "--dont-merge-basicblocks" "--return-at-end"
Justin Lebar21e5d4f2016-01-14 21:41:27 +000092// SM20: "--gpu-name" "sm_20"
93// SM35: "--gpu-name" "sm_35"
94// SM20: "--output-file" "[[CUBINFILE:[^"]*]]"
95// SM35: "--output-file" "[[CUBINFILE:[^"]*]]"
96// PTXAS-EXTRA: "-foo1"
97// PTXAS-EXTRA-SAME: "-foo2"
98// CHECK-SAME: "[[PTXFILE]]"
99
100// Match the call to fatbinary (which combines all our PTX and SASS into one
101// blob).
102// CHECK:fatbinary
103// CHECK-DAG: "--cuda"
104// ARCH64-DAG: "-64"
105// ARCH32-DAG: "-32"
106// CHECK-DAG: "--create" "[[FATBINARY:[^"]*]]"
107// SM20-DAG: "--image=profile=compute_20,file=[[PTXFILE]]"
108// SM35-DAG: "--image=profile=compute_35,file=[[PTXFILE]]"
109// SM20-DAG: "--image=profile=sm_20,file=[[CUBINFILE]]"
110// SM35-DAG: "--image=profile=sm_35,file=[[CUBINFILE]]"
111// FATBINARY-EXTRA: "-bar1"
112// FATBINARY-EXTRA-SAME: "-bar2"
113
114// Match the clang job for host compilation.
115// CHECK: "-cc1" "-triple" "x86_64--linux-gnu"
116// CHECK-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
Gheorghe-Teodor Bercea53431bc2017-08-07 20:19:23 +0000117
118// CHK-PTXAS-VERBOSE: ptxas{{.*}}" "-v"