// Copyright (c) 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#ifndef SPIRV_TOOLS_OPTIMIZER_HPP_
16#define SPIRV_TOOLS_OPTIMIZER_HPP_
17
18#include <memory>
19#include <string>
20#include <unordered_map>
21#include <vector>
22
23#include "libspirv.hpp"
Lei Zhangf18e1f22016-09-12 14:11:46 -040024
25namespace spvtools {
26
27// C++ interface for SPIR-V optimization functionalities. It wraps the context
28// (including target environment and the corresponding SPIR-V grammar) and
29// provides methods for registering optimization passes and optimizing.
30//
31// Instances of this class provides basic thread-safety guarantee.
32class Optimizer {
33 public:
34 // The token for an optimization pass. It is returned via one of the
35 // Create*Pass() standalone functions at the end of this header file and
36 // consumed by the RegisterPass() method. Tokens are one-time objects that
37 // only support move; copying is not allowed.
38 struct PassToken {
39 struct Impl; // Opaque struct for holding inernal data.
40
41 PassToken(std::unique_ptr<Impl>);
42
43 // Tokens can only be moved. Copying is disabled.
44 PassToken(const PassToken&) = delete;
45 PassToken(PassToken&&);
46 PassToken& operator=(const PassToken&) = delete;
47 PassToken& operator=(PassToken&&);
48
49 ~PassToken();
50
51 std::unique_ptr<Impl> impl_; // Unique pointer to internal data.
52 };
53
54 // Constructs an instance with the given target |env|, which is used to decode
55 // the binaries to be optimized later.
56 //
57 // The constructed instance will have an empty message consumer, which just
58 // ignores all messages from the library. Use SetMessageConsumer() to supply
59 // one if messages are of concern.
60 explicit Optimizer(spv_target_env env);
61
62 // Disables copy/move constructor/assignment operations.
63 Optimizer(const Optimizer&) = delete;
64 Optimizer(Optimizer&&) = delete;
65 Optimizer& operator=(const Optimizer&) = delete;
66 Optimizer& operator=(Optimizer&&) = delete;
67
68 // Destructs this instance.
69 ~Optimizer();
70
71 // Sets the message consumer to the given |consumer|. The |consumer| will be
72 // invoked once for each message communicated from the library.
73 void SetMessageConsumer(MessageConsumer consumer);
74
75 // Registers the given |pass| to this optimizer. Passes will be run in the
76 // exact order of registration. The token passed in will be consumed by this
77 // method.
78 Optimizer& RegisterPass(PassToken&& pass);
79
80 // Optimizes the given SPIR-V module |original_binary| and writes the
81 // optimized binary into |optimized_binary|.
82 // Returns true on successful optimization, whether or not the module is
83 // modified. Returns false if errors occur when processing |original_binary|
84 // using any of the registered passes. In that case, no further passes are
85 // excuted and the contents in |optimized_binary| may be invalid.
86 //
87 // It's allowed to alias |original_binary| to the start of |optimized_binary|.
88 bool Run(const uint32_t* original_binary, size_t original_binary_size,
89 std::vector<uint32_t>* optimized_binary) const;
90
91 private:
92 struct Impl; // Opaque struct for holding internal data.
93 std::unique_ptr<Impl> impl_; // Unique pointer to internal data.
94};
95
96// Creates a null pass.
97// A null pass does nothing to the SPIR-V module to be optimized.
98Optimizer::PassToken CreateNullPass();
99
100// Creates a strip-debug-info pass.
101// A strip-debug-info pass removes all debug instructions (as documented in
102// Section 3.32.2 of the SPIR-V spec) of the SPIR-V module to be optimized.
103Optimizer::PassToken CreateStripDebugInfoPass();
104
qining144f59e2017-04-19 18:10:59 -0400105// Creates a set-spec-constant-default-value pass from a mapping from spec-ids
106// to the default values in the form of string.
Lei Zhangf18e1f22016-09-12 14:11:46 -0400107// A set-spec-constant-default-value pass sets the default values for the
108// spec constants that have SpecId decorations (i.e., those defined by
109// OpSpecConstant{|True|False} instructions).
110Optimizer::PassToken CreateSetSpecConstantDefaultValuePass(
111 const std::unordered_map<uint32_t, std::string>& id_value_map);
112
qining144f59e2017-04-19 18:10:59 -0400113// Creates a set-spec-constant-default-value pass from a mapping from spec-ids
114// to the default values in the form of bit pattern.
115// A set-spec-constant-default-value pass sets the default values for the
116// spec constants that have SpecId decorations (i.e., those defined by
117// OpSpecConstant{|True|False} instructions).
118Optimizer::PassToken CreateSetSpecConstantDefaultValuePass(
119 const std::unordered_map<uint32_t, std::vector<uint32_t>>& id_value_map);
120
David Neto11a867f2017-04-01 16:10:16 -0400121// Creates a flatten-decoration pass.
122// A flatten-decoration pass replaces grouped decorations with equivalent
123// ungrouped decorations. That is, it replaces each OpDecorationGroup
124// instruction and associated OpGroupDecorate and OpGroupMemberDecorate
125// instructions with equivalent OpDecorate and OpMemberDecorate instructions.
126// The pass does not attempt to preserve debug information for instructions
127// it removes.
128Optimizer::PassToken CreateFlattenDecorationPass();
129
Lei Zhangf18e1f22016-09-12 14:11:46 -0400130// Creates a freeze-spec-constant-value pass.
131// A freeze-spec-constant pass specializes the value of spec constants to
132// their default values. This pass only processes the spec constants that have
133// SpecId decorations (defined by OpSpecConstant, OpSpecConstantTrue, or
134// OpSpecConstantFalse instructions) and replaces them with their normal
135// counterparts (OpConstant, OpConstantTrue, or OpConstantFalse). The
136// corresponding SpecId annotation instructions will also be removed. This
137// pass does not fold the newly added normal constants and does not process
138// other spec constants defined by OpSpecConstantComposite or
139// OpSpecConstantOp.
140Optimizer::PassToken CreateFreezeSpecConstantValuePass();
141
142// Creates a fold-spec-constant-op-and-composite pass.
143// A fold-spec-constant-op-and-composite pass folds spec constants defined by
144// OpSpecConstantOp or OpSpecConstantComposite instruction, to normal Constants
145// defined by OpConstantTrue, OpConstantFalse, OpConstant, OpConstantNull, or
146// OpConstantComposite instructions. Note that spec constants defined with
147// OpSpecConstant, OpSpecConstantTrue, or OpSpecConstantFalse instructions are
148// not handled, as these instructions indicate their value are not determined
149// and can be changed in future. A spec constant is foldable if all of its
150// value(s) can be determined from the module. E.g., an integer spec constant
151// defined with OpSpecConstantOp instruction can be folded if its value won't
152// change later. This pass will replace the original OpSpecContantOp instruction
153// with an OpConstant instruction. When folding composite spec constants,
154// new instructions may be inserted to define the components of the composite
155// constant first, then the original spec constants will be replaced by
156// OpConstantComposite instructions.
157//
158// There are some operations not supported yet:
159// OpSConvert, OpFConvert, OpQuantizeToF16 and
160// all the operations under Kernel capability.
161// TODO(qining): Add support for the operations listed above.
162Optimizer::PassToken CreateFoldSpecConstantOpAndCompositePass();
163
164// Creates a unify-constant pass.
165// A unify-constant pass de-duplicates the constants. Constants with the exact
166// same value and identical form will be unified and only one constant will
167// be kept for each unique pair of type and value.
168// There are several cases not handled by this pass:
169// 1) Constants defined by OpConstantNull instructions (null constants) and
170// constants defined by OpConstantFalse, OpConstant or OpConstantComposite
171// with value 0 (zero-valued normal constants) are not considered equivalent.
172// So null constants won't be used to replace zero-valued normal constants,
173// vice versa.
174// 2) Whenever there are decorations to the constant's result id id, the
175// constant won't be handled, which means, it won't be used to replace any
176// other constants, neither can other constants replace it.
177// 3) NaN in float point format with different bit patterns are not unified.
178Optimizer::PassToken CreateUnifyConstantPass();
179
180// Creates a eliminate-dead-constant pass.
181// A eliminate-dead-constant pass removes dead constants, including normal
182// contants defined by OpConstant, OpConstantComposite, OpConstantTrue, or
183// OpConstantFalse and spec constants defined by OpSpecConstant,
184// OpSpecConstantComposite, OpSpecConstantTrue, OpSpecConstantFalse or
185// OpSpecConstantOp.
186Optimizer::PassToken CreateEliminateDeadConstantPass();
187
GregFad1d0352017-06-07 15:28:53 -0600188// Creates a block merge pass.
189// This pass searches for blocks with a single Branch to a block with no
190// other predecessors and merges the blocks into a single block. Continue
191// blocks and Merge blocks are not candidates for the second block.
192//
193// The pass is most useful after Dead Branch Elimination, which can leave
194// such sequences of blocks. Merging them makes subsequent passes more
195// effective, such as single block local store-load elimination.
196//
197// While this pass reduces the number of occurrences of this sequence, at
198// this time it does not guarantee all such sequences are eliminated.
199//
200// Presence of phi instructions can inhibit this optimization. Handling
201// these is left for future improvements.
202Optimizer::PassToken CreateBlockMergePass();
203
Greg Fischer04fcc662016-11-10 10:11:50 -0700204// Creates an inline pass.
205// An inline pass exhaustively inlines all function calls in all functions
206// designated as an entry point. The intent is to enable, albeit through
207// brute force, analysis and optimization across function calls by subsequent
208// passes. As the inlining is exhaustive, there is no attempt to optimize for
209// size or runtime performance. Functions that are not designated as entry
210// points are not changed.
211Optimizer::PassToken CreateInlinePass();
GregF7c8da662017-05-18 14:51:55 -0600212
213// Creates a single-block local variable load/store elimination pass.
214// For every entry point function, do single block memory optimization of
215// function variables referenced only with non-access-chain loads and stores.
216// For each targeted variable load, if previous store to that variable in the
217// block, replace the load's result id with the value id of the store.
218// If previous load within the block, replace the current load's result id
219// with the previous load's result id. In either case, delete the current
220// load. Finally, check if any remaining stores are useless, and delete store
221// and variable if possible.
222//
223// The presence of access chain references and function calls can inhibit
224// the above optimization.
225//
226// Only modules with logical addressing are currently processed.
227//
228// This pass is most effective if preceeded by Inlining and
229// LocalAccessChainConvert. This pass will reduce the work needed to be done
GregFcc8bad32017-06-16 15:37:31 -0600230// by LocalSingleStoreElim and LocalMultiStoreElim.
GregF7c8da662017-05-18 14:51:55 -0600231Optimizer::PassToken CreateLocalSingleBlockLoadStoreElimPass();
Greg Fischer04fcc662016-11-10 10:11:50 -0700232
GregF52e247f2017-06-02 13:23:20 -0600233// Create dead branch elimination pass.
234// For each entry point function, this pass will look for SelectionMerge
235// BranchConditionals with constant condition and convert to a Branch to
236// the indicated label. It will delete resulting dead blocks.
237//
238// This pass only works on shaders (guaranteed to have structured control
239// flow). Note that some such branches and blocks may be left to avoid
240// creating invalid control flow. Improving this is left to future work.
241//
242// This pass is most effective when preceeded by passes which eliminate
243// local loads and stores, effectively propagating constant values where
244// possible.
245Optimizer::PassToken CreateDeadBranchElimPass();
246
GregFcc8bad32017-06-16 15:37:31 -0600247// Creates an SSA local variable load/store elimination pass.
248// For every entry point function, eliminate all loads and stores of function
249// scope variables only referenced with non-access-chain loads and stores.
250// Eliminate the variables as well.
251//
252// The presence of access chain references and function calls can inhibit
253// the above optimization.
254//
255// Only shader modules with logical addressing are currently processed.
256// Currently modules with any extensions enabled are not processed. This
257// is left for future work.
258//
259// This pass is most effective if preceeded by Inlining and
260// LocalAccessChainConvert. LocalSingleStoreElim and LocalSingleBlockElim
261// will reduce the work that this pass has to do.
262Optimizer::PassToken CreateLocalMultiStoreElimPass();
263
GregFaa7e6872017-05-12 17:27:21 -0600264// Creates a local access chain conversion pass.
265// A local access chain conversion pass identifies all function scope
266// variables which are accessed only with loads, stores and access chains
267// with constant indices. It then converts all loads and stores of such
268// variables into equivalent sequences of loads, stores, extracts and inserts.
269//
270// This pass only processes entry point functions. It currently only converts
271// non-nested, non-ptr access chains. It does not process modules with
272// non-32-bit integer types present. Optional memory access options on loads
273// and stores are ignored as we are only processing function scope variables.
274//
275// This pass unifies access to these variables to a single mode and simplifies
276// subsequent analysis and elimination of these variables along with their
277// loads and stores allowing values to propagate to their points of use where
278// possible.
279Optimizer::PassToken CreateLocalAccessChainConvertPass();
280
GregF9de4e692017-06-08 10:37:21 -0600281// Create aggressive dead code elimination pass
282// This pass eliminates unused code from functions. In addition,
283// it detects and eliminates code which may have spurious uses but which do
284// not contribute to the output of the function. The most common cause of
285// such code sequences is summations in loops whose result is no longer used
286// due to dead code elimination. This optimization has additional compile
287// time cost over standard dead code elimination.
288//
289// This pass only processes entry point functions. It also only processes
290// shaders with logical addressing. It currently will not process functions
291// with function calls. It currently only supports the GLSL.std.450 extended
292// instruction set. It currently does not support any extensions.
293//
294// This pass will be made more effective by first running passes that remove
295// dead control flow and inlines function calls.
296//
297// This pass can be especially useful after running Local Access Chain
298// Conversion, which tends to cause cycles of dead code to be left after
299// Store/Load elimination passes are completed. These cycles cannot be
300// eliminated with standard dead code elimination.
301Optimizer::PassToken CreateAggressiveDCEPass();
302
GregF0c5722f2017-05-19 17:31:28 -0600303// Creates a local single store elimination pass.
304// For each entry point function, this pass eliminates loads and stores for
305// function scope variable that are stored to only once, where possible. Only
306// whole variable loads and stores are eliminated; access-chain references are
307// not optimized. Replace all loads of such variables with the value that is
308// stored and eliminate any resulting dead code.
309//
310// Currently, the presence of access chains and function calls can inhibit this
311// pass, however the Inlining and LocalAccessChainConvert passes can make it
312// more effective. In additional, many non-load/store memory operations are
313// not supported and will prohibit optimization of a function. Support of
314// these operations are future work.
315//
316// This pass will reduce the work needed to be done by LocalSingleBlockElim
GregFcc8bad32017-06-16 15:37:31 -0600317// and LocalMultiStoreElim and can improve the effectiveness of other passes
318// such as DeadBranchElimination which depend on values for their analysis.
GregF0c5722f2017-05-19 17:31:28 -0600319Optimizer::PassToken CreateLocalSingleStoreElimPass();
320
GregF6136bf92017-05-26 10:33:11 -0600321// Creates an insert/extract elimination pass.
322// This pass processes each entry point function in the module, searching for
323// extracts on a sequence of inserts. It further searches the sequence for an
324// insert with indices identical to the extract. If such an insert can be
325// found before hitting a conflicting insert, the extract's result id is
326// replaced with the id of the values from the insert.
327//
328// Besides removing extracts this pass enables subsequent dead code elimination
329// passes to delete the inserts. This pass performs best after access chains are
330// converted to inserts and extracts and local loads and stores are eliminated.
331Optimizer::PassToken CreateInsertExtractElimPass();
332
GregF52e247f2017-06-02 13:23:20 -0600333// Create dead branch elimination pass.
334// For each entry point function, this pass will look for BranchConditionals
335// with constant condition and convert to a branch. The BranchConditional must
336// be preceeded by OpSelectionMerge. For all phi functions in merge block,
337// replace all uses with the id corresponding to the living predecessor.
338//
339// This pass is most effective when preceeded by passes which eliminate
340// local loads and stores, effectively propagating constant values where
341// possible.
342Optimizer::PassToken CreateDeadBranchElimPass();
343
GregF9de4e692017-06-08 10:37:21 -0600344// Create aggressive dead code elimination pass
345// This pass eliminates unused code from functions. In addition,
346// it detects and eliminates code which may have spurious uses but which do
347// not contribute to the output of the function. The most common cause of
348// such code sequences is summations in loops whose result is no longer used
349// due to dead code elimination. This optimization has additional compile
350// time cost over standard dead code elimination.
351//
352// This pass only processes entry point functions. It also only processes
353// shaders with logical addressing. It currently will not process functions
354// with function calls.
355//
356// This pass will be made more effective by first running passes that remove
357// dead control flow and inlines function calls.
358//
359// This pass can be especially useful after running Local Access Chain
360// Conversion, which tends to cause cycles of dead code to be left after
361// Store/Load elimination passes are completed. These cycles cannot be
362// eliminated with standard dead code elimination.
363Optimizer::PassToken CreateAggressiveDCEPass();
364
Andrey Tuganov1e309af2017-04-11 15:11:04 -0400365// Creates a compact ids pass.
366// The pass remaps result ids to a compact and gapless range starting from %1.
367Optimizer::PassToken CreateCompactIdsPass();
368
Lei Zhangf18e1f22016-09-12 14:11:46 -0400369} // namespace spvtools
370
371#endif // SPIRV_TOOLS_OPTIMIZER_HPP_