// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "components/zucchini/disassembler_win32.h"
6
7#include <stddef.h>
8
9#include <algorithm>
10
11#include "base/logging.h"
12#include "base/numerics/safe_conversions.h"
13#include "components/zucchini/abs32_utils.h"
14#include "components/zucchini/algorithm.h"
15#include "components/zucchini/buffer_source.h"
16#include "components/zucchini/rel32_finder.h"
17#include "components/zucchini/rel32_utils.h"
Etienne Pierre-dorayae27d8e2018-07-24 15:42:33 +000018#include "components/zucchini/reloc_win32.h"
Samuel Huang06f1ae92018-03-13 18:19:34 +000019
20namespace zucchini {
21
22namespace {
23
24// Decides whether |image| points to a Win32 PE file. If this is a possibility,
25// assigns |source| to enable further parsing, and returns true. Otherwise
26// leaves |source| at an undefined state and returns false.
Samuel Huang06f1ae92018-03-13 18:19:34 +000027bool ReadWin32Header(ConstBufferView image, BufferSource* source) {
28 *source = BufferSource(image);
29
30 // Check "MZ" magic of DOS header.
31 if (!source->CheckNextBytes({'M', 'Z'}))
32 return false;
33
34 const auto* dos_header = source->GetPointer<pe::ImageDOSHeader>();
Samuel Huang431d1192018-12-31 19:59:22 +000035 // For |e_lfanew|, reject on misalignment or overlap with DOS header.
36 if (!dos_header || (dos_header->e_lfanew & 7) != 0 ||
37 dos_header->e_lfanew < 0U + sizeof(pe::ImageDOSHeader)) {
Samuel Huang06f1ae92018-03-13 18:19:34 +000038 return false;
Samuel Huang431d1192018-12-31 19:59:22 +000039 }
Samuel Huang06f1ae92018-03-13 18:19:34 +000040 // Offset to PE header is in DOS header.
41 *source = std::move(BufferSource(image).Skip(dos_header->e_lfanew));
42 // Check 'PE\0\0' magic from PE header.
43 if (!source->ConsumeBytes({'P', 'E', 0, 0}))
44 return false;
45
46 return true;
47}
48
Samuel Huang3e1f64d2021-08-04 00:58:50 +000049template <class TRAITS>
Samuel Huang06f1ae92018-03-13 18:19:34 +000050const pe::ImageDataDirectory* ReadDataDirectory(
Samuel Huang3e1f64d2021-08-04 00:58:50 +000051 const typename TRAITS::ImageOptionalHeader* optional_header,
Samuel Huang06f1ae92018-03-13 18:19:34 +000052 size_t index) {
53 if (index >= optional_header->number_of_rva_and_sizes)
54 return nullptr;
55 return &optional_header->data_directory[index];
56}
57
58// Decides whether |section| (assumed value) is a section that contains code.
Samuel Huang3e1f64d2021-08-04 00:58:50 +000059template <class TRAITS>
Samuel Huang06f1ae92018-03-13 18:19:34 +000060bool IsWin32CodeSection(const pe::ImageSectionHeader& section) {
61 return (section.characteristics & kCodeCharacteristics) ==
62 kCodeCharacteristics;
63}
64
65} // namespace
66
67/******** Win32X86Traits ********/
68
69// static
70constexpr Bitness Win32X86Traits::kBitness;
71constexpr ExecutableType Win32X86Traits::kExeType;
72const char Win32X86Traits::kExeTypeString[] = "Windows PE x86";
73
74/******** Win32X64Traits ********/
75
76// static
77constexpr Bitness Win32X64Traits::kBitness;
78constexpr ExecutableType Win32X64Traits::kExeType;
79const char Win32X64Traits::kExeTypeString[] = "Windows PE x64";
80
81/******** DisassemblerWin32 ********/
82
83// static.
Samuel Huang3e1f64d2021-08-04 00:58:50 +000084template <class TRAITS>
85bool DisassemblerWin32<TRAITS>::QuickDetect(ConstBufferView image) {
Samuel Huang06f1ae92018-03-13 18:19:34 +000086 BufferSource source;
Samuel Huang3e1f64d2021-08-04 00:58:50 +000087 return ReadWin32Header(image, &source);
Samuel Huang06f1ae92018-03-13 18:19:34 +000088}
89
Samuel Huang56a6ff42018-03-13 21:35:53 +000090// |num_equivalence_iterations_| = 2 for reloc -> abs32.
Samuel Huang3e1f64d2021-08-04 00:58:50 +000091template <class TRAITS>
92DisassemblerWin32<TRAITS>::DisassemblerWin32() : Disassembler(2) {}
Samuel Huang06f1ae92018-03-13 18:19:34 +000093
Samuel Huang3e1f64d2021-08-04 00:58:50 +000094template <class TRAITS>
95DisassemblerWin32<TRAITS>::~DisassemblerWin32() = default;
Samuel Huang06f1ae92018-03-13 18:19:34 +000096
Samuel Huang3e1f64d2021-08-04 00:58:50 +000097template <class TRAITS>
98ExecutableType DisassemblerWin32<TRAITS>::GetExeType() const {
Samuel Huang06f1ae92018-03-13 18:19:34 +000099 return Traits::kExeType;
100}
101
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000102template <class TRAITS>
103std::string DisassemblerWin32<TRAITS>::GetExeTypeString() const {
Samuel Huang06f1ae92018-03-13 18:19:34 +0000104 return Traits::kExeTypeString;
105}
106
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000107template <class TRAITS>
108std::vector<ReferenceGroup> DisassemblerWin32<TRAITS>::MakeReferenceGroups()
Samuel Huang06f1ae92018-03-13 18:19:34 +0000109 const {
110 return {
111 {ReferenceTypeTraits{2, TypeTag(kReloc), PoolTag(kReloc)},
112 &DisassemblerWin32::MakeReadRelocs, &DisassemblerWin32::MakeWriteRelocs},
113 {ReferenceTypeTraits{Traits::kVAWidth, TypeTag(kAbs32), PoolTag(kAbs32)},
114 &DisassemblerWin32::MakeReadAbs32, &DisassemblerWin32::MakeWriteAbs32},
115 {ReferenceTypeTraits{4, TypeTag(kRel32), PoolTag(kRel32)},
116 &DisassemblerWin32::MakeReadRel32, &DisassemblerWin32::MakeWriteRel32},
117 };
118}
119
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000120template <class TRAITS>
121std::unique_ptr<ReferenceReader> DisassemblerWin32<TRAITS>::MakeReadRelocs(
Samuel Huang06f1ae92018-03-13 18:19:34 +0000122 offset_t lo,
123 offset_t hi) {
Samuel Huang431d1192018-12-31 19:59:22 +0000124 if (!ParseAndStoreRelocBlocks())
125 return std::make_unique<EmptyReferenceReader>();
Samuel Huang06f1ae92018-03-13 18:19:34 +0000126
127 RelocRvaReaderWin32 reloc_rva_reader(image_, reloc_region_,
128 reloc_block_offsets_, lo, hi);
129 CHECK_GE(image_.size(), Traits::kVAWidth);
130 offset_t offset_bound =
131 base::checked_cast<offset_t>(image_.size() - Traits::kVAWidth + 1);
132 return std::make_unique<RelocReaderWin32>(std::move(reloc_rva_reader),
133 Traits::kRelocType, offset_bound,
134 translator_);
135}
136
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000137template <class TRAITS>
138std::unique_ptr<ReferenceReader> DisassemblerWin32<TRAITS>::MakeReadAbs32(
Samuel Huang06f1ae92018-03-13 18:19:34 +0000139 offset_t lo,
140 offset_t hi) {
141 ParseAndStoreAbs32();
142 Abs32RvaExtractorWin32 abs_rva_extractor(
143 image_, {Traits::kBitness, image_base_}, abs32_locations_, lo, hi);
144 return std::make_unique<Abs32ReaderWin32>(std::move(abs_rva_extractor),
145 translator_);
146}
147
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000148template <class TRAITS>
149std::unique_ptr<ReferenceReader> DisassemblerWin32<TRAITS>::MakeReadRel32(
Samuel Huang06f1ae92018-03-13 18:19:34 +0000150 offset_t lo,
151 offset_t hi) {
152 ParseAndStoreRel32();
153 return std::make_unique<Rel32ReaderX86>(image_, lo, hi, &rel32_locations_,
154 translator_);
155}
156
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000157template <class TRAITS>
158std::unique_ptr<ReferenceWriter> DisassemblerWin32<TRAITS>::MakeWriteRelocs(
Samuel Huang06f1ae92018-03-13 18:19:34 +0000159 MutableBufferView image) {
Samuel Huang431d1192018-12-31 19:59:22 +0000160 if (!ParseAndStoreRelocBlocks())
161 return std::make_unique<EmptyReferenceWriter>();
162
Samuel Huang06f1ae92018-03-13 18:19:34 +0000163 return std::make_unique<RelocWriterWin32>(Traits::kRelocType, image,
164 reloc_region_, reloc_block_offsets_,
165 translator_);
166}
167
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000168template <class TRAITS>
169std::unique_ptr<ReferenceWriter> DisassemblerWin32<TRAITS>::MakeWriteAbs32(
Samuel Huang06f1ae92018-03-13 18:19:34 +0000170 MutableBufferView image) {
171 return std::make_unique<Abs32WriterWin32>(
172 image, AbsoluteAddress(Traits::kBitness, image_base_), translator_);
173}
174
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000175template <class TRAITS>
176std::unique_ptr<ReferenceWriter> DisassemblerWin32<TRAITS>::MakeWriteRel32(
Samuel Huang06f1ae92018-03-13 18:19:34 +0000177 MutableBufferView image) {
178 return std::make_unique<Rel32WriterX86>(image, translator_);
179}
180
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000181template <class TRAITS>
182bool DisassemblerWin32<TRAITS>::Parse(ConstBufferView image) {
Samuel Huang06f1ae92018-03-13 18:19:34 +0000183 image_ = image;
184 return ParseHeader();
185}
186
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000187template <class TRAITS>
188bool DisassemblerWin32<TRAITS>::ParseHeader() {
Samuel Huang06f1ae92018-03-13 18:19:34 +0000189 BufferSource source;
190
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000191 if (!ReadWin32Header(image_, &source))
Samuel Huang06f1ae92018-03-13 18:19:34 +0000192 return false;
193
Samuel Huang431d1192018-12-31 19:59:22 +0000194 constexpr size_t kDataDirBase =
195 offsetof(typename Traits::ImageOptionalHeader, data_directory);
Samuel Huang06f1ae92018-03-13 18:19:34 +0000196 auto* coff_header = source.GetPointer<pe::ImageFileHeader>();
Samuel Huang431d1192018-12-31 19:59:22 +0000197 if (!coff_header || coff_header->size_of_optional_header < kDataDirBase)
Samuel Huang06f1ae92018-03-13 18:19:34 +0000198 return false;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000199
Samuel Huang431d1192018-12-31 19:59:22 +0000200 // |number_of_rva_and_sizes < kImageNumberOfDirectoryEntries| is possible. So
201 // in theory, GetPointer() on ImageOptionalHeader can reach EOF for a tiny PE
202 // file, causing false rejection. However, this should not occur for practical
203 // cases; and rejection is okay for corner cases (e.g., from a fuzzer).
Samuel Huang06f1ae92018-03-13 18:19:34 +0000204 auto* optional_header =
205 source.GetPointer<typename Traits::ImageOptionalHeader>();
206 if (!optional_header || optional_header->magic != Traits::kMagic)
207 return false;
208
Samuel Huang431d1192018-12-31 19:59:22 +0000209 // Check |optional_header->number_of_rva_and_sizes|.
210 const size_t data_dir_size =
211 coff_header->size_of_optional_header - kDataDirBase;
212 const size_t num_data_dir = data_dir_size / sizeof(pe::ImageDataDirectory);
213 if (num_data_dir != optional_header->number_of_rva_and_sizes ||
214 num_data_dir * sizeof(pe::ImageDataDirectory) != data_dir_size ||
215 num_data_dir > pe::kImageNumberOfDirectoryEntries) {
Samuel Huang06f1ae92018-03-13 18:19:34 +0000216 return false;
Samuel Huang431d1192018-12-31 19:59:22 +0000217 }
Samuel Huang06f1ae92018-03-13 18:19:34 +0000218
219 base_relocation_table_ = ReadDataDirectory<Traits>(
220 optional_header, pe::kIndexOfBaseRelocationTable);
221 if (!base_relocation_table_)
222 return false;
223
224 image_base_ = optional_header->image_base;
225
226 // |optional_header->size_of_image| is the size of the image when loaded into
227 // memory, and not the actual size on disk.
228 rva_t rva_bound = optional_header->size_of_image;
229 if (rva_bound >= kRvaBound)
230 return false;
231
232 // An exclusive upper bound of all offsets used in the image. This gets
233 // updated as sections get visited.
234 offset_t offset_bound =
235 base::checked_cast<offset_t>(source.begin() - image_.begin());
236
237 // Extract |sections_|.
238 size_t sections_count = coff_header->number_of_sections;
239 auto* sections_array =
240 source.GetArray<pe::ImageSectionHeader>(sections_count);
241 if (!sections_array)
242 return false;
243 sections_.assign(sections_array, sections_array + sections_count);
244
245 // Prepare |units| for offset-RVA translation.
246 std::vector<AddressTranslator::Unit> units;
247 units.reserve(sections_count);
248
249 // Visit each section, validate, and add address translation data to |units|.
250 bool has_text_section = false;
251 decltype(pe::ImageSectionHeader::virtual_address) prev_virtual_address = 0;
252 for (size_t i = 0; i < sections_count; ++i) {
253 const pe::ImageSectionHeader& section = sections_[i];
254 // Apply strict checks on section bounds.
255 if (!image_.covers(
256 {section.file_offset_of_raw_data, section.size_of_raw_data})) {
257 return false;
258 }
259 if (!RangeIsBounded(section.virtual_address, section.virtual_size,
260 rva_bound)) {
261 return false;
262 }
263
264 // PE sections should be sorted by RVAs. For robustness, we don't rely on
265 // this, so even if unsorted we don't care. Output warning though.
266 if (prev_virtual_address > section.virtual_address)
267 LOG(WARNING) << "RVA anomaly found for Section " << i;
268 prev_virtual_address = section.virtual_address;
269
270 // Add |section| data for offset-RVA translation.
271 units.push_back({section.file_offset_of_raw_data, section.size_of_raw_data,
272 section.virtual_address, section.virtual_size});
273
274 offset_t end_offset =
275 section.file_offset_of_raw_data + section.size_of_raw_data;
276 offset_bound = std::max(end_offset, offset_bound);
277 if (IsWin32CodeSection<Traits>(section))
278 has_text_section = true;
279 }
280
281 if (offset_bound > image_.size())
282 return false;
283 if (!has_text_section)
284 return false;
285
286 // Initialize |translator_| for offset-RVA translations. Any inconsistency
287 // (e.g., 2 offsets correspond to the same RVA) would invalidate the PE file.
288 if (translator_.Initialize(std::move(units)) != AddressTranslator::kSuccess)
289 return false;
290
291 // Resize |image_| to include only contents claimed by sections. Note that
292 // this may miss digital signatures at end of PE files, but for patching this
293 // is of minor concern.
294 image_.shrink(offset_bound);
295
296 return true;
297}
298
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000299template <class TRAITS>
300bool DisassemblerWin32<TRAITS>::ParseAndStoreRelocBlocks() {
Samuel Huang06f1ae92018-03-13 18:19:34 +0000301 if (has_parsed_relocs_)
Samuel Huang431d1192018-12-31 19:59:22 +0000302 return reloc_region_.lo() != kInvalidOffset;
303
Samuel Huang06f1ae92018-03-13 18:19:34 +0000304 has_parsed_relocs_ = true;
305 DCHECK(reloc_block_offsets_.empty());
306
307 offset_t relocs_offset =
308 translator_.RvaToOffset(base_relocation_table_->virtual_address);
309 size_t relocs_size = base_relocation_table_->size;
Samuel Huang431d1192018-12-31 19:59:22 +0000310 const BufferRegion temp_reloc_region = {relocs_offset, relocs_size};
311
312 // Reject bogus relocs. It's possible to have no reloc, so this is non-fatal!
313 if (relocs_offset == kInvalidOffset || !image_.covers(temp_reloc_region))
Samuel Huang06f1ae92018-03-13 18:19:34 +0000314 return false;
315
316 // Precompute offsets of all reloc blocks.
Samuel Huang431d1192018-12-31 19:59:22 +0000317 if (!RelocRvaReaderWin32::FindRelocBlocks(image_, temp_reloc_region,
318 &reloc_block_offsets_)) {
319 return false;
320 }
321 // Reassign |reloc_region_| only on success.
322 reloc_region_ = temp_reloc_region;
323 return true;
Samuel Huang06f1ae92018-03-13 18:19:34 +0000324}
325
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000326template <class TRAITS>
327bool DisassemblerWin32<TRAITS>::ParseAndStoreAbs32() {
Samuel Huang06f1ae92018-03-13 18:19:34 +0000328 if (has_parsed_abs32_)
329 return true;
330 has_parsed_abs32_ = true;
331
Samuel Huang98dd0172018-10-10 15:48:10 +0000332 // Read reloc targets as preliminary abs32 locations.
Samuel Huang06f1ae92018-03-13 18:19:34 +0000333 std::unique_ptr<ReferenceReader> relocs = MakeReadRelocs(0, offset_t(size()));
334 for (auto ref = relocs->GetNext(); ref.has_value(); ref = relocs->GetNext())
335 abs32_locations_.push_back(ref->target);
336
Samuel Huang06f1ae92018-03-13 18:19:34 +0000337 std::sort(abs32_locations_.begin(), abs32_locations_.end());
338
Samuel Huang98dd0172018-10-10 15:48:10 +0000339 // Abs32 references must have targets translatable to offsets. Remove those
340 // that are unable to do so.
341 size_t num_untranslatable = RemoveUntranslatableAbs32(
342 image_, {Traits::kBitness, image_base_}, translator_, &abs32_locations_);
343 LOG_IF(WARNING, num_untranslatable) << "Removed " << num_untranslatable
344 << " untranslatable abs32 references.";
345
Samuel Huang06f1ae92018-03-13 18:19:34 +0000346 // Abs32 reference bodies must not overlap. If found, simply remove them.
Samuel Huang98dd0172018-10-10 15:48:10 +0000347 size_t num_overlapping =
Etienne Pierre-doraye53806a2018-10-05 20:15:13 +0000348 RemoveOverlappingAbs32Locations(Traits::kVAWidth, &abs32_locations_);
Samuel Huang98dd0172018-10-10 15:48:10 +0000349 LOG_IF(WARNING, num_overlapping)
350 << "Removed " << num_overlapping
351 << " abs32 references with overlapping bodies.";
352
353 abs32_locations_.shrink_to_fit();
Samuel Huang06f1ae92018-03-13 18:19:34 +0000354 return true;
355}
356
Samuel Huang3e1f64d2021-08-04 00:58:50 +0000357template <class TRAITS>
358bool DisassemblerWin32<TRAITS>::ParseAndStoreRel32() {
Samuel Huang06f1ae92018-03-13 18:19:34 +0000359 if (has_parsed_rel32_)
360 return true;
361 has_parsed_rel32_ = true;
362
363 ParseAndStoreAbs32();
364
Samuel Huang06f1ae92018-03-13 18:19:34 +0000365 AddressTranslator::RvaToOffsetCache target_rva_checker(translator_);
366
367 for (const pe::ImageSectionHeader& section : sections_) {
368 if (!IsWin32CodeSection<Traits>(section))
369 continue;
370
371 rva_t start_rva = section.virtual_address;
372 rva_t end_rva = start_rva + section.virtual_size;
373
Samuel Huang036cb9d2019-03-25 15:28:52 +0000374 // |virtual_size < size_of_raw_data| is possible. In this case, disassembly
375 // should not proceed beyond |virtual_size|, so rel32 location RVAs remain
376 // translatable to file offsets.
377 uint32_t size_to_use =
378 std::min(section.virtual_size, section.size_of_raw_data);
Samuel Huang06f1ae92018-03-13 18:19:34 +0000379 ConstBufferView region =
Samuel Huang036cb9d2019-03-25 15:28:52 +0000380 image_[{section.file_offset_of_raw_data, size_to_use}];
Samuel Huang06f1ae92018-03-13 18:19:34 +0000381 Abs32GapFinder gap_finder(image_, region, abs32_locations_,
382 Traits::kVAWidth);
Samuel Huang1cec5a72021-06-01 18:29:53 +0000383 typename Traits::RelFinder rel_finder(image_, translator_);
Samuel Huang06f1ae92018-03-13 18:19:34 +0000384 // Iterate over gaps between abs32 references, to avoid collision.
Samuel Huang1cec5a72021-06-01 18:29:53 +0000385 while (gap_finder.FindNext()) {
386 rel_finder.SetRegion(gap_finder.GetGap());
387 // Heuristically detect rel32 references, store if valid.
388 while (rel_finder.FindNext()) {
389 auto rel32 = rel_finder.GetRel32();
390 if (target_rva_checker.IsValid(rel32.target_rva) &&
391 (rel32.can_point_outside_section ||
392 (start_rva <= rel32.target_rva && rel32.target_rva < end_rva))) {
393 rel_finder.Accept();
394 rel32_locations_.push_back(rel32.location);
Samuel Huang06f1ae92018-03-13 18:19:34 +0000395 }
396 }
397 }
398 }
399 rel32_locations_.shrink_to_fit();
400 // |sections_| entries are usually sorted by offset, but there's no guarantee.
401 // So sort explicitly, to be sure.
402 std::sort(rel32_locations_.begin(), rel32_locations_.end());
403 return true;
404}
405
406// Explicit instantiation for supported classes.
407template class DisassemblerWin32<Win32X86Traits>;
408template class DisassemblerWin32<Win32X64Traits>;
409
410} // namespace zucchini