blob: 88567b2d99fae24732a62f15f58297469657aa47 [file] [log] [blame]
Alex Deymoaea4c1c2015-08-19 20:24:43 -07001//
2// Copyright (C) 2015 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
Alex Deymo14158572015-06-13 03:37:08 -070016
17#include "update_engine/payload_generator/delta_diff_utils.h"
18
19#include <algorithm>
20#include <map>
21
22#include <base/files/file_util.h>
23#include <base/format_macros.h>
24#include <base/strings/stringprintf.h>
25
Alex Deymo39910dc2015-11-09 17:04:30 -080026#include "update_engine/common/hash_calculator.h"
27#include "update_engine/common/subprocess.h"
28#include "update_engine/common/utils.h"
Alex Deymof0061352015-07-01 14:59:15 -070029#include "update_engine/payload_generator/block_mapping.h"
Alex Deymo39910dc2015-11-09 17:04:30 -080030#include "update_engine/payload_generator/bzip.h"
Alex Deymo14158572015-06-13 03:37:08 -070031#include "update_engine/payload_generator/delta_diff_generator.h"
32#include "update_engine/payload_generator/extent_ranges.h"
33#include "update_engine/payload_generator/extent_utils.h"
Alex Deymo14158572015-06-13 03:37:08 -070034
35using std::map;
36using std::string;
Alex Deymo14158572015-06-13 03:37:08 -070037using std::vector;
38
39namespace chromeos_update_engine {
40namespace {
41
42const char* const kBsdiffPath = "bsdiff";
Sen Jiang55c4f9b2016-02-10 11:26:20 -080043const char* const kImgdiffPath = "imgdiff";
Alex Deymo14158572015-06-13 03:37:08 -070044
45// The maximum destination size allowed for bsdiff. In general, bsdiff should
46// work for arbitrary big files, but the payload generation and payload
47// application requires a significant amount of RAM. We put a hard-limit of
48// 200 MiB that should not affect any released board, but will limit the
49// Chrome binary in ASan builders.
50const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024; // bytes
51
52// Process a range of blocks from |range_start| to |range_end| in the extent at
53// position |*idx_p| of |extents|. If |do_remove| is true, this range will be
54// removed, which may cause the extent to be trimmed, split or removed entirely.
55// The value of |*idx_p| is updated to point to the next extent to be processed.
56// Returns true iff the next extent to process is a new or updated one.
57bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p,
58 const bool do_remove, uint64_t range_start,
59 uint64_t range_end) {
60 size_t idx = *idx_p;
61 uint64_t start_block = (*extents)[idx].start_block();
62 uint64_t num_blocks = (*extents)[idx].num_blocks();
63 uint64_t range_size = range_end - range_start;
64
65 if (do_remove) {
66 if (range_size == num_blocks) {
67 // Remove the entire extent.
68 extents->erase(extents->begin() + idx);
69 } else if (range_end == num_blocks) {
70 // Trim the end of the extent.
71 (*extents)[idx].set_num_blocks(num_blocks - range_size);
72 idx++;
73 } else if (range_start == 0) {
74 // Trim the head of the extent.
75 (*extents)[idx].set_start_block(start_block + range_size);
76 (*extents)[idx].set_num_blocks(num_blocks - range_size);
77 } else {
78 // Trim the middle, splitting the remainder into two parts.
79 (*extents)[idx].set_num_blocks(range_start);
80 Extent e;
81 e.set_start_block(start_block + range_end);
82 e.set_num_blocks(num_blocks - range_end);
83 idx++;
84 extents->insert(extents->begin() + idx, e);
85 }
86 } else if (range_end == num_blocks) {
87 // Done with this extent.
88 idx++;
89 } else {
90 return false;
91 }
92
93 *idx_p = idx;
94 return true;
95}
96
97// Remove identical corresponding block ranges in |src_extents| and
98// |dst_extents|. Used for preventing moving of blocks onto themselves during
99// MOVE operations. The value of |total_bytes| indicates the actual length of
100// content; this may be slightly less than the total size of blocks, in which
101// case the last block is only partly occupied with data. Returns the total
102// number of bytes removed.
103size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents,
104 vector<Extent>* dst_extents,
105 const size_t total_bytes) {
106 size_t src_idx = 0;
107 size_t dst_idx = 0;
108 uint64_t src_offset = 0, dst_offset = 0;
109 bool new_src = true, new_dst = true;
110 size_t removed_bytes = 0, nonfull_block_bytes;
111 bool do_remove = false;
112 while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) {
113 if (new_src) {
114 src_offset = 0;
115 new_src = false;
116 }
117 if (new_dst) {
118 dst_offset = 0;
119 new_dst = false;
120 }
121
122 do_remove = ((*src_extents)[src_idx].start_block() + src_offset ==
123 (*dst_extents)[dst_idx].start_block() + dst_offset);
124
125 uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks();
126 uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks();
127 uint64_t min_num_blocks = std::min(src_num_blocks - src_offset,
128 dst_num_blocks - dst_offset);
129 uint64_t prev_src_offset = src_offset;
130 uint64_t prev_dst_offset = dst_offset;
131 src_offset += min_num_blocks;
132 dst_offset += min_num_blocks;
133
134 new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove,
135 prev_src_offset, src_offset);
136 new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove,
137 prev_dst_offset, dst_offset);
138 if (do_remove)
139 removed_bytes += min_num_blocks * kBlockSize;
140 }
141
142 // If we removed the last block and this block is only partly used by file
143 // content, deduct the unused portion from the total removed byte count.
144 if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize))
145 removed_bytes -= kBlockSize - nonfull_block_bytes;
146
147 return removed_bytes;
148}
149
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800150// Returns true if the given blob |data| contains gzip header magic.
151bool ContainsGZip(const brillo::Blob& data) {
152 const uint8_t kGZipMagic[] = {0x1f, 0x8b, 0x08, 0x00};
153 return std::search(data.begin(),
154 data.end(),
155 std::begin(kGZipMagic),
156 std::end(kGZipMagic)) != data.end();
157}
158
Alex Deymo14158572015-06-13 03:37:08 -0700159} // namespace
160
161namespace diff_utils {
162
Alex Deymob42b98d2015-07-06 17:42:38 -0700163bool DeltaReadPartition(
Alex Deymo14158572015-06-13 03:37:08 -0700164 vector<AnnotatedOperation>* aops,
Alex Deymob42b98d2015-07-06 17:42:38 -0700165 const PartitionConfig& old_part,
166 const PartitionConfig& new_part,
Alex Deymo2d3b2d62015-07-17 17:34:36 -0700167 ssize_t hard_chunk_blocks,
168 size_t soft_chunk_blocks,
Sen Jiang8cc502d2015-08-10 10:04:54 -0700169 BlobFileWriter* blob_file,
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800170 bool imgdiff_allowed,
Alex Deymo14158572015-06-13 03:37:08 -0700171 bool src_ops_allowed) {
172 ExtentRanges old_visited_blocks;
173 ExtentRanges new_visited_blocks;
174
Alex Deymof0061352015-07-01 14:59:15 -0700175 TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks(
176 aops,
177 old_part.path,
178 new_part.path,
179 old_part.fs_interface ? old_part.fs_interface->GetBlockCount() : 0,
180 new_part.fs_interface->GetBlockCount(),
Alex Deymo2d3b2d62015-07-17 17:34:36 -0700181 soft_chunk_blocks,
Alex Deymof0061352015-07-01 14:59:15 -0700182 src_ops_allowed,
Sen Jiang8cc502d2015-08-10 10:04:54 -0700183 blob_file,
Alex Deymof0061352015-07-01 14:59:15 -0700184 &old_visited_blocks,
185 &new_visited_blocks));
186
Alex Deymo14158572015-06-13 03:37:08 -0700187 map<string, vector<Extent>> old_files_map;
Alex Deymob42b98d2015-07-06 17:42:38 -0700188 if (old_part.fs_interface) {
Alex Deymo14158572015-06-13 03:37:08 -0700189 vector<FilesystemInterface::File> old_files;
Alex Deymob42b98d2015-07-06 17:42:38 -0700190 old_part.fs_interface->GetFiles(&old_files);
Alex Deymo14158572015-06-13 03:37:08 -0700191 for (const FilesystemInterface::File& file : old_files)
192 old_files_map[file.name] = file.extents;
193 }
194
Alex Deymob42b98d2015-07-06 17:42:38 -0700195 TEST_AND_RETURN_FALSE(new_part.fs_interface);
Alex Deymo14158572015-06-13 03:37:08 -0700196 vector<FilesystemInterface::File> new_files;
Alex Deymob42b98d2015-07-06 17:42:38 -0700197 new_part.fs_interface->GetFiles(&new_files);
Alex Deymo14158572015-06-13 03:37:08 -0700198
199 // The processing is very straightforward here, we generate operations for
200 // every file (and pseudo-file such as the metadata) in the new filesystem
201 // based on the file with the same name in the old filesystem, if any.
202 // Files with overlapping data blocks (like hardlinks or filesystems with tail
203 // packing or compression where the blocks store more than one file) are only
204 // generated once in the new image, but are also used only once from the old
205 // image due to some simplifications (see below).
206 for (const FilesystemInterface::File& new_file : new_files) {
207 // Ignore the files in the new filesystem without blocks. Symlinks with
208 // data blocks (for example, symlinks bigger than 60 bytes in ext2) are
209 // handled as normal files. We also ignore blocks that were already
210 // processed by a previous file.
211 vector<Extent> new_file_extents = FilterExtentRanges(
212 new_file.extents, new_visited_blocks);
213 new_visited_blocks.AddExtents(new_file_extents);
214
215 if (new_file_extents.empty())
216 continue;
217
218 LOG(INFO) << "Encoding file " << new_file.name << " ("
219 << BlocksInExtents(new_file_extents) << " blocks)";
220
221 // We can't visit each dst image inode more than once, as that would
222 // duplicate work. Here, we avoid visiting each source image inode
223 // more than once. Technically, we could have multiple operations
224 // that read the same blocks from the source image for diffing, but
225 // we choose not to avoid complexity. Eventually we will move away
226 // from using a graph/cycle detection/etc to generate diffs, and at that
227 // time, it will be easy (non-complex) to have many operations read
228 // from the same source blocks. At that time, this code can die. -adlr
229 vector<Extent> old_file_extents = FilterExtentRanges(
230 old_files_map[new_file.name], old_visited_blocks);
231 old_visited_blocks.AddExtents(old_file_extents);
232
233 TEST_AND_RETURN_FALSE(DeltaReadFile(
234 aops,
Alex Deymob42b98d2015-07-06 17:42:38 -0700235 old_part.path,
236 new_part.path,
Alex Deymo14158572015-06-13 03:37:08 -0700237 old_file_extents,
238 new_file_extents,
239 new_file.name, // operation name
Alex Deymo2d3b2d62015-07-17 17:34:36 -0700240 hard_chunk_blocks,
Sen Jiang8cc502d2015-08-10 10:04:54 -0700241 blob_file,
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800242 imgdiff_allowed,
Alex Deymo14158572015-06-13 03:37:08 -0700243 src_ops_allowed));
244 }
245 // Process all the blocks not included in any file. We provided all the unused
246 // blocks in the old partition as available data.
Alex Deymob42b98d2015-07-06 17:42:38 -0700247 vector<Extent> new_unvisited = {
248 ExtentForRange(0, new_part.size / kBlockSize)};
Alex Deymo14158572015-06-13 03:37:08 -0700249 new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks);
250 if (new_unvisited.empty())
251 return true;
252
253 vector<Extent> old_unvisited;
Alex Deymob42b98d2015-07-06 17:42:38 -0700254 if (old_part.fs_interface) {
255 old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize));
Alex Deymo14158572015-06-13 03:37:08 -0700256 old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks);
257 }
258
259 LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited)
Alex Deymo2d3b2d62015-07-17 17:34:36 -0700260 << " unwritten blocks using chunk size of "
261 << soft_chunk_blocks << " blocks.";
262 // We use the soft_chunk_blocks limit for the <non-file-data> as we don't
263 // really know the structure of this data and we should not expect it to have
264 // redundancy between partitions.
Alex Deymo14158572015-06-13 03:37:08 -0700265 TEST_AND_RETURN_FALSE(DeltaReadFile(
266 aops,
Alex Deymob42b98d2015-07-06 17:42:38 -0700267 old_part.path,
268 new_part.path,
Alex Deymo14158572015-06-13 03:37:08 -0700269 old_unvisited,
270 new_unvisited,
271 "<non-file-data>", // operation name
Alex Deymo2d3b2d62015-07-17 17:34:36 -0700272 soft_chunk_blocks,
Sen Jiang8cc502d2015-08-10 10:04:54 -0700273 blob_file,
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800274 imgdiff_allowed,
Alex Deymo14158572015-06-13 03:37:08 -0700275 src_ops_allowed));
276
277 return true;
278}
279
Alex Deymof0061352015-07-01 14:59:15 -0700280bool DeltaMovedAndZeroBlocks(
281 vector<AnnotatedOperation>* aops,
282 const string& old_part,
283 const string& new_part,
284 size_t old_num_blocks,
285 size_t new_num_blocks,
Alex Deymo2d3b2d62015-07-17 17:34:36 -0700286 ssize_t chunk_blocks,
Alex Deymof0061352015-07-01 14:59:15 -0700287 bool src_ops_allowed,
Sen Jiang8cc502d2015-08-10 10:04:54 -0700288 BlobFileWriter* blob_file,
Alex Deymof0061352015-07-01 14:59:15 -0700289 ExtentRanges* old_visited_blocks,
290 ExtentRanges* new_visited_blocks) {
291 vector<BlockMapping::BlockId> old_block_ids;
292 vector<BlockMapping::BlockId> new_block_ids;
293 TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part,
294 new_part,
295 old_num_blocks * kBlockSize,
296 new_num_blocks * kBlockSize,
297 kBlockSize,
298 &old_block_ids,
299 &new_block_ids));
300
301 // For minor-version=1, we map all the blocks that didn't move, regardless of
302 // the contents since they are already copied and no operation is required.
303 if (!src_ops_allowed) {
304 uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks);
305 for (uint64_t block = 0; block < num_blocks; block++) {
306 if (old_block_ids[block] == new_block_ids[block] &&
307 !old_visited_blocks->ContainsBlock(block) &&
308 !new_visited_blocks->ContainsBlock(block)) {
309 old_visited_blocks->AddBlock(block);
310 new_visited_blocks->AddBlock(block);
311 }
312 }
313 }
314
315 // A mapping from the block_id to the list of block numbers with that block id
316 // in the old partition. This is used to lookup where in the old partition
317 // is a block from the new partition.
318 map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map;
319
320 for (uint64_t block = old_num_blocks; block-- > 0; ) {
321 if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block))
322 old_blocks_map[old_block_ids[block]].push_back(block);
323 }
324
325 // The collection of blocks in the new partition with just zeros. This is a
326 // common case for free-space that's also problematic for bsdiff, so we want
327 // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of
328 // just zeros is so small that it doesn't make sense to spend the I/O reading
329 // zeros from the old partition.
330 vector<Extent> new_zeros;
331
332 vector<Extent> old_identical_blocks;
333 vector<Extent> new_identical_blocks;
334
335 for (uint64_t block = 0; block < new_num_blocks; block++) {
336 // Only produce operations for blocks that were not yet visited.
337 if (new_visited_blocks->ContainsBlock(block))
338 continue;
339 if (new_block_ids[block] == 0) {
340 AppendBlockToExtents(&new_zeros, block);
341 continue;
342 }
343
344 auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]);
345 // Check if the block exists in the old partition at all.
346 if (old_blocks_map_it == old_blocks_map.end() ||
347 old_blocks_map_it->second.empty())
348 continue;
349
350 AppendBlockToExtents(&old_identical_blocks,
351 old_blocks_map_it->second.back());
352 AppendBlockToExtents(&new_identical_blocks, block);
353 // We can't reuse source blocks in minor version 1 because the cycle
354 // breaking algorithm doesn't support that.
355 if (!src_ops_allowed)
356 old_blocks_map_it->second.pop_back();
357 }
358
359 // Produce operations for the zero blocks split per output extent.
360 size_t num_ops = aops->size();
361 new_visited_blocks->AddExtents(new_zeros);
362 for (Extent extent : new_zeros) {
363 TEST_AND_RETURN_FALSE(DeltaReadFile(
364 aops,
365 "",
366 new_part,
367 vector<Extent>(), // old_extents
368 vector<Extent>{extent}, // new_extents
369 "<zeros>",
370 chunk_blocks,
Sen Jiang8cc502d2015-08-10 10:04:54 -0700371 blob_file,
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800372 false, // imgdiff_allowed
Alex Deymof0061352015-07-01 14:59:15 -0700373 src_ops_allowed));
374 }
375 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
376 << BlocksInExtents(new_zeros) << " zeroed blocks";
377
378 // Produce MOVE/SOURCE_COPY operations for the moved blocks.
379 num_ops = aops->size();
380 if (chunk_blocks == -1)
381 chunk_blocks = new_num_blocks;
382 uint64_t used_blocks = 0;
383 old_visited_blocks->AddExtents(old_identical_blocks);
384 new_visited_blocks->AddExtents(new_identical_blocks);
385 for (Extent extent : new_identical_blocks) {
386 // We split the operation at the extent boundary or when bigger than
387 // chunk_blocks.
388 for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks();
389 op_block_offset += chunk_blocks) {
390 aops->emplace_back();
391 AnnotatedOperation* aop = &aops->back();
392 aop->name = "<identical-blocks>";
Alex Deymoa12ee112015-08-12 22:19:32 -0700393 aop->op.set_type(src_ops_allowed ? InstallOperation::SOURCE_COPY
394 : InstallOperation::MOVE);
Alex Deymof0061352015-07-01 14:59:15 -0700395
396 uint64_t chunk_num_blocks =
397 std::min(extent.num_blocks() - op_block_offset,
398 static_cast<uint64_t>(chunk_blocks));
399
400 // The current operation represents the move/copy operation for the
401 // sublist starting at |used_blocks| of length |chunk_num_blocks| where
402 // the src and dst are from |old_identical_blocks| and
403 // |new_identical_blocks| respectively.
404 StoreExtents(
405 ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks),
406 aop->op.mutable_src_extents());
407
408 Extent* op_dst_extent = aop->op.add_dst_extents();
409 op_dst_extent->set_start_block(extent.start_block() + op_block_offset);
410 op_dst_extent->set_num_blocks(chunk_num_blocks);
411 CHECK(
412 vector<Extent>{*op_dst_extent} == // NOLINT(whitespace/braces)
413 ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks));
414
415 used_blocks += chunk_num_blocks;
416 }
417 }
418 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
419 << used_blocks << " identical blocks moved";
420
421 return true;
422}
423
Alex Deymo14158572015-06-13 03:37:08 -0700424bool DeltaReadFile(
425 vector<AnnotatedOperation>* aops,
426 const string& old_part,
427 const string& new_part,
428 const vector<Extent>& old_extents,
429 const vector<Extent>& new_extents,
430 const string& name,
Alex Deymo2d3b2d62015-07-17 17:34:36 -0700431 ssize_t chunk_blocks,
Sen Jiang8cc502d2015-08-10 10:04:54 -0700432 BlobFileWriter* blob_file,
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800433 bool imgdiff_allowed,
Alex Deymo14158572015-06-13 03:37:08 -0700434 bool src_ops_allowed) {
Alex Vakulenko3f39d5c2015-10-13 09:27:13 -0700435 brillo::Blob data;
Alex Deymoa12ee112015-08-12 22:19:32 -0700436 InstallOperation operation;
Alex Deymo14158572015-06-13 03:37:08 -0700437
438 uint64_t total_blocks = BlocksInExtents(new_extents);
439 if (chunk_blocks == -1)
440 chunk_blocks = total_blocks;
441
442 // If bsdiff breaks again, blacklist the problem file by using:
443 // bsdiff_allowed = (name != "/foo/bar")
444 //
445 // TODO(dgarrett): chromium-os:15274 connect this test to the command line.
446 bool bsdiff_allowed = true;
447 if (static_cast<uint64_t>(chunk_blocks) * kBlockSize >
448 kMaxBsdiffDestinationSize) {
449 bsdiff_allowed = false;
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800450 imgdiff_allowed = false;
Alex Deymo14158572015-06-13 03:37:08 -0700451 }
452
453 if (!bsdiff_allowed) {
454 LOG(INFO) << "bsdiff blacklisting: " << name;
455 }
456
457 for (uint64_t block_offset = 0; block_offset < total_blocks;
458 block_offset += chunk_blocks) {
459 // Split the old/new file in the same chunks. Note that this could drop
460 // some information from the old file used for the new chunk. If the old
461 // file is smaller (or even empty when there's no old file) the chunk will
462 // also be empty.
463 vector<Extent> old_extents_chunk = ExtentsSublist(
464 old_extents, block_offset, chunk_blocks);
465 vector<Extent> new_extents_chunk = ExtentsSublist(
466 new_extents, block_offset, chunk_blocks);
467 NormalizeExtents(&old_extents_chunk);
468 NormalizeExtents(&new_extents_chunk);
469
470 TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part,
471 new_part,
472 old_extents_chunk,
473 new_extents_chunk,
474 bsdiff_allowed,
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800475 imgdiff_allowed,
Alex Deymo14158572015-06-13 03:37:08 -0700476 &data,
477 &operation,
478 src_ops_allowed));
479
480 // Check if the operation writes nothing.
481 if (operation.dst_extents_size() == 0) {
Alex Deymoa12ee112015-08-12 22:19:32 -0700482 if (operation.type() == InstallOperation::MOVE) {
Alex Deymo14158572015-06-13 03:37:08 -0700483 LOG(INFO) << "Empty MOVE operation ("
484 << name << "), skipping";
485 continue;
486 } else {
487 LOG(ERROR) << "Empty non-MOVE operation";
488 return false;
489 }
490 }
491
Alex Deymo14158572015-06-13 03:37:08 -0700492 // Now, insert into the list of operations.
493 AnnotatedOperation aop;
494 aop.name = name;
495 if (static_cast<uint64_t>(chunk_blocks) < total_blocks) {
496 aop.name = base::StringPrintf("%s:%" PRIu64,
497 name.c_str(), block_offset / chunk_blocks);
498 }
499 aop.op = operation;
Sen Jiang8cc502d2015-08-10 10:04:54 -0700500
501 // Write the data
Alex Deymoa12ee112015-08-12 22:19:32 -0700502 if (operation.type() != InstallOperation::MOVE &&
503 operation.type() != InstallOperation::SOURCE_COPY) {
Sen Jiang8cc502d2015-08-10 10:04:54 -0700504 TEST_AND_RETURN_FALSE(aop.SetOperationBlob(&data, blob_file));
505 } else {
506 TEST_AND_RETURN_FALSE(blob_file->StoreBlob(data) != -1);
507 }
Alex Deymo14158572015-06-13 03:37:08 -0700508 aops->emplace_back(aop);
509 }
510 return true;
511}
512
513bool ReadExtentsToDiff(const string& old_part,
514 const string& new_part,
515 const vector<Extent>& old_extents,
516 const vector<Extent>& new_extents,
517 bool bsdiff_allowed,
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800518 bool imgdiff_allowed,
Alex Vakulenko3f39d5c2015-10-13 09:27:13 -0700519 brillo::Blob* out_data,
Alex Deymoa12ee112015-08-12 22:19:32 -0700520 InstallOperation* out_op,
Alex Deymo14158572015-06-13 03:37:08 -0700521 bool src_ops_allowed) {
Alex Deymoa12ee112015-08-12 22:19:32 -0700522 InstallOperation operation;
Alex Deymo14158572015-06-13 03:37:08 -0700523 // Data blob that will be written to delta file.
Alex Vakulenko3f39d5c2015-10-13 09:27:13 -0700524 const brillo::Blob* data_blob = nullptr;
Alex Deymo14158572015-06-13 03:37:08 -0700525
526 // We read blocks from old_extents and write blocks to new_extents.
527 uint64_t blocks_to_read = BlocksInExtents(old_extents);
528 uint64_t blocks_to_write = BlocksInExtents(new_extents);
529
530 // Make copies of the extents so we can modify them.
531 vector<Extent> src_extents = old_extents;
532 vector<Extent> dst_extents = new_extents;
533
534 // Read in bytes from new data.
Alex Vakulenko3f39d5c2015-10-13 09:27:13 -0700535 brillo::Blob new_data;
Alex Deymo14158572015-06-13 03:37:08 -0700536 TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part,
537 new_extents,
538 &new_data,
539 kBlockSize * blocks_to_write,
540 kBlockSize));
541 TEST_AND_RETURN_FALSE(!new_data.empty());
542
543
544 // Using a REPLACE is always an option.
Alex Deymoa12ee112015-08-12 22:19:32 -0700545 operation.set_type(InstallOperation::REPLACE);
Alex Deymo14158572015-06-13 03:37:08 -0700546 data_blob = &new_data;
547
548 // Try compressing it with bzip2.
Alex Vakulenko3f39d5c2015-10-13 09:27:13 -0700549 brillo::Blob new_data_bz;
Alex Deymo14158572015-06-13 03:37:08 -0700550 TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz));
551 CHECK(!new_data_bz.empty());
552 if (new_data_bz.size() < data_blob->size()) {
553 // A REPLACE_BZ is better.
Alex Deymoa12ee112015-08-12 22:19:32 -0700554 operation.set_type(InstallOperation::REPLACE_BZ);
Alex Deymo14158572015-06-13 03:37:08 -0700555 data_blob = &new_data_bz;
556 }
557
Alex Vakulenko3f39d5c2015-10-13 09:27:13 -0700558 brillo::Blob old_data;
559 brillo::Blob empty_blob;
560 brillo::Blob bsdiff_delta;
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800561 brillo::Blob imgdiff_delta;
Alex Deymo14158572015-06-13 03:37:08 -0700562 if (blocks_to_read > 0) {
563 // Read old data.
564 TEST_AND_RETURN_FALSE(
565 utils::ReadExtents(old_part, src_extents, &old_data,
566 kBlockSize * blocks_to_read, kBlockSize));
567 if (old_data == new_data) {
568 // No change in data.
569 if (src_ops_allowed) {
Alex Deymoa12ee112015-08-12 22:19:32 -0700570 operation.set_type(InstallOperation::SOURCE_COPY);
Alex Deymo14158572015-06-13 03:37:08 -0700571 } else {
Alex Deymoa12ee112015-08-12 22:19:32 -0700572 operation.set_type(InstallOperation::MOVE);
Alex Deymo14158572015-06-13 03:37:08 -0700573 }
574 data_blob = &empty_blob;
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800575 } else if (bsdiff_allowed || imgdiff_allowed) {
Alex Deymo14158572015-06-13 03:37:08 -0700576 // If the source file is considered bsdiff safe (no bsdiff bugs
577 // triggered), see if BSDIFF encoding is smaller.
578 base::FilePath old_chunk;
579 TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&old_chunk));
580 ScopedPathUnlinker old_unlinker(old_chunk.value());
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800581 TEST_AND_RETURN_FALSE(utils::WriteFile(
582 old_chunk.value().c_str(), old_data.data(), old_data.size()));
Alex Deymo14158572015-06-13 03:37:08 -0700583 base::FilePath new_chunk;
584 TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&new_chunk));
585 ScopedPathUnlinker new_unlinker(new_chunk.value());
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800586 TEST_AND_RETURN_FALSE(utils::WriteFile(
587 new_chunk.value().c_str(), new_data.data(), new_data.size()));
Alex Deymo14158572015-06-13 03:37:08 -0700588
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800589 if (bsdiff_allowed) {
590 TEST_AND_RETURN_FALSE(DiffFiles(
591 kBsdiffPath, old_chunk.value(), new_chunk.value(), &bsdiff_delta));
592 CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
593 if (bsdiff_delta.size() < data_blob->size()) {
594 if (src_ops_allowed) {
595 operation.set_type(InstallOperation::SOURCE_BSDIFF);
596 } else {
597 operation.set_type(InstallOperation::BSDIFF);
598 }
599 data_blob = &bsdiff_delta;
Alex Deymo14158572015-06-13 03:37:08 -0700600 }
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800601 }
602 if (imgdiff_allowed && ContainsGZip(old_data) && ContainsGZip(new_data)) {
603 // Imgdiff might fail in some cases, only use the result if it succeed,
604 // otherwise print the extents to analyze.
605 if (DiffFiles(kImgdiffPath,
606 old_chunk.value(),
607 new_chunk.value(),
608 &imgdiff_delta) &&
609 imgdiff_delta.size() > 0) {
610 if (imgdiff_delta.size() < data_blob->size()) {
611 operation.set_type(InstallOperation::IMGDIFF);
612 data_blob = &imgdiff_delta;
613 }
614 } else {
615 LOG(ERROR) << "Imgdiff failed with source extents: "
616 << ExtentsToString(src_extents)
617 << ", destination extents: "
618 << ExtentsToString(dst_extents);
619 }
Alex Deymo14158572015-06-13 03:37:08 -0700620 }
621 }
622 }
623
624 size_t removed_bytes = 0;
625 // Remove identical src/dst block ranges in MOVE operations.
Alex Deymoa12ee112015-08-12 22:19:32 -0700626 if (operation.type() == InstallOperation::MOVE) {
Alex Deymo14158572015-06-13 03:37:08 -0700627 removed_bytes = RemoveIdenticalBlockRanges(
628 &src_extents, &dst_extents, new_data.size());
629 }
630 // Set legacy src_length and dst_length fields.
631 operation.set_src_length(old_data.size() - removed_bytes);
632 operation.set_dst_length(new_data.size() - removed_bytes);
633
634 // Embed extents in the operation.
635 StoreExtents(src_extents, operation.mutable_src_extents());
636 StoreExtents(dst_extents, operation.mutable_dst_extents());
637
638 // Replace operations should not have source extents.
Alex Deymoa12ee112015-08-12 22:19:32 -0700639 if (operation.type() == InstallOperation::REPLACE ||
640 operation.type() == InstallOperation::REPLACE_BZ) {
Alex Deymo14158572015-06-13 03:37:08 -0700641 operation.clear_src_extents();
642 operation.clear_src_length();
643 }
644
645 *out_data = std::move(*data_blob);
646 *out_op = operation;
647
648 return true;
649}
650
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800651// Runs the bsdiff or imgdiff tool in |diff_path| on two files and returns the
652// resulting delta in |out|. Returns true on success.
653bool DiffFiles(const string& diff_path,
654 const string& old_file,
655 const string& new_file,
656 brillo::Blob* out) {
Alex Deymo14158572015-06-13 03:37:08 -0700657 const string kPatchFile = "delta.patchXXXXXX";
658 string patch_file_path;
659
660 TEST_AND_RETURN_FALSE(
661 utils::MakeTempFile(kPatchFile, &patch_file_path, nullptr));
662
663 vector<string> cmd;
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800664 cmd.push_back(diff_path);
Alex Deymo14158572015-06-13 03:37:08 -0700665 cmd.push_back(old_file);
666 cmd.push_back(new_file);
667 cmd.push_back(patch_file_path);
668
669 int rc = 1;
Alex Vakulenko3f39d5c2015-10-13 09:27:13 -0700670 brillo::Blob patch_file;
Sen Jiang55c4f9b2016-02-10 11:26:20 -0800671 string stdout;
672 TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc, &stdout));
673 if (rc != 0) {
674 LOG(ERROR) << diff_path << " returned " << rc << std::endl << stdout;
675 return false;
676 }
Alex Deymo14158572015-06-13 03:37:08 -0700677 TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
678 unlink(patch_file_path.c_str());
679 return true;
680}
681
682// Returns true if |op| is a no-op operation that doesn't do any useful work
683// (e.g., a move operation that copies blocks onto themselves).
Alex Deymoa12ee112015-08-12 22:19:32 -0700684bool IsNoopOperation(const InstallOperation& op) {
685 return (op.type() == InstallOperation::MOVE &&
Alex Deymo14158572015-06-13 03:37:08 -0700686 ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents()));
687}
688
689void FilterNoopOperations(vector<AnnotatedOperation>* ops) {
690 ops->erase(
691 std::remove_if(
692 ops->begin(), ops->end(),
693 [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}),
694 ops->end());
695}
696
697bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) {
698 info->set_size(part.size);
Alex Deymo39910dc2015-11-09 17:04:30 -0800699 HashCalculator hasher;
Alex Deymo14158572015-06-13 03:37:08 -0700700 TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) ==
701 static_cast<off_t>(part.size));
702 TEST_AND_RETURN_FALSE(hasher.Finalize());
Alex Vakulenko3f39d5c2015-10-13 09:27:13 -0700703 const brillo::Blob& hash = hasher.raw_hash();
Alex Deymo14158572015-06-13 03:37:08 -0700704 info->set_hash(hash.data(), hash.size());
705 LOG(INFO) << part.path << ": size=" << part.size << " hash=" << hasher.hash();
706 return true;
707}
708
Alex Deymo3b2c7d02015-07-16 16:08:16 -0700709bool CompareAopsByDestination(AnnotatedOperation first_aop,
710 AnnotatedOperation second_aop) {
711 // We want empty operations to be at the end of the payload.
712 if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size())
713 return ((!first_aop.op.dst_extents().size()) <
714 (!second_aop.op.dst_extents().size()));
715 uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block();
716 uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block();
717 return first_dst_start < second_dst_start;
718}
719
Alex Deymo14158572015-06-13 03:37:08 -0700720} // namespace diff_utils
721
722} // namespace chromeos_update_engine