adlr@google.com | 3defe6a | 2009-12-04 20:57:17 +0000 | [diff] [blame^] | 1 | // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "update_engine/delta_diff_generator.h" |
| 6 | #include <dirent.h> |
| 7 | #include <endian.h> |
| 8 | #include <errno.h> |
| 9 | #include <stdio.h> |
| 10 | #include <unistd.h> |
| 11 | #include <algorithm> |
| 12 | #include <vector> |
| 13 | #include <tr1/memory> |
| 14 | #include <zlib.h> |
| 15 | #include "chromeos/obsolete_logging.h" |
| 16 | #include "base/scoped_ptr.h" |
| 17 | #include "update_engine/delta_diff_parser.h" |
| 18 | #include "update_engine/gzip.h" |
| 19 | #include "update_engine/subprocess.h" |
| 20 | #include "update_engine/utils.h" |
| 21 | |
| 22 | using std::vector; |
| 23 | using std::tr1::shared_ptr; |
| 24 | using chromeos_update_engine::DeltaArchiveManifest; |
| 25 | |
| 26 | namespace chromeos_update_engine { |
| 27 | |
| 28 | namespace { |
| 29 | // These structs and methods are helpers for EncodeDataToDeltaFile() |
| 30 | |
| 31 | // Before moving the data into a proto buffer, the data is stored in |
| 32 | // memory in these Node and Child structures. |
| 33 | |
| 34 | // Each Node struct represents a file on disk (which can be regular file, |
| 35 | // directory, fifo, socket, symlink, etc). Nodes that contain children |
| 36 | // (just directories) will have a vector of Child objects. Each child |
| 37 | // object has a filename and a pointer to the associated Node. Thus, |
| 38 | // filenames for files are stored with their parents, not as part of |
| 39 | // the file itself. |
| 40 | |
| 41 | // These structures are easier to work with than the protobuf format |
| 42 | // when adding files. When generating a delta file, we add an entry |
| 43 | // for each file to a root Node object. Then, we sort each Node's |
| 44 | // children vector so the children are stored alphabetically. Then, |
| 45 | // we assign an index value to the idx field of each Node by a preorder |
| 46 | // tree traversal. The index value assigned to a Node is the index it |
| 47 | // will have in the DeltaArchiveManifest protobuf. |
| 48 | // Finally, we add each Node to a DeltaArchiveManifest protobuf. |
| 49 | |
| 50 | struct Node; |
| 51 | |
| 52 | struct Child { |
| 53 | Child(const string& the_name, |
| 54 | Node* the_node) |
| 55 | : name(the_name), |
| 56 | node(the_node) {} |
| 57 | string name; |
| 58 | // Use shared_ptr here rather than scoped_ptr b/c this struct will be copied |
| 59 | // in stl containers |
| 60 | scoped_ptr<Node> node; |
| 61 | }; |
| 62 | |
| 63 | // For the C++ sort() function. |
| 64 | struct ChildLessThan { |
| 65 | bool operator()(const shared_ptr<Child>& a, const shared_ptr<Child>& b) { |
| 66 | return a->name < b->name; |
| 67 | } |
| 68 | }; |
| 69 | |
| 70 | struct Node { |
| 71 | Node() |
| 72 | : mode(0), |
| 73 | uid(0), |
| 74 | gid(0), |
| 75 | compressed(false), |
| 76 | offset(-1), |
| 77 | length(0), |
| 78 | idx(0) {} |
| 79 | |
| 80 | mode_t mode; |
| 81 | uid_t uid; |
| 82 | gid_t gid; |
| 83 | |
| 84 | // data |
| 85 | bool compressed; |
| 86 | int offset; // -1 means no data |
| 87 | int length; |
| 88 | |
| 89 | vector<shared_ptr<Child> > children; |
| 90 | int idx; |
| 91 | }; |
| 92 | |
| 93 | // This function sets *node's variables to match what's at path. |
| 94 | // This includes calling this function recursively on all children. Children |
| 95 | // not on the same device as the original node will not be considered. |
| 96 | // Returns true on success. |
| 97 | bool UpdateNodeFromPath(const string& path, Node* node) { |
| 98 | // Set metadata |
| 99 | struct stat stbuf; |
| 100 | TEST_AND_RETURN_FALSE_ERRNO(lstat(path.c_str(), &stbuf) == 0); |
| 101 | const dev_t dev = stbuf.st_dev; |
| 102 | node->mode = stbuf.st_mode; |
| 103 | node->uid = stbuf.st_uid; |
| 104 | node->gid = stbuf.st_gid; |
| 105 | if (!S_ISDIR(node->mode)) { |
| 106 | return true; |
| 107 | } |
| 108 | |
| 109 | DIR* dir = opendir(path.c_str()); |
| 110 | TEST_AND_RETURN_FALSE(dir); |
| 111 | |
| 112 | struct dirent entry; |
| 113 | struct dirent* dir_entry; |
| 114 | |
| 115 | for (;;) { |
| 116 | TEST_AND_RETURN_FALSE_ERRNO(readdir_r(dir, &entry, &dir_entry) == 0); |
| 117 | if (!dir_entry) { |
| 118 | // done |
| 119 | break; |
| 120 | } |
| 121 | if (!strcmp(".", dir_entry->d_name)) |
| 122 | continue; |
| 123 | if (!strcmp("..", dir_entry->d_name)) |
| 124 | continue; |
| 125 | |
| 126 | string child_path = path + "/" + dir_entry->d_name; |
| 127 | struct stat child_stbuf; |
| 128 | TEST_AND_RETURN_FALSE_ERRNO(lstat(child_path.c_str(), &child_stbuf) == 0); |
| 129 | // make sure it's on the same dev |
| 130 | if (child_stbuf.st_dev != dev) |
| 131 | continue; |
| 132 | shared_ptr<Child> child(new Child(dir_entry->d_name, new Node)); |
| 133 | node->children.push_back(child); |
| 134 | TEST_AND_RETURN_FALSE(UpdateNodeFromPath(path + "/" + child->name, |
| 135 | child->node.get())); |
| 136 | } |
| 137 | TEST_AND_RETURN_FALSE_ERRNO(closedir(dir) == 0); |
| 138 | // Done with all subdirs. sort children. |
| 139 | sort(node->children.begin(), node->children.end(), ChildLessThan()); |
| 140 | return true; |
| 141 | } |
| 142 | |
| 143 | // We go through n setting the index value of each Node to |
| 144 | // *next_index_value, then increment next_index_value. |
| 145 | // We then recursively assign index values to children. |
| 146 | // The first caller should call this with *next_index_value == 0 and |
| 147 | // the root Node, thus setting the root Node's index to 0. |
| 148 | void PopulateChildIndexes(Node* n, int* next_index_value) { |
| 149 | n->idx = (*next_index_value)++; |
| 150 | for (unsigned int i = 0; i < n->children.size(); i++) { |
| 151 | PopulateChildIndexes(n->children[i]->node.get(), next_index_value); |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | // This converts a Node tree rooted at n into a DeltaArchiveManifest. |
| 156 | void NodeToDeltaArchiveManifest(Node* n, DeltaArchiveManifest* archive) { |
| 157 | DeltaArchiveManifest_File *f = archive->add_files(); |
| 158 | f->set_mode(n->mode); |
| 159 | f->set_uid(n->uid); |
| 160 | f->set_gid(n->gid); |
| 161 | if (!S_ISDIR(n->mode)) |
| 162 | return; |
| 163 | for (unsigned int i = 0; i < n->children.size(); i++) { |
| 164 | DeltaArchiveManifest_File_Child* child = f->add_children(); |
| 165 | child->set_name(n->children[i]->name); |
| 166 | child->set_index(n->children[i]->node->idx); |
| 167 | } |
| 168 | for (unsigned int i = 0; i < n->children.size(); i++) { |
| 169 | NodeToDeltaArchiveManifest(n->children[i]->node.get(), archive); |
| 170 | } |
| 171 | } |
| 172 | |
| 173 | } // namespace {} |
| 174 | |
| 175 | // For each file in archive, write a delta for it into out_file |
| 176 | // and update 'file' to refer to the delta. |
| 177 | // This is a recursive function. Returns true on success. |
| 178 | bool DeltaDiffGenerator::WriteFileDiffsToDeltaFile( |
| 179 | DeltaArchiveManifest* archive, |
| 180 | DeltaArchiveManifest_File* file, |
| 181 | const std::string& file_name, |
| 182 | const std::string& old_path, |
| 183 | const std::string& new_path, |
| 184 | FileWriter* out_file_writer, |
| 185 | int* out_file_length) { |
| 186 | TEST_AND_RETURN_FALSE(file->has_mode()); |
| 187 | |
| 188 | // Stat the actual file, too |
| 189 | struct stat stbuf; |
| 190 | TEST_AND_RETURN_FALSE_ERRNO(lstat((new_path + "/" + file_name).c_str(), |
| 191 | &stbuf) == 0); |
| 192 | TEST_AND_RETURN_FALSE(stbuf.st_mode == file->mode()); |
| 193 | |
| 194 | // See if we're a directory or not |
| 195 | if (S_ISDIR(file->mode())) { |
| 196 | for (int i = 0; i < file->children_size(); i++) { |
| 197 | DeltaArchiveManifest_File_Child* child = file->mutable_children(i); |
| 198 | DeltaArchiveManifest_File* child_file = |
| 199 | archive->mutable_files(child->index()); |
| 200 | TEST_AND_RETURN_FALSE(WriteFileDiffsToDeltaFile( |
| 201 | archive, |
| 202 | child_file, |
| 203 | child->name(), |
| 204 | old_path + "/" + file_name, |
| 205 | new_path + "/" + file_name, |
| 206 | out_file_writer, |
| 207 | out_file_length)); |
| 208 | } |
| 209 | return true; |
| 210 | } |
| 211 | |
| 212 | if (S_ISFIFO(file->mode()) || S_ISSOCK(file->mode())) { |
| 213 | // These don't store any additional data |
| 214 | return true; |
| 215 | } |
| 216 | |
| 217 | vector<char> data; |
| 218 | bool should_compress = true; |
| 219 | bool format_set = false; |
| 220 | DeltaArchiveManifest_File_DataFormat format; |
| 221 | if (S_ISLNK(file->mode())) { |
| 222 | LOG(INFO) << "link"; |
| 223 | TEST_AND_RETURN_FALSE(EncodeLink(new_path + "/" + file_name, &data)); |
| 224 | } else if (S_ISCHR(file->mode()) || S_ISBLK(file->mode())) { |
| 225 | LOG(INFO) << "dev"; |
| 226 | TEST_AND_RETURN_FALSE(EncodeDev(stbuf, &data)); |
| 227 | } else if (S_ISREG(file->mode())) { |
| 228 | LOG(INFO) << "reg"; |
| 229 | // regular file. We may use a delta here. |
| 230 | TEST_AND_RETURN_FALSE(EncodeFile(old_path, new_path, file_name, &format, |
| 231 | &data)); |
| 232 | should_compress = false; |
| 233 | format_set = true; |
| 234 | if ((format == DeltaArchiveManifest_File_DataFormat_BSDIFF) || |
| 235 | (format == DeltaArchiveManifest_File_DataFormat_FULL_GZ)) |
| 236 | TEST_AND_RETURN_FALSE(!data.empty()); |
| 237 | } else { |
| 238 | // Should never get here; unhandled mode type. |
| 239 | LOG(ERROR) << "Unhandled mode type: " << file->mode(); |
| 240 | return false; |
| 241 | } |
| 242 | LOG(INFO) << "data len: " << data.size(); |
| 243 | if (!format_set) { |
| 244 | // Pick a format now |
| 245 | vector<char> compressed_data; |
| 246 | TEST_AND_RETURN_FALSE(GzipCompress(data, &compressed_data)); |
| 247 | if (compressed_data.size() < data.size()) { |
| 248 | format = DeltaArchiveManifest_File_DataFormat_FULL_GZ; |
| 249 | data.swap(compressed_data); |
| 250 | } else { |
| 251 | format = DeltaArchiveManifest_File_DataFormat_FULL; |
| 252 | } |
| 253 | format_set = true; |
| 254 | } |
| 255 | |
| 256 | if (!data.empty()) { |
| 257 | TEST_AND_RETURN_FALSE(format_set); |
| 258 | file->set_data_format(format); |
| 259 | file->set_data_offset(*out_file_length); |
| 260 | TEST_AND_RETURN_FALSE(static_cast<ssize_t>(data.size()) == |
| 261 | out_file_writer->Write(&data[0], data.size())); |
| 262 | file->set_data_length(data.size()); |
| 263 | *out_file_length += data.size(); |
| 264 | } |
| 265 | return true; |
| 266 | } |
| 267 | |
| 268 | bool DeltaDiffGenerator::EncodeLink(const std::string& path, |
| 269 | std::vector<char>* out) { |
| 270 | // Store symlink path as file data |
| 271 | vector<char> link_data(4096); |
| 272 | int rc = readlink(path.c_str(), &link_data[0], link_data.size()); |
| 273 | TEST_AND_RETURN_FALSE_ERRNO(rc >= 0); |
| 274 | link_data.resize(rc); |
| 275 | out->swap(link_data); |
| 276 | return true; |
| 277 | } |
| 278 | |
| 279 | bool DeltaDiffGenerator::EncodeDev(const struct stat& stbuf, |
| 280 | std::vector<char>* out) { |
| 281 | LinuxDevice dev; |
| 282 | dev.set_major(major(stbuf.st_rdev)); |
| 283 | dev.set_minor(minor(stbuf.st_rdev)); |
| 284 | out->resize(dev.ByteSize()); |
| 285 | TEST_AND_RETURN_FALSE(dev.SerializeToArray(&(*out)[0], out->size())); |
| 286 | return true; |
| 287 | } |
| 288 | |
| 289 | // Encode the file at new_path + "/" + file_name. It may be a binary diff |
| 290 | // based on old_path + "/" + file_name. out_data_format will be set to |
| 291 | // the format used. out_data_format may not be NULL. |
| 292 | bool DeltaDiffGenerator::EncodeFile( |
| 293 | const string& old_dir, |
| 294 | const string& new_dir, |
| 295 | const string& file_name, |
| 296 | DeltaArchiveManifest_File_DataFormat* out_data_format, |
| 297 | vector<char>* out) { |
| 298 | TEST_AND_RETURN_FALSE(out_data_format); |
| 299 | // First, see the full length: |
| 300 | vector<char> full_data; |
| 301 | TEST_AND_RETURN_FALSE(utils::ReadFile(new_dir + "/" + file_name, &full_data)); |
| 302 | vector<char> gz_data; |
| 303 | if (!full_data.empty()) { |
| 304 | TEST_AND_RETURN_FALSE(GzipCompress(full_data, &gz_data)); |
| 305 | } |
| 306 | vector<char> *ret = NULL; |
| 307 | |
| 308 | if (gz_data.size() < full_data.size()) { |
| 309 | *out_data_format = DeltaArchiveManifest_File_DataFormat_FULL_GZ; |
| 310 | ret = &gz_data; |
| 311 | } else { |
| 312 | *out_data_format = DeltaArchiveManifest_File_DataFormat_FULL; |
| 313 | ret = &full_data; |
| 314 | } |
| 315 | |
| 316 | struct stat old_stbuf; |
| 317 | if ((stat((old_dir + "/" + file_name).c_str(), &old_stbuf) < 0) || |
| 318 | (!S_ISREG(old_stbuf.st_mode))) { |
| 319 | // stat() failed or old file is not a regular file. Just send back the full |
| 320 | // contents |
| 321 | *out = *ret; |
| 322 | return true; |
| 323 | } |
| 324 | // We have an old file. Do a binary diff. For now use bsdiff. |
| 325 | const string kPatchFile = "/tmp/delta.patch"; |
| 326 | |
| 327 | vector<string> cmd; |
| 328 | cmd.push_back("/usr/bin/bsdiff"); |
| 329 | cmd.push_back(old_dir + "/" + file_name); |
| 330 | cmd.push_back(new_dir + "/" + file_name); |
| 331 | cmd.push_back(kPatchFile); |
| 332 | |
| 333 | int rc = 1; |
| 334 | TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc)); |
| 335 | TEST_AND_RETURN_FALSE(rc == 0); |
| 336 | vector<char> patch_file; |
| 337 | TEST_AND_RETURN_FALSE(utils::ReadFile(kPatchFile, &patch_file)); |
| 338 | unlink(kPatchFile.c_str()); |
| 339 | |
| 340 | if (patch_file.size() < ret->size()) { |
| 341 | *out_data_format = DeltaArchiveManifest_File_DataFormat_BSDIFF; |
| 342 | ret = &patch_file; |
| 343 | } |
| 344 | |
| 345 | *out = *ret; |
| 346 | return true; |
| 347 | } |
| 348 | |
| 349 | DeltaArchiveManifest* DeltaDiffGenerator::EncodeMetadataToProtoBuffer( |
| 350 | const char* new_path) { |
| 351 | Node node; |
| 352 | if (!UpdateNodeFromPath(new_path, &node)) |
| 353 | return NULL; |
| 354 | int index = 0; |
| 355 | PopulateChildIndexes(&node, &index); |
| 356 | DeltaArchiveManifest *ret = new DeltaArchiveManifest; |
| 357 | NodeToDeltaArchiveManifest(&node, ret); |
| 358 | return ret; |
| 359 | } |
| 360 | |
| 361 | bool DeltaDiffGenerator::EncodeDataToDeltaFile(DeltaArchiveManifest* archive, |
| 362 | const std::string& old_path, |
| 363 | const std::string& new_path, |
| 364 | const std::string& out_file) { |
| 365 | DirectFileWriter out_writer; |
| 366 | int r = out_writer.Open(out_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); |
| 367 | TEST_AND_RETURN_FALSE_ERRNO(r >= 0); |
| 368 | ScopedFileWriterCloser closer(&out_writer); |
| 369 | TEST_AND_RETURN_FALSE(out_writer.Write(DeltaDiffParser::kFileMagic, |
| 370 | strlen(DeltaDiffParser::kFileMagic)) |
| 371 | == static_cast<ssize_t>( |
| 372 | strlen(DeltaDiffParser::kFileMagic))); |
| 373 | // Write 8 null bytes. This will be filled in w/ the offset of |
| 374 | // the protobuf. |
| 375 | TEST_AND_RETURN_FALSE(out_writer.Write("\0\0\0\0\0\0\0\0", 8) == 8); |
| 376 | // 8 more bytes will be filled w/ the protobuf length. |
| 377 | TEST_AND_RETURN_FALSE(out_writer.Write("\0\0\0\0\0\0\0\0", 8) == 8); |
| 378 | int out_file_length = strlen(DeltaDiffParser::kFileMagic) + 16; |
| 379 | |
| 380 | TEST_AND_RETURN_FALSE(archive->files_size() > 0); |
| 381 | DeltaArchiveManifest_File* file = archive->mutable_files(0); |
| 382 | |
| 383 | TEST_AND_RETURN_FALSE(WriteFileDiffsToDeltaFile(archive, |
| 384 | file, |
| 385 | "", |
| 386 | old_path, |
| 387 | new_path, |
| 388 | &out_writer, |
| 389 | &out_file_length)); |
| 390 | |
| 391 | // Finally, write the protobuf to the end of the file |
| 392 | string encoded_archive; |
| 393 | TEST_AND_RETURN_FALSE(archive->SerializeToString(&encoded_archive)); |
| 394 | |
| 395 | // Compress the protobuf (which contains filenames) |
| 396 | vector<char> compressed_encoded_archive; |
| 397 | TEST_AND_RETURN_FALSE(GzipCompressString(encoded_archive, |
| 398 | &compressed_encoded_archive)); |
| 399 | |
| 400 | TEST_AND_RETURN_FALSE(out_writer.Write(compressed_encoded_archive.data(), |
| 401 | compressed_encoded_archive.size()) == |
| 402 | static_cast<ssize_t>( |
| 403 | compressed_encoded_archive.size())); |
| 404 | |
| 405 | // write offset of protobut to just after the file magic |
| 406 | int64 big_endian_protobuf_offset = htobe64(out_file_length); |
| 407 | TEST_AND_RETURN_FALSE(pwrite(out_writer.fd(), |
| 408 | &big_endian_protobuf_offset, |
| 409 | sizeof(big_endian_protobuf_offset), |
| 410 | strlen(DeltaDiffParser::kFileMagic)) == |
| 411 | sizeof(big_endian_protobuf_offset)); |
| 412 | // Write the size just after the offset |
| 413 | int64 pb_length = htobe64(compressed_encoded_archive.size()); |
| 414 | TEST_AND_RETURN_FALSE(pwrite(out_writer.fd(), |
| 415 | &pb_length, |
| 416 | sizeof(pb_length), |
| 417 | strlen(DeltaDiffParser::kFileMagic) + |
| 418 | sizeof(big_endian_protobuf_offset)) == |
| 419 | sizeof(pb_length)); |
| 420 | return true; |
| 421 | } |
| 422 | |
| 423 | } // namespace chromeos_update_engine |