//===-- TarWriter.cpp - Tar archive file creator --------------------------===//
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // TarWriter class provides a feature to create a tar archive file. |
| 11 | // |
| 12 | // I put emphasis on simplicity over comprehensiveness when |
| 13 | // implementing this class because we don't need a full-fledged |
| 14 | // archive file generator in LLVM at the moment. |
| 15 | // |
| 16 | // The filename field in the Unix V7 tar header is 100 bytes, which is |
| 17 | // apparently too small. Various extensions were proposed and |
| 18 | // implemented to fix the issue. The writer implemented in this file |
| 19 | // uses PAX extension headers. |
| 20 | // |
// Note that we emit PAX headers even if filenames fit in the V7
// header for the sake of simplicity. So, generated files are N
// kilobytes larger than the ideal where N is the number of files in
// the archive. In practice, I think you don't need to worry about that.
| 25 | // |
| 26 | // The PAX header is standardized in IEEE Std 1003.1-2001. |
| 27 | // |
| 28 | // The struct definition of UstarHeader is copied from |
| 29 | // https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 |
| 30 | // |
| 31 | //===----------------------------------------------------------------------===// |
| 32 | |
#include "llvm/Support/TarWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cstdio>
#include <cstring>
| 37 | |
| 38 | using namespace llvm; |
| 39 | |
// Size in bytes of one tar block. Headers are exactly this large, and each
// file in an archive must be aligned to a multiple of it.
static constexpr int BlockSize = 512;
| 42 | |
| 43 | struct UstarHeader { |
| 44 | char Name[100]; |
| 45 | char Mode[8]; |
| 46 | char Uid[8]; |
| 47 | char Gid[8]; |
| 48 | char Size[12]; |
| 49 | char Mtime[12]; |
| 50 | char Checksum[8]; |
| 51 | char TypeFlag; |
| 52 | char Linkname[100]; |
| 53 | char Magic[6]; |
| 54 | char Version[2]; |
| 55 | char Uname[32]; |
| 56 | char Gname[32]; |
| 57 | char DevMajor[8]; |
| 58 | char DevMinor[8]; |
| 59 | char Prefix[155]; |
| 60 | char Pad[12]; |
| 61 | }; |
| 62 | static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header"); |
| 63 | |
| 64 | // A PAX attribute is in the form of "<length> <key>=<value>\n" |
| 65 | // where <length> is the length of the entire string including |
| 66 | // the length field itself. An example string is this. |
| 67 | // |
| 68 | // 25 ctime=1084839148.1212\n |
| 69 | // |
| 70 | // This function create such string. |
| 71 | static std::string formatPax(StringRef Key, const Twine &Val) { |
| 72 | int Len = Key.size() + Val.str().size() + 3; // +3 for " ", "=" and "\n" |
| 73 | |
| 74 | // We need to compute total size twice because appending |
| 75 | // a length field could change total size by one. |
| 76 | int Total = Len + Twine(Len).str().size(); |
| 77 | Total = Len + Twine(Total).str().size(); |
| 78 | return (Twine(Total) + " " + Key + "=" + Val + "\n").str(); |
| 79 | } |
| 80 | |
| 81 | // Headers in tar files must be aligned to 512 byte boundaries. |
| 82 | // This function writes null bytes so that the file is a multiple |
| 83 | // of 512 bytes. |
| 84 | static void pad(raw_fd_ostream &OS) { |
| 85 | uint64_t Pos = OS.tell(); |
| 86 | OS.seek(alignTo(Pos, BlockSize)); |
| 87 | } |
| 88 | |
| 89 | // Computes a checksum for a tar header. |
| 90 | static void computeChecksum(UstarHeader &Hdr) { |
| 91 | // Before computing a checksum, checksum field must be |
| 92 | // filled with space characters. |
| 93 | memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum)); |
| 94 | |
| 95 | // Compute a checksum and set it to the checksum field. |
| 96 | unsigned Chksum = 0; |
| 97 | for (size_t I = 0; I < sizeof(Hdr); ++I) |
| 98 | Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I]; |
| 99 | sprintf(Hdr.Checksum, "%06o", Chksum); |
| 100 | } |
| 101 | |
| 102 | // Create a tar header and write it to a given output stream. |
| 103 | static void writePaxHeader(raw_fd_ostream &OS, const Twine &Path) { |
| 104 | // A PAX header consists of a 512-byte header followed |
| 105 | // by key-value strings. First, create key-value strings. |
| 106 | std::string PaxAttr = formatPax("path", Path); |
| 107 | |
| 108 | // Create a 512-byte header. |
| 109 | UstarHeader Hdr = {}; |
| 110 | sprintf(Hdr.Size, "%011lo", PaxAttr.size()); |
| 111 | Hdr.TypeFlag = 'x'; // PAX magic |
| 112 | memcpy(Hdr.Magic, "ustar", 6); // Ustar magic |
| 113 | computeChecksum(Hdr); |
| 114 | |
| 115 | // Write them down. |
| 116 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
| 117 | OS << PaxAttr; |
| 118 | pad(OS); |
| 119 | } |
| 120 | |
| 121 | // The PAX header is an extended format, so a PAX header needs |
| 122 | // to be followed by a "real" header. |
| 123 | static void writeUstarHeader(raw_fd_ostream &OS, size_t Size) { |
| 124 | UstarHeader Hdr = {}; |
| 125 | strcpy(Hdr.Mode, "0000664"); |
| 126 | sprintf(Hdr.Size, "%011lo", Size); |
| 127 | memcpy(Hdr.Magic, "ustar", 6); |
| 128 | |
| 129 | computeChecksum(Hdr); |
| 130 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
| 131 | } |
| 132 | |
// We want to use '/' as a path separator even on Windows.
// Returns S with every backslash replaced by a forward slash when building
// for Windows, and S unchanged on other platforms.
static std::string canonicalize(std::string S) {
  // LLVM_ON_WIN32 is provided by LLVM's configuration header, which this file
  // does not include directly. _WIN32 is predefined by compilers targeting
  // Windows, so accept either to avoid silently skipping the conversion.
#if defined(LLVM_ON_WIN32) || defined(_WIN32)
  std::replace(S.begin(), S.end(), '\\', '/');
#endif
  return S;
}
| 141 | |
| 142 | // Creates a TarWriter instance and returns it. |
| 143 | Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath, |
| 144 | StringRef BaseDir) { |
| 145 | int FD; |
| 146 | if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None)) |
| 147 | return make_error<StringError>("cannot open " + OutputPath, EC); |
| 148 | return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir)); |
| 149 | } |
| 150 | |
| 151 | TarWriter::TarWriter(int FD, StringRef BaseDir) |
| 152 | : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {} |
| 153 | |
| 154 | // Append a given file to an archive. |
| 155 | void TarWriter::append(StringRef Path, StringRef Data) { |
| 156 | // Write Path and Data. |
| 157 | writePaxHeader(OS, BaseDir + "/" + canonicalize(Path)); |
| 158 | writeUstarHeader(OS, Data.size()); |
| 159 | OS << Data; |
| 160 | pad(OS); |
| 161 | |
| 162 | // POSIX requires tar archives end with two null blocks. |
| 163 | // Here, we write the terminator and then seek back, so that |
| 164 | // the file being output is terminated correctly at any moment. |
| 165 | uint64_t Pos = OS.tell(); |
| 166 | OS << std::string(BlockSize * 2, '\0'); |
| 167 | OS.seek(Pos); |
| 168 | OS.flush(); |
| 169 | } |