Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 1 | //===-- TarWriter.cpp - Tar archive file creator --------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // TarWriter class provides a feature to create a tar archive file. |
| 11 | // |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 12 | // I put emphasis on simplicity over comprehensiveness when implementing this |
| 13 | // class because we don't need a full-fledged archive file generator in LLVM |
| 14 | // at the moment. |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 15 | // |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 16 | // The filename field in the Unix V7 tar header is 100 bytes. Longer filenames |
| 17 | // are stored using the PAX extension. The PAX header is standardized in |
| 18 | // POSIX.1-2001. |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 19 | // |
| 20 | // The struct definition of UstarHeader is copied from |
| 21 | // https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 |
| 22 | // |
| 23 | //===----------------------------------------------------------------------===// |
| 24 | |
| 25 | #include "llvm/Support/TarWriter.h" |
| 26 | #include "llvm/ADT/StringRef.h" |
| 27 | #include "llvm/Support/FileSystem.h" |
| 28 | #include "llvm/Support/MathExtras.h" |
Rui Ueyama | 3e64903 | 2017-01-09 01:47:15 +0000 | [diff] [blame] | 29 | #include "llvm/Support/Path.h" |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 30 | |
| 31 | using namespace llvm; |
| 32 | |
// Tar archives are a sequence of fixed-size blocks. Headers and file
// contents are both aligned to this size.
static constexpr int BlockSize = 512;
| 35 | |
| 36 | struct UstarHeader { |
| 37 | char Name[100]; |
| 38 | char Mode[8]; |
| 39 | char Uid[8]; |
| 40 | char Gid[8]; |
| 41 | char Size[12]; |
| 42 | char Mtime[12]; |
| 43 | char Checksum[8]; |
| 44 | char TypeFlag; |
| 45 | char Linkname[100]; |
| 46 | char Magic[6]; |
| 47 | char Version[2]; |
| 48 | char Uname[32]; |
| 49 | char Gname[32]; |
| 50 | char DevMajor[8]; |
| 51 | char DevMinor[8]; |
| 52 | char Prefix[155]; |
| 53 | char Pad[12]; |
| 54 | }; |
| 55 | static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header"); |
| 56 | |
Rui Ueyama | a84ab07 | 2017-01-09 21:20:42 +0000 | [diff] [blame] | 57 | static UstarHeader makeUstarHeader() { |
| 58 | UstarHeader Hdr = {}; |
| 59 | memcpy(Hdr.Magic, "ustar", 5); // Ustar magic |
| 60 | memcpy(Hdr.Version, "00", 2); // Ustar version |
| 61 | return Hdr; |
| 62 | } |
| 63 | |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 64 | // A PAX attribute is in the form of "<length> <key>=<value>\n" |
| 65 | // where <length> is the length of the entire string including |
| 66 | // the length field itself. An example string is this. |
| 67 | // |
| 68 | // 25 ctime=1084839148.1212\n |
| 69 | // |
| 70 | // This function create such string. |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 71 | static std::string formatPax(StringRef Key, StringRef Val) { |
| 72 | int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n" |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 73 | |
| 74 | // We need to compute total size twice because appending |
| 75 | // a length field could change total size by one. |
| 76 | int Total = Len + Twine(Len).str().size(); |
| 77 | Total = Len + Twine(Total).str().size(); |
| 78 | return (Twine(Total) + " " + Key + "=" + Val + "\n").str(); |
| 79 | } |
| 80 | |
| 81 | // Headers in tar files must be aligned to 512 byte boundaries. |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 82 | // This function forwards the current file position to the next boundary. |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 83 | static void pad(raw_fd_ostream &OS) { |
| 84 | uint64_t Pos = OS.tell(); |
| 85 | OS.seek(alignTo(Pos, BlockSize)); |
| 86 | } |
| 87 | |
| 88 | // Computes a checksum for a tar header. |
| 89 | static void computeChecksum(UstarHeader &Hdr) { |
| 90 | // Before computing a checksum, checksum field must be |
| 91 | // filled with space characters. |
| 92 | memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum)); |
| 93 | |
| 94 | // Compute a checksum and set it to the checksum field. |
| 95 | unsigned Chksum = 0; |
| 96 | for (size_t I = 0; I < sizeof(Hdr); ++I) |
| 97 | Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I]; |
Reid Kleckner | 5984d01 | 2017-01-06 18:22:18 +0000 | [diff] [blame] | 98 | snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum); |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 99 | } |
| 100 | |
| 101 | // Create a tar header and write it to a given output stream. |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 102 | static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) { |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 103 | // A PAX header consists of a 512-byte header followed |
| 104 | // by key-value strings. First, create key-value strings. |
| 105 | std::string PaxAttr = formatPax("path", Path); |
| 106 | |
| 107 | // Create a 512-byte header. |
Rui Ueyama | a84ab07 | 2017-01-09 21:20:42 +0000 | [diff] [blame] | 108 | UstarHeader Hdr = makeUstarHeader(); |
Reid Kleckner | 5984d01 | 2017-01-06 18:22:18 +0000 | [diff] [blame] | 109 | snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", PaxAttr.size()); |
Rui Ueyama | a84ab07 | 2017-01-09 21:20:42 +0000 | [diff] [blame] | 110 | Hdr.TypeFlag = 'x'; // PAX magic |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 111 | computeChecksum(Hdr); |
| 112 | |
| 113 | // Write them down. |
| 114 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
| 115 | OS << PaxAttr; |
| 116 | pad(OS); |
| 117 | } |
| 118 | |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 119 | // In the Ustar header, a path can be split at any '/' to store |
| 120 | // a path into UstarHeader::Name and UstarHeader::Prefix. This |
| 121 | // function splits a given path for that purpose. |
| 122 | static std::pair<StringRef, StringRef> splitPath(StringRef Path) { |
| 123 | if (Path.size() <= sizeof(UstarHeader::Name)) |
| 124 | return {"", Path}; |
| 125 | size_t Sep = Path.rfind('/', sizeof(UstarHeader::Name) + 1); |
| 126 | if (Sep == StringRef::npos) |
| 127 | return {"", Path}; |
| 128 | return {Path.substr(0, Sep), Path.substr(Sep + 1)}; |
| 129 | } |
| 130 | |
| 131 | // Returns true if a given path can be stored to a Ustar header |
| 132 | // without the PAX extension. |
Rui Ueyama | d52f4b8 | 2017-01-07 08:32:07 +0000 | [diff] [blame] | 133 | static bool fitsInUstar(StringRef Path) { |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 134 | StringRef Prefix; |
| 135 | StringRef Name; |
| 136 | std::tie(Prefix, Name) = splitPath(Path); |
| 137 | return Name.size() <= sizeof(UstarHeader::Name); |
| 138 | } |
| 139 | |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 140 | // The PAX header is an extended format, so a PAX header needs |
| 141 | // to be followed by a "real" header. |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 142 | static void writeUstarHeader(raw_fd_ostream &OS, StringRef Path, size_t Size) { |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 143 | StringRef Prefix; |
| 144 | StringRef Name; |
| 145 | std::tie(Prefix, Name) = splitPath(Path); |
| 146 | |
Rui Ueyama | a84ab07 | 2017-01-09 21:20:42 +0000 | [diff] [blame] | 147 | UstarHeader Hdr = makeUstarHeader(); |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 148 | memcpy(Hdr.Name, Name.data(), Name.size()); |
Reid Kleckner | 5984d01 | 2017-01-06 18:22:18 +0000 | [diff] [blame] | 149 | memcpy(Hdr.Mode, "0000664", 8); |
| 150 | snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size); |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 151 | memcpy(Hdr.Prefix, Prefix.data(), Prefix.size()); |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 152 | computeChecksum(Hdr); |
| 153 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
| 154 | } |
| 155 | |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 156 | // Creates a TarWriter instance and returns it. |
| 157 | Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath, |
| 158 | StringRef BaseDir) { |
| 159 | int FD; |
| 160 | if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None)) |
| 161 | return make_error<StringError>("cannot open " + OutputPath, EC); |
| 162 | return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir)); |
| 163 | } |
| 164 | |
| 165 | TarWriter::TarWriter(int FD, StringRef BaseDir) |
| 166 | : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {} |
| 167 | |
| 168 | // Append a given file to an archive. |
| 169 | void TarWriter::append(StringRef Path, StringRef Data) { |
| 170 | // Write Path and Data. |
Rui Ueyama | 3e64903 | 2017-01-09 01:47:15 +0000 | [diff] [blame] | 171 | std::string S = BaseDir + "/" + sys::path::convert_to_slash(Path) + "\0"; |
Rui Ueyama | d52f4b8 | 2017-01-07 08:32:07 +0000 | [diff] [blame] | 172 | if (fitsInUstar(S)) { |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 173 | writeUstarHeader(OS, S, Data.size()); |
| 174 | } else { |
| 175 | writePaxHeader(OS, S); |
| 176 | writeUstarHeader(OS, "", Data.size()); |
| 177 | } |
| 178 | |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 179 | OS << Data; |
| 180 | pad(OS); |
| 181 | |
| 182 | // POSIX requires tar archives end with two null blocks. |
| 183 | // Here, we write the terminator and then seek back, so that |
| 184 | // the file being output is terminated correctly at any moment. |
| 185 | uint64_t Pos = OS.tell(); |
| 186 | OS << std::string(BlockSize * 2, '\0'); |
| 187 | OS.seek(Pos); |
| 188 | OS.flush(); |
| 189 | } |