Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 1 | //===-- TarWriter.cpp - Tar archive file creator --------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // TarWriter class provides a feature to create a tar archive file. |
| 11 | // |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 12 | // I put emphasis on simplicity over comprehensiveness when implementing this |
| 13 | // class because we don't need a full-fledged archive file generator in LLVM |
| 14 | // at the moment. |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 15 | // |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 16 | // The filename field in the Unix V7 tar header is 100 bytes. Longer filenames |
| 17 | // are stored using the PAX extension. The PAX header is standardized in |
| 18 | // POSIX.1-2001. |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 19 | // |
| 20 | // The struct definition of UstarHeader is copied from |
| 21 | // https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 |
| 22 | // |
| 23 | //===----------------------------------------------------------------------===// |
| 24 | |
| 25 | #include "llvm/Support/TarWriter.h" |
| 26 | #include "llvm/ADT/StringRef.h" |
| 27 | #include "llvm/Support/FileSystem.h" |
| 28 | #include "llvm/Support/MathExtras.h" |
Rui Ueyama | 3e64903 | 2017-01-09 01:47:15 +0000 | [diff] [blame] | 29 | #include "llvm/Support/Path.h" |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 30 | |
| 31 | using namespace llvm; |
| 32 | |
// Size in bytes of one tar block. Headers and file contents are both laid
// out on multiples of this value.
static constexpr int BlockSize = 512;
| 35 | |
| 36 | struct UstarHeader { |
| 37 | char Name[100]; |
| 38 | char Mode[8]; |
| 39 | char Uid[8]; |
| 40 | char Gid[8]; |
| 41 | char Size[12]; |
| 42 | char Mtime[12]; |
| 43 | char Checksum[8]; |
| 44 | char TypeFlag; |
| 45 | char Linkname[100]; |
| 46 | char Magic[6]; |
| 47 | char Version[2]; |
| 48 | char Uname[32]; |
| 49 | char Gname[32]; |
| 50 | char DevMajor[8]; |
| 51 | char DevMinor[8]; |
| 52 | char Prefix[155]; |
| 53 | char Pad[12]; |
| 54 | }; |
| 55 | static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header"); |
| 56 | |
Rui Ueyama | a84ab07 | 2017-01-09 21:20:42 +0000 | [diff] [blame] | 57 | static UstarHeader makeUstarHeader() { |
| 58 | UstarHeader Hdr = {}; |
| 59 | memcpy(Hdr.Magic, "ustar", 5); // Ustar magic |
| 60 | memcpy(Hdr.Version, "00", 2); // Ustar version |
| 61 | return Hdr; |
| 62 | } |
| 63 | |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 64 | // A PAX attribute is in the form of "<length> <key>=<value>\n" |
| 65 | // where <length> is the length of the entire string including |
| 66 | // the length field itself. An example string is this. |
| 67 | // |
| 68 | // 25 ctime=1084839148.1212\n |
| 69 | // |
| 70 | // This function create such string. |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 71 | static std::string formatPax(StringRef Key, StringRef Val) { |
| 72 | int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n" |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 73 | |
| 74 | // We need to compute total size twice because appending |
| 75 | // a length field could change total size by one. |
| 76 | int Total = Len + Twine(Len).str().size(); |
| 77 | Total = Len + Twine(Total).str().size(); |
| 78 | return (Twine(Total) + " " + Key + "=" + Val + "\n").str(); |
| 79 | } |
| 80 | |
| 81 | // Headers in tar files must be aligned to 512 byte boundaries. |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 82 | // This function forwards the current file position to the next boundary. |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 83 | static void pad(raw_fd_ostream &OS) { |
| 84 | uint64_t Pos = OS.tell(); |
| 85 | OS.seek(alignTo(Pos, BlockSize)); |
| 86 | } |
| 87 | |
| 88 | // Computes a checksum for a tar header. |
| 89 | static void computeChecksum(UstarHeader &Hdr) { |
| 90 | // Before computing a checksum, checksum field must be |
| 91 | // filled with space characters. |
| 92 | memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum)); |
| 93 | |
| 94 | // Compute a checksum and set it to the checksum field. |
| 95 | unsigned Chksum = 0; |
| 96 | for (size_t I = 0; I < sizeof(Hdr); ++I) |
| 97 | Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I]; |
Reid Kleckner | 5984d01 | 2017-01-06 18:22:18 +0000 | [diff] [blame] | 98 | snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum); |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 99 | } |
| 100 | |
| 101 | // Create a tar header and write it to a given output stream. |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 102 | static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) { |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 103 | // A PAX header consists of a 512-byte header followed |
| 104 | // by key-value strings. First, create key-value strings. |
| 105 | std::string PaxAttr = formatPax("path", Path); |
| 106 | |
| 107 | // Create a 512-byte header. |
Rui Ueyama | a84ab07 | 2017-01-09 21:20:42 +0000 | [diff] [blame] | 108 | UstarHeader Hdr = makeUstarHeader(); |
Reid Kleckner | 5984d01 | 2017-01-06 18:22:18 +0000 | [diff] [blame] | 109 | snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", PaxAttr.size()); |
Rui Ueyama | a84ab07 | 2017-01-09 21:20:42 +0000 | [diff] [blame] | 110 | Hdr.TypeFlag = 'x'; // PAX magic |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 111 | computeChecksum(Hdr); |
| 112 | |
| 113 | // Write them down. |
| 114 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
| 115 | OS << PaxAttr; |
| 116 | pad(OS); |
| 117 | } |
| 118 | |
Rui Ueyama | 283f56a | 2017-09-27 21:38:02 +0000 | [diff] [blame] | 119 | // Path fits in a Ustar header if |
| 120 | // |
| 121 | // - Path is less than 100 characters long, or |
| 122 | // - Path is in the form of "<prefix>/<name>" where <prefix> is less |
| 123 | // than or equal to 155 characters long and <name> is less than 100 |
| 124 | // characters long. Both <prefix> and <name> can contain extra '/'. |
| 125 | // |
| 126 | // If Path fits in a Ustar header, updates Prefix and Name and returns true. |
| 127 | // Otherwise, returns false. |
| 128 | static bool splitUstar(StringRef Path, StringRef &Prefix, StringRef &Name) { |
| 129 | if (Path.size() < sizeof(UstarHeader::Name)) { |
Rui Ueyama | 5908845 | 2017-09-28 00:27:39 +0000 | [diff] [blame] | 130 | Prefix = ""; |
Rui Ueyama | 283f56a | 2017-09-27 21:38:02 +0000 | [diff] [blame] | 131 | Name = Path; |
| 132 | return true; |
| 133 | } |
| 134 | |
Rui Ueyama | e9d1754 | 2017-01-09 22:55:00 +0000 | [diff] [blame] | 135 | size_t Sep = Path.rfind('/', sizeof(UstarHeader::Prefix) + 1); |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 136 | if (Sep == StringRef::npos) |
Rui Ueyama | 283f56a | 2017-09-27 21:38:02 +0000 | [diff] [blame] | 137 | return false; |
| 138 | if (Path.size() - Sep - 1 >= sizeof(UstarHeader::Name)) |
| 139 | return false; |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 140 | |
Rui Ueyama | 283f56a | 2017-09-27 21:38:02 +0000 | [diff] [blame] | 141 | Prefix = Path.substr(0, Sep); |
| 142 | Name = Path.substr(Sep + 1); |
| 143 | return true; |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 144 | } |
| 145 | |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 146 | // The PAX header is an extended format, so a PAX header needs |
| 147 | // to be followed by a "real" header. |
Rui Ueyama | 283f56a | 2017-09-27 21:38:02 +0000 | [diff] [blame] | 148 | static void writeUstarHeader(raw_fd_ostream &OS, StringRef Prefix, |
| 149 | StringRef Name, size_t Size) { |
Rui Ueyama | a84ab07 | 2017-01-09 21:20:42 +0000 | [diff] [blame] | 150 | UstarHeader Hdr = makeUstarHeader(); |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 151 | memcpy(Hdr.Name, Name.data(), Name.size()); |
Reid Kleckner | 5984d01 | 2017-01-06 18:22:18 +0000 | [diff] [blame] | 152 | memcpy(Hdr.Mode, "0000664", 8); |
| 153 | snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size); |
Rui Ueyama | 999f094 | 2017-01-07 08:28:56 +0000 | [diff] [blame] | 154 | memcpy(Hdr.Prefix, Prefix.data(), Prefix.size()); |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 155 | computeChecksum(Hdr); |
| 156 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
| 157 | } |
| 158 | |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 159 | // Creates a TarWriter instance and returns it. |
| 160 | Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath, |
| 161 | StringRef BaseDir) { |
| 162 | int FD; |
| 163 | if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None)) |
| 164 | return make_error<StringError>("cannot open " + OutputPath, EC); |
| 165 | return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir)); |
| 166 | } |
| 167 | |
| 168 | TarWriter::TarWriter(int FD, StringRef BaseDir) |
| 169 | : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {} |
| 170 | |
| 171 | // Append a given file to an archive. |
| 172 | void TarWriter::append(StringRef Path, StringRef Data) { |
| 173 | // Write Path and Data. |
Rui Ueyama | 283f56a | 2017-09-27 21:38:02 +0000 | [diff] [blame] | 174 | std::string Fullpath = BaseDir + "/" + sys::path::convert_to_slash(Path); |
| 175 | |
George Rimar | f91f0b0 | 2017-12-05 10:09:59 +0000 | [diff] [blame] | 176 | // We do not want to include the same file more than once. |
| 177 | if (!Files.insert(Fullpath).second) |
| 178 | return; |
| 179 | |
Rui Ueyama | 283f56a | 2017-09-27 21:38:02 +0000 | [diff] [blame] | 180 | StringRef Prefix; |
| 181 | StringRef Name; |
| 182 | if (splitUstar(Fullpath, Prefix, Name)) { |
| 183 | writeUstarHeader(OS, Prefix, Name, Data.size()); |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 184 | } else { |
Rui Ueyama | 283f56a | 2017-09-27 21:38:02 +0000 | [diff] [blame] | 185 | writePaxHeader(OS, Fullpath); |
| 186 | writeUstarHeader(OS, "", "", Data.size()); |
Rui Ueyama | f2a6275 | 2017-01-06 05:33:45 +0000 | [diff] [blame] | 187 | } |
| 188 | |
Rui Ueyama | 4bb7883f | 2017-01-06 02:29:48 +0000 | [diff] [blame] | 189 | OS << Data; |
| 190 | pad(OS); |
| 191 | |
| 192 | // POSIX requires tar archives end with two null blocks. |
| 193 | // Here, we write the terminator and then seek back, so that |
| 194 | // the file being output is terminated correctly at any moment. |
| 195 | uint64_t Pos = OS.tell(); |
| 196 | OS << std::string(BlockSize * 2, '\0'); |
| 197 | OS.seek(Pos); |
| 198 | OS.flush(); |
| 199 | } |