//===-- TarWriter.cpp - Tar archive file creator --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// TarWriter class provides a feature to create a tar archive file.
//
// I put emphasis on simplicity over comprehensiveness when
// implementing this class because we don't need a full-fledged
// archive file generator in LLVM at the moment.
//
// The filename field in the Unix V7 tar header is 100 bytes, which is
// apparently too small. Various extensions were proposed and
// implemented to fix the issue. The writer implemented in this file
// uses PAX extension headers.
//
// Note that we emit PAX headers even if filenames fit in the V7
// header for the sake of simplicity. So, generated files are N
// kilobytes larger than the ideal where N is the number of files in
// archives. In practice, I think you don't need to worry about that.
//
// The PAX header is standardized in IEEE Std 1003.1-2001.
//
// The struct definition of UstarHeader is copied from
// https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
//
//===----------------------------------------------------------------------===//
32
#include "llvm/Support/TarWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include <cstdio>
#include <cstring>
37
38using namespace llvm;
39
// Size in bytes of one tar block. Every header occupies exactly one block,
// and file contents are padded so that the next header starts on a block
// boundary.
static const int BlockSize = 512;

// In-memory image of a single ustar header block.
//
// Every member is a plain char array (or a single char), so the struct has
// no padding and its members sit at the byte offsets in which they are
// declared. Headers are created zero-initialized in this file; only the
// fields noted below are ever filled in.
struct UstarHeader {
  char Name[100];     // Left zeroed; the path goes in a PAX "path" record.
  char Mode[8];       // Octal permission string, set for regular entries.
  char Uid[8];        // Left zeroed.
  char Gid[8];        // Left zeroed.
  char Size[12];      // Octal size of the data that follows the header.
  char Mtime[12];     // Left zeroed.
  char Checksum[8];   // Filled in by computeChecksum().
  char TypeFlag;      // 'x' for a PAX extended header, otherwise zero.
  char Linkname[100]; // Left zeroed.
  char Magic[6];      // "ustar" followed by a NUL.
  char Version[2];
  char Uname[32];     // Left zeroed.
  char Gname[32];     // Left zeroed.
  char DevMajor[8];   // Left zeroed.
  char DevMinor[8];   // Left zeroed.
  char Prefix[155];   // Left zeroed.
  char Pad[12];       // Fills the header up to a full block.
};

// A header is written to the archive as raw bytes, so it has to be exactly
// one block long.
static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header");
63
64// A PAX attribute is in the form of "<length> <key>=<value>\n"
65// where <length> is the length of the entire string including
66// the length field itself. An example string is this.
67//
68// 25 ctime=1084839148.1212\n
69//
70// This function create such string.
71static std::string formatPax(StringRef Key, const Twine &Val) {
72 int Len = Key.size() + Val.str().size() + 3; // +3 for " ", "=" and "\n"
73
74 // We need to compute total size twice because appending
75 // a length field could change total size by one.
76 int Total = Len + Twine(Len).str().size();
77 Total = Len + Twine(Total).str().size();
78 return (Twine(Total) + " " + Key + "=" + Val + "\n").str();
79}
80
81// Headers in tar files must be aligned to 512 byte boundaries.
82// This function writes null bytes so that the file is a multiple
83// of 512 bytes.
84static void pad(raw_fd_ostream &OS) {
85 uint64_t Pos = OS.tell();
86 OS.seek(alignTo(Pos, BlockSize));
87}
88
89// Computes a checksum for a tar header.
90static void computeChecksum(UstarHeader &Hdr) {
91 // Before computing a checksum, checksum field must be
92 // filled with space characters.
93 memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum));
94
95 // Compute a checksum and set it to the checksum field.
96 unsigned Chksum = 0;
97 for (size_t I = 0; I < sizeof(Hdr); ++I)
98 Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I];
99 sprintf(Hdr.Checksum, "%06o", Chksum);
100}
101
102// Create a tar header and write it to a given output stream.
103static void writePaxHeader(raw_fd_ostream &OS, const Twine &Path) {
104 // A PAX header consists of a 512-byte header followed
105 // by key-value strings. First, create key-value strings.
106 std::string PaxAttr = formatPax("path", Path);
107
108 // Create a 512-byte header.
109 UstarHeader Hdr = {};
110 sprintf(Hdr.Size, "%011lo", PaxAttr.size());
111 Hdr.TypeFlag = 'x'; // PAX magic
112 memcpy(Hdr.Magic, "ustar", 6); // Ustar magic
113 computeChecksum(Hdr);
114
115 // Write them down.
116 OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
117 OS << PaxAttr;
118 pad(OS);
119}
120
121// The PAX header is an extended format, so a PAX header needs
122// to be followed by a "real" header.
123static void writeUstarHeader(raw_fd_ostream &OS, size_t Size) {
124 UstarHeader Hdr = {};
125 strcpy(Hdr.Mode, "0000664");
126 sprintf(Hdr.Size, "%011lo", Size);
127 memcpy(Hdr.Magic, "ustar", 6);
128
129 computeChecksum(Hdr);
130 OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
131}
132
// Returns S with '/' as the only path separator.
//
// When LLVM_ON_WIN32 is defined every backslash in S is turned into a
// forward slash; otherwise S is returned unchanged.
static std::string canonicalize(std::string S) {
#ifdef LLVM_ON_WIN32
  for (char &C : S)
    if (C == '\\')
      C = '/';
#endif
  return S;
}
141
142// Creates a TarWriter instance and returns it.
143Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath,
144 StringRef BaseDir) {
145 int FD;
146 if (std::error_code EC = openFileForWrite(OutputPath, FD, sys::fs::F_None))
147 return make_error<StringError>("cannot open " + OutputPath, EC);
148 return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir));
149}
150
151TarWriter::TarWriter(int FD, StringRef BaseDir)
152 : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), BaseDir(BaseDir) {}
153
154// Append a given file to an archive.
155void TarWriter::append(StringRef Path, StringRef Data) {
156 // Write Path and Data.
157 writePaxHeader(OS, BaseDir + "/" + canonicalize(Path));
158 writeUstarHeader(OS, Data.size());
159 OS << Data;
160 pad(OS);
161
162 // POSIX requires tar archives end with two null blocks.
163 // Here, we write the terminator and then seek back, so that
164 // the file being output is terminated correctly at any moment.
165 uint64_t Pos = OS.tell();
166 OS << std::string(BlockSize * 2, '\0');
167 OS.seek(Pos);
168 OS.flush();
169}