blob: d8c4901bd5db91e2f6c6918b38f8774393a3d596 [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * extractExternal.cpp
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <strstream>
Jim Cownie5e8470a2013-09-27 10:38:44 +000023
24/* Given a set of n object files h ('external' object files) and a set of m
25 object files o ('internal' object files),
26 1. Determines r, the subset of h that o depends on, directly or indirectly
27 2. Removes the files in h - r from the file system
28 3. For each external symbol defined in some file in r, rename it in r U o
29 by prefixing it with "__kmp_external_"
30 Usage:
31 hide.exe <n> <filenames for h> <filenames for o>
32
Jonathan Peyton30419822017-05-12 18:01:32 +000033 Thus, the prefixed symbols become hidden in the sense that they now have a
34 special prefix.
Jim Cownie5e8470a2013-09-27 10:38:44 +000035*/
36
37using namespace std;
38
// Prints <errorMsg> followed by a newline on standard output and terminates
// the process with exit status 1. This function does not return.
// The message is taken as a pointer to const so that string literals can be
// passed without relying on the removed literal-to-'char *' conversion.
void stop(const char *errorMsg) {
  printf("%s\n", errorMsg);
  exit(1);
}
43
// An entry in the symbol table of a .OBJ file. The readers in this file copy
// the first 18 bytes of a record straight into an object of this class, so the
// order and the sizes of the members must not change.
class Symbol {
public:
  // Encoded name, see StringTable::encode / StringTable::decode.
  // 'long long' is the standard spelling of the 64-bit type that MSVC also
  // calls __int64.
  long long name;
  unsigned value;
  // sectionNum: zero is treated by this tool as "not defined in this file"
  unsigned short sectionNum, type;
  // storageClass: the value 2 is treated by this tool as an external symbol
  // nAux: number of auxiliary records that follow this entry
  // Both are single unsigned bytes; with plain 'char' an auxiliary count of
  // 128 or more would turn negative on platforms where 'char' is signed.
  unsigned char storageClass, nAux;
};
52
// Input stream over a character buffer that the stream takes charge of: the
// buffer passed to the constructor is released with delete[] when the stream
// is destroyed. Only derived classes can create or destroy instances.
class _rstream : public std::istrstream {
  // start of the buffer that the destructor releases
  const char *storage;

protected:
  // <source> is (start of buffer, number of bytes in buffer)
  _rstream(std::pair<const char *, std::streamsize> source)
      : std::istrstream(source.first, source.second), storage(source.first) {}

  ~_rstream() { delete[] storage; }
};
62
Jonathan Peyton30419822017-05-12 18:01:32 +000063// A stream encapuslating the content of a file or the content of a string,
64// overriding the >> operator to read various integer types in binary form,
65// as well as a symbol table entry.
Jim Cownie5e8470a2013-09-27 10:38:44 +000066class rstream : public _rstream {
67private:
Jonathan Peyton30419822017-05-12 18:01:32 +000068 template <class T> inline rstream &doRead(T &x) {
69 read((char *)&x, sizeof(T));
70 return *this;
71 }
72 static pair<const char *, streamsize> getBuf(const char *fileName) {
73 ifstream raw(fileName, ios::binary | ios::in);
74 if (!raw.is_open())
75 stop("rstream.getBuf: Error opening file");
76 raw.seekg(0, ios::end);
77 streampos fileSize = raw.tellg();
78 if (fileSize < 0)
79 stop("rstream.getBuf: Error reading file");
80 char *buf = new char[fileSize];
81 raw.seekg(0, ios::beg);
82 raw.read(buf, fileSize);
83 return pair<const char *, streamsize>(buf, fileSize);
84 }
85
Jim Cownie5e8470a2013-09-27 10:38:44 +000086public:
Jonathan Peyton30419822017-05-12 18:01:32 +000087 // construct from a string
88 rstream(const char *buf, streamsize size)
89 : _rstream(pair<const char *, streamsize>(buf, size)) {}
90 // construct from a file whole content is fully read once to initialize the
91 // content of this stream
92 rstream(const char *fileName) : _rstream(getBuf(fileName)) {}
93 rstream &operator>>(int &x) { return doRead(x); }
94 rstream &operator>>(unsigned &x) { return doRead(x); }
95 rstream &operator>>(short &x) { return doRead(x); }
96 rstream &operator>>(unsigned short &x) { return doRead(x); }
97 rstream &operator>>(Symbol &e) {
98 read((char *)&e, 18);
99 return *this;
100 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000101};
102
103// string table in a .OBJ file
104class StringTable {
105private:
Jonathan Peyton30419822017-05-12 18:01:32 +0000106 map<string, unsigned> directory;
107 size_t length;
108 char *data;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000109
Jonathan Peyton30419822017-05-12 18:01:32 +0000110 // make <directory> from <length> bytes in <data>
111 void makeDirectory(void) {
112 unsigned i = 4;
113 while (i < length) {
114 string s = string(data + i);
115 directory.insert(make_pair(s, i));
116 i += s.size() + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000117 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000118 }
119 // initialize <length> and <data> with contents specified by the arguments
120 void init(const char *_data) {
121 unsigned _length = *(unsigned *)_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000122
Jonathan Peyton30419822017-05-12 18:01:32 +0000123 if (_length < sizeof(unsigned) || _length != *(unsigned *)_data)
124 stop("StringTable.init: Invalid symbol table");
125 if (_data[_length - 1]) {
126 // to prevent runaway strings, make sure the data ends with a zero
127 data = new char[length = _length + 1];
128 data[_length] = 0;
129 } else {
130 data = new char[length = _length];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000131 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000132 *(unsigned *)data = length;
133 KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
134 length - sizeof(unsigned));
135 makeDirectory();
136 }
137
Jim Cownie5e8470a2013-09-27 10:38:44 +0000138public:
Jonathan Peyton30419822017-05-12 18:01:32 +0000139 StringTable(rstream &f) {
140 // Construct string table by reading from f.
141 streampos s;
142 unsigned strSize;
143 char *strData;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000144
Jonathan Peyton30419822017-05-12 18:01:32 +0000145 s = f.tellg();
146 f >> strSize;
147 if (strSize < sizeof(unsigned))
148 stop("StringTable: Invalid string table");
149 strData = new char[strSize];
150 *(unsigned *)strData = strSize;
151 // read the raw data into <strData>
152 f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
153 s = f.tellg() - s;
154 if (s < strSize)
155 stop("StringTable: Unexpected EOF");
156 init(strData);
157 delete[] strData;
158 }
159 StringTable(const set<string> &strings) {
160 // Construct string table from given strings.
161 char *p;
162 set<string>::const_iterator it;
163 size_t s;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000164
Jonathan Peyton30419822017-05-12 18:01:32 +0000165 // count required size for data
166 for (length = sizeof(unsigned), it = strings.begin(); it != strings.end();
167 ++it) {
168 size_t l = (*it).size();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000169
Jonathan Peyton30419822017-05-12 18:01:32 +0000170 if (l > (unsigned)0xFFFFFFFF)
171 stop("StringTable: String too long");
172 if (l > 8) {
173 length += l + 1;
174 if (length > (unsigned)0xFFFFFFFF)
175 stop("StringTable: Symbol table too long");
176 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000177 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000178 data = new char[length];
179 *(unsigned *)data = length;
180 // populate data and directory
181 for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end();
182 ++it) {
183 const string &str = *it;
184 size_t l = str.size();
185 if (l > 8) {
186 directory.insert(make_pair(str, p - data));
187 KMP_MEMCPY(p, str.c_str(), l);
188 p[l] = 0;
189 p += l + 1;
190 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000191 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000192 }
193 ~StringTable() { delete[] data; }
194 // Returns encoding for given string based on this string table. Error if
195 // string length is greater than 8 but string is not in the string table
196 // -- returns 0.
197 __int64 encode(const string &str) {
198 __int64 r;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000199
Jonathan Peyton30419822017-05-12 18:01:32 +0000200 if (str.size() <= 8) {
201 // encoded directly
202 ((char *)&r)[7] = 0;
203 KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8);
204 return r;
205 } else {
206 // represented as index into table
207 map<string, unsigned>::const_iterator it = directory.find(str);
208 if (it == directory.end())
209 stop("StringTable::encode: String now found in string table");
210 ((unsigned *)&r)[0] = 0;
211 ((unsigned *)&r)[1] = (*it).second;
212 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000213 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000214 }
215 // Returns string represented by x based on this string table. Error if x
216 // references an invalid position in the table--returns the empty string.
217 string decode(__int64 x) const {
218 if (*(unsigned *)&x == 0) {
219 // represented as index into table
220 unsigned &p = ((unsigned *)&x)[1];
221 if (p >= length)
222 stop("StringTable::decode: Invalid string table lookup");
223 return string(data + p);
224 } else {
225 // encoded directly
226 char *p = (char *)&x;
227 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000228
Jonathan Peyton30419822017-05-12 18:01:32 +0000229 for (i = 0; i < 8 && p[i]; ++i)
230 ;
231 return string(p, i);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000232 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000233 }
234 void write(ostream &os) { os.write(data, length); }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000235};
236
Jonathan Peyton30419822017-05-12 18:01:32 +0000237// for the named object file, determines the set of defined symbols and the set
238// of undefined external symbols and writes them to <defined> and <undefined>
239// respectively
240void computeExternalSymbols(const char *fileName, set<string> *defined,
241 set<string> *undefined) {
242 streampos fileSize;
243 size_t strTabStart;
244 unsigned symTabStart, symNEntries;
245 rstream f(fileName);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000246
Jonathan Peyton30419822017-05-12 18:01:32 +0000247 f.seekg(0, ios::end);
248 fileSize = f.tellg();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000249
Jonathan Peyton30419822017-05-12 18:01:32 +0000250 f.seekg(8);
251 f >> symTabStart >> symNEntries;
252 // seek to the string table
253 f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
254 if (f.eof()) {
255 printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart "
256 "= %u, symNEntries = %u\n",
257 fileName, (unsigned long)fileSize, symTabStart, symNEntries);
258 stop("computeExternalSymbols: Unexpected EOF 1");
259 }
260 StringTable stringTable(f); // read the string table
261 if (f.tellg() != fileSize)
262 stop("computeExternalSymbols: Unexpected data after string table");
263
264 f.clear();
265 f.seekg(symTabStart); // seek to the symbol table
266
267 defined->clear();
268 undefined->clear();
269 for (int i = 0; i < symNEntries; ++i) {
270 // process each entry
271 Symbol e;
272
273 if (f.eof())
274 stop("computeExternalSymbols: Unexpected EOF 2");
275 f >> e;
276 if (f.fail())
277 stop("computeExternalSymbols: File read error");
278 if (e.nAux) { // auxiliary entry: skip
279 f.seekg(e.nAux * 18, ios::cur);
280 i += e.nAux;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000281 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000282 // if symbol is extern and defined in the current file, insert it
283 if (e.storageClass == 2)
284 if (e.sectionNum)
285 defined->insert(stringTable.decode(e.name));
286 else
287 undefined->insert(stringTable.decode(e.name));
288 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000289}
290
Jonathan Peyton30419822017-05-12 18:01:32 +0000291// For each occurrence of an external symbol in the object file named by
292// by <fileName> that is a member of <hide>, renames it by prefixing
293// with "__kmp_external_", writing back the file in-place
Jim Cownie5e8470a2013-09-27 10:38:44 +0000294void hideSymbols(char *fileName, const set<string> &hide) {
Jonathan Peyton30419822017-05-12 18:01:32 +0000295 static const string prefix("__kmp_external_");
296 set<string> strings; // set of all occurring symbols, appropriately prefixed
297 streampos fileSize;
298 size_t strTabStart;
299 unsigned symTabStart, symNEntries;
300 int i;
301 rstream in(fileName);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000302
Jonathan Peyton30419822017-05-12 18:01:32 +0000303 in.seekg(0, ios::end);
304 fileSize = in.tellg();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000305
Jonathan Peyton30419822017-05-12 18:01:32 +0000306 in.seekg(8);
307 in >> symTabStart >> symNEntries;
308 in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
309 if (in.eof())
310 stop("hideSymbols: Unexpected EOF");
311 StringTable stringTableOld(in); // read original string table
Jim Cownie5e8470a2013-09-27 10:38:44 +0000312
Jonathan Peyton30419822017-05-12 18:01:32 +0000313 if (in.tellg() != fileSize)
314 stop("hideSymbols: Unexpected data after string table");
Jim Cownie5e8470a2013-09-27 10:38:44 +0000315
Jonathan Peyton30419822017-05-12 18:01:32 +0000316 // compute set of occurring strings with prefix added
317 for (i = 0; i < symNEntries; ++i) {
318 Symbol e;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000319
Jonathan Peyton30419822017-05-12 18:01:32 +0000320 in.seekg(symTabStart + i * 18);
321 if (in.eof())
322 stop("hideSymbols: Unexpected EOF");
323 in >> e;
324 if (in.fail())
325 stop("hideSymbols: File read error");
326 if (e.nAux)
327 i += e.nAux;
328 const string &s = stringTableOld.decode(e.name);
329 // if symbol is extern and found in <hide>, prefix and insert into strings,
330 // otherwise, just insert into strings without prefix
331 strings.insert(
332 (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
333 }
334
335 ofstream out(fileName, ios::trunc | ios::out | ios::binary);
336 if (!out.is_open())
337 stop("hideSymbols: Error opening output file");
338
339 // make new string table from string set
340 StringTable stringTableNew = StringTable(strings);
341
342 // copy input file to output file up to just before the symbol table
343 in.seekg(0);
344 char *buf = new char[symTabStart];
345 in.read(buf, symTabStart);
346 out.write(buf, symTabStart);
347 delete[] buf;
348
349 // copy input symbol table to output symbol table with name translation
350 for (i = 0; i < symNEntries; ++i) {
351 Symbol e;
352
353 in.seekg(symTabStart + i * 18);
354 if (in.eof())
355 stop("hideSymbols: Unexpected EOF");
356 in >> e;
357 if (in.fail())
358 stop("hideSymbols: File read error");
359 const string &s = stringTableOld.decode(e.name);
360 out.seekp(symTabStart + i * 18);
361 e.name = stringTableNew.encode(
362 (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
363 out.write((char *)&e, 18);
364 if (out.fail())
365 stop("hideSymbols: File write error");
366 if (e.nAux) {
367 // copy auxiliary symbol table entries
368 int nAux = e.nAux;
369 for (int j = 1; j <= nAux; ++j) {
370 in >> e;
371 out.seekp(symTabStart + (i + j) * 18);
372 out.write((char *)&e, 18);
373 }
374 i += nAux;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000375 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000376 }
377 // output string table
378 stringTableNew.write(out);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000379}
380
// Returns true iff <a> and <b> have no common element.
// Both sets are walked once in ascending order, always advancing the iterator
// that points at the smaller element; two elements that are not ordered
// either way are equal, i.e. common to both sets.
template <class T> bool isDisjoint(const std::set<T> &a, const std::set<T> &b) {
  // 'typename' is required: const_iterator is a name that depends on T
  typename std::set<T>::const_iterator ita = a.begin(), itb = b.begin();

  while (ita != a.end() && itb != b.end()) {
    if (*ita < *itb)
      ++ita;
    else if (*itb < *ita)
      ++itb;
    else
      return false;
  }
  return true;
}
396
Jonathan Peyton30419822017-05-12 18:01:32 +0000397// PRE: <defined> and <undefined> are arrays with <nTotal> elements where
398// <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the
399// external object files and the rest correspond to the internal object files.
400// POST: file x is said to depend on file y if undefined[x] and defined[y] are
401// not disjoint. Returns the transitive closure of the set of internal object
402// files, as a set of file indexes, under the 'depends on' relation, minus the
403// set of internal object files.
404set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined,
405 set<string> *undefined) {
406 set<int> *required = new set<int>;
407 set<int> fresh[2];
408 int i, cur = 0;
409 bool changed;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000410
Jonathan Peyton30419822017-05-12 18:01:32 +0000411 for (i = nTotal - 1; i >= nExternal; --i)
412 fresh[cur].insert(i);
413 do {
414 changed = false;
415 for (set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end();
416 ++it) {
417 set<string> &s = undefined[*it];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000418
Jonathan Peyton30419822017-05-12 18:01:32 +0000419 for (i = 0; i < nExternal; ++i) {
420 if (required->find(i) == required->end()) {
421 if (!isDisjoint(defined[i], s)) {
422 // found a new qualifying element
423 required->insert(i);
424 fresh[1 - cur].insert(i);
425 changed = true;
426 }
427 }
428 }
429 }
430 fresh[cur].clear();
431 cur = 1 - cur;
432 } while (changed);
433 return required;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000434}
435
436int main(int argc, char **argv) {
Jonathan Peyton30419822017-05-12 18:01:32 +0000437 int nExternal, nInternal, i;
438 set<string> *defined, *undefined;
439 set<int>::iterator it;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000440
Jonathan Peyton30419822017-05-12 18:01:32 +0000441 if (argc < 3)
442 stop("Please specify a positive integer followed by a list of object "
443 "filenames");
444 nExternal = atoi(argv[1]);
445 if (nExternal <= 0)
446 stop("Please specify a positive integer followed by a list of object "
447 "filenames");
448 if (nExternal + 2 > argc)
449 stop("Too few external objects");
450 nInternal = argc - nExternal - 2;
451 defined = new set<string>[argc - 2];
452 undefined = new set<string>[argc - 2];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000453
Jonathan Peyton30419822017-05-12 18:01:32 +0000454 // determine the set of defined and undefined external symbols
455 for (i = 2; i < argc; ++i)
456 computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000457
Jonathan Peyton30419822017-05-12 18:01:32 +0000458 // determine the set of required external files
459 set<int> *requiredExternal =
460 findRequiredExternal(nExternal, argc - 2, defined, undefined);
461 set<string> hide;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000462
Jonathan Peyton30419822017-05-12 18:01:32 +0000463 // determine the set of symbols to hide--namely defined external symbols of
464 // the required external files
465 for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
466 int idx = *it;
467 set<string>::iterator it2;
468 // We have to insert one element at a time instead of inserting a range
469 // because the insert member function taking a range doesn't exist on
470 // Windows* OS, at least at the time of this writing.
471 for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
472 hide.insert(*it2);
473 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000474
Jonathan Peyton30419822017-05-12 18:01:32 +0000475 // process the external files--removing those that are not required and hiding
476 // the appropriate symbols in the others
477 for (i = 0; i < nExternal; ++i)
478 if (requiredExternal->find(i) != requiredExternal->end())
479 hideSymbols(argv[2 + i], hide);
480 else
481 remove(argv[2 + i]);
482 // hide the appropriate symbols in the internal files
483 for (i = nExternal + 2; i < argc; ++i)
484 hideSymbols(argv[i], hide);
485 return 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000486}