blob: ba20373a6a416b32bf8a9adad7ce0c453214240c [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * extractExternal.cpp
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
Jim Cownie5e8470a2013-09-27 10:38:44 +00005//===----------------------------------------------------------------------===//
6//
7// The LLVM Compiler Infrastructure
8//
9// This file is dual licensed under the MIT and the University of Illinois Open
10// Source Licenses. See LICENSE.txt for details.
11//
12//===----------------------------------------------------------------------===//
13
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <strstream>
Jim Cownie5e8470a2013-09-27 10:38:44 +000021
22/* Given a set of n object files h ('external' object files) and a set of m
23 object files o ('internal' object files),
24 1. Determines r, the subset of h that o depends on, directly or indirectly
25 2. Removes the files in h - r from the file system
26 3. For each external symbol defined in some file in r, rename it in r U o
27 by prefixing it with "__kmp_external_"
28 Usage:
29 hide.exe <n> <filenames for h> <filenames for o>
30
Jonathan Peyton30419822017-05-12 18:01:32 +000031 Thus, the prefixed symbols become hidden in the sense that they now have a
32 special prefix.
Jim Cownie5e8470a2013-09-27 10:38:44 +000033*/
34
35using namespace std;
36
// Prints <errorMsg> followed by a newline on standard output and terminates
// the process with exit status 1; control never returns to the caller.
// The parameter is a pointer to const so that string literals (which is what
// every caller in this file passes) bind without the char * conversion that
// has been ill-formed since C++11. Callers holding a char * are unaffected.
void stop(const char *errorMsg) {
  printf("%s\n", errorMsg);
  exit(1);
}
41
// An entry in the symbol table of a .OBJ file.
//
// Entries are transferred to and from the file as raw 18-byte records (the
// stream extraction operator reads 18 bytes into a Symbol and hideSymbols
// writes 18 bytes back), so the members must keep this order and these sizes:
// 8 + 4 + 2 + 2 + 1 + 1 = 18 bytes with no padding between them.
class Symbol {
public:
  // Either up to 8 name characters stored inline, or a zero 32-bit word
  // followed by a 32-bit offset into the string table (see StringTable).
  // long long is the same type as the MSVC-specific __int64.
  long long name;
  // not interpreted by this tool
  unsigned value;
  // sectionNum: zero for a symbol that is not defined in this file
  // type: not interpreted by this tool
  unsigned short sectionNum, type;
  // storageClass: 2 marks an external symbol
  // nAux: number of auxiliary 18-byte records that follow this entry
  // Both are single raw bytes; they are unsigned so that a count of 128 or
  // more is never seen as a negative number when used in arithmetic.
  unsigned char storageClass, nAux;
};
50
// Helper base for rstream: an input stream reading from a character buffer.
// The buffer passed to the constructor is released with delete[] when the
// stream is destroyed, so it must have been allocated with new[].
class _rstream : public std::istrstream {
  // first byte of the buffer the stream reads from
  const char *buf;

protected:
  // <contents> is the pair (start of buffer, number of bytes in it)
  _rstream(std::pair<const char *, std::streamsize> contents)
      : std::istrstream(contents.first, contents.second),
        buf(contents.first) {}

  ~_rstream() {
    delete[] buf;
  }
};
60
Jonathan Peyton30419822017-05-12 18:01:32 +000061// A stream encapuslating the content of a file or the content of a string,
62// overriding the >> operator to read various integer types in binary form,
63// as well as a symbol table entry.
Jim Cownie5e8470a2013-09-27 10:38:44 +000064class rstream : public _rstream {
65private:
Jonathan Peyton30419822017-05-12 18:01:32 +000066 template <class T> inline rstream &doRead(T &x) {
67 read((char *)&x, sizeof(T));
68 return *this;
69 }
70 static pair<const char *, streamsize> getBuf(const char *fileName) {
71 ifstream raw(fileName, ios::binary | ios::in);
72 if (!raw.is_open())
73 stop("rstream.getBuf: Error opening file");
74 raw.seekg(0, ios::end);
75 streampos fileSize = raw.tellg();
76 if (fileSize < 0)
77 stop("rstream.getBuf: Error reading file");
78 char *buf = new char[fileSize];
79 raw.seekg(0, ios::beg);
80 raw.read(buf, fileSize);
81 return pair<const char *, streamsize>(buf, fileSize);
82 }
83
Jim Cownie5e8470a2013-09-27 10:38:44 +000084public:
Jonathan Peyton30419822017-05-12 18:01:32 +000085 // construct from a string
86 rstream(const char *buf, streamsize size)
87 : _rstream(pair<const char *, streamsize>(buf, size)) {}
88 // construct from a file whole content is fully read once to initialize the
89 // content of this stream
90 rstream(const char *fileName) : _rstream(getBuf(fileName)) {}
91 rstream &operator>>(int &x) { return doRead(x); }
92 rstream &operator>>(unsigned &x) { return doRead(x); }
93 rstream &operator>>(short &x) { return doRead(x); }
94 rstream &operator>>(unsigned short &x) { return doRead(x); }
95 rstream &operator>>(Symbol &e) {
96 read((char *)&e, 18);
97 return *this;
98 }
Jim Cownie5e8470a2013-09-27 10:38:44 +000099};
100
101// string table in a .OBJ file
102class StringTable {
103private:
Jonathan Peyton30419822017-05-12 18:01:32 +0000104 map<string, unsigned> directory;
105 size_t length;
106 char *data;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000107
Jonathan Peyton30419822017-05-12 18:01:32 +0000108 // make <directory> from <length> bytes in <data>
109 void makeDirectory(void) {
110 unsigned i = 4;
111 while (i < length) {
112 string s = string(data + i);
113 directory.insert(make_pair(s, i));
114 i += s.size() + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000115 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000116 }
117 // initialize <length> and <data> with contents specified by the arguments
118 void init(const char *_data) {
119 unsigned _length = *(unsigned *)_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000120
Jonathan Peyton30419822017-05-12 18:01:32 +0000121 if (_length < sizeof(unsigned) || _length != *(unsigned *)_data)
122 stop("StringTable.init: Invalid symbol table");
123 if (_data[_length - 1]) {
124 // to prevent runaway strings, make sure the data ends with a zero
125 data = new char[length = _length + 1];
126 data[_length] = 0;
127 } else {
128 data = new char[length = _length];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000129 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000130 *(unsigned *)data = length;
131 KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
132 length - sizeof(unsigned));
133 makeDirectory();
134 }
135
Jim Cownie5e8470a2013-09-27 10:38:44 +0000136public:
Jonathan Peyton30419822017-05-12 18:01:32 +0000137 StringTable(rstream &f) {
138 // Construct string table by reading from f.
139 streampos s;
140 unsigned strSize;
141 char *strData;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000142
Jonathan Peyton30419822017-05-12 18:01:32 +0000143 s = f.tellg();
144 f >> strSize;
145 if (strSize < sizeof(unsigned))
146 stop("StringTable: Invalid string table");
147 strData = new char[strSize];
148 *(unsigned *)strData = strSize;
149 // read the raw data into <strData>
150 f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
151 s = f.tellg() - s;
152 if (s < strSize)
153 stop("StringTable: Unexpected EOF");
154 init(strData);
155 delete[] strData;
156 }
157 StringTable(const set<string> &strings) {
158 // Construct string table from given strings.
159 char *p;
160 set<string>::const_iterator it;
161 size_t s;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000162
Jonathan Peyton30419822017-05-12 18:01:32 +0000163 // count required size for data
164 for (length = sizeof(unsigned), it = strings.begin(); it != strings.end();
165 ++it) {
166 size_t l = (*it).size();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000167
Jonathan Peyton30419822017-05-12 18:01:32 +0000168 if (l > (unsigned)0xFFFFFFFF)
169 stop("StringTable: String too long");
170 if (l > 8) {
171 length += l + 1;
172 if (length > (unsigned)0xFFFFFFFF)
173 stop("StringTable: Symbol table too long");
174 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000175 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000176 data = new char[length];
177 *(unsigned *)data = length;
178 // populate data and directory
179 for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end();
180 ++it) {
181 const string &str = *it;
182 size_t l = str.size();
183 if (l > 8) {
184 directory.insert(make_pair(str, p - data));
185 KMP_MEMCPY(p, str.c_str(), l);
186 p[l] = 0;
187 p += l + 1;
188 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000189 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000190 }
191 ~StringTable() { delete[] data; }
192 // Returns encoding for given string based on this string table. Error if
193 // string length is greater than 8 but string is not in the string table
194 // -- returns 0.
195 __int64 encode(const string &str) {
196 __int64 r;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000197
Jonathan Peyton30419822017-05-12 18:01:32 +0000198 if (str.size() <= 8) {
199 // encoded directly
200 ((char *)&r)[7] = 0;
201 KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8);
202 return r;
203 } else {
204 // represented as index into table
205 map<string, unsigned>::const_iterator it = directory.find(str);
206 if (it == directory.end())
207 stop("StringTable::encode: String now found in string table");
208 ((unsigned *)&r)[0] = 0;
209 ((unsigned *)&r)[1] = (*it).second;
210 return r;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000211 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000212 }
213 // Returns string represented by x based on this string table. Error if x
214 // references an invalid position in the table--returns the empty string.
215 string decode(__int64 x) const {
216 if (*(unsigned *)&x == 0) {
217 // represented as index into table
218 unsigned &p = ((unsigned *)&x)[1];
219 if (p >= length)
220 stop("StringTable::decode: Invalid string table lookup");
221 return string(data + p);
222 } else {
223 // encoded directly
224 char *p = (char *)&x;
225 int i;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000226
Jonathan Peyton30419822017-05-12 18:01:32 +0000227 for (i = 0; i < 8 && p[i]; ++i)
228 ;
229 return string(p, i);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000230 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000231 }
232 void write(ostream &os) { os.write(data, length); }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000233};
234
Jonathan Peyton30419822017-05-12 18:01:32 +0000235// for the named object file, determines the set of defined symbols and the set
236// of undefined external symbols and writes them to <defined> and <undefined>
237// respectively
238void computeExternalSymbols(const char *fileName, set<string> *defined,
239 set<string> *undefined) {
240 streampos fileSize;
241 size_t strTabStart;
242 unsigned symTabStart, symNEntries;
243 rstream f(fileName);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000244
Jonathan Peyton30419822017-05-12 18:01:32 +0000245 f.seekg(0, ios::end);
246 fileSize = f.tellg();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000247
Jonathan Peyton30419822017-05-12 18:01:32 +0000248 f.seekg(8);
249 f >> symTabStart >> symNEntries;
250 // seek to the string table
251 f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
252 if (f.eof()) {
253 printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart "
254 "= %u, symNEntries = %u\n",
255 fileName, (unsigned long)fileSize, symTabStart, symNEntries);
256 stop("computeExternalSymbols: Unexpected EOF 1");
257 }
258 StringTable stringTable(f); // read the string table
259 if (f.tellg() != fileSize)
260 stop("computeExternalSymbols: Unexpected data after string table");
261
262 f.clear();
263 f.seekg(symTabStart); // seek to the symbol table
264
265 defined->clear();
266 undefined->clear();
267 for (int i = 0; i < symNEntries; ++i) {
268 // process each entry
269 Symbol e;
270
271 if (f.eof())
272 stop("computeExternalSymbols: Unexpected EOF 2");
273 f >> e;
274 if (f.fail())
275 stop("computeExternalSymbols: File read error");
276 if (e.nAux) { // auxiliary entry: skip
277 f.seekg(e.nAux * 18, ios::cur);
278 i += e.nAux;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000279 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000280 // if symbol is extern and defined in the current file, insert it
281 if (e.storageClass == 2)
282 if (e.sectionNum)
283 defined->insert(stringTable.decode(e.name));
284 else
285 undefined->insert(stringTable.decode(e.name));
286 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000287}
288
Jonathan Peyton30419822017-05-12 18:01:32 +0000289// For each occurrence of an external symbol in the object file named by
290// by <fileName> that is a member of <hide>, renames it by prefixing
291// with "__kmp_external_", writing back the file in-place
Jim Cownie5e8470a2013-09-27 10:38:44 +0000292void hideSymbols(char *fileName, const set<string> &hide) {
Jonathan Peyton30419822017-05-12 18:01:32 +0000293 static const string prefix("__kmp_external_");
294 set<string> strings; // set of all occurring symbols, appropriately prefixed
295 streampos fileSize;
296 size_t strTabStart;
297 unsigned symTabStart, symNEntries;
298 int i;
299 rstream in(fileName);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000300
Jonathan Peyton30419822017-05-12 18:01:32 +0000301 in.seekg(0, ios::end);
302 fileSize = in.tellg();
Jim Cownie5e8470a2013-09-27 10:38:44 +0000303
Jonathan Peyton30419822017-05-12 18:01:32 +0000304 in.seekg(8);
305 in >> symTabStart >> symNEntries;
306 in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
307 if (in.eof())
308 stop("hideSymbols: Unexpected EOF");
309 StringTable stringTableOld(in); // read original string table
Jim Cownie5e8470a2013-09-27 10:38:44 +0000310
Jonathan Peyton30419822017-05-12 18:01:32 +0000311 if (in.tellg() != fileSize)
312 stop("hideSymbols: Unexpected data after string table");
Jim Cownie5e8470a2013-09-27 10:38:44 +0000313
Jonathan Peyton30419822017-05-12 18:01:32 +0000314 // compute set of occurring strings with prefix added
315 for (i = 0; i < symNEntries; ++i) {
316 Symbol e;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000317
Jonathan Peyton30419822017-05-12 18:01:32 +0000318 in.seekg(symTabStart + i * 18);
319 if (in.eof())
320 stop("hideSymbols: Unexpected EOF");
321 in >> e;
322 if (in.fail())
323 stop("hideSymbols: File read error");
324 if (e.nAux)
325 i += e.nAux;
326 const string &s = stringTableOld.decode(e.name);
327 // if symbol is extern and found in <hide>, prefix and insert into strings,
328 // otherwise, just insert into strings without prefix
329 strings.insert(
330 (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
331 }
332
333 ofstream out(fileName, ios::trunc | ios::out | ios::binary);
334 if (!out.is_open())
335 stop("hideSymbols: Error opening output file");
336
337 // make new string table from string set
338 StringTable stringTableNew = StringTable(strings);
339
340 // copy input file to output file up to just before the symbol table
341 in.seekg(0);
342 char *buf = new char[symTabStart];
343 in.read(buf, symTabStart);
344 out.write(buf, symTabStart);
345 delete[] buf;
346
347 // copy input symbol table to output symbol table with name translation
348 for (i = 0; i < symNEntries; ++i) {
349 Symbol e;
350
351 in.seekg(symTabStart + i * 18);
352 if (in.eof())
353 stop("hideSymbols: Unexpected EOF");
354 in >> e;
355 if (in.fail())
356 stop("hideSymbols: File read error");
357 const string &s = stringTableOld.decode(e.name);
358 out.seekp(symTabStart + i * 18);
359 e.name = stringTableNew.encode(
360 (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
361 out.write((char *)&e, 18);
362 if (out.fail())
363 stop("hideSymbols: File write error");
364 if (e.nAux) {
365 // copy auxiliary symbol table entries
366 int nAux = e.nAux;
367 for (int j = 1; j <= nAux; ++j) {
368 in >> e;
369 out.seekp(symTabStart + (i + j) * 18);
370 out.write((char *)&e, 18);
371 }
372 i += nAux;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000373 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000374 }
375 // output string table
376 stringTableNew.write(out);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000377}
378
// Returns true iff <a> and <b> have no common element.
// Both sets are walked once in ascending order, always advancing past the
// smaller of the two current elements; two elements neither of which is less
// than the other are a common element.
template <class T>
bool isDisjoint(const std::set<T> &a, const std::set<T> &b) {
  // 'typename' is required: const_iterator is a name that depends on T
  typename std::set<T>::const_iterator ita = a.begin();
  typename std::set<T>::const_iterator itb = b.begin();

  while (ita != a.end() && itb != b.end()) {
    if (*ita < *itb)
      ++ita;
    else if (*itb < *ita)
      ++itb;
    else
      return false;
  }
  return true;
}
394
Jonathan Peyton30419822017-05-12 18:01:32 +0000395// PRE: <defined> and <undefined> are arrays with <nTotal> elements where
396// <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the
397// external object files and the rest correspond to the internal object files.
398// POST: file x is said to depend on file y if undefined[x] and defined[y] are
399// not disjoint. Returns the transitive closure of the set of internal object
400// files, as a set of file indexes, under the 'depends on' relation, minus the
401// set of internal object files.
402set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined,
403 set<string> *undefined) {
404 set<int> *required = new set<int>;
405 set<int> fresh[2];
406 int i, cur = 0;
407 bool changed;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000408
Jonathan Peyton30419822017-05-12 18:01:32 +0000409 for (i = nTotal - 1; i >= nExternal; --i)
410 fresh[cur].insert(i);
411 do {
412 changed = false;
413 for (set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end();
414 ++it) {
415 set<string> &s = undefined[*it];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000416
Jonathan Peyton30419822017-05-12 18:01:32 +0000417 for (i = 0; i < nExternal; ++i) {
418 if (required->find(i) == required->end()) {
419 if (!isDisjoint(defined[i], s)) {
420 // found a new qualifying element
421 required->insert(i);
422 fresh[1 - cur].insert(i);
423 changed = true;
424 }
425 }
426 }
427 }
428 fresh[cur].clear();
429 cur = 1 - cur;
430 } while (changed);
431 return required;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000432}
433
434int main(int argc, char **argv) {
Jonathan Peyton30419822017-05-12 18:01:32 +0000435 int nExternal, nInternal, i;
436 set<string> *defined, *undefined;
437 set<int>::iterator it;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000438
Jonathan Peyton30419822017-05-12 18:01:32 +0000439 if (argc < 3)
440 stop("Please specify a positive integer followed by a list of object "
441 "filenames");
442 nExternal = atoi(argv[1]);
443 if (nExternal <= 0)
444 stop("Please specify a positive integer followed by a list of object "
445 "filenames");
446 if (nExternal + 2 > argc)
447 stop("Too few external objects");
448 nInternal = argc - nExternal - 2;
449 defined = new set<string>[argc - 2];
450 undefined = new set<string>[argc - 2];
Jim Cownie5e8470a2013-09-27 10:38:44 +0000451
Jonathan Peyton30419822017-05-12 18:01:32 +0000452 // determine the set of defined and undefined external symbols
453 for (i = 2; i < argc; ++i)
454 computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000455
Jonathan Peyton30419822017-05-12 18:01:32 +0000456 // determine the set of required external files
457 set<int> *requiredExternal =
458 findRequiredExternal(nExternal, argc - 2, defined, undefined);
459 set<string> hide;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000460
Jonathan Peyton30419822017-05-12 18:01:32 +0000461 // determine the set of symbols to hide--namely defined external symbols of
462 // the required external files
463 for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
464 int idx = *it;
465 set<string>::iterator it2;
466 // We have to insert one element at a time instead of inserting a range
467 // because the insert member function taking a range doesn't exist on
468 // Windows* OS, at least at the time of this writing.
469 for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
470 hide.insert(*it2);
471 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000472
Jonathan Peyton30419822017-05-12 18:01:32 +0000473 // process the external files--removing those that are not required and hiding
474 // the appropriate symbols in the others
475 for (i = 0; i < nExternal; ++i)
476 if (requiredExternal->find(i) != requiredExternal->end())
477 hideSymbols(argv[2 + i], hide);
478 else
479 remove(argv[2 + i]);
480 // hide the appropriate symbols in the internal files
481 for (i = nExternal + 2; i < argc; ++i)
482 hideSymbols(argv[i], hide);
483 return 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000484}