blob: 7a6fdb7e2970f8941ff7103ab840c8342c1cf07b [file] [log] [blame]
/*
 * extractExternal.cpp
 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <strstream>
#include <fstream>
#include <string>
#include <set>
#include <map>
23
/* Given a set of n object files h ('external' object files) and a set of m
   object files o ('internal' object files),
   1. Determines r, the subset of h that o depends on, directly or indirectly
   2. Removes the files in h - r from the file system
   3. For each external symbol defined in some file in r, renames it in r U o
      by prefixing it with "__kmp_external_"
   Usage:
   hide.exe <n> <filenames for h> <filenames for o>

   Thus, the prefixed symbols become hidden in the sense that they now have a special
   prefix.
*/
36
// The unqualified standard-library names used throughout this file (string,
// set, map, pair, streampos, ...) are found through this directive.
using namespace std;
38
// Prints <errorMsg> followed by a newline to standard output, then terminates
// the process with exit status 1.  Does not return.
// The parameter points to const characters so that string literals, which is
// what every caller in this file passes, can be supplied without the
// literal-to-char* conversion that C++11 removed.
void stop(const char* errorMsg) {
    printf("%s\n", errorMsg);
    exit(1);
}
43
// An entry in the symbol table of a .OBJ file.
// Entries are transferred to and from the file as raw 18-byte records (the
// readers/writers in this file copy exactly 18 bytes to/from the start of the
// object), so the members must stay in this order and add up to 18 bytes
// without interior padding: 8 + 4 + 2 + 2 + 1 + 1.
class Symbol {
public:
    // Name field.  When its first four bytes are zero the last four bytes are
    // an offset into the string table; otherwise the eight bytes hold the name
    // itself, zero padded (see StringTable::encode/decode).
    long long name;
    unsigned value;
    // sectionNum == 0 is treated by this tool as "not defined in this file"
    unsigned short sectionNum, type;
    // storageClass == 2 is treated by this tool as "external symbol".
    // nAux is the number of auxiliary 18-byte records that follow this entry.
    // Both are unsigned so that byte values above 127 are not read back as
    // negative numbers (a negative nAux would make callers step backwards).
    unsigned char storageClass, nAux;
};
52
/* Base class for rstream: an istrstream that remembers the start of the
   character buffer it reads from and releases that buffer with delete[] when
   it is destroyed.  Constructor and destructor are protected, so it can only
   be used as a base class.
*/
class _rstream : public std::istrstream {
private:
    const char *ownedBuf;   // start of the buffer; freed by the destructor
protected:
    // <src>.first is the start of the buffer, <src>.second its size in bytes
    _rstream(std::pair<const char*, std::streamsize> src)
        : std::istrstream(src.first, src.second), ownedBuf(src.first) {
    }
    ~_rstream() {
        delete[] ownedBuf;
    }
};
62
63/* A stream encapuslating the content of a file or the content of a string, overriding the
64 >> operator to read various integer types in binary form, as well as a symbol table
65 entry.
66*/
67class rstream : public _rstream {
68private:
69 template<class T>
70 inline rstream& doRead(T &x) {
71 read((char*)&x, sizeof(T));
72 return *this;
73 }
74 static pair<const char*, streamsize> getBuf(const char *fileName) {
75 ifstream raw(fileName,ios::binary | ios::in);
76 if(!raw.is_open())
77 stop("rstream.getBuf: Error opening file");
78 raw.seekg(0,ios::end);
79 streampos fileSize = raw.tellg();
80 if(fileSize < 0)
81 stop("rstream.getBuf: Error reading file");
82 char *buf = new char[fileSize];
83 raw.seekg(0,ios::beg);
84 raw.read(buf, fileSize);
85 return pair<const char*, streamsize>(buf,fileSize);
86 }
87public:
88 // construct from a string
89 rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
90 /* construct from a file whole content is fully read once to initialize the content of
91 this stream
92 */
93 rstream(const char *fileName):_rstream(getBuf(fileName)){}
94 rstream& operator>>(int &x) {
95 return doRead(x);
96 }
97 rstream& operator>>(unsigned &x) {
98 return doRead(x);
99 }
100 rstream& operator>>(short &x) {
101 return doRead(x);
102 }
103 rstream& operator>>(unsigned short &x) {
104 return doRead(x);
105 }
106 rstream& operator>>(Symbol &e) {
107 read((char*)&e, 18);
108 return *this;
109 }
110};
111
112// string table in a .OBJ file
113class StringTable {
114private:
115 map<string, unsigned> directory;
116 size_t length;
117 char *data;
118
119 // make <directory> from <length> bytes in <data>
120 void makeDirectory(void) {
121 unsigned i = 4;
122 while(i < length) {
123 string s = string(data + i);
124 directory.insert(make_pair(s, i));
125 i += s.size() + 1;
126 }
127 }
128 // initialize <length> and <data> with contents specified by the arguments
129 void init(const char *_data) {
130 unsigned _length = *(unsigned*)_data;
131
132 if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
133 stop("StringTable.init: Invalid symbol table");
134 if(_data[_length - 1]) {
135 // to prevent runaway strings, make sure the data ends with a zero
136 data = new char[length = _length + 1];
137 data[_length] = 0;
138 } else {
139 data = new char[length = _length];
140 }
141 *(unsigned*)data = length;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000142 KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
143 length - sizeof(unsigned));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000144 makeDirectory();
145 }
146public:
147 StringTable(rstream &f) {
148 /* Construct string table by reading from f.
149 */
150 streampos s;
151 unsigned strSize;
152 char *strData;
153
154 s = f.tellg();
155 f>>strSize;
156 if(strSize < sizeof(unsigned))
157 stop("StringTable: Invalid string table");
158 strData = new char[strSize];
159 *(unsigned*)strData = strSize;
160 // read the raw data into <strData>
161 f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
162 s = f.tellg() - s;
163 if(s < strSize)
164 stop("StringTable: Unexpected EOF");
165 init(strData);
166 delete[]strData;
167 }
168 StringTable(const set<string> &strings) {
169 /* Construct string table from given strings.
170 */
171 char *p;
172 set<string>::const_iterator it;
173 size_t s;
174
175 // count required size for data
176 for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
177 size_t l = (*it).size();
178
179 if(l > (unsigned) 0xFFFFFFFF)
180 stop("StringTable: String too long");
181 if(l > 8) {
182 length += l + 1;
183 if(length > (unsigned) 0xFFFFFFFF)
184 stop("StringTable: Symbol table too long");
185 }
186 }
187 data = new char[length];
188 *(unsigned*)data = length;
189 // populate data and directory
190 for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
191 const string &str = *it;
192 size_t l = str.size();
193 if(l > 8) {
194 directory.insert(make_pair(str, p - data));
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000195 KMP_MEMCPY(p, str.c_str(), l);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000196 p[l] = 0;
197 p += l + 1;
198 }
199 }
200 }
201 ~StringTable() {
202 delete[] data;
203 }
204 /* Returns encoding for given string based on this string table.
205 Error if string length is greater than 8 but string is not in
206 the string table--returns 0.
207 */
208 __int64 encode(const string &str) {
209 __int64 r;
210
211 if(str.size() <= 8) {
212 // encoded directly
213 ((char*)&r)[7] = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +0000214 KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000215 return r;
216 } else {
217 // represented as index into table
218 map<string,unsigned>::const_iterator it = directory.find(str);
219 if(it == directory.end())
220 stop("StringTable::encode: String now found in string table");
221 ((unsigned*)&r)[0] = 0;
222 ((unsigned*)&r)[1] = (*it).second;
223 return r;
224 }
225 }
226 /* Returns string represented by x based on this string table.
227 Error if x references an invalid position in the table--returns
228 the empty string.
229 */
230 string decode(__int64 x) const {
231 if(*(unsigned*)&x == 0) {
232 // represented as index into table
233 unsigned &p = ((unsigned*)&x)[1];
234 if(p >= length)
235 stop("StringTable::decode: Invalid string table lookup");
236 return string(data + p);
237 } else {
238 // encoded directly
239 char *p = (char*)&x;
240 int i;
241
242 for(i = 0; i < 8 && p[i]; ++i);
243 return string(p, i);
244 }
245 }
246 void write(ostream &os) {
247 os.write(data, length);
248 }
249};
250
251/* for the named object file, determines the set of defined symbols and the set of undefined external symbols
252 and writes them to <defined> and <undefined> respectively
253*/
254void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
255 streampos fileSize;
256 size_t strTabStart;
257 unsigned symTabStart, symNEntries;
258 rstream f(fileName);
259
260 f.seekg(0,ios::end);
261 fileSize = f.tellg();
262
263 f.seekg(8);
264 f >> symTabStart >> symNEntries;
265 // seek to the string table
266 f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
267 if(f.eof()) {
268 printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
269 fileName, (unsigned long) fileSize, symTabStart, symNEntries);
270 stop("computeExternalSymbols: Unexpected EOF 1");
271 }
272 StringTable stringTable(f); // read the string table
273 if(f.tellg() != fileSize)
274 stop("computeExternalSymbols: Unexpected data after string table");
275
276 f.clear();
277 f.seekg(symTabStart); // seek to the symbol table
278
279 defined->clear(); undefined->clear();
280 for(int i = 0; i < symNEntries; ++i) {
281 // process each entry
282 Symbol e;
283
284 if(f.eof())
285 stop("computeExternalSymbols: Unexpected EOF 2");
286 f>>e;
287 if(f.fail())
288 stop("computeExternalSymbols: File read error");
289 if(e.nAux) { // auxiliary entry: skip
290 f.seekg(e.nAux * 18, ios::cur);
291 i += e.nAux;
292 }
293 // if symbol is extern and defined in the current file, insert it
294 if(e.storageClass == 2)
295 if(e.sectionNum)
296 defined->insert(stringTable.decode(e.name));
297 else
298 undefined->insert(stringTable.decode(e.name));
299 }
300}
301
Alp Toker8f2d3f02014-02-24 10:40:15 +0000302/* For each occurrence of an external symbol in the object file named by
Jim Cownie5e8470a2013-09-27 10:38:44 +0000303 by <fileName> that is a member of <hide>, renames it by prefixing
304 with "__kmp_external_", writing back the file in-place
305*/
306void hideSymbols(char *fileName, const set<string> &hide) {
307 static const string prefix("__kmp_external_");
308 set<string> strings; // set of all occurring symbols, appropriately prefixed
309 streampos fileSize;
310 size_t strTabStart;
311 unsigned symTabStart, symNEntries;
312 int i;
313 rstream in(fileName);
314
315 in.seekg(0,ios::end);
316 fileSize = in.tellg();
317
318 in.seekg(8);
319 in >> symTabStart >> symNEntries;
320 in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
321 if(in.eof())
322 stop("hideSymbols: Unexpected EOF");
323 StringTable stringTableOld(in); // read original string table
324
325 if(in.tellg() != fileSize)
326 stop("hideSymbols: Unexpected data after string table");
327
328 // compute set of occurring strings with prefix added
329 for(i = 0; i < symNEntries; ++i) {
330 Symbol e;
331
332 in.seekg(symTabStart + i * 18);
333 if(in.eof())
334 stop("hideSymbols: Unexpected EOF");
335 in >> e;
336 if(in.fail())
337 stop("hideSymbols: File read error");
338 if(e.nAux)
339 i += e.nAux;
340 const string &s = stringTableOld.decode(e.name);
341 // if symbol is extern and found in <hide>, prefix and insert into strings,
342 // otherwise, just insert into strings without prefix
343 strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
344 prefix + s : s);
345 }
346
347 ofstream out(fileName, ios::trunc | ios::out | ios::binary);
348 if(!out.is_open())
349 stop("hideSymbols: Error opening output file");
350
351 // make new string table from string set
352 StringTable stringTableNew = StringTable(strings);
353
354 // copy input file to output file up to just before the symbol table
355 in.seekg(0);
356 char *buf = new char[symTabStart];
357 in.read(buf, symTabStart);
358 out.write(buf, symTabStart);
359 delete []buf;
360
361 // copy input symbol table to output symbol table with name translation
362 for(i = 0; i < symNEntries; ++i) {
363 Symbol e;
364
365 in.seekg(symTabStart + i*18);
366 if(in.eof())
367 stop("hideSymbols: Unexpected EOF");
368 in >> e;
369 if(in.fail())
370 stop("hideSymbols: File read error");
371 const string &s = stringTableOld.decode(e.name);
372 out.seekp(symTabStart + i*18);
373 e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
374 prefix + s : s);
375 out.write((char*)&e, 18);
376 if(out.fail())
377 stop("hideSymbols: File write error");
378 if(e.nAux) {
379 // copy auxiliary symbol table entries
380 int nAux = e.nAux;
381 for(int j = 1; j <= nAux; ++j) {
382 in >> e;
383 out.seekp(symTabStart + (i + j) * 18);
384 out.write((char*)&e, 18);
385 }
386 i += nAux;
387 }
388 }
389 // output string table
390 stringTableNew.write(out);
391}
392
// Returns true iff <a> and <b> have no common element.
// Walks both sets in their sorted order in lockstep, always advancing the side
// with the smaller element, so each element is visited at most once.
// Elements are compared with operator< only.
template <class T>
bool isDisjoint(const std::set<T> &a, const std::set<T> &b) {
    // 'typename' is required: const_iterator is a type that depends on T
    typename std::set<T>::const_iterator ita = a.begin();
    typename std::set<T>::const_iterator itb = b.begin();

    while(ita != a.end() && itb != b.end()) {
        if(*ita < *itb)
            ++ita;
        else if(*itb < *ita)
            ++itb;
        else
            return false;   // neither is smaller: common element
    }
    return true;
}
409
410/* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
411 <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object
412 files and the rest correspond to the internal object files.
413 postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
414 disjoint. Returns the transitive closure of the set of internal object files, as a set of
415 file indexes, under the 'depends on' relation, minus the set of internal object files.
416*/
417set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
418 set<int> *required = new set<int>;
419 set<int> fresh[2];
420 int i, cur = 0;
421 bool changed;
422
423 for(i = nTotal - 1; i >= nExternal; --i)
424 fresh[cur].insert(i);
425 do {
426 changed = false;
427 for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
428 set<string> &s = undefined[*it];
429
430 for(i = 0; i < nExternal; ++i) {
431 if(required->find(i) == required->end()) {
432 if(!isDisjoint(defined[i], s)) {
433 // found a new qualifying element
434 required->insert(i);
435 fresh[1 - cur].insert(i);
436 changed = true;
437 }
438 }
439 }
440 }
441 fresh[cur].clear();
442 cur = 1 - cur;
443 } while(changed);
444 return required;
445}
446
447int main(int argc, char **argv) {
448 int nExternal, nInternal, i;
449 set<string> *defined, *undefined;
450 set<int>::iterator it;
451
452 if(argc < 3)
453 stop("Please specify a positive integer followed by a list of object filenames");
454 nExternal = atoi(argv[1]);
455 if(nExternal <= 0)
456 stop("Please specify a positive integer followed by a list of object filenames");
457 if(nExternal + 2 > argc)
458 stop("Too few external objects");
459 nInternal = argc - nExternal - 2;
460 defined = new set<string>[argc - 2];
461 undefined = new set<string>[argc - 2];
462
463 // determine the set of defined and undefined external symbols
464 for(i = 2; i < argc; ++i)
465 computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
466
467 // determine the set of required external files
468 set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
469 set<string> hide;
470
471 /* determine the set of symbols to hide--namely defined external symbols of the
472 required external files
473 */
474 for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
475 int idx = *it;
476 set<string>::iterator it2;
477 /* We have to insert one element at a time instead of inserting a range because
478 the insert member function taking a range doesn't exist on Windows* OS, at least
479 at the time of this writing.
480 */
481 for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
482 hide.insert(*it2);
483 }
484
485 /* process the external files--removing those that are not required and hiding
486 the appropriate symbols in the others
487 */
488 for(i = 0; i < nExternal; ++i)
489 if(requiredExternal->find(i) != requiredExternal->end())
490 hideSymbols(argv[2 + i], hide);
491 else
492 remove(argv[2 + i]);
493 // hide the appropriate symbols in the internal files
494 for(i = nExternal + 2; i < argc; ++i)
495 hideSymbols(argv[i], hide);
496 return 0;
497}