blob: 3f15c8e306ef05753ab01affd1bc8d0ff46d635e [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * extractExternal.cpp
3 * $Revision: 42181 $
4 * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $
5 */
6
7
8//===----------------------------------------------------------------------===//
9//
10// The LLVM Compiler Infrastructure
11//
12// This file is dual licensed under the MIT and the University of Illinois Open
13// Source Licenses. See LICENSE.txt for details.
14//
15//===----------------------------------------------------------------------===//
16
17
18#include <stdlib.h>
19#include <iostream>
20#include <strstream>
21#include <fstream>
22#include <string>
23#include <set>
24#include <map>
25
26/* Given a set of n object files h ('external' object files) and a set of m
27 object files o ('internal' object files),
28 1. Determines r, the subset of h that o depends on, directly or indirectly
29 2. Removes the files in h - r from the file system
30 3. For each external symbol defined in some file in r, rename it in r U o
31 by prefixing it with "__kmp_external_"
32 Usage:
33 hide.exe <n> <filenames for h> <filenames for o>
34
35 Thus, the prefixed symbols become hidden in the sense that they now have a special
36 prefix.
37*/
38
39using namespace std;
40
41void stop(char* errorMsg) {
42 printf("%s\n", errorMsg);
43 exit(1);
44}
45
46// an entry in the symbol table of a .OBJ file
47class Symbol {
48public:
49 __int64 name;
50 unsigned value;
51 unsigned short sectionNum, type;
52 char storageClass, nAux;
53};
54
55class _rstream : public istrstream {
56private:
57 const char *buf;
58protected:
59 _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){}
60 ~_rstream() {
61 delete[]buf;
62 }
63};
64
65/* A stream encapuslating the content of a file or the content of a string, overriding the
66 >> operator to read various integer types in binary form, as well as a symbol table
67 entry.
68*/
69class rstream : public _rstream {
70private:
71 template<class T>
72 inline rstream& doRead(T &x) {
73 read((char*)&x, sizeof(T));
74 return *this;
75 }
76 static pair<const char*, streamsize> getBuf(const char *fileName) {
77 ifstream raw(fileName,ios::binary | ios::in);
78 if(!raw.is_open())
79 stop("rstream.getBuf: Error opening file");
80 raw.seekg(0,ios::end);
81 streampos fileSize = raw.tellg();
82 if(fileSize < 0)
83 stop("rstream.getBuf: Error reading file");
84 char *buf = new char[fileSize];
85 raw.seekg(0,ios::beg);
86 raw.read(buf, fileSize);
87 return pair<const char*, streamsize>(buf,fileSize);
88 }
89public:
90 // construct from a string
91 rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
92 /* construct from a file whole content is fully read once to initialize the content of
93 this stream
94 */
95 rstream(const char *fileName):_rstream(getBuf(fileName)){}
96 rstream& operator>>(int &x) {
97 return doRead(x);
98 }
99 rstream& operator>>(unsigned &x) {
100 return doRead(x);
101 }
102 rstream& operator>>(short &x) {
103 return doRead(x);
104 }
105 rstream& operator>>(unsigned short &x) {
106 return doRead(x);
107 }
108 rstream& operator>>(Symbol &e) {
109 read((char*)&e, 18);
110 return *this;
111 }
112};
113
114// string table in a .OBJ file
115class StringTable {
116private:
117 map<string, unsigned> directory;
118 size_t length;
119 char *data;
120
121 // make <directory> from <length> bytes in <data>
122 void makeDirectory(void) {
123 unsigned i = 4;
124 while(i < length) {
125 string s = string(data + i);
126 directory.insert(make_pair(s, i));
127 i += s.size() + 1;
128 }
129 }
130 // initialize <length> and <data> with contents specified by the arguments
131 void init(const char *_data) {
132 unsigned _length = *(unsigned*)_data;
133
134 if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
135 stop("StringTable.init: Invalid symbol table");
136 if(_data[_length - 1]) {
137 // to prevent runaway strings, make sure the data ends with a zero
138 data = new char[length = _length + 1];
139 data[_length] = 0;
140 } else {
141 data = new char[length = _length];
142 }
143 *(unsigned*)data = length;
144 memcpy(data + sizeof(unsigned), _data + sizeof(unsigned),
145 length - sizeof(unsigned));
146 makeDirectory();
147 }
148public:
149 StringTable(rstream &f) {
150 /* Construct string table by reading from f.
151 */
152 streampos s;
153 unsigned strSize;
154 char *strData;
155
156 s = f.tellg();
157 f>>strSize;
158 if(strSize < sizeof(unsigned))
159 stop("StringTable: Invalid string table");
160 strData = new char[strSize];
161 *(unsigned*)strData = strSize;
162 // read the raw data into <strData>
163 f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
164 s = f.tellg() - s;
165 if(s < strSize)
166 stop("StringTable: Unexpected EOF");
167 init(strData);
168 delete[]strData;
169 }
170 StringTable(const set<string> &strings) {
171 /* Construct string table from given strings.
172 */
173 char *p;
174 set<string>::const_iterator it;
175 size_t s;
176
177 // count required size for data
178 for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
179 size_t l = (*it).size();
180
181 if(l > (unsigned) 0xFFFFFFFF)
182 stop("StringTable: String too long");
183 if(l > 8) {
184 length += l + 1;
185 if(length > (unsigned) 0xFFFFFFFF)
186 stop("StringTable: Symbol table too long");
187 }
188 }
189 data = new char[length];
190 *(unsigned*)data = length;
191 // populate data and directory
192 for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
193 const string &str = *it;
194 size_t l = str.size();
195 if(l > 8) {
196 directory.insert(make_pair(str, p - data));
197 memcpy(p, str.c_str(), l);
198 p[l] = 0;
199 p += l + 1;
200 }
201 }
202 }
203 ~StringTable() {
204 delete[] data;
205 }
206 /* Returns encoding for given string based on this string table.
207 Error if string length is greater than 8 but string is not in
208 the string table--returns 0.
209 */
210 __int64 encode(const string &str) {
211 __int64 r;
212
213 if(str.size() <= 8) {
214 // encoded directly
215 ((char*)&r)[7] = 0;
216 strncpy((char*)&r, str.c_str(), 8);
217 return r;
218 } else {
219 // represented as index into table
220 map<string,unsigned>::const_iterator it = directory.find(str);
221 if(it == directory.end())
222 stop("StringTable::encode: String now found in string table");
223 ((unsigned*)&r)[0] = 0;
224 ((unsigned*)&r)[1] = (*it).second;
225 return r;
226 }
227 }
228 /* Returns string represented by x based on this string table.
229 Error if x references an invalid position in the table--returns
230 the empty string.
231 */
232 string decode(__int64 x) const {
233 if(*(unsigned*)&x == 0) {
234 // represented as index into table
235 unsigned &p = ((unsigned*)&x)[1];
236 if(p >= length)
237 stop("StringTable::decode: Invalid string table lookup");
238 return string(data + p);
239 } else {
240 // encoded directly
241 char *p = (char*)&x;
242 int i;
243
244 for(i = 0; i < 8 && p[i]; ++i);
245 return string(p, i);
246 }
247 }
248 void write(ostream &os) {
249 os.write(data, length);
250 }
251};
252
253/* for the named object file, determines the set of defined symbols and the set of undefined external symbols
254 and writes them to <defined> and <undefined> respectively
255*/
256void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
257 streampos fileSize;
258 size_t strTabStart;
259 unsigned symTabStart, symNEntries;
260 rstream f(fileName);
261
262 f.seekg(0,ios::end);
263 fileSize = f.tellg();
264
265 f.seekg(8);
266 f >> symTabStart >> symNEntries;
267 // seek to the string table
268 f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
269 if(f.eof()) {
270 printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
271 fileName, (unsigned long) fileSize, symTabStart, symNEntries);
272 stop("computeExternalSymbols: Unexpected EOF 1");
273 }
274 StringTable stringTable(f); // read the string table
275 if(f.tellg() != fileSize)
276 stop("computeExternalSymbols: Unexpected data after string table");
277
278 f.clear();
279 f.seekg(symTabStart); // seek to the symbol table
280
281 defined->clear(); undefined->clear();
282 for(int i = 0; i < symNEntries; ++i) {
283 // process each entry
284 Symbol e;
285
286 if(f.eof())
287 stop("computeExternalSymbols: Unexpected EOF 2");
288 f>>e;
289 if(f.fail())
290 stop("computeExternalSymbols: File read error");
291 if(e.nAux) { // auxiliary entry: skip
292 f.seekg(e.nAux * 18, ios::cur);
293 i += e.nAux;
294 }
295 // if symbol is extern and defined in the current file, insert it
296 if(e.storageClass == 2)
297 if(e.sectionNum)
298 defined->insert(stringTable.decode(e.name));
299 else
300 undefined->insert(stringTable.decode(e.name));
301 }
302}
303
Alp Toker8f2d3f02014-02-24 10:40:15 +0000304/* For each occurrence of an external symbol in the object file named by
Jim Cownie5e8470a2013-09-27 10:38:44 +0000305 by <fileName> that is a member of <hide>, renames it by prefixing
306 with "__kmp_external_", writing back the file in-place
307*/
308void hideSymbols(char *fileName, const set<string> &hide) {
309 static const string prefix("__kmp_external_");
310 set<string> strings; // set of all occurring symbols, appropriately prefixed
311 streampos fileSize;
312 size_t strTabStart;
313 unsigned symTabStart, symNEntries;
314 int i;
315 rstream in(fileName);
316
317 in.seekg(0,ios::end);
318 fileSize = in.tellg();
319
320 in.seekg(8);
321 in >> symTabStart >> symNEntries;
322 in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
323 if(in.eof())
324 stop("hideSymbols: Unexpected EOF");
325 StringTable stringTableOld(in); // read original string table
326
327 if(in.tellg() != fileSize)
328 stop("hideSymbols: Unexpected data after string table");
329
330 // compute set of occurring strings with prefix added
331 for(i = 0; i < symNEntries; ++i) {
332 Symbol e;
333
334 in.seekg(symTabStart + i * 18);
335 if(in.eof())
336 stop("hideSymbols: Unexpected EOF");
337 in >> e;
338 if(in.fail())
339 stop("hideSymbols: File read error");
340 if(e.nAux)
341 i += e.nAux;
342 const string &s = stringTableOld.decode(e.name);
343 // if symbol is extern and found in <hide>, prefix and insert into strings,
344 // otherwise, just insert into strings without prefix
345 strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
346 prefix + s : s);
347 }
348
349 ofstream out(fileName, ios::trunc | ios::out | ios::binary);
350 if(!out.is_open())
351 stop("hideSymbols: Error opening output file");
352
353 // make new string table from string set
354 StringTable stringTableNew = StringTable(strings);
355
356 // copy input file to output file up to just before the symbol table
357 in.seekg(0);
358 char *buf = new char[symTabStart];
359 in.read(buf, symTabStart);
360 out.write(buf, symTabStart);
361 delete []buf;
362
363 // copy input symbol table to output symbol table with name translation
364 for(i = 0; i < symNEntries; ++i) {
365 Symbol e;
366
367 in.seekg(symTabStart + i*18);
368 if(in.eof())
369 stop("hideSymbols: Unexpected EOF");
370 in >> e;
371 if(in.fail())
372 stop("hideSymbols: File read error");
373 const string &s = stringTableOld.decode(e.name);
374 out.seekp(symTabStart + i*18);
375 e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
376 prefix + s : s);
377 out.write((char*)&e, 18);
378 if(out.fail())
379 stop("hideSymbols: File write error");
380 if(e.nAux) {
381 // copy auxiliary symbol table entries
382 int nAux = e.nAux;
383 for(int j = 1; j <= nAux; ++j) {
384 in >> e;
385 out.seekp(symTabStart + (i + j) * 18);
386 out.write((char*)&e, 18);
387 }
388 i += nAux;
389 }
390 }
391 // output string table
392 stringTableNew.write(out);
393}
394
395// returns true iff <a> and <b> have no common element
396template <class T>
397bool isDisjoint(const set<T> &a, const set<T> &b) {
398 set<T>::const_iterator ita, itb;
399
400 for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
401 const T &ta = *ita, &tb = *itb;
402 if(ta < tb)
403 ++ita;
404 else if (tb < ta)
405 ++itb;
406 else
407 return false;
408 }
409 return true;
410}
411
412/* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
413 <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object
414 files and the rest correspond to the internal object files.
415 postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
416 disjoint. Returns the transitive closure of the set of internal object files, as a set of
417 file indexes, under the 'depends on' relation, minus the set of internal object files.
418*/
419set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
420 set<int> *required = new set<int>;
421 set<int> fresh[2];
422 int i, cur = 0;
423 bool changed;
424
425 for(i = nTotal - 1; i >= nExternal; --i)
426 fresh[cur].insert(i);
427 do {
428 changed = false;
429 for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
430 set<string> &s = undefined[*it];
431
432 for(i = 0; i < nExternal; ++i) {
433 if(required->find(i) == required->end()) {
434 if(!isDisjoint(defined[i], s)) {
435 // found a new qualifying element
436 required->insert(i);
437 fresh[1 - cur].insert(i);
438 changed = true;
439 }
440 }
441 }
442 }
443 fresh[cur].clear();
444 cur = 1 - cur;
445 } while(changed);
446 return required;
447}
448
449int main(int argc, char **argv) {
450 int nExternal, nInternal, i;
451 set<string> *defined, *undefined;
452 set<int>::iterator it;
453
454 if(argc < 3)
455 stop("Please specify a positive integer followed by a list of object filenames");
456 nExternal = atoi(argv[1]);
457 if(nExternal <= 0)
458 stop("Please specify a positive integer followed by a list of object filenames");
459 if(nExternal + 2 > argc)
460 stop("Too few external objects");
461 nInternal = argc - nExternal - 2;
462 defined = new set<string>[argc - 2];
463 undefined = new set<string>[argc - 2];
464
465 // determine the set of defined and undefined external symbols
466 for(i = 2; i < argc; ++i)
467 computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
468
469 // determine the set of required external files
470 set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
471 set<string> hide;
472
473 /* determine the set of symbols to hide--namely defined external symbols of the
474 required external files
475 */
476 for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
477 int idx = *it;
478 set<string>::iterator it2;
479 /* We have to insert one element at a time instead of inserting a range because
480 the insert member function taking a range doesn't exist on Windows* OS, at least
481 at the time of this writing.
482 */
483 for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
484 hide.insert(*it2);
485 }
486
487 /* process the external files--removing those that are not required and hiding
488 the appropriate symbols in the others
489 */
490 for(i = 0; i < nExternal; ++i)
491 if(requiredExternal->find(i) != requiredExternal->end())
492 hideSymbols(argv[2 + i], hide);
493 else
494 remove(argv[2 + i]);
495 // hide the appropriate symbols in the internal files
496 for(i = nExternal + 2; i < argc; ++i)
497 hideSymbols(argv[i], hide);
498 return 0;
499}