blob: b9b162548f63bb5843ead1de5461e716d949a128 [file] [log] [blame]
Ben Murdoch3ef787d2012-04-12 10:51:47 +01001#!/usr/bin/env python
2
3#
4# Copyright 2012 the V8 project authors. All rights reserved.
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following
13# disclaimer in the documentation and/or other materials provided
14# with the distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived
17# from this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30#
31
32#
33# Emits a C++ file to be compiled and linked into libv8 to support postmortem
34# debugging tools. Most importantly, this tool emits constants describing V8
35# internals:
36#
37# v8dbg_type_CLASS__TYPE = VALUE Describes class type values
38# v8dbg_class_CLASS__FIELD__TYPE = OFFSET Describes class fields
39# v8dbg_parent_CLASS__PARENT Describes class hierarchy
40# v8dbg_frametype_NAME = VALUE Describes stack frame values
41# v8dbg_off_fp_NAME = OFFSET Frame pointer offsets
42# v8dbg_prop_NAME = OFFSET Object property offsets
43# v8dbg_NAME = VALUE Miscellaneous values
44#
45# These constants are declared as global integers so that they'll be present in
46# the generated libv8 binary.
47#
48
49import re
50import sys
51
52#
53# Miscellaneous constants, tags, and masks used for object identification.
54#
# Each entry becomes one "int v8dbg_<name> = <value>;" line in the generated
# C++ file (see emit_set()).  'value' is a C++ expression that is copied
# verbatim into the output, so it must be resolvable by the compiler there.
consts_misc = [
    { 'name': 'FirstNonstringType',     'value': 'FIRST_NONSTRING_TYPE' },

    { 'name': 'IsNotStringMask',        'value': 'kIsNotStringMask' },
    { 'name': 'StringTag',              'value': 'kStringTag' },
    { 'name': 'NotStringTag',           'value': 'kNotStringTag' },

    { 'name': 'StringEncodingMask',     'value': 'kStringEncodingMask' },
    { 'name': 'TwoByteStringTag',       'value': 'kTwoByteStringTag' },
    { 'name': 'AsciiStringTag',         'value': 'kAsciiStringTag' },

    { 'name': 'StringRepresentationMask',
        'value': 'kStringRepresentationMask' },
    { 'name': 'SeqStringTag',           'value': 'kSeqStringTag' },
    { 'name': 'ConsStringTag',          'value': 'kConsStringTag' },
    { 'name': 'ExternalStringTag',      'value': 'kExternalStringTag' },

    { 'name': 'FailureTag',             'value': 'kFailureTag' },
    { 'name': 'FailureTagMask',         'value': 'kFailureTagMask' },
    { 'name': 'HeapObjectTag',          'value': 'kHeapObjectTag' },
    { 'name': 'HeapObjectTagMask',      'value': 'kHeapObjectTagMask' },
    { 'name': 'SmiTag',                 'value': 'kSmiTag' },
    { 'name': 'SmiTagMask',             'value': 'kSmiTagMask' },
    { 'name': 'SmiValueShift',          'value': 'kSmiTagSize' },
    { 'name': 'PointerSizeLog2',        'value': 'kPointerSizeLog2' },

    { 'name': 'prop_idx_content',
        'value': 'DescriptorArray::kContentArrayIndex' },
    { 'name': 'prop_idx_first',
        'value': 'DescriptorArray::kFirstIndex' },
    { 'name': 'prop_type_field',
        'value': 'FIELD' },
    { 'name': 'prop_type_first_phantom',
        'value': 'MAP_TRANSITION' },
    { 'name': 'prop_type_mask',
        'value': 'PropertyDetails::TypeField::kMask' },

    { 'name': 'off_fp_context',
        'value': 'StandardFrameConstants::kContextOffset' },
    { 'name': 'off_fp_marker',
        'value': 'StandardFrameConstants::kMarkerOffset' },
    { 'name': 'off_fp_function',
        'value': 'JavaScriptFrameConstants::kFunctionOffset' },
    { 'name': 'off_fp_args',
        'value': 'JavaScriptFrameConstants::kLastParameterOffset' },
]
101
102#
103# The following useful fields are missing accessors, so we define fake ones.
104#
# Each string is the argument list of a synthetic ACCESSORS(...) invocation:
# "class, field, type, offset constant".  load_fields() wraps each one in
# "ACCESSORS(%s)" and hands it to parse_field().
extras_accessors = [
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, instance_descriptors, int, kInstanceDescriptorsOrBitField3Offset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqAsciiString, chars, char, kHeaderSize',
    'SharedFunctionInfo, code, Code, kCodeOffset',
    'Code, instruction_start, uintptr_t, kHeaderSize',
    'Code, instruction_size, int, kInstructionSizeOffset',
]
122
123#
124# The following is a whitelist of classes we expect to find when scanning the
125# source code. This list is not exhaustive, but it's still useful to identify
126# when this script gets out of sync with the source. See load_objects().
127#
# load_objects() exits with an error if any of these classes cannot be
# associated with an instance type after scanning the header.
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqAsciiString', 'SharedFunctionInfo'
]
133
134
135#
136# The following structures store high-level representations of the structures
137# for which we're going to emit descriptive constants.
138#
types = {}          # type names seen in the enum; a dict used as a set
                    # (load_objects() stores name -> True)
typeclasses = {}    # maps type names to corresponding class names
klasses = {}        # known classes, including parents; each value is a
                    # dict of the form { 'parent': <name or None> }
fields = []         # field declarations, as dicts built by parse_field()
143
# Prologue of the generated C++ file; the single %s is filled in with the
# name this script was invoked as.  Note that this is not a raw string, so
# the backslash-newline after the FRAME_CONST parameter list is consumed by
# Python and the macro definition is emitted on a single line.
header = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]
166
# Epilogue of the generated C++ file: closes the extern "C" block that the
# header opens.
footer = '''
}
'''
170
171#
172# Loads class hierarchy and type information from "objects.h".
173#
def _camel_case_type_name(type_name):
    """Convert an instance type name to its candidate class name.

    For example, "JS_FUNCTION_TYPE" becomes "JSFunction".  Returns None when
    the name (after the substitutions below) does not end in "_TYPE".
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = type_name.replace('SYMBOL_', 'STRING_')

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    if not usetype.endswith('_TYPE'):
        return None

    # Remove the "_TYPE" suffix and then convert to camel case, except that
    # a "JS" prefix remains uppercase.
    parts = usetype[:-len('_TYPE')].split('_')
    prefix = ''
    if parts[0] == 'JS':
        prefix = 'JS'
        parts = parts[1:]

    # Empty components (from doubled or leading underscores) are skipped
    # rather than indexed, which would raise IndexError.
    return prefix + ''.join(part[0].upper() + part[1:].lower()
                            for part in parts if part)


def _resolve_string_class(cctype, known_classes):
    """Adjust a camel-cased *String name so that it matches a class name.

    Both types and class names for Strings specify a representation (e.g.,
    Seq, Cons, External, or Sliced) and an encoding (TwoByte or Ascii).
    Either may be omitted from the type name, in which case "Seq" and
    "TwoByte" are assumed, as in:

        STRING_TYPE => SeqTwoByteString

    Sometimes the type name has more information than the class, as in:

        CONS_ASCII_STRING_TYPE => ConsString

    So we first add a missing representation and encoding, and if the result
    is not in known_classes we strip the encoding out again.
    """
    if ('Cons' not in cctype and 'External' not in cctype and
            'Sliced' not in cctype):
        if 'Ascii' in cctype:
            cctype = re.sub(r'AsciiString$', 'SeqAsciiString', cctype)
        else:
            cctype = re.sub(r'String$', 'SeqString', cctype)

    if 'Ascii' not in cctype:
        cctype = re.sub(r'String$', 'TwoByteString', cctype)

    if cctype not in known_classes:
        cctype = cctype.replace('Ascii', '').replace('TwoByte', '')

    return cctype


def load_objects():
    """Load class hierarchy and type information from "objects.h".

    The header is read from the path in sys.argv[2].  Results are stored in
    the module-level structures:

        klasses      class name -> { 'parent': parent name or None }
        types        every name listed in "enum InstanceType"
        typeclasses  type name -> class name, for types with a known class

    If any class in expected_classes ends up with no associated type, an
    error is printed for each one and the script exits with status 1.
    """
    in_insttype = False
    typestr = ''

    # The classes we're sure should be present; entries are removed as they
    # are matched to a type.
    checktypes = set(expected_classes)

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate a string representing the entire InstanceType
    # enum definition and parse it later because it's easier to do so
    # without the embedded newlines.
    #
    with open(sys.argv[2], 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Drop surrounding whitespace and any trailing "//" comment.
            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                typestr += line
                continue

            match = re.match(
                r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{', line)

            if match:
                # group(3) is None when the class has no "public" parent.
                klasses[match.group(1)] = { 'parent': match.group(3) }

    #
    # Process the instance type declaration: one entry per comma, with any
    # "= value" initializer removed.  The text after the last comma (or an
    # empty enum) yields an empty name, which is not a type.
    #
    for entry in typestr.split(','):
        type_name = re.sub(r'\s*=.*', '', entry).lstrip()
        if type_name:
            types[type_name] = True

    #
    # Infer class names for each type based on a systematic transformation.
    # We find the class for each type rather than the other way around
    # because there are fewer cases where one type maps to more than one
    # class than the other way around.
    #
    for type_name in types:
        cctype = _camel_case_type_name(type_name)
        if cctype is None:
            continue

        if cctype.endswith('String'):
            cctype = _resolve_string_class(cctype, klasses)

        # Despite all that, some types have no corresponding class.
        if cctype in klasses:
            typeclasses[type_name] = cctype
            checktypes.discard(cctype)

    if checktypes:
        for klass in sorted(checktypes):
            print('error: expected class \"%s\" not found' % klass)

        sys.exit(1)
322
323
324#
325# For a given macro call, pick apart the arguments and return an object
326# describing the corresponding output constant. See load_fields().
327#
def parse_field(call):
    """Pick apart one accessor macro invocation.

    call is the full text of the invocation, e.g.
    "ACCESSORS(Map, prototype, Object, kPrototypeOffset)"; it may span
    several lines.  Supported macros and their arguments:

        ACCESSORS, ACCESSORS_GCSAFE    (class, field, type, offset)
        SMI_ACCESSORS                  (class, field, offset)

    Returns a dict describing the output constant:

        'name'   "class_<class>__<field>__<type>" (type is "SMI" for
                 SMI_ACCESSORS)
        'value'  "<class>::<offset>"

    Raises ValueError for an unrecognized macro or too few arguments.
    """
    # Replace newlines with spaces.  (Strings are immutable, so this has to
    # build a new string rather than assign into the old one.)
    call = call.replace('\n', ' ').strip()

    idx = call.find('(')
    kind = call[0:idx]

    # Everything between the first "(" and the final character, which is
    # the closing ")" for the invocations load_fields() extracts.
    rest = call[idx + 1:-1]
    args = [arg.strip() for arg in rest.split(',')]

    if kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE':
        if len(args) < 4:
            raise ValueError('expected 4 arguments in "%s"' % call)

        klass, field, dtype, offset = args[0:4]
    elif kind == 'SMI_ACCESSORS':
        if len(args) < 3:
            raise ValueError('expected 3 arguments in "%s"' % call)

        klass, field, offset = args[0:3]
        dtype = 'SMI'
    else:
        raise ValueError('unsupported accessor macro in "%s"' % call)

    return ({
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset)
    })
361
362#
363# Load field offset information from objects-inl.h.
364#
def _scan_call_text(line, start, opens):
    """Track parenthesis depth across line, beginning at index start.

    opens is the number of unclosed "(" on entry.  Scanning stops right
    after the character that brings the depth back to zero, or at the end
    of the line.  Returns (end, opens): the index just past the last
    character scanned, and the resulting depth.
    """
    end = start
    for pos in range(start, len(line)):
        if line[pos] == '(':
            opens += 1
        elif line[pos] == ')':
            opens -= 1

        end = pos + 1
        if opens == 0:
            break

    return end, opens


def load_fields():
    """Load field offset information from objects-inl.h.

    The file is read from the path in sys.argv[3].  Each class's fields and
    the corresponding offsets are described in the source by calls to macros
    like "ACCESSORS" (and friends).  All we do here is extract these macro
    invocations, taking into account that they may span multiple lines and
    may contain nested parentheses, and pass each one to parse_field().  The
    results, followed by the synthetic entries from extras_accessors, are
    appended to the module-level fields list.
    """
    prefixes = [ 'ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS' ]
    current = ''    # text of the invocation collected so far
    opens = 0       # unclosed "(" in current; > 0 means "still inside"

    with open(sys.argv[3], 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line: keep consuming from its beginning.
                start = 0
            else:
                # Only lines that begin with one of the macros start a new
                # invocation; everything else is ignored.
                start = None
                for prefix in prefixes:
                    if line.startswith(prefix + '('):
                        start = len(prefix)
                        break

                if start is None:
                    continue

                # The previous invocation is only flushed once the next one
                # begins (or at end of file, below).
                if current:
                    fields.append(parse_field(current))
                    current = ''

            end, opens = _scan_call_text(line, start, opens)
            current += line[0:end]

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))
420
421#
422# Emit a block of constants.
423#
def emit_set(out, consts):
    """Emit a block of constants.

    out is any object with a write() method.  consts is a sequence of dicts
    with 'name' and 'value' entries; each is written as

        int v8dbg_<name> = <value>;

    and the block is terminated by one blank line (written even when consts
    is empty).
    """
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))

    out.write('\n')
429
430#
431# Emit the whole output file.
432#
def emit_config():
    """Emit the whole output file.

    Writes the generated C++ source to the path in sys.argv[1]: the header,
    then one block each for the miscellaneous constants, the type-to-class
    constants, the class hierarchy constants and the field constants, then
    the footer.  Type and class names are emitted in sorted order.
    """
    # open() and sorted() are used instead of file() and list.sort() on
    # dict.keys() so that this works on Python 3 as well as Python 2; the
    # "with" block makes sure the output is flushed and closed.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                # Classes without a recorded parent get no constant.
                continue

            # The relationship is encoded in the symbol name itself; the
            # value is always 0.
            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)
474
def main():
    """Script entry point.

    Expects three command-line arguments: the output file, then the paths
    of objects.h and objects-inl.h.  Prints a usage message and exits with
    status 2 when fewer are given.
    """
    if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()


# Only run when executed as a script, so that importing this module does not
# read sys.argv or write any files.
if __name__ == '__main__':
    main()