blob: 04a1ea87f4afaf73745d696b5ca895358c2cbad6 [file] [log] [blame]
Ben Murdoch3ef787d2012-04-12 10:51:47 +01001#!/usr/bin/env python
2
3#
4# Copyright 2012 the V8 project authors. All rights reserved.
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11# * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following
13# disclaimer in the documentation and/or other materials provided
14# with the distribution.
15# * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived
17# from this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30#
31
#
# Emits a C++ file to be compiled and linked into libv8 to support postmortem
# debugging tools. Most importantly, this tool emits constants describing V8
# internals:
#
#    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
#    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
#    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
#    v8dbg_frametype_NAME = VALUE               Describes stack frame values
#    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
#    v8dbg_prop_NAME = OFFSET                   Object property offsets
#    v8dbg_NAME = VALUE                         Miscellaneous values
#
# These constants are declared as global integers so that they'll be present in
# the generated libv8 binary.
#
48
import re
import sys
51
#
# Miscellaneous constants, tags, and masks used for object identification.
#
# Each entry pairs the suffix of the emitted "v8dbg_<name>" symbol with the
# C++ expression whose value it is initialised to (see emit_set()).
consts_misc = [
    dict(name='FirstNonstringType', value='FIRST_NONSTRING_TYPE'),

    # String / non-string discrimination.
    dict(name='IsNotStringMask', value='kIsNotStringMask'),
    dict(name='StringTag', value='kStringTag'),
    dict(name='NotStringTag', value='kNotStringTag'),

    # String encoding.
    dict(name='StringEncodingMask', value='kStringEncodingMask'),
    dict(name='TwoByteStringTag', value='kTwoByteStringTag'),
    dict(name='OneByteStringTag', value='kOneByteStringTag'),

    # String representation.
    dict(name='StringRepresentationMask', value='kStringRepresentationMask'),
    dict(name='SeqStringTag', value='kSeqStringTag'),
    dict(name='ConsStringTag', value='kConsStringTag'),
    dict(name='ExternalStringTag', value='kExternalStringTag'),
    dict(name='SlicedStringTag', value='kSlicedStringTag'),

    # Pointer tagging.
    dict(name='FailureTag', value='kFailureTag'),
    dict(name='FailureTagMask', value='kFailureTagMask'),
    dict(name='HeapObjectTag', value='kHeapObjectTag'),
    dict(name='HeapObjectTagMask', value='kHeapObjectTagMask'),
    dict(name='SmiTag', value='kSmiTag'),
    dict(name='SmiTagMask', value='kSmiTagMask'),
    dict(name='SmiValueShift', value='kSmiTagSize'),
    dict(name='SmiShiftSize', value='kSmiShiftSize'),
    dict(name='PointerSizeLog2', value='kPointerSizeLog2'),

    # Oddball kinds.
    dict(name='OddballFalse', value='Oddball::kFalse'),
    dict(name='OddballTrue', value='Oddball::kTrue'),
    dict(name='OddballTheHole', value='Oddball::kTheHole'),
    dict(name='OddballNull', value='Oddball::kNull'),
    dict(name='OddballArgumentMarker', value='Oddball::kArgumentMarker'),
    dict(name='OddballUndefined', value='Oddball::kUndefined'),
    dict(name='OddballUninitialized', value='Oddball::kUninitialized'),
    dict(name='OddballOther', value='Oddball::kOther'),
    dict(name='OddballException', value='Oddball::kException'),

    # Property details.
    dict(name='prop_idx_first', value='DescriptorArray::kFirstIndex'),
    dict(name='prop_type_field', value='FIELD'),
    dict(name='prop_type_first_phantom', value='TRANSITION'),
    dict(name='prop_type_mask', value='PropertyDetails::TypeField::kMask'),
    dict(name='prop_index_mask',
         value='PropertyDetails::FieldIndexField::kMask'),
    dict(name='prop_index_shift',
         value='PropertyDetails::FieldIndexField::kShift'),

    # Descriptor array layout.
    dict(name='prop_desc_key', value='DescriptorArray::kDescriptorKey'),
    dict(name='prop_desc_details',
         value='DescriptorArray::kDescriptorDetails'),
    dict(name='prop_desc_value', value='DescriptorArray::kDescriptorValue'),
    dict(name='prop_desc_size', value='DescriptorArray::kDescriptorSize'),

    # Elements kinds.
    dict(name='elements_fast_holey_elements', value='FAST_HOLEY_ELEMENTS'),
    dict(name='elements_fast_elements', value='FAST_ELEMENTS'),
    dict(name='elements_dictionary_elements', value='DICTIONARY_ELEMENTS'),

    # Map bit fields.
    dict(name='bit_field2_elements_kind_mask',
         value='Map::kElementsKindMask'),
    dict(name='bit_field2_elements_kind_shift',
         value='Map::kElementsKindShift'),
    dict(name='bit_field3_dictionary_map_shift',
         value='Map::DictionaryMap::kShift'),

    # Frame-pointer-relative offsets.
    dict(name='off_fp_context',
         value='StandardFrameConstants::kContextOffset'),
    dict(name='off_fp_constant_pool',
         value='StandardFrameConstants::kConstantPoolOffset'),
    dict(name='off_fp_marker',
         value='StandardFrameConstants::kMarkerOffset'),
    dict(name='off_fp_function',
         value='JavaScriptFrameConstants::kFunctionOffset'),
    dict(name='off_fp_args',
         value='JavaScriptFrameConstants::kLastParameterOffset'),
]
140
#
# The following useful fields are missing accessors, so we define fake ones.
#
# Every entry becomes the argument list of a synthetic ACCESSORS(...) call
# (see load_fields()), i.e. the string "class, field, type, offset".
extras_accessors = list(map(', '.join, [
    ('HeapObject', 'map', 'Map', 'kMapOffset'),
    ('JSObject', 'elements', 'Object', 'kElementsOffset'),
    ('FixedArray', 'data', 'uintptr_t', 'kHeaderSize'),
    ('Map', 'instance_attributes', 'int', 'kInstanceAttributesOffset'),
    ('Map', 'inobject_properties', 'int', 'kInObjectPropertiesOffset'),
    ('Map', 'instance_size', 'int', 'kInstanceSizeOffset'),
    ('Map', 'bit_field', 'char', 'kBitFieldOffset'),
    ('Map', 'bit_field2', 'char', 'kBitField2Offset'),
    ('Map', 'bit_field3', 'SMI', 'kBitField3Offset'),
    ('Map', 'prototype', 'Object', 'kPrototypeOffset'),
    ('NameDictionaryShape', 'prefix_size', 'int', 'kPrefixSize'),
    ('NameDictionaryShape', 'entry_size', 'int', 'kEntrySize'),
    ('SeededNumberDictionaryShape', 'prefix_size', 'int', 'kPrefixSize'),
    ('UnseededNumberDictionaryShape', 'prefix_size', 'int', 'kPrefixSize'),
    ('NumberDictionaryShape', 'entry_size', 'int', 'kEntrySize'),
    ('Oddball', 'kind_offset', 'int', 'kKindOffset'),
    ('HeapNumber', 'value', 'double', 'kValueOffset'),
    ('ConsString', 'first', 'String', 'kFirstOffset'),
    ('ConsString', 'second', 'String', 'kSecondOffset'),
    ('ExternalString', 'resource', 'Object', 'kResourceOffset'),
    ('SeqOneByteString', 'chars', 'char', 'kHeaderSize'),
    ('SeqTwoByteString', 'chars', 'char', 'kHeaderSize'),
    ('SharedFunctionInfo', 'code', 'Code', 'kCodeOffset'),
    ('SlicedString', 'parent', 'String', 'kParentOffset'),
    ('Code', 'instruction_start', 'uintptr_t', 'kHeaderSize'),
    ('Code', 'instruction_size', 'int', 'kInstructionSizeOffset'),
]))
172
#
# The following is a whitelist of classes we expect to find when scanning the
# source code. This list is not exhaustive, but it's still useful to identify
# when this script gets out of sync with the source. See load_objects().
#
# load_objects() reports an error and exits if any of these is never matched
# to an instance type.
expected_classes = [
    'ConsString',
    'FixedArray',
    'HeapNumber',
    'JSArray',
    'JSFunction',
    'JSObject',
    'JSRegExp',
    'JSValue',
    'Map',
    'Oddball',
    'Script',
    'SeqOneByteString',
    'SharedFunctionInfo',
]
183
184
#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants.
#
# Filled in by load_objects() and load_fields(); read back by emit_config().
types = dict()          # set of all type names (dict used as a set)
typeclasses = dict()    # maps type names to corresponding class names
klasses = dict()        # known classes, including parents
fields = list()         # field declarations
193
# Text written at the very top of the generated C++ file.  The single %s is
# substituted with sys.argv[0], i.e. the path this script was invoked as.
#
# NOTE: this is not a raw string, so the backslash-newline that ends the
# "#define FRAME_CONST(...)" line is consumed by Python as a line continuation;
# the macro and its body reach the output file as one physical line.
header = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
 int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0];

# Text written at the very end of the generated file; its brace closes the
# extern "C" block opened in the header.
footer = '''
}
'''
220
#
# Loads class hierarchy and type information from "objects.h".
#
def _camel_case_type(usetype):
    """Convert an upper-snake type stem to camel case.

    A leading "JS" part stays uppercase, so "JS_FUNCTION" becomes
    "JSFunction" while "FIXED_ARRAY" becomes "FixedArray".
    """
    parts = usetype.split('_')
    prefix = ''
    if parts[0] == 'JS':
        prefix = 'JS'
        parts = parts[1:]

    # str.capitalize() also copes with empty parts (e.g. from a doubled
    # underscore), which indexing part[0] would not.
    return prefix + ''.join(part.capitalize() for part in parts)


def _resolve_string_class(cctype):
    """Map a camel-cased "...String" type name onto a string class name.

    Both types and class names for Strings specify a representation (Seq,
    Cons, External, or Sliced) and an encoding (TwoByte/OneByte).  In the
    simplest case both are explicit in both names:

        EXTERNAL_ONE_BYTE_STRING_TYPE => ExternalOneByteString

    Either may be omitted from the type name, in which case "Seq" and
    "TwoByte" are assumed:

        STRING_TYPE => SeqTwoByteString

    Sometimes the type name has more information than the class:

        CONS_ONE_BYTE_STRING_TYPE => ConsString

    So the representation and encoding are added first if missing, and if
    the result is not a class recorded in `klasses`, the encoding is
    dropped again.
    """
    has_representation = ('Cons' in cctype or 'External' in cctype or
                          'Sliced' in cctype)
    if not has_representation:
        if 'OneByte' in cctype:
            cctype = re.sub(r'OneByteString$', 'SeqOneByteString', cctype)
        else:
            cctype = re.sub(r'String$', 'SeqString', cctype)

    if 'OneByte' not in cctype:
        cctype = re.sub(r'String$', 'TwoByteString', cctype)

    if cctype not in klasses:
        cctype = cctype.replace('OneByte', '').replace('TwoByte', '')

    return cctype


def load_objects():
    """Load class hierarchy and type information from "objects.h".

    Reads the file named by sys.argv[2] and fills in the module-level
    `klasses`, `types` and `typeclasses` tables.  If any class listed in
    `expected_classes` is not matched to an instance type, an error line is
    printed per missing class and the process exits with status 1.
    """
    class_decl = re.compile(r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{')

    # Classes we're sure should be present; entries are removed as matched.
    missing = set(expected_classes)

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate the pieces of the entire InstanceType enum
    # definition and parse it later because it's easier to do so without
    # the embedded newlines.
    #
    in_insttype = False
    enum_chunks = []

    with open(sys.argv[2], 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Drop surrounding whitespace and any trailing // comment.
            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                enum_chunks.append(line)
                continue

            match = class_decl.match(line)
            if match:
                klasses[match.group(1)] = {'parent': match.group(3)}

    #
    # Process the instance type declaration.  Each entry is reduced to its
    # name by discarding any "= value" part; empty entries (e.g. after a
    # trailing comma) are not type names and are skipped.
    #
    for entry in ''.join(enum_chunks).split(','):
        name = re.sub(r'\s*=.*', '', entry).strip()
        if name:
            types[name] = True

    #
    # Infer class names for each type based on a systematic transformation.
    # For example, "JS_FUNCTION_TYPE" becomes "JSFunction".  We find the
    # class for each type rather than the other way around because there are
    # fewer cases where one type maps to more than one class than the other
    # way around.
    #
    for typename in types:
        # Symbols and Strings are implemented using the same classes.
        usetype = typename.replace('SYMBOL_', 'STRING_')

        # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
        usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

        if not usetype.endswith('_TYPE'):
            continue

        cctype = _camel_case_type(usetype[:-len('_TYPE')])
        if cctype.endswith('String'):
            cctype = _resolve_string_class(cctype)

        # Despite all that, some types have no corresponding class.
        if cctype in klasses:
            typeclasses[typename] = cctype
            missing.discard(cctype)

    if missing:
        for klass in sorted(missing):
            print('error: expected class "%s" not found' % klass)

        sys.exit(1)
372
373
#
# For a given macro call, pick apart the arguments and return an object
# describing the corresponding output constant. See load_fields().
#
def parse_field(call):
    """Describe the output constant for one accessor macro invocation.

    Args:
        call: the full text of the invocation, e.g.
            "ACCESSORS(JSObject, elements, Object, kElementsOffset)".  It
            may span several lines; the final character is taken to be the
            closing parenthesis.

    Returns:
        A dict with 'name' ("class_<klass>__<field>__<type>") and 'value'
        ("<klass>::<offset>").  For SMI_ACCESSORS and ACCESSORS_TO_SMI the
        type is always "SMI".

    Raises:
        AssertionError: if the macro is not one of the four known kinds.
        IndexError: if the invocation has fewer arguments than its kind
            requires.
    """
    # Strings are immutable, so newlines are replaced by building a new one.
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[0:idx]
    rest = call[idx + 1:len(call) - 1]

    # Strip each argument so whitespace at either end of the argument list
    # (as left by a multi-line invocation) does not leak into the names.
    args = [arg.strip() for arg in rest.split(',')]

    if kind in ('ACCESSORS', 'ACCESSORS_GCSAFE'):
        klass, field, dtype, offset = args[0], args[1], args[2], args[3]
    elif kind in ('SMI_ACCESSORS', 'ACCESSORS_TO_SMI'):
        klass, field, offset = args[0], args[1], args[2]
        dtype = 'SMI'
    else:
        # Raised explicitly so the check survives "python -O".
        raise AssertionError('unexpected accessor macro: %r' % kind)

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset),
    }
411
#
# Load field offset information from objects-inl.h.
#
def _scan_macro_text(line, start, opens):
    """Track parenthesis depth across line[start:].

    Returns a (text, opens) pair: `text` is the line from its beginning up
    to and including the character at which the depth returned to zero (or
    the whole line if it never did), and `opens` is the resulting depth.
    """
    end = len(line)
    for idx in range(start, len(line)):
        if line[idx] == '(':
            opens += 1
        elif line[idx] == ')':
            opens -= 1

        if opens == 0:
            end = idx + 1
            break

    return line[:end], opens


def load_fields():
    """Load field offset information from objects-inl.h.

    Reads the file named by sys.argv[3] and appends one parse_field()
    result to the module-level `fields` list per accessor macro invocation
    found, followed by one per entry of `extras_accessors`.
    """
    #
    # Each class's fields and the corresponding offsets are described in the
    # source by calls to macros like "ACCESSORS" (and friends).  All we do
    # here is extract these macro invocations, taking into account that they
    # may span multiple lines and may contain nested parentheses.  We also
    # call parse_field() to pick apart the invocation.
    #
    prefixes = ('ACCESSORS', 'ACCESSORS_GCSAFE',
                'SMI_ACCESSORS', 'ACCESSORS_TO_SMI')
    current = ''    # text of the invocation collected so far
    opens = 0       # parenthesis depth; > 0 while inside an invocation

    with open(sys.argv[3], 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of a multi-line invocation.
                text, opens = _scan_macro_text(line, 0, opens)
                current += text
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation starts here; flush the previous one.
                if current:
                    fields.append(parse_field(current))
                    current = ''

                text, opens = _scan_macro_text(line, len(prefix), opens)
                current += text

                # The "(" after the prefix means at most one prefix matches.
                break

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))
471
#
# Emit a block of constants.
#
def emit_set(out, consts):
    """Write one "int v8dbg_<name> = <value>;" line per constant.

    Args:
        out: object with a write() method that receives the text.
        consts: iterable of dicts with a 'name' and a 'value' entry; the
            value may be a string or a number.

    A blank line is written after the block, even when `consts` is empty.
    """
    # Fix up overzealous parses.  This could be done inside the parsers but
    # as there are several, it's easiest to do it here: all whitespace is
    # removed from both the name and the value.
    ws = re.compile(r'\s+')
    for const in consts:
        name = ws.sub('', const['name'])
        value = ws.sub('', str(const['value']))  # Can be a number.
        out.write('int v8dbg_%s = %s;\n' % (name, value))
    out.write('\n')
484
#
# Emit the whole output file.
#
def emit_config():
    """Write the complete generated C++ file to the path in sys.argv[1].

    The file consists of `header`, then four blocks of constants
    (miscellaneous, class type, class hierarchy, field information) each
    written through emit_set(), then `footer`.  Type and class names are
    emitted in sorted order.
    """
    # open() and sorted() are used instead of file() and keys().sort() so
    # that this works on both Python 2 and Python 3; the with-statement
    # makes sure the output is flushed and closed.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        type_consts = [
            {
                'name': 'type_%s__%s' % (typeclasses[typename], typename),
                'value': typename,
            }
            for typename in sorted(typeclasses)
        ]
        emit_set(out, type_consts)

        out.write('/* class hierarchy information */\n')
        parent_consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                # Classes with no recorded parent get no constant.
                continue

            # Only the symbol name carries information; the value is 0.
            parent_consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0,
            })
        emit_set(out, parent_consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)
529
# Script entry: the output path and both input headers are required.
if len(sys.argv) < 4:
    print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
    sys.exit(2)

# Gather class/type data, then field offsets, then write the output file.
load_objects()
load_fields()
emit_config()
536emit_config();