blob: 85cb72d19f5c28bb85d44e4e71924871d07c00e6 [file] [log] [blame]
Robert Sloan8ff03552017-06-14 12:40:58 -07001# Copyright (c) 2017, Google Inc.
2#
3# Permission to use, copy, modify, and/or distribute this software for any
4# purpose with or without fee is hereby granted, provided that the above
5# copyright notice and this permission notice appear in all copies.
6#
7# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15# This is a rough parser for x86-64 and ppc64le assembly designed to work with
16# https://github.com/pointlander/peg. delocate.go has a go:generate line for
17# rebuilding delocate.peg.go from this file.
18
# Package clause for the parser source generated from this grammar.
19package main
20
# Parser type declaration: the parser is named Asm and the braces add no
# extra fields to it.
21type Asm Peg {}
22
# Top-level rule: zero or more Statements; the trailing `!.` requires that no
# input is left over, so the whole file must parse.
23AsmFile <- Statement* !.
# One statement, after optional leading whitespace. Either:
#   * a Label, which needs no terminator, or
#   * one of the directive / instruction / comment forms, followed by optional
#     whitespace and a terminator: a newline (optionally preceded by a
#     Comment) or ';'.
# The specific directive forms are listed before the generic Directive, which
# would otherwise match them first ('.' DirectiveName accepts any name).
# NOTE(review): the trailing `/` before `)` appears to add an empty
# alternative, so a bare terminator (blank line) is accepted too — confirm
# against the peg grammar.
24Statement <- WS? (Label / ((GlobalDirective /
25 LocationDirective /
26 LabelContainingDirective /
27 Instruction /
28 Directive /
29 Comment / ) WS? ((Comment? '\n') / ';')))
# `.global` or `.globl`, whitespace, then the SymbolName being exported.
30GlobalDirective <- (".global" / ".globl") WS SymbolName
# Generic directive: a '.', a DirectiveName, and optionally whitespace plus a
# comma-separated argument list.
31Directive <- '.' DirectiveName (WS Args)?
# Directive name: one or more letters, digits or underscores.
32DirectiveName <- [[A-Z0-9_]]+
# `.file` / `.loc` directive: whitespace, then everything up to (not including)
# the next '#' or newline. The argument text is not parsed further.
33LocationDirective <- (".file" / ".loc") WS [^#\n]+
# Comma-separated list of Arg, with optional whitespace around each comma.
34Args <- Arg ((WS? ',' WS?) Arg)*
# A single argument: a quoted string, or a run of the listed characters.
# Note the `*`: the unquoted form may match the empty string.
35Arg <- QuotedArg / [[0-9a-z%+\-_@.]]*
# Double-quoted string argument.
36QuotedArg <- '"' QuotedText '"'
# Body of a quoted string. EscapedChar is tried first so that a
# backslash-escaped quote does not end the string; otherwise any character
# other than '"' is accepted.
37QuotedText <- (EscapedChar / [^"])*
# Directive whose arguments are parsed as symbol expressions (SymbolArgs)
# rather than as opaque Args.
38LabelContainingDirective <- LabelContainingDirectiveName WS SymbolArgs
# The directive names that receive SymbolArgs parsing.
39LabelContainingDirectiveName <- ".long" / ".set" / ".8byte" / ".4byte" / ".quad" / ".tc" / ".localentry" / ".size" / ".type"
# Comma-separated list of SymbolArg, with optional whitespace around commas.
40SymbolArgs <- SymbolArg ((WS? ',' WS?) SymbolArg)*
# One argument of a label-containing directive. Alternatives are tried in
# order (PEG ordered choice), so the order below is significant:
#   1. a bare numeric Offset;
#   2. a SymbolType (`@function` / `@object`);
#   3. a binary expression: left operand (Offset, LocalSymbol, SymbolName or
#      Dot), an Operator, and a right operand (Offset, LocalSymbol or
#      SymbolName), with optional whitespace around the operator;
#   4. a LocalSymbol, optionally followed by a TCMarker;
#   5. a SymbolName immediately followed by an Offset;
#   6. a SymbolName, optionally followed by a TCMarker.
41SymbolArg <- Offset /
42 SymbolType /
43 (Offset / LocalSymbol / SymbolName / Dot) WS? Operator WS? (Offset / LocalSymbol / SymbolName) /
44 LocalSymbol TCMarker? /
45 SymbolName Offset /
46 SymbolName TCMarker?
# Literal type annotations accepted as a SymbolArg.
47SymbolType <- '@function' / '@object'
# A lone '.', usable as the left operand of a SymbolArg expression.
48Dot <- '.'
# Literal `[TC]` suffix that may follow a symbol in a SymbolArg.
49TCMarker <- '[TC]'
# A backslash followed by any single character.
50EscapedChar <- '\\' .
# Horizontal whitespace: one or more spaces or tabs (newlines are excluded;
# they act as statement terminators).
51WS <- [ \t]+
# '#' comment running to the end of the line; the newline is not consumed.
52Comment <- '#' [^\n]*
# Label definition: a LocalSymbol, numeric LocalLabel or SymbolName followed
# by ':'. LocalSymbol is tried before SymbolName because a `.L...` name also
# satisfies the SymbolName pattern.
53Label <- (LocalSymbol / LocalLabel / SymbolName) ':'
# Symbol name: starts with a letter, '.' or '_', followed by any number of
# letters, digits, '.', '$' or '_'.
54SymbolName <- [[A-Z._]][[A-Z.0-9$_]]*
# Local symbol: the prefix `.L` followed by at least one name character.
55LocalSymbol <- '.L' [[A-Z.0-9$_]]+
# Numeric local label: a digit followed by any number of digits or '$'.
56LocalLabel <- [0-9][0-9$]*
# Reference to a numeric local label: the same pattern followed by 'b' or 'f'.
# NOTE(review): presumably the GNU as backward/forward suffixes — confirm.
57LocalLabelRef <- [0-9][0-9$]*[bf]
# Instruction: a mnemonic, optionally followed by whitespace and a
# comma-separated list of InstructionArg (optional whitespace around commas).
58Instruction <- InstructionName (WS InstructionArg ((WS? ',' WS?) InstructionArg)*)?
# Mnemonic: a letter, then letters/digits, then an optional single trailing
# '.', '+' or '-'.
# NOTE(review): the suffix is presumably for ppc64le mnemonics — confirm.
59InstructionName <- [[A-Z]][[A-Z0-9]]* [.+\-]?
# One instruction operand: an optional '*' indirection marker, then the first
# of these that matches: register/constant, local label reference, TOC
# reference (high, then low), or memory reference.
60InstructionArg <- IndirectionIndicator? (RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / MemoryRef)
# `.TOC.-` followed by either `0b` or a `.L`-prefixed name, ending in `@ha`.
61TOCRefHigh <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@ha"
# Same shape as TOCRefHigh, but ending in `@l`.
62TOCRefLow <- '.TOC.-' ('0b' / ('.L' [a-zA-Z_0-9]+)) "@l"
# Literal '*' that may prefix an instruction operand.
63IndirectionIndicator <- '*'
# Either a register ('%' plus a letter and further letters/digits) or a
# constant (optional '$', then two consecutive Offsets or a single Offset).
# The trailing negative lookahead rejects the match when the next character is
# one of `f b : ( + -`.
# NOTE(review): presumably this stops the rule from consuming the prefix of a
# local label reference (`1f`), a segment register (`%fs:`) or a memory
# operand such as `8(%reg)` — confirm.
64RegisterOrConstant <- (('%'[[A-Z]][[A-Z0-9]]*) / ('$'? ((Offset Offset) / Offset))) ![fb:(+\-]
65# Compilers only output a very limited number of expression forms. Rather than
66# implement a full expression parser, this enumerates those forms plus a few
67# that appear in our hand-written assembly.
# Memory operand. Alternatives are tried in order, so each longer form is
# listed before the shorter form that is its prefix:
#   1. symbol reference followed by a base/index/scale group;
#   2. symbol reference alone;
#   3. zero or more Offsets followed by a base/index/scale group;
#   4. segment register, Offset, base/index/scale group;
#   5. segment register, base/index/scale group;
#   6. segment register, Offset;
#   7. base/index/scale group alone.
68MemoryRef <- (SymbolRef BaseIndexScale /
69 SymbolRef /
70 Offset* BaseIndexScale /
71 SegmentRegister Offset BaseIndexScale /
72 SegmentRegister BaseIndexScale /
73 SegmentRegister Offset /
74 BaseIndexScale)
# Symbol reference: an optional leading `Offset* '+'`, then a LocalSymbol or
# SymbolName, any number of trailing Offsets, and optionally '@' Section
# followed by further Offsets.
75SymbolRef <- (Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?
# Parenthesised group: an optional first RegisterOrConstant, then optionally
# ',' plus a second RegisterOrConstant, which may itself be followed by ','
# and a decimal number. Every part inside the parentheses is optional.
76BaseIndexScale <- '(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)? )? ')'
# Binary operator in a SymbolArg expression: '+' or '-'.
77Operator <- [+\-]
# Numeric literal with optional sign: an optional '+', an optional '-', then
# a `0b` binary number, a `0x` hexadecimal number, or decimal digits. The
# prefixed forms are tried first so the leading '0' is not taken as decimal.
78Offset <- '+'? '-'? (("0b" [01]+) / ("0x" [[0-9A-F]]+) / [0-9]+)
# Text following '@' in a SymbolRef: one or more letters or '@' characters.
79Section <- [[A-Z@]]+
# Segment register prefix: '%', one character from c-g or 's', then `s:`
# (for example `%fs:`).
80SegmentRegister <- '%' [c-gs] 's:'