Erick Tryzelaar | f119765 | 2010-03-08 19:32:27 +0000 | [diff] [blame] | 1 | (*===---------------------------------------------------------------------=== |
| 2 | * Parser |
| 3 | *===---------------------------------------------------------------------===*) |
| 4 | |
| 5 | (* binop_precedence - This holds the precedence for each binary operator that is |
| 6 | * defined *) |
| 7 | let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 |
| 8 | |
| 9 | (* precedence - Get the precedence of the pending binary operator token. *) |
| 10 | let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 |
| 11 | |
| 12 | (* primary |
| 13 | * ::= identifier |
| 14 | * ::= numberexpr |
| 15 | * ::= parenexpr |
| 16 | * ::= ifexpr |
| 17 | * ::= forexpr |
| 18 | * ::= varexpr *) |
| 19 | let rec parse_primary = parser |
| 20 | (* numberexpr ::= number *) |
| 21 | | [< 'Token.Number n >] -> Ast.Number n |
| 22 | |
| 23 | (* parenexpr ::= '(' expression ')' *) |
| 24 | | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e |
| 25 | |
| 26 | (* identifierexpr |
| 27 | * ::= identifier |
| 28 | * ::= identifier '(' argumentexpr ')' *) |
| 29 | | [< 'Token.Ident id; stream >] -> |
| 30 | let rec parse_args accumulator = parser |
| 31 | | [< e=parse_expr; stream >] -> |
| 32 | begin parser |
| 33 | | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e |
| 34 | | [< >] -> e :: accumulator |
| 35 | end stream |
| 36 | | [< >] -> accumulator |
| 37 | in |
| 38 | let rec parse_ident id = parser |
| 39 | (* Call. *) |
| 40 | | [< 'Token.Kwd '('; |
| 41 | args=parse_args []; |
| 42 | 'Token.Kwd ')' ?? "expected ')'">] -> |
| 43 | Ast.Call (id, Array.of_list (List.rev args)) |
| 44 | |
| 45 | (* Simple variable ref. *) |
| 46 | | [< >] -> Ast.Variable id |
| 47 | in |
| 48 | parse_ident id stream |
| 49 | |
| 50 | (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) |
| 51 | | [< 'Token.If; c=parse_expr; |
| 52 | 'Token.Then ?? "expected 'then'"; t=parse_expr; |
| 53 | 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> |
| 54 | Ast.If (c, t, e) |
| 55 | |
| 56 | (* forexpr |
| 57 | ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) |
| 58 | | [< 'Token.For; |
| 59 | 'Token.Ident id ?? "expected identifier after for"; |
| 60 | 'Token.Kwd '=' ?? "expected '=' after for"; |
| 61 | stream >] -> |
| 62 | begin parser |
| 63 | | [< |
| 64 | start=parse_expr; |
| 65 | 'Token.Kwd ',' ?? "expected ',' after for"; |
| 66 | end_=parse_expr; |
| 67 | stream >] -> |
| 68 | let step = |
| 69 | begin parser |
| 70 | | [< 'Token.Kwd ','; step=parse_expr >] -> Some step |
| 71 | | [< >] -> None |
| 72 | end stream |
| 73 | in |
| 74 | begin parser |
| 75 | | [< 'Token.In; body=parse_expr >] -> |
| 76 | Ast.For (id, start, end_, step, body) |
| 77 | | [< >] -> |
| 78 | raise (Stream.Error "expected 'in' after for") |
| 79 | end stream |
| 80 | | [< >] -> |
| 81 | raise (Stream.Error "expected '=' after for") |
| 82 | end stream |
| 83 | |
| 84 | (* varexpr |
| 85 | * ::= 'var' identifier ('=' expression? |
| 86 | * (',' identifier ('=' expression)?)* 'in' expression *) |
| 87 | | [< 'Token.Var; |
| 88 | (* At least one variable name is required. *) |
| 89 | 'Token.Ident id ?? "expected identifier after var"; |
| 90 | init=parse_var_init; |
| 91 | var_names=parse_var_names [(id, init)]; |
| 92 | (* At this point, we have to have 'in'. *) |
| 93 | 'Token.In ?? "expected 'in' keyword after 'var'"; |
| 94 | body=parse_expr >] -> |
| 95 | Ast.Var (Array.of_list (List.rev var_names), body) |
| 96 | |
| 97 | | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") |
| 98 | |
| 99 | (* unary |
| 100 | * ::= primary |
| 101 | * ::= '!' unary *) |
| 102 | and parse_unary = parser |
| 103 | (* If this is a unary operator, read it. *) |
| 104 | | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] -> |
| 105 | Ast.Unary (op, operand) |
| 106 | |
| 107 | (* If the current token is not an operator, it must be a primary expr. *) |
| 108 | | [< stream >] -> parse_primary stream |
| 109 | |
| 110 | (* binoprhs |
| 111 | * ::= ('+' primary)* *) |
| 112 | and parse_bin_rhs expr_prec lhs stream = |
| 113 | match Stream.peek stream with |
| 114 | (* If this is a binop, find its precedence. *) |
| 115 | | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> |
| 116 | let token_prec = precedence c in |
| 117 | |
| 118 | (* If this is a binop that binds at least as tightly as the current binop, |
| 119 | * consume it, otherwise we are done. *) |
| 120 | if token_prec < expr_prec then lhs else begin |
| 121 | (* Eat the binop. *) |
| 122 | Stream.junk stream; |
| 123 | |
| 124 | (* Parse the primary expression after the binary operator. *) |
| 125 | let rhs = parse_unary stream in |
| 126 | |
| 127 | (* Okay, we know this is a binop. *) |
| 128 | let rhs = |
| 129 | match Stream.peek stream with |
| 130 | | Some (Token.Kwd c2) -> |
| 131 | (* If BinOp binds less tightly with rhs than the operator after |
| 132 | * rhs, let the pending operator take rhs as its lhs. *) |
| 133 | let next_prec = precedence c2 in |
| 134 | if token_prec < next_prec |
| 135 | then parse_bin_rhs (token_prec + 1) rhs stream |
| 136 | else rhs |
| 137 | | _ -> rhs |
| 138 | in |
| 139 | |
| 140 | (* Merge lhs/rhs. *) |
| 141 | let lhs = Ast.Binary (c, lhs, rhs) in |
| 142 | parse_bin_rhs expr_prec lhs stream |
| 143 | end |
| 144 | | _ -> lhs |
| 145 | |
| 146 | and parse_var_init = parser |
| 147 | (* read in the optional initializer. *) |
| 148 | | [< 'Token.Kwd '='; e=parse_expr >] -> Some e |
| 149 | | [< >] -> None |
| 150 | |
| 151 | and parse_var_names accumulator = parser |
| 152 | | [< 'Token.Kwd ','; |
| 153 | 'Token.Ident id ?? "expected identifier list after var"; |
| 154 | init=parse_var_init; |
| 155 | e=parse_var_names ((id, init) :: accumulator) >] -> e |
| 156 | | [< >] -> accumulator |
| 157 | |
| 158 | (* expression |
| 159 | * ::= primary binoprhs *) |
| 160 | and parse_expr = parser |
| 161 | | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream |
| 162 | |
| 163 | (* prototype |
| 164 | * ::= id '(' id* ')' |
| 165 | * ::= binary LETTER number? (id, id) |
| 166 | * ::= unary LETTER number? (id) *) |
| 167 | let parse_prototype = |
| 168 | let rec parse_args accumulator = parser |
| 169 | | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e |
| 170 | | [< >] -> accumulator |
| 171 | in |
| 172 | let parse_operator = parser |
| 173 | | [< 'Token.Unary >] -> "unary", 1 |
| 174 | | [< 'Token.Binary >] -> "binary", 2 |
| 175 | in |
| 176 | let parse_binary_precedence = parser |
| 177 | | [< 'Token.Number n >] -> int_of_float n |
| 178 | | [< >] -> 30 |
| 179 | in |
| 180 | parser |
| 181 | | [< 'Token.Ident id; |
| 182 | 'Token.Kwd '(' ?? "expected '(' in prototype"; |
| 183 | args=parse_args []; |
| 184 | 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> |
| 185 | (* success. *) |
| 186 | Ast.Prototype (id, Array.of_list (List.rev args)) |
| 187 | | [< (prefix, kind)=parse_operator; |
| 188 | 'Token.Kwd op ?? "expected an operator"; |
| 189 | (* Read the precedence if present. *) |
| 190 | binary_precedence=parse_binary_precedence; |
| 191 | 'Token.Kwd '(' ?? "expected '(' in prototype"; |
| 192 | args=parse_args []; |
| 193 | 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> |
| 194 | let name = prefix ^ (String.make 1 op) in |
| 195 | let args = Array.of_list (List.rev args) in |
| 196 | |
| 197 | (* Verify right number of arguments for operator. *) |
| 198 | if Array.length args != kind |
| 199 | then raise (Stream.Error "invalid number of operands for operator") |
| 200 | else |
| 201 | if kind == 1 then |
| 202 | Ast.Prototype (name, args) |
| 203 | else |
| 204 | Ast.BinOpPrototype (name, args, binary_precedence) |
| 205 | | [< >] -> |
| 206 | raise (Stream.Error "expected function name in prototype") |
| 207 | |
| 208 | (* definition ::= 'def' prototype expression *) |
| 209 | let parse_definition = parser |
| 210 | | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> |
| 211 | Ast.Function (p, e) |
| 212 | |
| 213 | (* toplevelexpr ::= expression *) |
| 214 | let parse_toplevel = parser |
| 215 | | [< e=parse_expr >] -> |
| 216 | (* Make an anonymous proto. *) |
| 217 | Ast.Function (Ast.Prototype ("", [||]), e) |
| 218 | |
| 219 | (* external ::= 'extern' prototype *) |
| 220 | let parse_extern = parser |
| 221 | | [< 'Token.Extern; e=parse_prototype >] -> e |