/*
 [The "BSD licence"]
 Copyright (c) 2013 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** XML lexer derived from the ANTLR v4 reference guide book example. */
lexer grammar XMLLexer;

// ------------- Default mode: everything OUTSIDE of a tag -------------

// COMMENT, CDATA and DTD all begin with '<!'. They are kept in this order
// so the more specific openers are listed ahead of the catch-all DTD rule.
COMMENT
    : '<!--' .*? '-->'
    ;

CDATA
    : '<![CDATA[' .*? ']]>'
    ;

/** Scarf all DTD stuff, Entity Declarations like <!ENTITY ...>,
 *  and Notation Declarations <!NOTATION ...>.
 *  The match ends at the first '>' and the text is dropped (skip).
 */
DTD
    : '<!' .*? '>' -> skip
    ;

// Named entity reference, e.g. &amp;
EntityRef
    : '&' Name ';'
    ;

// Numeric character reference: decimal (&#10;) or hexadecimal (&#x0A;)
CharRef
    : '&#' ( DIGIT+ | 'x' HEXDIGIT+ ) ';'
    ;

// A run of blanks, tabs and line breaks (LF or CRLF) between markup
SEA_WS
    : ( [ \t] | '\r'? '\n' )+
    ;

// '<' starts a tag; its contents are tokenized in mode INSIDE
OPEN
    : '<' -> pushMode(INSIDE)
    ;

// '<?xml' followed by one whitespace character opens the XML declaration,
// whose attributes are also tokenized in mode INSIDE
XMLDeclOpen
    : '<?xml' S -> pushMode(INSIDE)
    ;

// Any other '<?name' opens a processing instruction. 'more' keeps the
// matched text so it becomes part of the token finished in PROC_INSTR.
SPECIAL_OPEN
    : '<?' Name -> more, pushMode(PROC_INSTR)
    ;

// Character data: one or more characters other than '<' and '&'
TEXT
    : ~[<&]+
    ;

// ----------------- Everything INSIDE of a tag ---------------------
mode INSIDE;

// Each of the three closers leaves this mode and returns to the mode that
// was active when the tag was opened.
CLOSE
    : '>' -> popMode
    ;

SPECIAL_CLOSE
    : '?>' -> popMode   // close <?xml...?>
    ;

SLASH_CLOSE
    : '/>' -> popMode
    ;

SLASH
    : '/'
    ;

EQUALS
    : '='
    ;

// Attribute value in double or single quotes. The value may not contain
// '<' or the quote character that delimits it.
STRING
    : '"' ~[<"]* '"'
    | '\'' ~[<']* '\''
    ;

// Element or attribute name
Name
    : NameStartChar NameChar*
    ;

// A single whitespace character inside a tag; discarded
S
    : [ \t\r\n] -> skip
    ;

// One hexadecimal digit, either letter case
fragment HEXDIGIT
    : [0-9a-fA-F]
    ;

// One decimal digit
fragment DIGIT
    : [0-9]
    ;

// Any character allowed after the first character of a Name: every
// NameStartChar, plus digits, '-', '_', '.', the middle dot U+00B7,
// U+0300..U+036F and U+203F..U+2040.
fragment NameChar
    : NameStartChar
    | DIGIT
    | '-' | '_' | '.'
    | '\u00B7'
    | '\u0300'..'\u036F'
    | '\u203F'..'\u2040'
    ;

/** First character of an XML name.
 *
 *  Follows the NameStartChar production of the XML 1.0 specification for
 *  the Basic Multilingual Plane: ':', '_', ASCII letters and the listed
 *  Unicode ranges. '_' and the ranges below U+2070 were previously missing,
 *  so names such as "_id" or names starting with an accented Latin, Greek
 *  or Cyrillic letter were not recognized as Name tokens.
 *
 *  The supplementary range U+10000..U+EFFFF of the specification is not
 *  listed here because it cannot be written with four-digit \u escapes.
 */
fragment
NameStartChar
            :   [:_a-zA-Z]
            |   '\u00C0'..'\u00D6'
            |   '\u00D8'..'\u00F6'
            |   '\u00F8'..'\u02FF'
            |   '\u0370'..'\u037D'
            |   '\u037F'..'\u1FFF'
            |   '\u200C'..'\u200D'
            |   '\u2070'..'\u218F'
            |   '\u2C00'..'\u2FEF'
            |   '\u3001'..'\uD7FF'
            |   '\uF900'..'\uFDCF'
            |   '\uFDF0'..'\uFFFD'
            ;

// ----------------- Handle <? ... ?> ---------------------
mode PROC_INSTR;

// '?>' ends the processing instruction and leaves this mode
PI
    : '?>' -> popMode   // close <?...?>
    ;

// Every other character is accumulated with 'more' rather than emitted as
// a token of its own
IGNORE
    : . -> more
    ;