| // Copyright 2014 PDFium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| |
| #include "xfa/fxfa/fm2js/xfa_lexer.h" |
| |
| #include "core/fxcrt/fx_ext.h" |
| |
| namespace { |
| |
| struct XFA_FMDChar { |
| static const FX_WCHAR* inc(const FX_WCHAR*& p) { |
| ++p; |
| return p; |
| } |
| static const FX_WCHAR* dec(const FX_WCHAR*& p) { |
| --p; |
| return p; |
| } |
| static uint16_t get(const FX_WCHAR* p) { return *p; } |
| static bool isWhiteSpace(const FX_WCHAR* p) { |
| return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20; |
| } |
| static bool isLineTerminator(const FX_WCHAR* p) { |
| return *p == 0x0A || *p == 0x0D; |
| } |
| static bool isBinary(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '1'; } |
| static bool isOctal(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '7'; } |
| static bool isDigital(const FX_WCHAR* p) { |
| return (*p) >= '0' && (*p) <= '9'; |
| } |
| static bool isHex(const FX_WCHAR* p) { |
| return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') || |
| ((*p) >= 'A' && (*p) <= 'F'); |
| } |
| static bool isAlpha(const FX_WCHAR* p) { |
| return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A'); |
| } |
| static bool isAvalid(const FX_WCHAR* p, bool flag = 0); |
| static bool string2number(const FX_WCHAR* s, |
| FX_DOUBLE* pValue, |
| const FX_WCHAR*& pEnd); |
| static bool isUnicodeAlpha(uint16_t ch); |
| }; |
| |
| inline bool XFA_FMDChar::isAvalid(const FX_WCHAR* p, bool flag) { |
| if (*p == 0) { |
| return 1; |
| } |
| if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D || |
| (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) { |
| return 1; |
| } |
| if (!flag) { |
| if (*p == 0x0B || *p == 0x0C) { |
| return 1; |
| } |
| } |
| return 0; |
| } |
| |
| inline bool XFA_FMDChar::string2number(const FX_WCHAR* s, |
| FX_DOUBLE* pValue, |
| const FX_WCHAR*& pEnd) { |
| if (s) { |
| *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd); |
| } |
| return 0; |
| } |
| |
| inline bool XFA_FMDChar::isUnicodeAlpha(uint16_t ch) { |
| if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B || |
| ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' || |
| ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' || |
| ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' || |
| ch == '+' || ch == '-' || ch == '*' || ch == '/') { |
| return false; |
| } |
| return true; |
| } |
| |
| const XFA_FMKeyword keyWords[] = { |
| {TOKand, 0x00000026, L"&"}, |
| {TOKlparen, 0x00000028, L"("}, |
| {TOKrparen, 0x00000029, L")"}, |
| {TOKmul, 0x0000002a, L"*"}, |
| {TOKplus, 0x0000002b, L"+"}, |
| {TOKcomma, 0x0000002c, L","}, |
| {TOKminus, 0x0000002d, L"-"}, |
| {TOKdot, 0x0000002e, L"."}, |
| {TOKdiv, 0x0000002f, L"/"}, |
| {TOKlt, 0x0000003c, L"<"}, |
| {TOKassign, 0x0000003d, L"="}, |
| {TOKgt, 0x0000003e, L">"}, |
| {TOKlbracket, 0x0000005b, L"["}, |
| {TOKrbracket, 0x0000005d, L"]"}, |
| {TOKor, 0x0000007c, L"|"}, |
| {TOKdotscream, 0x0000ec11, L".#"}, |
| {TOKdotstar, 0x0000ec18, L".*"}, |
| {TOKdotdot, 0x0000ec1c, L".."}, |
| {TOKle, 0x000133f9, L"<="}, |
| {TOKne, 0x000133fa, L"<>"}, |
| {TOKeq, 0x0001391a, L"=="}, |
| {TOKge, 0x00013e3b, L">="}, |
| {TOKdo, 0x00020153, L"do"}, |
| {TOKkseq, 0x00020676, L"eq"}, |
| {TOKksge, 0x000210ac, L"ge"}, |
| {TOKksgt, 0x000210bb, L"gt"}, |
| {TOKif, 0x00021aef, L"if"}, |
| {TOKin, 0x00021af7, L"in"}, |
| {TOKksle, 0x00022a51, L"le"}, |
| {TOKkslt, 0x00022a60, L"lt"}, |
| {TOKksne, 0x00023493, L"ne"}, |
| {TOKksor, 0x000239c1, L"or"}, |
| {TOKnull, 0x052931bb, L"null"}, |
| {TOKbreak, 0x05518c25, L"break"}, |
| {TOKksand, 0x09f9db33, L"and"}, |
| {TOKend, 0x0a631437, L"end"}, |
| {TOKeof, 0x0a63195a, L"eof"}, |
| {TOKfor, 0x0a7d67a7, L"for"}, |
| {TOKnan, 0x0b4f91dd, L"nan"}, |
| {TOKksnot, 0x0b4fd9b1, L"not"}, |
| {TOKvar, 0x0c2203e9, L"var"}, |
| {TOKthen, 0x2d5738cf, L"then"}, |
| {TOKelse, 0x45f65ee9, L"else"}, |
| {TOKexit, 0x4731d6ba, L"exit"}, |
| {TOKdownto, 0x4caadc3b, L"downto"}, |
| {TOKreturn, 0x4db8bd60, L"return"}, |
| {TOKinfinity, 0x5c0a010a, L"infinity"}, |
| {TOKendwhile, 0x5c64bff0, L"endwhile"}, |
| {TOKforeach, 0x67e31f38, L"foreach"}, |
| {TOKendfunc, 0x68f984a3, L"endfunc"}, |
| {TOKelseif, 0x78253218, L"elseif"}, |
| {TOKwhile, 0x84229259, L"while"}, |
| {TOKendfor, 0x8ab49d7e, L"endfor"}, |
| {TOKthrow, 0x8db05c94, L"throw"}, |
| {TOKstep, 0xa7a7887c, L"step"}, |
| {TOKupto, 0xb5155328, L"upto"}, |
| {TOKcontinue, 0xc0340685, L"continue"}, |
| {TOKfunc, 0xcdce60ec, L"func"}, |
| {TOKendif, 0xe0e8fee6, L"endif"}, |
| }; |
| |
| const XFA_FM_TOKEN KEYWORD_START = TOKdo; |
| const XFA_FM_TOKEN KEYWORD_END = TOKendif; |
| |
| } // namespace |
| |
| const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) { |
| if (op < KEYWORD_START || op > KEYWORD_END) |
| return L""; |
| return keyWords[op].m_keyword; |
| } |
| |
| CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {} |
| |
| CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum) |
| : m_type(TOKreserver), m_uLinenum(uLineNum) {} |
| |
| CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc, |
| CXFA_FMErrorInfo* pErrorInfo) |
| : m_ptr(wsFormCalc.c_str()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {} |
| |
| CXFA_FMLexer::~CXFA_FMLexer() {} |
| |
| CXFA_FMToken* CXFA_FMLexer::NextToken() { |
| m_pToken.reset(Scan()); |
| return m_pToken.get(); |
| } |
| |
| CXFA_FMToken* CXFA_FMLexer::Scan() { |
| uint16_t ch = 0; |
| CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine); |
| if (!XFA_FMDChar::isAvalid(m_ptr)) { |
| ch = XFA_FMDChar::get(m_ptr); |
| Error(kFMErrUnsupportedChar, ch); |
| return p; |
| } |
| int iRet = 0; |
| while (1) { |
| if (!XFA_FMDChar::isAvalid(m_ptr)) { |
| ch = XFA_FMDChar::get(m_ptr); |
| Error(kFMErrUnsupportedChar, ch); |
| return p; |
| } |
| ch = XFA_FMDChar::get(m_ptr); |
| switch (ch) { |
| case 0: |
| p->m_type = TOKeof; |
| return p; |
| case 0x0A: |
| ++m_uCurrentLine; |
| p->m_uLinenum = m_uCurrentLine; |
| XFA_FMDChar::inc(m_ptr); |
| break; |
| case 0x0D: |
| XFA_FMDChar::inc(m_ptr); |
| break; |
| case ';': { |
| const FX_WCHAR* pTemp = 0; |
| Comment(m_ptr, pTemp); |
| m_ptr = pTemp; |
| } break; |
| case '"': { |
| const FX_WCHAR* pTemp = 0; |
| p->m_type = TOKstring; |
| iRet = String(p, m_ptr, pTemp); |
| m_ptr = pTemp; |
| } |
| return p; |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': { |
| p->m_type = TOKnumber; |
| const FX_WCHAR* pTemp = 0; |
| iRet = Number(p, m_ptr, pTemp); |
| m_ptr = pTemp; |
| if (iRet) { |
| Error(kFMErrBadSuffixNumber); |
| return p; |
| } |
| } |
| return p; |
| case '=': |
| XFA_FMDChar::inc(m_ptr); |
| if (XFA_FMDChar::isAvalid(m_ptr)) { |
| ch = XFA_FMDChar::get(m_ptr); |
| if (ch == '=') { |
| p->m_type = TOKeq; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| } else { |
| p->m_type = TOKassign; |
| return p; |
| } |
| } else { |
| ch = XFA_FMDChar::get(m_ptr); |
| Error(kFMErrUnsupportedChar, ch); |
| return p; |
| } |
| break; |
| case '<': |
| XFA_FMDChar::inc(m_ptr); |
| if (XFA_FMDChar::isAvalid(m_ptr)) { |
| ch = XFA_FMDChar::get(m_ptr); |
| if (ch == '=') { |
| p->m_type = TOKle; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| } else if (ch == '>') { |
| p->m_type = TOKne; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| } else { |
| p->m_type = TOKlt; |
| return p; |
| } |
| } else { |
| ch = XFA_FMDChar::get(m_ptr); |
| Error(kFMErrUnsupportedChar, ch); |
| return p; |
| } |
| break; |
| case '>': |
| XFA_FMDChar::inc(m_ptr); |
| if (XFA_FMDChar::isAvalid(m_ptr)) { |
| ch = XFA_FMDChar::get(m_ptr); |
| if (ch == '=') { |
| p->m_type = TOKge; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| } else { |
| p->m_type = TOKgt; |
| return p; |
| } |
| } else { |
| ch = XFA_FMDChar::get(m_ptr); |
| Error(kFMErrUnsupportedChar, ch); |
| return p; |
| } |
| break; |
| case ',': |
| p->m_type = TOKcomma; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| case '(': |
| p->m_type = TOKlparen; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| case ')': |
| p->m_type = TOKrparen; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| case '[': |
| p->m_type = TOKlbracket; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| case ']': |
| p->m_type = TOKrbracket; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| case '&': |
| XFA_FMDChar::inc(m_ptr); |
| p->m_type = TOKand; |
| return p; |
| case '|': |
| XFA_FMDChar::inc(m_ptr); |
| p->m_type = TOKor; |
| return p; |
| case '+': |
| XFA_FMDChar::inc(m_ptr); |
| p->m_type = TOKplus; |
| return p; |
| case '-': |
| XFA_FMDChar::inc(m_ptr); |
| p->m_type = TOKminus; |
| return p; |
| case '*': |
| XFA_FMDChar::inc(m_ptr); |
| p->m_type = TOKmul; |
| return p; |
| case '/': |
| XFA_FMDChar::inc(m_ptr); |
| if (XFA_FMDChar::isAvalid(m_ptr)) { |
| ch = XFA_FMDChar::get(m_ptr); |
| if (ch == '/') { |
| const FX_WCHAR* pTemp = 0; |
| Comment(m_ptr, pTemp); |
| m_ptr = pTemp; |
| break; |
| } else { |
| p->m_type = TOKdiv; |
| return p; |
| } |
| } else { |
| ch = XFA_FMDChar::get(m_ptr); |
| Error(kFMErrUnsupportedChar, ch); |
| return p; |
| } |
| break; |
| case '.': |
| XFA_FMDChar::inc(m_ptr); |
| if (XFA_FMDChar::isAvalid(m_ptr)) { |
| ch = XFA_FMDChar::get(m_ptr); |
| if (ch == '.') { |
| p->m_type = TOKdotdot; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| } else if (ch == '*') { |
| p->m_type = TOKdotstar; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| } else if (ch == '#') { |
| p->m_type = TOKdotscream; |
| XFA_FMDChar::inc(m_ptr); |
| return p; |
| } else if (ch <= '9' && ch >= '0') { |
| p->m_type = TOKnumber; |
| const FX_WCHAR* pTemp = 0; |
| XFA_FMDChar::dec(m_ptr); |
| iRet = Number(p, m_ptr, pTemp); |
| m_ptr = pTemp; |
| if (iRet) { |
| Error(kFMErrBadSuffixNumber); |
| } |
| return p; |
| } else { |
| p->m_type = TOKdot; |
| return p; |
| } |
| } else { |
| ch = XFA_FMDChar::get(m_ptr); |
| Error(kFMErrUnsupportedChar, ch); |
| return p; |
| } |
| case 0x09: |
| case 0x0B: |
| case 0x0C: |
| case 0x20: |
| XFA_FMDChar::inc(m_ptr); |
| break; |
| default: { |
| const FX_WCHAR* pTemp = 0; |
| iRet = Identifiers(p, m_ptr, pTemp); |
| m_ptr = pTemp; |
| if (iRet) { |
| return p; |
| } |
| p->m_type = IsKeyword(p->m_wstring); |
| } |
| return p; |
| } |
| } |
| } |
| |
| uint32_t CXFA_FMLexer::Number(CXFA_FMToken* t, |
| const FX_WCHAR* p, |
| const FX_WCHAR*& pEnd) { |
| FX_DOUBLE number = 0; |
| if (XFA_FMDChar::string2number(p, &number, pEnd)) { |
| return 1; |
| } |
| if (pEnd && XFA_FMDChar::isAlpha(pEnd)) { |
| return 1; |
| } |
| t->m_wstring = CFX_WideStringC(p, (pEnd - p)); |
| return 0; |
| } |
| |
| uint32_t CXFA_FMLexer::String(CXFA_FMToken* t, |
| const FX_WCHAR* p, |
| const FX_WCHAR*& pEnd) { |
| const FX_WCHAR* pStart = p; |
| uint16_t ch = 0; |
| XFA_FMDChar::inc(p); |
| ch = XFA_FMDChar::get(p); |
| while (ch) { |
| if (!XFA_FMDChar::isAvalid(p)) { |
| ch = XFA_FMDChar::get(p); |
| pEnd = p; |
| t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); |
| Error(kFMErrUnsupportedChar, ch); |
| return 1; |
| } |
| if (ch == '"') { |
| XFA_FMDChar::inc(p); |
| if (!XFA_FMDChar::isAvalid(p)) { |
| ch = XFA_FMDChar::get(p); |
| pEnd = p; |
| t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); |
| Error(kFMErrUnsupportedChar, ch); |
| return 1; |
| } |
| ch = XFA_FMDChar::get(p); |
| if (ch == '"') { |
| goto NEXT; |
| } else { |
| break; |
| } |
| } |
| NEXT: |
| XFA_FMDChar::inc(p); |
| ch = XFA_FMDChar::get(p); |
| } |
| pEnd = p; |
| t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); |
| return 0; |
| } |
| |
| uint32_t CXFA_FMLexer::Identifiers(CXFA_FMToken* t, |
| const FX_WCHAR* p, |
| const FX_WCHAR*& pEnd) { |
| const FX_WCHAR* pStart = p; |
| uint16_t ch = 0; |
| ch = XFA_FMDChar::get(p); |
| XFA_FMDChar::inc(p); |
| if (!XFA_FMDChar::isAvalid(p)) { |
| pEnd = p; |
| t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); |
| Error(kFMErrUnsupportedChar, ch); |
| return 1; |
| } |
| ch = XFA_FMDChar::get(p); |
| while (ch) { |
| if (!XFA_FMDChar::isAvalid(p)) { |
| pEnd = p; |
| t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); |
| Error(kFMErrUnsupportedChar, ch); |
| return 1; |
| } |
| ch = XFA_FMDChar::get(p); |
| if (XFA_FMDChar::isUnicodeAlpha(ch)) { |
| XFA_FMDChar::inc(p); |
| } else { |
| pEnd = p; |
| t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); |
| return 0; |
| } |
| } |
| pEnd = p; |
| t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); |
| return 0; |
| } |
| |
| void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) { |
| unsigned ch = 0; |
| XFA_FMDChar::inc(p); |
| ch = XFA_FMDChar::get(p); |
| while (ch) { |
| if (ch == 0x0D) { |
| XFA_FMDChar::inc(p); |
| pEnd = p; |
| return; |
| } |
| if (ch == 0x0A) { |
| ++m_uCurrentLine; |
| XFA_FMDChar::inc(p); |
| pEnd = p; |
| return; |
| } |
| XFA_FMDChar::inc(p); |
| ch = XFA_FMDChar::get(p); |
| } |
| pEnd = p; |
| } |
| |
| XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) { |
| uint32_t uHash = FX_HashCode_GetW(str, true); |
| int32_t iStart = KEYWORD_START; |
| int32_t iEnd = KEYWORD_END; |
| do { |
| int32_t iMid = (iStart + iEnd) / 2; |
| XFA_FMKeyword keyword = keyWords[iMid]; |
| if (uHash == keyword.m_uHash) |
| return keyword.m_type; |
| if (uHash < keyword.m_uHash) |
| iEnd = iMid - 1; |
| else |
| iStart = iMid + 1; |
| } while (iStart <= iEnd); |
| return TOKidentifier; |
| } |
| |
| void CXFA_FMLexer::Error(const FX_WCHAR* msg, ...) { |
| m_pErrorInfo->linenum = m_uCurrentLine; |
| va_list ap; |
| va_start(ap, msg); |
| m_pErrorInfo->message.FormatV(msg, ap); |
| va_end(ap); |
| } |
| |
| bool CXFA_FMLexer::HasError() const { |
| if (m_pErrorInfo->message.IsEmpty()) { |
| return false; |
| } |
| return true; |
| } |