// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 2001-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
8
9#ifndef RBBINODE_H
10#define RBBINODE_H
11
12#include "unicode/utypes.h"
13#include "unicode/unistr.h"
14#include "unicode/uobject.h"
15
//
//  class RBBINode
//
//      Represents a node in the parse tree generated when reading
//      a rule file.
//
22
23U_NAMESPACE_BEGIN
24
// Forward declarations: RBBINode below only holds pointers to these types,
// so their full definitions are not needed in this header.
class UnicodeSet;
class UVector;
27
28class RBBINode : public UMemory {
29 public:
30 enum NodeType {
31 setRef,
32 uset,
33 varRef,
34 leafChar,
35 lookAhead,
36 tag,
37 endMark,
38 opStart,
39 opCat,
40 opOr,
41 opStar,
42 opPlus,
43 opQuestion,
44 opBreak,
45 opReverse,
46 opLParen
47 };
48
49 enum OpPrecedence {
50 precZero,
51 precStart,
52 precLParen,
53 precOpOr,
54 precOpCat
55 };
56
57 NodeType fType;
58 RBBINode *fParent;
59 RBBINode *fLeftChild;
60 RBBINode *fRightChild;
61 UnicodeSet *fInputSet; // For uset nodes only.
62 OpPrecedence fPrecedence; // For binary ops only.
63
64 UnicodeString fText; // Text corresponding to this node.
65 // May be lazily evaluated when (if) needed
66 // for some node types.
67 int fFirstPos; // Position in the rule source string of the
68 // first text associated with the node.
69 // If there's a left child, this will be the same
70 // as that child's left pos.
71 int fLastPos; // Last position in the rule source string
72 // of any text associated with this node.
73 // If there's a right child, this will be the same
74 // as that child's last postion.
75
76 UBool fNullable; // See Aho.
77 int32_t fVal; // For leafChar nodes, the value.
78 // Values are the character category,
79 // corresponds to columns in the final
80 // state transition table.
81
Victor Changce4bf3c2021-01-19 16:34:24 +000082 UBool fLookAheadEnd; // For endMark nodes, set true if
Victor Chang73229502020-09-17 13:39:19 +010083 // marking the end of a look-ahead rule.
84
85 UBool fRuleRoot; // True if this node is the root of a rule.
86 UBool fChainIn; // True if chaining into this rule is allowed
87 // (no '^' present).
88
89 UVector *fFirstPosSet;
90 UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion.
91 UVector *fFollowPos;
92
93
94 RBBINode(NodeType t);
95 RBBINode(const RBBINode &other);
96 ~RBBINode();
97
98 RBBINode *cloneTree();
99 RBBINode *flattenVariables();
100 void flattenSets();
101 void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
102
103#ifdef RBBI_DEBUG
104 static void printNodeHeader();
105 static void printNode(const RBBINode *n);
106 static void printTree(const RBBINode *n, UBool withHeading);
107#endif
108
109 private:
110 RBBINode &operator = (const RBBINode &other); // No defs.
111 UBool operator == (const RBBINode &other); // Private, so these functions won't accidently be used.
112
113#ifdef RBBI_DEBUG
114 public:
115 int fSerialNum; // Debugging aids.
116#endif
117};
118
#ifdef RBBI_DEBUG
// Debug-build-only helper, declared only when RBBI_DEBUG is defined.
// NOTE(review): presumably prints `s`, padded to at least `minWidth`
// columns (default 0) -- confirm against the definition.
U_CFUNC void
RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth = 0);
#endif
123
124U_NAMESPACE_END
125
126#endif
127