blob: c1ee02d8eccb7f7c099084d7fe81dff6a65705c3 [file] [log] [blame]
Guido van Rossume7b146f2000-02-04 15:28:42 +00001"""A lexical analyzer class for simple shell-like syntaxes."""
2
Guido van Rossum9c30c241998-12-22 05:19:29 +00003# Module and documentation by Eric S. Raymond, 21 Dec 1998
4
Guido van Rossum73898c71999-05-03 18:14:16 +00005import sys
Guido van Rossum9c30c241998-12-22 05:19:29 +00006
class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    Characters are read one at a time from ``instream`` and grouped into
    tokens: runs of ``wordchars``, quoted strings (returned with their
    surrounding quote characters), or single punctuation characters.
    Whitespace separates tokens and text from a ``commenters`` character
    to the end of the line is discarded.
    """

    def __init__(self, instream=None):
        """Create a lexer reading from instream (sys.stdin when omitted)."""
        # Compare against None rather than testing truthiness so that a
        # stream object which happens to evaluate as false is still used.
        if instream is not None:
            self.instream = instream
        else:
            self.instream = sys.stdin
        # Characters that start a comment running to the end of the line.
        self.commenters = '#'
        # Characters that may be accumulated into a multi-character word.
        self.wordchars = 'abcdfeghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
        # Characters that separate tokens.
        self.whitespace = ' \t\r\n'
        # Characters that open (and close) a quoted string.
        self.quotes = '\'"'
        # Scanner state: ' ' = between tokens, 'a' = inside a word,
        # a quote character = inside a string opened by that quote,
        # None = end of input has been reached.
        self.state = ' '
        # Stack of pushed-back tokens; index 0 is returned first.
        self.pushback = []
        # Count of newlines consumed so far, plus one.
        self.lineno = 1
        # Verbosity of diagnostic printing (0 = silent, up to 3).
        self.debug = 0
        # Token currently being accumulated.
        self.token = ''

    def push_token(self, tok):
        "Push a token onto the stack popped by the get_token method"
        if self.debug >= 1:
            print("Pushing " + tok)
        self.pushback.insert(0, tok)

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Returns the next token as a string, or '' once the end of the
        input stream has been reached.

        Raises ValueError if the input ends inside a quoted string.
        """
        if self.pushback:
            tok = self.pushback.pop(0)
            if self.debug >= 1:
                print("Popping " + tok)
            return tok
        while 1:
            nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno = self.lineno + 1
            if self.debug >= 3:
                print("In state " + repr(self.state) + " I see character: " + repr(nextchar))
            if self.state is None:
                # End of input was already seen by an earlier call.
                return ''
            elif self.state == ' ':
                # The empty-string test must come first: '' is "in" every
                # string, so it would otherwise be taken for whitespace.
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("I see whitespace in whitespace state")
                    if self.token:
                        break   # emit current token
                elif nextchar in self.commenters:
                    # Discard the rest of the line, including its newline.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.quotes:
                    # Remember which quote opened the string; only the same
                    # character closes it.
                    self.token = nextchar
                    self.state = nextchar
                else:
                    # Any other character is a one-character token.
                    self.token = nextchar
                    break   # emit current token
            elif self.state in self.quotes:
                if not nextchar:
                    # Without this check the loop would spin forever,
                    # because '' never matches the closing quote.
                    raise ValueError("No closing quotation")
                self.token = self.token + nextchar
                if nextchar == self.state:
                    self.state = ' '
                    break
            elif self.state == 'a':
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("I see whitespace in word state")
                    self.state = ' '
                    if self.token:
                        break   # emit current token
                elif nextchar in self.commenters:
                    # The comment is skipped but the word state is kept, so
                    # word characters on the next line extend this token.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif nextchar in self.wordchars or nextchar in self.quotes:
                    self.token = self.token + nextchar
                else:
                    # Punctuation ends the word; queue it so the next call
                    # returns it as a token of its own.
                    self.pushback.insert(0, nextchar)
                    if self.debug >= 2:
                        print("I see punctuation in word state")
                    self.state = ' '
                    if self.token:
                        break   # emit current token

        result = self.token
        self.token = ''
        if self.debug >= 1:
            print("Token: " + result)
        return result
110
if __name__ == '__main__':
    # Demo driver: tokenize standard input and print each token.
    lexer = shlex()
    while 1:
        tt = lexer.get_token()
        # get_token() signals end of input with '' and never returns None,
        # so stop on any empty token; a None comparison would loop forever.
        if tt:
            print("Token: " + repr(tt))
        else:
            break
120