blob: 8b4ac7a0d20da91b60d981cb8eeb563d963674a7 [file] [log] [blame]
Guido van Rossum9c30c241998-12-22 05:19:29 +00001#!/usr/bin/python
2# Module and documentation by Eric S. Raymond, 21 Dec 1998
3
4import sys, os, string
5
class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    Characters are read one at a time from ``instream`` and grouped into
    tokens: runs of word characters, quoted strings (returned *with* their
    surrounding quote characters), and single punctuation characters.
    Text from a comment character to the end of its line is skipped.
    """

    def __init__(self, instream=None):
        """Create a lexer reading from instream (default: sys.stdin).

        instream must provide read(n) and readline().
        """
        # Compare against None explicitly so that a stream object which
        # happens to evaluate as false is not silently replaced by stdin.
        if instream is not None:
            self.instream = instream
        else:
            self.instream = sys.stdin
        # Characters that start a comment running to the end of the line.
        self.commenters = '#'
        # Characters that may be accumulated into a multi-character word.
        self.wordchars = 'abcdfeghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
        # Characters that separate tokens and are otherwise skipped.
        self.whitespace = ' \t\r\n'
        # Characters that open (and must also close) a quoted string.
        self.quotes = '\'"'
        # Lexer state: ' ' between tokens, 'a' inside a word, a quote
        # character while inside a quoted string, None after end of file.
        self.state = ' '
        # Stack of pushed-back tokens; index 0 is returned first.
        self.pushback = []
        # Current line number, counted from 1.
        self.lineno = 1
        # Debug verbosity: >= 1 traces tokens, >= 2 transitions, >= 3 chars.
        self.debug = 0
        # Token currently being accumulated.
        self.token = ''

    def push_token(self, tok):
        """Push a token onto the stack popped by the get_token method."""
        # Single-argument print(...) behaves the same whether print is a
        # statement or a function.
        if self.debug >= 1:
            print("Pushing " + tok)
        self.pushback = [tok] + self.pushback

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Returns the next token as a string, or '' once the end of the
        input has been reached.

        Raises ValueError if the input ends inside a quoted string.
        """
        if self.pushback:
            tok = self.pushback[0]
            self.pushback = self.pushback[1:]
            if self.debug >= 1:
                print("Popping " + tok)
            return tok
        while 1:
            nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno = self.lineno + 1
            if self.debug >= 3:
                print("In state " + repr(self.state) +
                      " I see character: " + repr(nextchar))
            if self.state is None:
                # End of file was already seen on an earlier call.
                return ''
            elif self.state == ' ':
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("I see whitespace in whitespace state")
                    if self.token:
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.quotes:
                    # Remember which quote character must close the string.
                    self.token = nextchar
                    self.state = nextchar
                else:
                    # Punctuation is a complete one-character token.
                    self.token = nextchar
                    break
            elif self.state in self.quotes:
                if not nextchar:
                    # read() keeps returning '' at end of file, so without
                    # this check an unterminated quote would loop forever.
                    self.state = None
                    raise ValueError("No closing quotation")
                self.token = self.token + nextchar
                if nextchar == self.state:
                    self.state = ' '
                    break
            elif self.state == 'a':
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("I see whitespace in word state")
                    self.state = ' '
                    if self.token:
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif nextchar in self.wordchars or nextchar in self.quotes:
                    self.token = self.token + nextchar
                else:
                    # Punctuation ends the word; hand it back so the next
                    # call returns it as its own token.
                    self.pushback = [nextchar] + self.pushback
                    if self.debug >= 2:
                        print("I see punctuation in word state")
                    # Must update the instance state (not a local name),
                    # otherwise the lexer stays stuck in word state.
                    self.state = ' '
                    if self.token:
                        break   # emit current token
                    else:
                        continue

        result = self.token
        self.token = ''
        if self.debug >= 1:
            print("Token: " + result)
        return result
109
if __name__ == '__main__':
    # Demo driver: tokenize standard input and print every token found.
    lexer = shlex()
    while 1:
        tt = lexer.get_token()
        if not tt:
            # get_token() signals end of input with '', never None, so
            # testing against None would loop forever.
            break
        # Single-argument print(...) behaves the same whether print is a
        # statement or a function.
        print("Token: " + repr(tt))
118 break
119