blob: fd1abe48fd19cfb1a309cf14470f7209fc29063a [file] [log] [blame]
Steve Blocka7e24c12009-10-30 11:49:00 +00001#!/usr/bin/python2.4
2
3# Copyright 2009 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8# * Redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer.
10# * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following
12# disclaimer in the documentation and/or other materials provided
13# with the distribution.
14# * Neither the name of Google Inc. nor the names of its
15# contributors may be used to endorse or promote products derived
16# from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
"""A JavaScript minifier.

It is far from being a complete JS parser, so there are many valid
JavaScript programs that will be ruined by it.  Another strangeness is that
it accepts $ and % as parts of identifiers.  It doesn't merge lines or strip
out blank lines in order to ease debugging.  Variables at the top scope are
properties of the global object so we can't rename them.  It is assumed that
you introduce variables with var as if JavaScript followed C++ scope rules
around curly braces, so the declaration must be above the first use.

Use as:
import jsmin
minifier = jsmin.JavaScriptMinifier()
program1 = minifier.JSMinify(program1)
program2 = minifier.JSMinify(program2)
"""
46
47import re
48
49
class JavaScriptMinifier(object):
  """An object that you can feed code snippets to to get them minified.

  The minifier is stateful: the comment state, the brace nesting depth, the
  rename map of the current scope and the set of identifiers seen at the top
  level are all kept on the instance and are not reset by JSMinify, so a
  program can be fed in as several consecutive snippets.
  """

  def __init__(self):
    # We prepopulate the list of identifiers that shouldn't be used.  These
    # short language keywords could otherwise be used by the script as variable
    # names.
    self.seen_identifiers = {"do": True, "in": True}
    # Index of the next candidate short name within the current scope.
    self.identifier_counter = 0
    # True while we are inside a /* ... */ comment that spans several lines.
    self.in_comment = False
    # Maps original variable names to their short names in the current scope.
    self.map = {}
    # Current curly-brace nesting depth; 0 means the top (global) scope.
    self.nesting = 0

  def LookAtIdentifier(self, m):
    """Records identifiers or keywords that we see in use.

    (So we can avoid renaming variables to these strings.)

    Args:
      m: A match object whose group 1 is the identifier.

    Returns:
      Nothing.
    """
    self.seen_identifiers[m.group(1)] = True

  def Push(self):
    """Called when we encounter a '{'."""
    self.nesting += 1

  def Pop(self):
    """Called when we encounter a '}'."""
    self.nesting -= 1
    # We treat each top-level opening brace as a single scope that can span
    # several sets of nested braces.  Once we are back at the top level the
    # renames of that scope are forgotten and the short names are reused.
    if self.nesting == 0:
      self.map = {}
      self.identifier_counter = 0

  def Declaration(self, m):
    """Rewrites bits of the program selected by a regexp.

    These can be curly braces, literal strings, function declarations and var
    declarations.  (These last two must be on one line including the opening
    curly brace of the function for their variables to be renamed).

    Args:
      m: The match object handed to the re.sub callback.

    Returns:
      The string that should replace the match in the rewritten program.
    """
    matched_text = m.group(0)
    if matched_text == "{":
      self.Push()
      return matched_text
    if matched_text == "}":
      self.Pop()
      return matched_text
    # String and regexp literals are passed through untouched.
    if matched_text[:1] in ("\"", "'", "/"):
      return matched_text
    if matched_text.startswith("var "):
      var_names = matched_text[len("var "):].split(",")
      return "var " + ",".join([self.FindNewName(n) for n in var_names])
    function_match = re.match(r"(function\b[^(]*)\((.*)\)\{$", matched_text)
    if function_match:
      up_to_args = function_match.group(1)
      args = function_match.group(2).split(",")
      # The match includes the opening brace, so enter the scope before the
      # arguments are renamed; otherwise a top-level function's arguments
      # would be treated as globals and left alone.
      self.Push()
      new_args = ",".join([self.FindNewName(arg) for arg in args])
      return up_to_args + "(" + new_args + "){"

    # A plain variable use: substitute the short name if we have one.
    return self.map.get(matched_text, matched_text)

  def CharFromNumber(self, number):
    """A single-digit base-52 encoding using a-zA-Z."""
    if number < 26:
      return chr(number + 97)
    number -= 26
    return chr(number + 65)

  def _NameFromCounter(self, counter):
    """Returns the counter'th short name: a..Z, then aa, ab, ..ZZ, aaa, .."""
    name = self.CharFromNumber(counter % 52)
    # Floor division keeps the arithmetic in integers on Python 3 as well.
    counter //= 52
    while counter > 0:
      # There is no "zero" digit in front of a one-letter name, hence the -1.
      counter -= 1
      name = self.CharFromNumber(counter % 52) + name
      counter //= 52
    return name

  def FindNewName(self, var_name):
    """Finds a new short name for a variable.

    Enters it into the mapping table for this scope.

    Args:
      var_name: The name of the variable before renaming.

    Returns:
      The new name of the variable.  The name is returned unchanged if it is
      empty or if we are at the top scope.
    """
    # A trailing or doubled comma in a declaration list ("var a,") yields an
    # empty name.  Inventing a variable for it would corrupt the program.
    if not var_name:
      return var_name
    if var_name in self.map:
      return self.map[var_name]
    # Variables at the top scope are properties of the global object, so they
    # can be visible from code in a different scope.  We leave them alone.
    if self.nesting == 0:
      return var_name
    while True:
      new_identifier = self._NameFromCounter(self.identifier_counter)
      self.identifier_counter += 1
      # Skip names that collide with identifiers seen at the top level.
      if new_identifier not in self.seen_identifiers:
        break

    self.map[var_name] = new_identifier
    return new_identifier

  def RemoveSpaces(self, m):
    """Returns literal strings unchanged, replaces other inputs with group 1.

    Other inputs are replaced with the contents of capture 1.  This is either
    a single space or an empty string.

    Args:
      m: The match object handed to the re.sub callback.

    Returns:
      The string that should be inserted instead of the matched text.
    """
    entire_match = m.group(0)
    # Only the string and regexp literal alternatives can start with a quote
    # or a slash; the remaining alternatives match nothing but spaces.
    if entire_match[:1] in ("'", '"', "/"):
      return entire_match
    # Group 1 does not take part in every space-matching alternative; treat a
    # missing group explicitly as "delete the space".
    return m.group(1) or ""

  def JSMinify(self, text):
    """The main entry point.  Takes a text and returns a compressed version.

    The compressed version hopefully does the same thing.  Line breaks are
    preserved.

    Args:
      text: The text of the code snippet as a multiline string.

    Returns:
      The compressed text of the code snippet as a multiline string.
    """
    # A regexp that matches a literal string surrounded by "double quotes".
    # This regexp can handle embedded backslash-escaped characters including
    # embedded backslash-escaped double quotes.
    double_quoted_string = r'"(?:[^"\\]|\\.)*"'
    # A regexp that matches a literal string surrounded by 'single quotes'.
    single_quoted_string = r"'(?:[^'\\]|\\.)*'"
    # A regexp that matches a regexp literal surrounded by /slashes/.
    slash_quoted_regexp = r"/(?:[^/\\]|\\.)+/"
    # The literals come first in every alternation below so that their
    # contents are consumed whole and never rewritten.
    literals = [double_quoted_string,
                single_quoted_string,
                slash_quoted_regexp]

    multiple_spaces_regexp = re.compile("|".join(literals + ["( )+"]))
    # % and $ are counted as identifier characters.
    single_space_regexp = re.compile("|".join(
        literals + [r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()"]))
    identifier_regexp = re.compile(r"([a-zA-Z0-9_$%]+)")
    function_declaration_regexp = (
        r"\bfunction"              # Function definition keyword...
        r"( [\w$%]+)?"             # ...optional function name...
        r"\([\w$%,]+\)\{")         # ...argument declarations.
    rewritable = literals + [r"\{",                 # Curly braces.
                             r"\}",
                             r"\bvar [\w$%,]+",     # var declarations.
                             function_declaration_regexp]
    # Variable use.  Cannot follow a period.
    variable_use_regexp = r"(?<![.\w$%])[\w$%]+"
    # Unfortunately the keyword-value syntax { key:value } makes the key look
    # like a variable where in fact it is a literal string.  We use the
    # presence or absence of a question mark to try to distinguish between
    # this case and the ternary operator: "condition ? iftrue : iffalse".
    # Without a question mark on the line a variable use cannot precede a
    # colon either.
    ternary_line_regexp = re.compile(
        "|".join(rewritable + [variable_use_regexp]))
    plain_line_regexp = re.compile(
        "|".join(rewritable + [variable_use_regexp + r"(?![:\w$%])"]))

    new_lines = []
    for line in text.split("\n"):
      line = line.replace("\t", " ")
      if self.in_comment:
        comment_end = line.find("*/")
        if comment_end < 0:
          # Still inside the comment: emit an empty line so that line numbers
          # are preserved.
          new_lines.append("")
          continue
        line = line[comment_end + len("*/"):]
        self.in_comment = False

      # NOTE: comments are stripped before literals are recognized, so a "//"
      # or "/*" inside a string literal is treated as a comment start.
      line = re.sub(r"/\*.*?\*/", " ", line)
      line = re.sub(r"//.*", "", line)
      comment_start = line.find("/*")
      if comment_start >= 0:
        line = line[:comment_start]
        self.in_comment = True

      # Strip leading and trailing spaces.
      line = line.strip(" ")
      # Replace multiple spaces with a single space.
      line = multiple_spaces_regexp.sub(self.RemoveSpaces, line)
      # Strip single spaces unless they have an identifier character both
      # before and after the space.
      line = single_space_regexp.sub(self.RemoveSpaces, line)
      # Collect keywords and identifiers that are already in use.
      if self.nesting == 0:
        for identifier_match in identifier_regexp.finditer(line):
          self.LookAtIdentifier(identifier_match)
      if "?" in line:
        declaration_regexp = ternary_line_regexp
      else:
        declaration_regexp = plain_line_regexp
      line = declaration_regexp.sub(self.Declaration, line)
      new_lines.append(line)

    return "\n".join(new_lines) + "\n"