blob: 236f511d447b228e9b5473dddba99fb93902397e [file] [log] [blame]
#!/usr/bin/python2.4

# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
"""A JavaScript minifier.

It is far from being a complete JS parser, so there are many valid
JavaScript programs that will be ruined by it. Another strangeness is that
it accepts $ and % as parts of identifiers. It doesn't merge lines or strip
out blank lines in order to ease debugging. Variables at the top scope are
properties of the global object so we can't rename them. It is assumed that
you introduce variables with var as if JavaScript followed C++ scope rules
around curly braces, so the declaration must be above the first use.

Use as:
import jsmin
minifier = jsmin.JavaScriptMinifier()
program1 = minifier.JSMinify(program1)
program2 = minifier.JSMinify(program2)
"""
46
47import re
48
49
class JavaScriptMinifier(object):
    """An object that you can feed code snippets to to get them minified.

    The minifier works line by line and keeps state between calls to
    JSMinify: whether a block comment is still open, the current brace
    nesting depth, the renaming table of the current top-level scope and
    the set of identifiers that generated names must avoid.
    """

    # Keywords that are short enough to collide with generated names. They
    # are never handed out as replacement identifiers.
    _RESERVED_WORDS = ("do", "if", "in", "for", "let", "new", "try", "var")

    # A literal string surrounded by "double quotes". Handles embedded
    # backslash-escaped characters including escaped double quotes.
    _DOUBLE_QUOTED_STRING = r'"(?:[^"\\]|\\.)*"'
    # A literal string surrounded by 'single quotes'.
    _SINGLE_QUOTED_STRING = r"'(?:[^'\\]|\\.)*'"
    # A template string surrounded by `backticks`.
    _TEMPLATE_STRING = r"`(?:[^`\\]|\\.)*`"
    # A regexp literal surrounded by /slashes/. A ) is not allowed before the
    # first ( since that is a syntax error and probably just two unrelated
    # slashes. It also may not follow anything that can only be the end of a
    # primary expression.
    _SLASH_QUOTED_REGEXP = r"(?<![\w$'\")\]])/(?:(?=\()|(?:[^()/\\]|\\.)+)(?:\([^/\\]|\\.)*/"

    # Literals are always listed first in the alternations below so that
    # their contents are matched as a unit and left untouched.
    _LITERALS = (_DOUBLE_QUOTED_STRING,
                 _SINGLE_QUOTED_STRING,
                 _TEMPLATE_STRING,
                 _SLASH_QUOTED_REGEXP)

    # Runs of spaces; capture 1 is the single space that replaces the run.
    _MULTI_SPACE_RE = re.compile("|".join(_LITERALS + ("( )+",)))
    # A single space that lacks an identifier character before or after it.
    # % and $ count as identifier characters.
    _SINGLE_SPACE_RE = re.compile("|".join(
        _LITERALS + (r"(?<![a-zA-Z_0-9$%]) | (?![a-zA-Z_0-9$%])()",)))

    _IDENTIFIER_RE = re.compile(r"([a-zA-Z0-9_$%]+)")

    _FUNCTION_DECLARATION = (
        r"\bfunction"       # Function definition keyword...
        r"( [\w$%]+)?"      # ...optional function name...
        r"\([\w$%,]+\)\{")  # ...argument declarations.
    # Variable use. Cannot follow a period.
    _VARIABLE_USE = r"(?<![.\w$%])[\w$%]+"
    # Appended to _VARIABLE_USE to forbid a trailing colon.
    _NO_TRAILING_COLON = r"(?![:\w$%])"

    _REWRITE_PREFIX = _LITERALS + (
        r"\{",               # Curly braces.
        r"\}",
        r"\bvar [\w$%,]+",   # var declarations.
        _FUNCTION_DECLARATION)
    # Used on lines without a question mark: "key:" is taken to be an object
    # literal key and is not renamed.
    _REWRITE_RE = re.compile("|".join(
        _REWRITE_PREFIX + (_VARIABLE_USE + _NO_TRAILING_COLON,)))
    # Used on lines with a question mark, where a colon may belong to the
    # ternary operator "condition ? iftrue : iffalse".
    _REWRITE_TERNARY_RE = re.compile("|".join(
        _REWRITE_PREFIX + (_VARIABLE_USE,)))

    def __init__(self):
        # We prepopulate the table of identifiers that shouldn't be used.
        # These short language keywords could otherwise be generated as
        # variable names.
        self.seen_identifiers = {}
        for word in self._RESERVED_WORDS:
            self.seen_identifiers[word] = True
        self.identifier_counter = 0
        self.in_comment = False
        self.map = {}
        self.nesting = 0

    def LookAtIdentifier(self, m):
        """Records identifiers or keywords that we see in use.

        (So we can avoid renaming variables to these strings.)

        Args:
          m: A match object whose group 1 is the identifier.

        Returns:
          Nothing.
        """
        self.seen_identifiers[m.group(1)] = True

    def Push(self):
        """Called when we encounter a '{'."""
        self.nesting += 1

    def Pop(self):
        """Called when we encounter a '}'."""
        self.nesting -= 1
        # We treat each top-level opening brace as a single scope that can
        # span several sets of nested braces, so the renaming table is only
        # discarded once we are back at the top level.
        if self.nesting == 0:
            self.map = {}
            self.identifier_counter = 0

    def Declaration(self, m):
        """Rewrites bits of the program selected by a regexp.

        These can be curly braces, literal strings, function declarations
        and var declarations. (These last two must be on one line including
        the opening curly brace of the function for their variables to be
        renamed).

        Args:
          m: The match object for the piece of program text.

        Returns:
          The string that should replace the match in the rewritten program.
        """
        matched_text = m.group(0)

        # Template strings: only simple ${identifier} substitutions are
        # renamed, the rest of the literal is kept.
        if matched_text.startswith("`") and matched_text.endswith("`"):
            return re.sub(
                r"\$\{([\w$%]+)\}",
                lambda sub: '${' + self.FindNewName(sub.group(1)) + '}',
                matched_text)

        if matched_text == "{":
            self.Push()
            return matched_text
        if matched_text == "}":
            self.Pop()
            return matched_text
        # String and regexp literals are passed through untouched.
        if matched_text[:1] in ('"', "'", "/"):
            return matched_text

        var_prefix = "var "
        if matched_text.startswith(var_prefix):
            names = matched_text[len(var_prefix):].split(",")
            return var_prefix + ",".join(
                [self.FindNewName(name) for name in names])

        function_match = re.match(r"(function\b[^(]*)\((.*)\)\{$",
                                  matched_text)
        if function_match:
            up_to_args = function_match.group(1)
            args = function_match.group(2).split(",")
            # The function's brace opens a scope before its arguments are
            # named, so arguments of a top-level function are renamed too.
            self.Push()
            return up_to_args + "(" + ",".join(
                [self.FindNewName(arg) for arg in args]) + "){"

        # Plain variable use: substitute the new name if one was assigned.
        return self.map.get(matched_text, matched_text)

    def CharFromNumber(self, number):
        """A single-digit base-52 encoding using a-zA-Z.

        Args:
          number: The digit to encode; 0..25 map to a-z and 26..51 to A-Z.

        Returns:
          The one-character string for the digit.
        """
        if number < 26:
            return chr(number + 97)
        number -= 26
        return chr(number + 65)

    def _EncodeCounter(self, number):
        """Turns a counter value into an alphabetic name.

        Values 0..51 give one letter and 52..2755 give two letters. Larger
        values keep growing the name one letter at a time, so the result
        always consists of the letters a-zA-Z only.

        Args:
          number: A non-negative integer.

        Returns:
          The name for that counter value; distinct values give distinct
          names.
        """
        name = self.CharFromNumber(number % 52)
        number //= 52
        while number:
            # The leading positions have no "zero" digit: "aa" directly
            # follows "Z", hence the decrement before taking the digit.
            number -= 1
            name = self.CharFromNumber(number % 52) + name
            number //= 52
        return name

    def FindNewName(self, var_name):
        """Finds a new short name for a variable.

        Enters it into the mapping table for this scope.

        Args:
          var_name: The name of the variable before renaming.

        Returns:
          The new name of the variable. The name is returned unchanged at the
          top level (nesting 0) and for the 'arguments' object.
        """
        if var_name in self.map:
            return self.map[var_name]
        # Variables at the top scope are left alone.
        if self.nesting == 0:
            return var_name
        # Do not rename arguments object.
        if var_name == 'arguments':
            return 'arguments'
        # Skip over candidates that are reserved or already in use.
        while True:
            new_identifier = self._EncodeCounter(self.identifier_counter)
            self.identifier_counter += 1
            if new_identifier not in self.seen_identifiers:
                break

        self.map[var_name] = new_identifier
        return new_identifier

    def RemoveSpaces(self, m):
        """Returns literals unchanged, replaces other inputs with capture 1.

        String, template and regexp literals are returned as they are. Any
        other match is replaced with the contents of capture 1, which is
        either a single space or an empty string.

        Args:
          m: The match object for the piece of program text.

        Returns:
          The string that should be inserted instead of the matched text.
        """
        entire_match = m.group(0)
        for literal in (r"'.*'$", r'".*"$', r"`.*`$", r"/.+/$"):
            if re.match(literal, entire_match):
                return entire_match
        replacement = m.group(1)
        # Capture 1 did not take part in the match: the space is dropped.
        if replacement is None:
            return ""
        return replacement

    def _StripComments(self, line):
        """Removes comments from one line, tracking open block comments.

        Args:
          line: One line of the program.

        Returns:
          The line without comments, or None if the entire line lies inside
          a block comment that started on an earlier line.
        """
        if self.in_comment:
            comment_end = re.search(r"\*/", line)
            if not comment_end:
                return None
            line = line[comment_end.end():]
            self.in_comment = False

        line = re.sub(r"/\*.*?\*/", " ", line)
        line = re.sub(r"//.*", "", line)
        comment_start = re.search(r"/\*", line)
        if comment_start:
            # The comment continues on the following line(s).
            line = line[:comment_start.start()]
            self.in_comment = True
        return line

    def JSMinify(self, text):
        """The main entry point. Takes a text and returns a compressed version.

        The compressed version hopefully does the same thing. Line breaks are
        preserved.

        Args:
          text: The text of the code snippet as a multiline string.

        Returns:
          The compressed text of the code snippet as a multiline string.
        """
        new_lines = []
        for line in text.split("\n"):
            line = self._StripComments(line.replace("\t", " "))
            if line is None:
                # Keep the line count stable inside block comments.
                new_lines.append("")
                continue

            # Strip leading and trailing spaces.
            line = line.strip(" ")
            # Replace multiple spaces with a single space.
            line = self._MULTI_SPACE_RE.sub(self.RemoveSpaces, line)
            # Strip single spaces unless they have an identifier character
            # both before and after the space.
            line = self._SINGLE_SPACE_RE.sub(self.RemoveSpaces, line)
            # Collect keywords and identifiers that are already in use.
            if self.nesting == 0:
                for identifier in self._IDENTIFIER_RE.finditer(line):
                    self.LookAtIdentifier(identifier)
            # Unfortunately the keyword-value syntax { key:value } makes the
            # key look like a variable where in fact it is a literal string.
            # We use the presence or absence of a question mark to try to
            # distinguish between this case and the ternary operator.
            if "?" in line:
                rewrite_re = self._REWRITE_TERNARY_RE
            else:
                rewrite_re = self._REWRITE_RE
            new_lines.append(rewrite_re.sub(self.Declaration, line))

        return "\n".join(new_lines) + "\n"