blob: 222296f3634feb089bdd3b4da267821482b8ded5 [file] [log] [blame]
Greg Wardef4490f1999-09-29 12:50:13 +00001"""distutils.command.dist
2
3Implements the Distutils 'dist' command (create a source distribution)."""
4
5# created 1999/09/22, Greg Ward
6
7__rcsid__ = "$Id$"
8
9import sys, os, string, re
10import fnmatch
11from types import *
12from glob import glob
13from distutils.core import Command
14from distutils.text_file import TextFile
15
16
17# Possible modes of operation:
18# - require an explicit manifest that lists every single file (presumably
19# along with a way to auto-generate the manifest)
20# - require an explicit manifest, but allow it to have globs or
21# filename patterns of some kind (and also have auto-generation)
# - allow an explicit manifest, but automatically augment it at runtime
23# with the source files mentioned in 'packages', 'py_modules', and
24# 'ext_modules' (and any other such things that might come along)
25
26# I'm liking the third way. Possible gotchas:
27# - redundant specification: 'packages' includes 'foo' and manifest
28# includes 'foo/*.py'
29# - obvious conflict: 'packages' includes 'foo' and manifest
30# includes '! foo/*.py' (can't imagine why you'd want this)
31# - subtle conflict: 'packages' includes 'foo' and manifest
32# includes '! foo/bar.py' (this could well be desired: eg. exclude
33# an experimental module from distribution)
34
35# Syntax for the manifest file:
36# - if a line is just a Unix-style glob by itself, it's a "simple include
37# pattern": go find all files that match and add them to the list
38# of files
39# - if a line is a glob preceded by "!", then it's a "simple exclude
40# pattern": go over the current list of files and exclude any that
41# match the glob pattern
42# - if a line consists of a directory name followed by zero or more
43# glob patterns, then we'll recursively explore that directory tree
44# - the glob patterns can be include (no punctuation) or exclude
45# (prefixed by "!", no space)
46# - if no patterns given or the first pattern is not an include pattern,
47# then assume "*" -- ie. find everything (and then start applying
48# the rest of the patterns)
49# - the patterns are given in order of increasing precedence, ie.
50# the *last* one to match a given file applies to it
51#
52# example (ignoring auto-augmentation!):
53# distutils/*.py
54# distutils/command/*.py
55# ! distutils/bleeding_edge.py
56# examples/*.py
57# examples/README
58#
59# smarter way (that *will* include distutils/command/bleeding_edge.py!)
60# distutils *.py
61# ! distutils/bleeding_edge.py
62# examples !*~ !*.py[co] (same as: examples * !*~ !*.py[co])
63# test test_* *.txt !*~ !*.py[co]
64# README
65# setup.py
66#
67# The actual Distutils manifest (don't need to mention source files,
68# README, setup.py -- they're automatically distributed!):
69# examples !*~ !*.py[co]
70# test !*~ !*.py[co]
71
72# The algorithm that will make it work:
73# files = stuff from 'packages', 'py_modules', 'ext_modules',
74# plus README, setup.py, ... ?
75# foreach pattern in manifest file:
76# if simple-include-pattern: # "distutils/*.py"
77# files.append (glob (pattern))
78# elif simple-exclude-pattern: # "! distutils/foo*"
79# xfiles = glob (pattern)
80# remove all xfiles from files
81# elif recursive-pattern: # "examples" (just a directory name)
82# patterns = rest-of-words-on-line
83# dir_files = list of all files under dir
84# if patterns:
85# if patterns[0] is an exclude-pattern:
86# insert "*" at patterns[0]
87# for file in dir_files:
88# for dpattern in reverse (patterns):
89# if file matches dpattern:
90# if dpattern is an include-pattern:
91# files.append (file)
92# else:
93# nothing, don't include it
94# next file
95# else:
96# files.extend (dir_files) # ie. accept all of them
97
98
99# Anyways, this is all implemented below -- BUT it is largely untested; I
100# know it works for the simple case of distributing the Distutils, but
101# haven't tried it on more complicated examples. Undoubtedly doing so will
102# reveal bugs and cause delays, so I'm waiting until after I've released
103# Distutils 0.1.
104
105
106# Other things we need to look for in creating a source distribution:
107# - make sure there's a README
108# - make sure the distribution meta-info is supplied and non-empty
# (*must* have name, version, ((author and author_email) or
# (maintainer and maintainer_email)), url)
111#
112# Frills:
113# - make sure the setup script is called "setup.py"
114# - make sure the README refers to "setup.py" (ie. has a line matching
115# /^\s*python\s+setup\.py/)
116
117# A crazy idea that conflicts with having/requiring 'version' in setup.py:
118# - make sure there's a version number in the "main file" (main file
119# is __init__.py of first package, or the first module if no packages,
120# or the first extension module if no pure Python modules)
121# - XXX how do we look for __version__ in an extension module?
122# - XXX do we import and look for __version__? or just scan source for
123# /^__version__\s*=\s*"[^"]+"/ ?
124# - what about 'version_from' as an alternative to 'version' -- then
125# we know just where to search for the version -- no guessing about
126# what the "main file" is
127
128
129
class Dist(Command):
    """Implement the 'dist' command: create a source distribution.

    The list of files to distribute ('self.files') starts with the
    defaults gathered by 'find_defaults()' and is then adjusted by the
    include/exclude patterns read from the manifest file.  The files are
    hard-linked into a release tree named after the distribution, and
    that tree is archived by running the external 'tar' and/or 'zip'
    programs.

    Options:
      formats   list of archive formats to create (a comma-separated
                string is split into a list); only 'gztar' and 'zip'
                are acted on by 'make_distribution()'
      manifest  name of the manifest file (default "MANIFEST")
    """

    options = [('formats=', 'f',
                "formats for source distribution (tar, ztar, gztar, or zip)"),
               ('manifest=', 'm',
                "name of manifest file"),
              ]

    # archive format used when 'formats' is not given, keyed on 'os.name'
    default_format = {'posix': 'gztar',
                      'nt': 'zip'}

    exclude_re = re.compile(r'\s*!\s*(\S+)')    # for manifest lines


    def set_default_options(self):
        """Mark both options as "not supplied" (None)."""
        self.formats = None
        self.manifest = None


    def set_final_options(self):
        """Fill in defaults for any option still unset.

        'formats' defaults to the platform's entry in 'default_format';
        a string value is split on commas.  'manifest' defaults to
        "MANIFEST".

        Raises DistutilsPlatformError if 'formats' was not supplied and
        'os.name' has no entry in 'default_format'.
        """
        if self.formats is None:
            try:
                self.formats = [self.default_format[os.name]]
            except KeyError:
                # Imported locally: the module's top-level imports do not
                # bring this name into scope.
                from distutils.errors import DistutilsPlatformError
                raise DistutilsPlatformError(
                    "don't know how to build source distributions on " +
                    "%s platform" % os.name)
        elif type(self.formats) is StringType:
            self.formats = string.split(self.formats, ',')

        if self.manifest is None:
            self.manifest = "MANIFEST"


    def run(self):
        """Check the meta-data, build the file list, and make the archives."""
        self.check_metadata()

        self.files = []
        self.find_defaults()
        self.read_manifest()

        self.make_distribution()


    def check_metadata(self):
        """Warn about missing distribution meta-data.

        'name', 'version' and 'url' are expected to be present and
        non-empty, along with either 'author' plus 'author_email' or
        'maintainer' plus 'maintainer_email'.  Problems are reported
        through 'self.warn()' only; nothing is raised here.
        """
        dist = self.distribution

        missing = []
        for attr in ('name', 'version', 'url'):
            if not (hasattr(dist, attr) and getattr(dist, attr)):
                missing.append(attr)

        if missing:
            self.warn("missing required meta-data: " +
                      string.join(missing, ", "))

        if dist.author:
            if not dist.author_email:
                self.warn("missing meta-data: if 'author' supplied, " +
                          "'author_email' must be supplied too")
        elif dist.maintainer:
            if not dist.maintainer_email:
                self.warn("missing meta-data: if 'maintainer' supplied, " +
                          "'maintainer_email' must be supplied too")
        else:
            self.warn("missing meta-data: either author (and author_email) " +
                      "or maintainer (and maintainer_email) " +
                      "must be supplied")

    # check_metadata ()


    def find_defaults(self):
        """Add the automatically distributed files to 'self.files'.

        These are: README and setup.py (a warning is issued for each one
        that doesn't exist), anything matching 'test/test*.py', and the
        source files reported by the 'build_py' and 'build_ext' commands
        when the distribution has pure Python modules/packages or
        extension modules respectively.
        """
        standards = ['README', 'setup.py']
        for fn in standards:
            if os.path.exists(fn):
                self.files.append(fn)
            else:
                self.warn("standard file %s not found" % fn)

        optional = ['test/test*.py']
        for pattern in optional:
            files = glob(pattern)
            if files:
                self.files.extend(files)

        if self.distribution.packages or self.distribution.py_modules:
            build_py = self.find_peer('build_py')
            build_py.ensure_ready()
            self.files.extend(build_py.get_source_files())

        if self.distribution.ext_modules:
            build_ext = self.find_peer('build_ext')
            build_ext.ensure_ready()
            self.files.extend(build_ext.get_source_files())


    def open_manifest(self, filename):
        """Return a TextFile for 'filename' with every clean-up option on.

        Comment stripping, blank-line skipping, line joining and
        whitespace stripping/collapsing are all requested from TextFile,
        so 'read_manifest()' doesn't do any of that itself.
        """
        return TextFile(filename,
                        strip_comments=1,
                        skip_blanks=1,
                        join_lines=1,
                        lstrip_ws=1,
                        rstrip_ws=1,
                        collapse_ws=1)


    def search_dir(self, dir, patterns):
        """Return the names found under 'dir' that 'patterns' select.

        'patterns' is a list of glob patterns; one prefixed with "!" is
        an exclude pattern, any other is an include pattern.  Each
        pattern is translated with 'fnmatch.translate()' and matched
        against the whole name as returned by 'findall()'.  The *last*
        pattern in the list that matches a name decides whether that name
        is included; names matching no pattern are left out.  If the
        first pattern is an exclude pattern, an implicit "*" is assumed
        in front of it.  With an empty 'patterns', everything found is
        returned.

        The caller's 'patterns' list is not modified.
        """
        allfiles = findall(dir)
        if not patterns:
            # no patterns at all: accept the whole tree
            return allfiles

        if patterns[0][0] == "!":       # starts with an exclude spec?
            # then accept anything that isn't explicitly excluded (build
            # a new list so the caller's list is left alone)
            patterns = ["*"] + patterns

        # "action-patterns": (include, regexp) tuples where include is a
        # boolean
        act_patterns = []
        for pattern in patterns:
            if pattern[0] == '!':
                act_patterns.append(
                    (0, re.compile(fnmatch.translate(pattern[1:]))))
            else:
                act_patterns.append(
                    (1, re.compile(fnmatch.translate(pattern))))

        # later patterns take precedence, so they are tried first
        act_patterns.reverse()

        files = []
        for filename in allfiles:
            for (include, regexp) in act_patterns:
                if regexp.match(filename):
                    if include:
                        files.append(filename)
                    break               # continue to next file

        return files

    # search_dir ()


    def exclude_files(self, pattern):
        """Remove from 'self.files' every name matching glob 'pattern'.

        'self.files' is modified in place.  Returns the number of entries
        removed, so callers can tell when a pattern excluded nothing.
        """
        regexp = re.compile(fnmatch.translate(pattern))
        removed = 0
        # walk backwards so deleting doesn't disturb the indices still
        # to be visited
        for i in range(len(self.files) - 1, -1, -1):
            if regexp.match(self.files[i]):
                del self.files[i]
                removed = removed + 1
        return removed


    def read_manifest(self):
        """Apply each line of the manifest file to 'self.files'.

        Every line is one of:
          1) simple-include: "*.py", "foo/*.py", "doc/*.html", "FAQ" --
             the matches of the glob are added
          2) simple-exclude: same, prefaced by "!" -- matching names are
             removed from the files collected so far
          3) recursive: first word is a directory, optionally followed
             by include/exclude patterns -- handled by 'search_dir()'

        Problems with a line are reported through the manifest's 'warn()'
        method and the line is otherwise ignored.
        """
        # self.files had better already be defined (and hold the
        # "automatically found" files -- Python modules and extensions,
        # README, setup script, ...)
        assert self.files is not None

        manifest = self.open_manifest(self.manifest)
        while 1:

            pattern = manifest.readline()
            if pattern is None:         # end of file
                break

            exclude = self.exclude_re.match(pattern)
            if exclude:
                pattern = exclude.group(1)

            words = string.split(pattern)
            assert words                # must have something!
            if os.name != 'posix':
                # manifest uses "/" separators; convert the first word to
                # the native form
                words[0] = apply(os.path.join, string.split(words[0], '/'))

            # First word is a directory, possibly with include/exclude
            # patterns making up the rest of the line: it's a recursive
            # pattern
            if os.path.isdir(words[0]):
                if exclude:
                    manifest.warn("exclude (!) doesn't apply to " +
                                  "whole directory trees")
                    continue

                dir_files = self.search_dir(words[0], words[1:])
                self.files.extend(dir_files)

            # Multiple words in pattern: that's a no-no unless the first
            # word is a directory name
            elif len(words) > 1:
                manifest.warn("can't have multiple words unless first word " +
                              "('%s') is a directory name" % words[0])
                continue

            # Single word, no bang: it's a "simple include pattern"
            elif not exclude:
                matches = glob(pattern)
                if matches:
                    self.files.extend(matches)
                else:
                    manifest.warn("no matches for '%s' found" % pattern)

            # Single word prefixed with a bang: it's a "simple exclude
            # pattern"
            else:
                if self.exclude_files(pattern) == 0:
                    manifest.warn("no files excluded by '%s'" % pattern)

            # if/elif/.../else on 'pattern'

        # loop over lines of 'manifest'

    # read_manifest ()


    def make_release_tree(self, base_dir, files):
        """Create the tree 'base_dir' holding a hard link to each of 'files'.

        The needed directories are created first; then each file that
        doesn't already exist under 'base_dir' is linked with 'os.link()'.
        """
        # XXX this is Unix-specific

        # First get the list of directories to create
        need_dir = {}
        for file in files:
            need_dir[os.path.join(base_dir, os.path.dirname(file))] = 1
        need_dirs = need_dir.keys()
        need_dirs.sort()

        # Now create them
        for dir in need_dirs:
            self.mkpath(dir)

        # And walk over the list of files, making a hard link for
        # each one that doesn't already exist in its corresponding
        # location under 'base_dir'

        self.announce("making hard links in %s..." % base_dir)
        for file in files:
            dest = os.path.join(base_dir, file)
            if not os.path.exists(dest):
                self.execute(os.link, (file, dest),
                             "linking %s -> %s" % (file, dest))
    # make_release_tree ()


    def make_tarball(self, base_dir):
        """Archive 'base_dir' as 'base_dir'.tar.gz by running 'tar -czf'."""

        # XXX GNU tar 1.13 has a nifty option to add a prefix directory.
        # It's pretty new, though, so we certainly can't require it -- but
        # it would be nice to take advantage of it to skip the "create a
        # tree of hardlinks" step!

        # But I am a lazy bastard, so I require GNU tar anyways.

        archive_name = base_dir + ".tar.gz"
        self.spawn(["tar", "-czf", archive_name, base_dir])


    def make_zipfile(self, base_dir):
        """Archive 'base_dir' by running 'zip -r base_dir base_dir'."""

        # This assumes the Unix 'zip' utility -- it could be easily recast
        # to use pkzip (or whatever the command-line zip creation utility
        # on Redmond's archaic CP/M knockoff is nowadays), but I'll let
        # someone who can actually test it do that.

        self.spawn(["zip", "-r", base_dir, base_dir])


    def make_distribution(self):
        """Build the release tree and each requested archive.

        The tree is named "<name>-<version>", or just "<name>" when the
        distribution has no version; a missing name becomes "UNKNOWN".
        Only the 'gztar' and 'zip' formats are acted on.
        """
        # Don't warn about missing meta-data here -- should be done
        # elsewhere.
        name = self.distribution.name or "UNKNOWN"
        version = self.distribution.version

        if version:
            base_dir = "%s-%s" % (name, version)
        else:
            base_dir = name

        # Remove any files that match "base_dir" from the fileset -- we
        # don't want to go distributing the distribution inside itself!
        self.exclude_files(base_dir + "*")

        self.make_release_tree(base_dir, self.files)
        if 'gztar' in self.formats:
            self.make_tarball(base_dir)
        if 'zip' in self.formats:
            self.make_zipfile(base_dir)

# class Dist
426
427
428# ----------------------------------------------------------------------
429# Utility functions
430
def findall(dir=os.curdir):
    """Find all files under 'dir' and return the sorted list of their names.

    The tree rooted at 'dir' is walked without recursion.  Each returned
    name is 'dir' joined with the entry's path below it, ie. the names
    carry the 'dir' prefix.  Real directories are descended into but are
    not themselves part of the result; a symbolic link to a directory is
    listed like a file and is not followed.

    Whatever 'os.listdir()' raises (eg. when 'dir' doesn't exist) is
    passed on to the caller.
    """
    found = []
    pending = [dir]                     # directories still to be listed

    while pending:
        current = pending.pop()
        for name in os.listdir(current):
            fullname = os.path.join(current, name)
            if os.path.isdir(fullname) and not os.path.islink(fullname):
                # a real directory: explore it later, but don't report
                # it as a file
                pending.append(fullname)
            else:
                found.append(fullname)

    found.sort()
    return found
452
453
454
455
456
457# ======================================================================
458# Here follows some extensive mental masturbation about how to
459# make the manifest file and search algorithm even more complex.
460# I think this is all gratuitous, really.
461
462# Hmm, something extra: want to apply an exclude pattern over a whole
463# subtree without necessarily having to explicitly include files from it,
464# ie. it should apply after gathering files by other means (simple
465# include pattern)
466# . !*~ !*.bak !#*#
467# and we also want to prune at certain directories:
468# . !RCS !CVS
469# which again should apply globally.
470#
471# possible solution:
472# - exclude pattern in a directory applies to all files found under that
473# directory
474# - subdirectories that match an exclude pattern will be pruned
475# - hmmm, to be consistent, subdirectories that match an include
476# pattern should be recursively included
477# - and this should apply to "simple" patterns too
478#
479# thus:
480#
481# examples/
482#
483# means get everything in examples/ and all subdirs;
484#
485# examples/ !*~ !#*# !*.py[co]
486#
487# means get everything under examples/ except files matching those three globs;
488#
489# ./ !RCS !CVS
490#
491# means get everything under current dir, but prune RCS/CVS directories;
492#
493# ./ !*~ !#*# !*.py[co] !RCS !CVS
494# ! build/
495# ! experimental/
496#
497# means get everything under the distribution directory except the usual
498# excludes at all levels; exclude "build" and "experimental" under the
499# distribution dir only.
500#
501# Do the former examples still work?
502#
503# distutils/ *.py
504# ! distutils/bleeding_edge.py
505#
506# means all .py files recursively found under distutils, except for the one
507# explicitly named.
508#
509# distutils/ *.py !bleeding_edge.py
510#
511# means the same, except bleeding_edge.py will be excluded wherever it's
512# found -- thus this can exclude up to one file per directory under
513# distutils.
514#
515# distutils/*.py
516# ! distutils/bleeding_edge.py
517#
518# gets exactly distutils/*.py, minus the one explicitly mentioned exclude, and
519#
520# distutils/*.py
521# distutils/ !bleeding_edge.py
522#
523# coincidentally does the same, but only because there can only be one file
524# that matches the exclude pattern. Oh, we'd still like
525#
526# distutils *.py !bleeding*.py
527# distutils/bleeding_ledge.py
528#
529# to include distutils/bleeding_ledge.py -- i.e. it should override the
530# earlier exclude pattern by virtue of appearing later in the manifest. Does
531# this conflict with the above requirements, ie. that "!RCS" and "!*~" should
532# apply everywhere? Hmm, I think it doesn't have to, as long as we're smart
533# about it. Consequence:
534#
535# . !RCS !CVS
536# distutils *
537#
538# will go ahead and include RCS and CVS files under distutils, but
539#
540# distutils *
541# . !RCS !CVS
542#
543# will do the right thing. Hmmm. I think that's OK, and an inevitable
544# consequence of the ability to override exclusions.
545
546# OK, new crack at the search algorithm.
547#
548# for pattern in manifest:
549# if dir-pattern: # ie. first word is a directory (incl. "."!)
550# dir = first word on line
551# patterns = rest of line
552# if patterns:
553# for dpattern in patterns:
554# if exclude-pattern:
555# remove from files anything matching dpattern (including pruning
556# subtrees rooted at directories that match dpattern)
557# else:
558# files.append (recursive_glob (dir, dpattern))
559# else:
# files.append (recursive_glob (dir, '*'))
561#
562# elif include-pattern: # it's a "simple include pattern"
563# files.append (glob (pattern))
564#
565# else: # it's a "simple exclude pattern"
566# remove from files anything matching pattern
567
568# The two removal algorithms might be a bit tricky:
569#
570# "remove simple exclude pattern":
571# for f in files:
572# if f matches pattern:
573# delete it
574#
575# "remove recursive exclude pattern":
576# for f in files:
577#
578# t = tail (f)
579# while t:
580# if t matches pattern:
581# delete current file
582# continue
583# t = tail (t)
584#
585# Well, that was an interesting mental exercise. I'm not completely
586# convinced it will work, nor am I convinced this level of complexity
587# is necessary. If you want to exclude RCS or CVS directories, just
588# don't bloody include them!
589
590