blob: 4afc8fc5ff4920af62f98445d5c60f26b6a65c19 [file] [log] [blame]
"""Strptime-related classes and functions.

CLASSES:
    LocaleTime -- Discovers and/or stores locale-specific time information
    TimeRE -- Creates regexes for pattern matching a string of text containing
                time information as is returned by time.strftime()

FUNCTIONS:
    firstjulian -- Calculates the Julian date up to the first of the specified
                    year
    gregorian -- Calculates the Gregorian date based on the Julian day and
                    year
    julianday -- Calculates the Julian day since the first of the year based
                    on the Gregorian date
    dayofweek -- Calculates the day of the week from the Gregorian date.
    strptime -- Calculates the time struct represented by the passed-in string

Requires Python 2.2.1 or higher.
Can be used in Python 2.2 if the following line is added:
    >>> True = 1; False = 0
"""
import calendar
import locale
import time
from re import IGNORECASE
from re import compile as re_compile
from re import escape as re_escape
Guido van Rossum00efe7e2002-07-19 17:04:46 +000027
# Module metadata.
__author__ = "Brett Cannon"
__email__ = "drifty@bigfoot.com"

# Names exported by ``from <module> import *``.
__all__ = ["strptime"]

# Type of the object returned by compiling a regular expression.
RegexpType = type(re_compile(""))
34
Tim Peters80cebc12003-01-19 04:40:44 +000035def _getlang():
36 # Figure out what the current language is set to.
37 current_lang = locale.getlocale(locale.LC_TIME)[0]
38 if current_lang:
39 return current_lang
40 else:
41 current_lang = locale.getdefaultlocale()[0]
42 if current_lang:
43 return current_lang
44 else:
45 return ''
Barry Warsaw35816e62002-08-29 16:24:50 +000046
class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    Any value that is not passed to the constructor is computed the first
    time the corresponding attribute is read.

    ATTRIBUTES (all read-only after instance creation! Instance variables that
    store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)
    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Sequences are copied into lists.  Raises TypeError when a passed-in
        sequence does not have the required number of items (7 weekday
        names, 12 month names, 2 AM/PM strings, 2 timezone names).
        """
        if f_weekday is None:
            self.__f_weekday = None
        elif len(f_weekday) == 7:
            self.__f_weekday = list(f_weekday)
        else:
            raise TypeError("full weekday names must be a 7-item sequence")
        if a_weekday is None:
            self.__a_weekday = None
        elif len(a_weekday) == 7:
            self.__a_weekday = list(a_weekday)
        else:
            raise TypeError(
                "abbreviated weekday names must be a 7-item sequence")
        if f_month is None:
            self.__f_month = None
        elif len(f_month) == 12:
            self.__f_month = self.__pad(f_month, True)
        else:
            raise TypeError("full month names must be a 12-item sequence")
        if a_month is None:
            self.__a_month = None
        elif len(a_month) == 12:
            self.__a_month = self.__pad(a_month, True)
        else:
            raise TypeError(
                "abbreviated month names must be a 12-item sequence")
        if am_pm is None:
            self.__am_pm = None
        elif len(am_pm) == 2:
            # Copy, like every other sequence, so the instance never aliases
            # the caller's object.
            self.__am_pm = list(am_pm)
        else:
            raise TypeError("AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        self.__timezone = timezone
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            else:
                self.__timezone = self.__pad(timezone, False)
        self.__lang = lang

    def __pad(self, seq, front):
        # Return a list copy of seq with '' added to the front (front is
        # true), else to the back.
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        # Raise TypeError when trying to set an attribute.
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        # Fetch self.f_weekday.
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        # Fetch self.a_weekday.
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        # Fetch self.f_month.
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        # Fetch self.a_month.
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        # Fetch self.am_pm.
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        # Fetch self.timezone.
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        # Fetch self.LC_date_time.
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        # Fetch self.LC_date.
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        # Fetch self.LC_time.
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(
        __get_LC_date_time, __set_nothing,
        doc=
        "Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        # Fetch self.lang.
        if not self.__lang:
            self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing,
                    doc="Language used for instance")

    def __calc_weekday(self):
        # Set self.__a_weekday and self.__f_weekday using the calendar
        # module.  Values supplied to the constructor are left alone.
        a_weekday = [calendar.day_abbr[i] for i in range(7)]
        f_weekday = [calendar.day_name[i] for i in range(7)]
        if not self.__a_weekday:
            self.__a_weekday = a_weekday
        if not self.__f_weekday:
            self.__f_weekday = f_weekday

    def __calc_month(self):
        # Set self.__f_month and self.__a_month using the calendar module.
        # Values supplied to the constructor are left alone.
        a_month = [calendar.month_abbr[i] for i in range(13)]
        f_month = [calendar.month_name[i] for i in range(13)]
        if not self.__a_month:
            self.__a_month = a_month
        if not self.__f_month:
            self.__f_month = f_month

    def __calc_am_pm(self):
        # Set self.__am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        # Set self.__LC_date_time, self.__LC_date, & self.__LC_time by using
        # time.strftime().

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
        date_time = [time.strftime("%c", time_tuple),
                     time.strftime("%x", time_tuple),
                     time.strftime("%X", time_tuple)]
        replacement_pairs = (
            ('%', '%%'), (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'), (self.timezone[1], '%Z'),
            ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
            ('44', '%M'), ('55', '%S'), ('76', '%j'),
            ('17', '%d'), ('03', '%m'), ('3', '%m'),
            # '3' needed for when no leading zero.
            ('2', '%w'), ('10', '%I'))
        # Second probe date, used only to tell the two week-number
        # directives apart.
        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))
        for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Must deal with possible lack of locale info
                # manifesting itself as the empty string (e.g., Swedish's
                # lack of AM/PM info) or a platform returning a tuple of empty
                # strings (e.g., MacOS 9 having timezone as ('','')).
                if old:
                    current_format = current_format.replace(old, new)
            # A '00' in the probe output selects %W, otherwise %U.  The test
            # must compare find() against -1: the bare result is truthy for
            # "not found" and falsy for a hit at index 0.
            if time.strftime(directive, week_probe).find('00') != -1:
                U_W = '%W'
            else:
                U_W = '%U'
            date_time[offset] = current_format.replace('11', U_W)
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.__timezone by using time.tzname.
        #
        # Empty string used for matching when timezone is not used/needed such
        # as with UTC.
        self.__timezone = self.__pad(time.tzname, False)

    def __calc_lang(self):
        # Set self.__lang by using _getlang().
        self.__lang = _getlang()
300
Barry Warsaw35816e62002-08-29 16:24:50 +0000301
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000302
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The mapping goes from a directive letter (the character following '%' in
    a format string) to the regex source that matches it.  Regexes that do
    not depend on the locale are stored at creation; the others are built
    from self.locale_time the first time they are requested and then stored.
    """

    def __init__(self, locale_time=None):
        """Init inst with non-locale regexes and store LocaleTime object.

        locale_time -- object supplying the locale-specific names and format
            strings.  When omitted, a new LocaleTime is created for this
            instance; a default evaluated in the signature would be shared
            by every TimeRE and could never be refreshed.
        """
        #XXX: Does 'Y' need to worry about having less or more than 4 digits?
        base = super(TimeRE, self)
        base.__init__({
            # The " \d" option is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        # Same digits as 'U', but under its own group name so that a format
        # containing both %U and %W still compiles.
        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
        if locale_time is None:
            locale_time = LocaleTime()
        self.locale_time = locale_time

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        Raises KeyError when fetch is neither stored nor a directive this
        class knows how to construct.
        """
        try:
            return super(TimeRE, self).__getitem__(fetch)
        except KeyError:
            constructors = {
                'A': lambda: self.__seqToRE(self.locale_time.f_weekday, fetch),
                'a': lambda: self.__seqToRE(self.locale_time.a_weekday, fetch),
                'B': lambda: self.__seqToRE(self.locale_time.f_month[1:],
                                            fetch),
                'b': lambda: self.__seqToRE(self.locale_time.a_month[1:],
                                            fetch),
                'c': lambda: self.pattern(self.locale_time.LC_date_time),
                'p': lambda: self.__seqToRE(self.locale_time.am_pm, fetch),
                'x': lambda: self.pattern(self.locale_time.LC_date),
                'X': lambda: self.pattern(self.locale_time.LC_time),
                'Z': lambda: self.__seqToRE(self.locale_time.timezone, fetch),
                '%': lambda: '%',
                }
            if fetch in constructors:
                self[fetch] = constructors[fetch]()
                return self[fetch]
            else:
                raise

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching a directive.

        Returns '' when every item is the empty string (no locale info).
        Otherwise returns a group named after directive whose alternatives
        are the items, longest first and regex-escaped so that they only
        match literally.
        """
        if not [value for value in to_convert if value != '']:
            return ''
        # Longest first, in case names in the locale only differ by a suffix
        # and thus want the name with the suffix to match first.  The
        # position keeps the original order among items of equal length.
        by_length = [(-len(value), position, value)
                     for position, value in enumerate(to_convert)]
        by_length.sort()
        alternatives = [re_escape(entry[2]) for entry in by_length]
        return '(?P<%s>%s)' % (directive, '|'.join(alternatives))

    def pattern(self, format):
        """Return re pattern for the format string.

        Characters of format that are special in a regex are escaped so they
        match literally, each run of whitespace matches any amount of
        whitespace, and each directive is replaced by its regex.
        """
        processed_format = ''
        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        whitespace_replacement = re_compile(r'\s+')
        # The doubled backslash is needed because the replacement template
        # processes escapes itself; the text inserted is \s*.
        format = whitespace_replacement.sub(r'\\s*', format)
        while '%' in format:
            directive_index = format.index('%') + 1
            processed_format = "%s%s%s" % (processed_format,
                                           format[:directive_index - 1],
                                           self[format[directive_index]])
            format = format[directive_index + 1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled re object for the format string."""
        return re_compile(self.pattern(format), IGNORECASE)
398
# Cached TimeRE; probably only need one instance ever so cache it for
# performance.  It is handed its own LocaleTime so that replacing the cache
# after a language change really discards the old locale data.
_locale_cache = TimeRE(LocaleTime())
# Cached regex objects; same reason as for TimeRE cache
_regex_cache = dict()

def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input data and the format string.

    data_string -- text to parse
    format -- format string made of literal text and '%' directives

    Fields that format does not supply default to 1900-01-01 00:00:00 with a
    daylight-savings flag of -1.  The weekday and the Julian day are
    calculated from the date unless they were parsed; a parsed Julian day
    overrides the month and day.

    Raises ValueError when data_string does not match format.
    """
    global _locale_cache
    global _regex_cache
    # If the language changes, caches are invalidated, so rebuild them
    if _locale_cache.locale_time.lang != _getlang():
        _locale_cache = TimeRE(LocaleTime())
        _regex_cache.clear()
    # Fetched only after the check above so that the names looked up below
    # always belong to the same locale data the regex is built from.
    locale_time = _locale_cache.locale_time
    format_regex = _regex_cache.get(format)
    if format_regex is None:
        # Limit regex cache size to prevent major bloating of the module;
        # The value 5 is arbitrary
        if len(_regex_cache) > 5:
            _regex_cache.clear()
        format_regex = _locale_cache.compile(format)
        _regex_cache[format] = format_regex
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data did not match format")
    year = 1900
    month = day = 1
    hour = minute = second = 0
    tz = -1
    # Defaulted to -1 so as to signal using functions to calc values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            # value in the range of [00, 68] is in the century 2000, while
            # [69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = _insensitiveindex(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = _insensitiveindex(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            # Compared with ==, not "is": the identity of equal strings is
            # not guaranteed.
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0].lower()):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1].lower():
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _insensitiveindex(locale_time.f_weekday,
                                        found_dict['A'])
        elif group_key == 'a':
            weekday = _insensitiveindex(locale_time.a_weekday,
                                        found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the result counts from Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            found_zone = found_dict['Z'].lower()
            if locale_time.timezone[0] == locale_time.timezone[1]:
                pass  # Deals with bad locale setup where timezone info is
                      # the same; first found on FreeBSD 4.4.
            elif locale_time.timezone[0].lower() == found_zone:
                tz = 0
            elif locale_time.timezone[1].lower() == found_zone:
                tz = 1
            elif locale_time.timezone[2].lower() == found_zone:
                tz = -1
    #XXX <bc>: If calculating fxns are never exposed to the general
    #populous then just inline calculations.  Also might be able to use
    #``datetime`` and the methods it provides.
    if julian == -1:
        julian = julianday(year, month, day)
    else:  # Assuming that if they bothered to include Julian day it will
           # be accurate
        year, month, day = gregorian(julian, year)
    if weekday == -1:
        weekday = dayofweek(year, month, day)
    return time.struct_time((year, month, day,
                             hour, minute, second,
                             weekday, julian, tz))
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000511
Barry Warsaw4d895fa2002-09-23 22:46:49 +0000512def _insensitiveindex(lst, findme):
513 # Perform a case-insensitive index search.
514
515 #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
516 # just lowercase when LocaleTime sets its vars and lowercasing
517 # search values.
518 findme = findme.lower()
519 for key,item in enumerate(lst):
520 if item.lower() == findme:
521 return key
522 else:
523 raise ValueError("value not in list")
524
def firstjulian(year):
    """Calculate the Julian date up until the first of the year."""
    scaled_years = 146097 * (year + 4799)
    whole_days = scaled_years // 400
    return whole_days - 31738
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000528
def julianday(year, month, day):
    """Calculate the Julian day since the beginning of the year.

    Calculated from the Gregorian date; January 1 yields 1.
    """
    # shift is 1 for January and February and 0 for every other month.
    shift = (14 - month) // 12
    month_term = ((153 * (month + (12 * shift) - 3)) + 2) // 5
    year_term = (146097 * (year + 4800 - shift)) // 400
    absolute = day - 32045 + month_term + year_term
    return absolute - firstjulian(year) + 1
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000537
def gregorian(julian, year):
    """Return 3-item list containing Gregorian date based on the Julian day.

    julian -- day of the year
    year -- year that julian is counted in

    The list is [year, month, day].
    """
    absolute = 32043 + julian + firstjulian(year)
    centuries = ((4 * absolute) + 3) // 146097
    in_century = absolute - ((146097 * centuries) // 4)
    years = ((4 * in_century) + 3) // 1461
    in_year = in_century - ((1461 * years) // 4)
    shifted_month = ((5 * in_year) + 2) // 153
    # carry is 1 when the shifted month count runs past December.
    carry = shifted_month // 10
    found_day = 1 + in_year - (((153 * shifted_month) + 2) // 5)
    found_month = shifted_month + 3 - (12 * carry)
    found_year = (100 * centuries) + years - 4800 + carry
    return [found_year, found_month, found_day]
550
def dayofweek(year, month, day):
    """Calculate the day of the week (Monday is 0)."""
    # shift is 1 for January and February and 0 for every other month.
    shift = (14 - month) // 12
    shifted_year = year - shift
    shifted_month = month + (12 * shift) - 2
    raw = (day + shifted_year + ((97 * shifted_year) // 400)
           + ((31 * shifted_month) // 12)) % 7
    # Rotate by one place: a raw 0 becomes 6, any other value drops by one.
    return (raw + 6) % 7