blob: b0cd3d619e10f92092edd7e06d1c0608dd419343 [file] [log] [blame]
Guido van Rossum00efe7e2002-07-19 17:04:46 +00001"""Strptime-related classes and functions.
2
3CLASSES:
Brett Cannon474335c2003-08-05 04:02:49 +00004 LocaleTime -- Discovers and stores locale-specific time information
Barry Warsaw4d895fa2002-09-23 22:46:49 +00005 TimeRE -- Creates regexes for pattern matching a string of text containing
Brett Cannon474335c2003-08-05 04:02:49 +00006 time information
Guido van Rossum00efe7e2002-07-19 17:04:46 +00007
8FUNCTIONS:
Raymond Hettinger1fdb6332003-03-09 07:44:42 +00009 _getlang -- Figure out what language is being used for the locale
Guido van Rossum00efe7e2002-07-19 17:04:46 +000010 strptime -- Calculates the time struct represented by the passed-in string
11
Guido van Rossum00efe7e2002-07-19 17:04:46 +000012"""
13import time
14import locale
15import calendar
16from re import compile as re_compile
Antoine Pitroufd036452008-08-19 17:56:33 +000017from re import IGNORECASE, ASCII
Brett Cannon4f35c712004-10-06 02:11:37 +000018from re import escape as re_escape
Alexander Belopolskyca94f552010-06-17 18:30:34 +000019from datetime import (date as datetime_date,
Alexander Belopolskyca94f552010-06-17 18:30:34 +000020 timedelta as datetime_timedelta,
21 timezone as datetime_timezone)
Brett Cannon474335c2003-08-05 04:02:49 +000022try:
Georg Brandl2067bfd2008-05-25 13:05:15 +000023 from _thread import allocate_lock as _thread_allocate_lock
Brett Cannon474335c2003-08-05 04:02:49 +000024except:
Georg Brandl2067bfd2008-05-25 13:05:15 +000025 from _dummy_thread import allocate_lock as _thread_allocate_lock
Guido van Rossum00efe7e2002-07-19 17:04:46 +000026
# Empty on purpose: "from <this module> import *" exports no names.
__all__ = []
Guido van Rossum00efe7e2002-07-19 17:04:46 +000028
def _getlang():
    """Return the locale setting currently in effect for LC_TIME.

    The value is whatever locale.getlocale() reports for that category.
    """
    category = locale.LC_TIME
    return locale.getlocale(category)
Barry Warsaw35816e62002-08-29 16:24:50 +000032
class LocaleTime(object):
    """Discover and hold the time-related details of the current locale.

    All textual values are stored lowercased.

    ATTRIBUTES:
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; index 0 is a dummy
                    entry supplied by the calendar module)
        a_month -- abbreviated month names (13-item list; index 0 is a
                    dummy entry supplied by the calendar module)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- non-daylight-savings and daylight-savings timezone
                    names (2-item tuple of frozensets)
        lang -- value of _getlang() when the instance was created
    """

    def __init__(self):
        """Populate every attribute.

        The helpers run in a fixed order because the date/time format
        calculation reads the weekday, month, AM/PM and timezone values
        computed before it.

        The locale is sampled at the outset and compared again once all
        attributes are set.  If it differs, some attributes may have been
        derived from one locale and some from another (for example because
        another thread changed the locale in the meantime), so ValueError
        is raised rather than keeping the mixed data.

        A timezone change that was not followed by a call to time.tzset()
        is not detected here; that is left to the programmer.

        """
        self.lang = _getlang()
        steps = (self.__calc_weekday,
                 self.__calc_month,
                 self.__calc_am_pm,
                 self.__calc_timezone,
                 self.__calc_date_time)
        for step in steps:
            step()
        if self.lang != _getlang():
            raise ValueError("locale changed during initialization")

    def __pad(self, seq, front):
        # Return a list copy of seq with '' added at the front when `front`
        # is true, otherwise at the back.
        padded = list(seq)
        position = 0 if front else len(padded)
        padded.insert(position, '')
        return padded

    def __calc_weekday(self):
        # Fill self.a_weekday and self.f_weekday from the calendar module.
        short_names = []
        long_names = []
        for day in range(7):
            short_names.append(calendar.day_abbr[day].lower())
            long_names.append(calendar.day_name[day].lower())
        self.a_weekday = short_names
        self.f_weekday = long_names

    def __calc_month(self):
        # Fill self.a_month and self.f_month from the calendar module.
        # Index 0 is the calendar module's placeholder entry.
        short_names = []
        long_names = []
        for month in range(13):
            short_names.append(calendar.month_abbr[month].lower())
            long_names.append(calendar.month_name[month].lower())
        self.a_month = short_names
        self.f_month = long_names

    def __calc_am_pm(self):
        # Fill self.am_pm by formatting a morning hour and an evening hour
        # with time.strftime("%p").  The date 1999-03-17 is simply the
        # static sample date used throughout this class.
        self.am_pm = [
            time.strftime(
                "%p",
                time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0)),
            ).lower()
            for hour in (1, 22)
        ]

    def __calc_date_time(self):
        # Derive self.LC_date_time, self.LC_date and self.LC_time by
        # formatting a sample time with %c, %x and %X and then turning the
        # recognisable pieces of the output back into format directives.

        # (1999,3,17,22,44,55,2,76,0) is used as the sample because few of
        # its numbers collide with each other.  The order of the
        # substitutions below is significant: it resolves what an
        # otherwise ambiguous piece of text stands for.
        sample = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
        substitutions = [
            ('%', '%%'),
            (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'),
            (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'),
            (self.am_pm[1], '%p'),
            ('1999', '%Y'),
            ('99', '%y'),
            ('22', '%H'),
            ('44', '%M'),
            ('55', '%S'),
            ('76', '%j'),
            ('17', '%d'),
            ('03', '%m'),
            ('3', '%m'),   # month without a leading zero
            ('2', '%w'),
            ('10', '%I'),
        ]
        for zone_names in self.timezone:
            for zone in zone_names:
                substitutions.append((zone, "%Z"))

        # Second sample, used only to tell %U from %W: 3 January falls
        # before the first Monday of this sample's year, so a Monday-based
        # week number would be formatted as 00.
        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))

        formats = []
        for directive in ('%c', '%x', '%X'):
            text = time.strftime(directive, sample).lower()
            for old, new in substitutions:
                # Missing locale information shows up as an empty string
                # (e.g. no AM/PM text, or empty timezone names); replacing
                # '' would corrupt the format, so skip it.
                if old:
                    text = text.replace(old, new)
            if '00' in time.strftime(directive, week_probe):
                week_directive = '%W'
            else:
                week_directive = '%U'
            formats.append(text.replace('11', week_directive))
        self.LC_date_time, self.LC_date, self.LC_time = formats

    def __calc_timezone(self):
        # Fill self.timezone from time.tzname.
        # The case of time.tzname[0] == time.tzname[1] while time.daylight
        # is set is not handled here; _strptime() deals with it.
        try:
            time.tzset()
        except AttributeError:
            # time.tzset is not available on every platform.
            pass
        standard = frozenset(["utc", "gmt", time.tzname[0].lower()])
        if time.daylight:
            daylight = frozenset([time.tzname[1].lower()])
        else:
            daylight = frozenset()
        self.timezone = (standard, daylight)
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000176
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000177
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The mapping is keyed by directive letter (without the leading '%') and
    each value is the regex source text that matches that directive.

    """

    def __init__(self, locale_time=None):
        """Create keys/values.

        locale_time -- object supplying the locale-specific names and
            formats (f_weekday, a_weekday, f_month, a_month, am_pm,
            timezone, LC_date_time, LC_date, LC_time).  When it is omitted
            (or falsy) a new LocaleTime instance is created.

        Order of execution is important for dependency reasons: 'W' is
        derived from 'U', and 'c', 'x' and 'X' are built by pattern() from
        the entries created before them.

        """
        if locale_time:
            self.locale_time = locale_time
        else:
            self.locale_time = LocaleTime()
        base = super()
        base.__init__({
            # The " \d" part of the regex is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'f': r"(?P<f>[0-9]{1,6})",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            #XXX: Does 'Y' need to worry about having less or more than
            #     4 digits?
            'Y': r"(?P<Y>\d\d\d\d)",
            'z': r"(?P<z>[+-]\d\d[0-5]\d)",
            'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
            'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
            # Index 0 of the month lists is a dummy entry; skip it.
            'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
            'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
            'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
            'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
                                        for tz in tz_names),
                                'Z'),
            '%': '%'})
        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
        base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
        base.__setitem__('x', self.pattern(self.locale_time.LC_date))
        base.__setitem__('X', self.pattern(self.locale_time.LC_time))

    def __seqToRE(self, to_convert, directive):
        """Convert a sequence of strings to a regex string for a directive.

        The result is a named group (named after `directive`) whose
        alternatives are the escaped values, ordered from longest to
        shortest.  This prevents a match occurring for a value that is
        also a prefix of a longer value that should have matched (e.g.,
        'abc' matching when 'abcdef' should have been the match).

        Returns '' when every value is the empty string (or there are no
        values at all), i.e. when the locale offers nothing to match.

        """
        to_convert = sorted(to_convert, key=len, reverse=True)
        if not any(to_convert):
            return ''
        alternatives = '|'.join(re_escape(stuff) for stuff in to_convert)
        return '(?P<%s>%s)' % (directive, alternatives)

    def pattern(self, format):
        """Return regex pattern text for the format string.

        Characters that would be interpreted as regex syntax are escaped,
        every run of whitespace is turned into r'\\s+', and each directive
        is replaced by its regex from this mapping.

        Raises KeyError for a directive that is not in the mapping and
        IndexError when the format ends with a lone '%'.

        """
        processed_format = ''
        # The sub() call escapes all characters that might be misconstrued
        # as regex syntax.  Cannot use re.escape since we have to deal with
        # format directives (%m, etc.).
        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        # Both strings must be raw.  In the replacement template the
        # backslash is doubled so that a literal "\s+" is inserted; a bare
        # "\s" there is an unknown escape, which re.sub() rejects.
        whitespace_replacement = re_compile(r'\s+')
        format = whitespace_replacement.sub(r'\\s+', format)
        while '%' in format:
            directive_index = format.index('%')+1
            processed_format = "%s%s%s" % (processed_format,
                                           format[:directive_index-1],
                                           self[format[directive_index]])
            format = format[directive_index+1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled, case-insensitive re object for the format string."""
        return re_compile(self.pattern(format), IGNORECASE)
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000269
# Lock guarding the two module-level caches below.
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
# TimeRE instance built for the locale in effect when it was created;
# _strptime() replaces it when _getlang() reports a different locale.
_TimeRE_cache = TimeRE()
# _strptime() empties _regex_cache once it holds more than this many entries.
_CACHE_MAX_SIZE = 5
# Maps a format string to its compiled regex.
_regex_cache = {}
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000276
def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
    """Calculate the Julian day (day of the year, 1 January being 1) from
    the year, the week of the year and the day of the week.

    day_of_week uses Monday == 0 ... Sunday == 6.  week_starts_Mon tells
    whether week_of_year counts weeks starting on Monday (true, as for %W)
    or on Sunday (false, as for %U).

    The result can be zero or negative when week 0 is combined with a
    weekday that falls before 1 January.
    """
    jan1_weekday = datetime_date(year, 1, 1).weekday()
    if not week_starts_Mon:
        # For Sunday-based weeks it is simplest to renumber the days so
        # that Sunday becomes day 0 of the week.
        jan1_weekday = (jan1_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7
    if week_of_year == 0:
        # Week 0 is the partial week before the first full week; it exists
        # only when 1 January is not the first day of a week.
        return 1 + day_of_week - jan1_weekday
    # Number of days belonging to the partial week 0.
    partial_week_days = (7 - jan1_weekday) % 7
    days_before_week = partial_week_days + 7 * (week_of_year - 1)
    return 1 + days_before_week + day_of_week
296
297
def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a 2-tuple consisting of a time struct and an int containing
    the number of microseconds based on the input string and the
    format string.

    The first item is the tuple
        (year, month, day, hour, minute, second,
         weekday, julian, tz, tzname, gmtoff)
    where tz is -1 unless a %Z value identified the zone, tzname is the
    text matched by %Z (or None) and gmtoff is the %z offset in seconds
    (or None).

    Raises TypeError when either argument is not a str, and ValueError
    when the format contains a bad directive or a stray '%', when the
    data does not match the format, or when unconverted data remains.
    """

    for index, arg in enumerate([data_string, format]):
        if not isinstance(arg, str):
            msg = "strptime() argument {} must be str, not {}"
            raise TypeError(msg.format(index, type(arg)))

    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        # Rebuild the locale-dependent regexes if the locale has changed
        # since they were created.
        if _getlang() != _TimeRE_cache.locale_time.lang:
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        locale_time = _TimeRE_cache.locale_time
        format_regex = _regex_cache.get(format)
        if not format_regex:
            try:
                format_regex = _TimeRE_cache.compile(format)
            # KeyError raised when a bad format is found; can be specified as
            # \\, in which case it was a stray % but with a space after it
            except KeyError as err:
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                # "from None": the internal lookup error is an
                # implementation detail and only clutters the traceback.
                raise ValueError("'%s' is a bad directive in format '%s'" %
                                    (bad_directive, format)) from None
            # IndexError only occurs when the format string is "%"
            except IndexError:
                raise ValueError("stray %% in format '%s'" % format) from None
            _regex_cache[format] = format_regex
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data %r does not match format %r" %
                         (data_string, format))
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])

    year = None
    month = day = 1
    hour = minute = second = fraction = 0
    tz = -1
    tzoffset = None
    # Default to -1 to signify that values not known; not critical to have,
    # though
    week_of_year = -1
    week_of_year_start = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate
    # values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            #value in the range of [00, 68] is in the century 2000, while
            #[69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = locale_time.f_month.index(found_dict['B'].lower())
        elif group_key == 'b':
            month = locale_time.a_month.index(found_dict['b'].lower())
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'f':
            s = found_dict['f']
            # Pad to always return microseconds.
            s += "0" * (6 - len(s))
            fraction = int(s)
        elif group_key == 'A':
            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
        elif group_key == 'a':
            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
        elif group_key == 'w':
            # %w counts from Sunday == 0; convert to Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key in ('U', 'W'):
            week_of_year = int(found_dict[group_key])
            if group_key == 'U':
                # U starts week on Sunday.
                week_of_year_start = 6
            else:
                # W starts week on Monday.
                week_of_year_start = 0
        elif group_key == 'z':
            # "+HHMM" / "-HHMM" -> signed offset in minutes.
            z = found_dict['z']
            tzoffset = int(z[1:3]) * 60 + int(z[3:5])
            if z.startswith("-"):
                tzoffset = -tzoffset
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            for value, tz_values in enumerate(locale_time.timezone):
                if found_zone in tz_values:
                    # Deal with bad locale setup where timezone names are the
                    # same and yet time.daylight is true; too ambiguous to
                    # be able to tell what timezone has daylight savings
                    if (time.tzname[0] == time.tzname[1] and
                            time.daylight and
                            found_zone not in ("utc", "gmt")):
                        break
                    else:
                        tz = value
                        break
    leap_year_fix = False
    if year is None and month == 2 and day == 29:
        year = 1904  # 1904 is first leap year of 20th century
        leap_year_fix = True
    elif year is None:
        year = 1900
    # If we know the week of the year and what day of that week, we can figure
    # out the Julian day of the year.
    if julian == -1 and week_of_year != -1 and weekday != -1:
        week_starts_Mon = week_of_year_start == 0
        julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                            week_starts_Mon)
        if julian <= 0:
            # Week 0 combined with a weekday that precedes 1 January: the
            # date lies in the previous year.  Re-express it relative to
            # that year so the returned day of the year is a valid
            # positive value that agrees with year/month/day.
            year -= 1
            julian += 366 if calendar.isleap(year) else 365
    # Cannot pre-calculate datetime_date() since can change in Julian
    # calculation and thus could have different value for the day of the week
    # calculation.
    if julian == -1:
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day it will
           # be accurate.
        datetime_result = datetime_date.fromordinal(
            (julian - 1) + datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday == -1:
        weekday = datetime_date(year, month, day).weekday()
    # Add timezone info
    tzname = found_dict.get("Z")
    if tzoffset is not None:
        gmtoff = tzoffset * 60
    else:
        gmtoff = None

    if leap_year_fix:
        # the caller didn't supply a year but asked for Feb 29th. We couldn't
        # use the default of 1900 for computations. We set it back to ensure
        # that February 29th is smaller than March 1st.
        year = 1900

    return (year, month, day,
            hour, minute, second,
            weekday, julian, tz, tzname, gmtoff), fraction
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000490
def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string according to format and return the result as a
    time.struct_time.

    The microseconds value computed by _strptime() is discarded.
    """
    fields, _fraction = _strptime(data_string, format)
    # Keep only as many leading fields as time.struct_time accepts.
    wanted = fields[:time._STRUCT_TM_ITEMS]
    return time.struct_time(wanted)
Alexander Belopolskyca94f552010-06-17 18:30:34 +0000496
def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string according to format and return cls(*args).

    args is year, month, day, hour, minute, second and microseconds,
    followed by a datetime timezone object when the input carried a %z
    offset.  The timezone is given the %Z name if one was matched.
    """
    parsed, microseconds = _strptime(data_string, format)
    zone_name, offset_seconds = parsed[-2:]
    ctor_args = parsed[:6] + (microseconds,)
    if offset_seconds is not None:
        delta = datetime_timedelta(seconds=offset_seconds)
        zone = (datetime_timezone(delta, zone_name) if zone_name
                else datetime_timezone(delta))
        ctor_args += (zone,)

    return cls(*ctor_args)
Alexander Belopolsky4988d7a2010-07-14 13:46:57 +0000511 return cls(*args)