blob: 76b667d1543e990b515f6593a4526180629d6710 [file] [log] [blame]
Paul Ganssle62972d92020-05-16 04:20:06 -04001#include "Python.h"
2#include "structmember.h"
3
4#include <ctype.h>
5#include <stddef.h>
6#include <stdint.h>
7
8#include "datetime.h"
9
10// Imports
11static PyObject *io_open = NULL;
12static PyObject *_tzpath_find_tzfile = NULL;
13static PyObject *_common_mod = NULL;
14
15typedef struct TransitionRuleType TransitionRuleType;
16typedef struct StrongCacheNode StrongCacheNode;
17
18typedef struct {
19 PyObject *utcoff;
20 PyObject *dstoff;
21 PyObject *tzname;
22 long utcoff_seconds;
23} _ttinfo;
24
25typedef struct {
26 _ttinfo std;
27 _ttinfo dst;
28 int dst_diff;
29 TransitionRuleType *start;
30 TransitionRuleType *end;
31 unsigned char std_only;
32} _tzrule;
33
34typedef struct {
35 PyDateTime_TZInfo base;
36 PyObject *key;
37 PyObject *file_repr;
38 PyObject *weakreflist;
Pablo Galindoe4799b92020-05-27 21:48:12 +010039 size_t num_transitions;
40 size_t num_ttinfos;
Paul Ganssle62972d92020-05-16 04:20:06 -040041 int64_t *trans_list_utc;
42 int64_t *trans_list_wall[2];
43 _ttinfo **trans_ttinfos; // References to the ttinfo for each transition
44 _ttinfo *ttinfo_before;
45 _tzrule tzrule_after;
46 _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation
47 unsigned char fixed_offset;
48 unsigned char source;
49} PyZoneInfo_ZoneInfo;
50
51struct TransitionRuleType {
52 int64_t (*year_to_timestamp)(TransitionRuleType *, int);
53};
54
55typedef struct {
56 TransitionRuleType base;
57 uint8_t month;
58 uint8_t week;
59 uint8_t day;
60 int8_t hour;
61 int8_t minute;
62 int8_t second;
63} CalendarRule;
64
65typedef struct {
66 TransitionRuleType base;
67 uint8_t julian;
68 unsigned int day;
69 int8_t hour;
70 int8_t minute;
71 int8_t second;
72} DayRule;
73
74struct StrongCacheNode {
75 StrongCacheNode *next;
76 StrongCacheNode *prev;
77 PyObject *key;
78 PyObject *zone;
79};
80
81static PyTypeObject PyZoneInfo_ZoneInfoType;
82
83// Globals
84static PyObject *TIMEDELTA_CACHE = NULL;
85static PyObject *ZONEINFO_WEAK_CACHE = NULL;
86static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
87static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
88
89static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
90
91// Constants
92static const int EPOCHORDINAL = 719163;
93static int DAYS_IN_MONTH[] = {
94 -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
95};
96
97static int DAYS_BEFORE_MONTH[] = {
98 -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
99};
100
101static const int SOURCE_NOCACHE = 0;
102static const int SOURCE_CACHE = 1;
103static const int SOURCE_FILE = 2;
104
105// Forward declarations
106static int
107load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
108static void
109utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
110 unsigned char *isdsts, size_t num_transitions,
111 size_t num_ttinfos);
112static int
113ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
114 int64_t *trans_local[2], size_t num_ttinfos,
115 size_t num_transitions);
116
117static int
118parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
119
Pablo Galindoe4799b92020-05-27 21:48:12 +0100120static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400121parse_abbr(const char *const p, PyObject **abbr);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100122static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400123parse_tz_delta(const char *const p, long *total_seconds);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100124static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400125parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
126 int8_t *second);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100127static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400128parse_transition_rule(const char *const p, TransitionRuleType **out);
129
130static _ttinfo *
131find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
132static _ttinfo *
133find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
134 unsigned char *fold);
135
136static int
137build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
138static void
139xdecref_ttinfo(_ttinfo *ttinfo);
140static int
141ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
142
143static int
144build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
145 long dst_offset, TransitionRuleType *start,
146 TransitionRuleType *end, _tzrule *out);
147static void
148free_tzrule(_tzrule *tzrule);
149
150static PyObject *
151load_timedelta(long seconds);
152
153static int
154get_local_timestamp(PyObject *dt, int64_t *local_ts);
155static _ttinfo *
156find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
157
158static int
159ymd_to_ord(int y, int m, int d);
160static int
161is_leap_year(int year);
162
163static size_t
164_bisect(const int64_t value, const int64_t *arr, size_t size);
165
166static void
167eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
168static void
169clear_strong_cache(const PyTypeObject *const type);
170static void
171update_strong_cache(const PyTypeObject *const type, PyObject *key,
172 PyObject *zone);
173static PyObject *
174zone_from_strong_cache(const PyTypeObject *const type, PyObject *key);
175
176static PyObject *
177zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
178{
179 PyObject *file_obj = NULL;
180 PyObject *file_path = NULL;
181
182 file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
183 if (file_path == NULL) {
184 return NULL;
185 }
186 else if (file_path == Py_None) {
187 file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
188 if (file_obj == NULL) {
189 Py_DECREF(file_path);
190 return NULL;
191 }
192 }
193
194 PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
195 if (self == NULL) {
196 goto error;
197 }
198
199 if (file_obj == NULL) {
200 file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
201 if (file_obj == NULL) {
202 goto error;
203 }
204 }
205
206 if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
207 goto error;
208 }
209
210 PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
211 Py_DECREF(file_obj);
212 file_obj = NULL;
213 if (rv == NULL) {
214 goto error;
215 }
216 Py_DECREF(rv);
217
218 ((PyZoneInfo_ZoneInfo *)self)->key = key;
219 Py_INCREF(key);
220
221 goto cleanup;
222error:
223 Py_XDECREF(self);
224 self = NULL;
225cleanup:
226 if (file_obj != NULL) {
Zackery Spytzeca25492020-07-20 06:51:26 -0600227 PyObject *exc, *val, *tb;
228 PyErr_Fetch(&exc, &val, &tb);
Paul Ganssle62972d92020-05-16 04:20:06 -0400229 PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
Zackery Spytzeca25492020-07-20 06:51:26 -0600230 _PyErr_ChainExceptions(exc, val, tb);
231 if (tmp == NULL) {
232 Py_CLEAR(self);
233 }
234 Py_XDECREF(tmp);
Paul Ganssle62972d92020-05-16 04:20:06 -0400235 Py_DECREF(file_obj);
236 }
237 Py_DECREF(file_path);
238 return self;
239}
240
241static PyObject *
242get_weak_cache(PyTypeObject *type)
243{
244 if (type == &PyZoneInfo_ZoneInfoType) {
245 return ZONEINFO_WEAK_CACHE;
246 }
247 else {
248 PyObject *cache =
249 PyObject_GetAttrString((PyObject *)type, "_weak_cache");
250 // We are assuming that the type lives at least as long as the function
251 // that calls get_weak_cache, and that it holds a reference to the
252 // cache, so we'll return a "borrowed reference".
253 Py_XDECREF(cache);
254 return cache;
255 }
256}
257
258static PyObject *
259zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
260{
261 PyObject *key = NULL;
262 static char *kwlist[] = {"key", NULL};
263 if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
264 return NULL;
265 }
266
267 PyObject *instance = zone_from_strong_cache(type, key);
268 if (instance != NULL) {
269 return instance;
270 }
271
272 PyObject *weak_cache = get_weak_cache(type);
273 instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
274 if (instance == NULL) {
275 return NULL;
276 }
277
278 if (instance == Py_None) {
279 Py_DECREF(instance);
280 PyObject *tmp = zoneinfo_new_instance(type, key);
281 if (tmp == NULL) {
282 return NULL;
283 }
284
285 instance =
286 PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
Paul Ganssle62972d92020-05-16 04:20:06 -0400287 Py_DECREF(tmp);
Paul Ganssle62972d92020-05-16 04:20:06 -0400288 if (instance == NULL) {
289 return NULL;
290 }
Gregory P. Smithd780fa72020-06-22 00:39:28 -0700291 ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
Paul Ganssle62972d92020-05-16 04:20:06 -0400292 }
293
294 update_strong_cache(type, key, instance);
295 return instance;
296}
297
298static void
299zoneinfo_dealloc(PyObject *obj_self)
300{
301 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
302
303 if (self->weakreflist != NULL) {
304 PyObject_ClearWeakRefs(obj_self);
305 }
306
307 if (self->trans_list_utc != NULL) {
308 PyMem_Free(self->trans_list_utc);
309 }
310
311 for (size_t i = 0; i < 2; i++) {
312 if (self->trans_list_wall[i] != NULL) {
313 PyMem_Free(self->trans_list_wall[i]);
314 }
315 }
316
317 if (self->_ttinfos != NULL) {
318 for (size_t i = 0; i < self->num_ttinfos; ++i) {
319 xdecref_ttinfo(&(self->_ttinfos[i]));
320 }
321 PyMem_Free(self->_ttinfos);
322 }
323
324 if (self->trans_ttinfos != NULL) {
325 PyMem_Free(self->trans_ttinfos);
326 }
327
328 free_tzrule(&(self->tzrule_after));
329
330 Py_XDECREF(self->key);
331 Py_XDECREF(self->file_repr);
332
333 Py_TYPE(self)->tp_free((PyObject *)self);
334}
335
336static PyObject *
337zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
338{
339 PyObject *file_obj = NULL;
340 PyObject *file_repr = NULL;
341 PyObject *key = Py_None;
342 PyZoneInfo_ZoneInfo *self = NULL;
343
344 static char *kwlist[] = {"", "key", NULL};
345 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
346 &key)) {
347 return NULL;
348 }
349
350 PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
351 self = (PyZoneInfo_ZoneInfo *)obj_self;
352 if (self == NULL) {
353 return NULL;
354 }
355
356 file_repr = PyUnicode_FromFormat("%R", file_obj);
357 if (file_repr == NULL) {
358 goto error;
359 }
360
361 if (load_data(self, file_obj)) {
362 goto error;
363 }
364
365 self->source = SOURCE_FILE;
366 self->file_repr = file_repr;
367 self->key = key;
368 Py_INCREF(key);
369
370 return obj_self;
371error:
372 Py_XDECREF(file_repr);
373 Py_XDECREF(self);
374 return NULL;
375}
376
377static PyObject *
378zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
379{
380 static char *kwlist[] = {"key", NULL};
381 PyObject *key = NULL;
382 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
383 return NULL;
384 }
385
386 PyObject *out = zoneinfo_new_instance(cls, key);
387 if (out != NULL) {
388 ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
389 }
390
391 return out;
392}
393
394static PyObject *
395zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
396{
397 PyObject *only_keys = NULL;
398 static char *kwlist[] = {"only_keys", NULL};
399
400 if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
401 &only_keys))) {
402 return NULL;
403 }
404
405 PyTypeObject *type = (PyTypeObject *)cls;
406 PyObject *weak_cache = get_weak_cache(type);
407
408 if (only_keys == NULL || only_keys == Py_None) {
409 PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
410 if (rv != NULL) {
411 Py_DECREF(rv);
412 }
413
414 clear_strong_cache(type);
Paul Ganssle62972d92020-05-16 04:20:06 -0400415 }
416 else {
417 PyObject *item = NULL;
418 PyObject *pop = PyUnicode_FromString("pop");
419 if (pop == NULL) {
420 return NULL;
421 }
422
423 PyObject *iter = PyObject_GetIter(only_keys);
424 if (iter == NULL) {
425 Py_DECREF(pop);
426 return NULL;
427 }
428
429 while ((item = PyIter_Next(iter))) {
430 // Remove from strong cache
431 eject_from_strong_cache(type, item);
432
433 // Remove from weak cache
434 PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
435 Py_None, NULL);
436
437 Py_DECREF(item);
438 if (tmp == NULL) {
439 break;
440 }
441 Py_DECREF(tmp);
442 }
443 Py_DECREF(iter);
444 Py_DECREF(pop);
445 }
446
447 if (PyErr_Occurred()) {
448 return NULL;
449 }
450
451 Py_RETURN_NONE;
452}
453
454static PyObject *
455zoneinfo_utcoffset(PyObject *self, PyObject *dt)
456{
457 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
458 if (tti == NULL) {
459 return NULL;
460 }
461 Py_INCREF(tti->utcoff);
462 return tti->utcoff;
463}
464
465static PyObject *
466zoneinfo_dst(PyObject *self, PyObject *dt)
467{
468 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
469 if (tti == NULL) {
470 return NULL;
471 }
472 Py_INCREF(tti->dstoff);
473 return tti->dstoff;
474}
475
476static PyObject *
477zoneinfo_tzname(PyObject *self, PyObject *dt)
478{
479 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
480 if (tti == NULL) {
481 return NULL;
482 }
483 Py_INCREF(tti->tzname);
484 return tti->tzname;
485}
486
Zackery Spytz2e4dd332020-09-23 12:43:45 -0600487#define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO
Paul Ganssle62972d92020-05-16 04:20:06 -0400488
489static PyObject *
490zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
491{
492 if (!PyDateTime_Check(dt)) {
493 PyErr_SetString(PyExc_TypeError,
494 "fromutc: argument must be a datetime");
495 return NULL;
496 }
497 if (GET_DT_TZINFO(dt) != obj_self) {
498 PyErr_SetString(PyExc_ValueError,
499 "fromutc: dt.tzinfo "
500 "is not self");
501 return NULL;
502 }
503
504 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
505
506 int64_t timestamp;
507 if (get_local_timestamp(dt, &timestamp)) {
508 return NULL;
509 }
510 size_t num_trans = self->num_transitions;
511
512 _ttinfo *tti = NULL;
513 unsigned char fold = 0;
514
515 if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
516 tti = self->ttinfo_before;
517 }
518 else if (num_trans == 0 ||
519 timestamp > self->trans_list_utc[num_trans - 1]) {
520 tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
521 PyDateTime_GET_YEAR(dt), &fold);
522
523 // Immediately after the last manual transition, the fold/gap is
524 // between self->trans_ttinfos[num_transitions - 1] and whatever
525 // ttinfo applies immediately after the last transition, not between
526 // the STD and DST rules in the tzrule_after, so we may need to
527 // adjust the fold value.
528 if (num_trans) {
529 _ttinfo *tti_prev = NULL;
530 if (num_trans == 1) {
531 tti_prev = self->ttinfo_before;
532 }
533 else {
534 tti_prev = self->trans_ttinfos[num_trans - 2];
535 }
536 int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
537 if (diff > 0 &&
538 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
539 fold = 1;
540 }
541 }
542 }
543 else {
544 size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
545 _ttinfo *tti_prev = NULL;
546
547 if (idx >= 2) {
548 tti_prev = self->trans_ttinfos[idx - 2];
549 tti = self->trans_ttinfos[idx - 1];
550 }
551 else {
552 tti_prev = self->ttinfo_before;
553 tti = self->trans_ttinfos[0];
554 }
555
556 // Detect fold
557 int64_t shift =
558 (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
559 if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
560 fold = 1;
561 }
562 }
563
564 PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
565 if (tmp == NULL) {
566 return NULL;
567 }
568
569 if (fold) {
570 if (PyDateTime_CheckExact(tmp)) {
571 ((PyDateTime_DateTime *)tmp)->fold = 1;
572 dt = tmp;
573 }
574 else {
575 PyObject *replace = PyObject_GetAttrString(tmp, "replace");
576 PyObject *args = PyTuple_New(0);
577 PyObject *kwargs = PyDict_New();
578
579 Py_DECREF(tmp);
580 if (args == NULL || kwargs == NULL || replace == NULL) {
581 Py_XDECREF(args);
582 Py_XDECREF(kwargs);
583 Py_XDECREF(replace);
584 return NULL;
585 }
586
587 dt = NULL;
588 if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) {
589 dt = PyObject_Call(replace, args, kwargs);
590 }
591
592 Py_DECREF(args);
593 Py_DECREF(kwargs);
594 Py_DECREF(replace);
595
596 if (dt == NULL) {
597 return NULL;
598 }
599 }
600 }
601 else {
602 dt = tmp;
603 }
604 return dt;
605}
606
607static PyObject *
608zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
609{
610 PyObject *rv = NULL;
611 const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
612 if (!(self->key == Py_None)) {
613 rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
614 }
615 else {
616 assert(PyUnicode_Check(self->file_repr));
617 rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
618 self->file_repr);
619 }
620
621 return rv;
622}
623
624static PyObject *
625zoneinfo_str(PyZoneInfo_ZoneInfo *self)
626{
627 if (!(self->key == Py_None)) {
628 Py_INCREF(self->key);
629 return self->key;
630 }
631 else {
632 return zoneinfo_repr(self);
633 }
634}
635
636/* Pickles the ZoneInfo object by key and source.
637 *
638 * ZoneInfo objects are pickled by reference to the TZif file that they came
639 * from, which means that the exact transitions may be different or the file
640 * may not un-pickle if the data has changed on disk in the interim.
641 *
642 * It is necessary to include a bit indicating whether or not the object
643 * was constructed from the cache, because from-cache objects will hit the
644 * unpickling process's cache, whereas no-cache objects will bypass it.
645 *
646 * Objects constructed from ZoneInfo.from_file cannot be pickled.
647 */
648static PyObject *
649zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
650{
651 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
652 if (self->source == SOURCE_FILE) {
653 // Objects constructed from files cannot be pickled.
654 PyObject *pickle = PyImport_ImportModule("pickle");
655 if (pickle == NULL) {
656 return NULL;
657 }
658
659 PyObject *pickle_error =
660 PyObject_GetAttrString(pickle, "PicklingError");
661 Py_DECREF(pickle);
662 if (pickle_error == NULL) {
663 return NULL;
664 }
665
666 PyErr_Format(pickle_error,
667 "Cannot pickle a ZoneInfo file from a file stream.");
668 Py_DECREF(pickle_error);
669 return NULL;
670 }
671
672 unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
673 PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
674
675 if (constructor == NULL) {
676 return NULL;
677 }
678
679 PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
680 Py_DECREF(constructor);
681 return rv;
682}
683
684static PyObject *
685zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
686{
687 PyObject *key;
688 unsigned char from_cache;
689 if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
690 return NULL;
691 }
692
693 if (from_cache) {
694 PyObject *val_args = Py_BuildValue("(O)", key);
695 if (val_args == NULL) {
696 return NULL;
697 }
698
699 PyObject *rv = zoneinfo_new(cls, val_args, NULL);
700
701 Py_DECREF(val_args);
702 return rv;
703 }
704 else {
705 return zoneinfo_new_instance(cls, key);
706 }
707}
708
709/* It is relatively expensive to construct new timedelta objects, and in most
710 * cases we're looking at a relatively small number of timedeltas, such as
711 * integer number of hours, etc. We will keep a cache so that we construct
712 * a minimal number of these.
713 *
714 * Possibly this should be replaced with an LRU cache so that it's not possible
715 * for the memory usage to explode from this, but in order for this to be a
716 * serious problem, one would need to deliberately craft a malicious time zone
717 * file with many distinct offsets. As of tzdb 2019c, loading every single zone
718 * fills the cache with ~450 timedeltas for a total size of ~12kB.
719 *
720 * This returns a new reference to the timedelta.
721 */
722static PyObject *
723load_timedelta(long seconds)
724{
Serhiy Storchakafb5db7e2020-10-26 08:43:39 +0200725 PyObject *rv;
Paul Ganssle62972d92020-05-16 04:20:06 -0400726 PyObject *pyoffset = PyLong_FromLong(seconds);
727 if (pyoffset == NULL) {
728 return NULL;
729 }
Serhiy Storchakafb5db7e2020-10-26 08:43:39 +0200730 rv = PyDict_GetItemWithError(TIMEDELTA_CACHE, pyoffset);
731 if (rv == NULL) {
732 if (PyErr_Occurred()) {
733 goto error;
734 }
Paul Ganssle62972d92020-05-16 04:20:06 -0400735 PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
736 0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
737
738 if (tmp == NULL) {
739 goto error;
740 }
741
742 rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
743 Py_DECREF(tmp);
744 }
Paul Ganssle62972d92020-05-16 04:20:06 -0400745
Serhiy Storchakafb5db7e2020-10-26 08:43:39 +0200746 Py_XINCREF(rv);
Paul Ganssle62972d92020-05-16 04:20:06 -0400747 Py_DECREF(pyoffset);
Paul Ganssle62972d92020-05-16 04:20:06 -0400748 return rv;
749error:
750 Py_DECREF(pyoffset);
751 return NULL;
752}
753
754/* Constructor for _ttinfo object - this starts by initializing the _ttinfo
755 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
756 * initialized _ttinfo objects.
757 */
758static int
759build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
760{
761 out->utcoff = NULL;
762 out->dstoff = NULL;
763 out->tzname = NULL;
764
765 out->utcoff_seconds = utcoffset;
766 out->utcoff = load_timedelta(utcoffset);
767 if (out->utcoff == NULL) {
768 return -1;
769 }
770
771 out->dstoff = load_timedelta(dstoffset);
772 if (out->dstoff == NULL) {
773 return -1;
774 }
775
776 out->tzname = tzname;
777 Py_INCREF(tzname);
778
779 return 0;
780}
781
782/* Decrease reference count on any non-NULL members of a _ttinfo */
783static void
784xdecref_ttinfo(_ttinfo *ttinfo)
785{
786 if (ttinfo != NULL) {
787 Py_XDECREF(ttinfo->utcoff);
788 Py_XDECREF(ttinfo->dstoff);
789 Py_XDECREF(ttinfo->tzname);
790 }
791}
792
793/* Equality function for _ttinfo. */
794static int
795ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
796{
797 int rv;
798 if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
799 1) {
800 goto end;
801 }
802
803 if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
804 1) {
805 goto end;
806 }
807
808 if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
809 1) {
810 goto end;
811 }
812end:
813 return rv;
814}
815
816/* Given a file-like object, this populates a ZoneInfo object
817 *
818 * The current version calls into a Python function to read the data from
819 * file into Python objects, and this translates those Python objects into
820 * C values and calculates derived values (e.g. dstoff) in C.
821 *
822 * This returns 0 on success and -1 on failure.
823 *
824 * The function will never return while `self` is partially initialized —
825 * the object only needs to be freed / deallocated if this succeeds.
826 */
827static int
828load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
829{
830 PyObject *data_tuple = NULL;
831
832 long *utcoff = NULL;
833 long *dstoff = NULL;
834 size_t *trans_idx = NULL;
835 unsigned char *isdst = NULL;
836
837 self->trans_list_utc = NULL;
838 self->trans_list_wall[0] = NULL;
839 self->trans_list_wall[1] = NULL;
840 self->trans_ttinfos = NULL;
841 self->_ttinfos = NULL;
842 self->file_repr = NULL;
843
844 size_t ttinfos_allocated = 0;
845
846 data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
847
848 if (data_tuple == NULL) {
849 goto error;
850 }
851
852 if (!PyTuple_CheckExact(data_tuple)) {
853 PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
854 data_tuple);
855 goto error;
856 }
857
858 // Unpack the data tuple
859 PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
860 if (trans_idx_list == NULL) {
861 goto error;
862 }
863
864 PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
865 if (trans_utc == NULL) {
866 goto error;
867 }
868
869 PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
870 if (utcoff_list == NULL) {
871 goto error;
872 }
873
874 PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
875 if (isdst_list == NULL) {
876 goto error;
877 }
878
879 PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
880 if (abbr == NULL) {
881 goto error;
882 }
883
884 PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
885 if (tz_str == NULL) {
886 goto error;
887 }
888
889 // Load the relevant sizes
890 Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100891 if (num_transitions < 0) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400892 goto error;
893 }
894
895 Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100896 if (num_ttinfos < 0) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400897 goto error;
898 }
899
900 self->num_transitions = (size_t)num_transitions;
901 self->num_ttinfos = (size_t)num_ttinfos;
902
903 // Load the transition indices and list
904 self->trans_list_utc =
905 PyMem_Malloc(self->num_transitions * sizeof(int64_t));
906 trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
907
Pablo Galindoe4799b92020-05-27 21:48:12 +0100908 for (size_t i = 0; i < self->num_transitions; ++i) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400909 PyObject *num = PyTuple_GetItem(trans_utc, i);
910 if (num == NULL) {
911 goto error;
912 }
913 self->trans_list_utc[i] = PyLong_AsLongLong(num);
914 if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
915 goto error;
916 }
917
918 num = PyTuple_GetItem(trans_idx_list, i);
919 if (num == NULL) {
920 goto error;
921 }
922
923 Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
924 if (cur_trans_idx == -1) {
925 goto error;
926 }
927
928 trans_idx[i] = (size_t)cur_trans_idx;
929 if (trans_idx[i] > self->num_ttinfos) {
930 PyErr_Format(
931 PyExc_ValueError,
932 "Invalid transition index found while reading TZif: %zd",
933 cur_trans_idx);
934
935 goto error;
936 }
937 }
938
939 // Load UTC offsets and isdst (size num_ttinfos)
940 utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
941 isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
942
943 if (utcoff == NULL || isdst == NULL) {
944 goto error;
945 }
Pablo Galindoe4799b92020-05-27 21:48:12 +0100946 for (size_t i = 0; i < self->num_ttinfos; ++i) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400947 PyObject *num = PyTuple_GetItem(utcoff_list, i);
948 if (num == NULL) {
949 goto error;
950 }
951
952 utcoff[i] = PyLong_AsLong(num);
953 if (utcoff[i] == -1 && PyErr_Occurred()) {
954 goto error;
955 }
956
957 num = PyTuple_GetItem(isdst_list, i);
958 if (num == NULL) {
959 goto error;
960 }
961
962 int isdst_with_error = PyObject_IsTrue(num);
963 if (isdst_with_error == -1) {
964 goto error;
965 }
966 else {
967 isdst[i] = (unsigned char)isdst_with_error;
968 }
969 }
970
971 dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
972 if (dstoff == NULL) {
973 goto error;
974 }
975
976 // Derive dstoff and trans_list_wall from the information we've loaded
977 utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
978 self->num_ttinfos);
979
980 if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
981 self->trans_list_wall, self->num_ttinfos,
982 self->num_transitions)) {
983 goto error;
984 }
985
986 // Build _ttinfo objects from utcoff, dstoff and abbr
987 self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
988 for (size_t i = 0; i < self->num_ttinfos; ++i) {
989 PyObject *tzname = PyTuple_GetItem(abbr, i);
990 if (tzname == NULL) {
991 goto error;
992 }
993
994 ttinfos_allocated++;
995 if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
996 goto error;
997 }
998 }
999
1000 // Build our mapping from transition to the ttinfo that applies
1001 self->trans_ttinfos =
1002 PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1003 for (size_t i = 0; i < self->num_transitions; ++i) {
1004 size_t ttinfo_idx = trans_idx[i];
1005 assert(ttinfo_idx < self->num_ttinfos);
1006 self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1007 }
1008
1009 // Set ttinfo_before to the first non-DST transition
1010 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1011 if (!isdst[i]) {
1012 self->ttinfo_before = &(self->_ttinfos[i]);
1013 break;
1014 }
1015 }
1016
1017 // If there are only DST ttinfos, pick the first one, if there are no
1018 // ttinfos at all, set ttinfo_before to NULL
1019 if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1020 self->ttinfo_before = &(self->_ttinfos[0]);
1021 }
1022
1023 if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1024 if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1025 goto error;
1026 }
1027 }
1028 else {
1029 if (!self->num_ttinfos) {
1030 PyErr_Format(PyExc_ValueError, "No time zone information found.");
1031 goto error;
1032 }
1033
1034 size_t idx;
1035 if (!self->num_transitions) {
1036 idx = self->num_ttinfos - 1;
1037 }
1038 else {
1039 idx = trans_idx[self->num_transitions - 1];
1040 }
1041
1042 _ttinfo *tti = &(self->_ttinfos[idx]);
1043 build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1044 &(self->tzrule_after));
1045
1046 // We've abused the build_tzrule constructor to construct an STD-only
1047 // rule mimicking whatever ttinfo we've picked up, but it's possible
1048 // that the one we've picked up is a DST zone, so we need to make sure
1049 // that the dstoff is set correctly in that case.
1050 if (PyObject_IsTrue(tti->dstoff)) {
1051 _ttinfo *tti_after = &(self->tzrule_after.std);
1052 Py_DECREF(tti_after->dstoff);
1053 tti_after->dstoff = tti->dstoff;
1054 Py_INCREF(tti_after->dstoff);
1055 }
1056 }
1057
1058 // Determine if this is a "fixed offset" zone, meaning that the output of
1059 // the utcoffset, dst and tzname functions does not depend on the specific
1060 // datetime passed.
1061 //
1062 // We make three simplifying assumptions here:
1063 //
1064 // 1. If tzrule_after is not std_only, it has transitions that might occur
1065 // (it is possible to construct TZ strings that specify STD and DST but
1066 // no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1067 // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1068 // represent different offsets.
1069 // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1070 // fixed-offset zone with extra _ttinfos defined may appear to *not* be
1071 // a fixed offset zone).
1072 //
1073 // Violations to these assumptions would be fairly exotic, and exotic
1074 // zones should almost certainly not be used with datetime.time (the
1075 // only thing that would be affected by this).
1076 if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1077 self->fixed_offset = 0;
1078 }
1079 else if (self->num_ttinfos == 0) {
1080 self->fixed_offset = 1;
1081 }
1082 else {
1083 int constant_offset =
1084 ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1085 if (constant_offset < 0) {
1086 goto error;
1087 }
1088 else {
1089 self->fixed_offset = constant_offset;
1090 }
1091 }
1092
1093 int rv = 0;
1094 goto cleanup;
1095error:
1096 // These resources only need to be freed if we have failed, if we succeed
1097 // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1098 // method to free the relevant resources.
1099 if (self->trans_list_utc != NULL) {
1100 PyMem_Free(self->trans_list_utc);
1101 self->trans_list_utc = NULL;
1102 }
1103
1104 for (size_t i = 0; i < 2; ++i) {
1105 if (self->trans_list_wall[i] != NULL) {
1106 PyMem_Free(self->trans_list_wall[i]);
1107 self->trans_list_wall[i] = NULL;
1108 }
1109 }
1110
1111 if (self->_ttinfos != NULL) {
1112 for (size_t i = 0; i < ttinfos_allocated; ++i) {
1113 xdecref_ttinfo(&(self->_ttinfos[i]));
1114 }
1115 PyMem_Free(self->_ttinfos);
1116 self->_ttinfos = NULL;
1117 }
1118
1119 if (self->trans_ttinfos != NULL) {
1120 PyMem_Free(self->trans_ttinfos);
1121 self->trans_ttinfos = NULL;
1122 }
1123
1124 rv = -1;
1125cleanup:
1126 Py_XDECREF(data_tuple);
1127
1128 if (utcoff != NULL) {
1129 PyMem_Free(utcoff);
1130 }
1131
1132 if (dstoff != NULL) {
1133 PyMem_Free(dstoff);
1134 }
1135
1136 if (isdst != NULL) {
1137 PyMem_Free(isdst);
1138 }
1139
1140 if (trans_idx != NULL) {
1141 PyMem_Free(trans_idx);
1142 }
1143
1144 return rv;
1145}
1146
1147/* Function to calculate the local timestamp of a transition from the year. */
1148int64_t
1149calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1150{
1151 CalendarRule *self = (CalendarRule *)base_self;
1152
1153 // We want (year, month, day of month); we have year and month, but we
1154 // need to turn (week, day-of-week) into day-of-month
1155 //
1156 // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1157 // Week 5 represents the last occurrence of day `day`, so we need to know
1158 // the first weekday of the month and the number of days in the month.
1159 int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1160 uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1161 if (self->month == 2 && is_leap_year(year)) {
1162 days_in_month += 1;
1163 }
1164
1165 // This equation seems magical, so I'll break it down:
1166 // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1167 // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1168 // because this math is mod 7
1169 // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1170 // numbers so that -1 % 7 = 6).
1171 // 3. Add 1 because month days are a 1-based index.
1172 int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1173 if (month_day < 0) {
1174 month_day += 7;
1175 }
1176 month_day += 1;
1177
1178 // Now use a 0-based index version of `week` to calculate the w-th
1179 // occurrence of `day`
1180 month_day += ((int8_t)(self->week) - 1) * 7;
1181
1182 // month_day will only be > days_in_month if w was 5, and `w` means "last
1183 // occurrence of `d`", so now we just check if we over-shot the end of the
1184 // month and if so knock off 1 week.
1185 if (month_day > days_in_month) {
1186 month_day -= 7;
1187 }
1188
1189 int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1190 return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1191 (int64_t)(self->minute * 60) + (int64_t)(self->second));
1192}
1193
1194/* Constructor for CalendarRule. */
1195int
1196calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1197 int8_t minute, int8_t second, CalendarRule *out)
1198{
1199 // These bounds come from the POSIX standard, which describes an Mm.n.d
1200 // rule as:
1201 //
1202 // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1203 // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1204 // may occur in either the fourth or the fifth week). Week 1 is the first
1205 // week in which the d'th day occurs. Day zero is Sunday.
1206 if (month <= 0 || month > 12) {
1207 PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1208 return -1;
1209 }
1210
1211 if (week <= 0 || week > 5) {
1212 PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1213 return -1;
1214 }
1215
1216 // day is an unsigned integer, so day < 0 should always return false, but
1217 // if day's type changes to a signed integer *without* changing this value,
1218 // it may create a bug. Considering that the compiler should be able to
1219 // optimize out the first comparison if day is an unsigned integer anyway,
1220 // we will leave this comparison in place and disable the compiler warning.
1221#pragma GCC diagnostic push
1222#pragma GCC diagnostic ignored "-Wtype-limits"
1223 if (day < 0 || day > 6) {
1224#pragma GCC diagnostic pop
1225 PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1226 return -1;
1227 }
1228
1229 TransitionRuleType base = {&calendarrule_year_to_timestamp};
1230
1231 CalendarRule new_offset = {
1232 .base = base,
1233 .month = month,
1234 .week = week,
1235 .day = day,
1236 .hour = hour,
1237 .minute = minute,
1238 .second = second,
1239 };
1240
1241 *out = new_offset;
1242 return 0;
1243}
1244
1245/* Function to calculate the local timestamp of a transition from the year.
1246 *
1247 * This translates the day of the year into a local timestamp — either a
1248 * 1-based Julian day, not including leap days, or the 0-based year-day,
1249 * including leap days.
1250 * */
1251int64_t
1252dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1253{
1254 // The function signature requires a TransitionRuleType pointer, but this
1255 // function is only applicable to DayRule* objects.
1256 DayRule *self = (DayRule *)base_self;
1257
1258 // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1259 // to know the number of days since 1970-01-01, so we must subtract off
1260 // the equivalent of ymd_to_ord(1970, 1, 1).
1261 //
1262 // We subtract off an additional 1 day to account for January 1st (we want
1263 // the number of full days *before* the date of the transition - partial
1264 // days are accounted for in the hour, minute and second portions.
1265 int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1266
1267 // The Julian day specification skips over February 29th in leap years,
1268 // from the POSIX standard:
1269 //
1270 // Leap days shall not be counted. That is, in all years-including leap
1271 // years-February 28 is day 59 and March 1 is day 60. It is impossible to
1272 // refer explicitly to the occasional February 29.
1273 //
1274 // This is actually more useful than you'd think — if you want a rule that
1275 // always transitions on a given calendar day (other than February 29th),
1276 // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1277 // always refers to December 31st.
1278 unsigned int day = self->day;
1279 if (self->julian && day >= 59 && is_leap_year(year)) {
1280 day += 1;
1281 }
1282
1283 return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1284 (self->minute * 60) + self->second;
1285}
1286
1287/* Constructor for DayRule. */
1288static int
1289dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1290 int8_t second, DayRule *out)
1291{
1292 // The POSIX standard specifies that Julian days must be in the range (1 <=
1293 // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1294 // be in the range (0 <= n <= 365).
1295 if (day < julian || day > 365) {
1296 PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1297 julian, day);
1298 return -1;
1299 }
1300
1301 TransitionRuleType base = {
1302 &dayrule_year_to_timestamp,
1303 };
1304
1305 DayRule tmp = {
1306 .base = base,
1307 .julian = julian,
1308 .day = day,
1309 .hour = hour,
1310 .minute = minute,
1311 .second = second,
1312 };
1313
1314 *out = tmp;
1315
1316 return 0;
1317}
1318
1319/* Calculate the start and end rules for a _tzrule in the given year. */
1320static void
1321tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1322{
1323 assert(rule->start != NULL);
1324 assert(rule->end != NULL);
1325 *start = rule->start->year_to_timestamp(rule->start, year);
1326 *end = rule->end->year_to_timestamp(rule->end, year);
1327}
1328
1329/* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1330 *
1331 * This takes a local timestamp and fold for disambiguation purposes; the year
1332 * could technically be calculated from the timestamp, but given that the
1333 * callers of this function already have the year information accessible from
1334 * the datetime struct, it is taken as an additional parameter to reduce
1335 * unncessary calculation.
1336 * */
1337static _ttinfo *
1338find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1339{
1340 if (rule->std_only) {
1341 return &(rule->std);
1342 }
1343
1344 int64_t start, end;
1345 uint8_t isdst;
1346
1347 tzrule_transitions(rule, year, &start, &end);
1348
1349 // With fold = 0, the period (denominated in local time) with the smaller
1350 // offset starts at the end of the gap and ends at the end of the fold;
1351 // with fold = 1, it runs from the start of the gap to the beginning of the
1352 // fold.
1353 //
1354 // So in order to determine the DST boundaries we need to know both the
1355 // fold and whether DST is positive or negative (rare), and it turns out
1356 // that this boils down to fold XOR is_positive.
1357 if (fold == (rule->dst_diff >= 0)) {
1358 end -= rule->dst_diff;
1359 }
1360 else {
1361 start += rule->dst_diff;
1362 }
1363
1364 if (start < end) {
1365 isdst = (ts >= start) && (ts < end);
1366 }
1367 else {
1368 isdst = (ts < end) || (ts >= start);
1369 }
1370
1371 if (isdst) {
1372 return &(rule->dst);
1373 }
1374 else {
1375 return &(rule->std);
1376 }
1377}
1378
1379/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1380 *
1381 * This function can determine the _ttinfo that applies at a given epoch time,
1382 * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1383 * This is to be used in the .fromutc() function.
1384 *
1385 * The year is technically a redundant parameter, because it can be calculated
1386 * from the timestamp, but all callers of this function should have the year
1387 * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1388 * calculation.
1389 **/
1390static _ttinfo *
1391find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1392 unsigned char *fold)
1393{
1394 if (rule->std_only) {
1395 *fold = 0;
1396 return &(rule->std);
1397 }
1398
1399 int64_t start, end;
1400 uint8_t isdst;
1401 tzrule_transitions(rule, year, &start, &end);
1402 start -= rule->std.utcoff_seconds;
1403 end -= rule->dst.utcoff_seconds;
1404
1405 if (start < end) {
1406 isdst = (ts >= start) && (ts < end);
1407 }
1408 else {
1409 isdst = (ts < end) || (ts >= start);
1410 }
1411
1412 // For positive DST, the ambiguous period is one dst_diff after the end of
1413 // DST; for negative DST, the ambiguous period is one dst_diff before the
1414 // start of DST.
1415 int64_t ambig_start, ambig_end;
1416 if (rule->dst_diff > 0) {
1417 ambig_start = end;
1418 ambig_end = end + rule->dst_diff;
1419 }
1420 else {
1421 ambig_start = start;
1422 ambig_end = start - rule->dst_diff;
1423 }
1424
1425 *fold = (ts >= ambig_start) && (ts < ambig_end);
1426
1427 if (isdst) {
1428 return &(rule->dst);
1429 }
1430 else {
1431 return &(rule->std);
1432 }
1433}
1434
1435/* Parse a TZ string in the format specified by the POSIX standard:
1436 *
1437 * std offset[dst[offset],start[/time],end[/time]]
1438 *
1439 * std and dst must be 3 or more characters long and must not contain a
1440 * leading colon, embedded digits, commas, nor a plus or minus signs; The
1441 * spaces between "std" and "offset" are only for display and are not actually
1442 * present in the string.
1443 *
1444 * The format of the offset is ``[+|-]hh[:mm[:ss]]``
1445 *
1446 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1447 *
1448 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1449 */
1450static int
1451parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1452{
1453 PyObject *std_abbr = NULL;
1454 PyObject *dst_abbr = NULL;
1455 TransitionRuleType *start = NULL;
1456 TransitionRuleType *end = NULL;
Dong-hee Naa487a392020-05-22 01:56:03 +09001457 // Initialize offsets to invalid value (> 24 hours)
1458 long std_offset = 1 << 20;
1459 long dst_offset = 1 << 20;
Paul Ganssle62972d92020-05-16 04:20:06 -04001460
1461 char *tz_str = PyBytes_AsString(tz_str_obj);
1462 if (tz_str == NULL) {
1463 return -1;
1464 }
1465 char *p = tz_str;
1466
1467 // Read the `std` abbreviation, which must be at least 3 characters long.
Pablo Galindoe4799b92020-05-27 21:48:12 +01001468 Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
Paul Ganssle62972d92020-05-16 04:20:06 -04001469 if (num_chars < 1) {
1470 PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1471 goto error;
1472 }
1473
1474 p += num_chars;
1475
1476 // Now read the STD offset, which is required
1477 num_chars = parse_tz_delta(p, &std_offset);
1478 if (num_chars < 0) {
1479 PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1480 goto error;
1481 }
1482 p += num_chars;
1483
1484 // If the string ends here, there is no DST, otherwise we must parse the
1485 // DST abbreviation and start and end dates and times.
1486 if (*p == '\0') {
1487 goto complete;
1488 }
1489
1490 num_chars = parse_abbr(p, &dst_abbr);
1491 if (num_chars < 1) {
1492 PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1493 goto error;
1494 }
1495 p += num_chars;
1496
1497 if (*p == ',') {
1498 // From the POSIX standard:
1499 //
1500 // If no offset follows dst, the alternative time is assumed to be one
1501 // hour ahead of standard time.
1502 dst_offset = std_offset + 3600;
1503 }
1504 else {
1505 num_chars = parse_tz_delta(p, &dst_offset);
1506 if (num_chars < 0) {
1507 PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1508 tz_str_obj);
1509 goto error;
1510 }
1511
1512 p += num_chars;
1513 }
1514
1515 TransitionRuleType **transitions[2] = {&start, &end};
1516 for (size_t i = 0; i < 2; ++i) {
1517 if (*p != ',') {
1518 PyErr_Format(PyExc_ValueError,
1519 "Missing transition rules in TZ string: %R",
1520 tz_str_obj);
1521 goto error;
1522 }
1523 p++;
1524
1525 num_chars = parse_transition_rule(p, transitions[i]);
1526 if (num_chars < 0) {
1527 PyErr_Format(PyExc_ValueError,
1528 "Malformed transition rule in TZ string: %R",
1529 tz_str_obj);
1530 goto error;
1531 }
1532 p += num_chars;
1533 }
1534
1535 if (*p != '\0') {
1536 PyErr_Format(PyExc_ValueError,
1537 "Extraneous characters at end of TZ string: %R",
1538 tz_str_obj);
1539 goto error;
1540 }
1541
1542complete:
1543 build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1544 Py_DECREF(std_abbr);
1545 Py_XDECREF(dst_abbr);
1546
1547 return 0;
1548error:
1549 Py_XDECREF(std_abbr);
1550 if (dst_abbr != NULL && dst_abbr != Py_None) {
1551 Py_DECREF(dst_abbr);
1552 }
1553
1554 if (start != NULL) {
1555 PyMem_Free(start);
1556 }
1557
1558 if (end != NULL) {
1559 PyMem_Free(end);
1560 }
1561
1562 return -1;
1563}
1564
Pablo Galindoe4799b92020-05-27 21:48:12 +01001565static int
1566parse_uint(const char *const p, uint8_t *value)
Paul Ganssle62972d92020-05-16 04:20:06 -04001567{
1568 if (!isdigit(*p)) {
1569 return -1;
1570 }
1571
Pablo Galindoe4799b92020-05-27 21:48:12 +01001572 *value = (*p) - '0';
1573 return 0;
Paul Ganssle62972d92020-05-16 04:20:06 -04001574}
1575
1576/* Parse the STD and DST abbreviations from a TZ string. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001577static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001578parse_abbr(const char *const p, PyObject **abbr)
1579{
1580 const char *ptr = p;
1581 char buff = *ptr;
1582 const char *str_start;
1583 const char *str_end;
1584
1585 if (*ptr == '<') {
1586 ptr++;
1587 str_start = ptr;
1588 while ((buff = *ptr) != '>') {
1589 // From the POSIX standard:
1590 //
1591 // In the quoted form, the first character shall be the less-than
1592 // ( '<' ) character and the last character shall be the
1593 // greater-than ( '>' ) character. All characters between these
1594 // quoting characters shall be alphanumeric characters from the
1595 // portable character set in the current locale, the plus-sign (
1596 // '+' ) character, or the minus-sign ( '-' ) character. The std
1597 // and dst fields in this case shall not include the quoting
1598 // characters.
1599 if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1600 buff != '-') {
1601 return -1;
1602 }
1603 ptr++;
1604 }
1605 str_end = ptr;
1606 ptr++;
1607 }
1608 else {
1609 str_start = p;
1610 // From the POSIX standard:
1611 //
1612 // In the unquoted form, all characters in these fields shall be
1613 // alphabetic characters from the portable character set in the
1614 // current locale.
1615 while (isalpha(*ptr)) {
1616 ptr++;
1617 }
1618 str_end = ptr;
1619 }
1620
1621 *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
Gregory P. Smithd780fa72020-06-22 00:39:28 -07001622 if (*abbr == NULL) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001623 return -1;
1624 }
1625
1626 return ptr - p;
1627}
1628
1629/* Parse a UTC offset from a TZ str. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001630static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001631parse_tz_delta(const char *const p, long *total_seconds)
1632{
1633 // From the POSIX spec:
1634 //
1635 // Indicates the value added to the local time to arrive at Coordinated
1636 // Universal Time. The offset has the form:
1637 //
1638 // hh[:mm[:ss]]
1639 //
1640 // One or more digits may be used; the value is always interpreted as a
1641 // decimal number.
1642 //
1643 // The POSIX spec says that the values for `hour` must be between 0 and 24
1644 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1645 // transition times may be signed and range from -167 to 167.
1646 long sign = -1;
1647 long hours = 0;
1648 long minutes = 0;
1649 long seconds = 0;
1650
1651 const char *ptr = p;
1652 char buff = *ptr;
1653 if (buff == '-' || buff == '+') {
1654 // Negative numbers correspond to *positive* offsets, from the spec:
1655 //
1656 // If preceded by a '-', the timezone shall be east of the Prime
1657 // Meridian; otherwise, it shall be west (which may be indicated by
1658 // an optional preceding '+' ).
1659 if (buff == '-') {
1660 sign = 1;
1661 }
1662
1663 ptr++;
1664 }
1665
1666 // The hour can be 1 or 2 numeric characters
1667 for (size_t i = 0; i < 2; ++i) {
1668 buff = *ptr;
1669 if (!isdigit(buff)) {
1670 if (i == 0) {
1671 return -1;
1672 }
1673 else {
1674 break;
1675 }
1676 }
1677
1678 hours *= 10;
1679 hours += buff - '0';
1680 ptr++;
1681 }
1682
1683 if (hours > 24 || hours < 0) {
1684 return -1;
1685 }
1686
1687 // Minutes and seconds always of the format ":dd"
1688 long *outputs[2] = {&minutes, &seconds};
1689 for (size_t i = 0; i < 2; ++i) {
1690 if (*ptr != ':') {
1691 goto complete;
1692 }
1693 ptr++;
1694
1695 for (size_t j = 0; j < 2; ++j) {
1696 buff = *ptr;
1697 if (!isdigit(buff)) {
1698 return -1;
1699 }
1700 *(outputs[i]) *= 10;
1701 *(outputs[i]) += buff - '0';
1702 ptr++;
1703 }
1704 }
1705
1706complete:
1707 *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1708
1709 return ptr - p;
1710}
1711
1712/* Parse the date portion of a transition rule. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001713static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001714parse_transition_rule(const char *const p, TransitionRuleType **out)
1715{
1716 // The full transition rule indicates when to change back and forth between
1717 // STD and DST, and has the form:
1718 //
1719 // date[/time],date[/time]
1720 //
1721 // This function parses an individual date[/time] section, and returns
1722 // the number of characters that contributed to the transition rule. This
1723 // does not include the ',' at the end of the first rule.
1724 //
1725 // The POSIX spec states that if *time* is not given, the default is 02:00.
1726 const char *ptr = p;
1727 int8_t hour = 2;
1728 int8_t minute = 0;
1729 int8_t second = 0;
1730
1731 // Rules come in one of three flavors:
1732 //
1733 // 1. Jn: Julian day n, with no leap days.
1734 // 2. n: Day of year (0-based, with leap days)
1735 // 3. Mm.n.d: Specifying by month, week and day-of-week.
1736
1737 if (*ptr == 'M') {
1738 uint8_t month, week, day;
1739 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001740 if (parse_uint(ptr, &month)) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001741 return -1;
1742 }
Paul Ganssle62972d92020-05-16 04:20:06 -04001743 ptr++;
1744 if (*ptr != '.') {
Pablo Galindoe4799b92020-05-27 21:48:12 +01001745 uint8_t tmp;
1746 if (parse_uint(ptr, &tmp)) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001747 return -1;
1748 }
1749
1750 month *= 10;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001751 month += tmp;
Paul Ganssle62972d92020-05-16 04:20:06 -04001752 ptr++;
1753 }
1754
1755 uint8_t *values[2] = {&week, &day};
1756 for (size_t i = 0; i < 2; ++i) {
1757 if (*ptr != '.') {
1758 return -1;
1759 }
1760 ptr++;
1761
Pablo Galindoe4799b92020-05-27 21:48:12 +01001762 if (parse_uint(ptr, values[i])) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001763 return -1;
1764 }
1765 ptr++;
Paul Ganssle62972d92020-05-16 04:20:06 -04001766 }
1767
1768 if (*ptr == '/') {
1769 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001770 Py_ssize_t num_chars =
Paul Ganssle62972d92020-05-16 04:20:06 -04001771 parse_transition_time(ptr, &hour, &minute, &second);
1772 if (num_chars < 0) {
1773 return -1;
1774 }
1775 ptr += num_chars;
1776 }
1777
1778 CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1779 if (rv == NULL) {
1780 return -1;
1781 }
1782
1783 if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1784 PyMem_Free(rv);
1785 return -1;
1786 }
1787
1788 *out = (TransitionRuleType *)rv;
1789 }
1790 else {
1791 uint8_t julian = 0;
1792 unsigned int day = 0;
1793 if (*ptr == 'J') {
1794 julian = 1;
1795 ptr++;
1796 }
1797
1798 for (size_t i = 0; i < 3; ++i) {
1799 if (!isdigit(*ptr)) {
1800 if (i == 0) {
1801 return -1;
1802 }
1803 break;
1804 }
1805 day *= 10;
1806 day += (*ptr) - '0';
1807 ptr++;
1808 }
1809
1810 if (*ptr == '/') {
1811 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001812 Py_ssize_t num_chars =
Paul Ganssle62972d92020-05-16 04:20:06 -04001813 parse_transition_time(ptr, &hour, &minute, &second);
1814 if (num_chars < 0) {
1815 return -1;
1816 }
1817 ptr += num_chars;
1818 }
1819
1820 DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1821 if (rv == NULL) {
1822 return -1;
1823 }
1824
1825 if (dayrule_new(julian, day, hour, minute, second, rv)) {
1826 PyMem_Free(rv);
1827 return -1;
1828 }
1829 *out = (TransitionRuleType *)rv;
1830 }
1831
1832 return ptr - p;
1833}
1834
1835/* Parse the time portion of a transition rule (e.g. following an /) */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001836static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001837parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1838 int8_t *second)
1839{
1840 // From the spec:
1841 //
1842 // The time has the same format as offset except that no leading sign
1843 // ( '-' or '+' ) is allowed.
1844 //
1845 // The format for the offset is:
1846 //
1847 // h[h][:mm[:ss]]
1848 //
1849 // RFC 8536 also allows transition times to be signed and to range from
1850 // -167 to +167, but the current version only supports [0, 99].
1851 //
1852 // TODO: Support the full range of transition hours.
1853 int8_t *components[3] = {hour, minute, second};
1854 const char *ptr = p;
1855 int8_t sign = 1;
1856
1857 if (*ptr == '-' || *ptr == '+') {
1858 if (*ptr == '-') {
1859 sign = -1;
1860 }
1861 ptr++;
1862 }
1863
1864 for (size_t i = 0; i < 3; ++i) {
1865 if (i > 0) {
1866 if (*ptr != ':') {
1867 break;
1868 }
1869 ptr++;
1870 }
1871
1872 uint8_t buff = 0;
1873 for (size_t j = 0; j < 2; j++) {
1874 if (!isdigit(*ptr)) {
1875 if (i == 0 && j > 0) {
1876 break;
1877 }
1878 return -1;
1879 }
1880
1881 buff *= 10;
1882 buff += (*ptr) - '0';
1883 ptr++;
1884 }
1885
1886 *(components[i]) = sign * buff;
1887 }
1888
1889 return ptr - p;
1890}
1891
1892/* Constructor for a _tzrule.
1893 *
1894 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1895 * case `dst_offset` will be ignored and `start` and `end` are expected to be
1896 * NULL as well.
1897 *
1898 * Returns 0 on success.
1899 */
1900static int
1901build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1902 long dst_offset, TransitionRuleType *start,
1903 TransitionRuleType *end, _tzrule *out)
1904{
Dong-hee Naa487a392020-05-22 01:56:03 +09001905 _tzrule rv = {{0}};
Paul Ganssle62972d92020-05-16 04:20:06 -04001906
1907 rv.start = start;
1908 rv.end = end;
1909
1910 if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1911 goto error;
1912 }
1913
1914 if (dst_abbr != NULL) {
1915 rv.dst_diff = dst_offset - std_offset;
1916 if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1917 goto error;
1918 }
1919 }
1920 else {
1921 rv.std_only = 1;
1922 }
1923
1924 *out = rv;
1925
1926 return 0;
1927error:
1928 xdecref_ttinfo(&rv.std);
1929 xdecref_ttinfo(&rv.dst);
1930 return -1;
1931}
1932
1933/* Destructor for _tzrule. */
1934static void
1935free_tzrule(_tzrule *tzrule)
1936{
1937 xdecref_ttinfo(&(tzrule->std));
1938 if (!tzrule->std_only) {
1939 xdecref_ttinfo(&(tzrule->dst));
1940 }
1941
1942 if (tzrule->start != NULL) {
1943 PyMem_Free(tzrule->start);
1944 }
1945
1946 if (tzrule->end != NULL) {
1947 PyMem_Free(tzrule->end);
1948 }
1949}
1950
1951/* Calculate DST offsets from transitions and UTC offsets
1952 *
1953 * This is necessary because each C `ttinfo` only contains the UTC offset,
1954 * time zone abbreviation and an isdst boolean - it does not include the
1955 * amount of the DST offset, but we need the amount for the dst() function.
1956 *
1957 * Thus function uses heuristics to infer what the offset should be, so it
1958 * is not guaranteed that this will work for all zones. If we cannot assign
1959 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1960 * bool(dt.dst()) will always match ttinfo.isdst.
1961 */
1962static void
1963utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1964 unsigned char *isdsts, size_t num_transitions,
1965 size_t num_ttinfos)
1966{
1967 size_t dst_count = 0;
1968 size_t dst_found = 0;
1969 for (size_t i = 0; i < num_ttinfos; ++i) {
1970 dst_count++;
1971 }
1972
1973 for (size_t i = 1; i < num_transitions; ++i) {
1974 if (dst_count == dst_found) {
1975 break;
1976 }
1977
1978 size_t idx = trans_idx[i];
1979 size_t comp_idx = trans_idx[i - 1];
1980
1981 // Only look at DST offsets that have nto been assigned already
1982 if (!isdsts[idx] || dstoffs[idx] != 0) {
1983 continue;
1984 }
1985
1986 long dstoff = 0;
1987 long utcoff = utcoffs[idx];
1988
1989 if (!isdsts[comp_idx]) {
1990 dstoff = utcoff - utcoffs[comp_idx];
1991 }
1992
1993 if (!dstoff && idx < (num_ttinfos - 1)) {
1994 comp_idx = trans_idx[i + 1];
1995
1996 // If the following transition is also DST and we couldn't find
1997 // the DST offset by this point, we're going to have to skip it
1998 // and hope this transition gets assigned later
1999 if (isdsts[comp_idx]) {
2000 continue;
2001 }
2002
2003 dstoff = utcoff - utcoffs[comp_idx];
2004 }
2005
2006 if (dstoff) {
2007 dst_found++;
2008 dstoffs[idx] = dstoff;
2009 }
2010 }
2011
2012 if (dst_found < dst_count) {
2013 // If there are time zones we didn't find a value for, we'll end up
2014 // with dstoff = 0 for something where isdst=1. This is obviously
2015 // wrong — one hour will be a much better guess than 0.
2016 for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2017 if (isdsts[idx] && !dstoffs[idx]) {
2018 dstoffs[idx] = 3600;
2019 }
2020 }
2021 }
2022}
2023
2024#define _swap(x, y, buffer) \
2025 buffer = x; \
2026 x = y; \
2027 y = buffer;
2028
2029/* Calculate transitions in local time from UTC time and offsets.
2030 *
2031 * We want to know when each transition occurs, denominated in the number of
2032 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2033 * *local time* (note: this is *not* equivalent to the output of
2034 * datetime.timestamp, which is the total number of seconds actual elapsed
2035 * since 1970-01-01T00:00:00Z in UTC).
2036 *
2037 * This is an ambiguous question because "local time" can be ambiguous — but it
2038 * is disambiguated by the `fold` parameter, so we allocate two arrays:
2039 *
2040 * trans_local[0]: The wall-time transitions for fold=0
2041 * trans_local[1]: The wall-time transitions for fold=1
2042 *
2043 * This returns 0 on success and a negative number of failure. The trans_local
2044 * arrays must be freed if they are not NULL.
2045 */
2046static int
2047ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2048 int64_t *trans_local[2], size_t num_ttinfos,
2049 size_t num_transitions)
2050{
2051 if (num_transitions == 0) {
2052 return 0;
2053 }
2054
2055 // Copy the UTC transitions into each array to be modified in place later
2056 for (size_t i = 0; i < 2; ++i) {
2057 trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2058 if (trans_local[i] == NULL) {
2059 return -1;
2060 }
2061
2062 memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2063 }
2064
2065 int64_t offset_0, offset_1, buff;
2066 if (num_ttinfos > 1) {
2067 offset_0 = utcoff[0];
2068 offset_1 = utcoff[trans_idx[0]];
2069
2070 if (offset_1 > offset_0) {
2071 _swap(offset_0, offset_1, buff);
2072 }
2073 }
2074 else {
2075 offset_0 = utcoff[0];
2076 offset_1 = utcoff[0];
2077 }
2078
2079 trans_local[0][0] += offset_0;
2080 trans_local[1][0] += offset_1;
2081
2082 for (size_t i = 1; i < num_transitions; ++i) {
2083 offset_0 = utcoff[trans_idx[i - 1]];
2084 offset_1 = utcoff[trans_idx[i]];
2085
2086 if (offset_1 > offset_0) {
2087 _swap(offset_1, offset_0, buff);
2088 }
2089
2090 trans_local[0][i] += offset_0;
2091 trans_local[1][i] += offset_1;
2092 }
2093
2094 return 0;
2095}
2096
2097/* Simple bisect_right binary search implementation */
2098static size_t
2099_bisect(const int64_t value, const int64_t *arr, size_t size)
2100{
2101 size_t lo = 0;
2102 size_t hi = size;
2103 size_t m;
2104
2105 while (lo < hi) {
2106 m = (lo + hi) / 2;
2107 if (arr[m] > value) {
2108 hi = m;
2109 }
2110 else {
2111 lo = m + 1;
2112 }
2113 }
2114
2115 return hi;
2116}
2117
2118/* Find the ttinfo rules that apply at a given local datetime. */
2119static _ttinfo *
2120find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2121{
2122 // datetime.time has a .tzinfo attribute that passes None as the dt
2123 // argument; it only really has meaning for fixed-offset zones.
2124 if (dt == Py_None) {
2125 if (self->fixed_offset) {
2126 return &(self->tzrule_after.std);
2127 }
2128 else {
2129 return &NO_TTINFO;
2130 }
2131 }
2132
2133 int64_t ts;
2134 if (get_local_timestamp(dt, &ts)) {
2135 return NULL;
2136 }
2137
2138 unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2139 assert(fold < 2);
2140 int64_t *local_transitions = self->trans_list_wall[fold];
2141 size_t num_trans = self->num_transitions;
2142
2143 if (num_trans && ts < local_transitions[0]) {
2144 return self->ttinfo_before;
2145 }
2146 else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2147 return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2148 PyDateTime_GET_YEAR(dt));
2149 }
2150 else {
2151 size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2152 assert(idx < self->num_transitions);
2153 return self->trans_ttinfos[idx];
2154 }
2155}
2156
2157static int
2158is_leap_year(int year)
2159{
2160 const unsigned int ayear = (unsigned int)year;
2161 return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2162}
2163
2164/* Calculates ordinal datetime from year, month and day. */
2165static int
2166ymd_to_ord(int y, int m, int d)
2167{
2168 y -= 1;
2169 int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2170 int yearday = DAYS_BEFORE_MONTH[m];
2171 if (m > 2 && is_leap_year(y + 1)) {
2172 yearday += 1;
2173 }
2174
2175 return days_before_year + yearday + d;
2176}
2177
2178/* Calculate the number of seconds since 1970-01-01 in local time.
2179 *
2180 * This gets a datetime in the same "units" as self->trans_list_wall so that we
2181 * can easily determine which transitions a datetime falls between. See the
2182 * comment above ts_to_local for more information.
2183 * */
2184static int
2185get_local_timestamp(PyObject *dt, int64_t *local_ts)
2186{
2187 assert(local_ts != NULL);
2188
2189 int hour, minute, second;
2190 int ord;
2191 if (PyDateTime_CheckExact(dt)) {
2192 int y = PyDateTime_GET_YEAR(dt);
2193 int m = PyDateTime_GET_MONTH(dt);
2194 int d = PyDateTime_GET_DAY(dt);
2195 hour = PyDateTime_DATE_GET_HOUR(dt);
2196 minute = PyDateTime_DATE_GET_MINUTE(dt);
2197 second = PyDateTime_DATE_GET_SECOND(dt);
2198
2199 ord = ymd_to_ord(y, m, d);
2200 }
2201 else {
2202 PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2203 if (num == NULL) {
2204 return -1;
2205 }
2206
2207 ord = PyLong_AsLong(num);
2208 Py_DECREF(num);
2209 if (ord == -1 && PyErr_Occurred()) {
2210 return -1;
2211 }
2212
2213 num = PyObject_GetAttrString(dt, "hour");
2214 if (num == NULL) {
2215 return -1;
2216 }
2217 hour = PyLong_AsLong(num);
2218 Py_DECREF(num);
2219 if (hour == -1) {
2220 return -1;
2221 }
2222
2223 num = PyObject_GetAttrString(dt, "minute");
2224 if (num == NULL) {
2225 return -1;
2226 }
2227 minute = PyLong_AsLong(num);
2228 Py_DECREF(num);
2229 if (minute == -1) {
2230 return -1;
2231 }
2232
2233 num = PyObject_GetAttrString(dt, "second");
2234 if (num == NULL) {
2235 return -1;
2236 }
2237 second = PyLong_AsLong(num);
2238 Py_DECREF(num);
2239 if (second == -1) {
2240 return -1;
2241 }
2242 }
2243
2244 *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2245 (int64_t)(hour * 3600 + minute * 60 + second);
2246
2247 return 0;
2248}
2249
2250/////
2251// Functions for cache handling
2252
2253/* Constructor for StrongCacheNode */
2254static StrongCacheNode *
2255strong_cache_node_new(PyObject *key, PyObject *zone)
2256{
2257 StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2258 if (node == NULL) {
2259 return NULL;
2260 }
2261
2262 Py_INCREF(key);
2263 Py_INCREF(zone);
2264
2265 node->next = NULL;
2266 node->prev = NULL;
2267 node->key = key;
2268 node->zone = zone;
2269
2270 return node;
2271}
2272
2273/* Destructor for StrongCacheNode */
2274void
2275strong_cache_node_free(StrongCacheNode *node)
2276{
2277 Py_XDECREF(node->key);
2278 Py_XDECREF(node->zone);
2279
2280 PyMem_Free(node);
2281}
2282
2283/* Frees all nodes at or after a specified root in the strong cache.
2284 *
2285 * This can be used on the root node to free the entire cache or it can be used
2286 * to clear all nodes that have been expired (which, if everything is going
2287 * right, will actually only be 1 node at a time).
2288 */
2289void
2290strong_cache_free(StrongCacheNode *root)
2291{
2292 StrongCacheNode *node = root;
2293 StrongCacheNode *next_node;
2294 while (node != NULL) {
2295 next_node = node->next;
2296 strong_cache_node_free(node);
2297
2298 node = next_node;
2299 }
2300}
2301
2302/* Removes a node from the cache and update its neighbors.
2303 *
2304 * This is used both when ejecting a node from the cache and when moving it to
2305 * the front of the cache.
2306 */
2307static void
2308remove_from_strong_cache(StrongCacheNode *node)
2309{
2310 if (ZONEINFO_STRONG_CACHE == node) {
2311 ZONEINFO_STRONG_CACHE = node->next;
2312 }
2313
2314 if (node->prev != NULL) {
2315 node->prev->next = node->next;
2316 }
2317
2318 if (node->next != NULL) {
2319 node->next->prev = node->prev;
2320 }
2321
2322 node->next = NULL;
2323 node->prev = NULL;
2324}
2325
2326/* Retrieves the node associated with a key, if it exists.
2327 *
2328 * This traverses the strong cache until it finds a matching key and returns a
2329 * pointer to the relevant node if found. Returns NULL if no node is found.
2330 *
2331 * root may be NULL, indicating an empty cache.
2332 */
2333static StrongCacheNode *
2334find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2335{
2336 const StrongCacheNode *node = root;
2337 while (node != NULL) {
2338 if (PyObject_RichCompareBool(key, node->key, Py_EQ)) {
2339 return (StrongCacheNode *)node;
2340 }
2341
2342 node = node->next;
2343 }
2344
2345 return NULL;
2346}
2347
2348/* Ejects a given key from the class's strong cache, if applicable.
2349 *
2350 * This function is used to enable the per-key functionality in clear_cache.
2351 */
2352static void
2353eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2354{
2355 if (type != &PyZoneInfo_ZoneInfoType) {
2356 return;
2357 }
2358
2359 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2360 if (node != NULL) {
2361 remove_from_strong_cache(node);
2362
2363 strong_cache_node_free(node);
2364 }
2365}
2366
2367/* Moves a node to the front of the LRU cache.
2368 *
2369 * The strong cache is an LRU cache, so whenever a given node is accessed, if
2370 * it is not at the front of the cache, it needs to be moved there.
2371 */
2372static void
2373move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2374{
2375 StrongCacheNode *root_p = *root;
2376 if (root_p == node) {
2377 return;
2378 }
2379
2380 remove_from_strong_cache(node);
2381
2382 node->prev = NULL;
2383 node->next = root_p;
2384
2385 if (root_p != NULL) {
2386 root_p->prev = node;
2387 }
2388
2389 *root = node;
2390}
2391
2392/* Retrieves a ZoneInfo from the strong cache if it's present.
2393 *
2394 * This function finds the ZoneInfo by key and if found will move the node to
2395 * the front of the LRU cache and return a new reference to it. It returns NULL
2396 * if the key is not in the cache.
2397 *
2398 * The strong cache is currently only implemented for the base class, so this
2399 * always returns a cache miss for subclasses.
2400 */
2401static PyObject *
2402zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2403{
2404 if (type != &PyZoneInfo_ZoneInfoType) {
2405 return NULL; // Strong cache currently only implemented for base class
2406 }
2407
2408 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2409
2410 if (node != NULL) {
2411 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2412 Py_INCREF(node->zone);
2413 return node->zone;
2414 }
2415
2416 return NULL; // Cache miss
2417}
2418
2419/* Inserts a new key into the strong LRU cache.
2420 *
2421 * This function is only to be used after a cache miss — it creates a new node
2422 * at the front of the cache and ejects any stale entries (keeping the size of
2423 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2424 */
2425static void
2426update_strong_cache(const PyTypeObject *const type, PyObject *key,
2427 PyObject *zone)
2428{
2429 if (type != &PyZoneInfo_ZoneInfoType) {
2430 return;
2431 }
2432
2433 StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2434
2435 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2436
2437 StrongCacheNode *node = new_node->next;
2438 for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2439 if (node == NULL) {
2440 return;
2441 }
2442 node = node->next;
2443 }
2444
2445 // Everything beyond this point needs to be freed
2446 if (node != NULL) {
2447 if (node->prev != NULL) {
2448 node->prev->next = NULL;
2449 }
2450 strong_cache_free(node);
2451 }
2452}
2453
2454/* Clears all entries into a type's strong cache.
2455 *
2456 * Because the strong cache is not implemented for subclasses, this is a no-op
2457 * for everything except the base class.
2458 */
2459void
2460clear_strong_cache(const PyTypeObject *const type)
2461{
2462 if (type != &PyZoneInfo_ZoneInfoType) {
2463 return;
2464 }
2465
2466 strong_cache_free(ZONEINFO_STRONG_CACHE);
Paul Gansslec3dd7e42020-08-17 18:40:07 -04002467 ZONEINFO_STRONG_CACHE = NULL;
Paul Ganssle62972d92020-05-16 04:20:06 -04002468}
2469
2470static PyObject *
Benjamin Peterson0108b2a2020-07-15 10:02:14 -07002471new_weak_cache(void)
Paul Ganssle62972d92020-05-16 04:20:06 -04002472{
2473 PyObject *weakref_module = PyImport_ImportModule("weakref");
2474 if (weakref_module == NULL) {
2475 return NULL;
2476 }
2477
2478 PyObject *weak_cache =
2479 PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2480 Py_DECREF(weakref_module);
2481 return weak_cache;
2482}
2483
2484static int
Benjamin Peterson0108b2a2020-07-15 10:02:14 -07002485initialize_caches(void)
Paul Ganssle62972d92020-05-16 04:20:06 -04002486{
Ammar Askar06a1b892020-05-22 16:10:55 +00002487 // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
Paul Ganssle62972d92020-05-16 04:20:06 -04002488 if (TIMEDELTA_CACHE == NULL) {
2489 TIMEDELTA_CACHE = PyDict_New();
2490 }
2491 else {
2492 Py_INCREF(TIMEDELTA_CACHE);
2493 }
2494
2495 if (TIMEDELTA_CACHE == NULL) {
2496 return -1;
2497 }
2498
2499 if (ZONEINFO_WEAK_CACHE == NULL) {
2500 ZONEINFO_WEAK_CACHE = new_weak_cache();
2501 }
2502 else {
2503 Py_INCREF(ZONEINFO_WEAK_CACHE);
2504 }
2505
2506 if (ZONEINFO_WEAK_CACHE == NULL) {
2507 return -1;
2508 }
2509
2510 return 0;
2511}
2512
2513static PyObject *
2514zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2515{
2516 PyObject *weak_cache = new_weak_cache();
2517 if (weak_cache == NULL) {
2518 return NULL;
2519 }
2520
2521 PyObject_SetAttrString((PyObject *)cls, "_weak_cache", weak_cache);
Paul Gansslec3dd7e42020-08-17 18:40:07 -04002522 Py_DECREF(weak_cache);
Paul Ganssle62972d92020-05-16 04:20:06 -04002523 Py_RETURN_NONE;
2524}
2525
2526/////
2527// Specify the ZoneInfo type
2528static PyMethodDef zoneinfo_methods[] = {
2529 {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2530 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2531 PyDoc_STR("Clear the ZoneInfo cache.")},
2532 {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2533 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2534 PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2535 {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2536 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2537 PyDoc_STR("Create a ZoneInfo file from a file object.")},
2538 {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2539 PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2540 "the given datetime.")},
2541 {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2542 PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2543 "in a zone at the given datetime.")},
2544 {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2545 PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2546 "zone that applies in a zone at a given datetime.")},
2547 {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2548 PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2549 "datetime in local time.")},
2550 {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2551 PyDoc_STR("Function for serialization with the pickle protocol.")},
2552 {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2553 PyDoc_STR("Private method used in unpickling.")},
2554 {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
Paul Ganssle87d82872020-08-13 22:38:30 -04002555 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
Paul Ganssle62972d92020-05-16 04:20:06 -04002556 PyDoc_STR("Function to initialize subclasses.")},
2557 {NULL} /* Sentinel */
2558};
2559
2560static PyMemberDef zoneinfo_members[] = {
2561 {.name = "key",
2562 .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2563 .type = T_OBJECT_EX,
2564 .flags = READONLY,
2565 .doc = NULL},
2566 {NULL}, /* Sentinel */
2567};
2568
2569static PyTypeObject PyZoneInfo_ZoneInfoType = {
2570 PyVarObject_HEAD_INIT(NULL, 0) //
2571 .tp_name = "zoneinfo.ZoneInfo",
2572 .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2573 .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2574 .tp_repr = (reprfunc)zoneinfo_repr,
2575 .tp_str = (reprfunc)zoneinfo_str,
2576 .tp_getattro = PyObject_GenericGetAttr,
2577 .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2578 /* .tp_doc = zoneinfo_doc, */
2579 .tp_methods = zoneinfo_methods,
2580 .tp_members = zoneinfo_members,
2581 .tp_new = zoneinfo_new,
2582 .tp_dealloc = zoneinfo_dealloc,
2583};
2584
2585/////
2586// Specify the _zoneinfo module
2587static PyMethodDef module_methods[] = {{NULL, NULL}};
2588static void
2589module_free()
2590{
2591 Py_XDECREF(_tzpath_find_tzfile);
2592 _tzpath_find_tzfile = NULL;
2593
2594 Py_XDECREF(_common_mod);
2595 _common_mod = NULL;
2596
2597 Py_XDECREF(io_open);
2598 io_open = NULL;
2599
2600 xdecref_ttinfo(&NO_TTINFO);
2601
Ammar Askar06a1b892020-05-22 16:10:55 +00002602 if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2603 Py_DECREF(TIMEDELTA_CACHE);
2604 } else {
2605 Py_CLEAR(TIMEDELTA_CACHE);
Paul Ganssle62972d92020-05-16 04:20:06 -04002606 }
2607
Ammar Askar06a1b892020-05-22 16:10:55 +00002608 if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2609 Py_DECREF(ZONEINFO_WEAK_CACHE);
2610 } else {
2611 Py_CLEAR(ZONEINFO_WEAK_CACHE);
Paul Ganssle62972d92020-05-16 04:20:06 -04002612 }
2613
Paul Gansslec3dd7e42020-08-17 18:40:07 -04002614 clear_strong_cache(&PyZoneInfo_ZoneInfoType);
Paul Ganssle62972d92020-05-16 04:20:06 -04002615}
2616
2617static int
2618zoneinfomodule_exec(PyObject *m)
2619{
2620 PyDateTime_IMPORT;
2621 PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2622 if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2623 goto error;
2624 }
2625
2626 Py_INCREF(&PyZoneInfo_ZoneInfoType);
2627 PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2628
2629 /* Populate imports */
2630 PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2631 if (_tzpath_module == NULL) {
2632 goto error;
2633 }
2634
2635 _tzpath_find_tzfile =
2636 PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2637 Py_DECREF(_tzpath_module);
2638 if (_tzpath_find_tzfile == NULL) {
2639 goto error;
2640 }
2641
2642 PyObject *io_module = PyImport_ImportModule("io");
2643 if (io_module == NULL) {
2644 goto error;
2645 }
2646
2647 io_open = PyObject_GetAttrString(io_module, "open");
2648 Py_DECREF(io_module);
2649 if (io_open == NULL) {
2650 goto error;
2651 }
2652
2653 _common_mod = PyImport_ImportModule("zoneinfo._common");
2654 if (_common_mod == NULL) {
2655 goto error;
2656 }
2657
2658 if (NO_TTINFO.utcoff == NULL) {
2659 NO_TTINFO.utcoff = Py_None;
2660 NO_TTINFO.dstoff = Py_None;
2661 NO_TTINFO.tzname = Py_None;
2662
2663 for (size_t i = 0; i < 3; ++i) {
2664 Py_INCREF(Py_None);
2665 }
2666 }
2667
2668 if (initialize_caches()) {
2669 goto error;
2670 }
2671
2672 return 0;
2673
2674error:
2675 return -1;
2676}
2677
2678static PyModuleDef_Slot zoneinfomodule_slots[] = {
2679 {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2680
2681static struct PyModuleDef zoneinfomodule = {
2682 PyModuleDef_HEAD_INIT,
2683 .m_name = "_zoneinfo",
2684 .m_doc = "C implementation of the zoneinfo module",
2685 .m_size = 0,
2686 .m_methods = module_methods,
2687 .m_slots = zoneinfomodule_slots,
2688 .m_free = (freefunc)module_free};
2689
2690PyMODINIT_FUNC
2691PyInit__zoneinfo(void)
2692{
2693 return PyModuleDef_Init(&zoneinfomodule);
2694}