blob: bb32b1496683e488d279c1905d6852f7da99a67d [file] [log] [blame]
Paul Ganssle62972d92020-05-16 04:20:06 -04001#include "Python.h"
Victor Stinner37834132020-10-27 17:12:53 +01002#include "pycore_long.h" // _PyLong_GetOne()
Paul Ganssle62972d92020-05-16 04:20:06 -04003#include "structmember.h"
4
5#include <ctype.h>
6#include <stddef.h>
7#include <stdint.h>
8
9#include "datetime.h"
10
11// Imports
12static PyObject *io_open = NULL;
13static PyObject *_tzpath_find_tzfile = NULL;
14static PyObject *_common_mod = NULL;
15
16typedef struct TransitionRuleType TransitionRuleType;
17typedef struct StrongCacheNode StrongCacheNode;
18
19typedef struct {
20 PyObject *utcoff;
21 PyObject *dstoff;
22 PyObject *tzname;
23 long utcoff_seconds;
24} _ttinfo;
25
26typedef struct {
27 _ttinfo std;
28 _ttinfo dst;
29 int dst_diff;
30 TransitionRuleType *start;
31 TransitionRuleType *end;
32 unsigned char std_only;
33} _tzrule;
34
35typedef struct {
36 PyDateTime_TZInfo base;
37 PyObject *key;
38 PyObject *file_repr;
39 PyObject *weakreflist;
Pablo Galindoe4799b92020-05-27 21:48:12 +010040 size_t num_transitions;
41 size_t num_ttinfos;
Paul Ganssle62972d92020-05-16 04:20:06 -040042 int64_t *trans_list_utc;
43 int64_t *trans_list_wall[2];
44 _ttinfo **trans_ttinfos; // References to the ttinfo for each transition
45 _ttinfo *ttinfo_before;
46 _tzrule tzrule_after;
47 _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation
48 unsigned char fixed_offset;
49 unsigned char source;
50} PyZoneInfo_ZoneInfo;
51
52struct TransitionRuleType {
53 int64_t (*year_to_timestamp)(TransitionRuleType *, int);
54};
55
56typedef struct {
57 TransitionRuleType base;
58 uint8_t month;
59 uint8_t week;
60 uint8_t day;
61 int8_t hour;
62 int8_t minute;
63 int8_t second;
64} CalendarRule;
65
66typedef struct {
67 TransitionRuleType base;
68 uint8_t julian;
69 unsigned int day;
70 int8_t hour;
71 int8_t minute;
72 int8_t second;
73} DayRule;
74
75struct StrongCacheNode {
76 StrongCacheNode *next;
77 StrongCacheNode *prev;
78 PyObject *key;
79 PyObject *zone;
80};
81
82static PyTypeObject PyZoneInfo_ZoneInfoType;
83
84// Globals
85static PyObject *TIMEDELTA_CACHE = NULL;
86static PyObject *ZONEINFO_WEAK_CACHE = NULL;
87static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
88static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
89
90static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
91
92// Constants
93static const int EPOCHORDINAL = 719163;
94static int DAYS_IN_MONTH[] = {
95 -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
96};
97
98static int DAYS_BEFORE_MONTH[] = {
99 -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
100};
101
102static const int SOURCE_NOCACHE = 0;
103static const int SOURCE_CACHE = 1;
104static const int SOURCE_FILE = 2;
105
106// Forward declarations
107static int
108load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
109static void
110utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
111 unsigned char *isdsts, size_t num_transitions,
112 size_t num_ttinfos);
113static int
114ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
115 int64_t *trans_local[2], size_t num_ttinfos,
116 size_t num_transitions);
117
118static int
119parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
120
Pablo Galindoe4799b92020-05-27 21:48:12 +0100121static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400122parse_abbr(const char *const p, PyObject **abbr);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100123static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400124parse_tz_delta(const char *const p, long *total_seconds);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100125static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400126parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
127 int8_t *second);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100128static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400129parse_transition_rule(const char *const p, TransitionRuleType **out);
130
131static _ttinfo *
132find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
133static _ttinfo *
134find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
135 unsigned char *fold);
136
137static int
138build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
139static void
140xdecref_ttinfo(_ttinfo *ttinfo);
141static int
142ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
143
144static int
145build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
146 long dst_offset, TransitionRuleType *start,
147 TransitionRuleType *end, _tzrule *out);
148static void
149free_tzrule(_tzrule *tzrule);
150
151static PyObject *
152load_timedelta(long seconds);
153
154static int
155get_local_timestamp(PyObject *dt, int64_t *local_ts);
156static _ttinfo *
157find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
158
159static int
160ymd_to_ord(int y, int m, int d);
161static int
162is_leap_year(int year);
163
164static size_t
165_bisect(const int64_t value, const int64_t *arr, size_t size);
166
167static void
168eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
169static void
170clear_strong_cache(const PyTypeObject *const type);
171static void
172update_strong_cache(const PyTypeObject *const type, PyObject *key,
173 PyObject *zone);
174static PyObject *
Victor Stinneraefb69b2020-12-16 16:26:15 +0100175zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key);
Paul Ganssle62972d92020-05-16 04:20:06 -0400176
177static PyObject *
178zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
179{
180 PyObject *file_obj = NULL;
181 PyObject *file_path = NULL;
182
183 file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
184 if (file_path == NULL) {
185 return NULL;
186 }
187 else if (file_path == Py_None) {
188 file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
189 if (file_obj == NULL) {
190 Py_DECREF(file_path);
191 return NULL;
192 }
193 }
194
195 PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
196 if (self == NULL) {
197 goto error;
198 }
199
200 if (file_obj == NULL) {
201 file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
202 if (file_obj == NULL) {
203 goto error;
204 }
205 }
206
207 if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
208 goto error;
209 }
210
211 PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
212 Py_DECREF(file_obj);
213 file_obj = NULL;
214 if (rv == NULL) {
215 goto error;
216 }
217 Py_DECREF(rv);
218
219 ((PyZoneInfo_ZoneInfo *)self)->key = key;
220 Py_INCREF(key);
221
222 goto cleanup;
223error:
224 Py_XDECREF(self);
225 self = NULL;
226cleanup:
227 if (file_obj != NULL) {
Zackery Spytzeca25492020-07-20 06:51:26 -0600228 PyObject *exc, *val, *tb;
229 PyErr_Fetch(&exc, &val, &tb);
Paul Ganssle62972d92020-05-16 04:20:06 -0400230 PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
Zackery Spytzeca25492020-07-20 06:51:26 -0600231 _PyErr_ChainExceptions(exc, val, tb);
232 if (tmp == NULL) {
233 Py_CLEAR(self);
234 }
235 Py_XDECREF(tmp);
Paul Ganssle62972d92020-05-16 04:20:06 -0400236 Py_DECREF(file_obj);
237 }
238 Py_DECREF(file_path);
239 return self;
240}
241
242static PyObject *
243get_weak_cache(PyTypeObject *type)
244{
245 if (type == &PyZoneInfo_ZoneInfoType) {
246 return ZONEINFO_WEAK_CACHE;
247 }
248 else {
249 PyObject *cache =
250 PyObject_GetAttrString((PyObject *)type, "_weak_cache");
251 // We are assuming that the type lives at least as long as the function
252 // that calls get_weak_cache, and that it holds a reference to the
253 // cache, so we'll return a "borrowed reference".
254 Py_XDECREF(cache);
255 return cache;
256 }
257}
258
259static PyObject *
260zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
261{
262 PyObject *key = NULL;
263 static char *kwlist[] = {"key", NULL};
264 if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
265 return NULL;
266 }
267
268 PyObject *instance = zone_from_strong_cache(type, key);
269 if (instance != NULL) {
270 return instance;
271 }
272
273 PyObject *weak_cache = get_weak_cache(type);
274 instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
275 if (instance == NULL) {
276 return NULL;
277 }
278
279 if (instance == Py_None) {
280 Py_DECREF(instance);
281 PyObject *tmp = zoneinfo_new_instance(type, key);
282 if (tmp == NULL) {
283 return NULL;
284 }
285
286 instance =
287 PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
Paul Ganssle62972d92020-05-16 04:20:06 -0400288 Py_DECREF(tmp);
Paul Ganssle62972d92020-05-16 04:20:06 -0400289 if (instance == NULL) {
290 return NULL;
291 }
Gregory P. Smithd780fa72020-06-22 00:39:28 -0700292 ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
Paul Ganssle62972d92020-05-16 04:20:06 -0400293 }
294
295 update_strong_cache(type, key, instance);
296 return instance;
297}
298
299static void
300zoneinfo_dealloc(PyObject *obj_self)
301{
302 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
303
304 if (self->weakreflist != NULL) {
305 PyObject_ClearWeakRefs(obj_self);
306 }
307
308 if (self->trans_list_utc != NULL) {
309 PyMem_Free(self->trans_list_utc);
310 }
311
312 for (size_t i = 0; i < 2; i++) {
313 if (self->trans_list_wall[i] != NULL) {
314 PyMem_Free(self->trans_list_wall[i]);
315 }
316 }
317
318 if (self->_ttinfos != NULL) {
319 for (size_t i = 0; i < self->num_ttinfos; ++i) {
320 xdecref_ttinfo(&(self->_ttinfos[i]));
321 }
322 PyMem_Free(self->_ttinfos);
323 }
324
325 if (self->trans_ttinfos != NULL) {
326 PyMem_Free(self->trans_ttinfos);
327 }
328
329 free_tzrule(&(self->tzrule_after));
330
331 Py_XDECREF(self->key);
332 Py_XDECREF(self->file_repr);
333
334 Py_TYPE(self)->tp_free((PyObject *)self);
335}
336
337static PyObject *
338zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
339{
340 PyObject *file_obj = NULL;
341 PyObject *file_repr = NULL;
342 PyObject *key = Py_None;
343 PyZoneInfo_ZoneInfo *self = NULL;
344
345 static char *kwlist[] = {"", "key", NULL};
346 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
347 &key)) {
348 return NULL;
349 }
350
351 PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
352 self = (PyZoneInfo_ZoneInfo *)obj_self;
353 if (self == NULL) {
354 return NULL;
355 }
356
357 file_repr = PyUnicode_FromFormat("%R", file_obj);
358 if (file_repr == NULL) {
359 goto error;
360 }
361
362 if (load_data(self, file_obj)) {
363 goto error;
364 }
365
366 self->source = SOURCE_FILE;
367 self->file_repr = file_repr;
368 self->key = key;
369 Py_INCREF(key);
370
371 return obj_self;
372error:
373 Py_XDECREF(file_repr);
374 Py_XDECREF(self);
375 return NULL;
376}
377
378static PyObject *
379zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
380{
381 static char *kwlist[] = {"key", NULL};
382 PyObject *key = NULL;
383 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
384 return NULL;
385 }
386
387 PyObject *out = zoneinfo_new_instance(cls, key);
388 if (out != NULL) {
389 ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
390 }
391
392 return out;
393}
394
395static PyObject *
396zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
397{
398 PyObject *only_keys = NULL;
399 static char *kwlist[] = {"only_keys", NULL};
400
401 if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
402 &only_keys))) {
403 return NULL;
404 }
405
406 PyTypeObject *type = (PyTypeObject *)cls;
407 PyObject *weak_cache = get_weak_cache(type);
408
409 if (only_keys == NULL || only_keys == Py_None) {
410 PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
411 if (rv != NULL) {
412 Py_DECREF(rv);
413 }
414
415 clear_strong_cache(type);
Paul Ganssle62972d92020-05-16 04:20:06 -0400416 }
417 else {
418 PyObject *item = NULL;
419 PyObject *pop = PyUnicode_FromString("pop");
420 if (pop == NULL) {
421 return NULL;
422 }
423
424 PyObject *iter = PyObject_GetIter(only_keys);
425 if (iter == NULL) {
426 Py_DECREF(pop);
427 return NULL;
428 }
429
430 while ((item = PyIter_Next(iter))) {
431 // Remove from strong cache
432 eject_from_strong_cache(type, item);
433
434 // Remove from weak cache
435 PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
436 Py_None, NULL);
437
438 Py_DECREF(item);
439 if (tmp == NULL) {
440 break;
441 }
442 Py_DECREF(tmp);
443 }
444 Py_DECREF(iter);
445 Py_DECREF(pop);
446 }
447
448 if (PyErr_Occurred()) {
449 return NULL;
450 }
451
452 Py_RETURN_NONE;
453}
454
455static PyObject *
456zoneinfo_utcoffset(PyObject *self, PyObject *dt)
457{
458 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
459 if (tti == NULL) {
460 return NULL;
461 }
462 Py_INCREF(tti->utcoff);
463 return tti->utcoff;
464}
465
466static PyObject *
467zoneinfo_dst(PyObject *self, PyObject *dt)
468{
469 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
470 if (tti == NULL) {
471 return NULL;
472 }
473 Py_INCREF(tti->dstoff);
474 return tti->dstoff;
475}
476
477static PyObject *
478zoneinfo_tzname(PyObject *self, PyObject *dt)
479{
480 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
481 if (tti == NULL) {
482 return NULL;
483 }
484 Py_INCREF(tti->tzname);
485 return tti->tzname;
486}
487
Zackery Spytz2e4dd332020-09-23 12:43:45 -0600488#define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO
Paul Ganssle62972d92020-05-16 04:20:06 -0400489
490static PyObject *
491zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
492{
493 if (!PyDateTime_Check(dt)) {
494 PyErr_SetString(PyExc_TypeError,
495 "fromutc: argument must be a datetime");
496 return NULL;
497 }
498 if (GET_DT_TZINFO(dt) != obj_self) {
499 PyErr_SetString(PyExc_ValueError,
500 "fromutc: dt.tzinfo "
501 "is not self");
502 return NULL;
503 }
504
505 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
506
507 int64_t timestamp;
508 if (get_local_timestamp(dt, &timestamp)) {
509 return NULL;
510 }
511 size_t num_trans = self->num_transitions;
512
513 _ttinfo *tti = NULL;
514 unsigned char fold = 0;
515
516 if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
517 tti = self->ttinfo_before;
518 }
519 else if (num_trans == 0 ||
520 timestamp > self->trans_list_utc[num_trans - 1]) {
521 tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
522 PyDateTime_GET_YEAR(dt), &fold);
523
524 // Immediately after the last manual transition, the fold/gap is
525 // between self->trans_ttinfos[num_transitions - 1] and whatever
526 // ttinfo applies immediately after the last transition, not between
527 // the STD and DST rules in the tzrule_after, so we may need to
528 // adjust the fold value.
529 if (num_trans) {
530 _ttinfo *tti_prev = NULL;
531 if (num_trans == 1) {
532 tti_prev = self->ttinfo_before;
533 }
534 else {
535 tti_prev = self->trans_ttinfos[num_trans - 2];
536 }
537 int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
538 if (diff > 0 &&
539 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
540 fold = 1;
541 }
542 }
543 }
544 else {
545 size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
546 _ttinfo *tti_prev = NULL;
547
548 if (idx >= 2) {
549 tti_prev = self->trans_ttinfos[idx - 2];
550 tti = self->trans_ttinfos[idx - 1];
551 }
552 else {
553 tti_prev = self->ttinfo_before;
554 tti = self->trans_ttinfos[0];
555 }
556
557 // Detect fold
558 int64_t shift =
559 (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
560 if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
561 fold = 1;
562 }
563 }
564
565 PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
566 if (tmp == NULL) {
567 return NULL;
568 }
569
570 if (fold) {
571 if (PyDateTime_CheckExact(tmp)) {
572 ((PyDateTime_DateTime *)tmp)->fold = 1;
573 dt = tmp;
574 }
575 else {
576 PyObject *replace = PyObject_GetAttrString(tmp, "replace");
577 PyObject *args = PyTuple_New(0);
578 PyObject *kwargs = PyDict_New();
579
580 Py_DECREF(tmp);
581 if (args == NULL || kwargs == NULL || replace == NULL) {
582 Py_XDECREF(args);
583 Py_XDECREF(kwargs);
584 Py_XDECREF(replace);
585 return NULL;
586 }
587
588 dt = NULL;
Victor Stinner37834132020-10-27 17:12:53 +0100589 if (!PyDict_SetItemString(kwargs, "fold", _PyLong_GetOne())) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400590 dt = PyObject_Call(replace, args, kwargs);
591 }
592
593 Py_DECREF(args);
594 Py_DECREF(kwargs);
595 Py_DECREF(replace);
596
597 if (dt == NULL) {
598 return NULL;
599 }
600 }
601 }
602 else {
603 dt = tmp;
604 }
605 return dt;
606}
607
608static PyObject *
609zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
610{
611 PyObject *rv = NULL;
612 const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
613 if (!(self->key == Py_None)) {
614 rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
615 }
616 else {
617 assert(PyUnicode_Check(self->file_repr));
618 rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
619 self->file_repr);
620 }
621
622 return rv;
623}
624
625static PyObject *
626zoneinfo_str(PyZoneInfo_ZoneInfo *self)
627{
628 if (!(self->key == Py_None)) {
629 Py_INCREF(self->key);
630 return self->key;
631 }
632 else {
633 return zoneinfo_repr(self);
634 }
635}
636
637/* Pickles the ZoneInfo object by key and source.
638 *
639 * ZoneInfo objects are pickled by reference to the TZif file that they came
640 * from, which means that the exact transitions may be different or the file
641 * may not un-pickle if the data has changed on disk in the interim.
642 *
643 * It is necessary to include a bit indicating whether or not the object
644 * was constructed from the cache, because from-cache objects will hit the
645 * unpickling process's cache, whereas no-cache objects will bypass it.
646 *
647 * Objects constructed from ZoneInfo.from_file cannot be pickled.
648 */
649static PyObject *
650zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
651{
652 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
653 if (self->source == SOURCE_FILE) {
654 // Objects constructed from files cannot be pickled.
655 PyObject *pickle = PyImport_ImportModule("pickle");
656 if (pickle == NULL) {
657 return NULL;
658 }
659
660 PyObject *pickle_error =
661 PyObject_GetAttrString(pickle, "PicklingError");
662 Py_DECREF(pickle);
663 if (pickle_error == NULL) {
664 return NULL;
665 }
666
667 PyErr_Format(pickle_error,
668 "Cannot pickle a ZoneInfo file from a file stream.");
669 Py_DECREF(pickle_error);
670 return NULL;
671 }
672
673 unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
674 PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
675
676 if (constructor == NULL) {
677 return NULL;
678 }
679
680 PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
681 Py_DECREF(constructor);
682 return rv;
683}
684
685static PyObject *
686zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
687{
688 PyObject *key;
689 unsigned char from_cache;
690 if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
691 return NULL;
692 }
693
694 if (from_cache) {
695 PyObject *val_args = Py_BuildValue("(O)", key);
696 if (val_args == NULL) {
697 return NULL;
698 }
699
700 PyObject *rv = zoneinfo_new(cls, val_args, NULL);
701
702 Py_DECREF(val_args);
703 return rv;
704 }
705 else {
706 return zoneinfo_new_instance(cls, key);
707 }
708}
709
710/* It is relatively expensive to construct new timedelta objects, and in most
711 * cases we're looking at a relatively small number of timedeltas, such as
712 * integer number of hours, etc. We will keep a cache so that we construct
713 * a minimal number of these.
714 *
715 * Possibly this should be replaced with an LRU cache so that it's not possible
716 * for the memory usage to explode from this, but in order for this to be a
717 * serious problem, one would need to deliberately craft a malicious time zone
718 * file with many distinct offsets. As of tzdb 2019c, loading every single zone
719 * fills the cache with ~450 timedeltas for a total size of ~12kB.
720 *
721 * This returns a new reference to the timedelta.
722 */
723static PyObject *
724load_timedelta(long seconds)
725{
Serhiy Storchakafb5db7e2020-10-26 08:43:39 +0200726 PyObject *rv;
Paul Ganssle62972d92020-05-16 04:20:06 -0400727 PyObject *pyoffset = PyLong_FromLong(seconds);
728 if (pyoffset == NULL) {
729 return NULL;
730 }
Serhiy Storchakafb5db7e2020-10-26 08:43:39 +0200731 rv = PyDict_GetItemWithError(TIMEDELTA_CACHE, pyoffset);
732 if (rv == NULL) {
733 if (PyErr_Occurred()) {
734 goto error;
735 }
Paul Ganssle62972d92020-05-16 04:20:06 -0400736 PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
737 0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
738
739 if (tmp == NULL) {
740 goto error;
741 }
742
743 rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
744 Py_DECREF(tmp);
745 }
Paul Ganssle62972d92020-05-16 04:20:06 -0400746
Serhiy Storchakafb5db7e2020-10-26 08:43:39 +0200747 Py_XINCREF(rv);
Paul Ganssle62972d92020-05-16 04:20:06 -0400748 Py_DECREF(pyoffset);
Paul Ganssle62972d92020-05-16 04:20:06 -0400749 return rv;
750error:
751 Py_DECREF(pyoffset);
752 return NULL;
753}
754
755/* Constructor for _ttinfo object - this starts by initializing the _ttinfo
756 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
757 * initialized _ttinfo objects.
758 */
759static int
760build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
761{
762 out->utcoff = NULL;
763 out->dstoff = NULL;
764 out->tzname = NULL;
765
766 out->utcoff_seconds = utcoffset;
767 out->utcoff = load_timedelta(utcoffset);
768 if (out->utcoff == NULL) {
769 return -1;
770 }
771
772 out->dstoff = load_timedelta(dstoffset);
773 if (out->dstoff == NULL) {
774 return -1;
775 }
776
777 out->tzname = tzname;
778 Py_INCREF(tzname);
779
780 return 0;
781}
782
783/* Decrease reference count on any non-NULL members of a _ttinfo */
784static void
785xdecref_ttinfo(_ttinfo *ttinfo)
786{
787 if (ttinfo != NULL) {
788 Py_XDECREF(ttinfo->utcoff);
789 Py_XDECREF(ttinfo->dstoff);
790 Py_XDECREF(ttinfo->tzname);
791 }
792}
793
794/* Equality function for _ttinfo. */
795static int
796ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
797{
798 int rv;
799 if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
800 1) {
801 goto end;
802 }
803
804 if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
805 1) {
806 goto end;
807 }
808
809 if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
810 1) {
811 goto end;
812 }
813end:
814 return rv;
815}
816
817/* Given a file-like object, this populates a ZoneInfo object
818 *
819 * The current version calls into a Python function to read the data from
820 * file into Python objects, and this translates those Python objects into
821 * C values and calculates derived values (e.g. dstoff) in C.
822 *
823 * This returns 0 on success and -1 on failure.
824 *
825 * The function will never return while `self` is partially initialized —
826 * the object only needs to be freed / deallocated if this succeeds.
827 */
828static int
829load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
830{
831 PyObject *data_tuple = NULL;
832
833 long *utcoff = NULL;
834 long *dstoff = NULL;
835 size_t *trans_idx = NULL;
836 unsigned char *isdst = NULL;
837
838 self->trans_list_utc = NULL;
839 self->trans_list_wall[0] = NULL;
840 self->trans_list_wall[1] = NULL;
841 self->trans_ttinfos = NULL;
842 self->_ttinfos = NULL;
843 self->file_repr = NULL;
844
845 size_t ttinfos_allocated = 0;
846
847 data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
848
849 if (data_tuple == NULL) {
850 goto error;
851 }
852
853 if (!PyTuple_CheckExact(data_tuple)) {
854 PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
855 data_tuple);
856 goto error;
857 }
858
859 // Unpack the data tuple
860 PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
861 if (trans_idx_list == NULL) {
862 goto error;
863 }
864
865 PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
866 if (trans_utc == NULL) {
867 goto error;
868 }
869
870 PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
871 if (utcoff_list == NULL) {
872 goto error;
873 }
874
875 PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
876 if (isdst_list == NULL) {
877 goto error;
878 }
879
880 PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
881 if (abbr == NULL) {
882 goto error;
883 }
884
885 PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
886 if (tz_str == NULL) {
887 goto error;
888 }
889
890 // Load the relevant sizes
891 Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100892 if (num_transitions < 0) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400893 goto error;
894 }
895
896 Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100897 if (num_ttinfos < 0) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400898 goto error;
899 }
900
901 self->num_transitions = (size_t)num_transitions;
902 self->num_ttinfos = (size_t)num_ttinfos;
903
904 // Load the transition indices and list
905 self->trans_list_utc =
906 PyMem_Malloc(self->num_transitions * sizeof(int64_t));
907 trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
908
Pablo Galindoe4799b92020-05-27 21:48:12 +0100909 for (size_t i = 0; i < self->num_transitions; ++i) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400910 PyObject *num = PyTuple_GetItem(trans_utc, i);
911 if (num == NULL) {
912 goto error;
913 }
914 self->trans_list_utc[i] = PyLong_AsLongLong(num);
915 if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
916 goto error;
917 }
918
919 num = PyTuple_GetItem(trans_idx_list, i);
920 if (num == NULL) {
921 goto error;
922 }
923
924 Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
925 if (cur_trans_idx == -1) {
926 goto error;
927 }
928
929 trans_idx[i] = (size_t)cur_trans_idx;
930 if (trans_idx[i] > self->num_ttinfos) {
931 PyErr_Format(
932 PyExc_ValueError,
933 "Invalid transition index found while reading TZif: %zd",
934 cur_trans_idx);
935
936 goto error;
937 }
938 }
939
940 // Load UTC offsets and isdst (size num_ttinfos)
941 utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
942 isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
943
944 if (utcoff == NULL || isdst == NULL) {
945 goto error;
946 }
Pablo Galindoe4799b92020-05-27 21:48:12 +0100947 for (size_t i = 0; i < self->num_ttinfos; ++i) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400948 PyObject *num = PyTuple_GetItem(utcoff_list, i);
949 if (num == NULL) {
950 goto error;
951 }
952
953 utcoff[i] = PyLong_AsLong(num);
954 if (utcoff[i] == -1 && PyErr_Occurred()) {
955 goto error;
956 }
957
958 num = PyTuple_GetItem(isdst_list, i);
959 if (num == NULL) {
960 goto error;
961 }
962
963 int isdst_with_error = PyObject_IsTrue(num);
964 if (isdst_with_error == -1) {
965 goto error;
966 }
967 else {
968 isdst[i] = (unsigned char)isdst_with_error;
969 }
970 }
971
972 dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
973 if (dstoff == NULL) {
974 goto error;
975 }
976
977 // Derive dstoff and trans_list_wall from the information we've loaded
978 utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
979 self->num_ttinfos);
980
981 if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
982 self->trans_list_wall, self->num_ttinfos,
983 self->num_transitions)) {
984 goto error;
985 }
986
987 // Build _ttinfo objects from utcoff, dstoff and abbr
988 self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
989 for (size_t i = 0; i < self->num_ttinfos; ++i) {
990 PyObject *tzname = PyTuple_GetItem(abbr, i);
991 if (tzname == NULL) {
992 goto error;
993 }
994
995 ttinfos_allocated++;
996 if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
997 goto error;
998 }
999 }
1000
1001 // Build our mapping from transition to the ttinfo that applies
1002 self->trans_ttinfos =
1003 PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1004 for (size_t i = 0; i < self->num_transitions; ++i) {
1005 size_t ttinfo_idx = trans_idx[i];
1006 assert(ttinfo_idx < self->num_ttinfos);
1007 self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1008 }
1009
1010 // Set ttinfo_before to the first non-DST transition
1011 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1012 if (!isdst[i]) {
1013 self->ttinfo_before = &(self->_ttinfos[i]);
1014 break;
1015 }
1016 }
1017
1018 // If there are only DST ttinfos, pick the first one, if there are no
1019 // ttinfos at all, set ttinfo_before to NULL
1020 if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1021 self->ttinfo_before = &(self->_ttinfos[0]);
1022 }
1023
1024 if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1025 if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1026 goto error;
1027 }
1028 }
1029 else {
1030 if (!self->num_ttinfos) {
1031 PyErr_Format(PyExc_ValueError, "No time zone information found.");
1032 goto error;
1033 }
1034
1035 size_t idx;
1036 if (!self->num_transitions) {
1037 idx = self->num_ttinfos - 1;
1038 }
1039 else {
1040 idx = trans_idx[self->num_transitions - 1];
1041 }
1042
1043 _ttinfo *tti = &(self->_ttinfos[idx]);
1044 build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1045 &(self->tzrule_after));
1046
1047 // We've abused the build_tzrule constructor to construct an STD-only
1048 // rule mimicking whatever ttinfo we've picked up, but it's possible
1049 // that the one we've picked up is a DST zone, so we need to make sure
1050 // that the dstoff is set correctly in that case.
1051 if (PyObject_IsTrue(tti->dstoff)) {
1052 _ttinfo *tti_after = &(self->tzrule_after.std);
1053 Py_DECREF(tti_after->dstoff);
1054 tti_after->dstoff = tti->dstoff;
1055 Py_INCREF(tti_after->dstoff);
1056 }
1057 }
1058
1059 // Determine if this is a "fixed offset" zone, meaning that the output of
1060 // the utcoffset, dst and tzname functions does not depend on the specific
1061 // datetime passed.
1062 //
1063 // We make three simplifying assumptions here:
1064 //
1065 // 1. If tzrule_after is not std_only, it has transitions that might occur
1066 // (it is possible to construct TZ strings that specify STD and DST but
1067 // no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1068 // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1069 // represent different offsets.
1070 // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1071 // fixed-offset zone with extra _ttinfos defined may appear to *not* be
1072 // a fixed offset zone).
1073 //
1074 // Violations to these assumptions would be fairly exotic, and exotic
1075 // zones should almost certainly not be used with datetime.time (the
1076 // only thing that would be affected by this).
1077 if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1078 self->fixed_offset = 0;
1079 }
1080 else if (self->num_ttinfos == 0) {
1081 self->fixed_offset = 1;
1082 }
1083 else {
1084 int constant_offset =
1085 ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1086 if (constant_offset < 0) {
1087 goto error;
1088 }
1089 else {
1090 self->fixed_offset = constant_offset;
1091 }
1092 }
1093
1094 int rv = 0;
1095 goto cleanup;
1096error:
1097 // These resources only need to be freed if we have failed, if we succeed
1098 // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1099 // method to free the relevant resources.
1100 if (self->trans_list_utc != NULL) {
1101 PyMem_Free(self->trans_list_utc);
1102 self->trans_list_utc = NULL;
1103 }
1104
1105 for (size_t i = 0; i < 2; ++i) {
1106 if (self->trans_list_wall[i] != NULL) {
1107 PyMem_Free(self->trans_list_wall[i]);
1108 self->trans_list_wall[i] = NULL;
1109 }
1110 }
1111
1112 if (self->_ttinfos != NULL) {
1113 for (size_t i = 0; i < ttinfos_allocated; ++i) {
1114 xdecref_ttinfo(&(self->_ttinfos[i]));
1115 }
1116 PyMem_Free(self->_ttinfos);
1117 self->_ttinfos = NULL;
1118 }
1119
1120 if (self->trans_ttinfos != NULL) {
1121 PyMem_Free(self->trans_ttinfos);
1122 self->trans_ttinfos = NULL;
1123 }
1124
1125 rv = -1;
1126cleanup:
1127 Py_XDECREF(data_tuple);
1128
1129 if (utcoff != NULL) {
1130 PyMem_Free(utcoff);
1131 }
1132
1133 if (dstoff != NULL) {
1134 PyMem_Free(dstoff);
1135 }
1136
1137 if (isdst != NULL) {
1138 PyMem_Free(isdst);
1139 }
1140
1141 if (trans_idx != NULL) {
1142 PyMem_Free(trans_idx);
1143 }
1144
1145 return rv;
1146}
1147
1148/* Function to calculate the local timestamp of a transition from the year. */
1149int64_t
1150calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1151{
1152 CalendarRule *self = (CalendarRule *)base_self;
1153
1154 // We want (year, month, day of month); we have year and month, but we
1155 // need to turn (week, day-of-week) into day-of-month
1156 //
1157 // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1158 // Week 5 represents the last occurrence of day `day`, so we need to know
1159 // the first weekday of the month and the number of days in the month.
1160 int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1161 uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1162 if (self->month == 2 && is_leap_year(year)) {
1163 days_in_month += 1;
1164 }
1165
1166 // This equation seems magical, so I'll break it down:
1167 // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1168 // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1169 // because this math is mod 7
1170 // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1171 // numbers so that -1 % 7 = 6).
1172 // 3. Add 1 because month days are a 1-based index.
1173 int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1174 if (month_day < 0) {
1175 month_day += 7;
1176 }
1177 month_day += 1;
1178
1179 // Now use a 0-based index version of `week` to calculate the w-th
1180 // occurrence of `day`
1181 month_day += ((int8_t)(self->week) - 1) * 7;
1182
1183 // month_day will only be > days_in_month if w was 5, and `w` means "last
1184 // occurrence of `d`", so now we just check if we over-shot the end of the
1185 // month and if so knock off 1 week.
1186 if (month_day > days_in_month) {
1187 month_day -= 7;
1188 }
1189
1190 int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1191 return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1192 (int64_t)(self->minute * 60) + (int64_t)(self->second));
1193}
1194
1195/* Constructor for CalendarRule. */
1196int
1197calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1198 int8_t minute, int8_t second, CalendarRule *out)
1199{
1200 // These bounds come from the POSIX standard, which describes an Mm.n.d
1201 // rule as:
1202 //
1203 // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1204 // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1205 // may occur in either the fourth or the fifth week). Week 1 is the first
1206 // week in which the d'th day occurs. Day zero is Sunday.
1207 if (month <= 0 || month > 12) {
1208 PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1209 return -1;
1210 }
1211
1212 if (week <= 0 || week > 5) {
1213 PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1214 return -1;
1215 }
1216
Victor Stinneraefb69b2020-12-16 16:26:15 +01001217 // If the 'day' parameter type is changed to a signed type,
1218 // "day < 0" check must be added.
1219 if (/* day < 0 || */ day > 6) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001220 PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1221 return -1;
1222 }
1223
1224 TransitionRuleType base = {&calendarrule_year_to_timestamp};
1225
1226 CalendarRule new_offset = {
1227 .base = base,
1228 .month = month,
1229 .week = week,
1230 .day = day,
1231 .hour = hour,
1232 .minute = minute,
1233 .second = second,
1234 };
1235
1236 *out = new_offset;
1237 return 0;
1238}
1239
1240/* Function to calculate the local timestamp of a transition from the year.
1241 *
1242 * This translates the day of the year into a local timestamp — either a
1243 * 1-based Julian day, not including leap days, or the 0-based year-day,
1244 * including leap days.
1245 * */
1246int64_t
1247dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1248{
1249 // The function signature requires a TransitionRuleType pointer, but this
1250 // function is only applicable to DayRule* objects.
1251 DayRule *self = (DayRule *)base_self;
1252
1253 // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1254 // to know the number of days since 1970-01-01, so we must subtract off
1255 // the equivalent of ymd_to_ord(1970, 1, 1).
1256 //
1257 // We subtract off an additional 1 day to account for January 1st (we want
1258 // the number of full days *before* the date of the transition - partial
1259 // days are accounted for in the hour, minute and second portions.
1260 int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1261
1262 // The Julian day specification skips over February 29th in leap years,
1263 // from the POSIX standard:
1264 //
1265 // Leap days shall not be counted. That is, in all years-including leap
1266 // years-February 28 is day 59 and March 1 is day 60. It is impossible to
1267 // refer explicitly to the occasional February 29.
1268 //
1269 // This is actually more useful than you'd think — if you want a rule that
1270 // always transitions on a given calendar day (other than February 29th),
1271 // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1272 // always refers to December 31st.
1273 unsigned int day = self->day;
1274 if (self->julian && day >= 59 && is_leap_year(year)) {
1275 day += 1;
1276 }
1277
1278 return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1279 (self->minute * 60) + self->second;
1280}
1281
1282/* Constructor for DayRule. */
1283static int
1284dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1285 int8_t second, DayRule *out)
1286{
1287 // The POSIX standard specifies that Julian days must be in the range (1 <=
1288 // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1289 // be in the range (0 <= n <= 365).
1290 if (day < julian || day > 365) {
1291 PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1292 julian, day);
1293 return -1;
1294 }
1295
1296 TransitionRuleType base = {
1297 &dayrule_year_to_timestamp,
1298 };
1299
1300 DayRule tmp = {
1301 .base = base,
1302 .julian = julian,
1303 .day = day,
1304 .hour = hour,
1305 .minute = minute,
1306 .second = second,
1307 };
1308
1309 *out = tmp;
1310
1311 return 0;
1312}
1313
1314/* Calculate the start and end rules for a _tzrule in the given year. */
1315static void
1316tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1317{
1318 assert(rule->start != NULL);
1319 assert(rule->end != NULL);
1320 *start = rule->start->year_to_timestamp(rule->start, year);
1321 *end = rule->end->year_to_timestamp(rule->end, year);
1322}
1323
1324/* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1325 *
1326 * This takes a local timestamp and fold for disambiguation purposes; the year
1327 * could technically be calculated from the timestamp, but given that the
1328 * callers of this function already have the year information accessible from
1329 * the datetime struct, it is taken as an additional parameter to reduce
1330 * unncessary calculation.
1331 * */
1332static _ttinfo *
1333find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1334{
1335 if (rule->std_only) {
1336 return &(rule->std);
1337 }
1338
1339 int64_t start, end;
1340 uint8_t isdst;
1341
1342 tzrule_transitions(rule, year, &start, &end);
1343
1344 // With fold = 0, the period (denominated in local time) with the smaller
1345 // offset starts at the end of the gap and ends at the end of the fold;
1346 // with fold = 1, it runs from the start of the gap to the beginning of the
1347 // fold.
1348 //
1349 // So in order to determine the DST boundaries we need to know both the
1350 // fold and whether DST is positive or negative (rare), and it turns out
1351 // that this boils down to fold XOR is_positive.
1352 if (fold == (rule->dst_diff >= 0)) {
1353 end -= rule->dst_diff;
1354 }
1355 else {
1356 start += rule->dst_diff;
1357 }
1358
1359 if (start < end) {
1360 isdst = (ts >= start) && (ts < end);
1361 }
1362 else {
1363 isdst = (ts < end) || (ts >= start);
1364 }
1365
1366 if (isdst) {
1367 return &(rule->dst);
1368 }
1369 else {
1370 return &(rule->std);
1371 }
1372}
1373
1374/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1375 *
1376 * This function can determine the _ttinfo that applies at a given epoch time,
1377 * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1378 * This is to be used in the .fromutc() function.
1379 *
1380 * The year is technically a redundant parameter, because it can be calculated
1381 * from the timestamp, but all callers of this function should have the year
1382 * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1383 * calculation.
1384 **/
1385static _ttinfo *
1386find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1387 unsigned char *fold)
1388{
1389 if (rule->std_only) {
1390 *fold = 0;
1391 return &(rule->std);
1392 }
1393
1394 int64_t start, end;
1395 uint8_t isdst;
1396 tzrule_transitions(rule, year, &start, &end);
1397 start -= rule->std.utcoff_seconds;
1398 end -= rule->dst.utcoff_seconds;
1399
1400 if (start < end) {
1401 isdst = (ts >= start) && (ts < end);
1402 }
1403 else {
1404 isdst = (ts < end) || (ts >= start);
1405 }
1406
1407 // For positive DST, the ambiguous period is one dst_diff after the end of
1408 // DST; for negative DST, the ambiguous period is one dst_diff before the
1409 // start of DST.
1410 int64_t ambig_start, ambig_end;
1411 if (rule->dst_diff > 0) {
1412 ambig_start = end;
1413 ambig_end = end + rule->dst_diff;
1414 }
1415 else {
1416 ambig_start = start;
1417 ambig_end = start - rule->dst_diff;
1418 }
1419
1420 *fold = (ts >= ambig_start) && (ts < ambig_end);
1421
1422 if (isdst) {
1423 return &(rule->dst);
1424 }
1425 else {
1426 return &(rule->std);
1427 }
1428}
1429
1430/* Parse a TZ string in the format specified by the POSIX standard:
1431 *
1432 * std offset[dst[offset],start[/time],end[/time]]
1433 *
1434 * std and dst must be 3 or more characters long and must not contain a
1435 * leading colon, embedded digits, commas, nor a plus or minus signs; The
1436 * spaces between "std" and "offset" are only for display and are not actually
1437 * present in the string.
1438 *
1439 * The format of the offset is ``[+|-]hh[:mm[:ss]]``
1440 *
1441 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1442 *
1443 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1444 */
1445static int
1446parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1447{
1448 PyObject *std_abbr = NULL;
1449 PyObject *dst_abbr = NULL;
1450 TransitionRuleType *start = NULL;
1451 TransitionRuleType *end = NULL;
Dong-hee Naa487a392020-05-22 01:56:03 +09001452 // Initialize offsets to invalid value (> 24 hours)
1453 long std_offset = 1 << 20;
1454 long dst_offset = 1 << 20;
Paul Ganssle62972d92020-05-16 04:20:06 -04001455
1456 char *tz_str = PyBytes_AsString(tz_str_obj);
1457 if (tz_str == NULL) {
1458 return -1;
1459 }
1460 char *p = tz_str;
1461
1462 // Read the `std` abbreviation, which must be at least 3 characters long.
Pablo Galindoe4799b92020-05-27 21:48:12 +01001463 Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
Paul Ganssle62972d92020-05-16 04:20:06 -04001464 if (num_chars < 1) {
1465 PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1466 goto error;
1467 }
1468
1469 p += num_chars;
1470
1471 // Now read the STD offset, which is required
1472 num_chars = parse_tz_delta(p, &std_offset);
1473 if (num_chars < 0) {
1474 PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1475 goto error;
1476 }
1477 p += num_chars;
1478
1479 // If the string ends here, there is no DST, otherwise we must parse the
1480 // DST abbreviation and start and end dates and times.
1481 if (*p == '\0') {
1482 goto complete;
1483 }
1484
1485 num_chars = parse_abbr(p, &dst_abbr);
1486 if (num_chars < 1) {
1487 PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1488 goto error;
1489 }
1490 p += num_chars;
1491
1492 if (*p == ',') {
1493 // From the POSIX standard:
1494 //
1495 // If no offset follows dst, the alternative time is assumed to be one
1496 // hour ahead of standard time.
1497 dst_offset = std_offset + 3600;
1498 }
1499 else {
1500 num_chars = parse_tz_delta(p, &dst_offset);
1501 if (num_chars < 0) {
1502 PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1503 tz_str_obj);
1504 goto error;
1505 }
1506
1507 p += num_chars;
1508 }
1509
1510 TransitionRuleType **transitions[2] = {&start, &end};
1511 for (size_t i = 0; i < 2; ++i) {
1512 if (*p != ',') {
1513 PyErr_Format(PyExc_ValueError,
1514 "Missing transition rules in TZ string: %R",
1515 tz_str_obj);
1516 goto error;
1517 }
1518 p++;
1519
1520 num_chars = parse_transition_rule(p, transitions[i]);
1521 if (num_chars < 0) {
1522 PyErr_Format(PyExc_ValueError,
1523 "Malformed transition rule in TZ string: %R",
1524 tz_str_obj);
1525 goto error;
1526 }
1527 p += num_chars;
1528 }
1529
1530 if (*p != '\0') {
1531 PyErr_Format(PyExc_ValueError,
1532 "Extraneous characters at end of TZ string: %R",
1533 tz_str_obj);
1534 goto error;
1535 }
1536
1537complete:
1538 build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1539 Py_DECREF(std_abbr);
1540 Py_XDECREF(dst_abbr);
1541
1542 return 0;
1543error:
1544 Py_XDECREF(std_abbr);
1545 if (dst_abbr != NULL && dst_abbr != Py_None) {
1546 Py_DECREF(dst_abbr);
1547 }
1548
1549 if (start != NULL) {
1550 PyMem_Free(start);
1551 }
1552
1553 if (end != NULL) {
1554 PyMem_Free(end);
1555 }
1556
1557 return -1;
1558}
1559
Pablo Galindoe4799b92020-05-27 21:48:12 +01001560static int
1561parse_uint(const char *const p, uint8_t *value)
Paul Ganssle62972d92020-05-16 04:20:06 -04001562{
1563 if (!isdigit(*p)) {
1564 return -1;
1565 }
1566
Pablo Galindoe4799b92020-05-27 21:48:12 +01001567 *value = (*p) - '0';
1568 return 0;
Paul Ganssle62972d92020-05-16 04:20:06 -04001569}
1570
1571/* Parse the STD and DST abbreviations from a TZ string. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001572static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001573parse_abbr(const char *const p, PyObject **abbr)
1574{
1575 const char *ptr = p;
1576 char buff = *ptr;
1577 const char *str_start;
1578 const char *str_end;
1579
1580 if (*ptr == '<') {
1581 ptr++;
1582 str_start = ptr;
1583 while ((buff = *ptr) != '>') {
1584 // From the POSIX standard:
1585 //
1586 // In the quoted form, the first character shall be the less-than
1587 // ( '<' ) character and the last character shall be the
1588 // greater-than ( '>' ) character. All characters between these
1589 // quoting characters shall be alphanumeric characters from the
1590 // portable character set in the current locale, the plus-sign (
1591 // '+' ) character, or the minus-sign ( '-' ) character. The std
1592 // and dst fields in this case shall not include the quoting
1593 // characters.
1594 if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1595 buff != '-') {
1596 return -1;
1597 }
1598 ptr++;
1599 }
1600 str_end = ptr;
1601 ptr++;
1602 }
1603 else {
1604 str_start = p;
1605 // From the POSIX standard:
1606 //
1607 // In the unquoted form, all characters in these fields shall be
1608 // alphabetic characters from the portable character set in the
1609 // current locale.
1610 while (isalpha(*ptr)) {
1611 ptr++;
1612 }
1613 str_end = ptr;
1614 }
1615
1616 *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
Gregory P. Smithd780fa72020-06-22 00:39:28 -07001617 if (*abbr == NULL) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001618 return -1;
1619 }
1620
1621 return ptr - p;
1622}
1623
1624/* Parse a UTC offset from a TZ str. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001625static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001626parse_tz_delta(const char *const p, long *total_seconds)
1627{
1628 // From the POSIX spec:
1629 //
1630 // Indicates the value added to the local time to arrive at Coordinated
1631 // Universal Time. The offset has the form:
1632 //
1633 // hh[:mm[:ss]]
1634 //
1635 // One or more digits may be used; the value is always interpreted as a
1636 // decimal number.
1637 //
1638 // The POSIX spec says that the values for `hour` must be between 0 and 24
1639 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1640 // transition times may be signed and range from -167 to 167.
1641 long sign = -1;
1642 long hours = 0;
1643 long minutes = 0;
1644 long seconds = 0;
1645
1646 const char *ptr = p;
1647 char buff = *ptr;
1648 if (buff == '-' || buff == '+') {
1649 // Negative numbers correspond to *positive* offsets, from the spec:
1650 //
1651 // If preceded by a '-', the timezone shall be east of the Prime
1652 // Meridian; otherwise, it shall be west (which may be indicated by
1653 // an optional preceding '+' ).
1654 if (buff == '-') {
1655 sign = 1;
1656 }
1657
1658 ptr++;
1659 }
1660
1661 // The hour can be 1 or 2 numeric characters
1662 for (size_t i = 0; i < 2; ++i) {
1663 buff = *ptr;
1664 if (!isdigit(buff)) {
1665 if (i == 0) {
1666 return -1;
1667 }
1668 else {
1669 break;
1670 }
1671 }
1672
1673 hours *= 10;
1674 hours += buff - '0';
1675 ptr++;
1676 }
1677
1678 if (hours > 24 || hours < 0) {
1679 return -1;
1680 }
1681
1682 // Minutes and seconds always of the format ":dd"
1683 long *outputs[2] = {&minutes, &seconds};
1684 for (size_t i = 0; i < 2; ++i) {
1685 if (*ptr != ':') {
1686 goto complete;
1687 }
1688 ptr++;
1689
1690 for (size_t j = 0; j < 2; ++j) {
1691 buff = *ptr;
1692 if (!isdigit(buff)) {
1693 return -1;
1694 }
1695 *(outputs[i]) *= 10;
1696 *(outputs[i]) += buff - '0';
1697 ptr++;
1698 }
1699 }
1700
1701complete:
1702 *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1703
1704 return ptr - p;
1705}
1706
1707/* Parse the date portion of a transition rule. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001708static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001709parse_transition_rule(const char *const p, TransitionRuleType **out)
1710{
1711 // The full transition rule indicates when to change back and forth between
1712 // STD and DST, and has the form:
1713 //
1714 // date[/time],date[/time]
1715 //
1716 // This function parses an individual date[/time] section, and returns
1717 // the number of characters that contributed to the transition rule. This
1718 // does not include the ',' at the end of the first rule.
1719 //
1720 // The POSIX spec states that if *time* is not given, the default is 02:00.
1721 const char *ptr = p;
1722 int8_t hour = 2;
1723 int8_t minute = 0;
1724 int8_t second = 0;
1725
1726 // Rules come in one of three flavors:
1727 //
1728 // 1. Jn: Julian day n, with no leap days.
1729 // 2. n: Day of year (0-based, with leap days)
1730 // 3. Mm.n.d: Specifying by month, week and day-of-week.
1731
1732 if (*ptr == 'M') {
1733 uint8_t month, week, day;
1734 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001735 if (parse_uint(ptr, &month)) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001736 return -1;
1737 }
Paul Ganssle62972d92020-05-16 04:20:06 -04001738 ptr++;
1739 if (*ptr != '.') {
Pablo Galindoe4799b92020-05-27 21:48:12 +01001740 uint8_t tmp;
1741 if (parse_uint(ptr, &tmp)) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001742 return -1;
1743 }
1744
1745 month *= 10;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001746 month += tmp;
Paul Ganssle62972d92020-05-16 04:20:06 -04001747 ptr++;
1748 }
1749
1750 uint8_t *values[2] = {&week, &day};
1751 for (size_t i = 0; i < 2; ++i) {
1752 if (*ptr != '.') {
1753 return -1;
1754 }
1755 ptr++;
1756
Pablo Galindoe4799b92020-05-27 21:48:12 +01001757 if (parse_uint(ptr, values[i])) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001758 return -1;
1759 }
1760 ptr++;
Paul Ganssle62972d92020-05-16 04:20:06 -04001761 }
1762
1763 if (*ptr == '/') {
1764 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001765 Py_ssize_t num_chars =
Paul Ganssle62972d92020-05-16 04:20:06 -04001766 parse_transition_time(ptr, &hour, &minute, &second);
1767 if (num_chars < 0) {
1768 return -1;
1769 }
1770 ptr += num_chars;
1771 }
1772
1773 CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1774 if (rv == NULL) {
1775 return -1;
1776 }
1777
1778 if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1779 PyMem_Free(rv);
1780 return -1;
1781 }
1782
1783 *out = (TransitionRuleType *)rv;
1784 }
1785 else {
1786 uint8_t julian = 0;
1787 unsigned int day = 0;
1788 if (*ptr == 'J') {
1789 julian = 1;
1790 ptr++;
1791 }
1792
1793 for (size_t i = 0; i < 3; ++i) {
1794 if (!isdigit(*ptr)) {
1795 if (i == 0) {
1796 return -1;
1797 }
1798 break;
1799 }
1800 day *= 10;
1801 day += (*ptr) - '0';
1802 ptr++;
1803 }
1804
1805 if (*ptr == '/') {
1806 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001807 Py_ssize_t num_chars =
Paul Ganssle62972d92020-05-16 04:20:06 -04001808 parse_transition_time(ptr, &hour, &minute, &second);
1809 if (num_chars < 0) {
1810 return -1;
1811 }
1812 ptr += num_chars;
1813 }
1814
1815 DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1816 if (rv == NULL) {
1817 return -1;
1818 }
1819
1820 if (dayrule_new(julian, day, hour, minute, second, rv)) {
1821 PyMem_Free(rv);
1822 return -1;
1823 }
1824 *out = (TransitionRuleType *)rv;
1825 }
1826
1827 return ptr - p;
1828}
1829
1830/* Parse the time portion of a transition rule (e.g. following an /) */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001831static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001832parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1833 int8_t *second)
1834{
1835 // From the spec:
1836 //
1837 // The time has the same format as offset except that no leading sign
1838 // ( '-' or '+' ) is allowed.
1839 //
1840 // The format for the offset is:
1841 //
1842 // h[h][:mm[:ss]]
1843 //
1844 // RFC 8536 also allows transition times to be signed and to range from
1845 // -167 to +167, but the current version only supports [0, 99].
1846 //
1847 // TODO: Support the full range of transition hours.
1848 int8_t *components[3] = {hour, minute, second};
1849 const char *ptr = p;
1850 int8_t sign = 1;
1851
1852 if (*ptr == '-' || *ptr == '+') {
1853 if (*ptr == '-') {
1854 sign = -1;
1855 }
1856 ptr++;
1857 }
1858
1859 for (size_t i = 0; i < 3; ++i) {
1860 if (i > 0) {
1861 if (*ptr != ':') {
1862 break;
1863 }
1864 ptr++;
1865 }
1866
1867 uint8_t buff = 0;
1868 for (size_t j = 0; j < 2; j++) {
1869 if (!isdigit(*ptr)) {
1870 if (i == 0 && j > 0) {
1871 break;
1872 }
1873 return -1;
1874 }
1875
1876 buff *= 10;
1877 buff += (*ptr) - '0';
1878 ptr++;
1879 }
1880
1881 *(components[i]) = sign * buff;
1882 }
1883
1884 return ptr - p;
1885}
1886
1887/* Constructor for a _tzrule.
1888 *
1889 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1890 * case `dst_offset` will be ignored and `start` and `end` are expected to be
1891 * NULL as well.
1892 *
1893 * Returns 0 on success.
1894 */
1895static int
1896build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1897 long dst_offset, TransitionRuleType *start,
1898 TransitionRuleType *end, _tzrule *out)
1899{
Dong-hee Naa487a392020-05-22 01:56:03 +09001900 _tzrule rv = {{0}};
Paul Ganssle62972d92020-05-16 04:20:06 -04001901
1902 rv.start = start;
1903 rv.end = end;
1904
1905 if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1906 goto error;
1907 }
1908
1909 if (dst_abbr != NULL) {
1910 rv.dst_diff = dst_offset - std_offset;
1911 if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1912 goto error;
1913 }
1914 }
1915 else {
1916 rv.std_only = 1;
1917 }
1918
1919 *out = rv;
1920
1921 return 0;
1922error:
1923 xdecref_ttinfo(&rv.std);
1924 xdecref_ttinfo(&rv.dst);
1925 return -1;
1926}
1927
1928/* Destructor for _tzrule. */
1929static void
1930free_tzrule(_tzrule *tzrule)
1931{
1932 xdecref_ttinfo(&(tzrule->std));
1933 if (!tzrule->std_only) {
1934 xdecref_ttinfo(&(tzrule->dst));
1935 }
1936
1937 if (tzrule->start != NULL) {
1938 PyMem_Free(tzrule->start);
1939 }
1940
1941 if (tzrule->end != NULL) {
1942 PyMem_Free(tzrule->end);
1943 }
1944}
1945
1946/* Calculate DST offsets from transitions and UTC offsets
1947 *
1948 * This is necessary because each C `ttinfo` only contains the UTC offset,
1949 * time zone abbreviation and an isdst boolean - it does not include the
1950 * amount of the DST offset, but we need the amount for the dst() function.
1951 *
1952 * Thus function uses heuristics to infer what the offset should be, so it
1953 * is not guaranteed that this will work for all zones. If we cannot assign
1954 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1955 * bool(dt.dst()) will always match ttinfo.isdst.
1956 */
1957static void
1958utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1959 unsigned char *isdsts, size_t num_transitions,
1960 size_t num_ttinfos)
1961{
1962 size_t dst_count = 0;
1963 size_t dst_found = 0;
1964 for (size_t i = 0; i < num_ttinfos; ++i) {
1965 dst_count++;
1966 }
1967
1968 for (size_t i = 1; i < num_transitions; ++i) {
1969 if (dst_count == dst_found) {
1970 break;
1971 }
1972
1973 size_t idx = trans_idx[i];
1974 size_t comp_idx = trans_idx[i - 1];
1975
1976 // Only look at DST offsets that have nto been assigned already
1977 if (!isdsts[idx] || dstoffs[idx] != 0) {
1978 continue;
1979 }
1980
1981 long dstoff = 0;
1982 long utcoff = utcoffs[idx];
1983
1984 if (!isdsts[comp_idx]) {
1985 dstoff = utcoff - utcoffs[comp_idx];
1986 }
1987
1988 if (!dstoff && idx < (num_ttinfos - 1)) {
1989 comp_idx = trans_idx[i + 1];
1990
1991 // If the following transition is also DST and we couldn't find
1992 // the DST offset by this point, we're going to have to skip it
1993 // and hope this transition gets assigned later
1994 if (isdsts[comp_idx]) {
1995 continue;
1996 }
1997
1998 dstoff = utcoff - utcoffs[comp_idx];
1999 }
2000
2001 if (dstoff) {
2002 dst_found++;
2003 dstoffs[idx] = dstoff;
2004 }
2005 }
2006
2007 if (dst_found < dst_count) {
2008 // If there are time zones we didn't find a value for, we'll end up
2009 // with dstoff = 0 for something where isdst=1. This is obviously
2010 // wrong — one hour will be a much better guess than 0.
2011 for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2012 if (isdsts[idx] && !dstoffs[idx]) {
2013 dstoffs[idx] = 3600;
2014 }
2015 }
2016 }
2017}
2018
2019#define _swap(x, y, buffer) \
2020 buffer = x; \
2021 x = y; \
2022 y = buffer;
2023
2024/* Calculate transitions in local time from UTC time and offsets.
2025 *
2026 * We want to know when each transition occurs, denominated in the number of
2027 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2028 * *local time* (note: this is *not* equivalent to the output of
2029 * datetime.timestamp, which is the total number of seconds actual elapsed
2030 * since 1970-01-01T00:00:00Z in UTC).
2031 *
2032 * This is an ambiguous question because "local time" can be ambiguous — but it
2033 * is disambiguated by the `fold` parameter, so we allocate two arrays:
2034 *
2035 * trans_local[0]: The wall-time transitions for fold=0
2036 * trans_local[1]: The wall-time transitions for fold=1
2037 *
2038 * This returns 0 on success and a negative number of failure. The trans_local
2039 * arrays must be freed if they are not NULL.
2040 */
2041static int
2042ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2043 int64_t *trans_local[2], size_t num_ttinfos,
2044 size_t num_transitions)
2045{
2046 if (num_transitions == 0) {
2047 return 0;
2048 }
2049
2050 // Copy the UTC transitions into each array to be modified in place later
2051 for (size_t i = 0; i < 2; ++i) {
2052 trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2053 if (trans_local[i] == NULL) {
2054 return -1;
2055 }
2056
2057 memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2058 }
2059
2060 int64_t offset_0, offset_1, buff;
2061 if (num_ttinfos > 1) {
2062 offset_0 = utcoff[0];
2063 offset_1 = utcoff[trans_idx[0]];
2064
2065 if (offset_1 > offset_0) {
2066 _swap(offset_0, offset_1, buff);
2067 }
2068 }
2069 else {
2070 offset_0 = utcoff[0];
2071 offset_1 = utcoff[0];
2072 }
2073
2074 trans_local[0][0] += offset_0;
2075 trans_local[1][0] += offset_1;
2076
2077 for (size_t i = 1; i < num_transitions; ++i) {
2078 offset_0 = utcoff[trans_idx[i - 1]];
2079 offset_1 = utcoff[trans_idx[i]];
2080
2081 if (offset_1 > offset_0) {
2082 _swap(offset_1, offset_0, buff);
2083 }
2084
2085 trans_local[0][i] += offset_0;
2086 trans_local[1][i] += offset_1;
2087 }
2088
2089 return 0;
2090}
2091
2092/* Simple bisect_right binary search implementation */
2093static size_t
2094_bisect(const int64_t value, const int64_t *arr, size_t size)
2095{
2096 size_t lo = 0;
2097 size_t hi = size;
2098 size_t m;
2099
2100 while (lo < hi) {
2101 m = (lo + hi) / 2;
2102 if (arr[m] > value) {
2103 hi = m;
2104 }
2105 else {
2106 lo = m + 1;
2107 }
2108 }
2109
2110 return hi;
2111}
2112
2113/* Find the ttinfo rules that apply at a given local datetime. */
2114static _ttinfo *
2115find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2116{
2117 // datetime.time has a .tzinfo attribute that passes None as the dt
2118 // argument; it only really has meaning for fixed-offset zones.
2119 if (dt == Py_None) {
2120 if (self->fixed_offset) {
2121 return &(self->tzrule_after.std);
2122 }
2123 else {
2124 return &NO_TTINFO;
2125 }
2126 }
2127
2128 int64_t ts;
2129 if (get_local_timestamp(dt, &ts)) {
2130 return NULL;
2131 }
2132
2133 unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2134 assert(fold < 2);
2135 int64_t *local_transitions = self->trans_list_wall[fold];
2136 size_t num_trans = self->num_transitions;
2137
2138 if (num_trans && ts < local_transitions[0]) {
2139 return self->ttinfo_before;
2140 }
2141 else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2142 return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2143 PyDateTime_GET_YEAR(dt));
2144 }
2145 else {
2146 size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2147 assert(idx < self->num_transitions);
2148 return self->trans_ttinfos[idx];
2149 }
2150}
2151
2152static int
2153is_leap_year(int year)
2154{
2155 const unsigned int ayear = (unsigned int)year;
2156 return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2157}
2158
2159/* Calculates ordinal datetime from year, month and day. */
2160static int
2161ymd_to_ord(int y, int m, int d)
2162{
2163 y -= 1;
2164 int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2165 int yearday = DAYS_BEFORE_MONTH[m];
2166 if (m > 2 && is_leap_year(y + 1)) {
2167 yearday += 1;
2168 }
2169
2170 return days_before_year + yearday + d;
2171}
2172
2173/* Calculate the number of seconds since 1970-01-01 in local time.
2174 *
2175 * This gets a datetime in the same "units" as self->trans_list_wall so that we
2176 * can easily determine which transitions a datetime falls between. See the
2177 * comment above ts_to_local for more information.
2178 * */
2179static int
2180get_local_timestamp(PyObject *dt, int64_t *local_ts)
2181{
2182 assert(local_ts != NULL);
2183
2184 int hour, minute, second;
2185 int ord;
2186 if (PyDateTime_CheckExact(dt)) {
2187 int y = PyDateTime_GET_YEAR(dt);
2188 int m = PyDateTime_GET_MONTH(dt);
2189 int d = PyDateTime_GET_DAY(dt);
2190 hour = PyDateTime_DATE_GET_HOUR(dt);
2191 minute = PyDateTime_DATE_GET_MINUTE(dt);
2192 second = PyDateTime_DATE_GET_SECOND(dt);
2193
2194 ord = ymd_to_ord(y, m, d);
2195 }
2196 else {
2197 PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2198 if (num == NULL) {
2199 return -1;
2200 }
2201
2202 ord = PyLong_AsLong(num);
2203 Py_DECREF(num);
2204 if (ord == -1 && PyErr_Occurred()) {
2205 return -1;
2206 }
2207
2208 num = PyObject_GetAttrString(dt, "hour");
2209 if (num == NULL) {
2210 return -1;
2211 }
2212 hour = PyLong_AsLong(num);
2213 Py_DECREF(num);
2214 if (hour == -1) {
2215 return -1;
2216 }
2217
2218 num = PyObject_GetAttrString(dt, "minute");
2219 if (num == NULL) {
2220 return -1;
2221 }
2222 minute = PyLong_AsLong(num);
2223 Py_DECREF(num);
2224 if (minute == -1) {
2225 return -1;
2226 }
2227
2228 num = PyObject_GetAttrString(dt, "second");
2229 if (num == NULL) {
2230 return -1;
2231 }
2232 second = PyLong_AsLong(num);
2233 Py_DECREF(num);
2234 if (second == -1) {
2235 return -1;
2236 }
2237 }
2238
2239 *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2240 (int64_t)(hour * 3600 + minute * 60 + second);
2241
2242 return 0;
2243}
2244
2245/////
2246// Functions for cache handling
2247
2248/* Constructor for StrongCacheNode */
2249static StrongCacheNode *
2250strong_cache_node_new(PyObject *key, PyObject *zone)
2251{
2252 StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2253 if (node == NULL) {
2254 return NULL;
2255 }
2256
2257 Py_INCREF(key);
2258 Py_INCREF(zone);
2259
2260 node->next = NULL;
2261 node->prev = NULL;
2262 node->key = key;
2263 node->zone = zone;
2264
2265 return node;
2266}
2267
2268/* Destructor for StrongCacheNode */
2269void
2270strong_cache_node_free(StrongCacheNode *node)
2271{
2272 Py_XDECREF(node->key);
2273 Py_XDECREF(node->zone);
2274
2275 PyMem_Free(node);
2276}
2277
2278/* Frees all nodes at or after a specified root in the strong cache.
2279 *
2280 * This can be used on the root node to free the entire cache or it can be used
2281 * to clear all nodes that have been expired (which, if everything is going
2282 * right, will actually only be 1 node at a time).
2283 */
2284void
2285strong_cache_free(StrongCacheNode *root)
2286{
2287 StrongCacheNode *node = root;
2288 StrongCacheNode *next_node;
2289 while (node != NULL) {
2290 next_node = node->next;
2291 strong_cache_node_free(node);
2292
2293 node = next_node;
2294 }
2295}
2296
2297/* Removes a node from the cache and update its neighbors.
2298 *
2299 * This is used both when ejecting a node from the cache and when moving it to
2300 * the front of the cache.
2301 */
2302static void
2303remove_from_strong_cache(StrongCacheNode *node)
2304{
2305 if (ZONEINFO_STRONG_CACHE == node) {
2306 ZONEINFO_STRONG_CACHE = node->next;
2307 }
2308
2309 if (node->prev != NULL) {
2310 node->prev->next = node->next;
2311 }
2312
2313 if (node->next != NULL) {
2314 node->next->prev = node->prev;
2315 }
2316
2317 node->next = NULL;
2318 node->prev = NULL;
2319}
2320
2321/* Retrieves the node associated with a key, if it exists.
2322 *
2323 * This traverses the strong cache until it finds a matching key and returns a
2324 * pointer to the relevant node if found. Returns NULL if no node is found.
2325 *
2326 * root may be NULL, indicating an empty cache.
2327 */
2328static StrongCacheNode *
2329find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2330{
2331 const StrongCacheNode *node = root;
2332 while (node != NULL) {
2333 if (PyObject_RichCompareBool(key, node->key, Py_EQ)) {
2334 return (StrongCacheNode *)node;
2335 }
2336
2337 node = node->next;
2338 }
2339
2340 return NULL;
2341}
2342
2343/* Ejects a given key from the class's strong cache, if applicable.
2344 *
2345 * This function is used to enable the per-key functionality in clear_cache.
2346 */
2347static void
2348eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2349{
2350 if (type != &PyZoneInfo_ZoneInfoType) {
2351 return;
2352 }
2353
2354 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2355 if (node != NULL) {
2356 remove_from_strong_cache(node);
2357
2358 strong_cache_node_free(node);
2359 }
2360}
2361
2362/* Moves a node to the front of the LRU cache.
2363 *
2364 * The strong cache is an LRU cache, so whenever a given node is accessed, if
2365 * it is not at the front of the cache, it needs to be moved there.
2366 */
2367static void
2368move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2369{
2370 StrongCacheNode *root_p = *root;
2371 if (root_p == node) {
2372 return;
2373 }
2374
2375 remove_from_strong_cache(node);
2376
2377 node->prev = NULL;
2378 node->next = root_p;
2379
2380 if (root_p != NULL) {
2381 root_p->prev = node;
2382 }
2383
2384 *root = node;
2385}
2386
2387/* Retrieves a ZoneInfo from the strong cache if it's present.
2388 *
2389 * This function finds the ZoneInfo by key and if found will move the node to
2390 * the front of the LRU cache and return a new reference to it. It returns NULL
2391 * if the key is not in the cache.
2392 *
2393 * The strong cache is currently only implemented for the base class, so this
2394 * always returns a cache miss for subclasses.
2395 */
2396static PyObject *
2397zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2398{
2399 if (type != &PyZoneInfo_ZoneInfoType) {
2400 return NULL; // Strong cache currently only implemented for base class
2401 }
2402
2403 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2404
2405 if (node != NULL) {
2406 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2407 Py_INCREF(node->zone);
2408 return node->zone;
2409 }
2410
2411 return NULL; // Cache miss
2412}
2413
2414/* Inserts a new key into the strong LRU cache.
2415 *
2416 * This function is only to be used after a cache miss — it creates a new node
2417 * at the front of the cache and ejects any stale entries (keeping the size of
2418 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2419 */
2420static void
2421update_strong_cache(const PyTypeObject *const type, PyObject *key,
2422 PyObject *zone)
2423{
2424 if (type != &PyZoneInfo_ZoneInfoType) {
2425 return;
2426 }
2427
2428 StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2429
2430 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2431
2432 StrongCacheNode *node = new_node->next;
2433 for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2434 if (node == NULL) {
2435 return;
2436 }
2437 node = node->next;
2438 }
2439
2440 // Everything beyond this point needs to be freed
2441 if (node != NULL) {
2442 if (node->prev != NULL) {
2443 node->prev->next = NULL;
2444 }
2445 strong_cache_free(node);
2446 }
2447}
2448
2449/* Clears all entries into a type's strong cache.
2450 *
2451 * Because the strong cache is not implemented for subclasses, this is a no-op
2452 * for everything except the base class.
2453 */
2454void
2455clear_strong_cache(const PyTypeObject *const type)
2456{
2457 if (type != &PyZoneInfo_ZoneInfoType) {
2458 return;
2459 }
2460
2461 strong_cache_free(ZONEINFO_STRONG_CACHE);
Paul Gansslec3dd7e42020-08-17 18:40:07 -04002462 ZONEINFO_STRONG_CACHE = NULL;
Paul Ganssle62972d92020-05-16 04:20:06 -04002463}
2464
2465static PyObject *
Benjamin Peterson0108b2a2020-07-15 10:02:14 -07002466new_weak_cache(void)
Paul Ganssle62972d92020-05-16 04:20:06 -04002467{
2468 PyObject *weakref_module = PyImport_ImportModule("weakref");
2469 if (weakref_module == NULL) {
2470 return NULL;
2471 }
2472
2473 PyObject *weak_cache =
2474 PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2475 Py_DECREF(weakref_module);
2476 return weak_cache;
2477}
2478
2479static int
Benjamin Peterson0108b2a2020-07-15 10:02:14 -07002480initialize_caches(void)
Paul Ganssle62972d92020-05-16 04:20:06 -04002481{
Ammar Askar06a1b892020-05-22 16:10:55 +00002482 // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
Paul Ganssle62972d92020-05-16 04:20:06 -04002483 if (TIMEDELTA_CACHE == NULL) {
2484 TIMEDELTA_CACHE = PyDict_New();
2485 }
2486 else {
2487 Py_INCREF(TIMEDELTA_CACHE);
2488 }
2489
2490 if (TIMEDELTA_CACHE == NULL) {
2491 return -1;
2492 }
2493
2494 if (ZONEINFO_WEAK_CACHE == NULL) {
2495 ZONEINFO_WEAK_CACHE = new_weak_cache();
2496 }
2497 else {
2498 Py_INCREF(ZONEINFO_WEAK_CACHE);
2499 }
2500
2501 if (ZONEINFO_WEAK_CACHE == NULL) {
2502 return -1;
2503 }
2504
2505 return 0;
2506}
2507
2508static PyObject *
2509zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2510{
2511 PyObject *weak_cache = new_weak_cache();
2512 if (weak_cache == NULL) {
2513 return NULL;
2514 }
2515
2516 PyObject_SetAttrString((PyObject *)cls, "_weak_cache", weak_cache);
Paul Gansslec3dd7e42020-08-17 18:40:07 -04002517 Py_DECREF(weak_cache);
Paul Ganssle62972d92020-05-16 04:20:06 -04002518 Py_RETURN_NONE;
2519}
2520
2521/////
2522// Specify the ZoneInfo type
2523static PyMethodDef zoneinfo_methods[] = {
2524 {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2525 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2526 PyDoc_STR("Clear the ZoneInfo cache.")},
2527 {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2528 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2529 PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2530 {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2531 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2532 PyDoc_STR("Create a ZoneInfo file from a file object.")},
2533 {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2534 PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2535 "the given datetime.")},
2536 {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2537 PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2538 "in a zone at the given datetime.")},
2539 {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2540 PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2541 "zone that applies in a zone at a given datetime.")},
2542 {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2543 PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2544 "datetime in local time.")},
2545 {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2546 PyDoc_STR("Function for serialization with the pickle protocol.")},
2547 {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2548 PyDoc_STR("Private method used in unpickling.")},
2549 {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
Paul Ganssle87d82872020-08-13 22:38:30 -04002550 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
Paul Ganssle62972d92020-05-16 04:20:06 -04002551 PyDoc_STR("Function to initialize subclasses.")},
2552 {NULL} /* Sentinel */
2553};
2554
2555static PyMemberDef zoneinfo_members[] = {
2556 {.name = "key",
2557 .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2558 .type = T_OBJECT_EX,
2559 .flags = READONLY,
2560 .doc = NULL},
2561 {NULL}, /* Sentinel */
2562};
2563
2564static PyTypeObject PyZoneInfo_ZoneInfoType = {
2565 PyVarObject_HEAD_INIT(NULL, 0) //
2566 .tp_name = "zoneinfo.ZoneInfo",
2567 .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2568 .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2569 .tp_repr = (reprfunc)zoneinfo_repr,
2570 .tp_str = (reprfunc)zoneinfo_str,
2571 .tp_getattro = PyObject_GenericGetAttr,
2572 .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2573 /* .tp_doc = zoneinfo_doc, */
2574 .tp_methods = zoneinfo_methods,
2575 .tp_members = zoneinfo_members,
2576 .tp_new = zoneinfo_new,
2577 .tp_dealloc = zoneinfo_dealloc,
2578};
2579
2580/////
2581// Specify the _zoneinfo module
2582static PyMethodDef module_methods[] = {{NULL, NULL}};
2583static void
2584module_free()
2585{
2586 Py_XDECREF(_tzpath_find_tzfile);
2587 _tzpath_find_tzfile = NULL;
2588
2589 Py_XDECREF(_common_mod);
2590 _common_mod = NULL;
2591
2592 Py_XDECREF(io_open);
2593 io_open = NULL;
2594
2595 xdecref_ttinfo(&NO_TTINFO);
2596
Ammar Askar06a1b892020-05-22 16:10:55 +00002597 if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2598 Py_DECREF(TIMEDELTA_CACHE);
2599 } else {
2600 Py_CLEAR(TIMEDELTA_CACHE);
Paul Ganssle62972d92020-05-16 04:20:06 -04002601 }
2602
Ammar Askar06a1b892020-05-22 16:10:55 +00002603 if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2604 Py_DECREF(ZONEINFO_WEAK_CACHE);
2605 } else {
2606 Py_CLEAR(ZONEINFO_WEAK_CACHE);
Paul Ganssle62972d92020-05-16 04:20:06 -04002607 }
2608
Paul Gansslec3dd7e42020-08-17 18:40:07 -04002609 clear_strong_cache(&PyZoneInfo_ZoneInfoType);
Paul Ganssle62972d92020-05-16 04:20:06 -04002610}
2611
2612static int
2613zoneinfomodule_exec(PyObject *m)
2614{
2615 PyDateTime_IMPORT;
2616 PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2617 if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2618 goto error;
2619 }
2620
2621 Py_INCREF(&PyZoneInfo_ZoneInfoType);
2622 PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2623
2624 /* Populate imports */
2625 PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2626 if (_tzpath_module == NULL) {
2627 goto error;
2628 }
2629
2630 _tzpath_find_tzfile =
2631 PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2632 Py_DECREF(_tzpath_module);
2633 if (_tzpath_find_tzfile == NULL) {
2634 goto error;
2635 }
2636
2637 PyObject *io_module = PyImport_ImportModule("io");
2638 if (io_module == NULL) {
2639 goto error;
2640 }
2641
2642 io_open = PyObject_GetAttrString(io_module, "open");
2643 Py_DECREF(io_module);
2644 if (io_open == NULL) {
2645 goto error;
2646 }
2647
2648 _common_mod = PyImport_ImportModule("zoneinfo._common");
2649 if (_common_mod == NULL) {
2650 goto error;
2651 }
2652
2653 if (NO_TTINFO.utcoff == NULL) {
2654 NO_TTINFO.utcoff = Py_None;
2655 NO_TTINFO.dstoff = Py_None;
2656 NO_TTINFO.tzname = Py_None;
2657
2658 for (size_t i = 0; i < 3; ++i) {
2659 Py_INCREF(Py_None);
2660 }
2661 }
2662
2663 if (initialize_caches()) {
2664 goto error;
2665 }
2666
2667 return 0;
2668
2669error:
2670 return -1;
2671}
2672
2673static PyModuleDef_Slot zoneinfomodule_slots[] = {
2674 {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2675
2676static struct PyModuleDef zoneinfomodule = {
2677 PyModuleDef_HEAD_INIT,
2678 .m_name = "_zoneinfo",
2679 .m_doc = "C implementation of the zoneinfo module",
2680 .m_size = 0,
2681 .m_methods = module_methods,
2682 .m_slots = zoneinfomodule_slots,
2683 .m_free = (freefunc)module_free};
2684
2685PyMODINIT_FUNC
2686PyInit__zoneinfo(void)
2687{
2688 return PyModuleDef_Init(&zoneinfomodule);
2689}