blob: 00b29b31d0954112e726dc761dc6e8befca9813c [file] [log] [blame]
Antoine Pitrouf2c54842010-01-13 08:07:53 +00001/* stringlib: split implementation */
2
3#ifndef STRINGLIB_SPLIT_H
4#define STRINGLIB_SPLIT_H
5
6#ifndef STRINGLIB_FASTSEARCH_H
7#error must include "stringlib/fastsearch.h" before including this module
8#endif
9
10/* Overallocate the initial list to reduce the number of reallocs for small
11 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
12 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
13 text (roughly 11 words per line) and field delimited data (usually 1-10
14 fields). For large strings the split algorithms are bandwidth limited
15 so increasing the preallocation likely will not improve things.*/
16
17#define MAX_PREALLOC 12
18
19/* 5 splits gives 6 elements */
20#define PREALLOC_SIZE(maxsplit) \
21 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
22
23#define SPLIT_APPEND(data, left, right) \
24 sub = STRINGLIB_NEW((data) + (left), \
25 (right) - (left)); \
26 if (sub == NULL) \
27 goto onError; \
28 if (PyList_Append(list, sub)) { \
29 Py_DECREF(sub); \
30 goto onError; \
31 } \
32 else \
33 Py_DECREF(sub);
34
35#define SPLIT_ADD(data, left, right) { \
36 sub = STRINGLIB_NEW((data) + (left), \
37 (right) - (left)); \
38 if (sub == NULL) \
39 goto onError; \
40 if (count < MAX_PREALLOC) { \
41 PyList_SET_ITEM(list, count, sub); \
42 } else { \
43 if (PyList_Append(list, sub)) { \
44 Py_DECREF(sub); \
45 goto onError; \
46 } \
47 else \
48 Py_DECREF(sub); \
49 } \
50 count++; }
51
52
53/* Always force the list to the expected size. */
54#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
55
56Py_LOCAL_INLINE(PyObject *)
57stringlib_split_whitespace(PyObject* str_obj,
58 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
59 Py_ssize_t maxcount)
60{
61 Py_ssize_t i, j, count=0;
62 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
63 PyObject *sub;
64
65 if (list == NULL)
66 return NULL;
67
68 i = j = 0;
69 while (maxcount-- > 0) {
70 while (i < str_len && STRINGLIB_ISSPACE(str[i]))
71 i++;
72 if (i == str_len) break;
73 j = i; i++;
74 while (i < str_len && !STRINGLIB_ISSPACE(str[i]))
75 i++;
76#ifndef STRINGLIB_MUTABLE
77 if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
78 /* No whitespace in str_obj, so just use it as list[0] */
79 Py_INCREF(str_obj);
80 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
81 count++;
82 break;
83 }
84#endif
85 SPLIT_ADD(str, j, i);
86 }
87
88 if (i < str_len) {
89 /* Only occurs when maxcount was reached */
90 /* Skip any remaining whitespace and copy to end of string */
91 while (i < str_len && STRINGLIB_ISSPACE(str[i]))
92 i++;
93 if (i != str_len)
94 SPLIT_ADD(str, i, str_len);
95 }
96 FIX_PREALLOC_SIZE(list);
97 return list;
98
99 onError:
100 Py_DECREF(list);
101 return NULL;
102}
103
104Py_LOCAL_INLINE(PyObject *)
105stringlib_split_char(PyObject* str_obj,
106 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
107 const STRINGLIB_CHAR ch,
108 Py_ssize_t maxcount)
109{
110 Py_ssize_t i, j, count=0;
111 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
112 PyObject *sub;
113
114 if (list == NULL)
115 return NULL;
116
117 i = j = 0;
118 while ((j < str_len) && (maxcount-- > 0)) {
119 for(; j < str_len; j++) {
120 /* I found that using memchr makes no difference */
121 if (str[j] == ch) {
122 SPLIT_ADD(str, i, j);
123 i = j = j + 1;
124 break;
125 }
126 }
127 }
128#ifndef STRINGLIB_MUTABLE
129 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
130 /* ch not in str_obj, so just use str_obj as list[0] */
131 Py_INCREF(str_obj);
132 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
133 count++;
134 } else
135#endif
136 if (i <= str_len) {
137 SPLIT_ADD(str, i, str_len);
138 }
139 FIX_PREALLOC_SIZE(list);
140 return list;
141
142 onError:
143 Py_DECREF(list);
144 return NULL;
145}
146
147Py_LOCAL_INLINE(PyObject *)
148stringlib_split(PyObject* str_obj,
149 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
150 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
151 Py_ssize_t maxcount)
152{
153 Py_ssize_t i, j, pos, count=0;
154 PyObject *list, *sub;
155
156 if (sep_len == 0) {
157 PyErr_SetString(PyExc_ValueError, "empty separator");
158 return NULL;
159 }
160 else if (sep_len == 1)
161 return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);
162
163 list = PyList_New(PREALLOC_SIZE(maxcount));
164 if (list == NULL)
165 return NULL;
166
167 i = j = 0;
168 while (maxcount-- > 0) {
169 pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
170 if (pos < 0)
171 break;
172 j = i + pos;
173 SPLIT_ADD(str, i, j);
174 i = j + sep_len;
175 }
176#ifndef STRINGLIB_MUTABLE
177 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
178 /* No match in str_obj, so just use it as list[0] */
179 Py_INCREF(str_obj);
180 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
181 count++;
182 } else
183#endif
184 {
185 SPLIT_ADD(str, i, str_len);
186 }
187 FIX_PREALLOC_SIZE(list);
188 return list;
189
190 onError:
191 Py_DECREF(list);
192 return NULL;
193}
194
195Py_LOCAL_INLINE(PyObject *)
196stringlib_rsplit_whitespace(PyObject* str_obj,
197 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
198 Py_ssize_t maxcount)
199{
200 Py_ssize_t i, j, count=0;
201 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
202 PyObject *sub;
203
204 if (list == NULL)
205 return NULL;
206
207 i = j = str_len - 1;
208 while (maxcount-- > 0) {
209 while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
210 i--;
211 if (i < 0) break;
212 j = i; i--;
213 while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))
214 i--;
215#ifndef STRINGLIB_MUTABLE
216 if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
217 /* No whitespace in str_obj, so just use it as list[0] */
218 Py_INCREF(str_obj);
219 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
220 count++;
221 break;
222 }
223#endif
224 SPLIT_ADD(str, i + 1, j + 1);
225 }
226
227 if (i >= 0) {
228 /* Only occurs when maxcount was reached */
229 /* Skip any remaining whitespace and copy to beginning of string */
230 while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
231 i--;
232 if (i >= 0)
233 SPLIT_ADD(str, 0, i + 1);
234 }
235 FIX_PREALLOC_SIZE(list);
236 if (PyList_Reverse(list) < 0)
237 goto onError;
238 return list;
239
240 onError:
241 Py_DECREF(list);
242 return NULL;
243}
244
245Py_LOCAL_INLINE(PyObject *)
246stringlib_rsplit_char(PyObject* str_obj,
247 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
248 const STRINGLIB_CHAR ch,
249 Py_ssize_t maxcount)
250{
251 Py_ssize_t i, j, count=0;
252 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
253 PyObject *sub;
254
255 if (list == NULL)
256 return NULL;
257
258 i = j = str_len - 1;
259 while ((i >= 0) && (maxcount-- > 0)) {
260 for(; i >= 0; i--) {
261 if (str[i] == ch) {
262 SPLIT_ADD(str, i + 1, j + 1);
263 j = i = i - 1;
264 break;
265 }
266 }
267 }
268#ifndef STRINGLIB_MUTABLE
269 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
270 /* ch not in str_obj, so just use str_obj as list[0] */
271 Py_INCREF(str_obj);
272 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
273 count++;
274 } else
275#endif
276 if (j >= -1) {
277 SPLIT_ADD(str, 0, j + 1);
278 }
279 FIX_PREALLOC_SIZE(list);
280 if (PyList_Reverse(list) < 0)
281 goto onError;
282 return list;
283
284 onError:
285 Py_DECREF(list);
286 return NULL;
287}
288
289Py_LOCAL_INLINE(PyObject *)
290stringlib_rsplit(PyObject* str_obj,
291 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
292 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
293 Py_ssize_t maxcount)
294{
295 Py_ssize_t j, pos, count=0;
296 PyObject *list, *sub;
297
298 if (sep_len == 0) {
299 PyErr_SetString(PyExc_ValueError, "empty separator");
300 return NULL;
301 }
302 else if (sep_len == 1)
303 return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);
304
305 list = PyList_New(PREALLOC_SIZE(maxcount));
306 if (list == NULL)
307 return NULL;
308
309 j = str_len;
310 while (maxcount-- > 0) {
311 pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH);
312 if (pos < 0)
313 break;
314 SPLIT_ADD(str, pos + sep_len, j);
315 j = pos;
316 }
317#ifndef STRINGLIB_MUTABLE
318 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
319 /* No match in str_obj, so just use it as list[0] */
320 Py_INCREF(str_obj);
321 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
322 count++;
323 } else
324#endif
325 {
326 SPLIT_ADD(str, 0, j);
327 }
328 FIX_PREALLOC_SIZE(list);
329 if (PyList_Reverse(list) < 0)
330 goto onError;
331 return list;
332
333 onError:
334 Py_DECREF(list);
335 return NULL;
336}
337
338Py_LOCAL_INLINE(PyObject *)
339stringlib_splitlines(PyObject* str_obj,
340 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
341 int keepends)
342{
343 /* This does not use the preallocated list because splitlines is
344 usually run with hundreds of newlines. The overhead of
345 switching between PyList_SET_ITEM and append causes about a
346 2-3% slowdown for that common case. A smarter implementation
347 could move the if check out, so the SET_ITEMs are done first
348 and the appends only done when the prealloc buffer is full.
349 That's too much work for little gain.*/
350
351 register Py_ssize_t i;
352 register Py_ssize_t j;
353 PyObject *list = PyList_New(0);
354 PyObject *sub;
355
356 if (list == NULL)
357 return NULL;
358
359 for (i = j = 0; i < str_len; ) {
360 Py_ssize_t eol;
361
362 /* Find a line and append it */
363 while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))
364 i++;
365
366 /* Skip the line break reading CRLF as one line break */
367 eol = i;
368 if (i < str_len) {
369 if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')
370 i += 2;
371 else
372 i++;
373 if (keepends)
374 eol = i;
375 }
376#ifndef STRINGLIB_MUTABLE
377 if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
378 /* No linebreak in str_obj, so just use it as list[0] */
379 if (PyList_Append(list, str_obj))
380 goto onError;
381 break;
382 }
383#endif
384 SPLIT_APPEND(str, j, eol);
385 j = i;
386 }
387 return list;
388
389 onError:
390 Py_DECREF(list);
391 return NULL;
392}
393
394#endif
395/* stringlib: split implementation */
396
397#ifndef STRINGLIB_SPLIT_H
398#define STRINGLIB_SPLIT_H
399
400#ifndef STRINGLIB_FASTSEARCH_H
401#error must include "stringlib/fastsearch.h" before including this module
402#endif
403
/* Overallocate the initial list to reduce the number of reallocs for small
 split sizes. E.g., "A A A A A A A A A A".split() (10 elements) has three
 resizes, to sizes 4, 8, then 16. Most observed string splits are for human
 text (roughly 11 words per line) and field-delimited data (usually 1-10
 fields). For large strings the split algorithms are bandwidth-limited,
 so increasing the preallocation likely will not improve things. */
410
/* Upper bound on the number of list slots allocated up front. */
#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   element than splits (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so that expressions such as `a ? b : c`
   are compared as a whole; note that it is still evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)

/* Create the substring data[left:right] and append it to `list`.
   Expects `list` and `sub` variables and an `onError` label in the
   enclosing function; jumps to `onError` if either step fails.
   PyList_Append takes its own reference, so ours is dropped afterwards.
   Wrapped in do/while(0) so the expansion is a single statement. */
#define SPLIT_APPEND(data, left, right)                 \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (PyList_Append(list, sub)) {                 \
            Py_DECREF(sub);                             \
            goto onError;                               \
        }                                               \
        Py_DECREF(sub);                                 \
    } while (0)

/* Create the substring data[left:right] and store it as element `count`
   of `list`, then increment `count`.  Expects `list`, `sub` and `count`
   variables and an `onError` label in the enclosing function.
   While count < MAX_PREALLOC the item is written straight into slot
   `count` (PyList_SET_ITEM keeps our reference); this assumes the list
   was created with PREALLOC_SIZE() so that the slot exists.  Beyond that
   the item is appended and our own reference is released.
   Wrapped in do/while(0) so the expansion is a single statement. */
#define SPLIT_ADD(data, left, right)                    \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (count < MAX_PREALLOC) {                     \
            PyList_SET_ITEM(list, count, sub);          \
        }                                               \
        else {                                          \
            if (PyList_Append(list, sub)) {             \
                Py_DECREF(sub);                         \
                goto onError;                           \
            }                                           \
            Py_DECREF(sub);                             \
        }                                               \
        count++;                                        \
    } while (0)


/* Always force the list to the expected size: the preallocated list may
   have more slots than were actually filled.  Uses the `count` variable
   of the enclosing function.
   NOTE(review): relies on Py_SIZE() expanding to an lvalue -- confirm
   against the Python headers this file is built with. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
449
450Py_LOCAL_INLINE(PyObject *)
451stringlib_split_whitespace(PyObject* str_obj,
452 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
453 Py_ssize_t maxcount)
454{
455 Py_ssize_t i, j, count=0;
456 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
457 PyObject *sub;
458
459 if (list == NULL)
460 return NULL;
461
462 i = j = 0;
463 while (maxcount-- > 0) {
464 while (i < str_len && STRINGLIB_ISSPACE(str[i]))
465 i++;
466 if (i == str_len) break;
467 j = i; i++;
468 while (i < str_len && !STRINGLIB_ISSPACE(str[i]))
469 i++;
470#ifndef STRINGLIB_MUTABLE
471 if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
472 /* No whitespace in str_obj, so just use it as list[0] */
473 Py_INCREF(str_obj);
474 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
475 count++;
476 break;
477 }
478#endif
479 SPLIT_ADD(str, j, i);
480 }
481
482 if (i < str_len) {
483 /* Only occurs when maxcount was reached */
484 /* Skip any remaining whitespace and copy to end of string */
485 while (i < str_len && STRINGLIB_ISSPACE(str[i]))
486 i++;
487 if (i != str_len)
488 SPLIT_ADD(str, i, str_len);
489 }
490 FIX_PREALLOC_SIZE(list);
491 return list;
492
493 onError:
494 Py_DECREF(list);
495 return NULL;
496}
497
498Py_LOCAL_INLINE(PyObject *)
499stringlib_split_char(PyObject* str_obj,
500 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
501 const STRINGLIB_CHAR ch,
502 Py_ssize_t maxcount)
503{
504 Py_ssize_t i, j, count=0;
505 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
506 PyObject *sub;
507
508 if (list == NULL)
509 return NULL;
510
511 i = j = 0;
512 while ((j < str_len) && (maxcount-- > 0)) {
513 for(; j < str_len; j++) {
514 /* I found that using memchr makes no difference */
515 if (str[j] == ch) {
516 SPLIT_ADD(str, i, j);
517 i = j = j + 1;
518 break;
519 }
520 }
521 }
522#ifndef STRINGLIB_MUTABLE
523 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
524 /* ch not in str_obj, so just use str_obj as list[0] */
525 Py_INCREF(str_obj);
526 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
527 count++;
528 } else
529#endif
530 if (i <= str_len) {
531 SPLIT_ADD(str, i, str_len);
532 }
533 FIX_PREALLOC_SIZE(list);
534 return list;
535
536 onError:
537 Py_DECREF(list);
538 return NULL;
539}
540
541Py_LOCAL_INLINE(PyObject *)
542stringlib_split(PyObject* str_obj,
543 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
544 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
545 Py_ssize_t maxcount)
546{
547 Py_ssize_t i, j, pos, count=0;
548 PyObject *list, *sub;
549
550 if (sep_len == 0) {
551 PyErr_SetString(PyExc_ValueError, "empty separator");
552 return NULL;
553 }
554 else if (sep_len == 1)
555 return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);
556
557 list = PyList_New(PREALLOC_SIZE(maxcount));
558 if (list == NULL)
559 return NULL;
560
561 i = j = 0;
562 while (maxcount-- > 0) {
563 pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
564 if (pos < 0)
565 break;
566 j = i + pos;
567 SPLIT_ADD(str, i, j);
568 i = j + sep_len;
569 }
570#ifndef STRINGLIB_MUTABLE
571 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
572 /* No match in str_obj, so just use it as list[0] */
573 Py_INCREF(str_obj);
574 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
575 count++;
576 } else
577#endif
578 {
579 SPLIT_ADD(str, i, str_len);
580 }
581 FIX_PREALLOC_SIZE(list);
582 return list;
583
584 onError:
585 Py_DECREF(list);
586 return NULL;
587}
588
589Py_LOCAL_INLINE(PyObject *)
590stringlib_rsplit_whitespace(PyObject* str_obj,
591 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
592 Py_ssize_t maxcount)
593{
594 Py_ssize_t i, j, count=0;
595 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
596 PyObject *sub;
597
598 if (list == NULL)
599 return NULL;
600
601 i = j = str_len - 1;
602 while (maxcount-- > 0) {
603 while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
604 i--;
605 if (i < 0) break;
606 j = i; i--;
607 while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))
608 i--;
609#ifndef STRINGLIB_MUTABLE
610 if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
611 /* No whitespace in str_obj, so just use it as list[0] */
612 Py_INCREF(str_obj);
613 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
614 count++;
615 break;
616 }
617#endif
618 SPLIT_ADD(str, i + 1, j + 1);
619 }
620
621 if (i >= 0) {
622 /* Only occurs when maxcount was reached */
623 /* Skip any remaining whitespace and copy to beginning of string */
624 while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
625 i--;
626 if (i >= 0)
627 SPLIT_ADD(str, 0, i + 1);
628 }
629 FIX_PREALLOC_SIZE(list);
630 if (PyList_Reverse(list) < 0)
631 goto onError;
632 return list;
633
634 onError:
635 Py_DECREF(list);
636 return NULL;
637}
638
639Py_LOCAL_INLINE(PyObject *)
640stringlib_rsplit_char(PyObject* str_obj,
641 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
642 const STRINGLIB_CHAR ch,
643 Py_ssize_t maxcount)
644{
645 Py_ssize_t i, j, count=0;
646 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
647 PyObject *sub;
648
649 if (list == NULL)
650 return NULL;
651
652 i = j = str_len - 1;
653 while ((i >= 0) && (maxcount-- > 0)) {
654 for(; i >= 0; i--) {
655 if (str[i] == ch) {
656 SPLIT_ADD(str, i + 1, j + 1);
657 j = i = i - 1;
658 break;
659 }
660 }
661 }
662#ifndef STRINGLIB_MUTABLE
663 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
664 /* ch not in str_obj, so just use str_obj as list[0] */
665 Py_INCREF(str_obj);
666 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
667 count++;
668 } else
669#endif
670 if (j >= -1) {
671 SPLIT_ADD(str, 0, j + 1);
672 }
673 FIX_PREALLOC_SIZE(list);
674 if (PyList_Reverse(list) < 0)
675 goto onError;
676 return list;
677
678 onError:
679 Py_DECREF(list);
680 return NULL;
681}
682
683Py_LOCAL_INLINE(PyObject *)
684stringlib_rsplit(PyObject* str_obj,
685 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
686 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
687 Py_ssize_t maxcount)
688{
689 Py_ssize_t j, pos, count=0;
690 PyObject *list, *sub;
691
692 if (sep_len == 0) {
693 PyErr_SetString(PyExc_ValueError, "empty separator");
694 return NULL;
695 }
696 else if (sep_len == 1)
697 return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);
698
699 list = PyList_New(PREALLOC_SIZE(maxcount));
700 if (list == NULL)
701 return NULL;
702
703 j = str_len;
704 while (maxcount-- > 0) {
705 pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH);
706 if (pos < 0)
707 break;
708 SPLIT_ADD(str, pos + sep_len, j);
709 j = pos;
710 }
711#ifndef STRINGLIB_MUTABLE
712 if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
713 /* No match in str_obj, so just use it as list[0] */
714 Py_INCREF(str_obj);
715 PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
716 count++;
717 } else
718#endif
719 {
720 SPLIT_ADD(str, 0, j);
721 }
722 FIX_PREALLOC_SIZE(list);
723 if (PyList_Reverse(list) < 0)
724 goto onError;
725 return list;
726
727 onError:
728 Py_DECREF(list);
729 return NULL;
730}
731
732Py_LOCAL_INLINE(PyObject *)
733stringlib_splitlines(PyObject* str_obj,
734 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
735 int keepends)
736{
737 /* This does not use the preallocated list because splitlines is
738 usually run with hundreds of newlines. The overhead of
739 switching between PyList_SET_ITEM and append causes about a
740 2-3% slowdown for that common case. A smarter implementation
741 could move the if check out, so the SET_ITEMs are done first
742 and the appends only done when the prealloc buffer is full.
743 That's too much work for little gain.*/
744
745 register Py_ssize_t i;
746 register Py_ssize_t j;
747 PyObject *list = PyList_New(0);
748 PyObject *sub;
749
750 if (list == NULL)
751 return NULL;
752
753 for (i = j = 0; i < str_len; ) {
754 Py_ssize_t eol;
755
756 /* Find a line and append it */
757 while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))
758 i++;
759
760 /* Skip the line break reading CRLF as one line break */
761 eol = i;
762 if (i < str_len) {
763 if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')
764 i += 2;
765 else
766 i++;
767 if (keepends)
768 eol = i;
769 }
770#ifndef STRINGLIB_MUTABLE
771 if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
772 /* No linebreak in str_obj, so just use it as list[0] */
773 if (PyList_Append(list, str_obj))
774 goto onError;
775 break;
776 }
777#endif
778 SPLIT_APPEND(str, j, eol);
779 j = i;
780 }
781 return list;
782
783 onError:
784 Py_DECREF(list);
785 return NULL;
786}
787
788#endif