[3.7] bpo-33899: Make tokenize module mirror the "end-of-file is end-of-line" behavior (GH-7891) (GH-8132)
Most of the change involves fixing up the test suite, which previously
assumed that no NEWLINE token would be emitted if the input didn't end in a
newline character.
Contributed by Ammar Askar.
(cherry picked from commit c4ef4896eac86a6759901c8546e26de4695a1389)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 6528b90..0eccc9b 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -496,8 +496,15 @@
# BOM will already have been stripped.
encoding = "utf-8"
yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
- while True: # loop over lines in stream
+ last_line = b''
+ line = b''
+ while True: # loop over lines in stream
try:
+ # We capture the value of the line variable here because
+ # readline uses the empty string '' to signal end of input,
+ # hence `line` itself will always be overwritten at the end
+ # of this loop.
+ last_line = line
line = readline()
except StopIteration:
line = b''
@@ -652,6 +659,9 @@
(lnum, pos), (lnum, pos+1), line)
pos += 1
+ # Add an implicit NEWLINE if the input doesn't end in one
+ if last_line and last_line[-1] not in '\r\n':
+ yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')