wohlganger | 58fc71c | 2017-09-10 16:19:47 -0500 | [diff] [blame] | 1 | """Format a paragraph, comment block, or selection to a max width. |
David Scherer | 7aced17 | 2000-08-15 01:13:23 +0000 | [diff] [blame] | 2 | |
Terry Jan Reedy | 7c64aad | 2013-08-10 16:56:28 -0400 | [diff] [blame] | 3 | Does basic, standard text formatting, and also understands Python |
| 4 | comment blocks. Thus, for editing Python source code, this |
| 5 | extension is really only suitable for reformatting these comment |
| 6 | blocks or triple-quoted strings. |
David Scherer | 7aced17 | 2000-08-15 01:13:23 +0000 | [diff] [blame] | 7 | |
Terry Jan Reedy | 7c64aad | 2013-08-10 16:56:28 -0400 | [diff] [blame] | 8 | Known problems with comment reformatting: |
| 9 | * If there is a selection marked, and the first line of the |
| 10 | selection is not complete, the block will probably not be detected |
| 11 | as comments, and will have the normal "text formatting" rules |
| 12 | applied. |
| 13 | * If a comment block has leading whitespace that mixes tabs and |
| 14 | spaces, they will not be considered part of the same block. |
| 15 | * Fancy comments, like this bulleted list, aren't handled :-) |
| 16 | """ |
David Scherer | 7aced17 | 2000-08-15 01:13:23 +0000 | [diff] [blame] | 17 | import re |
Terry Jan Reedy | bfbaa6b | 2016-08-31 00:50:55 -0400 | [diff] [blame] | 18 | |
Terry Jan Reedy | 6fa5bdc | 2016-05-28 13:22:31 -0400 | [diff] [blame] | 19 | from idlelib.config import idleConf |
David Scherer | 7aced17 | 2000-08-15 01:13:23 +0000 | [diff] [blame] | 20 | |
Terry Jan Reedy | bfbaa6b | 2016-08-31 00:50:55 -0400 | [diff] [blame] | 21 | |
class FormatParagraph:
    """Editor extension that re-wraps a paragraph, comment block or selection.

    The instance keeps a reference to the editor window it was created
    for; that object must provide a ``text`` attribute and a
    ``get_selection_indices()`` method.  The wrap width is stored on the
    class as ``max_width`` and is (re)read from the IDLE configuration
    by ``reload()``.
    """

    def __init__(self, editwin):
        self.editwin = editwin

    @classmethod
    def reload(cls):
        """Read the 'max-width' extension option (default 72) into the class."""
        cls.max_width = idleConf.GetOption(
            'extensions', 'FormatParagraph', 'max-width',
            type='int', default=72)

    def close(self):
        """Drop the reference to the editor window."""
        self.editwin = None

    def format_paragraph_event(self, event, limit=None):
        """Re-wrap the selection, or the paragraph at the cursor, to a width.

        With an active selection the selected text is reformatted,
        starting at the beginning of the selection.  Without one, the
        paragraph around the insert cursor (as located by
        find_paragraph) is reformatted instead.

        limit overrides the configured max_width; it exists so tests
        can run with a known value.

        Always returns "break" (presumably so Tk stops further handling
        of the event -- confirm against the binding code).
        """
        if limit is None:
            limit = self.max_width
        widget = self.editwin.text

        first, last = self.editwin.get_selection_indices()
        if first and last:
            data = widget.get(first, last)
            comment_header = get_comment_header(data)
        else:
            cursor = widget.index("insert")
            first, last, comment_header, data = find_paragraph(widget, cursor)

        # A non-empty header routes the text through the comment-aware path.
        newdata = (reformat_comment(data, limit, comment_header)
                   if comment_header
                   else reformat_paragraph(data, limit))
        widget.tag_remove("sel", "1.0", "end")

        if newdata == data:
            # Nothing changed: just move the cursor past the paragraph.
            widget.mark_set("insert", last)
        else:
            widget.mark_set("insert", first)
            # Group the delete + insert so they undo as a single step.
            widget.undo_block_start()
            widget.delete(first, last)
            widget.insert(first, newdata)
            widget.undo_block_stop()
        widget.see("insert")
        return "break"
David Scherer | 7aced17 | 2000-08-15 01:13:23 +0000 | [diff] [blame] | 72 | |
wohlganger | 58fc71c | 2017-09-10 16:19:47 -0500 | [diff] [blame] | 73 | |
# Read the configured width once at import time so that the class attribute
# max_width exists before format_paragraph_event falls back to it.
FormatParagraph.reload()
| 75 | |
def find_paragraph(text, mark):
    """Locate the paragraph that contains (or follows) the index mark.

    text must offer ``get(start, end)`` and ``compare(i1, op, i2)`` in
    the style of a Tk text widget; mark is a "line.column" index string.

    Returns a 4-tuple ``(first, last, comment_header, data)`` where
    first and last are "line.0" indices delimiting the paragraph,
    comment_header is the header of its first line as reported by
    get_comment_header, and data is the text between first and last.
    """
    def line_at(number):
        # Content of one line, excluding its newline.
        return text.get("%d.0" % number, "%d.end" % number)

    row, _col = (int(part) for part in mark.split("."))
    current = line_at(row)

    # If mark sits on a blank line, advance to the start of the next paragraph.
    while text.compare("%d.0" % row, "<", "end") and is_all_white(current):
        row += 1
        current = line_at(row)
    start_row = row
    comment_header = get_comment_header(current)
    body_offset = len(comment_header)

    def belongs(candidate):
        # Same header as the start line and some visible text after it.
        return (get_comment_header(candidate) == comment_header
                and not is_all_white(candidate[body_offset:]))

    # Walk forward to the first line that is no longer part of the paragraph.
    while belongs(current):
        row += 1
        current = line_at(row)
    last = "%d.0" % row

    # Walk backward from the line above the start line to find the top.
    row = start_row - 1
    current = line_at(row)
    while row > 0 and belongs(current):
        row -= 1
        current = line_at(row)
    first = "%d.0" % (row + 1)

    return first, last, comment_header, text.get(first, last)
| 111 | |
Terry Jan Reedy | 7c64aad | 2013-08-10 16:56:28 -0400 | [diff] [blame] | 112 | # This should perhaps be replaced with textwrap.wrap |
def reformat_paragraph(data, limit):
    """Return data with its first paragraph re-wrapped to limit columns.

    Leading blank lines are kept as they are.  The first non-blank line
    supplies the indent of the first output line; the line after it (if
    non-blank) supplies the indent of all continuation lines.  Width is
    measured after expanding tabs.  Everything from the first blank
    line after the paragraph onward is copied through unchanged.
    """
    rows = data.split("\n")
    total = len(rows)

    # Position of the first row with visible text; bail out if there is none.
    start = next(
        (k for k, row in enumerate(rows) if not is_all_white(row)), total)
    if start >= total:
        return data

    first_indent = get_indent(rows[start])
    follower = start + 1
    if follower < total and not is_all_white(rows[follower]):
        rest_indent = get_indent(rows[follower])
    else:
        rest_indent = first_indent

    out = rows[:start]
    pending = first_indent
    pos = start
    while pos < total and not is_all_white(rows[pos]):
        # XXX Should take double space after period (etc.) into account
        pieces = re.split(r"(\s+)", rows[pos])
        # Even slots hold words, odd slots the whitespace that followed
        # them; the final word has no separator, hence the None filler.
        tokens = pieces[0::2]
        gaps = pieces[1::2] + [None]
        for token, gap in zip(tokens, gaps):
            if not token:
                continue  # Can happen when line ends in whitespace
            if len((pending + token).expandtabs()) > limit and \
                   pending != first_indent:
                out.append(pending.rstrip())
                pending = rest_indent
            pending = pending + token + " "
            # Any separator other than one plain space earns a second space.
            if gap is not None and gap != " ":
                pending = pending + " "
        pos += 1
    out.append(pending.rstrip())
    # XXX Should reformat remaining paragraphs as well
    out.extend(rows[pos:])
    return "\n".join(out)
David Scherer | 7aced17 | 2000-08-15 01:13:23 +0000 | [diff] [blame] | 148 | |
def reformat_comment(data, limit, comment_header):
    """Return comment block data re-wrapped to limit, header included.

    The first len(comment_header) characters are cut from every line of
    data (whether or not they equal the header), the remainder is
    wrapped with reformat_paragraph, and comment_header is then put
    back in front of each resulting line.
    """
    header_len = len(comment_header)
    bodies = [row[header_len:] for row in data.split("\n")]

    # Wrap to what is left after the header, but never narrower than 20.
    width = max(limit - header_len, 20)
    wrapped = reformat_paragraph("\n".join(bodies), width).split("\n")

    # If the block ended in a newline the last piece is empty; it must
    # not receive a header, so take it off and restore the bare newline.
    if wrapped[-1]:
        trailer = ""
    else:
        trailer = "\n"
        wrapped.pop()
    return "\n".join([comment_header + row for row in wrapped]) + trailer
| 170 | |
def is_all_white(line):
    """Tell whether line is empty or made up solely of whitespace."""
    blank = re.match(r"^\s*$", line)
    return bool(blank)
| 175 | |
def get_indent(line):
    """Return the run of spaces and tabs that line starts with ('' if none)."""
    leading = re.match(r"^([ \t]*)", line)
    return leading.group(0)
David Scherer | 7aced17 | 2000-08-15 01:13:23 +0000 | [diff] [blame] | 179 | |
def get_comment_header(line):
    """Return the leading spaces/tabs of line plus any '#' characters after them.

    For a comment line this is its header, such as '    #', which is
    used to find the other lines of a comment block with the same
    indent.  The result is '' only when line starts with neither
    whitespace nor '#'; an indented line that is not a comment yields
    just its leading whitespace.
    """
    found = re.match(r"^([ \t]*#*)", line)
    return found.group(1) if found is not None else ""
Terry Jan Reedy | 7c64aad | 2013-08-10 16:56:28 -0400 | [diff] [blame] | 190 | |
Terry Jan Reedy | bfbaa6b | 2016-08-31 00:50:55 -0400 | [diff] [blame] | 191 | |
Terry Jan Reedy | 7c64aad | 2013-08-10 16:56:28 -0400 | [diff] [blame] | 192 | if __name__ == "__main__": |
Terry Jan Reedy | ea3dc80 | 2018-06-18 04:47:59 -0400 | [diff] [blame] | 193 | from unittest import main |
| 194 | main('idlelib.idle_test.test_paragraph', verbosity=2, exit=False) |