blob: 72e3755327ae0563d07224ba12b8d24347a08d11 [file] [log] [blame]
Kamil Rytarowskicb77f0d2017-05-07 23:25:26 +02001#!/usr/bin/env perl
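#
# Clean one or more text files of stealth whitespace: trailing whitespace
# (including carriage returns), spaces hidden in front of tabs, and blank
# lines at the end of the file.  Files are rewritten in place; arguments
# that are not plain files, and files containing a NUL byte, are reported
# and skipped.
# WARNING: this can be a highly destructive operation.  Use with caution.
#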
6
Kamil Rytarowskicb77f0d2017-05-07 23:25:26 +02007use warnings;
H. Peter Anvin12b31562007-03-12 12:16:30 -07008use bytes;
9use File::Basename;
10
H. Peter Anvincb3ed5b2007-05-25 17:58:26 -070011# Default options
12$max_width = 79;
13
# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Argument: one string (normally a single line, with or without its line
#           terminator).
# Returns:  the cleaned string.  An undefined argument yields ''.
#
# Runs of spaces are held back until the following character is known:
#   - in front of a tab they are dropped, and as many tabs are emitted as
#     are needed to reach the tab stop (every 8 columns) that the original
#     spaces-plus-tab sequence reached;
#   - in front of anything else, and at the end of the string, they are
#     emitted unchanged.
#
# Columns are counted in characters.  A byte string that is valid UTF-8 is
# decoded for the duration of the scan and encoded again before returning,
# so a multi-byte character advances the column by one rather than by its
# byte count; the bytes of every non-blank character are returned as they
# came in.  A byte string that is not valid UTF-8 is scanned byte by byte.
# NOTE(review): double-width and combining characters still count as one
# column each.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my($li) = @_;
    return '' unless defined($li);

    # $li is a private copy, so decoding in place does not touch the
    # caller's string.  utf8::decode() returns false for data that is not
    # well-formed UTF-8, in which case we fall back to per-byte columns.
    my $decoded = !utf8::is_utf8($li) && utf8::decode($li);

    my $lo  = '';               # Output built so far
    my $pos = 0;                # Column reached by $lo on the current line
    my $nsp = 0;                # Spaces seen but not yet emitted

    foreach my $c (split(//, $li)) {
        if ($c eq ' ') {
            $nsp++;
            next;
        }

        if ($c eq "\t") {
            # Tab stop reached by the pending spaces plus this tab, and
            # the number of tabs needed to get there from $pos.
            my $npos = ($pos + $nsp + 8) & ~7;
            $lo .= "\t" x (($npos >> 3) - ($pos >> 3));
            $pos = $npos;
            $nsp = 0;
            next;
        }

        # Any other character: the pending spaces are real, keep them.
        $lo .= (' ' x $nsp) . $c;
        $pos = ($c eq "\n" || $c eq "\r") ? 0 : $pos + $nsp + 1;
        $nsp = 0;
    }

    $lo .= ' ' x $nsp;          # Spaces at the very end of the string

    # Hand back bytes if we were given bytes.
    utf8::encode($lo) if $decoded;
    return $lo;
}
53
# Compute the visual width of a string
#
# Argument: one string, which may contain several lines.
# Returns:  the largest column reached on any line of the string, with
#           tab stops every 8 columns.  An undefined or empty argument
#           yields 0.
#
# Both "\n" and "\r" return to column 0 without taking up a column, the
# same way clean_space_tabs() treats them.
#
# Columns are counted in characters: a byte string that is valid UTF-8 is
# decoded first, so a multi-byte character counts once.  Data that is not
# valid UTF-8 is measured byte by byte.
# NOTE(review): double-width and combining characters still count as one
# column each.
sub strwidth($) {
    no bytes;                   # Tab alignment depends on characters

    my($li) = @_;
    return 0 unless defined($li);

    # $li is a private copy; a failed decode leaves it as raw bytes.
    utf8::decode($li) unless utf8::is_utf8($li);

    my $pos  = 0;               # Column on the current line
    my $mlen = 0;               # Widest completed line so far

    foreach my $c (split(//, $li)) {
        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n" || $c eq "\r") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    # The last line may not have been terminated.
    return ($pos > $mlen) ? $pos : $mlen;
}
78
# Program name, used as the prefix of diagnostics.
my $name = basename($0);

# Files named on the command line, in order.
my @files = ();

# Command line: anything starting with '-' is an option, everything else
# is a file name.  The only option is -width/-w, which takes the maximum
# line width to warn about (0 disables the warning).
while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        if ($arg eq '-width' || $arg eq '-w') {
            my $width = shift(@ARGV);
            if (!defined($width)) {
                # Without this check a missing value silently became 0
                # and switched the width warning off.
                print STDERR "$name: option $arg requires an argument\n";
                print STDERR "Usage: $name [-width #] files...\n";
                exit 1;
            }
            $max_width = $width+0;
        } else {
            print STDERR "Usage: $name [-width #] files...\n";
            exit 1;
        }
    } else {
        push(@files, $arg);
    }
}

foreach my $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    # Read/write without truncation: the file is rewritten in place, and
    # only if cleaning actually changed something.
    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next;
    }

    # Back to the start for the real pass.  Nothing has been written yet,
    # so a failure here only means this file is skipped.
    if (!seek($fh, 0, 0)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        next;
    }

    my $in_bytes    = 0;        # Bytes read from the file
    my $out_bytes   = 0;        # Bytes queued in @lines
    my $blank_bytes = 0;        # Bytes queued in @blanks

    my @blanks = ();            # Blank lines not yet known to be interior
    my @lines  = ();            # Cleaned file contents
    my $lineno = 0;

    while ( defined(my $line = <$fh>) ) {
        $lineno++;
        $in_bytes += length($line);
        $line =~ s/[ \t\r]*$//;         # Remove trailing spaces
        $line = clean_space_tabs($line);

        # A line is blank if nothing but its newline is left.  '' is what
        # remains of a whitespace-only last line that had no newline; it
        # must count as blank too, or the blank lines in front of it
        # would be kept.
        if ( $line eq "\n" || $line eq '' ) {
            push(@blanks, $line);
            $blank_bytes += length($line);
        } else {
            # Blank lines followed by real content are interior: keep them.
            push(@lines, @blanks);
            $out_bytes += $blank_bytes;
            push(@lines, $line);
            $out_bytes += length($line);
            @blanks = ();
            $blank_bytes = 0;
        }

        my $l_width = strwidth($line);
        if ($max_width && $l_width > $max_width) {
            print STDERR
                "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
        }
    }

    # Any blanks at the end of the file are discarded

    # Every cleaning step only removes bytes or replaces several with
    # fewer, so equal byte counts mean the contents are unchanged.
    my $modified = ($in_bytes != $out_bytes);

    if ($modified) {
        # Only write to the file if changed
        if (!seek($fh, 0, 0)) {
            # Still untouched at this point, so skipping is safe.
            print STDERR "$name: Cannot rewind file: $f: $!\n";
            close($fh);
            next;
        }

        # From here on the file is being overwritten; any failure leaves
        # it in an unknown state, so stop instead of moving on.
        if (!print {$fh} @lines) {
            die "$name: Failed to write modified file: $f: $!\n";
        }

        # tell() reports failure as -1, not undef.
        my $where = tell($fh);
        if ( !defined($where) || $where < 0 ||
             !truncate($fh, $where) ) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors only show up at close time.
    if (!close($fh) && $modified) {
        die "$name: Failed to write modified file: $f: $!\n";
    }
}