#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
#
# Input on STDIN; output to STDOUT.

use strict;
use warnings;

# Return a copy of one line with the terminal decorations removed: screen
# control sequences of the form "ESC [ number m", and "char, backspace" pairs
# as used for overstriking.

sub strip_controls
  {
  my ($line) = @_;
  $line =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
  $line =~ s/.\x8//g;         # Remove "char, backspace"
  return $line;
  }

# Copy the text read from the handle $in to the handle $out, cleaning it up
# on the way:
#
#   . screen controls and backspace pairs are removed from every line;
#   . only the first "PCREname(n)   PCREname(n)" header line is kept, and the
#     line following each header is dropped;
#   . chunks of the form <3 blanks><non-blank><3 blanks> (page footers) are
#     removed;
#   . two blank lines are forced before each heading (a line that starts in
#     column one), except for the "Last updated" and "Copyright" lines and
#     the very first line output.
#
# Arguments:  $in    file handle to read from
#             $out   file handle to write to
# Returns:    nothing

sub clean_nroff
  {
  my ($in, $out) = @_;

  my $blankcount = 0;     # Blank lines read but not yet output
  my $lastwascut = 0;     # True when the previous action was a footer cut
  my $firstheader = 1;    # True until the first header line has been seen
  my $lastprinted;        # Last non-blank line output; undefined until then

  while (defined(my $line = <$in>))
    {
    $line = strip_controls($line);

    # Handle header lines. Retain only the first one we encounter, but remove
    # the blank line that follows. Any others (e.g. at end of document) and
    # the following blank line are dropped.

    if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
      {
      if ($firstheader)
        {
        $firstheader = 0;
        print {$out} $line;
        $lastprinted = $line;
        $lastwascut = 0;
        }
      scalar <$in>;         # Remove a blank that follows
      next;
      }

    # Count runs of empty lines

    if ($line =~ /^\s*$/)
      {
      $blankcount++;
      $lastwascut = 0;
      next;
      }

    # If a chunk of lines has been cut out (page footer) and the next line
    # has a different indentation, put back one blank line.

    if ($lastwascut && $blankcount < 1 && defined($lastprinted))
      {
      my ($previndent) = $lastprinted =~ /^(\s*)/;
      my ($thisindent) = $line =~ /^(\s*)/;
      $blankcount++ if $previndent ne $thisindent;
      }

    # We get here only when we have a non-blank line in hand. If it was
    # preceded by 3 or more blank lines, read the next 3 lines and see if they
    # are blank. If so, remove all 7 lines, and remember that we have just
    # done a cut.

    if ($blankcount >= 3)
      {
      my @next;
      for my $i (0 .. 2)
        {
        my $ahead = <$in>;
        $ahead = "" if !defined $ahead;    # End of input counts as blank
        $next[$i] = strip_controls($ahead);
        }

      # Cut out chunks of the form <3 blanks><non-blank><3 blanks>

      if ($next[0] =~ /^\s*$/ &&
          $next[1] =~ /^\s*$/ &&
          $next[2] =~ /^\s*$/)
        {
        $blankcount -= 3;
        $lastwascut = 1;
        }

      # Otherwise output the saved blanks, the current, and the next three
      # lines. Remember the last non-blank line that was actually printed, so
      # that the indentation test after a later cut compares against the
      # right line.

      else
        {
        print {$out} "\n" x $blankcount;
        print {$out} $line;
        $lastprinted = $line;
        for my $ahead (@next)
          {
          # A second backspace pass is applied to these lines only; it
          # removes pairs that become adjacent after the first pass.
          $ahead =~ s/.\x8//g;
          print {$out} $ahead;
          $lastprinted = $ahead if $ahead !~ /^\s*$/;
          }
        $lastwascut = 0;
        $blankcount = 0;
        }
      }

    # This non-blank line is not preceded by 3 or more blank lines. Output
    # any blanks there are, and the line. Remember it. Force two blank lines
    # before headings.

    else
      {
      $blankcount = 2 if $line =~ /^\S/ && $line !~ /^Last updated/ &&
        $line !~ /^Copyright/ && defined($lastprinted);
      print {$out} "\n" x $blankcount;
      print {$out} $line;
      $lastprinted = $line;
      $lastwascut = 0;
      $blankcount = 0;
      }
    }

  return;
  }

# Run as a filter when executed as a script. The test on caller() means that
# nothing is read from STDIN if this file is loaded from other Perl code.

clean_nroff(\*STDIN, \*STDOUT) unless caller;

# End